scalapack-1.8.0/0000750000175000017500000000000010605620006013310 5ustar muammarmuammarscalapack-1.8.0/REDIST/0000750000175000017500000000000010605234747014317 5ustar muammarmuammarscalapack-1.8.0/REDIST/TESTING/0000750000175000017500000000000010605234747015434 5ustar muammarmuammarscalapack-1.8.0/REDIST/TESTING/pitrmrdrv.c0000640000175000017500000003520010363532303017620 0ustar muammarmuammar#include "redist.h" /* $Id: pitrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * pitrmrdrv.c : * * * PURPOSE: * * this driver is testing the PITRMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed INTEGER (block * scattered) matrix. Then it calls PITRMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pitrmr.c file for detailed info on the PITRMR2D function. * * * The testing parameters are read from the file TRMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are INTEGER * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(int) bytes, * * * - use the procedures of the files: * * pitrmr.o pitrmr2.o pitrmraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pitrmr2do_ #define fortran_mr2dnew pitrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PITRMR2D #define fortran_mr2d PITRMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pitrmr2do #define fortran_mr2dnew pitrmr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Citrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 itrscanD0 #define dispmat itrdispmat #define setmemory itrsetmemory #define freememory itrfreememory #define scan_intervals itrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpitrmr2do(); extern void Cpitrmr2d(); /* some defines for Cpitrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) int *block; int m, n; { int *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xitrmr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; char *uplo, *diag; int *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open TRMR2D.dat\n"); exit(1); }; printf("\n// ITRMR2D TESTER for INTEGER //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, &u, &d, NULL); uplo = u ? "UPPER" : "LOWER"; diag = d ? "UNIT" : "NONUNIT"; printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", toupper(*diag) == 'U' ? "unit" : "nonunit"); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; parameters[22] = *uplo == 'U'; parameters[23] = *diag == 'U'; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; uplo = parameters[22] ? "UPPER" : "LOWER"; diag = parameters[23] ? "UNIT" : "NONUNIT"; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(int)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpitrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpitrmr2d(uplo, diag, m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; else if (toupper(*uplo) == 'U') in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); else in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; printf("Proc %d : Error element number %d, value = %d , initvalue =%d \n" ,mypnum, i, ptrmyblockvide[i], ptrmyblockcopy[i]); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d INTEGER elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pzgemrdrv.c0000640000175000017500000003424010363532303017612 0ustar muammarmuammar#include "redist.h" /* $Id: pzgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pzgemrdrv.c : * * * PURPOSE: * * this driver is testing the PZGEMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed COMPLEX*16 (block * scattered) matrix. Then it calls PZGEMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pzgemr.c file for detailed info on the PZGEMR2D function. * * * The testing parameters are read from the file GEMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are COMPLEX*16 * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(dcomplex) * bytes, * * * - use the procedures of the files: * * pzgemr.o pzgemr2.o pzgemraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pzgemr2do_ #define fortran_mr2dnew pzgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZGEMR2D #define fortran_mr2d PZGEMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pzgemr2do #define fortran_mr2dnew pzgemr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Czgelacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 zgescanD0 #define dispmat zgedispmat #define setmemory zgesetmemory #define freememory zgefreememory #define scan_intervals zgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpzgemr2do(); extern void Cpzgemr2d(); /* some defines for Cpzgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) dcomplex *block; int m, n; { dcomplex *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata).r = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xzgemr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; dcomplex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("GEMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open GEMR2D.dat\n"); exit(1); }; printf("\n// ZGEMR2D TESTER for COMPLEX*16 //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, NULL); printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(dcomplex)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i].r = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpzgemr2d(m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpzgemr2d(m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; if (!in) { ptrmyblockcopy[i].r = -1; } if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i].r, ptrmyblockcopy[i].r); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d COMPLEX*16 elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/Makefile0000640000175000017500000000561310363532303017070 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Redistribution Testing Makefile # # Creation date: March 20, 1995 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc igemrexe = $(REDISTTSTdir)/xigemr sgemrexe = $(REDISTTSTdir)/xsgemr dgemrexe = $(REDISTTSTdir)/xdgemr cgemrexe = $(REDISTTSTdir)/xcgemr zgemrexe = $(REDISTTSTdir)/xzgemr itrmrexe = $(REDISTTSTdir)/xitrmr strmrexe = $(REDISTTSTdir)/xstrmr dtrmrexe = $(REDISTTSTdir)/xdtrmr ctrmrexe = $(REDISTTSTdir)/xctrmr ztrmrexe = $(REDISTTSTdir)/xztrmr igemr = pigemrdrv.o sgemr = psgemrdrv.o dgemr = pdgemrdrv.o cgemr = pcgemrdrv.o zgemr = pzgemrdrv.o itrmr = pitrmrdrv.o strmr = pstrmrdrv.o dtrmr = pdtrmrdrv.o ctrmr = pctrmrdrv.o ztrmr = pztrmrdrv.o all: exe exe: integer single double complex complex16 integer: $(igemrexe) $(itrmrexe) single: $(sgemrexe) $(strmrexe) double: $(dgemrexe) $(dtrmrexe) complex: $(cgemrexe) $(ctrmrexe) complex16: $(zgemrexe) $(ztrmrexe) $(TESTINGdir)/GEMR2D.dat: GEMR2D.dat cp GEMR2D.dat $(TESTINGdir) $(igemrexe): $(SCALAPACKLIB) $(igemr) $(CCLOADER) $(CCLOADFLAGS) -o $(igemrexe) $(igemr) $(PRLIBS) $(MAKE) $(TESTINGdir)/GEMR2D.dat $(sgemrexe): $(SCALAPACKLIB) $(sgemr) $(CCLOADER) $(CCLOADFLAGS) -o $(sgemrexe) $(sgemr) $(PRLIBS) $(MAKE) $(TESTINGdir)/GEMR2D.dat $(dgemrexe): $(SCALAPACKLIB) $(dgemr) $(CCLOADER) $(CCLOADFLAGS) -o $(dgemrexe) $(dgemr) $(PRLIBS) $(MAKE) $(TESTINGdir)/GEMR2D.dat $(cgemrexe): $(SCALAPACKLIB) $(cgemr) $(CCLOADER) $(CCLOADFLAGS) -o $(cgemrexe) $(cgemr) $(PRLIBS) $(MAKE) $(TESTINGdir)/GEMR2D.dat $(zgemrexe): $(SCALAPACKLIB) $(zgemr) $(CCLOADER) $(CCLOADFLAGS) -o $(zgemrexe) $(zgemr) $(PRLIBS) $(MAKE) $(TESTINGdir)/GEMR2D.dat $(TESTINGdir)/TRMR2D.dat: TRMR2D.dat cp TRMR2D.dat $(TESTINGdir) $(itrmrexe): $(SCALAPACKLIB) $(itrmr) $(CCLOADER) $(CCLOADFLAGS) -o $(itrmrexe) $(itrmr) $(PRLIBS) $(MAKE) $(TESTINGdir)/TRMR2D.dat $(strmrexe): $(SCALAPACKLIB) $(strmr) $(CCLOADER) $(CCLOADFLAGS) -o $(strmrexe) $(strmr) $(PRLIBS) $(MAKE) $(TESTINGdir)/TRMR2D.dat $(dtrmrexe): $(SCALAPACKLIB) $(dtrmr) $(CCLOADER) $(CCLOADFLAGS) -o $(dtrmrexe) $(dtrmr) $(PRLIBS) $(MAKE) $(TESTINGdir)/TRMR2D.dat $(ctrmrexe): $(SCALAPACKLIB) $(ctrmr) $(CCLOADER) $(CCLOADFLAGS) -o $(ctrmrexe) $(ctrmr) $(PRLIBS) $(MAKE) $(TESTINGdir)/TRMR2D.dat $(ztrmrexe): $(SCALAPACKLIB) $(ztrmr) $(CCLOADER) $(CCLOADFLAGS) -o $(ztrmrexe) $(ztrmr) $(PRLIBS) $(MAKE) $(TESTINGdir)/TRMR2D.dat $(igemr): $(FRC) $(sgemr): $(FRC) $(dgemr): $(FRC) $(cgemr): $(FRC) $(zgemr): $(FRC) $(itrmr): $(FRC) $(strmr): $(FRC) $(dtrmr): $(FRC) $(ctrmr): $(FRC) $(ztrmr): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/REDIST/TESTING/GEMR2D.dat0000640000175000017500000001761110363532303017043 0ustar muammarmuammar90 200 200 200 200 0 0 1 1 1 4 3 6 200 200 0 0 1 1 1 4 5 3 200 200 200 200 0 0 1 1 4 1 7 9 200 200 0 0 1 1 4 1 4 9 200 200 200 200 0 0 1 1 4 1 2 9 200 200 0 0 1 1 2 2 7 7 200 200 200 200 0 0 1 1 2 2 6 3 200 200 0 0 1 1 2 2 5 9 200 200 200 200 0 0 1 1 2 2 9 10 200 200 0 0 1 1 1 4 3 4 200 200 200 200 0 0 1 1 2 2 1 10 200 200 0 0 1 1 1 3 1 4 200 200 200 200 0 0 1 1 1 4 14 6 200 200 0 0 1 1 1 3 8 5 200 200 200 200 0 0 1 1 1 3 6 4 200 200 0 0 1 1 4 1 2 2 200 200 200 200 0 0 1 1 4 1 3 10 200 200 0 0 1 1 1 4 11 5 200 200 200 200 0 0 1 1 2 2 10 3 200 200 0 0 1 1 1 3 11 7 300 300 300 300 0 0 1 1 1 4 10 2 300 300 0 0 1 1 2 2 4 5 300 300 300 300 0 0 1 1 2 2 1 12 300 300 0 0 1 1 3 1 10 6 300 300 300 300 0 0 1 1 1 4 13 8 300 300 0 0 1 1 2 2 12 4 300 300 300 300 0 0 1 1 2 2 12 9 300 300 0 0 1 1 1 3 16 10 300 300 300 300 0 0 1 1 1 4 15 2 300 300 0 0 1 1 2 2 12 1 300 300 300 300 0 0 1 1 2 2 4 11 300 300 0 0 1 1 1 3 9 3 300 300 300 300 0 0 1 1 2 2 4 12 300 300 0 0 1 1 4 1 6 1 300 300 300 300 0 0 1 1 3 1 7 7 300 300 0 0 1 1 4 1 5 11 300 300 300 300 0 0 1 1 3 1 10 11 300 300 0 0 1 1 1 3 10 3 300 300 300 300 0 0 1 1 2 2 1 1 300 300 0 0 1 1 1 3 9 2 400 400 400 400 0 0 1 1 4 1 2 4 400 400 0 0 1 1 1 3 5 10 400 400 400 400 0 0 1 1 1 3 5 1 400 400 0 0 1 1 2 2 14 4 400 400 400 400 0 0 1 1 4 1 3 16 400 400 0 0 1 1 1 3 1 7 400 400 400 400 0 0 1 1 1 3 8 5 400 400 0 0 1 1 3 1 9 2 400 400 400 400 0 0 1 1 1 4 9 3 400 400 0 0 1 1 1 3 11 3 400 400 400 400 0 0 1 1 2 2 12 11 400 400 0 0 1 1 4 1 5 9 400 400 400 400 0 0 1 1 3 1 6 12 400 400 0 0 1 1 1 4 15 6 400 400 400 400 0 0 1 1 4 1 5 5 400 400 0 0 1 1 4 1 7 15 400 400 400 400 0 0 1 1 3 1 4 6 400 400 0 0 1 1 4 1 2 4 400 400 400 400 0 0 1 1 3 1 6 3 400 400 0 0 1 1 1 4 18 10 500 500 500 500 0 0 1 1 3 1 9 21 500 500 0 0 1 1 1 3 13 4 500 500 500 500 0 0 1 1 2 2 5 8 500 500 0 0 1 1 4 1 4 19 500 500 500 500 0 0 1 1 1 4 14 9 500 500 0 0 1 1 1 4 17 11 500 500 500 500 0 0 1 1 1 4 4 2 500 500 0 0 1 1 2 2 4 1 500 500 500 500 0 0 1 1 1 3 7 4 500 500 0 0 1 1 2 2 3 12 500 500 500 500 0 0 1 1 2 2 8 8 500 500 0 0 1 1 1 4 16 4 500 500 500 500 0 0 1 1 1 4 20 10 500 500 0 0 1 1 2 2 8 7 500 500 500 500 0 0 1 1 2 2 1 14 500 500 0 0 1 1 3 1 6 20 500 500 500 500 0 0 1 1 4 1 5 15 500 500 0 0 1 1 1 3 2 8 500 500 500 500 0 0 1 1 1 3 6 3 500 500 0 0 1 1 3 1 12 4 600 600 600 600 0 0 1 1 1 4 4 8 600 600 0 0 1 1 2 2 2 9 600 600 600 600 0 0 1 1 2 2 13 7 600 600 0 0 1 1 3 1 2 6 600 600 600 600 0 0 1 1 1 3 2 2 600 600 0 0 1 1 1 4 13 4 600 600 600 600 0 0 1 1 3 1 7 1 600 600 0 0 1 1 2 2 1 9 600 600 600 600 0 0 1 1 1 3 17 9 600 600 0 0 1 1 2 2 7 17 600 600 600 600 0 0 1 1 2 2 13 3 600 600 0 0 1 1 1 3 15 4 600 600 600 600 0 0 1 1 1 3 3 13 600 600 0 0 1 1 1 3 22 3 600 600 600 600 0 0 1 1 1 4 2 8 600 600 0 0 1 1 2 2 9 14 600 600 600 600 0 0 1 1 2 2 8 5 600 600 0 0 1 1 1 3 5 3 600 600 600 600 0 0 1 1 3 1 3 3 600 600 0 0 1 1 2 2 13 14 700 700 700 700 0 0 1 1 1 3 24 2 700 700 0 0 1 1 1 4 11 11 700 700 700 700 0 0 1 1 3 1 10 6 700 700 0 0 1 1 1 4 11 7 700 700 700 700 0 0 1 1 1 3 24 10 700 700 0 0 1 1 1 3 25 4 700 700 700 700 0 0 1 1 3 1 8 3 700 700 0 0 1 1 4 1 5 9 700 700 700 700 0 0 1 1 1 3 18 6 700 700 0 0 1 1 1 4 22 11 700 700 700 700 0 0 1 1 2 2 17 14 700 700 0 0 1 1 3 1 8 21 700 700 700 700 0 0 1 1 3 1 11 20 700 700 0 0 1 1 4 1 8 5 700 700 700 700 0 0 1 1 2 2 13 16 700 700 0 0 1 1 2 2 8 11 700 700 700 700 0 0 1 1 4 1 5 11 700 700 0 0 1 1 3 1 5 11 700 700 700 700 0 0 1 1 2 2 11 17 700 700 0 0 1 1 1 3 1 6 800 800 800 800 0 0 1 1 1 4 23 13 800 800 0 0 1 1 3 1 15 17 800 800 800 800 0 0 1 1 1 3 27 8 800 800 0 0 1 1 3 1 8 3 800 800 800 800 0 0 1 1 3 1 12 6 800 800 0 0 1 1 4 1 4 7 800 800 800 800 0 0 1 1 3 1 16 14 800 800 0 0 1 1 4 1 14 6 800 800 800 800 0 0 1 1 4 1 9 4 800 800 0 0 1 1 3 1 3 15 800 800 800 800 0 0 1 1 4 1 5 12 800 800 0 0 1 1 1 3 22 9 800 800 800 800 0 0 1 1 3 1 9 11 800 800 0 0 1 1 2 2 2 19 800 800 800 800 0 0 1 1 1 4 16 11 800 800 0 0 1 1 1 3 13 5 800 800 800 800 0 0 1 1 1 3 18 4 800 800 0 0 1 1 3 1 9 8 800 800 800 800 0 0 1 1 3 1 1 7 800 800 0 0 1 1 3 1 7 4 900 900 900 900 0 0 1 1 1 3 19 11 900 900 0 0 1 1 4 1 12 25 900 900 900 900 0 0 1 1 1 3 18 15 900 900 0 0 1 1 4 1 9 6 900 900 900 900 0 0 1 1 3 1 11 10 900 900 0 0 1 1 4 1 15 18 900 900 900 900 0 0 1 1 3 1 2 30 900 900 0 0 1 1 1 3 3 5 900 900 900 900 0 0 1 1 1 4 22 12 900 900 0 0 1 1 3 1 1 20 900 900 900 900 0 0 1 1 3 1 10 20 900 900 0 0 1 1 1 3 2 5 900 900 900 900 0 0 1 1 2 2 11 21 900 900 0 0 1 1 1 3 22 12 900 900 900 900 0 0 1 1 3 1 4 5 900 900 0 0 1 1 3 1 5 11 900 900 900 900 0 0 1 1 1 4 29 12 900 900 0 0 1 1 2 2 3 15 900 900 900 900 0 0 1 1 2 2 15 7 900 900 0 0 1 1 3 1 17 7 1000 1000 1000 1000 0 0 1 1 1 3 1 15 1000 1000 0 0 1 1 2 2 7 18 1000 1000 1000 1000 0 0 1 1 1 3 6 5 1000 1000 0 0 1 1 3 1 18 15 1000 1000 1000 1000 0 0 1 1 1 3 11 8 1000 1000 0 0 1 1 3 1 10 29 1000 1000 1000 1000 0 0 1 1 4 1 8 31 1000 1000 0 0 1 1 2 2 17 22 1000 1000 1000 1000 0 0 1 1 3 1 9 26 1000 1000 0 0 1 1 2 2 22 20 1000 1000 1000 1000 0 0 1 1 2 2 2 17 1000 1000 0 0 1 1 1 3 29 6 1000 1000 1000 1000 0 0 1 1 3 1 8 17 1000 1000 0 0 1 1 3 1 4 20 1000 1000 1000 1000 0 0 1 1 1 3 21 12 1000 1000 0 0 1 1 4 1 15 26 1000 1000 1000 1000 0 0 1 1 1 3 8 3 1000 1000 0 0 1 1 2 2 11 12 1000 1000 1000 1000 0 0 1 1 2 2 10 20 1000 1000 0 0 1 1 2 2 11 20 scalapack-1.8.0/REDIST/TESTING/pdgemrdrv.c0000640000175000017500000003416010363532303017565 0ustar muammarmuammar#include "redist.h" /* $Id: pdgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pdgemrdrv.c : * * * PURPOSE: * * this driver is testing the PDGEMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed DOUBLE PRECISION * (block scattered) matrix. Then it calls PDGEMR2D for the inverse * redistribution and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pdgemr.c file for detailed info on the PDGEMR2D function. * * * The testing parameters are read from the file GEMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are DOUBLE PRECISION * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(double) * bytes, * * * - use the procedures of the files: * * pdgemr.o pdgemr2.o pdgemraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdgemr2do_ #define fortran_mr2dnew pdgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDGEMR2D #define fortran_mr2d PDGEMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdgemr2do #define fortran_mr2dnew pdgemr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dgescanD0 #define dispmat dgedispmat #define setmemory dgesetmemory #define freememory dgefreememory #define scan_intervals dgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdgemr2do(); extern void Cpdgemr2d(); /* some defines for Cpdgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) double *block; int m, n; { double *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xdgemr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; double *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("GEMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open GEMR2D.dat\n"); exit(1); }; printf("\n// DGEMR2D TESTER for DOUBLE PRECISION //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, NULL); printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(double)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpdgemr2d(m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpdgemr2d(m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i], ptrmyblockcopy[i]); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d DOUBLE PRECISION elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/TRMR2D.dat0000640000175000017500000000231410363532303017067 0ustar muammarmuammar# test file for SCALAPACK routine TRMR2D 10 # number of tests # m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1 UP UNIT 12 20 81 79 0 0 18 52 1 1 6 8 56 103 1 1 42 34 2 2 8 8 1 0 59 79 98 100 0 1 13 22 1 2 8 1 62 173 0 0 4 25 1 2 9 8 1 0 22 25 87 121 0 1 17 15 2 2 12 51 90 157 1 1 19 11 2 2 36 91 1 0 1 13 1 109 0 0 1 51 2 2 8 9 134 123 0 1 15 74 1 2 16 8 0 0 129 11 187 74 0 0 1 11 2 2 7 8 185 94 0 0 27 20 2 1 8 21 1 1 43 10 45 27 1 0 2 4 2 1 10 2 149 96 0 0 78 70 1 1 9 9 1 0 1 27 80 29 0 1 72 1 2 2 19 8 41 43 1 0 18 10 2 1 8 9 0 0 29 5 37 40 0 1 1 29 1 2 17 9 46 86 0 0 5 81 2 1 7 9 0 1 59 25 91 151 0 1 20 62 2 2 5 150 81 89 0 0 5 9 1 1 9 12 0 0 31 76 114 95 0 1 79 17 1 2 8 49 169 169 0 1 50 16 1 2 9 8 1 0 12 127 22 191 1 1 11 21 2 2 8 1 28 193 1 0 11 46 2 1 9 8 1 1 scalapack-1.8.0/REDIST/TESTING/pigemrdrv.c0000640000175000017500000003407210363532303017574 0ustar muammarmuammar#include "redist.h" /* $Id: pigemrdrv.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * pigemrdrv.c : * * * PURPOSE: * * this driver is testing the PIGEMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed INTEGER (block * scattered) matrix. Then it calls PIGEMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pigemr.c file for detailed info on the PIGEMR2D function. * * * The testing parameters are read from the file GEMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are INTEGER * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(int) bytes, * * * - use the procedures of the files: * * pigemr.o pigemr2.o pigemraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pigemr2do_ #define fortran_mr2dnew pigemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PIGEMR2D #define fortran_mr2d PIGEMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pigemr2do #define fortran_mr2dnew pigemr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Cigelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 igescanD0 #define dispmat igedispmat #define setmemory igesetmemory #define freememory igefreememory #define scan_intervals igescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpigemr2do(); extern void Cpigemr2d(); /* some defines for Cpigemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) int *block; int m, n; { int *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xigemr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; int *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("GEMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open GEMR2D.dat\n"); exit(1); }; printf("\n// IGEMR2D TESTER for INTEGER //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, NULL); printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(int)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpigemr2d(m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpigemr2d(m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; printf("Proc %d : Error element number %d, value = %d , initvalue =%d \n" ,mypnum, i, ptrmyblockvide[i], ptrmyblockcopy[i]); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d INTEGER elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pztrmrdrv.c0000640000175000017500000003534610363532303017654 0ustar muammarmuammar#include "redist.h" /* $Id: pztrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pztrmrdrv.c : * * * PURPOSE: * * this driver is testing the PZTRMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed COMPLEX*16 (block * scattered) matrix. Then it calls PZTRMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pztrmr.c file for detailed info on the PZTRMR2D function. * * * The testing parameters are read from the file TRMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are COMPLEX*16 * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(dcomplex) * bytes, * * * - use the procedures of the files: * * pztrmr.o pztrmr2.o pztrmraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pztrmr2do_ #define fortran_mr2dnew pztrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZTRMR2D #define fortran_mr2d PZTRMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pztrmr2do #define fortran_mr2dnew pztrmr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Cztrlacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ztrscanD0 #define dispmat ztrdispmat #define setmemory ztrsetmemory #define freememory ztrfreememory #define scan_intervals ztrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpztrmr2do(); extern void Cpztrmr2d(); /* some defines for Cpztrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) dcomplex *block; int m, n; { dcomplex *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata).r = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xztrmr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; char *uplo, *diag; dcomplex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open TRMR2D.dat\n"); exit(1); }; printf("\n// ZTRMR2D TESTER for COMPLEX*16 //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, &u, &d, NULL); uplo = u ? "UPPER" : "LOWER"; diag = d ? "UNIT" : "NONUNIT"; printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", toupper(*diag) == 'U' ? "unit" : "nonunit"); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; parameters[22] = *uplo == 'U'; parameters[23] = *diag == 'U'; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; uplo = parameters[22] ? "UPPER" : "LOWER"; diag = parameters[23] ? "UNIT" : "NONUNIT"; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(dcomplex)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i].r = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpztrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpztrmr2d(uplo, diag, m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; else if (toupper(*uplo) == 'U') in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); else in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); if (!in) { ptrmyblockcopy[i].r = -1; } if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i].r, ptrmyblockcopy[i].r); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d COMPLEX*16 elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pstrmrdrv.c0000640000175000017500000003477010363532303017645 0ustar muammarmuammar#include "redist.h" /* $Id: pstrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pstrmrdrv.c : * * * PURPOSE: * * this driver is testing the PSTRMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed REAL (block scattered) * matrix. Then it calls PSTRMR2D for the inverse redistribution and checks * the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pstrmr.c file for detailed info on the PSTRMR2D function. * * * The testing parameters are read from the file TRMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are REAL * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(float) bytes, * * * - use the procedures of the files: * * pstrmr.o pstrmr2.o pstrmraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pstrmr2do_ #define fortran_mr2dnew pstrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSTRMR2D #define fortran_mr2d PSTRMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d pstrmr2do #define fortran_mr2dnew pstrmr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Cstrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 strscanD0 #define dispmat strdispmat #define setmemory strsetmemory #define freememory strfreememory #define scan_intervals strscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpstrmr2do(); extern void Cpstrmr2d(); /* some defines for Cpstrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) float *block; int m, n; { float *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xstrmr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; char *uplo, *diag; float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open TRMR2D.dat\n"); exit(1); }; printf("\n// STRMR2D TESTER for REAL //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, &u, &d, NULL); uplo = u ? "UPPER" : "LOWER"; diag = d ? "UNIT" : "NONUNIT"; printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", toupper(*diag) == 'U' ? "unit" : "nonunit"); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; parameters[22] = *uplo == 'U'; parameters[23] = *diag == 'U'; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; uplo = parameters[22] ? "UPPER" : "LOWER"; diag = parameters[23] ? "UNIT" : "NONUNIT"; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(float)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpstrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpstrmr2d(uplo, diag, m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; else if (toupper(*uplo) == 'U') in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); else in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; }; }; if (nberrors > 0) { printf("Processor %d, has tested %d REAL elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/psgemrdrv.c0000640000175000017500000003366210363532303017612 0ustar muammarmuammar#include "redist.h" /* $Id: psgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * psgemrdrv.c : * * * PURPOSE: * * this driver is testing the PSGEMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed REAL (block scattered) * matrix. Then it calls PSGEMR2D for the inverse redistribution and checks * the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See psgemr.c file for detailed info on the PSGEMR2D function. * * * The testing parameters are read from the file GEMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are REAL * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(float) bytes, * * * - use the procedures of the files: * * psgemr.o psgemr2.o psgemraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d psgemr2do_ #define fortran_mr2dnew psgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSGEMR2D #define fortran_mr2d PSGEMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d psgemr2do #define fortran_mr2dnew psgemr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Csgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 sgescanD0 #define dispmat sgedispmat #define setmemory sgesetmemory #define freememory sgefreememory #define scan_intervals sgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpsgemr2do(); extern void Cpsgemr2d(); /* some defines for Cpsgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) float *block; int m, n; { float *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xsgemr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("GEMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open GEMR2D.dat\n"); exit(1); }; printf("\n// SGEMR2D TESTER for REAL //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, NULL); printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(float)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpsgemr2d(m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpsgemr2d(m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; }; }; if (nberrors > 0) { printf("Processor %d, has tested %d REAL elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pdtrmrdrv.c0000640000175000017500000003526610363532303017627 0ustar muammarmuammar#include "redist.h" /* $Id: pdtrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pdtrmrdrv.c : * * * PURPOSE: * * this driver is testing the PDTRMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed DOUBLE PRECISION * (block scattered) matrix. Then it calls PDTRMR2D for the inverse * redistribution and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pdtrmr.c file for detailed info on the PDTRMR2D function. * * * The testing parameters are read from the file TRMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are DOUBLE PRECISION * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(double) * bytes, * * * - use the procedures of the files: * * pdtrmr.o pdtrmr2.o pdtrmraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdtrmr2do_ #define fortran_mr2dnew pdtrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDTRMR2D #define fortran_mr2d PDTRMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdtrmr2do #define fortran_mr2dnew pdtrmr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdtrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dtrscanD0 #define dispmat dtrdispmat #define setmemory dtrsetmemory #define freememory dtrfreememory #define scan_intervals dtrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdtrmr2do(); extern void Cpdtrmr2d(); /* some defines for Cpdtrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) double *block; int m, n; { double *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata) = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xdtrmr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; char *uplo, *diag; double *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open TRMR2D.dat\n"); exit(1); }; printf("\n// DTRMR2D TESTER for DOUBLE PRECISION //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, &u, &d, NULL); uplo = u ? "UPPER" : "LOWER"; diag = d ? "UNIT" : "NONUNIT"; printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", toupper(*diag) == 'U' ? "unit" : "nonunit"); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; parameters[22] = *uplo == 'U'; parameters[23] = *diag == 'U'; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; uplo = parameters[22] ? "UPPER" : "LOWER"; diag = parameters[23] ? "UNIT" : "NONUNIT"; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(double)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i] = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpdtrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpdtrmr2d(uplo, diag, m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; else if (toupper(*uplo) == 'U') in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); else in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); if (!in) { ptrmyblockcopy[i] = -1; } if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i], ptrmyblockcopy[i]); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d DOUBLE PRECISION elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pctrmrdrv.c0000640000175000017500000003532310363532303017620 0ustar muammarmuammar#include "redist.h" /* $Id: pctrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pctrmrdrv.c : * * * PURPOSE: * * this driver is testing the PCTRMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed COMPLEX (block * scattered) matrix. Then it calls PCTRMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pctrmr.c file for detailed info on the PCTRMR2D function. * * * The testing parameters are read from the file TRMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are COMPLEX * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(complex) * bytes, * * * - use the procedures of the files: * * pctrmr.o pctrmr2.o pctrmraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pctrmr2do_ #define fortran_mr2dnew pctrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCTRMR2D #define fortran_mr2d PCTRMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pctrmr2do #define fortran_mr2dnew pctrmr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Cctrlacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ctrscanD0 #define dispmat ctrdispmat #define setmemory ctrsetmemory #define freememory ctrfreememory #define scan_intervals ctrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpctrmr2do(); extern void Cpctrmr2d(); /* some defines for Cpctrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) complex *block; int m, n; { complex *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata).r = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xctrmr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; char *uplo, *diag; complex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open TRMR2D.dat\n"); exit(1); }; printf("\n// CTRMR2D TESTER for COMPLEX //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, &u, &d, NULL); uplo = u ? "UPPER" : "LOWER"; diag = d ? "UNIT" : "NONUNIT"; printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", toupper(*diag) == 'U' ? "unit" : "nonunit"); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; parameters[22] = *uplo == 'U'; parameters[23] = *diag == 'U'; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; uplo = parameters[22] ? "UPPER" : "LOWER"; diag = parameters[23] ? "UNIT" : "NONUNIT"; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(complex)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i].r = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpctrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpctrmr2d(uplo, diag, m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; else if (toupper(*uplo) == 'U') in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); else in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); if (!in) { ptrmyblockcopy[i].r = -1; } if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i].r, ptrmyblockcopy[i].r); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d COMPLEX elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/pcgemrdrv.c0000640000175000017500000003421510363532303017565 0ustar muammarmuammar#include "redist.h" /* $Id: pcgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ * * pcgemrdrv.c : * * * PURPOSE: * * this driver is testing the PCGEMR2D routine. It calls it to obtain a new * scattered block data decomposition of a distributed COMPLEX (block * scattered) matrix. Then it calls PCGEMR2D for the inverse redistribution * and checks the results with the initial data. * * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition * on the processor grid p0 x q0. * * See pcgemr.c file for detailed info on the PCGEMR2D function. * * * The testing parameters are read from the file GEMR2D.dat, see the file in the * distribution to have an example. * * created by Bernard Tourancheau in April 1994. * * modifications : see sccs history * * =================================== * * * NOTE : * * - the matrix elements are COMPLEX * * - memory requirements : this procedure requires approximately 3 times the * memory space of the initial data block in grid 0 (initial block, copy for * test and second redistribution result) and 1 time the memory space of the * result data block in grid 1. with the element size = sizeof(complex) * bytes, * * * - use the procedures of the files: * * pcgemr.o pcgemr2.o pcgemraux.o * * * ====================================== * * WARNING ASSUMPTIONS : * * * ======================================== * * * Planned changes: * * * * ========================================= */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pcgemr2do_ #define fortran_mr2dnew pcgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCGEMR2D #define fortran_mr2d PCGEMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pcgemr2do #define fortran_mr2dnew pcgemr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Ccgelacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 cgescanD0 #define dispmat cgedispmat #define setmemory cgesetmemory #define freememory cgefreememory #define scan_intervals cgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpcgemr2do(); extern void Cpcgemr2d(); /* some defines for Cpcgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* initblock: intialize the local part of a matrix with random data (well, * not very random) */ static2 void initblock(block, m, n) complex *block; int m, n; { complex *pdata; int i; pdata = block; for (i = 0; i < m * n; i++, pdata++) { (*pdata).r = i; }; } /* getparam:read from a file a list of integer parameters, the end of the * parameters to read is given by a NULL at the end of the args list */ #ifdef __STDC__ #include static void getparam(FILE * f,...) { #else #include static void getparam(va_alist) va_dcl { FILE *f; #endif va_list ap; int i; static int nbline; char *ptr, *next; int *var; static char buffer[200]; #ifdef __STDC__ va_start(ap, f); #else va_start(ap); f = va_arg(ap, FILE *); #endif do { next = fgets(buffer, 200, f); if (next == NULL) { fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); exit(1); } nbline += 1; } while (buffer[0] == '#'); ptr = buffer; var = va_arg(ap, int *); while (var != NULL) { *var = strtol(ptr, &next, 10); if (ptr == next) { fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); exit(1); } ptr = next; var = va_arg(ap, int *); } va_end(ap); } void initforpvm(argc, argv) int argc; char *argv[]; { int pnum, nproc; Cblacs_pinfo(&pnum, &nproc); if (nproc < 1) { /* we are with PVM */ if (pnum == 0) { if (argc < 2) { fprintf(stderr, "usage with PVM:xcgemr nbproc\n\ \t where nbproc is the number of nodes to initialize\n"); exit(1); } nproc = atoi(argv[1]); } Cblacs_setup(&pnum, &nproc); } } int main(argc, argv) int argc; char *argv[]; { /* We initialize the data-block on the current processor, then redistribute * it, and perform the inverse redistribution to compare the local memory * with the initial one. */ /* Data file */ FILE *fp; int nbre, nbremax; /* Data distribution 0 parameters */ int p0, /* # of rows in the processor grid */ q0; /* # of columns in the processor grid */ /* Data distribution 1 parameters */ int p1, q1; /* # of parameter to be read on the keyboard */ #define nbparameter 24 /* General variables */ int blocksize0; int mypnum, nprocs; int parameters[nbparameter], nberrors; int i; int ia, ja, ib, jb, m, n; int gcontext, context0, context1; int myprow1, myprow0, mypcol0, mypcol1; int dummy; MDESC ma, mb; complex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; #ifdef UsingMpiBlacs MPI_Init(&argc, &argv); #endif setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #ifdef T3D free(malloc(14000000)); #endif initforpvm(argc, argv); /* Read physical parameters */ Cblacs_pinfo(&mypnum, &nprocs); /* initialize BLACS for the parameter communication */ Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", nprocs, 1); Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); if (mypnum == 0) { if ((fp = fopen("GEMR2D.dat", "r")) == NULL) { fprintf(stderr, "Can't open GEMR2D.dat\n"); exit(1); }; printf("\n// CGEMR2D TESTER for COMPLEX //\n"); getparam(fp, &nbre, NULL); printf("////////// %d tests \n\n", nbre); parameters[0] = nbre; Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); nbre = parameters[0]; }; if (mypnum == 0) { printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); }; /****** TEST LOOP *****/ /* Here we are in grip 1xnprocs */ nbremax = nbre; #ifdef DEBUG fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); #endif while (nbre-- != 0) { /* Loop on the serie of tests */ /* All the processors read the parameters so we have to be in a 1xnprocs * grid at each iteration */ /* Read processors grid and matrices parameters */ if (mypnum == 0) { int u, d; getparam(fp, &m, &n, &ma.m, &ma.n, &ma.sprow, &ma.spcol, &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, &mb.m, &mb.n, &mb.sprow, &mb.spcol, &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, NULL); printf("\t\t************* TEST # %d **********\n", nbremax - nbre); printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", m, n, ma.m, ma.n, ma.sprow, ma.spcol, ia, ja, p0, q0, ma.nbrow, ma.nbcol, mb.m, mb.n, mb.sprow, mb.spcol, ib, jb, p1, q1, mb.nbrow, mb.nbcol); printf("\n"); if (p0 * q0 > nprocs || p1 * q1 > nprocs) { fprintf(stderr, "not enough nodes:%d processors required\n", max(p0 * q0, p1 * q1)); exit(1); } parameters[0] = p0; parameters[1] = q0; parameters[2] = ma.nbrow; parameters[3] = ma.nbcol; parameters[4] = p1; parameters[5] = q1; parameters[6] = mb.nbrow; parameters[7] = mb.nbcol; parameters[8] = ma.m; parameters[9] = ma.n; parameters[10] = ma.sprow; parameters[11] = ma.spcol; parameters[12] = mb.sprow; parameters[13] = mb.spcol; parameters[14] = ia; parameters[15] = ja; parameters[16] = ib; parameters[17] = jb; parameters[18] = m; parameters[19] = n; parameters[20] = mb.m; parameters[21] = mb.n; Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); } else { Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); p0 = parameters[0]; q0 = parameters[1]; ma.nbrow = parameters[2]; ma.nbcol = parameters[3]; p1 = parameters[4]; q1 = parameters[5]; mb.nbrow = parameters[6]; mb.nbcol = parameters[7]; ma.m = parameters[8]; ma.n = parameters[9]; ma.sprow = parameters[10]; ma.spcol = parameters[11]; mb.sprow = parameters[12]; mb.spcol = parameters[13]; ia = parameters[14]; ja = parameters[15]; ib = parameters[16]; jb = parameters[17]; m = parameters[18]; n = parameters[19]; mb.m = parameters[20]; mb.n = parameters[21]; ma.desctype = BLOCK_CYCLIC_2D; mb.desctype = BLOCK_CYCLIC_2D; }; Cblacs_get(0, 0, &context0); Cblacs_gridinit(&context0, "R", p0, q0); Cblacs_get(0, 0, &context1); Cblacs_gridinit(&context1, "R", p1, q1); Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); ma.ctxt = context0; mb.ctxt = context1; /* From here, we are not assuming that only the processors working in the * redistribution are calling xxMR2D, but the ones not concerned will do * nothing. */ /* We compute the exact size of the local memory block for the memory * allocations */ if (myprow0 >= 0 && mypcol0 >= 0) { blocksize0 = memoryblocksize(&ma); ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); setmemory(&ptrmyblock, blocksize0); initblock(ptrmyblock, 1, blocksize0); setmemory(&ptrmyblockcopy, blocksize0); memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, blocksize0 * sizeof(complex)); setmemory(&ptrmyblockvide, blocksize0); for (i = 0; i < blocksize0; i++) ptrmyblockvide[i].r = -1; }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { setmemory(&ptrsavemyblock, memoryblocksize(&mb)); mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); }; /* if (mypnum < p1 * q1) */ /* Redistribute the matrix from grid 0 to grid 1 (memory location * ptrmyblock to ptrsavemyblock) */ Cpcgemr2d(m, n, ptrmyblock, ia, ja, &ma, ptrsavemyblock, ib, jb, &mb, gcontext); /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 * (memory location ptrsavemyblock to ptrmyblockvide) */ Cpcgemr2d(m, n, ptrsavemyblock, ib, jb, &mb, ptrmyblockvide, ia, ja, &ma, gcontext); /* Check the differences */ nberrors = 0; if (myprow0 >= 0 && mypcol0 >= 0) { /* only for the processors that do have data at the begining */ for (i = 0; i < blocksize0; i++) { int li, lj, gi, gj; int in; in = 1; li = i % ma.lda; lj = i / ma.lda; gi = (li / ma.nbrow) * p0 * ma.nbrow + SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; gj = (lj / ma.nbcol) * q0 * ma.nbcol + SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; assert(gi < ma.m && gj < ma.n); gi -= (ia - 1); gj -= (ja - 1); if (gi < 0 || gj < 0 || gi >= m || gj >= n) in = 0; if (!in) { ptrmyblockcopy[i].r = -1; } if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) { nberrors++; printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" ,mypnum, i, ptrmyblockvide[i].r, ptrmyblockcopy[i].r); }; }; if (nberrors > 0) { printf("Processor %d, has tested %d COMPLEX elements,\ Number of redistribution errors = %d \n", mypnum, blocksize0, nberrors); } } /* Look at the errors on all the processors at this point. */ Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); if (mypnum == 0) if (nberrors) printf(" => Total number of redistribution errors = %d \n", nberrors); else printf("TEST PASSED OK\n"); /* release memory for the next iteration */ if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrmyblock); freememory((char *) ptrmyblockvide); freememory((char *) ptrmyblockcopy); }; /* if (mypnum < p0 * q0) */ /* release memory for the next iteration */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrsavemyblock); }; if (myprow0 >= 0) Cblacs_gridexit(context0); if (myprow1 >= 0) Cblacs_gridexit(context1); }; /* while nbre != 0 */ if (mypnum == 0) { fclose(fp); }; Cblacs_exit(0); return 0; }/* main */ scalapack-1.8.0/REDIST/TESTING/redist.h0000640000175000017500000000016610363532303017071 0ustar muammarmuammar#ifdef T3D #define float double #endif #ifdef T3E #define float double #endif #ifdef CRAY #define float double #endif scalapack-1.8.0/REDIST/SRC/0000750000175000017500000000000010605234747014746 5ustar muammarmuammarscalapack-1.8.0/REDIST/SRC/psgemr.c0000640000175000017500000005430510363532303016405 0ustar muammarmuammar#include "redist.h" /** $Id: psgemr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PSGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PSGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) REAL On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) REAL On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d psgemr2do_ #define fortran_mr2dnew psgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSGEMR2D #define fortran_mr2d PSGEMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d psgemr2do #define fortran_mr2dnew psgemr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Csgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 sgescanD0 #define dispmat sgedispmat #define setmemory sgesetmemory #define freememory sgefreememory #define scan_intervals sgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpsgemr2do(); extern void Cpsgemr2d(); /* some defines for Cpsgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; float *A, *B; { Cpsgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; float *A, *B; int *gcontext; { Cpsgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpsgemr2do(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) float *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpsgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpsgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) float *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; float *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock, ma, ptrsendbuff); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Csgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Csgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { buff2block(v_inter, vinter_nb, h_inter, hinter_nb, recvptr, ptrmynewblock, mb); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ { \ float *_a,*_b; \ int _m,_n,_lda,_ldb; \ int _i,_j; \ _m = (mo);_n = (no); \ _a = (ao);_b = (bo); \ _lda = (ldao) - _m; \ _ldb = (ldbo) - _m; \ assert(_lda >= 0 && _ldb >= 0); \ for (_j=0;_j<_n;_j++) { \ for (_i=0;_i<_m;_i++) \ *_b++ = *_a++; \ _b += _ldb; \ _a += _lda; \ } \ } static2 int block2buff(vi, vinb, hi, hinb, ptra, ma, buff) int hinb, vinb; IDESC *hi, *vi; MDESC *ma; float *buff, *ptra; { int h, v, sizebuff; float *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptra + hi[h].lstart * ma->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, ptr2 + vi[v].lstart, ma->lda, buff + sizebuff, vi[v].len); sizebuff += hi[h].len * vi[v].len; } } return sizebuff; } static2 void buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) int hinb, vinb; IDESC *hi, *vi; MDESC *mb; float *buff, *ptrb; { int h, v, sizebuff; float *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptrb + hi[h].lstart * mb->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, buff + sizebuff, vi[v].len, ptr2 + vi[v].lstart, mb->lda); sizebuff += hi[h].len * vi[v].len; } } } static2 int inter_len(hinb, hi, vinb, vi) int hinb, vinb; IDESC *hi, *vi; { int hlen, vlen, h, v; hlen = 0; for (h = 0; h < hinb; h++) hlen += hi[h].len; vlen = 0; for (v = 0; v < vinb; v++) vlen += vi[v].len; return hlen * vlen; } void Clacpy(m, n, a, lda, b, ldb) float *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/Makefile0000640000175000017500000000602410363532303016377 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK Redistrib # # Module: Makefile # # Purpose: Redistribution Sources Makefile # # Creation date: March 20, 1995 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc ####################################################################### # This is the makefile to create a library for redistribution. # The files are organized as follows: # ALLAUX -- Auxiliary routines called from all precisions # IMRSRC -- Integer REDIST routines # SMRSRC -- Single precision real REDIST routines # CMRSRC -- Single precision complex REDIST routines # DMRSRC -- Double precision real REDIST routines # ZMRSRC -- Double precision complex REDIST routines # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ARCH, ARCHFLAGS, RANLIB, # CC and CFLAGS definitions in ../../SLmake.inc to match your library # archiver, compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # The name of the library is defined by $(SCALAPACKLIB) in # ../../SLmake.inc and is created at the next higher directory level. # # To remove the object files after the library is created, enter # make clean # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC # ####################################################################### ALLAUX = \ pgemraux.o IMRSRC = \ pigemr.o pigemr2.o pitrmr.o pitrmr2.o SMRSRC = \ psgemr.o psgemr2.o pstrmr.o pstrmr2.o CMRSRC = \ pcgemr.o pcgemr2.o pctrmr.o pctrmr2.o DMRSRC = \ pdgemr.o pdgemr2.o pdtrmr.o pdtrmr2.o ZMRSRC = \ pzgemr.o pzgemr2.o pztrmr.o pztrmr2.o all: integer single complex double complex16 lib: all integer: $(IMRSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(IMRSRC) $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) single: $(SMRSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(SMRSRC) $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) complex: $(CMRSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(CMRSRC) $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) double: $(DMRSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(DMRSRC) $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) complex16: $(ZMRSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ZMRSRC) $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) $(ALLAUX): $(FRC) $(IMRSRC): $(FRC) $(SMRSRC): $(FRC) $(CMRSRC): $(FRC) $(DMRSRC): $(FRC) $(ZMRSRC): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/REDIST/SRC/pctrmr.c0000640000175000017500000005356210363532303016423 0ustar muammarmuammar#include "redist.h" /** $Id: pctrmr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PCTRMR2D(UPLO, DIAG, M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PCTRMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. The submatrix to be copied is assumed to be trapezoidal. So only the upper or the lower part will be copied. The other part is unchanged. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== UPLO (input) CHARACTER*1. On entry, UPLO specifies whether we should copy the upper part of the lower part of the defined submatrix: UPLO = 'U' or 'u' copy the upper triangular part. UPLO = 'L' or 'l' copy the lower triangular part. Unchanged on exit. DIAG (input) CHARACTER*1. On entry, DIAG specifies whether we should copy the diagonal. DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix. DIAG = 'N' or 'n' DO copy the diagonal of the submatrix. Unchanged on exit. M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) COMPLEX On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) COMPLEX On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pctrmr2do_ #define fortran_mr2dnew pctrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCTRMR2D #define fortran_mr2d PCTRMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pctrmr2do #define fortran_mr2dnew pctrmr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Cctrlacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ctrscanD0 #define dispmat ctrdispmat #define setmemory ctrsetmemory #define freememory ctrfreememory #define scan_intervals ctrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpctrmr2do(); extern void Cpctrmr2d(); /* some defines for Cpctrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; complex *A, *B; { Cpctrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; complex *A, *B; int *gcontext; { Cpctrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpctrmr2do(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) char *uplo, *diag; complex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpctrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpctrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) char *uplo, *diag; complex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { complex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; complex *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Ccgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Ccgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } void Clacpy(m, n, a, lda, b, ldb) complex *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pctrmr2.c0000640000175000017500000002633210363532303016500 0ustar muammarmuammar#include "redist.h" /* $Id: pctrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * some functions used by the pctrmr2d routine see file pctrmr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pctrmr2do_ #define fortran_mr2dnew pctrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCTRMR2D #define fortran_mr2d PCTRMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pctrmr2do #define fortran_mr2dnew pctrmr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Cctrlacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ctrscanD0 #define dispmat ctrdispmat #define setmemory ctrsetmemory #define freememory ctrfreememory #define scan_intervals ctrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpctrmr2do(); extern void Cpctrmr2d(); /* some defines for Cpctrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) complex **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (complex *) mr2d_malloc( blocksize * sizeof(complex)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) complex *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern ccopy_(); */ /**************************************************************/ /* return the number of elements int the column after i and the distance of * the first one from i, i,j can be negative out of borns, the number of * elements returned can be negative (means 0) */ static2 int insidemat(uplo, diag, i, j, m, n, offset) int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ char *uplo, *diag; int *offset; { /* tests outside mxn */ assert(j >= 0 && j < n); assert(i >= 0); if (toupper(*uplo) == 'U') { int nbline; /* number of lines in the j_th column */ int virtualnbline; /* number of line if we were not limited by m */ *offset = 0; virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); nbline = min(virtualnbline, m); return nbline - i; } else { int firstline; /* first line in the j_th column */ int diagcol; /* column where the diag begin */ int virtualline; /* virtual first line if the matrix was extended with * negative indices */ int off; diagcol = max(n - m, 0);; virtualline = j - diagcol + (toupper(*diag) == 'U'); firstline = max(0, virtualline); off = max(firstline - i, 0); *offset = off; i += off; return m - i; } }/* insidemat() */ /********************************************************************/ /* Execute an action on the local memories when an intersection occurs (the * action can be the filling of the memory buffer, the count of the memory * buffer size or the setting of the memory with the element received) */ static2 void intersect(uplo, diag, j, start, end, action, ptrsizebuff, pptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1) int action, *ptrsizebuff; int j, start, end; complex **pptrbuff, *ptrblock; int templateheight0, templatewidth0; int templateheight1, templatewidth1; MDESC *ma, *mb; int ia, ja, ib, jb, m, n; char *uplo, *diag; /* Execute the action on the local memory for the current interval and * increment pptrbuff and ptrsizebuff of the intervalsize */ /* Notice that if the interval is contigous in the virtual matrice, it is * also contigous in the real one ! */ { /* int un = 1; only when we use dcopy instead of memcpy */ complex *ptrstart; int offset, nbline; int intervalsize; assert(start < end); assert(j >= 0 && j < n); nbline = insidemat(uplo, diag, start, j, m, n, &offset); if (nbline <= 0) return; start += offset; if (start >= end) return; intervalsize = min(end - start, nbline); (*ptrsizebuff) += intervalsize; switch (action) { case SENDBUFF: /* fill buff with local elements to be sent */ ptrstart = ptrblock + localindice(start + ia, j + ja, templateheight0, templatewidth0, ma); memcpy((char *) (*pptrbuff), (char *) ptrstart, intervalsize * sizeof(complex)); /* ccopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), * &un); */ (*pptrbuff) += intervalsize; break; case RECVBUFF: /* fill local memory with the values received */ ptrstart = ptrblock + localindice(start + ib, j + jb, templateheight1, templatewidth1, mb); memcpy((char *) ptrstart, (char *) (*pptrbuff), intervalsize * sizeof(complex)); /* ccopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), * &un); */ (*pptrbuff) += intervalsize; break; case SIZEBUFF: /* computation of sizebuff */ break; default: printf("action is %d outside the scope of the case [0..2] !! \n ", action); exit(0); break; }; /* switch (action) */ }/* intersect() */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpctrmr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].gstart = start; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } /*********************************************************************/ /* Do the scanning of intervals and the requested action */ void scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrblock) int action, /* # of the action done on the intersected intervals */ *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be * an output parameter in every cases) */ complex *ptrbuff /* address of the communication ptrbuffer (a * suffisant memory space is supposed to be allocated before the call) */ , *ptrblock; int p0, q0, p1, q1; IDESC *v_inter, *h_inter; int vinter_nb, hinter_nb; int m, n; int ia, ja, ib, jb; MDESC *ma, *mb; char *uplo, *diag; {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to * the [a..a+au-1] interval of length au */ int templateheight1, templatewidth1; int templateheight0, templatewidth0; int h, v; /* for scanning the intervals */ /* initializations */ templateheight1 = p1 * mb->nbrow; templateheight0 = p0 * ma->nbrow; templatewidth1 = q1 * mb->nbcol; templatewidth0 = q0 * ma->nbcol; /* we now will deal will logical grids, that's to say we change our * numbering of processors so that (0,0) begin on logical processor (0,0) */ /* in case we will not enter the while loop */ (*ptrsizebuff) = 0; for (h = 0; h < hinter_nb; h++) for (v = 0; v < vinter_nb; v++) { int j; for (j = 0; j < h_inter[h].len; j++) intersect(uplo, diag, j + h_inter[h].gstart, v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, action, ptrsizebuff, &ptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1); } }/* scanD0() */ scalapack-1.8.0/REDIST/SRC/pigemr.c0000640000175000017500000005427610363532303016402 0ustar muammarmuammar#include "redist.h" /** $Id: pigemr.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PIGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PIGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) INTEGER On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) INTEGER On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pigemr2do_ #define fortran_mr2dnew pigemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PIGEMR2D #define fortran_mr2d PIGEMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pigemr2do #define fortran_mr2dnew pigemr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Cigelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 igescanD0 #define dispmat igedispmat #define setmemory igesetmemory #define freememory igefreememory #define scan_intervals igescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpigemr2do(); extern void Cpigemr2d(); /* some defines for Cpigemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; int *A, *B; { Cpigemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; int *A, *B; int *gcontext; { Cpigemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpigemr2do(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) int *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpigemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpigemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) int *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; int *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock, ma, ptrsendbuff); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Cigesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Cigerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { buff2block(v_inter, vinter_nb, h_inter, hinter_nb, recvptr, ptrmynewblock, mb); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ { \ int *_a,*_b; \ int _m,_n,_lda,_ldb; \ int _i,_j; \ _m = (mo);_n = (no); \ _a = (ao);_b = (bo); \ _lda = (ldao) - _m; \ _ldb = (ldbo) - _m; \ assert(_lda >= 0 && _ldb >= 0); \ for (_j=0;_j<_n;_j++) { \ for (_i=0;_i<_m;_i++) \ *_b++ = *_a++; \ _b += _ldb; \ _a += _lda; \ } \ } static2 int block2buff(vi, vinb, hi, hinb, ptra, ma, buff) int hinb, vinb; IDESC *hi, *vi; MDESC *ma; int *buff, *ptra; { int h, v, sizebuff; int *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptra + hi[h].lstart * ma->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, ptr2 + vi[v].lstart, ma->lda, buff + sizebuff, vi[v].len); sizebuff += hi[h].len * vi[v].len; } } return sizebuff; } static2 void buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) int hinb, vinb; IDESC *hi, *vi; MDESC *mb; int *buff, *ptrb; { int h, v, sizebuff; int *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptrb + hi[h].lstart * mb->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, buff + sizebuff, vi[v].len, ptr2 + vi[v].lstart, mb->lda); sizebuff += hi[h].len * vi[v].len; } } } static2 int inter_len(hinb, hi, vinb, vi) int hinb, vinb; IDESC *hi, *vi; { int hlen, vlen, h, v; hlen = 0; for (h = 0; h < hinb; h++) hlen += hi[h].len; vlen = 0; for (v = 0; v < vinb; v++) vlen += vi[v].len; return hlen * vlen; } void Clacpy(m, n, a, lda, b, ldb) int *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pigemr2.c0000640000175000017500000001332610363532303016453 0ustar muammarmuammar#include "redist.h" /* $Id: pigemr2.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp $ * * some functions used by the pigemr2d routine see file pigemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pigemr2do_ #define fortran_mr2dnew pigemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PIGEMR2D #define fortran_mr2d PIGEMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pigemr2do #define fortran_mr2dnew pigemr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Cigelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 igescanD0 #define dispmat igedispmat #define setmemory igesetmemory #define freememory igefreememory #define scan_intervals igescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpigemr2do(); extern void Cpigemr2d(); /* some defines for Cpigemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) int **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (int *) mr2d_malloc( blocksize * sizeof(int)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) int *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern icopy_(); */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpigemr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].lstart = l + start - j0; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } scalapack-1.8.0/REDIST/SRC/pstrmr.c0000640000175000017500000005346010363532303016440 0ustar muammarmuammar#include "redist.h" /** $Id: pstrmr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PSTRMR2D(UPLO, DIAG, M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PSTRMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. The submatrix to be copied is assumed to be trapezoidal. So only the upper or the lower part will be copied. The other part is unchanged. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== UPLO (input) CHARACTER*1. On entry, UPLO specifies whether we should copy the upper part of the lower part of the defined submatrix: UPLO = 'U' or 'u' copy the upper triangular part. UPLO = 'L' or 'l' copy the lower triangular part. Unchanged on exit. DIAG (input) CHARACTER*1. On entry, DIAG specifies whether we should copy the diagonal. DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix. DIAG = 'N' or 'n' DO copy the diagonal of the submatrix. Unchanged on exit. M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) REAL On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) REAL On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pstrmr2do_ #define fortran_mr2dnew pstrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSTRMR2D #define fortran_mr2d PSTRMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d pstrmr2do #define fortran_mr2dnew pstrmr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Cstrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 strscanD0 #define dispmat strdispmat #define setmemory strsetmemory #define freememory strfreememory #define scan_intervals strscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpstrmr2do(); extern void Cpstrmr2d(); /* some defines for Cpstrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; float *A, *B; { Cpstrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; float *A, *B; int *gcontext; { Cpstrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpstrmr2do(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) char *uplo, *diag; float *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpstrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpstrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) char *uplo, *diag; float *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; float *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Csgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Csgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } void Clacpy(m, n, a, lda, b, ldb) float *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pcgemr2.c0000640000175000017500000001342710363532303016447 0ustar muammarmuammar#include "redist.h" /* $Id: pcgemr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ * * some functions used by the pcgemr2d routine see file pcgemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pcgemr2do_ #define fortran_mr2dnew pcgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCGEMR2D #define fortran_mr2d PCGEMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pcgemr2do #define fortran_mr2dnew pcgemr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Ccgelacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 cgescanD0 #define dispmat cgedispmat #define setmemory cgesetmemory #define freememory cgefreememory #define scan_intervals cgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpcgemr2do(); extern void Cpcgemr2d(); /* some defines for Cpcgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) complex **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (complex *) mr2d_malloc( blocksize * sizeof(complex)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) complex *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern ccopy_(); */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpcgemr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].lstart = l + start - j0; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } scalapack-1.8.0/REDIST/SRC/pitrmr.c0000640000175000017500000005345710363532303016434 0ustar muammarmuammar#include "redist.h" /** $Id: pitrmr.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PITRMR2D(UPLO, DIAG, M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PITRMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. The submatrix to be copied is assumed to be trapezoidal. So only the upper or the lower part will be copied. The other part is unchanged. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== UPLO (input) CHARACTER*1. On entry, UPLO specifies whether we should copy the upper part of the lower part of the defined submatrix: UPLO = 'U' or 'u' copy the upper triangular part. UPLO = 'L' or 'l' copy the lower triangular part. Unchanged on exit. DIAG (input) CHARACTER*1. On entry, DIAG specifies whether we should copy the diagonal. DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix. DIAG = 'N' or 'n' DO copy the diagonal of the submatrix. Unchanged on exit. M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) INTEGER On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) INTEGER On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pitrmr2do_ #define fortran_mr2dnew pitrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PITRMR2D #define fortran_mr2d PITRMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pitrmr2do #define fortran_mr2dnew pitrmr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Citrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 itrscanD0 #define dispmat itrdispmat #define setmemory itrsetmemory #define freememory itrfreememory #define scan_intervals itrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpitrmr2do(); extern void Cpitrmr2d(); /* some defines for Cpitrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; int *A, *B; { Cpitrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; int *A, *B; int *gcontext; { Cpitrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpitrmr2do(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) char *uplo, *diag; int *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpitrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpitrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) char *uplo, *diag; int *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; int *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Cigesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Cigerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } void Clacpy(m, n, a, lda, b, ldb) int *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pzgemr2.c0000640000175000017500000001342710363532303016476 0ustar muammarmuammar#include "redist.h" /* $Id: pzgemr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * some functions used by the pzgemr2d routine see file pzgemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pzgemr2do_ #define fortran_mr2dnew pzgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZGEMR2D #define fortran_mr2d PZGEMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pzgemr2do #define fortran_mr2dnew pzgemr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Czgelacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 zgescanD0 #define dispmat zgedispmat #define setmemory zgesetmemory #define freememory zgefreememory #define scan_intervals zgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpzgemr2do(); extern void Cpzgemr2d(); /* some defines for Cpzgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) dcomplex **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (dcomplex *) mr2d_malloc( blocksize * sizeof(dcomplex)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) dcomplex *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern zcopy_(); */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpzgemr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].lstart = l + start - j0; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } scalapack-1.8.0/REDIST/SRC/pstrmr2.c0000640000175000017500000002623010363532303016515 0ustar muammarmuammar#include "redist.h" /* $Id: pstrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ * * some functions used by the pstrmr2d routine see file pstrmr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pstrmr2do_ #define fortran_mr2dnew pstrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSTRMR2D #define fortran_mr2d PSTRMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d pstrmr2do #define fortran_mr2dnew pstrmr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Cstrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 strscanD0 #define dispmat strdispmat #define setmemory strsetmemory #define freememory strfreememory #define scan_intervals strscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpstrmr2do(); extern void Cpstrmr2d(); /* some defines for Cpstrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) float **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (float *) mr2d_malloc( blocksize * sizeof(float)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) float *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern scopy_(); */ /**************************************************************/ /* return the number of elements int the column after i and the distance of * the first one from i, i,j can be negative out of borns, the number of * elements returned can be negative (means 0) */ static2 int insidemat(uplo, diag, i, j, m, n, offset) int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ char *uplo, *diag; int *offset; { /* tests outside mxn */ assert(j >= 0 && j < n); assert(i >= 0); if (toupper(*uplo) == 'U') { int nbline; /* number of lines in the j_th column */ int virtualnbline; /* number of line if we were not limited by m */ *offset = 0; virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); nbline = min(virtualnbline, m); return nbline - i; } else { int firstline; /* first line in the j_th column */ int diagcol; /* column where the diag begin */ int virtualline; /* virtual first line if the matrix was extended with * negative indices */ int off; diagcol = max(n - m, 0);; virtualline = j - diagcol + (toupper(*diag) == 'U'); firstline = max(0, virtualline); off = max(firstline - i, 0); *offset = off; i += off; return m - i; } }/* insidemat() */ /********************************************************************/ /* Execute an action on the local memories when an intersection occurs (the * action can be the filling of the memory buffer, the count of the memory * buffer size or the setting of the memory with the element received) */ static2 void intersect(uplo, diag, j, start, end, action, ptrsizebuff, pptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1) int action, *ptrsizebuff; int j, start, end; float **pptrbuff, *ptrblock; int templateheight0, templatewidth0; int templateheight1, templatewidth1; MDESC *ma, *mb; int ia, ja, ib, jb, m, n; char *uplo, *diag; /* Execute the action on the local memory for the current interval and * increment pptrbuff and ptrsizebuff of the intervalsize */ /* Notice that if the interval is contigous in the virtual matrice, it is * also contigous in the real one ! */ { /* int un = 1; only when we use dcopy instead of memcpy */ float *ptrstart; int offset, nbline; int intervalsize; assert(start < end); assert(j >= 0 && j < n); nbline = insidemat(uplo, diag, start, j, m, n, &offset); if (nbline <= 0) return; start += offset; if (start >= end) return; intervalsize = min(end - start, nbline); (*ptrsizebuff) += intervalsize; switch (action) { case SENDBUFF: /* fill buff with local elements to be sent */ ptrstart = ptrblock + localindice(start + ia, j + ja, templateheight0, templatewidth0, ma); memcpy((char *) (*pptrbuff), (char *) ptrstart, intervalsize * sizeof(float)); /* scopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), * &un); */ (*pptrbuff) += intervalsize; break; case RECVBUFF: /* fill local memory with the values received */ ptrstart = ptrblock + localindice(start + ib, j + jb, templateheight1, templatewidth1, mb); memcpy((char *) ptrstart, (char *) (*pptrbuff), intervalsize * sizeof(float)); /* scopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), * &un); */ (*pptrbuff) += intervalsize; break; case SIZEBUFF: /* computation of sizebuff */ break; default: printf("action is %d outside the scope of the case [0..2] !! \n ", action); exit(0); break; }; /* switch (action) */ }/* intersect() */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpstrmr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].gstart = start; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } /*********************************************************************/ /* Do the scanning of intervals and the requested action */ void scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrblock) int action, /* # of the action done on the intersected intervals */ *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be * an output parameter in every cases) */ float *ptrbuff /* address of the communication ptrbuffer (a * suffisant memory space is supposed to be allocated before the call) */ , *ptrblock; int p0, q0, p1, q1; IDESC *v_inter, *h_inter; int vinter_nb, hinter_nb; int m, n; int ia, ja, ib, jb; MDESC *ma, *mb; char *uplo, *diag; {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to * the [a..a+au-1] interval of length au */ int templateheight1, templatewidth1; int templateheight0, templatewidth0; int h, v; /* for scanning the intervals */ /* initializations */ templateheight1 = p1 * mb->nbrow; templateheight0 = p0 * ma->nbrow; templatewidth1 = q1 * mb->nbcol; templatewidth0 = q0 * ma->nbcol; /* we now will deal will logical grids, that's to say we change our * numbering of processors so that (0,0) begin on logical processor (0,0) */ /* in case we will not enter the while loop */ (*ptrsizebuff) = 0; for (h = 0; h < hinter_nb; h++) for (v = 0; v < vinter_nb; v++) { int j; for (j = 0; j < h_inter[h].len; j++) intersect(uplo, diag, j + h_inter[h].gstart, v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, action, ptrsizebuff, &ptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1); } }/* scanD0() */ scalapack-1.8.0/REDIST/SRC/pdtrmr.c0000640000175000017500000005351710363532303016424 0ustar muammarmuammar#include "redist.h" /** $Id: pdtrmr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PDTRMR2D(UPLO, DIAG, M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PDTRMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. The submatrix to be copied is assumed to be trapezoidal. So only the upper or the lower part will be copied. The other part is unchanged. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== UPLO (input) CHARACTER*1. On entry, UPLO specifies whether we should copy the upper part of the lower part of the defined submatrix: UPLO = 'U' or 'u' copy the upper triangular part. UPLO = 'L' or 'l' copy the lower triangular part. Unchanged on exit. DIAG (input) CHARACTER*1. On entry, DIAG specifies whether we should copy the diagonal. DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix. DIAG = 'N' or 'n' DO copy the diagonal of the submatrix. Unchanged on exit. M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) DOUBLE PRECISION On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) DOUBLE PRECISION On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdtrmr2do_ #define fortran_mr2dnew pdtrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDTRMR2D #define fortran_mr2d PDTRMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdtrmr2do #define fortran_mr2dnew pdtrmr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdtrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dtrscanD0 #define dispmat dtrdispmat #define setmemory dtrsetmemory #define freememory dtrfreememory #define scan_intervals dtrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdtrmr2do(); extern void Cpdtrmr2d(); /* some defines for Cpdtrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; double *A, *B; { Cpdtrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; double *A, *B; int *gcontext; { Cpdtrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpdtrmr2do(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) char *uplo, *diag; double *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpdtrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpdtrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) char *uplo, *diag; double *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { double *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; double *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Cdgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Cdgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } void Clacpy(m, n, a, lda, b, ldb) double *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pgemraux.c0000640000175000017500000002133610363532303016736 0ustar muammarmuammar#include "redist.h" /* $Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * some functions used by the pigemr2d routine see file pigemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pigemr2do_ #define fortran_mr2dnew pigemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PIGEMR2D #define fortran_mr2d PIGEMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pigemr2do #define fortran_mr2dnew pigemr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Cigelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 igescanD0 #define dispmat igedispmat #define setmemory igesetmemory #define freememory igefreememory #define scan_intervals igescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpigemr2do(); extern void Cpigemr2d(); /* some defines for Cpigemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include void * mr2d_malloc(n) int n; { void *ptr; assert(n > 0); ptr = (void *) malloc(n); if (ptr == NULL) { fprintf(stderr, "xxmr2d:out of memory\n"); exit(2); } return ptr; } int pgcd(a, b) int a, b; { int aux; if (a < b) return pgcd(b, a); else { aux = a % b; if (aux == 0) return b; else return pgcd(b, aux); } } int ppcm(a, b) int a, b; { int pg; pg = pgcd(a, b); return a * (b / pg); } /* localsize:return the number of rows on the local processor given by its * row number myprow, of a distributed matrix with m rows distributed of on a * grid of processors with p rows with blocksize nbrow : this procedure can * also be used to compute the number of cols by replacing rows by cols */ int localsize(myprow, p, nbrow, m) int myprow, p, nbrow, m; { int templateheight, blockheight; templateheight = p * nbrow; if (m % templateheight != 0) { /* not an exact boundary */ if ((m % templateheight) > (nbrow * myprow)) { /* processor * (myprow,mypcol) has * some elements in that * incomplete template */ if ((m % templateheight) >= (nbrow * (myprow + 1))) { /* processor * (myprow,mypcol)'s * part is complete */ blockheight = (m / templateheight) * nbrow + nbrow; } else { /* processor (myprow,mypcol)'s part is not complete */ blockheight = (m / templateheight) * nbrow + (m % nbrow); }; /* if ((m%templateheight) > (nbrow*(myprow+1))) */ } else { /* processor (myprow,mypcol) has no element in that * incomplete template */ blockheight = (m / templateheight) * nbrow; }; /* if ((m%templateheight) > (nbrow*myprow)) */ } else { /* exact boundary */ blockheight = m / p; /* (m/templateheight) * nbrow */ }; /* if (m%templateheight !=0) */ return blockheight; } /****************************************************************/ /* Returns the exact memory block size corresponding to the parameters */ int memoryblocksize(a) MDESC *a; { int myprow, mypcol, p, q; /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We * assume the row-major ordering of the BLACS */ Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol); myprow = SHIFT(myprow, a->sprow, p); mypcol = SHIFT(mypcol, a->spcol, q); assert(myprow >= 0 && mypcol >= 0); return localsize(myprow, p, a->nbrow, a->m) * localsize(mypcol, q, a->nbcol, a->n); } void checkequal(ctxt, a) int a, ctxt; { int np, dummy, nbrow, myp, b; Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp); assert(nbrow == 1); if (np == 1) return; if (myp == 0) { Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1); Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1); assert(a == b); } else { Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1); assert(a == b); Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np); } } void paramcheck(a, i, j, m, n, p, q, gcontext) MDESC *a; int i, j, m, n, p, q; { int p2, q2, myprow, mypcol; #ifndef NDEBUG checkequal(gcontext, p); checkequal(gcontext, q); checkequal(gcontext, a->sprow); checkequal(gcontext, a->spcol); checkequal(gcontext, a->m); checkequal(gcontext, a->n); checkequal(gcontext, i); checkequal(gcontext, j); checkequal(gcontext, a->nbrow); checkequal(gcontext, a->nbcol); #endif Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow >= p2 || mypcol >= q2) myprow = mypcol = -1; if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) { fprintf(stderr, "??MR2D:incoherent p,q parameters\n"); exit(1); } assert(myprow < p && mypcol < q); if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) { fprintf(stderr, "??MR2D:Bad first processor coordinates\n"); exit(1); } if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) { fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\ m=%d,n=%d,M=%d,N=%d\n", i, j, m, n, a->m, a->n); exit(1); } if ((myprow >= 0 || mypcol >= 0) && localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) { fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\ nbrow=%d,lda=%d,sprow=%d\n", myprow, a->m, p, a->nbrow, a->lda, a->sprow); exit(1); } } /* to change from the submatrix beginning at line i to one beginning at line * i' with i'< blocksize return the line number on the local process where * the new matrix begin, the new process number, and i' */ int changeorigin(myp, sp, p, bs, i, decal, newsp) int myp, sp, p, bs, i; int *decal, *newsp; { int tempheight, firstblock, firsttemp; /* we begin by changing the parameters so that ia < templatewidth,... */ tempheight = bs * p; firsttemp = i / tempheight; firstblock = (i / bs) % p; *newsp = (sp + firstblock) % p; if (myp >= 0) *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0); else *decal = 0; return i % bs; } /******************************************************************/ /* Return the indice in local memory of element of indice a in the matrix */ int localindice(ig, jg, templateheight, templatewidth, a) int templateheight, templatewidth, ig, jg; MDESC *a; /* Return the indice in local memory (scattered distribution) of the element * of indice a in global matrix */ { int vtemp, htemp, vsubtemp, hsubtemp, il, jl; assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n); /* coordinates in global matrix with the tests in intersect, ig MUST BE in * [0..m] and jg in [0..n] */ /* coordinates of the template that "owns" the element */ vtemp = ig / templateheight; htemp = jg / templatewidth; /* coordinates of the element in the subblock of the (vtemp, htemp) * template */ vsubtemp = ig % a->nbrow; hsubtemp = jg % a->nbcol; /* coordinates of the element in the local block of the processor */ il = a->nbrow * vtemp + vsubtemp; jl = a->nbcol * htemp + hsubtemp; assert(il < a->lda); #ifndef NDEBUG { int pr, pc, p, q, lp, lq; Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc); p = templateheight / a->nbrow; q = templatewidth / a->nbcol; lp = ig % templateheight / a->nbrow; lq = jg % templatewidth / a->nbcol; assert(lp == SHIFT(pr, a->sprow, p)); assert(lq == SHIFT(pc, a->spcol, q)); } #endif return (jl * a->lda + il); } scalapack-1.8.0/REDIST/SRC/pitrmr2.c0000640000175000017500000002620110363532303016501 0ustar muammarmuammar#include "redist.h" /* $Id: pitrmr2.c,v 1.1.1.1 2000/02/15 18:04:08 susan Exp $ * * some functions used by the pitrmr2d routine see file pitrmr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pitrmr2do_ #define fortran_mr2dnew pitrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PITRMR2D #define fortran_mr2d PITRMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pitrmr2do #define fortran_mr2dnew pitrmr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Citrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 itrscanD0 #define dispmat itrdispmat #define setmemory itrsetmemory #define freememory itrfreememory #define scan_intervals itrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpitrmr2do(); extern void Cpitrmr2d(); /* some defines for Cpitrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) int **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (int *) mr2d_malloc( blocksize * sizeof(int)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) int *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern icopy_(); */ /**************************************************************/ /* return the number of elements int the column after i and the distance of * the first one from i, i,j can be negative out of borns, the number of * elements returned can be negative (means 0) */ static2 int insidemat(uplo, diag, i, j, m, n, offset) int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ char *uplo, *diag; int *offset; { /* tests outside mxn */ assert(j >= 0 && j < n); assert(i >= 0); if (toupper(*uplo) == 'U') { int nbline; /* number of lines in the j_th column */ int virtualnbline; /* number of line if we were not limited by m */ *offset = 0; virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); nbline = min(virtualnbline, m); return nbline - i; } else { int firstline; /* first line in the j_th column */ int diagcol; /* column where the diag begin */ int virtualline; /* virtual first line if the matrix was extended with * negative indices */ int off; diagcol = max(n - m, 0);; virtualline = j - diagcol + (toupper(*diag) == 'U'); firstline = max(0, virtualline); off = max(firstline - i, 0); *offset = off; i += off; return m - i; } }/* insidemat() */ /********************************************************************/ /* Execute an action on the local memories when an intersection occurs (the * action can be the filling of the memory buffer, the count of the memory * buffer size or the setting of the memory with the element received) */ static2 void intersect(uplo, diag, j, start, end, action, ptrsizebuff, pptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1) int action, *ptrsizebuff; int j, start, end; int **pptrbuff, *ptrblock; int templateheight0, templatewidth0; int templateheight1, templatewidth1; MDESC *ma, *mb; int ia, ja, ib, jb, m, n; char *uplo, *diag; /* Execute the action on the local memory for the current interval and * increment pptrbuff and ptrsizebuff of the intervalsize */ /* Notice that if the interval is contigous in the virtual matrice, it is * also contigous in the real one ! */ { /* int un = 1; only when we use dcopy instead of memcpy */ int *ptrstart; int offset, nbline; int intervalsize; assert(start < end); assert(j >= 0 && j < n); nbline = insidemat(uplo, diag, start, j, m, n, &offset); if (nbline <= 0) return; start += offset; if (start >= end) return; intervalsize = min(end - start, nbline); (*ptrsizebuff) += intervalsize; switch (action) { case SENDBUFF: /* fill buff with local elements to be sent */ ptrstart = ptrblock + localindice(start + ia, j + ja, templateheight0, templatewidth0, ma); memcpy((char *) (*pptrbuff), (char *) ptrstart, intervalsize * sizeof(int)); /* icopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), * &un); */ (*pptrbuff) += intervalsize; break; case RECVBUFF: /* fill local memory with the values received */ ptrstart = ptrblock + localindice(start + ib, j + jb, templateheight1, templatewidth1, mb); memcpy((char *) ptrstart, (char *) (*pptrbuff), intervalsize * sizeof(int)); /* icopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), * &un); */ (*pptrbuff) += intervalsize; break; case SIZEBUFF: /* computation of sizebuff */ break; default: printf("action is %d outside the scope of the case [0..2] !! \n ", action); exit(0); break; }; /* switch (action) */ }/* intersect() */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpitrmr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].gstart = start; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } /*********************************************************************/ /* Do the scanning of intervals and the requested action */ void scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrblock) int action, /* # of the action done on the intersected intervals */ *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be * an output parameter in every cases) */ int *ptrbuff /* address of the communication ptrbuffer (a suffisant memory space is supposed to be allocated before the call) */ , *ptrblock; int p0, q0, p1, q1; IDESC *v_inter, *h_inter; int vinter_nb, hinter_nb; int m, n; int ia, ja, ib, jb; MDESC *ma, *mb; char *uplo, *diag; {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to * the [a..a+au-1] interval of length au */ int templateheight1, templatewidth1; int templateheight0, templatewidth0; int h, v; /* for scanning the intervals */ /* initializations */ templateheight1 = p1 * mb->nbrow; templateheight0 = p0 * ma->nbrow; templatewidth1 = q1 * mb->nbcol; templatewidth0 = q0 * ma->nbcol; /* we now will deal will logical grids, that's to say we change our * numbering of processors so that (0,0) begin on logical processor (0,0) */ /* in case we will not enter the while loop */ (*ptrsizebuff) = 0; for (h = 0; h < hinter_nb; h++) for (v = 0; v < vinter_nb; v++) { int j; for (j = 0; j < h_inter[h].len; j++) intersect(uplo, diag, j + h_inter[h].gstart, v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, action, ptrsizebuff, &ptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1); } }/* scanD0() */ scalapack-1.8.0/REDIST/SRC/pztrmr2.c0000640000175000017500000002633710363532303016534 0ustar muammarmuammar#include "redist.h" /* $Id: pztrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * some functions used by the pztrmr2d routine see file pztrmr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pztrmr2do_ #define fortran_mr2dnew pztrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZTRMR2D #define fortran_mr2d PZTRMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pztrmr2do #define fortran_mr2dnew pztrmr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Cztrlacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ztrscanD0 #define dispmat ztrdispmat #define setmemory ztrsetmemory #define freememory ztrfreememory #define scan_intervals ztrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpztrmr2do(); extern void Cpztrmr2d(); /* some defines for Cpztrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) dcomplex **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (dcomplex *) mr2d_malloc( blocksize * sizeof(dcomplex)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) dcomplex *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern zcopy_(); */ /**************************************************************/ /* return the number of elements int the column after i and the distance of * the first one from i, i,j can be negative out of borns, the number of * elements returned can be negative (means 0) */ static2 int insidemat(uplo, diag, i, j, m, n, offset) int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ char *uplo, *diag; int *offset; { /* tests outside mxn */ assert(j >= 0 && j < n); assert(i >= 0); if (toupper(*uplo) == 'U') { int nbline; /* number of lines in the j_th column */ int virtualnbline; /* number of line if we were not limited by m */ *offset = 0; virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); nbline = min(virtualnbline, m); return nbline - i; } else { int firstline; /* first line in the j_th column */ int diagcol; /* column where the diag begin */ int virtualline; /* virtual first line if the matrix was extended with * negative indices */ int off; diagcol = max(n - m, 0);; virtualline = j - diagcol + (toupper(*diag) == 'U'); firstline = max(0, virtualline); off = max(firstline - i, 0); *offset = off; i += off; return m - i; } }/* insidemat() */ /********************************************************************/ /* Execute an action on the local memories when an intersection occurs (the * action can be the filling of the memory buffer, the count of the memory * buffer size or the setting of the memory with the element received) */ static2 void intersect(uplo, diag, j, start, end, action, ptrsizebuff, pptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1) int action, *ptrsizebuff; int j, start, end; dcomplex **pptrbuff, *ptrblock; int templateheight0, templatewidth0; int templateheight1, templatewidth1; MDESC *ma, *mb; int ia, ja, ib, jb, m, n; char *uplo, *diag; /* Execute the action on the local memory for the current interval and * increment pptrbuff and ptrsizebuff of the intervalsize */ /* Notice that if the interval is contigous in the virtual matrice, it is * also contigous in the real one ! */ { /* int un = 1; only when we use dcopy instead of memcpy */ dcomplex *ptrstart; int offset, nbline; int intervalsize; assert(start < end); assert(j >= 0 && j < n); nbline = insidemat(uplo, diag, start, j, m, n, &offset); if (nbline <= 0) return; start += offset; if (start >= end) return; intervalsize = min(end - start, nbline); (*ptrsizebuff) += intervalsize; switch (action) { case SENDBUFF: /* fill buff with local elements to be sent */ ptrstart = ptrblock + localindice(start + ia, j + ja, templateheight0, templatewidth0, ma); memcpy((char *) (*pptrbuff), (char *) ptrstart, intervalsize * sizeof(dcomplex)); /* zcopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), * &un); */ (*pptrbuff) += intervalsize; break; case RECVBUFF: /* fill local memory with the values received */ ptrstart = ptrblock + localindice(start + ib, j + jb, templateheight1, templatewidth1, mb); memcpy((char *) ptrstart, (char *) (*pptrbuff), intervalsize * sizeof(dcomplex)); /* zcopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), * &un); */ (*pptrbuff) += intervalsize; break; case SIZEBUFF: /* computation of sizebuff */ break; default: printf("action is %d outside the scope of the case [0..2] !! \n ", action); exit(0); break; }; /* switch (action) */ }/* intersect() */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpztrmr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].gstart = start; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } /*********************************************************************/ /* Do the scanning of intervals and the requested action */ void scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrblock) int action, /* # of the action done on the intersected intervals */ *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be * an output parameter in every cases) */ dcomplex *ptrbuff /* address of the communication ptrbuffer (a * suffisant memory space is supposed to be allocated before the call) */ , *ptrblock; int p0, q0, p1, q1; IDESC *v_inter, *h_inter; int vinter_nb, hinter_nb; int m, n; int ia, ja, ib, jb; MDESC *ma, *mb; char *uplo, *diag; {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to * the [a..a+au-1] interval of length au */ int templateheight1, templatewidth1; int templateheight0, templatewidth0; int h, v; /* for scanning the intervals */ /* initializations */ templateheight1 = p1 * mb->nbrow; templateheight0 = p0 * ma->nbrow; templatewidth1 = q1 * mb->nbcol; templatewidth0 = q0 * ma->nbcol; /* we now will deal will logical grids, that's to say we change our * numbering of processors so that (0,0) begin on logical processor (0,0) */ /* in case we will not enter the while loop */ (*ptrsizebuff) = 0; for (h = 0; h < hinter_nb; h++) for (v = 0; v < vinter_nb; v++) { int j; for (j = 0; j < h_inter[h].len; j++) intersect(uplo, diag, j + h_inter[h].gstart, v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, action, ptrsizebuff, &ptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1); } }/* scanD0() */ scalapack-1.8.0/REDIST/SRC/pdgemr2.c0000640000175000017500000001334410363532303016446 0ustar muammarmuammar#include "redist.h" /* $Id: pdgemr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ * * some functions used by the pdgemr2d routine see file pdgemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdgemr2do_ #define fortran_mr2dnew pdgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDGEMR2D #define fortran_mr2d PDGEMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdgemr2do #define fortran_mr2dnew pdgemr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dgescanD0 #define dispmat dgedispmat #define setmemory dgesetmemory #define freememory dgefreememory #define scan_intervals dgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdgemr2do(); extern void Cpdgemr2d(); /* some defines for Cpdgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) double **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (double *) mr2d_malloc( blocksize * sizeof(double)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) double *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern dcopy_(); */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpdgemr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].lstart = l + start - j0; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } scalapack-1.8.0/REDIST/SRC/pcgemr.c0000640000175000017500000005442110363532303016364 0ustar muammarmuammar#include "redist.h" /** $Id: pcgemr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PCGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PCGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) COMPLEX On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) COMPLEX On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pcgemr2do_ #define fortran_mr2dnew pcgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PCGEMR2D #define fortran_mr2d PCGEMR2DO #define ccopy_ CCOPY #define clacpy_ CLACPY #else #define fortran_mr2d pcgemr2do #define fortran_mr2dnew pcgemr2d #define ccopy_ ccopy #define clacpy_ clacpy #endif #define Clacpy Ccgelacpy void Clacpy(); typedef struct { float r, i; } complex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Ccgesd2d(); extern void Ccgerv2d(); /* lapack */ void clacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 cgescanD0 #define dispmat cgedispmat #define setmemory cgesetmemory #define freememory cgefreememory #define scan_intervals cgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpcgemr2do(); extern void Cpcgemr2d(); /* some defines for Cpcgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; complex *A, *B; { Cpcgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; complex *A, *B; int *gcontext; { Cpcgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpcgemr2do(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) complex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpcgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpcgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) complex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { complex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; complex *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock, ma, ptrsendbuff); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Ccgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Ccgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { buff2block(v_inter, vinter_nb, h_inter, hinter_nb, recvptr, ptrmynewblock, mb); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ { \ complex *_a,*_b; \ int _m,_n,_lda,_ldb; \ int _i,_j; \ _m = (mo);_n = (no); \ _a = (ao);_b = (bo); \ _lda = (ldao) - _m; \ _ldb = (ldbo) - _m; \ assert(_lda >= 0 && _ldb >= 0); \ for (_j=0;_j<_n;_j++) { \ for (_i=0;_i<_m;_i++) \ *_b++ = *_a++; \ _b += _ldb; \ _a += _lda; \ } \ } static2 int block2buff(vi, vinb, hi, hinb, ptra, ma, buff) int hinb, vinb; IDESC *hi, *vi; MDESC *ma; complex *buff, *ptra; { int h, v, sizebuff; complex *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptra + hi[h].lstart * ma->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, ptr2 + vi[v].lstart, ma->lda, buff + sizebuff, vi[v].len); sizebuff += hi[h].len * vi[v].len; } } return sizebuff; } static2 void buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) int hinb, vinb; IDESC *hi, *vi; MDESC *mb; complex *buff, *ptrb; { int h, v, sizebuff; complex *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptrb + hi[h].lstart * mb->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, buff + sizebuff, vi[v].len, ptr2 + vi[v].lstart, mb->lda); sizebuff += hi[h].len * vi[v].len; } } } static2 int inter_len(hinb, hi, vinb, vi) int hinb, vinb; IDESC *hi, *vi; { int hlen, vlen, h, v; hlen = 0; for (h = 0; h < hinb; h++) hlen += hi[h].len; vlen = 0; for (v = 0; v < vinb; v++) vlen += vi[v].len; return hlen * vlen; } void Clacpy(m, n, a, lda, b, ldb) complex *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pdgemr.c0000640000175000017500000005435110363532303016367 0ustar muammarmuammar#include "redist.h" /** $Id: pdgemr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PDGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PDGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) DOUBLE PRECISION On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) DOUBLE PRECISION On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdgemr2do_ #define fortran_mr2dnew pdgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDGEMR2D #define fortran_mr2d PDGEMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdgemr2do #define fortran_mr2dnew pdgemr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dgescanD0 #define dispmat dgedispmat #define setmemory dgesetmemory #define freememory dgefreememory #define scan_intervals dgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdgemr2do(); extern void Cpdgemr2d(); /* some defines for Cpdgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; double *A, *B; { Cpdgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; double *A, *B; int *gcontext; { Cpdgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpdgemr2do(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) double *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpdgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpdgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) double *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { double *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; double *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock, ma, ptrsendbuff); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Cdgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Cdgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { buff2block(v_inter, vinter_nb, h_inter, hinter_nb, recvptr, ptrmynewblock, mb); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ { \ double *_a,*_b; \ int _m,_n,_lda,_ldb; \ int _i,_j; \ _m = (mo);_n = (no); \ _a = (ao);_b = (bo); \ _lda = (ldao) - _m; \ _ldb = (ldbo) - _m; \ assert(_lda >= 0 && _ldb >= 0); \ for (_j=0;_j<_n;_j++) { \ for (_i=0;_i<_m;_i++) \ *_b++ = *_a++; \ _b += _ldb; \ _a += _lda; \ } \ } static2 int block2buff(vi, vinb, hi, hinb, ptra, ma, buff) int hinb, vinb; IDESC *hi, *vi; MDESC *ma; double *buff, *ptra; { int h, v, sizebuff; double *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptra + hi[h].lstart * ma->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, ptr2 + vi[v].lstart, ma->lda, buff + sizebuff, vi[v].len); sizebuff += hi[h].len * vi[v].len; } } return sizebuff; } static2 void buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) int hinb, vinb; IDESC *hi, *vi; MDESC *mb; double *buff, *ptrb; { int h, v, sizebuff; double *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptrb + hi[h].lstart * mb->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, buff + sizebuff, vi[v].len, ptr2 + vi[v].lstart, mb->lda); sizebuff += hi[h].len * vi[v].len; } } } static2 int inter_len(hinb, hi, vinb, vi) int hinb, vinb; IDESC *hi, *vi; { int hlen, vlen, h, v; hlen = 0; for (h = 0; h < hinb; h++) hlen += hi[h].len; vlen = 0; for (v = 0; v < vinb; v++) vlen += vi[v].len; return hlen * vlen; } void Clacpy(m, n, a, lda, b, ldb) double *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/pztrmr.c0000640000175000017500000005360110363532303016444 0ustar muammarmuammar#include "redist.h" /** $Id: pztrmr.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PZTRMR2D(UPLO, DIAG, M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PZTRMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. The submatrix to be copied is assumed to be trapezoidal. So only the upper or the lower part will be copied. The other part is unchanged. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== UPLO (input) CHARACTER*1. On entry, UPLO specifies whether we should copy the upper part of the lower part of the defined submatrix: UPLO = 'U' or 'u' copy the upper triangular part. UPLO = 'L' or 'l' copy the lower triangular part. Unchanged on exit. DIAG (input) CHARACTER*1. On entry, DIAG specifies whether we should copy the diagonal. DIAG = 'U' or 'u' do NOT copy the diagonal of the submatrix. DIAG = 'N' or 'n' DO copy the diagonal of the submatrix. Unchanged on exit. M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) COMPLEX*16 On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) COMPLEX*16 On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pztrmr2do_ #define fortran_mr2dnew pztrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZTRMR2D #define fortran_mr2d PZTRMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pztrmr2do #define fortran_mr2dnew pztrmr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Cztrlacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 ztrscanD0 #define dispmat ztrdispmat #define setmemory ztrsetmemory #define freememory ztrfreememory #define scan_intervals ztrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpztrmr2do(); extern void Cpztrmr2d(); /* some defines for Cpztrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; dcomplex *A, *B; { Cpztrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(uplo, diag, m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) char *uplo, *diag; int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; dcomplex *A, *B; int *gcontext; { Cpztrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpztrmr2do(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) char *uplo, *diag; dcomplex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpztrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpztrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) char *uplo, *diag; dcomplex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { dcomplex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; dcomplex *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Czgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Czgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } void Clacpy(m, n, a, lda, b, ldb) dcomplex *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/REDIST/SRC/psgemr2.c0000640000175000017500000001333710363532303016467 0ustar muammarmuammar#include "redist.h" /* $Id: psgemr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ * * some functions used by the psgemr2d routine see file psgemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d psgemr2do_ #define fortran_mr2dnew psgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PSGEMR2D #define fortran_mr2d PSGEMR2DO #define scopy_ SCOPY #define slacpy_ SLACPY #else #define fortran_mr2d psgemr2do #define fortran_mr2dnew psgemr2d #define scopy_ scopy #define slacpy_ slacpy #endif #define Clacpy Csgelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Csgesd2d(); extern void Csgerv2d(); /* lapack */ void slacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 sgescanD0 #define dispmat sgedispmat #define setmemory sgesetmemory #define freememory sgefreememory #define scan_intervals sgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpsgemr2do(); extern void Cpsgemr2d(); /* some defines for Cpsgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) float **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (float *) mr2d_malloc( blocksize * sizeof(float)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) float *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern scopy_(); */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpsgemr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].lstart = l + start - j0; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } scalapack-1.8.0/REDIST/SRC/pdtrmr2.c0000640000175000017500000002624210363532303016501 0ustar muammarmuammar#include "redist.h" /* $Id: pdtrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ * * some functions used by the pdtrmr2d routine see file pdtrmr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pdtrmr2do_ #define fortran_mr2dnew pdtrmr2d_ #elif defined(UpCase) #define fortran_mr2dnew PDTRMR2D #define fortran_mr2d PDTRMR2DO #define dcopy_ DCOPY #define dlacpy_ DLACPY #else #define fortran_mr2d pdtrmr2do #define fortran_mr2dnew pdtrmr2d #define dcopy_ dcopy #define dlacpy_ dlacpy #endif #define Clacpy Cdtrlacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int gstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cdgesd2d(); extern void Cdgerv2d(); /* lapack */ void dlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 dtrscanD0 #define dispmat dtrdispmat #define setmemory dtrsetmemory #define freememory dtrfreememory #define scan_intervals dtrscan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpdtrmr2do(); extern void Cpdtrmr2d(); /* some defines for Cpdtrmr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #include #include /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ /************************************************************************/ /* Set the memory space with the malloc function */ void setmemory(adpointer, blocksize) double **adpointer; int blocksize; { assert(blocksize >= 0); if (blocksize == 0) { *adpointer = NULL; return; } *adpointer = (double *) mr2d_malloc( blocksize * sizeof(double)); } /******************************************************************/ /* Free the memory space after the malloc */ void freememory(ptrtobefreed) double *ptrtobefreed; { if (ptrtobefreed == NULL) return; free((char *) ptrtobefreed); } /* extern functions for intersect() extern dcopy_(); */ /**************************************************************/ /* return the number of elements int the column after i and the distance of * the first one from i, i,j can be negative out of borns, the number of * elements returned can be negative (means 0) */ static2 int insidemat(uplo, diag, i, j, m, n, offset) int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ char *uplo, *diag; int *offset; { /* tests outside mxn */ assert(j >= 0 && j < n); assert(i >= 0); if (toupper(*uplo) == 'U') { int nbline; /* number of lines in the j_th column */ int virtualnbline; /* number of line if we were not limited by m */ *offset = 0; virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); nbline = min(virtualnbline, m); return nbline - i; } else { int firstline; /* first line in the j_th column */ int diagcol; /* column where the diag begin */ int virtualline; /* virtual first line if the matrix was extended with * negative indices */ int off; diagcol = max(n - m, 0);; virtualline = j - diagcol + (toupper(*diag) == 'U'); firstline = max(0, virtualline); off = max(firstline - i, 0); *offset = off; i += off; return m - i; } }/* insidemat() */ /********************************************************************/ /* Execute an action on the local memories when an intersection occurs (the * action can be the filling of the memory buffer, the count of the memory * buffer size or the setting of the memory with the element received) */ static2 void intersect(uplo, diag, j, start, end, action, ptrsizebuff, pptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1) int action, *ptrsizebuff; int j, start, end; double **pptrbuff, *ptrblock; int templateheight0, templatewidth0; int templateheight1, templatewidth1; MDESC *ma, *mb; int ia, ja, ib, jb, m, n; char *uplo, *diag; /* Execute the action on the local memory for the current interval and * increment pptrbuff and ptrsizebuff of the intervalsize */ /* Notice that if the interval is contigous in the virtual matrice, it is * also contigous in the real one ! */ { /* int un = 1; only when we use dcopy instead of memcpy */ double *ptrstart; int offset, nbline; int intervalsize; assert(start < end); assert(j >= 0 && j < n); nbline = insidemat(uplo, diag, start, j, m, n, &offset); if (nbline <= 0) return; start += offset; if (start >= end) return; intervalsize = min(end - start, nbline); (*ptrsizebuff) += intervalsize; switch (action) { case SENDBUFF: /* fill buff with local elements to be sent */ ptrstart = ptrblock + localindice(start + ia, j + ja, templateheight0, templatewidth0, ma); memcpy((char *) (*pptrbuff), (char *) ptrstart, intervalsize * sizeof(double)); /* dcopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), * &un); */ (*pptrbuff) += intervalsize; break; case RECVBUFF: /* fill local memory with the values received */ ptrstart = ptrblock + localindice(start + ib, j + jb, templateheight1, templatewidth1, mb); memcpy((char *) ptrstart, (char *) (*pptrbuff), intervalsize * sizeof(double)); /* dcopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), * &un); */ (*pptrbuff) += intervalsize; break; case SIZEBUFF: /* computation of sizebuff */ break; default: printf("action is %d outside the scope of the case [0..2] !! \n ", action); exit(0); break; }; /* switch (action) */ }/* intersect() */ /* scan_intervals: scans two distributions in one dimension, and compute the * intersections on the local processor. result must be long enough to * contains the result that are stocked in IDESC structure, the function * returns the number of intersections found */ int scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, result) char type; int ja, jb, n, q0, q1, col0, col1; MDESC *ma, *mb; IDESC *result; { int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; int l; /* local indice on the beginning of the interval */ assert(type == 'c' || type == 'r'); nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); templatewidth0 = q0 * nbcol0; templatewidth1 = q1 * nbcol1; { int sp0 = (type == 'c' ? ma->spcol : ma->sprow); int sp1 = (type == 'c' ? mb->spcol : mb->sprow); j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; } offset = 0; l = 0; /* a small check to verify that the submatrix begin inside the first block * of the original matrix, this done by a sort of coordinate change at the * beginning of the Cpdtrmr2d */ assert(j0 + nbcol0 > 0); assert(j1 + nbcol1 > 0); while ((j0 < n) && (j1 < n)) { int end0, end1; int start, end; end0 = j0 + nbcol0; end1 = j1 + nbcol1; if (end0 <= j1) { j0 += templatewidth0; l += nbcol0; continue; } if (end1 <= j0) { j1 += templatewidth1; continue; } /* compute the raw intersection */ start = max(j0, j1); start = max(start, 0); /* the start is correct now, update the corresponding fields */ result[offset].gstart = start; end = min(end0, end1); if (end0 == end) { j0 += templatewidth0; l += nbcol0; } if (end1 == end) j1 += templatewidth1; /* throw the limit if they go out of the matrix */ end = min(end, n); assert(end > start); /* it is a bit tricky to see why the length is always positive after all * this min and max, first we have the property that every interval * considered is at least partly into the submatrix, second we arrive * here only if the raw intersection is non-void, if we remove a limit * that means the corresponding frontier is in both intervals which * proove the final interval is non-void, clear ?? */ result[offset].len = end - start; offset += 1; } /* while */ return offset; } /*********************************************************************/ /* Do the scanning of intervals and the requested action */ void scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1, v_inter, vinter_nb, h_inter, hinter_nb, ptrblock) int action, /* # of the action done on the intersected intervals */ *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be * an output parameter in every cases) */ double *ptrbuff /* address of the communication ptrbuffer (a * suffisant memory space is supposed to be allocated before the call) */ , *ptrblock; int p0, q0, p1, q1; IDESC *v_inter, *h_inter; int vinter_nb, hinter_nb; int m, n; int ia, ja, ib, jb; MDESC *ma, *mb; char *uplo, *diag; {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to * the [a..a+au-1] interval of length au */ int templateheight1, templatewidth1; int templateheight0, templatewidth0; int h, v; /* for scanning the intervals */ /* initializations */ templateheight1 = p1 * mb->nbrow; templateheight0 = p0 * ma->nbrow; templatewidth1 = q1 * mb->nbcol; templatewidth0 = q0 * ma->nbcol; /* we now will deal will logical grids, that's to say we change our * numbering of processors so that (0,0) begin on logical processor (0,0) */ /* in case we will not enter the while loop */ (*ptrsizebuff) = 0; for (h = 0; h < hinter_nb; h++) for (v = 0; v < vinter_nb; v++) { int j; for (j = 0; j < h_inter[h].len; j++) intersect(uplo, diag, j + h_inter[h].gstart, v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, action, ptrsizebuff, &ptrbuff, ptrblock, m, n, ma, ia, ja, templateheight0, templatewidth0, mb, ib, jb, templateheight1, templatewidth1); } }/* scanD0() */ scalapack-1.8.0/REDIST/SRC/redist.h0000640000175000017500000000016610363532303016403 0ustar muammarmuammar#ifdef T3D #define float double #endif #ifdef T3E #define float double #endif #ifdef CRAY #define float double #endif scalapack-1.8.0/REDIST/SRC/pzgemr.c0000640000175000017500000005444510363532303016421 0ustar muammarmuammar#include "redist.h" /** $Id: pzgemr.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ ------------------------------------------------------------------------ -- ScaLAPACK routine (version 1.7) -- Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994. SUBROUTINE PZGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC, $ CTXT) ------------------------------------------------------------------------ Purpose ======= PZGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B. The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule: - If a processor is in A context, all parameters related to A must be valid. - If a processor is in B context, all parameters related to B must be valid. - ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1. - M and N must be valid for everyone. - other parameters are not examined. Notes ===== A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location. In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A: NOTATION STORED IN EXPLANATION --------------- -------------- -------------------------------------- DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)). Important notice ================ The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution. Be aware that all processors included in this context must call the redistribution routine. Parameters ========== M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit. N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit. A (input) COMPLEX*16 On entry, the source matrix. Unchanged on exit. IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit. ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1. B (output) COMPLEX*16 On entry, the destination matrix. The portion corresponding to the defined submatrix are updated. IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit. BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1. CTXT (input) a context englobing at least all processors included in either A context or B context Memory requirement : ==================== for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1. ============================================================ Created March 1993 by B. Tourancheau (See sccs for modifications). Modifications by Loic PRYLLI 1995 ============================================================ */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pzgemr2do_ #define fortran_mr2dnew pzgemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PZGEMR2D #define fortran_mr2d PZGEMR2DO #define zcopy_ ZCOPY #define zlacpy_ ZLACPY #else #define fortran_mr2d pzgemr2do #define fortran_mr2dnew pzgemr2d #define zcopy_ zcopy #define zlacpy_ zlacpy #endif #define Clacpy Czgelacpy void Clacpy(); typedef struct { double r, i; } dcomplex; typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Czgesd2d(); extern void Czgerv2d(); /* lapack */ void zlacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 zgescanD0 #define dispmat zgedispmat #define setmemory zgesetmemory #define freememory zgefreememory #define scan_intervals zgescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpzgemr2do(); extern void Cpzgemr2d(); /* some defines for Cpzgemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include #define DESCLEN 9 void fortran_mr2d(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; dcomplex *A, *B; { Cpzgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B); return; } void fortran_mr2dnew(m, n, A, ia, ja, desc_A, B, ib, jb, desc_B, gcontext) int *ia, *ib, *ja, *jb, *m, *n; int desc_A[DESCLEN], desc_B[DESCLEN]; dcomplex *A, *B; int *gcontext; { Cpzgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, B, *ib, *jb, (MDESC *) desc_B, *gcontext); return; } static2 void init_chenille(); static2 int inter_len(); static2 int block2buff(); static2 void buff2block(); static2 void gridreshape(); void Cpzgemr2do(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb) dcomplex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n; { int dummy, nprocs; int gcontext; /* first we initialize a global grid which serve as a reference to * communicate from grid a to grid b */ Cblacs_pinfo(&dummy, &nprocs); Cblacs_get(0, 0, &gcontext); Cblacs_gridinit(&gcontext, "R", 1, nprocs); Cpzgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, gcontext); Cblacs_gridexit(gcontext); } #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis * idem B puis ia,ja puis ib,jb */ #define MAGIC_MAX 100000000 void Cpzgemr2d(m, n, ptrmyblock, ia, ja, ma, ptrmynewblock, ib, jb, mb, globcontext) dcomplex *ptrmyblock, *ptrmynewblock; /* pointers to the memory location of the matrix and the redistributed matrix */ MDESC *ma; MDESC *mb; int ia, ja, ib, jb, m, n, globcontext; { dcomplex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; dcomplex *recvptr; MDESC newa, newb; int *proc0, *proc1, *param; int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; int i, j; int nprow, npcol, gcontext; int recvsize, sendsize; IDESC *h_inter; /* to store the horizontal intersections */ IDESC *v_inter; /* to store the vertical intersections */ int hinter_nb, vinter_nb; /* number of intrsections in both directions */ int dummy; int p0, q0, p1, q1; int *ra, *ca; /* end of variables */ /* To simplify further calcul we change the matrix indexation from * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ if (m == 0 || n == 0) return; ia -= 1; ja -= 1; ib -= 1; jb -= 1; Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); gcontext = globcontext; nprocs = nprow * npcol; /* if the global context that is given to us has not the shape of a line * (nprow != 1), create a new context. TODO: to be optimal, we should * avoid this because it is an uncessary synchronisation */ if (nprow != 1) { gridreshape(&gcontext); Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); } Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow0 >= p0 || mypcol0 >= q0) myprow0 = mypcol0 = -1; assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); if (myprow1 >= p1 || mypcol1 >= q1) myprow1 = mypcol1 = -1; assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); /* exchange the missing parameters among the processors: shape of grids and * location of the processors */ param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); ra = param + nprocs * 2 + NBPARAM; ca = param + (nprocs * 2 + NBPARAM) * 2; for (i = 0; i < nprocs * 2 + NBPARAM; i++) param[i] = MAGIC_MAX; proc0 = param + NBPARAM; proc1 = param + NBPARAM + nprocs; /* we calulate proc0 and proc1 that will give the number of a proc in * respectively a or b in the global context */ if (myprow0 >= 0) { proc0[myprow0 * q0 + mypcol0] = mypnum; param[0] = p0; param[1] = q0; param[4] = ma->m; param[5] = ma->n; param[6] = ma->nbrow; param[7] = ma->nbcol; param[8] = ma->sprow; param[9] = ma->spcol; param[10] = ia; param[11] = ja; } if (myprow1 >= 0) { proc1[myprow1 * q1 + mypcol1] = mypnum; param[2] = p1; param[3] = q1; param[12] = mb->m; param[13] = mb->n; param[14] = mb->nbrow; param[15] = mb->nbcol; param[16] = mb->sprow; param[17] = mb->spcol; param[18] = ib; param[19] = jb; } Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, ra, ca, 2 * nprocs + NBPARAM, -1, -1); newa = *ma; newb = *mb; ma = &newa; mb = &newb; if (myprow0 == -1) { p0 = param[0]; q0 = param[1]; ma->m = param[4]; ma->n = param[5]; ma->nbrow = param[6]; ma->nbcol = param[7]; ma->sprow = param[8]; ma->spcol = param[9]; ia = param[10]; ja = param[11]; } if (myprow1 == -1) { p1 = param[2]; q1 = param[3]; mb->m = param[12]; mb->n = param[13]; mb->nbrow = param[14]; mb->nbcol = param[15]; mb->sprow = param[16]; mb->spcol = param[17]; ib = param[18]; jb = param[19]; } for (i = 0; i < NBPARAM; i++) { if (param[i] == MAGIC_MAX) { fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); exit(1); } } #ifndef NDEBUG for (i = 0; i < p0 * q0; i++) assert(proc0[i] >= 0 && proc0[i] < nprocs); for (i = 0; i < p1 * q1; i++) assert(proc1[i] >= 0 && proc1[i] < nprocs); #endif /* check the validity of the parameters */ paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ { int decal; ia = changeorigin(myprow0, ma->sprow, p0, ma->nbrow, ia, &decal, &ma->sprow); ptrmyblock += decal; ja = changeorigin(mypcol0, ma->spcol, q0, ma->nbcol, ja, &decal, &ma->spcol); ptrmyblock += decal * ma->lda; ma->m = ia + m; ma->n = ja + n; ib = changeorigin(myprow1, mb->sprow, p1, mb->nbrow, ib, &decal, &mb->sprow); ptrmynewblock += decal; jb = changeorigin(mypcol1, mb->spcol, q1, mb->nbcol, jb, &decal, &mb->spcol); ptrmynewblock += decal * mb->lda; mb->m = ib + m; mb->n = jb + n; if (p0 == 1) ma->nbrow = ma->m; if (q0 == 1) ma->nbcol = ma->n; if (p1 == 1) mb->nbrow = mb->m; if (q1 == 1) mb->nbcol = mb->n; #ifndef NDEBUG paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); #endif } /* We compute the size of the memory buffer ( we choose the worst case, * when the buffer sizes == the memory block sizes). */ if (myprow0 >= 0 && mypcol0 >= 0) { /* Initialize pointer variables */ setmemory(&ptrsendbuff, memoryblocksize(ma)); }; /* if (mypnum < p0 * q0) */ if (myprow1 >= 0 && mypcol1 >= 0) { /* Initialize pointer variables */ setmemory(&ptrrecvbuff, memoryblocksize(mb)); }; /* if (mypnum < p1 * q1) */ /* allocing room for the tabs, alloc for the worst case,local_n or local_m * intervals, in fact the worst case should be less, perhaps half that,I * should think of that one day. */ h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * ma->nbcol * sizeof(IDESC)); v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) * ma->nbrow * sizeof(IDESC)); /* We go for the scanning of indices. For each processor including mypnum, * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send * it. Then for each processor, we compute the size of message to be * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements * of recvbuff the right place (scanD)(RECVBUFF)) */ recvptr = ptrrecvbuff; { int tot, myrang, step, sens; int *sender, *recver; int mesending, merecving; tot = max(p0 * q0, p1 * q1); init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, &sender, &recver, &myrang); if (myrang == -1) goto after_comm; mesending = myprow0 >= 0; assert(sender[myrang] >= 0 || !mesending); assert(!mesending || proc0[sender[myrang]] == mypnum); merecving = myprow1 >= 0; assert(recver[myrang] >= 0 || !merecving); assert(!merecving || proc1[recver[myrang]] == mypnum); step = tot - 1 - myrang; do { for (sens = 0; sens < 2; sens++) { /* be careful here, when we communicating with ourselves, we must * send first (myrang > step == 0) */ if (mesending && recver[step] >= 0 && (sens == 0)) { i = recver[step] / q1; j = recver[step] % q1; vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, v_inter); hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, h_inter); sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, ptrmyblock, ma, ptrsendbuff); } /* if (mesending...) { */ if (mesending && recver[step] >= 0 && (sens == myrang > step)) { i = recver[step] / q1; j = recver[step] % q1; if (sendsize > 0 && (step != myrang || !merecving) ) { Czgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 0, proc1[i * q1 + j]); } /* sendsize > 0 */ } /* if (mesending ... */ if (merecving && sender[step] >= 0 && (sens == myrang <= step)) { i = sender[step] / q0; j = sender[step] % q0; vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, v_inter); hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, h_inter); recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); if (recvsize > 0) { if (step == myrang && mesending) { Clacpy(recvsize, 1, ptrsendbuff, recvsize, ptrrecvbuff, recvsize); } else { Czgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 0, proc0[i * q0 + j]); } } /* recvsize > 0 */ } /* if (merecving ...) */ if (merecving && sender[step] >= 0 && sens == 1) { buff2block(v_inter, vinter_nb, h_inter, hinter_nb, recvptr, ptrmynewblock, mb); } /* if (merecving...) */ } /* for (sens = 0) */ step -= 1; if (step < 0) step = tot - 1; } while (step != tot - 1 - myrang); after_comm: free(sender); } /* { int tot,nr,ns ...} */ /* don't forget to clean up things! */ if (myprow1 >= 0 && mypcol1 >= 0) { freememory((char *) ptrrecvbuff); }; if (myprow0 >= 0 && mypcol0 >= 0) { freememory((char *) ptrsendbuff); }; if (nprow != 1) Cblacs_gridexit(gcontext); free(v_inter); free(h_inter); free(param); }/* distrib */ static2 void init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) int nprocs, mypnum, n0, n1; int *proc0, *proc1, **psend, **precv, *myrang; { int ns, nr, i, tot; int *sender, *recver, *g0, *g1; tot = max(n0, n1); sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); recver = sender + tot; *psend = sender; *precv = recver; g0 = recver + tot; g1 = g0 + nprocs; for (i = 0; i < nprocs; i++) { g0[i] = -1; g1[i] = -1; } for (i = 0; i < tot; i++) { sender[i] = -1; recver[i] = -1; } for (i = 0; i < n0; i++) g0[proc0[i]] = i; for (i = 0; i < n1; i++) g1[proc1[i]] = i; ns = 0; nr = 0; *myrang = -1; for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] >= 0) { if (i == mypnum) *myrang = nr; sender[ns] = g0[i]; ns += 1; recver[nr] = g1[i]; nr += 1; assert(ns <= n0 && nr <= n1 && nr == ns); } for (i = 0; i < nprocs; i++) if (g0[i] >= 0 && g1[i] < 0) { if (i == mypnum) *myrang = ns; sender[ns] = g0[i]; ns += 1; assert(ns <= n0); } for (i = 0; i < nprocs; i++) if (g1[i] >= 0 && g0[i] < 0) { if (i == mypnum) *myrang = nr; recver[nr] = g1[i]; nr += 1; assert(nr <= n1); } } #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ { \ dcomplex *_a,*_b; \ int _m,_n,_lda,_ldb; \ int _i,_j; \ _m = (mo);_n = (no); \ _a = (ao);_b = (bo); \ _lda = (ldao) - _m; \ _ldb = (ldbo) - _m; \ assert(_lda >= 0 && _ldb >= 0); \ for (_j=0;_j<_n;_j++) { \ for (_i=0;_i<_m;_i++) \ *_b++ = *_a++; \ _b += _ldb; \ _a += _lda; \ } \ } static2 int block2buff(vi, vinb, hi, hinb, ptra, ma, buff) int hinb, vinb; IDESC *hi, *vi; MDESC *ma; dcomplex *buff, *ptra; { int h, v, sizebuff; dcomplex *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptra + hi[h].lstart * ma->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, ptr2 + vi[v].lstart, ma->lda, buff + sizebuff, vi[v].len); sizebuff += hi[h].len * vi[v].len; } } return sizebuff; } static2 void buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) int hinb, vinb; IDESC *hi, *vi; MDESC *mb; dcomplex *buff, *ptrb; { int h, v, sizebuff; dcomplex *ptr2; sizebuff = 0; for (h = 0; h < hinb; h++) { ptr2 = ptrb + hi[h].lstart * mb->lda; for (v = 0; v < vinb; v++) { Mlacpy(vi[v].len, hi[h].len, buff + sizebuff, vi[v].len, ptr2 + vi[v].lstart, mb->lda); sizebuff += hi[h].len * vi[v].len; } } } static2 int inter_len(hinb, hi, vinb, vi) int hinb, vinb; IDESC *hi, *vi; { int hlen, vlen, h, v; hlen = 0; for (h = 0; h < hinb; h++) hlen += hi[h].len; vlen = 0; for (v = 0; v < vinb; v++) vlen += vi[v].len; return hlen * vlen; } void Clacpy(m, n, a, lda, b, ldb) dcomplex *a, *b; int m, n, lda, ldb; { int i, j; lda -= m; ldb -= m; assert(lda >= 0 && ldb >= 0); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) *b++ = *a++; b += ldb; a += lda; } } static2 void gridreshape(ctxtp) int *ctxtp; { int ori, final; /* original context, and new context created, with * line form */ int nprow, npcol, myrow, mycol; int *usermap; int i, j; ori = *ctxtp; Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); usermap = mr2d_malloc(sizeof(int) * nprow * npcol); for (i = 0; i < nprow; i++) for (j = 0; j < npcol; j++) { usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); } /* Cblacs_get(0, 0, &final); */ Cblacs_get(ori, 10, &final); Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); *ctxtp = final; free(usermap); } scalapack-1.8.0/LOG-SUMMARY0000600000175000017500000005246610605264707015035 0ustar muammarmuammarlogs from the modifications from scalapack-1.7.0 to scalapack-1.8.0 ******************************************************************************** From Rev:1 to Rev:16 A EXAMPLE A EXAMPLE/CSCAEXMAT.dat A EXAMPLE/CSCAEXRHS.dat A EXAMPLE/DSCAEXMAT.dat A EXAMPLE/DSCAEXRHS.dat A EXAMPLE/Makefile A EXAMPLE/SCAEX.dat A EXAMPLE/SSCAEXMAT.dat A EXAMPLE/SSCAEXRHS.dat A EXAMPLE/ZSCAEXMAT.dat A EXAMPLE/ZSCAEXRHS.dat A EXAMPLE/pcscaex.f A EXAMPLE/pdscaex.f A EXAMPLE/pdscaexinfo.f A EXAMPLE/psscaex.f A EXAMPLE/pzscaex.f M Makefile M README M PBLAS/SRC/PBtools.h M PBLAS/SRC/pblas.h M PBLAS/SRC/pcscal_.c M PBLAS/SRC/pdscal_.c M PBLAS/SRC/psscal_.c M PBLAS/SRC/pzscal_.c D SLmake.inc A SLmake.inc.example (from SLmake.inc:16) M SRC/Makefile M SRC/dlasorte.f M SRC/pslared1d.f M SRC/pdlared1d.f M SRC/pslared2d.f M SRC/pdlared2d.f M SRC/pcgesv.f M SRC/psgesv.f M SRC/pdgesv.f M SRC/pzgesv.f M SRC/pslahqr.f M SRC/pslahrd.f M SRC/pslasmsub.f M SRC/psstein.f M SRC/pclahqr.f M SRC/pdlahqr.f M SRC/pdlahrd.f M SRC/pdlasmsub.f M SRC/pdstein.f M SRC/pzlahqr.f M SRC/pzlahrd.f M SRC/pzlasmsub.f M SRC/pzrot.c M SRC/pzstein.f M SRC/pztrevc.f M SRC/slasorte.f M SRC/pclahrd.f M SRC/pclasmsub.f M SRC/pcrot.c M SRC/pcstein.f M SRC/pctrevc.f M SRC/pzgesvd.f M SRC/pcgesvd.f M SRC/psgesvd.f M SRC/pdgesvd.f M SRC/psdbtrf.f M SRC/pcheevd.f M SRC/pzheevd.f M SRC/pcheevx.f M SRC/pzheevx.f M SRC/pchegvx.f M SRC/pzhegvx.f M SRC/pdsygvx.f M SRC/pssyevx.f M SRC/pssygvx.f M SRC/pdsyevx.f M SRC/pcgetri.f M SRC/pdgetri.f M SRC/psgetri.f M SRC/pzgetri.f M TESTING/EIG/pcevcdriver.f M TESTING/EIG/pcgehdrv.f M TESTING/EIG/pcgsepreq.f M TESTING/EIG/pcseptst.f M TESTING/EIG/pdgehdrv.f M TESTING/EIG/pdgsepreq.f M TESTING/EIG/psgehdrv.f M TESTING/EIG/psgsepreq.f M TESTING/EIG/pzevcdriver.f M TESTING/EIG/pzgehdrv.f M TESTING/EIG/pzgsepreq.f M TESTING/EIG/pzseptst.f M TESTING/LIN/pcinvdriver.f M TESTING/LIN/pdinvdriver.f M TESTING/LIN/psinvdriver.f M TESTING/LIN/pzinvdriver.f M TOOLS/LAPACK/Makefile D TOOLS/LAPACK/cbdsqr.f D TOOLS/LAPACK/cgbtf2.f D TOOLS/LAPACK/cgbtrf.f D TOOLS/LAPACK/cgetf2.f D TOOLS/LAPACK/cgetrf.f D TOOLS/LAPACK/cgetrs.f D TOOLS/LAPACK/chetd2.f D TOOLS/LAPACK/chetrd.f D TOOLS/LAPACK/clacgv.f D TOOLS/LAPACK/clacpy.f D TOOLS/LAPACK/cladiv.f D TOOLS/LAPACK/claev2.f M TOOLS/LAPACK/clagge.f M TOOLS/LAPACK/claghe.f M TOOLS/LAPACK/clagsy.f D TOOLS/LAPACK/clanhs.f D TOOLS/LAPACK/clarf.f D TOOLS/LAPACK/clarfb.f D TOOLS/LAPACK/clarfg.f D TOOLS/LAPACK/clarft.f M TOOLS/LAPACK/clarnd.f M TOOLS/LAPACK/clarot.f D TOOLS/LAPACK/clartg.f D TOOLS/LAPACK/clascl.f D TOOLS/LAPACK/claset.f D TOOLS/LAPACK/clasr.f D TOOLS/LAPACK/classq.f D TOOLS/LAPACK/claswp.f M TOOLS/LAPACK/clatm1.f M TOOLS/LAPACK/clatms.f D TOOLS/LAPACK/clatrd.f D TOOLS/LAPACK/cpbtf2.f D TOOLS/LAPACK/cpbtrf.f D TOOLS/LAPACK/cpotf2.f D TOOLS/LAPACK/cpotrf.f D TOOLS/LAPACK/cpttrf.f D TOOLS/LAPACK/crot.f D TOOLS/LAPACK/cstedc.f D TOOLS/LAPACK/cstein.f D TOOLS/LAPACK/csymv.f D TOOLS/LAPACK/ctbtrs.f D TOOLS/LAPACK/ctrtrs.f D TOOLS/LAPACK/cunm2l.f D TOOLS/LAPACK/cunm2r.f D TOOLS/LAPACK/cunmql.f D TOOLS/LAPACK/cunmqr.f D TOOLS/LAPACK/cunmtr.f D TOOLS/LAPACK/dbdsqr.f D TOOLS/LAPACK/dgbtf2.f D TOOLS/LAPACK/dgbtrf.f D TOOLS/LAPACK/dgetf2.f D TOOLS/LAPACK/dgetrf.f D TOOLS/LAPACK/dgetrs.f D TOOLS/LAPACK/dlabad.f D TOOLS/LAPACK/dlacpy.f D TOOLS/LAPACK/dladiv.f D TOOLS/LAPACK/dlae2.f D TOOLS/LAPACK/dlaed0.f D TOOLS/LAPACK/dlaed1.f D TOOLS/LAPACK/dlaed2.f D TOOLS/LAPACK/dlaed3.f D TOOLS/LAPACK/dlaed4.f D TOOLS/LAPACK/dlaed5.f D TOOLS/LAPACK/dlaed6.f D TOOLS/LAPACK/dlaed7.f D TOOLS/LAPACK/dlaed8.f D TOOLS/LAPACK/dlaed9.f D TOOLS/LAPACK/dlaeda.f D TOOLS/LAPACK/dlaev2.f M TOOLS/LAPACK/dlagge.f M TOOLS/LAPACK/dlagsy.f D TOOLS/LAPACK/dlagtf.f D TOOLS/LAPACK/dlagts.f D TOOLS/LAPACK/dlahqr.f D TOOLS/LAPACK/dlamch.f D TOOLS/LAPACK/dlamrg.f D TOOLS/LAPACK/dlange.f D TOOLS/LAPACK/dlanhs.f D TOOLS/LAPACK/dlanst.f D TOOLS/LAPACK/dlanv2.f D TOOLS/LAPACK/dlapy2.f D TOOLS/LAPACK/dlapy3.f M TOOLS/LAPACK/dlaran.f D TOOLS/LAPACK/dlarf.f D TOOLS/LAPACK/dlarfb.f D TOOLS/LAPACK/dlarfg.f D TOOLS/LAPACK/dlarft.f M TOOLS/LAPACK/dlarnd.f D TOOLS/LAPACK/dlarnv.f M TOOLS/LAPACK/dlarot.f D TOOLS/LAPACK/dlartg.f D TOOLS/LAPACK/dlaruv.f D TOOLS/LAPACK/dlas2.f D TOOLS/LAPACK/dlascl.f D TOOLS/LAPACK/dlaset.f D TOOLS/LAPACK/dlasq1.f D TOOLS/LAPACK/dlasq2.f D TOOLS/LAPACK/dlasq3.f D TOOLS/LAPACK/dlasq4.f D TOOLS/LAPACK/dlasq5.f D TOOLS/LAPACK/dlasq6.f D TOOLS/LAPACK/dlasr.f D TOOLS/LAPACK/dlasrt.f D TOOLS/LAPACK/dlassq.f D TOOLS/LAPACK/dlasv2.f D TOOLS/LAPACK/dlaswp.f M TOOLS/LAPACK/dlatm1.f M TOOLS/LAPACK/dlatms.f D TOOLS/LAPACK/dlatrd.f D TOOLS/LAPACK/dorm2l.f D TOOLS/LAPACK/dorm2r.f D TOOLS/LAPACK/dormql.f D TOOLS/LAPACK/dormqr.f D TOOLS/LAPACK/dormtr.f D TOOLS/LAPACK/dpbtf2.f D TOOLS/LAPACK/dpbtrf.f D TOOLS/LAPACK/dpotf2.f D TOOLS/LAPACK/dpotrf.f D TOOLS/LAPACK/dpttrf.f D TOOLS/LAPACK/dstedc.f D TOOLS/LAPACK/dstein.f D TOOLS/LAPACK/dsteqr.f D TOOLS/LAPACK/dsterf.f D TOOLS/LAPACK/dsytd2.f D TOOLS/LAPACK/dsytrd.f D TOOLS/LAPACK/dtbtrs.f D TOOLS/LAPACK/dtrtrs.f D TOOLS/LAPACK/dzsum1.f D TOOLS/LAPACK/ilaenv.f D TOOLS/LAPACK/lsame.f D TOOLS/LAPACK/lsamen.f D TOOLS/LAPACK/sbdsqr.f D TOOLS/LAPACK/scsum1.f D TOOLS/LAPACK/sgbtf2.f D TOOLS/LAPACK/sgbtrf.f D TOOLS/LAPACK/sgetf2.f D TOOLS/LAPACK/sgetrf.f D TOOLS/LAPACK/sgetrs.f D TOOLS/LAPACK/slabad.f D TOOLS/LAPACK/slacpy.f D TOOLS/LAPACK/sladiv.f D TOOLS/LAPACK/slae2.f D TOOLS/LAPACK/slaed0.f D TOOLS/LAPACK/slaed1.f D TOOLS/LAPACK/slaed2.f D TOOLS/LAPACK/slaed3.f D TOOLS/LAPACK/slaed4.f D TOOLS/LAPACK/slaed5.f D TOOLS/LAPACK/slaed6.f D TOOLS/LAPACK/slaed7.f D TOOLS/LAPACK/slaed8.f D TOOLS/LAPACK/slaed9.f D TOOLS/LAPACK/slaeda.f D TOOLS/LAPACK/slaev2.f M TOOLS/LAPACK/slagge.f M TOOLS/LAPACK/slagsy.f D TOOLS/LAPACK/slagtf.f D TOOLS/LAPACK/slagts.f D TOOLS/LAPACK/slahqr.f D TOOLS/LAPACK/slamch.f D TOOLS/LAPACK/slamrg.f D TOOLS/LAPACK/slange.f D TOOLS/LAPACK/slanhs.f D TOOLS/LAPACK/slanst.f D TOOLS/LAPACK/slanv2.f D TOOLS/LAPACK/slapy2.f D TOOLS/LAPACK/slapy3.f M TOOLS/LAPACK/slaran.f D TOOLS/LAPACK/slarf.f D TOOLS/LAPACK/slarfb.f D TOOLS/LAPACK/slarfg.f D TOOLS/LAPACK/slarft.f M TOOLS/LAPACK/slarnd.f D TOOLS/LAPACK/slarnv.f M TOOLS/LAPACK/slarot.f D TOOLS/LAPACK/slartg.f D TOOLS/LAPACK/slaruv.f D TOOLS/LAPACK/slas2.f D TOOLS/LAPACK/slascl.f D TOOLS/LAPACK/slaset.f D TOOLS/LAPACK/slasq1.f D TOOLS/LAPACK/slasq2.f D TOOLS/LAPACK/slasq3.f D TOOLS/LAPACK/slasq4.f D TOOLS/LAPACK/slasq5.f D TOOLS/LAPACK/slasq6.f D TOOLS/LAPACK/slasr.f D TOOLS/LAPACK/slasrt.f D TOOLS/LAPACK/slassq.f D TOOLS/LAPACK/slasv2.f D TOOLS/LAPACK/slaswp.f M TOOLS/LAPACK/slatm1.f M TOOLS/LAPACK/slatms.f D TOOLS/LAPACK/slatrd.f D TOOLS/LAPACK/sorm2l.f D TOOLS/LAPACK/sorm2r.f D TOOLS/LAPACK/sormql.f D TOOLS/LAPACK/sormqr.f D TOOLS/LAPACK/sormtr.f D TOOLS/LAPACK/spbtf2.f D TOOLS/LAPACK/spbtrf.f D TOOLS/LAPACK/spotf2.f D TOOLS/LAPACK/spotrf.f D TOOLS/LAPACK/spttrf.f D TOOLS/LAPACK/sstedc.f D TOOLS/LAPACK/sstein.f D TOOLS/LAPACK/ssteqr.f D TOOLS/LAPACK/ssterf.f D TOOLS/LAPACK/ssytd2.f D TOOLS/LAPACK/ssytrd.f D TOOLS/LAPACK/stbtrs.f D TOOLS/LAPACK/strtrs.f D TOOLS/LAPACK/xerbla.f D TOOLS/LAPACK/zbdsqr.f D TOOLS/LAPACK/zgbtf2.f D TOOLS/LAPACK/zgbtrf.f D TOOLS/LAPACK/zgetf2.f D TOOLS/LAPACK/zgetrf.f D TOOLS/LAPACK/zgetrs.f D TOOLS/LAPACK/zhetd2.f D TOOLS/LAPACK/zhetrd.f D TOOLS/LAPACK/zlacgv.f D TOOLS/LAPACK/zlacpy.f D TOOLS/LAPACK/zladiv.f D TOOLS/LAPACK/zlaev2.f M TOOLS/LAPACK/zlagge.f M TOOLS/LAPACK/zlaghe.f M TOOLS/LAPACK/zlagsy.f D TOOLS/LAPACK/zlanhs.f D TOOLS/LAPACK/zlarf.f D TOOLS/LAPACK/zlarfb.f D TOOLS/LAPACK/zlarfg.f D TOOLS/LAPACK/zlarft.f M TOOLS/LAPACK/zlarnd.f M TOOLS/LAPACK/zlarot.f D TOOLS/LAPACK/zlartg.f D TOOLS/LAPACK/zlascl.f D TOOLS/LAPACK/zlaset.f D TOOLS/LAPACK/zlasr.f D TOOLS/LAPACK/zlassq.f D TOOLS/LAPACK/zlaswp.f M TOOLS/LAPACK/zlatm1.f M TOOLS/LAPACK/zlatms.f D TOOLS/LAPACK/zlatrd.f D TOOLS/LAPACK/zpbtf2.f D TOOLS/LAPACK/zpbtrf.f D TOOLS/LAPACK/zpotf2.f D TOOLS/LAPACK/zpotrf.f D TOOLS/LAPACK/zpttrf.f D TOOLS/LAPACK/zrot.f D TOOLS/LAPACK/zstedc.f D TOOLS/LAPACK/zstein.f D TOOLS/LAPACK/zsymv.f D TOOLS/LAPACK/ztbtrs.f D TOOLS/LAPACK/ztrtrs.f D TOOLS/LAPACK/zunm2l.f D TOOLS/LAPACK/zunm2r.f D TOOLS/LAPACK/zunmql.f D TOOLS/LAPACK/zunmqr.f D TOOLS/LAPACK/zunmtr.f M TOOLS/Makefile A TOOLS/pclaread.f A TOOLS/pclawrite.f A TOOLS/pdlaread.f A TOOLS/pdlawrite.f A TOOLS/pslaread.f A TOOLS/pslawrite.f A TOOLS/pzlaread.f A TOOLS/pzlawrite.f ******************************************************************************** ******************************************************************************** r17 - Add p[sdcz]read and p[sdcz]write routines in TOOLS. The pdlaread and pdlawrite routine were contained in the scalapack example. I generates the 3 other precisions. - Add the example in the four precisions in the EXAMPLE directory. - Modify the Makefile to add a "make example" and a "make cleanexample" - Change the SLmake.inc to SLmake.inc.example - Update the README file ******************************************************************************** r16 Remove LAPACK from ScaLAPACK. ============================= Some LAPACK routines from TESTING/MATGEN still remains inside. I updated them to the latest LAPACK release.(ie 3.1.1) Add the LAPACKLIB variable in the SLmake.inc Modify the Makefile in TOOLS/LAPACK. ******************************************************************************** r15 Add crot and zrot in SRC/pblas.h ******************************************************************************** r14 Patches provided by Ake Sandgren and Robert Granat --------------------------------------------------- The set of patches for scalapack 1.7.4 does two things. 1 - reduce the usage of uninitialized variables 2 - fix a couple of incorrect calls to blacs (bad LDA) The gehdrv patch is just the complete patch related to https://icl.cs.utk.edu/lapack-forum/viewtopic.php?p=1153#1153 pxsepinfo doesnt initialize THRESH when INFO != 0. I'm not sure if the pxlahrd patch is the best. Maybe something should be done in pxlarfg instead since alpha isn't set in all cases there, like myrow != ixrow for row distribution and likewise for column distribution. pxlasmsub destroys irow1/icol1 in the "find some norm of the local H" part. pxrot used incorrect LDA values for buff in several places, not sure if the intention was to have buff Mx1 or 1xM but it shouldn't really matter should it? PBLAS/pxscal must not test ALPHA unless it is really going to be used since scalapack routines sometimes call pxscal with ALPHA uninitialized when myrow != Xrow/mycol != Xcol. pxstein must initialize ONENRM since it isn't always initialized in the "IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN" case before being used in the "IF( TMPFAC.GT.ODM18 ) THEN" case. Maybe setting to ZERO is wrong but its not worse then the original code. pxtrevc and pxevcdriver are just incorrect LDA param to blacs routines. All these was found with pathscale compiler with -trapuv -O0 -g which initialized everything to NaN and turns FPE traps on. -------------------------------------------------------------------------- The lahqr patch and a fix to lasorte needed by lahqr which used to get IERR != 0 back from lasorte. The T2 = T1*V2 and T3 = T1*V3 moves are needed due to uninitialized data. The 2 changed IF-statements where brought about to make getting and sending SMALLA consistent. The ISTOP change at the bottom is a copy of the corresponding statement at the top of the loop. The init of VCOPY and SMALLA are neccesary. lasorte couldn't handle a situation where the top S(1,1) eigenvalue was real. This set of patches have been tested as can be seen on https://icl.cs.utk.edu/lapack-forum/viewtopic.php?p=1196#1196 so i'm fairly certain that they work correctly. ******************************************************************************** r13 Last Modification: julie - Tue, 16 Jan 2007 Add 2 missing routines in TOOLS/LAPACK : cbdsqr.f and zbdsqr.f needed by the complex version of the SVD driver ******************************************************************************** r12 Last Modification: langou - Wed, 10 May 2006 Following up on r11. In r11, we have increased the size of the integer workspace in the rectangular case. We now report the new integer block size calculation in the tester. So that the LIWORK given by the tester to the PxGETRI is big enough ... ******************************************************************************** r11 Last Modification: langou - Wed, 10 May 2006 ------------------------------------------------------------- Correct the integer workspace (IWORK) calculation in PxGETRI. ------------------------------------------------------------- Bug report send by Desheng Wang from Caltech on scalapack@cs.utk.edu, Mon, 1 May 2006. Fix: Replace the line 221-222: LIWMIN = NQ + MAX( ICEIL( ICEIL( MP, DESCA( MB_ ) ), $ LCM / NPROW ), DESCA( NB_ ) ) By: LIWMIN = NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW $ + MOD ( IA - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) Yep, slightly more complex... The error in the first computation is that it misinterpret the statement in PxLAPIV: The formula for the integer worskpace calculation in PxLAPIV is LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) where M_P is the local size of the IPIV. But the IPIV is slighlty bigger than A, the global size of IPIV is: MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW (and not DESCA(M_)). The other quantities are given by M_P is the global length of the pivot vector MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW I_P is IA I_P = IA MB_P is the block size use for the block cyclic distribution of the pivot vector MB_P = DESCA (MB_ ) LOCc ( . ) NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) LOCr ( . ) NUMROC ( . , DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) CEIL ( X / Y ) ICEIL( X, Y ) LCM LCM = ILCM( NPROW, NPCOL ) and this gives the new formula to compute the integer workspace. ******************************************************************************** Rev: 10 Last modification: langou - Wed, 22 Mar 2006 Bug report from Yasuhiro Nakahara (Canon inc.) on 03/13/2006: Description: pzlahqr routine was aborted due to a segmentation fault. I found an invalid memory access at the line 525 in pzlahqr.f. In the DO-loop, with II=1, S1(1, 0) was accessed. Patch from Greg Henry (Intel) and Mark Fahey (ORNL) Greg said: > There is an easy fix for this- the idea of exceptional shifts is > to just try something outside the norm based on the size of the diagonal > elements. The offending part can be removed from the code without a > loss of generality. I think I may be able to come with an alternate > solution. move from --------------------------------------------------------------- * * Exceptional shift. * DO 20 II = 2*JBLK, 1, -1 S1( II, II ) = CONST*( CABS1( S1( II, II ) )+ $ CABS1( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE --------------------------------------------------------------- (with problem when II=1 ...) to --------------------------------------------------------------- * * Exceptional shift. * DO 20 II = 2*JBLK, 2, -1 S1( II, II ) = CONST*( CABS1( S1( II, II ) )+ $ CABS1( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE S1( 1, 1 ) = CONST*CABS1( S1( 1, 1 ) ) --------------------------------------------------------------- Note that this part of the code is not exercized by the testing. (So the bug was hard to find.) ******************************************************************************** Rev: 9 Last modification: julie - Thu, 23 Feb 2006 Correct typo in the [S,D,C,Z]gesvd files for the delaclaration of WP[S,D,C,Z]ORMBRQLN ******************************************************************************** Rev: 8 Last modification: julie - Wed, 22 Feb 2006 Modify typo in comment + description of workspace. When RANGE='V', work need to be of dimension 3 ******************************************************************************** Rev: 7 Last modification: julie - Mon, 20 Feb 2006 Correction of a Typo mistake in the work comment. ******************************************************************************** Rev: 6 Last modification: julie - Wed, 01 Feb 2006 Modify the makefile to add the two new driver: pcgesvd.f and pzgesvd.f ******************************************************************************** Rev: 5 Last modification: Rev 5 - langou - 2006-01-31 05:13:22 Log message: M SRC/psgesvd.f M SRC/pcgesvd.f M SRC/pzgesvd.f M SRC/pdgesvd.f modify the workspace size of xBDSQR to follow the revision 184 of LAPACK the workspace size of xBDSQR has moved from * WDBDSQR = MAX(1, 4*SIZE ) to * WDBDSQR = MAX(1, 2*SIZE + (2*SIZE - 4)*MAX(WANTU, WANTVT)) and is now back to * WDBDSQR = MAX(1, 4*SIZE ) so SVD of ScaLAPACK is following (at least let us take the max of both until LAPACK is fixed on its workspace size) ******************************************************************************** Rev: 4 Last modification: Rev 4 - langou - 2006-01-31 04:52:48 Log message: M SRC/pslahrd.f M SRC/pdlahqr.f (forgot to change the date in the header in the last revision, corrected) M SRC/psgesvd.f M SRC/pdgesvd.f [Julien/Osni] correct a bug in the workspace utilisation of p_gesvd. In the case jobU='V' and jobVT='V', the routine has good pointers, otherwise the pointers in the workspace where shifted as if matrices U and VT existed which implied out of bound reference for the value stored at the end of the workspace. There was also a few problems at the end of the code with some sizes in the case of rectangular matrices. A SRC/pcgesvd.f A SRC/pzgesvd.f add the complex version of the SVD driver contributed code by Peng Du (Graduate Research Assistant at UTK, Fall 2005) supervised by Julien ******************************************************************************** Rev: 3 Last modification: Rev 3 - langou - 2006-01-30 17:35:23 Log message: SRC/p[s,d,c,z]gesv.f [Documentation correction] correction in the description of the parameter NRHS (it's the number of columns of B not A) SRC/p[s,d]lared1d.f SRC/p[s,d]lared2d.f [Documentation correction] [Julien] The comments in the routines p[s,d]lared2d (where the initial vectors are stored by row) were wrong (basically replace BYCOL by BYROW) Some homogeneization among the 4 routines as well SRC/p[s/d]lahrd.f Although the Schur form returned by p[s/d]lahqr was correct (as tested by the testing routine), the returned eigenvalues were not computed correctely. This bug was reported by Interactive Supercompting (Thanks!). The bug was already found by Greg Henry in March 2002 but the patch has never been released. Here we go. ******************************************************************************** Rev: 2 ******************************************************************************** Rev: 1 SCALAPACK/PBLAS/SRC/PBtools.h 3/12/2002 Comment out CSYMM reference (line 57) SCALAPACK/PBLAS/SRC/pblas.h 3/15/2002 Added missing crot define SCALAPACK/SRC/psdbtrf.f 3/12/2002 Typo (DLACPY->SLACPY) in EXTERNAL declaration (line 374) SCALAPACK/SRC/pcheevd.f 3/25/2002 SCALAPACK/SRC/pzheevd.f Correction to LRWORK (lines 117, 248) and INFO=0 return SCALAPACK/TESTING/EIG/pcseptst.f 3/15/2002 SCALAPACK/TESTING/EIG/pzseptst.f Correction to LHEEVDSIZE calculation (line 1064) for more information, please visit: http://www.netlib.org/scalapack/errata.html#sourcecode ******************************************************************************** scalapack-1.8.0/Makefile0000640000175000017500000000526610604624224014767 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Top-level Makefile # # Creation date: March 20, 1995 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include SLmake.inc #PRECISIONS = single double complex complex16 FRC=FRC PRECISIONS = single double complex complex16 ############################################################################ # # The library can be set up to include routines for any combination of the # four PRECISIONS. First, modify the ARCH, ARCHFLAGS, RANLIB, F77, CC, # F77FLAGS, CCFLAGS, F77LOADER, CCLOADER, F77LOADFLAGS, CCLOADFLAGS and # CDEFS definitions in SLmake.inc to match your library archiver, compiler # and the options to be used. # # The command # make # without any arguments creates the library of precisions defined by the # environment variable PRECISIONS as well as the corresponding testing # executables, # make lib # creates only the library, # make exe # creates only the testing executables. # make example # creates only the example # # The name of the library is defined in the file called SLmake.inc and # is created at this directory level. # # To remove the object files after the library and testing executables # are created, enter # make clean # ############################################################################ all: lib #all: lib exe example lib: toolslib pblaslib redistlib scalapacklib exe: pblasexe redistexe scalapackexe clean: cleanlib cleanexe cleanexample pblaslib: ( cd $(PBLASdir)/SRC; $(MAKE) $(PRECISIONS) ) redistlib: ( cd $(REDISTdir)/SRC; $(MAKE) integer $(PRECISIONS) ) scalapacklib: ( cd $(SRCdir); $(MAKE) $(PRECISIONS) ) toolslib: ( cd $(TOOLSdir); $(MAKE) $(PRECISIONS) ) pblasexe: ( cd $(PBLASdir)/TESTING; $(MAKE) $(PRECISIONS) ) ( cd $(PBLASdir)/TIMING; $(MAKE) $(PRECISIONS) ) scalapackexe: ( cd $(TESTdir)/LIN; $(MAKE) $(PRECISIONS) ) ( cd $(TESTdir)/EIG; $(MAKE) $(PRECISIONS) ) redistexe: ( cd $(REDISTdir)/TESTING; $(MAKE) integer $(PRECISIONS) ) example: ( cd EXAMPLE; $(MAKE) $(PRECISIONS) ) cleanexe: ( cd $(PBLASdir)/TESTING; $(MAKE) clean ) ( cd $(PBLASdir)/TIMING; $(MAKE) clean ) ( cd $(TESTdir)/LIN; $(MAKE) clean ) ( cd $(TESTdir)/EIG; $(MAKE) clean ) ( cd $(REDISTdir)/TESTING; $(MAKE) clean ) cleanlib: ( cd $(PBLASdir)/SRC; $(MAKE) clean ) ( cd $(SRCdir); $(MAKE) clean ) ( cd $(TOOLSdir); $(MAKE) clean ) ( cd $(REDISTdir)/SRC; $(MAKE) clean ) cleanexample: ( cd EXAMPLE; $(MAKE) clean ) scalapack-1.8.0/SLmake.inc0000644000175000017500000001000310605620006015156 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(BASEDIR) # # The platform identifier to suffix to the end of library names # PLAT = LINUX # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = /usr/lib # # MPI setup; tailor to your system if using MPIBLACS # ifeq ($(MPI),lam) USEMPI = -DUsingMpiBlacs ifeq ($(BUILD),static) SMPLIB = -lmpi BLACSFINIT = /usr/lib/libblacsF77init-lam.a BLACSCINIT = /usr/lib/libblacsCinit-lam.a BLACSLIB = /usr/lib/libblacs-lam.a else SMPLIB = -lmpi BLACSFINIT = -lblacsF77init-lam BLACSCINIT = -lblacsCinit-lam BLACSLIB = -lblacs-lam endif TESTINGdir = $(home)/TESTING endif ifeq ($(MPI),mpich) USEMPI = -DUsingMpiBlacs ifeq ($(BUILD),static) SMPLIB = /usr/lib/mpich/lib/libmpich.a BLACSFINIT = /usr/lib/libblacsF77init-mpich.a BLACSCINIT = /usr/lib/libblacsCinit-mpich.a BLACSLIB = /usr/lib/libblacs-mpich.a else SMPLIB = /usr/lib/mpich/lib/libmpich.a BLACSFINIT = -lblacsF77init-mpich BLACSCINIT = -lblacsCinit-mpich BLACSLIB = -lblacs-mpich endif TESTINGdir = $(home)/TESTING endif ifeq ($(MPI),pvm) USEMPI = ifeq ($(BUILD),static) SMPLIB = /usr/lib/libpvm3.a BLACSFINIT = BLACSCINIT = BLACSLIB = /usr/lib/libblacs-pvm.a else SMPLIB = -lpvm3 BLACSFINIT = BLACSCINIT = BLACSLIB = -lblacs-pvm endif #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) TESTINGdir = $(home)/TESTING endif CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = g77 #F77 = /usr/local/pgi/linux86/bin/pgf77 CC = gcc NOOPT = -w -fno-globals -fno-f90 -fugly-complex $(FPIC) F77FLAGS = -Wall -O6 -funroll-all-loops -ffast-math $(NOOPT) CCFLAGS = -Wall $(FPIC) -O6 -funroll-all-loops -ffast-math SRCFLAG = F77LOADER = $(F77) ifeq ($(MPI),mpich) CCLOADER = $(F77) SYSLIBS = -lm else CCLOADER = $(CC) SYSLIBS = -lm endif F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -Df77IsF2C -DNO_IEEE $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/scalapack_$(MPI).a BLASLIB = -llapack-3 -lblas-3 # BLASLIB = /usr/local/lib/libf77blas.a /usr/local/lib/libatlas.a # ifeq ($(BUILD),static) PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) $(BLASLIB) $(SYSLIBS) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) else PBLIBS = -L $(BASEDIR) -lscalapack-$(MPI) $(FBLACSLIB) $(BLASLIB) $(SMPLIB) PRLIBS = -L $(BASEDIR) -lscalapack-$(MPI) $(CBLACSLIB) $(SMPLIB) $(BLASLIB) $(SYSLIBS) RLIBS = -L $(BASEDIR) -lscalapack-$(MPI) $(FBLACSLIB) $(CBLACSLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) endif scalapack-1.8.0/INSTALL/0000750000175000017500000000000010605235165014426 5ustar muammarmuammarscalapack-1.8.0/INSTALL/SLmake.PGON0000640000175000017500000000550710604316307016274 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = PGON # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; uncomment and tailor to your system if using MPIBLACS # Will need to comment out the default native BLACS setup below below # #USEMPI = -DUsingMpiBlacs #SMPLIB = @(MPIdir)/libmpi.a #BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # system primitive NX BLACS setup, comment out if using MPI # SMPLIB = USEMPI = BLACSFINIT = BLACSCINIT = BLACSLIB = $(HOME)/BLACS/LIB/blacs_NX-$(PLAT)-$(BLACSDBGLVL).a CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(PBLASdir)/TESTING TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(REDISTdir)/TESTING # # The fortran and C compilers, loaders, and their flags # F77 = if77 CC = icc NOOPT = -nx F77FLAGS = -O4 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = -nx CCLOADFLAGS = -nx # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar860 ARCHFLAGS = r RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lkmath LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/scalapack_install.pdf0000640000175000017500000074462610605235053020611 0ustar muammarmuammar%PDF-1.4 3 0 obj << /Length 2201 /Filter /FlateDecode >> stream xÚ­XIwÛ8¾ûWèH½!Àè>ÉvÒ“ØN<–Ó9ôô&a‰ItH*Žÿ}×P’#÷¸'>K¡Ö¯P_½~+³‘L„N25º¾É܈\eù(O•P‰LG×ÕÑùôrlT4«<:9O4|?ãJÓ~©×s\H£㉌š±Œz‡ YdõøÏë÷£‰ÊE"¥Md ¥‰å»u×Ëeôu³æ¿mêÊ1ûÛ¦åµYYœj  €ïë·fd…ÍT†z+ Bl,G©Pq&I„ d26¶é&Òæ"‹5h£c‘Ë8aÓ:šùïñ²(QÔ šTÌI¥£\Ø<&N¹&1ÙH ›C\Ô‰È5˜<Ù!{Å2¦^ÖÉÒ¡çŠd=ÈÓ|OþG‚ÞA‹¦>À?‘Bg:ßåŸ<—?ÇWa2 {R‹4±Š6ƒÔð=… Ëh^´mñH¯ó‚všÍ#Ç\Ž•‰\_÷®? ¶Ì!:KwõNÿ‘_ŠuŃ«à ÿý¼} Ë¡ˆX-³'8;5xWˆ÷ €3™òÔÝmüWÈÝ­ÑÆžÅ6·^†öî€hÓ»Ö#³¬Ýºtäq¥ÑòdßߟÖ5øF>Ãã§ukû2®ÇFƒtt³ùS`ÊEžAdö°tr}lÈõ/;æ¸"ÍW¤=”ñJJCH•I¦Ù ßß\ÍÀšw?°D¨¿Œ'I’DÓ»]»ä ¤Þ<Ç„I”*GûzOoº¾-ÊÞ›fS!Áô}˯ZØèÍ¢"7,¯­*Ð0ªű$-nÆtLãhÁ¹n€Íû† %Wnž&+÷Ì®ç¦í¸¸Ã‚@jxLh/:¦¤êG-F‚ÞD'¸ T× ¯0áºéØøx7]=_×ÿ‰e2°„»Õ¢6IšGTÍéZ_Ï/OèÛ/|.à¤u¾wž 5Å/¶!?ª[Ùp¶_ø3Äá»ï„ËkH‹®àN϶$œŽ™hÿÙ!ÛfCýÒöŠ%*Ž>„X)àTÕ+ZnÝ× †¯Å0©ÔRLqcQ YHpÚñZáx«U>°ZÖ7ÈÈ·,D¶†«˜å}ÑÕUz|¾õFÿÕ!;hÏÔ_™Œ..1xºCX»üý‚‚×ú|=áä>Üo„V䆕a9àV(f8Ø„[Íy΄xÜèÜ#RdRph»ž—…Î ›7è š¼IÐíZ»j(õ*Ô\N(Á>ÞlPR*½°±vèhQ2¥X'aAU›óÝcOài†¨>à6"ËBî!_#^nÀÞ=Ï[Ön“‡l$oÊ œ²u¿Ìé0ç[å÷C¼aóž½çxvK‚V¦ó4êŸAÀæ¯qZ¯B–Và 6m3o‹•לßb^«&>…)(f¥Ç6Õ(rT˜°ÃÙzfÕДß5Bx@ÒÑ›ë# ââ‘%™Àr¤,´ ¹ÎFåêèëÑÆ£ê(½?Š…¶Ð1ÞÃ$†¢º­Žd'›'aey4;ú÷ÀqXNvyã«XÅð’´yNËÔ õ¤L&R%÷^—ð®5Ð|@7NÄfL ÛP‰=BÓ=$¨ÆÃÌD…ßê6wwà)Üî1¯“,jZÓ£(oɹâÀà]IpkÀ G ØîÑñÒ×X{;6Px6ë*Ô!Xû­Ý–IϧP@êÆ3ŠåFúL«5Œ~Ì)ôà6übrè¿°J ÛζÐmº[·æ¨ò´ú6†ÚWåý°qÅ„+Ú¹/xõ²˜ýûX;Ïh=íÁ._miZr[K¯+¼ÔÊÞ+ƒŒ§ÓÁbœLl ߉D¡ëÊ¡KbP¬âœXsð ‡v^Á›ÐñÂx7vÌž¶«@d,XD,v-‚ÕpedŒo&Rmë·Gæ3ØøÎ ¾àð€„Ôø”¼Úÿ e7`N†ÅÑ'AeW0!?EWÛ7¨1û7Œ=žvþ€Ø!.’<ÝzYáŸóhñwNÙrËé›ÉU=‰Ó‰I>^) ýŠ·§î(;` ÆÞ ¸¸‡mÜ™•Á0~Ñ ÑclãÚðäÁÉ ^©iä8ëf ˜{±'n ‰'¼‘x5œ¤®-çújs óÖ¹•/=œc:QЫ îýNˆñÕÄ„ŸÍàõåß»—T"ÑäV.üß8õœ›çdÓ¶ÛÈ‚X¨5-§W÷ Ë¿x ØÖ%æG¹ÏÄB°ûÛé§Ÿãäãzþ<ÁÉ ž5|ÇB¥ oXzÇhzär0­‡|ž· |$šÑõàËðß–ÏšÖÏ3$})C6k–M¸‡;¼t^úiV´uGódK@¹ü·J¦»¿²~ZÅO׳é󜒿ÄwÇì¬\ÿ WàóT1/‰ðãçܾ\ºQðn+2Ê‚fZxx‘‡7xÕÂ]èóÇE€vø¯µ÷CÚLdôSÎ cUºíƒ°Oû õuرendstream endobj 2 0 obj << /Type /Page /Contents 3 0 R /Resources 1 0 R /MediaBox [0 0 612 792] /Parent 19 0 R >> endobj 1 0 obj << /Font << /F16 6 0 R /F8 9 0 R /F25 12 0 R /F20 15 0 R /F19 18 0 R >> /ProcSet [ /PDF /Text ] >> endobj 22 0 obj << /Length 1369 /Filter /FlateDecode >> stream xÚÝYKsâ8¾Ï¯àhª‚c½¥cž»Ù![ °sØÇÁ $¡Æ±Y0[»óë·[’m¶'©’̉¶-„¾‡º[æxôéðœ‘å¡ÒŠtFw.CeŒìHECÁ#ÞÿN²ni—‘ Ÿø9F‹î_£_Ïu‡D¡‰ŒƒÈI¦:=ÁBfŒ¶s­£(¸H»Tù¼K˜ÂñÂ[œ+ŸâÍ´Ûc‘ÂnOÀð­nÌ^Ÿí{jpdµuz„ †ZiÉà"“$έ©¥‹±àª"Ôr9±´ÎA-±—"*B4Ý:O!zÍPü„¤ß¦3ÇQœŽ1çÆsw'˜`Àƒ?#Âal2qvftÍÏ\‡Œk ‚àü‹Û8‰gñí×0¿ÿVcÍC΄ð£÷I#a‰éQFaSó-n¨çælŒöÉ+j€+mÙu@†ýÇøë$œ¦·5˜%†šû¡Ó´ØÒ *0ž¸_A’t¤äïÆuÖKÒQF"x#¶EŠFœç]̓å<T6‚áÁ©½ÈcL`ÉïÉ ÏðSÙM×>JÝåqÿàÉð ¼.ÂþÑÎïžv÷b¤rl×Ç!KS%ÖWwõåÒ‘›yK_Âø« ˆú¨[ÛË¡L(™”›j°&» ›õ’É?H¡Õ$©ÝЦm_8ûä¨?:ù|x ¿›Y`½¨EP©¤þ;U.éRüK™Ùä[»¸•PEþÀmTóÊ4çúiÃ’§y,ŠB)éÖöçž½ÁÒ¬"E¼‚øYOº§#4¤%r‘»1C³DŸc' ˆú~ÒµÕ4ϤbúhóÚ} ®„ˆ(›ù¢'p YÃ(² DPÕ#•mHgpqz1¹Ëï(nŒx§R·2 ÚÕ*ÞÌÀO D*ñ1(ÐmLþgÉJ—¡âÃ7ˆŽRσñ<€®,˜gØ Ü$E=_<¸:³g‘öºqÍÊÐ7¯Òҫ뜙?a\fsGZãèQA¶Ü,µÃ[·_Øê~aŽ×•ý—¶Rãׄ~glÐÆc+Ѷx é.+OS!6¨’þoîsŽ)NEÕ‚f4ãØ{Ð-Ôe·{‘Î0ÓyϧIµÄï¹É§éý³ !¶¤:·À¯Jò¤•÷–Nw#رoîXÑ}WHçSÛuÆ÷>‰7ÏŠÂXÖ0q%¾ƒ‰·©AëÔ Õ@„zdpkuÙ©À€1am¥Fzà0WxÔH¢î­œDøúZSÇÆ†¼˜EqMB¤É[N ¬<¼4/ôñû£ ëX²Т͇ìõ³Â*[©A¸¶¤yÝM9¿UNè㾩¶/ç6)fšÉ6%xTIŸ¨RÁþÚÜÅkIÁ·4x¿€Œq dœÐ ´8ÎÍ)7s‚GݘÚ…}aõ6ùA¶æ±Ë]0Òá… Â ¯–o£ÇÚ9Ø2z¯ÆR­Æ’Ï4VT/^u÷'±Çõ䣨ñ|²¨Þ^ôû({q±šµŠN¢¼îy‚ûäªÍ°òFٕ˨1'ëÖ*£v³å/~ýRªÞIÒ7°¢nH½º@%$³¦±‚5-Ý´j¥w£ÕσӚsÑ>E2m"éW©ä`»¬s²¹XµÊbv#ËÈ-i;{¥÷¨ŒÛ„u‡ÊØ?/ªbм­SQ£oT‡0‰ÿä ö¸Üÿõ Ï>>ýívçendstream endobj 21 0 obj << /Type /Page /Contents 22 0 R /Resources 20 0 R /MediaBox [0 0 612 792] /Parent 19 0 R >> endobj 20 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 31 0 obj << /Length 784 /Filter /FlateDecode >> stream xÚÍWKOã0¾ó+rt ñø[Ú¢} •ˆ ÚCJ‰TTR´ì¯_í¦)M*ín \Z?Æãùæûš וC0)[”5tŸƒgþ÷¹˜–kclÖb^µqÔiäB…½Pi?Ü\ŽÉ…k¯6!5è‰ü&z`©zÍR¼Ãð-¯PeÅÍ€e uWçdæ‘¿<ÛAáL_B ”óXâ»áæË[ d´*žÑÑl…)UâÈ*8ºÀ˜;\JÇ’I¹Çôs¸Î/ÉxsºZŠ×é[¢æ´?0œOPH(LìXøØÈ­Ãá âÁ¸÷–çþy´?_¿mçMT‹pqáx_µµJÆ31óGÝ+–‘‘mŠ6òcJ‘–"ëGŠß ‹|#cÞ q— ÙÑïà:SÌfË¥ÇÖ»8|+ó~Ø_¡nÏêky{ါo̘8È?:c;ùx}{Ev^Âò0]¢§GÔÕ¨»Æ3ÁÞ1y1ÑÏ Ê✯¹Ÿ¹Â̸g†-ÁÖÃMù€†'Eʧ¢*ÊeMäö ,RJ@u¼™DË›©_…ÕîŸèJ° õtÂXóC*R2f’é s‘2 `F’ÑÍÎZšÓ=3Ûàæ&_‡ü¦ „âÒM.𮆯U \ Ð"øU`kas$ôÅïeëä/H¸R„³óQH-µËŰø‰/Êù*G"îñ\ÏÒw¼]?Ü¥Î\Á¤ZúïL&„AxAãäÉ8;ùê¨kxendstream endobj 30 0 obj << /Type /Page /Contents 31 0 R /Resources 29 0 R /MediaBox [0 0 612 792] /Parent 19 0 R >> endobj 29 0 obj << /Font << /F8 9 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 37 0 obj << /Length 2480 /Filter /FlateDecode >> stream xÚ­YKsÛF¾ëWð¶`•3xú&9rV‰ìhKŠ“*E€ÈYƒ€’å_¿ýš)ÓŽ\µb==3ýøº{xvstòN‡³0òu”¨ÙÍÃ,Jü4Ï“Y+_Ea<»©n½p¾C{í\‡Þ=¯ïæ ­¼j[Žû];¿»ùåä]6 ?ò™iíg©ž-"åëX)âu³2,Cïi®R¯ëç¡÷É´ð»äá¶knUõ€¬ËÞÜÏ@[cO¯º¹Ê€uÆŽ¿¦Æ¢i¸S ÏJ˜ÓȽG\Z÷ƒCÓ@èiÆíî¿×eqyz5Ï"ïÏùöW¾ÊðÎÇËÎö~¡öã(ß»ŸÖ^_7tâbÀûèˆYÃÄ5ð(ifÚ!ã€ÊÀl[6Û ¾|ã7,Zµ'Û0ñu¢ÓÙB)?ã„6ÿ+Óz°Í€ˆÎ?ÏQ"¤EÜ«hpKP1’L¢œå1È „]s›Ž«§ãâ²è¶#iëϹʽVCHõŒínËç@{þ“ð_ñÂ(O¼—âÀy:Ü}_à>Ï$ö`_à´3«·h ÉB¯XÔ¤a°SÙ¿çÑuQ¢NW$êzG첨¯j¡$Ë‚oC»|:é·YÁ ¿§Mímún‰G/ÖHâÒä¶\• dÅž“ý÷û¹«“7óE’dÞn–œúË]S¢z#fE‰sä†&¿ªò ’ß¡ˆ‹ž ›"ðÓäSKÏ´¼Ì)ØE<µékÚÓÙZ›4ÿ 
ÆU}PÑ«¢~d¡îÀAm•Qx¹bÜÕþÃj-^W`ùÔ$šÏN+±ÔæÎÐd;C;ÃF¢½+B²5ô ûaóUš_¯Y0.•%`OØHI_FÄ7vì Dqþçéû+è^žs\#·J.w,¬‡ ákÑmÇ™qø‚Á…¨Vâ/x€cmzkg¥APþßf~] dÀVâ( 8DÐûÌ/ȬR_¥°ÿÞ^ïÐ;Av‹(ÈQQxeñ^…iÈŠaÚIbÓ/Xd„)BFø:dÜe3˜¥9‚v)!­‰È¸9ðbÇÐz Œ}Up1È’î Xúgœ>†A Ø4uAr¨yP_?¼¶qU¿Æ~g¦ýð/îÿ¼5îÊ»!¢R®“xÛ½P:]B+ Œ•Y<ô5ñjذqˆPéq'¨–â¾áí´J\®¡ãý€ûy%b3m=‚„˜–Êý( #ëgmÅ')‹–÷uàB[Ý#(’ õ£)˜Úê ‰ÿ`ûj$ï€Ù?éªZ:ènõ½h¨ü(P/$Õ‰vŠ–åyÝm‡ ˜Æx–V{7:dì«qܼ99yzzòY~×/O†²hŠMQ~šZþ¸ür@L/]ˆ±ëʹ: fkð®ãk uÈZô¦– ŠڢkÐ:J Õ]»~@×Jex¦èºŽmÉœxùÃÆ'qûgÙP’´¢/qt_Z|Øö; óR ¥? ’Sl³2õpò 0ÇK^…Fúèâ ó-7÷ Ü@%1A}’ˆ÷5!¦iFCµMðAÔU‡`îᑲpw@hl £$JlL‰mÙ“ØÜб®·Îjäˆ.ÄEOSvlµ5Â/nð$§Â˶xàò æ<ì¨2ÃHyÌ='@dc!š—`a£={?ä‡÷[²»Ê‚âÄÒ®ØG1ìä‹°ÛÖšýXËjÓîp1B¾$ùWõw`òñï­Ô~RÛð˜ (Øž ì}ËõŽ…˜â54¶ƒØ õÖH´„ÇJ| J©I Ø>»œ ûZL aÿòGnsKäßaò¹ØäaxÇHxPN1ï¯.Xš·éÍÎ%ûø^f°x»³ž ¶gƒš÷½Uµ­VV#¶@½8xÿñ(éd‡‘< mìOsìƒËKqŒ2‰(ð(q»¸Ó˜O"|ì‘ΰqA2~Ï ¯¯ø+ÑØ°`ìŸyÛ‚c¸îFŸskÊå!­øÚßüoèªfI6œ(OAp¢áxé4½$jºõ¥Ð‚ñÒFñí0•˜(ñDê_ Á[¿µµ|¢'¦í”Œ1ÄHôŸ„ŠWk7<9Ȉ¾i'ÏÓ zÛfCÚèzšo—‡Àƒ4p!b †>¾·9lPñ²–×RÚV¬v´š?K3…¸‡ªÄÕ ¼‘%wˆåù®pùõîÑ(BM^¬…&Vp¦ïDgÿ+ÎñÉäèá W9ÈŽSÇ ë8^aZ<Ìväyš$Å€2 ÷N‘bѹ¥°´²À5§ü!…#Ç3ÞvªFvßÎpæWúõ(*‚©ÂŽÙOøÂë …Z6?àÕÛVÂaóŸ ‹¢_¿P°–”*ô5»EóìPý@)0åa{/˜pr ½T…ö‰DÞ.z¸¤p Ÿ¹> endobj 35 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F28 40 0 R /F35 43 0 R /F32 28 0 R /F25 12 0 R /F20 15 0 R /F19 18 0 R >> /ProcSet [ /PDF /Text ] >> endobj 46 0 obj << /Length 2508 /Filter /FlateDecode >> stream xÚíY[oÛ8~ϯð£ Ä /¢.}Ú´› 2ã^¦1»èôAµeG¨,¹’Ü$Åüø=R–'Å »@ 4¦¨CžûáÇ£—‹“³Ëd¢¤Heª&‹õ$’B3‰­:Tv²X}´˜Î¬”Á«z7ÕIp?…2º›© r'Á5 çÛéLÙg$ÉEQÁÔgD~—mw師yƒëùÖQÕ’_eÕŠùª€ùÎ-„áM>ý¸øùìÒè¡ÔF*Ii'’¦Q†%5T"V¡'=ÈYîa´Ê™ïoR…e~ŠË'3¦ÂÄ6šÌ”6L5­lwÓ O‹QÊõ=m¶tx0ŒËšH—YWÔN™ ^;2։ƯßÁÃë†éÞýã5Êâ¼m²æ~š„Á)L†0qK_ÎÏÑ’¯®C4æ§þéã7XrMúɱ^=#ø ®6&w iplz½p~~þ7Öqðê~?^žp,X¤& '3­„”QD¼Œ‹¬ Ü¿èsèêݬ̿öQRó¾ï›ÈÄÎ¥¯Á÷ë¢Ìø>´"Q±'¿>È'Ìê¦ÉºŒ©¦úHiÙkx®Ý֤Ǻn¶¬ë‘€Õ±Ð±NÆÎ¹éºÝ‹³³ÛÛ[Qåd‰¨›ÍY»ÌÊl—-?Ÿ­³/â¦Û–¾Ö #ç{ks¿_þ #fGåqêë X{SeÎŽ+ÔÖÕoTuof2zFò%ÉÙz×ÔŸÈtÔºò5v,-XÖŠÙ²{œŠ+ïfQÖ¾B`þ:‘@à?jþÞX[¶bß}9„=/V¥Dj¡ŽŒ¥a!zh¡žŸða_}+@$8p)aБzNøìÎ=ð9íø%C~*–"ŠS_°Ú%&Z6Cƒ¼XTð<Ü|;æZe"‘ÈÏ+)lr¼,(1M´õšN‹[üÃUf Î®Š¶kðùÓ¾óDz`,Æ9ýúÃÉ9¶ü@e2þÂØ \å6벆h  ¸ÇíMAÀÈo˜€p F ‹Šªuë½×A¡ž9ÜcD›qyð5°ÿÂp©­÷_ïPޒݪÑu}¹§…{¨õNtD¥WÞN{üO“ü2vˆ&+½D"„1ßâ>‡r@åÜœuh§­·pµ9‰v¸o½i²mËP•ä]àØ%–NTðŽ ¥‹ˆDQrf Ôo%pî5­ç[“?`ùºÁÆóaºò”:Åz/M¿ŠÕÂu†zË´¸âGG¼ÞÖ^ÀUÁP¶Ç´®«ÇxŒtt…§ñ:­ÇÿèŒø ¥ù(þ²rÞD÷DæJa Žw% áÄÚã9;`n|©xDÆx„÷¾ÇýHˆ“¦NÀm¶±¡`Û8zZìyß{F´oìºÎˆF¦ÑãÖé¨Á Îî5óвÞõÀõÓâ>ìÉÔLô4âV*rþû*Yéç|…¢uÌŸ›iȺo…¸‰<ó $¤ÃÏxß †–ó'¼-œ¦|Ë[! uð/ºpïy%ÜK×r¶‰ÐZ‡cÃU9ù:tÍ9n· ç;´Wì»h%ŠÊ@'¼ä56ÙèVM­%œ¢\r"·û¼V!µ˜ùuñ_{qЇ'*E^¿¤îèfñäæ=®ÿB_úÇ­ûP‚žvRö÷û²<Ö~q7ÔV%‡®­kT«¾3o÷ýG+**ææ3¼Éü=Æô6ë°ÅÞòLí¨zK¨tø)‘6 ê£’áMkt'vÜ©U®?‡ä}k뛷΀ßãÒpÄǾPðíô{”“]ƒÝ‘ºr¾Ûì —¾6ò®âyƒ²²ƒ‘©Æã‰;©H8_è“’zÿ†RÛendstream endobj 45 0 obj << /Type /Page /Contents 46 0 R /Resources 44 0 R /MediaBox [0 0 612 792] /Parent 19 0 R >> endobj 44 0 obj << /Font << /F8 9 0 R /F32 28 0 R /F31 25 0 R /F38 49 0 R >> /ProcSet [ /PDF /Text ] >> endobj 52 0 obj << /Length 3040 /Filter /FlateDecode >> stream xÚ­ZKsÛ8¾ûW¸rYªÊ¢ ÉlíA–Œ'vâ<ÙÃÌ(‰¶X‘DIÅÉþúí’2e;»{1!<F÷×øìöèôÇBøi…Ç·wÇJûqšêã8 ýP‰èøvù»ú£±ðBø#ÂÈ»ÀË¢e¬½f•þ¼ýè$]:‰ôã@§Ç˜]„·ÉàÏ×þøÅþ.̺ÞþZûAFf]±]¬q·Ý2ÇÝ"ï@(ê1›ÂžŸ©ÀµÀ^ä'RGÇã0ð£Ti"rñ6Ë6¸êZk$(`g1 coU@ß6ãpý0CWŽíE{YÃî> _†°‡›N®&7“é‡ÓÙÕ&ûšÃéÜ ûI%fŸDðf5³”UÜ퇪…‰÷ Ù[æKî$^±);rïó&”Ÿè 6»\~œÝN®®†„%S?@XBú‘JCZPïæ$ƒeQYæMYý€MuêÝA_Yq»YfãÀ»Ü"¯M¾æ¡88Á‘Ø»âÜ‚•s`çÇ¢^™xÜ$:5휡V­¹{Ff†Û]MÆC  ™ ù)NûóOp›—•抰Ç!”^†–*ŠŸªÈšv„˜È6ÖÒ°ËX;Jgdl鑸vwÜ.·-µ¢æ¾‡*gù²¢ÁtÁlØW™ƒ cšáÏšª)iVù$¹±RÂ×2Ú3˜w(A² )ùÊ¿ÃéeŸðž ¿î&¥ AaaBa/nÅ3P ð[팭C{¶€û³W{ÓÜ]šáŒ?çSnL®n~™œ ‰¤ÆaøùaÆt‚ ^¸n°ÂІ¨ðȳÂC[J--QºKTâÆœi«”ôE"ZñÌ&›‰2Sa¢1<)UW½v–Üêeá~Z¹.JV—¬ØZæ ŽÁ$·õš÷±ØÞ¿å›AìëT*8"xE­øæ D°‘¡ã†öFï~<%ÎÕÑ&çIY³wùÝe u®s 7hHC·+_%Ñ¿4 Ï(BÑ[_"Ë•A.BІål½ß3°WÃ¥K{!«r3äAZpkIߌd,±W“0»Êòri…÷äd™­PRãȳ†ýÑÆŒ—ü}\ ®ü³59+ :â¹õöHñ ‚Z/Á[çÜXsUVy ,kfƒ¦8®x"©?ˆ”|'ž0#•©Úó¶Mµ[4;’>0£C=è~‚Ò4ƒ°–ÐV í’äVõÈS›U¶µ-®°0»+Ýì]¾ <'8‚³Ïªƒj!Ä~­_‚©N´Dz]—Cþf›S(ãT]íÅ\ÖnÖÆµ%F¤èÐiMy7úHÀ“HįVëÄkä?mcÅwq<äobdG›9ƒÈÀÑb{´Ÿ>ÝÜÙ[J‹çè` šª–›wW“÷³R "iá6!vÈx~ñnvñ(‰é[޶1ÒJA»cÙ¿sö·%]l]̉)ðàTkðÜ»¼æß¨¦œ¥`ú±ÃâNšøAXUf§?³ylc4„š¸§¾ð²®íÏ×fµ³3¾´gXÀ«\˜zHŠ8è(.haX‡ñÅ^7>Ÿ,é.nDp,`™ôeÅÇJ… Öi,6Gýþgp¼< Ž=‚”)M¢ãGøDS µ9’¾kµ¿×G³£:‚cGqÜ%y†¼¾Ë @q7ë OÑ`½ÚªÎøüc9… ñ~(Œ(k]¬ª-äÛ:‰Ùßn¦“ÙÅ‹[Ò¿m` ‘9i öb¾‹ßŒ?”ïb2UÐLy÷‡ÞìêÆ¬\“6•æ×’Ôoè®…Ó U´×?í_u ©¡†ÌT…©/Ôÿ~Ñ–Þ¸C®yO³Á§‹@%n_ªö]\ ‰:Ôp„ÈBS›cšôÓ aaŠk ƒ!v*ý@êU $ö>>ÙOû¢ŸL¡×˳.dS>Ð?¤\|k«p6«(æää0ºÇ‘·ƒJ,án§ñqÿ]¸ @¯‘Ý|¹¢€!ØxëŒI ¶,™ë›Ëƒ9DüjfàP³uå°o …óu³¬~Œh«h"mÒÿP+Aƒ·+Îf¥©ŽÚúöà#€-ìÀ‚naÚ…qš,ÖxnÎbe'·/:tòÌÕX5)<}1†]™¶ÎÄ•ÊÚì·5]&­Œç¬(¶Ü™ymViÒ{“Uºš¿?‘MС §Dëˆm<þº|S²­ƒ¸\çfÝ–CpÛÌ­Xcrb©á–¶»²ßŸ M¸$I´VfÍ+Ž9Ö±ö&ë5¯¤ËàÊæ›ÛIƒ˜ËSçk ~¨„îŠ ]á7#Rfý¼‘óm÷£h‹°ÍªÃ@W'{ÅFߨ‹ìœ °"Ö‚q±‹Ô>¨E÷n”jŠ+ UŒ<éçMF‡@…q\Ø.ç¦|™(°cX7AÜ7…ÆìÄöNlËG@ßMùÀÝœ›±›/ö…)›í¯aâ0ÌhHµê¿°Ý¸€3§¥m ðª[͸\ßÖÏ:ªE UçqŸVФ ê¶*ް–€x£~”ù*uÕÌmÞà6ƒx-#÷¶È•¶Ã øX†d§}ó(Í›%€¨lMû¾³#ã$†M‹3I5ÙjëìÞDâ¡ ü8 ö#ñ¬®‹í½ñMV2?lzƒè4§ôŽX0±üûóUýNuzÕ4oOO}ˆ_V÷§óu¶¨Oá4«â[^Uó÷ˆ1<—Ìy,êüÄ”¦ VbÛ^4Ιq&µ,ꦢcí¬³%eæ¹äɨì¼bÛ(BŠ@ß¹›íÿ_±y(¨á7÷ÿŸçÖ>|ÛôÖ¾J“»¦S’PI¶ŒÆ’Š‘2`§åäR4°CÁG"î¿SØ%97:…lS·¥—P;6Vqêý!¤šp×oœáW3ÉìøžjË–r{l9Ã~3$8´-÷F‚]YMÁö›ËmÝdkW`§qK9Š»”i¬6&G±)E£Äoc)îéŽ=º* Ê÷-‘7*ùKÀNlì3 »¦@9è¦ñôBgX³Õi*løñ[wTèe®Cc11A£¹%泜0£{hÌt.„oyòœýœÝÞ —> endobj 50 0 obj << /Font << /F31 25 0 R /F38 49 0 R /F8 9 0 R /F32 28 0 R /F33 34 0 R /F35 43 0 R >> /ProcSet [ /PDF /Text ] >> endobj 55 0 obj << /Length 1933 /Filter /FlateDecode >> stream xÚ¥XKoÛ8¾÷WØÃÊ@$‹"õêÍ-úÈ6A4»À¢í±å˜ˆl©’7ýõ;R–Sy“Ý“Ãáp8o†zuýbö6›ˆ0ÈÃ\L®W“D©LÄ$£ R"ž\/?{ç«©/eîé-ÿÞO£Ì+¦¾ð¶Sá-«†ÉUݘopÁü(–@U¡÷êbþ‰×KsËnxnZæ@!UÇc¢ï§qâiSjä/é 3^îÖ…ÕFLÚµEã…ÓÏlÛN—%OæÓLzרÄ×ë?&¾H‚\&1 d«<¢ î×f¯§¾’ÒÛÜŒ£Ûb[4º+p¦èþH­jЫ3¸$ê¶d*_GýEáÿo]á^&ÕIºZH‘§P$EÀ„]±À•n‡Â °€Šó^ZÚ]Ó Å †Ç7{;ÍìéVNS´‰òP1:uå=ÙèØ‚Y{Öä ‚@íNw¦"‹G^Eb¯†ÛÈ P§Ù[ c,Jƒ(²‰…A§’]w]ýr6Ûï÷Á¶èÀ‚AÕÜÎnJÝÎVú[°î6åo"HÆî8¾Uw¸÷ùütþ ºÛ¬öQfø£jÏÑoqL±?Œ Ùx€ö¥ÀÇ9¾‚'Ôè’YVägÜL} ‘¶ã-5LSë2Ž©Þ.\Ѝ,ñÈÑKæÆÈi­3qŽnB¡@èÃÕÛBxŒ˜«)VESÐ!›ºtÁ²±!åƒh}ÊKNqØÔvM…zÝ–¼°4NµEµkôm±¤P™Œ¦'âÊ3ο<³ø³]ºœÊ󱔯‹ù^pŽü¯?0mˆB8'gÀï!ˆN±aÌ뛓/fб*°aðÃùC³…SÑAÐiÌ9Ò E|`Ë W›jÃvÛ:KwhsˆÔ_̧R×zq7Ì(ñü ±›ÿóþ9•V0–2‘ÇÚsÅIÁôsäpŸÂC»&L2Ti€ëò ¦ç<æš”zHúë’Ç`IŸ);Ÿ<]`¹¦l¬PB‡qa¥ü[JØ õNÇ«¨úäÅñåÕ9¾„qÈw£8yïX-ßÅürv «WçÀ(ìbCªÅ'ïóÂT%ÖΊí÷v¦ ú„N;´1]ϸÌ"åè@ÞŶVÎÖ#áñ¸ F!§ÈmÚÁ@cÝsš"qÚ)%,èLíLÀêEon®G hq\'°Á.Öƒ#Ð]¸ LLWzýþŒInÝèãˆm ´þ’Î3=Bfî á9-¸lüŸçå/&z}¿‘Odª¿ÛšïÁfÑz[·ÕýlSü[¬Åÿß:{ J½ñ“´d[ «ºØVŸ[„Ïlu‡QJE(Å&ÐUªn»…KFX£êŠƒ`Ì9j*›ÂfKðÂá×B?mv;ÿlûŠüÉïðX³´ŽÎBïsö•z"?Î"ê‹•÷GÓÆE4rJïÝgK:ýÐQâÊf}àòA\ªN ®8ùÝÚªŠÜ«ª9@šmNª¾-yǾo_Q^ e/—&™;ZÚÓ[ ïBFSià(]Rüs'¡¤®!€°™„Ý2wIÀ÷q*,6vgŸÝØT»¬ú-yN^G¦EeQÄ o‰,¬ =>Œ=¯)Zî‰ ¸MwÍüÈÇìÄù-&ByGݰ+Ê‚,¼+ p‡„"н7¤‘Á:•&.aP‘">ÃúýnR”Vx6.dÄi˜À)(üÓkÐd~ÿÐRshªxï ø¿2!#j ‘Ȭ F»šÊ)æL"…WŒi'Ìb·o£á€»b4ÉsD”åq®nµArÁ…kWœú¢Ë–/Ç03N¡½H·žCß—]Â5W¦;_„°'Q¹Ý³Ö-Ÿ|s€|z…‰^a‹r·&F.È+¼! ´T~“æâð 0GXrBð%Ï(é··,¡ø^ø¶sÈàý#óãÈZìºAUGEE|è"`\|×X'Ip€Á›y×ôº´¼ô Óœ­_B¡Dܯ³iéÉ.”ëv{v¥eÄ`ááòV;fe8êJo¨ Ý0aoiˆ§G‘ ´=êdRv«(ÓýóI…é!ÇC8àuÖLD— ‘—,‡š¾a/ièÝ}ǼGDÍâ˜/I„÷7~P¨èQÎû­æÍÎ= &ýÁ7tLiutWGÝ?¹úуåéVŠ›=}Ot 2¹R7 F ¨Ò'„;+à´):žŸ‡–} ‘ZÉH ‚ BW·kQFÀòÁ 8cÃeúôJd„>=wm[¦c³¦[îN7à ÝZw,—3-§7,ß°¹ HèëŠ-‡ŒËH¬é+˽©úG3¼UO·ìÁú{µ»¼3&Œû}Jz­-U §«‡g;Ž|B®CóüìƒÿÒÔb?ª¡$"V?û+Rádo\Êଳ¹¦âÛhÁ/® À¾µLu.ÐNôN¸Üó˜ß·TDâúÈKH9x©ÅO ¨Ì#OéȧxÂ=K»YQ²¡œ¶ƒSR,RP©3CŸM^¼¹~ñè;miendstream endobj 54 0 obj << /Type /Page /Contents 55 0 R /Resources 53 0 R /MediaBox [0 0 612 792] /Parent 56 0 R >> endobj 53 0 obj << /Font << /F8 9 0 R /F32 28 0 R /F31 25 0 R /F38 49 0 R >> /ProcSet [ /PDF /Text ] >> endobj 59 0 obj << /Length 2937 /Filter /FlateDecode >> stream xÚ­ÉrÛÈõî¯Ð¬2Ûè àƒ¬Øeì•ÅJU*É$! ’P -Í×ç-ÝEMY“ä"öúúíô~õæÝG­.d*Ê´”«»‹Üˆ4Wæ"Ï”PFf«í?’}õ[½Xf*KêÇjÿ°«ÿZýõÝÇb|Ñ"W6»Hé†À˼©UÅÅRe"K¡½ë»ÅR—:yZ¨"iO8QÉ÷ÅR&ÍB&GÜó‰æ-×'ÜÜmyÖpòÄ“cWvõð ÍÝÉÌ›u«®©¸h’¶ãÍú‘/`»¯Ö;÷øv±4i1ÆP'›êÀƒ= ë[DèÎáÐßÓ]bË”¡ºÐ¢°V;Æìšu„yJ cËÒô"€–FHr•_,¥™)bSQÈÂÁÛÖÿL¥A–5}Ó"Z%Q¾i;¢ãð¸Y/7@¹ÙOZ- ÒPJ±`œ'ÌÁæ€<ñf_ó`[õ•[BF>¹ Äa·îà­‰£îÚÅÑìz‚2fî}ov;¼°êíÖ\<ŒŸVJ?çhËJ!3åØxóåÃÕõíõ¯¿ÜFžå(9U£µÐ6µSF¾GшÈ]uBJ#gE¨_ó¸ÅaaÜѵ\°‹žÀ™˜>I™£É‚>be2Ëæ‘Åÿ‰Ž ¨ˆsÛö´öãM‹þàq2‘6‚c꟱ô JËí…½åY…ìØáØ€zɤsÚåýBC†ä 3ºä…‹kh^p­Bƒo¼¦äƒ{íÿ„žl¾&Àb±ÌsC® ÃIp68ùÞïݳŽÂovh¨éTòž‰xí¡ó ¼iޤ‡¸+â‚J¾-Ø0€(JÊ+‡ ‘ûµfÿGKǶ¨M&¾ÍÌléõÕ!ö\jÚ€k¯„èó4¹Cÿ‰x€"ÀÃë(I^n²šIú1nKÐIQjBæ9­{ Žà¨ÇL3iN2™yÇle†ÇØ_ˆ#. +t ŘxÓxޱIEZ¦ÙcÖPx ]©Ç~Ìáì¼M0öÃ{ÈÃTä6ùðW¢­Jê)Q«Î$7å,êÃÓªÌ#^/Ÿìx‹d‚ÿC3|‹[9<ÃduõaãQéuš&‡–«îëi_Ð>ú#¹kÉ´¸ŠÃWÞćî£!ZY)rím’HDžy6ðáïIhˆ &âH›ÿz#Šl&•àá/a Á¨Õèw.ZP0ÅÑó`*а²dEü‡$8·³)®Úg20X{x¬;¹Õ}ÛÕî¥{²e1p0aĶZ…î± »˜Ë´QÎîîó18ïÄq-8q(›;NY¬·¬GÜ‹´à1쌯§ŒÓ7%ăŠàÀ˜ˆ³¬nœ5lBJŒœ%¯|B2 þýÊ«ì§7òžWXh™SdXh$Ôo¨‡ÛSµsÛ,µžÏOM‚ÉTr2w€Ý•OÝl¢œ0<¾ãdq`[÷Å%Œ‚2eG—–CÈ„µ¾Ç>]ÞrÚK™^ÀÏß’†ö5º* —-3äÛ«„uð[þíð^äÍ/þ “k€Ô®Ü‰É›GŸ™ﲋL§8Ñ=%r..î8áW~¸:‚ÖY`°€ØØêÓå ª×%Š÷êgÞeÆàÜY”ˆÍB y‰Ip¯ùl† qlöï+‡†³Cºë0rF‹ª¹«g… 9LÓ§)ÚÉÆM" K{ò‰>™06eÇ‚Zx=Ë’×›žw0=¦g<¿ëꑃq .¡ßâ¨úмÑÞoϲ—˜ƒ¶Ï<äQ8È )E™AáîêÎR(© ð(O) _(Æ ËÍ}Ô†ÁÍûO— ŒL4[-Jïôav{jˆŠ×BV©ÈJÃ….'e² ¨Ræ!Ð…lBYÏÎü#Uç@M)Ï”ù9© DfYøä•0YÝ®¶Mk&Xa¬-^ôˆŒCjh§ÌŠùG Ytªd1sÙŸP1EsؼÂ7.Î9_!DÔÙ‚‚Ø4Œ87„áPæ|a¹ò,Çâê¡¡´.:Zžoý©<‡Ê48¦×D¦3†“¤"È på1QASQ…\v¡’OD [获%ÿ8eÄáÌUq÷SaRÄA Î oÌÛ¦ÈS·z­¸KŽÞ1m—"Ms=•µz¥¬ °óUð4 éÀAiÕL&ÏÜ›ÉÏ»7S„Vžãðƒ™×À%Íǧ¥"n8“i6(Ê2;±þ¯…bE‘r*âÿ´Í„vÎÑœ¼R úôûëî4¸vH!:j±æ¾ë‡Ãöà×л<%óÂ(£ÄiÅý×àh s V¸Zdý=/„ŽFž¸•à÷Ãö™…PÕŠtâ[Et[=ÿ^5ÎÛÀb ·ÜU‰=Ü:õ<ÓR†vÇ3¢!KP¡®øA AûÎSxXóþ™øŠ N¡k:Ó6ùŒ~õú —z¡™GÍ*ðÂÿ„|­çv7ÖîÝo¼GH0}b®z²*×.ÃOg*tcÈ»ÿí3ßš?TûœŸ<2<}Dèm`!&§E‘| ÐýKrVÇžº‚Äže¬u5b:(ÉPºt]ÿ‰h9¹ƒÔi ™NG=äøº%¹›zßúv²ÿÊÃû]ÐÊAs÷£Îî/C¥1ê½ÿÜW_êùwÏBÒ5p š´XM6þ2ê¨7ñ…uÎ}ÔA¼ Í­}¾¹¾ú‰‡ÄÍ‚>âe”Æÿ¡àFK¨p`´AB"À¯,aû+ÓÁÂKìÁ‡ï °C¯•ÔáƒWËx¡NK3X)œ?Þ³yªNûÚ?Fî¦(ÇÿŠ‘²I#„o£çGÆËO>r†1K®íüÇ_wu¿|Áî†Ï{'rU&N×Áå8Âá—íØù¡ c Vøy-ó^™ÅùàT€?¶rH-ñ n Á€}'}g’#¥ú ipRuÇ[Q¯$¥³1¸P‡à?¦k¨p/L!2™–ø!Kä€)áVàÖ›«7ÿqýqendstream endobj 58 0 obj << /Type /Page /Contents 59 0 R /Resources 57 0 R /MediaBox [0 0 612 792] /Parent 56 0 R >> endobj 57 0 obj << /Font << /F32 28 0 R /F8 9 0 R /F31 25 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 62 0 obj << /Length 3344 /Filter /FlateDecode >> stream xÚ¥ZKÛȾϯr1X4ÙÍWA{±N¼ñŽg¼—õË‘(‹0EÒ$5òäßžzu“Ôp#¹Œšý¨®®®úêÑóêöêÅël¾ L¸ºÝ¯¢ÄOIVi¬|…ñêv÷›wÌ·k•z‡rzuѯ7:N½S_¬7¡·Ã¯Ä+kþ}X«ÌkNåm Kªr›eSCÓ_oâ0ön)¾[¤r‚±!¿«¤ûSF¸výòzd¼nnH_è}E eW;CÃãwë 0.«zœ84nÍëùú÷Û_VÁjj?ŽŒ¢£·´íPnOUN‡Ê`í´©=¬3MGS­~“H˜0Ä䩿ÅÃÁžÂ0°¼ 'Ï?—8\ñ.  ±)a¢Øy½Ã3½x­ÕôŽuª|cTg¥ÛmËØ£‰3]Б¯XÊÓä|¯×YäÁ©QV›(H}ÇÙ\fÅ·üˆÐÝÏ—XP©¯RëTä+•¨)#°Mìmê–×›“u rn´°ô—%Ò™e‘‘õéxWtL²ÙóoÛ5Û¢ïA6ƒÌÏÂT–ÿ•W,Ÿ;õøÄ ¼@4¦Ó†ŽècQƒ–~$É\"çCÑ-RÔ¡ŸQúÃrøù  ² –%šd”y]ÑVù–l¾îPW–XIŒŸÄÆ®þÖ·wUÞ‡C?,ì4¿a4ƒ8ôP‘a1á†÷³€e™÷fÏ}Î "ãÕÖ–dìL–Ô}‘µ{¦wF\#þ²a•V±ñµNã¹J÷!˜@Q¹Tʼø‰4ºæsQÍ©'VSN`?˜Ë«JÌ.»D®¾èÊêu*Ö‘ZóËÇØ9Æ8ƒ(üµ6AʋӬ,°ë'šuv Æ1Rîýšéi_s£+¾žÊnfúÙxøñ8–\°ÃKqF¡"á‘$ñ£·`Þtö|QÒùð7ÇMÈ»Aö¤I,ˆ#í†ÛŽu÷Q0i::nµËYBlÍ@&x”×nNä LÞ°±Ñdq—²@?Õåèˆ4ÄŒŒHz­(U†*ÒFÐJGA£… cíþ#~òÌG¡¼F;Z¨ˆ7õ`ÿ~ÑÞõ‹»²~!³ÍÓqBÞm † 1š 23 &"Aêȱ#¦Æ3¬Ì ´B·ÁâåM4x½0’‰ü6  ±¾Úè4‚«Pf~'pörC±N¨‚:È™¡qF¥5Vì±NO>‹ÉŽðvqºèz!Ú¡È‘(uª4=¸Ÿ¦c(ßV¤ šåž{›z¤|.û òÒðo‡ké[Ö½¡C#mCs‚¸©Ã0¿ é#²6)ï†?¾ã®›÷ÈßË(¥ŸA;xØÄŒˆü¾yuÍ7IܶaaDKêÚ5-*H™Eõ°tމ«d°Ui¸åØMØCð™"Ž:øL2 ×A~´xùÖF‡Øõ„å¨G: [ÎÍÇwÑ‚Êfä$‘3žå4¡Íc¢ ¿y?U\¡Ò,>ÐÐ}Ñ«s)ïU“ú'~–½}†BÒwòÑ¢5ÇcQïè{ÇöÖ´;^-ÇÊ@SÊ{ižP;‹Îÿ^¹}Êó+—¶–Ê™-õ¹°ÓO’L„þæTI6:>ôCÎ9¾Ë(ËÓ`‹±Ð¶·ŽÝE/™ôáõò ‰}0áE]æË•{´ ýtøGÿ 5Ö|SÓ¹D5þcÀ4 †n±¨Ì¨É¹å’[eFoKÕõ±n„ó\Øóþ@I>{.[Û"aÏ¿žQr¬ÆðW6°8`æL7§ÇÇœ•‹òö»RB•$¥Â¯{ȃ6Òéðvð£bo%³÷2ÛfIÊ4ÙUŽË+YÀ…IÌá^_/ÕÍ´d¡}’ݳ TÇDOÕL ä””PB‹3È®ŸT0«´t÷å±d‹Ò™Ü¢ÔâmiYgc Ü°ø%~¦,„Ô ç¸¼‘ïÞ.c3×X $$޹ Ú3/¯Ü¾Î<úɃüGËR±¨™Ôg N'4P“ÿˆÁ7À‰N^M¥ CÊ–w»I™¨#[gˆZ)Ø ÒÝŸ±ý,K íjläñ‹l+ endstream endobj 61 0 obj << /Type /Page /Contents 62 0 R /Resources 60 0 R /MediaBox [0 0 612 792] /Parent 56 0 R >> endobj 60 0 obj << /Font << /F8 9 0 R /F32 28 0 R /F26 65 0 R /F33 34 0 R /F39 68 0 R /F35 43 0 R >> /ProcSet [ /PDF /Text ] >> endobj 71 0 obj << /Length 3004 /Filter /FlateDecode >> stream xÚ½Mw›Hò>¿ÂGô^„éO`oNâd³ã™IlÍe7{À–y‘„Pbû×O}t7BFŽ“ñÛ‹ÕÕÕÕõ]…_Ï~9}§Ä‰qnŒ<™Ýœh§ynOR#c©…9™-þÉx2‘?Bšè_v›ÉT¥&ênK~øøúâl"¢+~›!Nµ¦¿›%Âltµ«:¸ Ÿ“Ôø¾í*ü©7Å .&ÿýëô]v"’8OrŒ  )äÉT&±Éµ%Æ Bžš$‰Þ-¥TÔù_b U3¡CE4ïêæži+9 P:KO"{õæìâìãÙ›_OñVW§³¿}øýý(S&Îó̺}1¢‡*y*O26Â0¯×Ì«ÎsŽL£ûíd ?å?ÚÄ*P]_Ê‘³%’ ÏòM½‚[®ê‰Ì¢oH¾\ÀYÖF×™G÷üL2Á‡EÑvá Ä ¼#CYT¶@}Az xCÈ´ŒÞM2Õ "šžê¡}Eʃÿ|„­¯q¤É›šz°®Úxêí+žIc+q2*6:—t»î–ÑŒL`ÏšLʈ<*ïX¯»®¸Æë#ˈS4y^¬V ‰1•§`GZ*'¿»v{½*ZÔGdm%È:÷¸¯Æèå±LTæ©-^‚š×íÝüG¨M ¼@¹ŽÞ| G§éPÂw?t†4)œ&&UÆÒ–*æ ©ƒŒË™ !€–é-eLIg*ÑŽ<ùÜìj†Ž;Љm¢<+‹ ŽjÊ9Î /÷ÌJÑòï¢üœ½yÌÐ(èÍÂ+ñê=/®6ó6ÀLe*ÅÐñu"b›Û|(Ú‹ê ¹Ä·ŠªDñiÍn#¥ Ïc-¥÷nðAø] œ-8n” :n@åŒÜU‚­íÐüåO,h%O‡ÖÿóÄ@à:‘6:ÿºüÓ'üçñúsÖÿ,ʵQÑŒóK¡±»LO”ì”ek» ‰'è“B£NñÁG7¤æÕŠx+^VüCnÁGt:FkCöŠu«~Z6Ö6×CåþMjfÚÔsU* …q}ju¨Põ\…‚äg·^[Å”}¶ü[»ðA°Í^v¿alaµrâÏe¬Rq`¶EK~_SvWE‡©ÕøÍ )¿,昋oY`\€Õ¥[< dˆs±Ž¼)±ÙëíÊ_Æ9BçÍ•ŒtKÉ—ýè­rD¼J‹8•iþ¬t9 ØA<•Ž(Z«à£ú ìs)á Œ_@cåòøÀTÇà“éžBµõª÷uõ Þ—âªR`Lw=Íëmµ—'úbRM\˜Œ„xfâ²±ÕR>•¸ð(¾Š0ƒ:Œt´"¦ªµÚƒ~7ð(Î׈‹+» ѹ &4œḛr÷Uö%ÕýP†#5H ï2Œ€€ñ=F¥!¶aÕ£À:ªoUt7$§5ãŽFmC½¤j²å É¢„%WÁΛêšKmG®rÇ^æ¦tûpùÄ+âÄ]ˆtì aÐ5u,2 1t)Ù^ÛdŸl›zþöÃÕŒ›¤Ù$w:l‹:§‘ÄÆ¹²fØ‘é›$‹Å{Õv( t…ŠbÚ)™_½u-‚ßPïH£ª¦5´ë+èÞúdÍ«®¹¼GÙ™$êþ-øGNßòÃõÊ…E¤ÿ…aÄÔý|UÍù}QµSgþ®5È,Ÿj<›GKz •r… ¦LŠ–øK¬®íy Y ­…Ç[@üŽÉÝ4õš¡Å†!EsMÁ»)|S)í #YHIi%–ùÆ Ÿ ?'‰£H2õHŸÆL@™Ü—­K’Ç‚yýVanyÄ-A†²o«‡`bhÆY,ô@Ô®·.6u×gK†-Ií.nò±kP9Kº¹gà5:i8<¥Ã‰G¡¾Õ7±I´þÿ´ú—ä…§³ó«Ù‘fÚzÈ2½¾Îc`7ÅÓüL¯/R%¿Ûì§i–?Ùìkjö3jöµoöuJ®Yt…ƒŽ4û§º·RhðiˆãôŒHÇÓÎGy¤ÇQžó¹`qJGÛçX|~VêÇ€ÈõŸßUËr=–X•Ž•UOÖœmßê¿¡ÅKš¿¡‡c„²l^ûA§M}U˜B/”(’RGCYV0r#êa8ʼnð5wüäýsV»w h§mb¤&JÏ¢¯½ ŸÓPŸjBÝ ‘äg%eãÔ¦aBÓ¾¡ÅKš?“+{Æ» ,Nl¤õpŒ$ e‰˜ƒÆ|X¸@å’áÂ%MT%Cxw¹X%"Ö›ÊûQÞTaÊÁ‘²{š¶ gDEÈÓ¸HÕ˜¾ Ù º ¬´˜]Ô럊ÕÀJÂu¼Ú¶’«H+GÆD޵cÄŠž_ÐÍp% ”)@Æ|φ€AßùxŒ¥ck_1¼?ÿíR¾=Ò8Ù$N-÷ O^–›’<_Hñ½f¡¬ñc•¿Åe&ã4C¿Ùå\‚ýaê5Sa7@QT‘ÓE>ÀtÉnaÑÛ-)Ÿ35ˆ::/¨ôq•ÊÊÏ¢±3¢¨G¥Es÷Ûk#CÛ¬]o«°ÀÅ×–ØØRÑCæNmZHŒÝwé“]ÄÕ¼€*[‡³‰’Ñ›_ýW˜\Á•;‡´£ï,ÝX}¡Tœ¥êQAiÝ<Êjy‹2ëJ—@Ñ9»—« 6^Ô¯uîÆ¼%ÒÔ˦X· p&—÷Å…yࢗ¾´‚š5®ð¦ÞøÀ›"¦o[6{µÂQDýÖ¶þ¢C;Ú/§ƒ<ìv°¤Ú³­ðüïâÏWŒðæ¶^•í—{ü$B '3(Ú:¡qÌÒèÓ%^ÈÒG¸K¼á'\#¹ðŸ.üÃGÇâÇ÷ßøþ•®ßÛ‹¿h¨,Zo h‚ÿÛ‘Kð­/Úî Ømý'#µÎРE½æ’µY2Ô,»¹ëd_]ºsÁËûD&lÔ„†uoûTÉ„æ!ÕþâÑ7†¸\oýÔ¶ñ¾0؉HyõÞÏLÖ%†W^÷*7ì#}½¿œ'˜¨ñV×µTh!‰ŒÚû5V†`ˆKoí=…-Mqmí§‡Øòï¶¡kñÖë²où=Mº9ä5P! y0äeŸZÛûÛnE¶1vÙ¯¶·Úù>3y½f«©™õ%õÆõc£‚>àC[éfýôÌ$&D!^á[¸h…Ë•û-(ËÕ`V®÷ŽPµö"7‰âòá¡)uì6'ãö…É=[å~W…¶ôÄnyŠM‡je#fe m³0ò³‘Ï)Æ@À­UË(”Ð °ŸÐõpTŒ¯7H·áMìõ~|½lÇè}ìwgŸ9ž·A@_I…»vu?fm*G²Lè»] „47`Ù–ß]Щ\«ëöÀ¯<:ÞM¡DÁ||8*xºî´Ú5íŽæt*§|hz–üD î (ä!”xk×\áÈ[Øl4(Œ–[1´ô¾‰p82¨ÆG•?=¨V”: Wõ´½uó-+'ý~˜îÆ®®Z;"p•ÇP åÏ)­‡\ÒPߟÇêÆ:>:LˆöfænЉP?Ä8˜Q³ ³$‡}OÐu>œ OužDg+ê—²Pø„o8€>(à†`ÁÑÞËê *“t›YÕ0ÇølªŒô£7½´aoÆs¼uSmíúûìÊ2ÏûÒœã2n+>GæPXÝrÞázƒ“ž¥Ð2ÉÊíÅÚï.‚ωè✡üÿððö?_{àÇËs@¡ëmûãw¦E𳋘ßüG³‘k­ëf¿àpÊÁ†Æÿö¶¢¾mQÏ(»õþ——Rñ‡²ø6e®ý—B 7½³uá[ÖðóÝàs–o;jÿy£ÿ†¡8;JÿübÀªUKp)º¹Hpñ—óÙ/f–dÀendstream endobj 70 0 obj << /Type /Page /Contents 71 0 R /Resources 69 0 R /MediaBox [0 0 612 792] /Parent 56 0 R >> endobj 69 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F32 28 0 R /F26 65 0 R /F28 40 0 R >> /ProcSet [ /PDF /Text ] >> endobj 74 0 obj << /Length 2555 /Filter /FlateDecode >> stream xÚµZYsÛF~÷¯Ð#XeŒ NWjke[Nœ•EäºRÙìH€"Ê À ŽüúícR¤¢}±s|ÓÓÓýMwSïgoÎ?…g¶%"+²ÏfË3_Š@úöYà9Âqmïl–üǸÙÓµm£™ØÆ*Ŷc4i=1¡+ƒ¾â–ÇSìy˜¨ÆbÛÄsøÈ髿)¥†Ò8‰ž^g•n&$ù–§ LÀq †jÒÛ1 _x!I “7c8¡ðe;kÇtaRäGÁPýI2MºBúq2ž”Ý oÉ€’æ•€6óqu¹–°"Ë;©y-¤¼~™ìÀüYí…‰N€YUÉËÌi§9€sÌü•ÄÉŠ»‰³×Iêtó2Á@òÈ–'ÙPâú%t” +Î9rCz)â&åÞMÅQ[¦—bPÙnU.yV³âhûê˜äTëu4¨ÑÊÛŠâ;µ^í]jÜþ@D«¢uÐ~¤QÀ‹×Ô ,êK˘a[¡Éȇ£¬U …ÅuMo´Ü6Ÿo$-·³š§P›d‹¸ÑQ=ŽRh½†«+­'…˜Óò'è=Š‘ÝñüÖðàx¿¦ \*ð +Á*’¨˜8‘Ñ`ú€3’aŠT-hJ^€vŒ?¶˜ª} ß ÐÊ}…Âõ,MæKÊ"p­À‡7Gõ©[O+ô”Ð3î3˜FCu64cì+T¼¯§­Ûº‡V’jóçoÇ>ÐÍÀ¸Ð9A† þã†Ï­-Ä…w›ÞYœÑ¹CÅþ¼Ž'0ùQ9w^—ÜZ`ïª$¤²VÆÍ[€ Ü¬ÕU°Ðòœ¨LÈé’á³Q uÃNWjAZU”" uGö™m‹ÈóÔ=ØÆO é ŽÀÝBøÇ†Œé†ÒªB+Li«McÙ7:¹X |åD°…%¼ è™ð¢îB˸üíâ |]_]òÊŸ)‚ ]T˜_âGè.ÒGšºŠñënÂd„`1 TäÅ “ár ‚ØŠ#J|w…qÕú~:ª-«,jžD ˜|ÏÌöœ}›©Ë\KÅ‘XÌÿï¦d¢Ð®k6ê5Ïɺ¸½€»n³]+l ØÆT ùêâzºÆ.üð/^YQv ©rÊßׄïK\ôMp×€Ï6ÒÁ!N ÛTCö äg;!**Ü®P†Æ4ë,Aç@æ ,±Jce“˪\+sæ9È-T !_òÀ—&­7öËœKÒa‚Áû*k}SÅÐbþéŸÁQúŸQñ&Û¡~ù6™*ÍH;ÒÇîÆ[Ó îÆnö˜›RÝCõêòÄn©Çþ°<+¦£ÕÛu{ËvÔšÍbo‡Fž!BS(ëíx€­LŸ¦ÜáäCµUB5i&GRŠœ£9dy²Ú®â´ˆÖ¢b€1îŽXE“ú:þÀpŠÖÏS¶ˆb½¢¬Fa%Fº~u$òá|žŽ«³%Ÿ¥c Ùæ:tœL[MÏä.¡ œÄ„cšb"WÇTŽÇvísZ±qJã¾}‘rÍøO­‡¶VûÒ µU¯FºSØÒþÔ ¬éúfäÛ³Ï_äï¾ÀïXGÎ@I&Pr]¦c ÇeöG Fô©›*ó¬Éhqû¼c—m_×î@½R v'|ëÖÜÑ»ùCîVGe×mÈ[…e…þ Ò'úm>õygÃ$tgóÞÿßgX¼î ,JôÈžŽqyÿ½;FvïÙÿz]ÙŸ*QrXB]êBˆÇò íÈàÇ=o°»àصr£^eüâÚ¼å Ë)ØÑ>bµšP쬘!¯LÙKq éŠú=j1}/ä`Rtïò`ý»ôí¯ÝHÏžíiZý0ýpqùÛ—‹}eŽÀ…X\†/¨¸1òÍOÓƒÈÁ±¥˜G z"Ü«IGh{ <”âFGC½îA§ÿ ¦‹?oE¡;¼àߟ4Īƒvô}% ð/ÛÑ“~?IXå«¶ç‹ÐŽöÅpgn'],¨_Èt‡ÄY¾&–[^ôåÆ>¿ÕϬÎ÷8À¢ˆÒ³–š­pÙ.nŸü{LÖ%MäÀVE9–æ ÙBÒ s1 ¢ÌB“PrrDEºøÃxÕÝ [Û,¶ë9¦¨‰%~…+i]§õضbT-ÿÇxÒß¿`€êF®b'W€(þ áÚB¶®·ySóÇ}Fñ“eu1*uë` 2ŠXaBVŒ e H]ƒ N¹Úgv¤ô ÀîÞÝ`¼îg£u†Þ9}«±0eÜYá\\ï$ŸÓç¶øjkÊzWþ¥8ÖAÁg¤°á¬g~Ñž ö-T@¨a~`,"U°˜M"ð~Êà¶óœ2™zE‰RýoÒïñ#aŠÎ޵ ,Cä2Î(ý¬è·’H¥´ØH‹E ^‰ioZQõ¦'È?•Sh~¡ÞP‚§ë=8\ª}ˆ¬°ƒB‹ºì/æ”G©¨‰³ôßô6àjqÔ‚ƒéËrŒM+Äuz |Rd¤„}©KX:§nc³OÈñ¸êðkg%ÔÀ(§ªâ¦Ÿ÷•׉ނ¦s€=¬ù¬ã…ï§áÙªi6ïÎÏïïïE‘6‹²º=¾ÉãM¼ø~¾Œÿ«fërE?V}nmJâ·ËŸp×Þp‘JK­ª¸éÒÇR? JÒ(ÊVÍ]‰£Íã¥ÛéH™Cܵþc ì\¬Š}ÙãE£º‡©ýSÞ"/7]Â_)‘´ã¡þVYÿ\ÔbÛ|i2ö'æÎc¶“!WéÎJ¥¼õ-éê(žÎ¬ 6T4Éæ½—XÕ†wRûî}¾£ g«pÚî<»Z,¿õ]!¯ê§tÛ¶ðWå¥H ߊg$ÂkÓÿæröæÿsâÎendstream endobj 73 0 obj << /Type /Page /Contents 74 0 R /Resources 72 0 R /MediaBox [0 0 612 792] /Parent 56 0 R >> endobj 72 0 obj << /Font << /F8 9 0 R /F32 28 0 R /F31 25 0 R /F28 40 0 R /F26 65 0 R >> /ProcSet [ /PDF /Text ] >> endobj 77 0 obj << /Length 3402 /Filter /FlateDecode >> stream xÚÍZKsÛF¾ëWð¦LÀ ^[•ƒì(¶IÖJÜl6D‚$j‚À(rþüök ÊI%[ÌÁ<{zz¾þºGoæg¯¿VþÄ×®ÒQ0™¯&:rã4&q¸öÃÉ|ù££¦3ßBç~ºÁÿóéLÅ¡s>õ(N½ï°*rºMÎ…ûE­Wç·ÓTCGèóö[5Ÿ¦ÊÉÛŽ¿î÷ÎØåÓŸçß¼þ:™øž›z©â(å&±šÌ0 ’fŽ AàT8,+¶øEãá'Ŷ]k×ë&«Z¬PÎ åÆ–xåÜCyA³ ˜ Š$$&ölê=´wÅ6—92œT†–5ü&Î"ëò%wgA` õ¢Í¨àp7¡ë'þÄ£}Ü¿=‡UÏß~ûz~q?¿¼y÷úêòfD3¥®êÉÌWn¨SÖB¶]Ž-x®¯üää—ïF–ðƒÀM‚H˰vgÄβ ƒ–ºº)H*„µ¡w­4è$çÊEV–ù¨L>l;LŒL;êsv1?CExÔ»:‰ÃIL†èù“EuöñìÇŸ½ÉòÌ›|sæ¹*MÂÉ#|ÀöRÐ@u¦\­P`þ.ÏîÏþi'Ô®¨Šrž™Ò¾ù>”†¢½¡ksx²:t==ÜnuÙ¿ä»ѸN\«Xtò“zcšÓps*»öXqÚKÝÄ ÕßOqL5 ’ô³ŠÓ~ìª( þ âŒ1á­†>¸`â¦*=º$wçW¯Fobä&a”Í.Ž5›†ƒ HåyPðÒ¿bÓô ÷{6”lD¯¾§1kl¿Ïé5hJã^gÊ,ÄËÿ®ûõíÕü|ÿ ëÓSðƒâ¥¡Që§cµĸQ¢Ã¿Ÿ^ƒ(«”øŸUl£cÔú*6ù¬b¿ð#€õëkçq“#°¢ªÑ¡_üØwCrÓH {2¾¼­qð{õxzž` ‚2Gôx¨áQ#ψI9ø­SÇzÊlAå%¶%À6‚Ôy³=8˜Tk%Š.÷c~Îwu¨  Ž"Câ‚W3=>6b+IÙÍ#ˆí, ¹Ø:VÜhKd‹ËõÖÎâ$uÐR²Å|óFZVükX ª¯ËilÇ-¨ÃÆ0!l^M™ÄÑ0“ Y# ¤,"XõƒÔ =ª%Fæœ`f\‰O&å Š›šA]‡Ë<•9Wá6ð·Ø¢¸ûÎ¥åý0=:Zœé_¤á*á¤ÀÃ`¢GZ>·knlsâu]Q£-W.Y1‹¦`~#ufÆ%t-~ò|E›jò-N„2ê%ã/¸ÆUÖÉ´ˆ…Tk?äœ#opZmÙ&žHÖ©j ®s¬´¬¶;¶ï%ʘ[Ó m±.œ;H‰_²fiÏ{´<é¥|’ñÖbð Õu™|pS±¶‡½ÄÐZͨîWÓØ,Š67ÀTâ„ÈY0[^æ°ã0öœKYªÝÔûr*s†Î͇9iÀ;ܺ=-•âA 0ÛG Ø59Qp•2W©÷­‹+±œáy’Ö–gúp Ãç—×çWÜqg–‰ù’TÙv!Ëþ2 #'+÷C“‡1ŒçÇK’9C%ŸKJ¡ù–µ ZˆÏùCa0§¶Ì! Óñ`®•ˆ‡#2&ÝŒ òI¢„Ißþ€ú–à•òß¶ø„M‘çܼá.x*hKú@ex›K²áäG‹/áhš vmg"ì÷¡g~u~Ï8`P¤ÞÐTY‰WEc€FíkÞ€X•ÀJ—5ŒIk®Øq1ž°˜Z´„ªP d„]±+Ådqi$?âß߸:w×î+Ó"HA'Ržóù:kL"Áj¼€ÕX dãÔ¬k(Ø”2¡¶FœeËÕÆBQYÖ¿fMÖ™q|ë¡„ÈQæ­;vTsã3ÈâÑPH…Có„ö˜q Œ Ÿò#—%ø€>‰ðÌ@6Ž\7¬¦-b¡ŸB×~À ¨Ìd¡!¬D8º@Q+.Xe¿É_µ/€îã¦X`¯ ²²X4ì#¡† š ~à—S wç×è® Ü±pÇCÁL¼*ñ/-×›Á|¾PQe¶ª‡gøÚZ^ •â©´&1ã½þØìÞØÎ?¨g,Ê×Ȉæ(`hØÿ– ´_47ð ¶ú•õ«|]kÊ„$À]¿ÃkmŽñˆÄFÊUäO tý.«= —Qž`ŸfÊÙpÎçä3JÝØ‡à;„3WQÀI®ù‡9&¹¼4‚sÑë.+ùF\çUÝÈåÉP`¡!˜1Ô‡n¬ìE5§†wt=¼ai%ðèߨñ\ÞÌßÝÿ:ôàò\åÛu·1ô¹É¹øÖXÕM/Œ\çKl¸™ãöÞ]Üq`i,¸ `¤L#:zL4Œ¨1kÒù °€¿Bº;sCÈ[Ñr+‘0ߣd\¨¯Þ\]X¡’ä/ê«ÿ‚Y¹|{wñö¸4AÛ8’îƒR£å$‡2rJãííÕ÷µŒÏbÈR¥KÅšûa Uš¦ÿ©¾`:"XŒ‚Aëˆd`­÷=ïQg`‘×òÌ‚=6ùšö%?4 .îöh/=ñ´!7мï)íÁ­eoºiG€’ rÆÇú$ ƒ1zÂÐãtwÀv—\UîZ=qÝ‚ð¥ÃéˆÓ,‹ù ¬°ž3ô% þL/Ø”UõÞ&ÌGŽ—æOp†ß0ÿi(§?¤½±¢½RnŸ,ÎñÄ}—LºÀ—^ôhI<8Œ|ç?µ{ì‹Y}Þrõ£Q”÷­àŒQ>c˜_¯Àg½‚"ýPôj“žê-ÓÓ #c‰ö8 ÂöŒ ¸b™’>ð›`Pjz“©+¡2Ø`·o.fa˜“9\í± ˆ#ÿ íáAYiÒ`9Ùƒ([Ø»£¡î©&´[ ]\s™/ŸÐjfÍGXÛýˆÓôo)xçi³±s^–" ¶6xn~ž*vïÝ컥„ÆBìJH:*¹iýäÔ°ºž5k/›¨[€.6±™cê+éÒ-®Õåƒ)[€¾ß ¼W½+b$<²YËKfH#‹iYŒÞIF§ÿaÄ*GÓî˜9^CɃÌd E7„á§b°zQW»=Ÿ–Ÿc‡ÊÖÀ¾ÔnQ%¯É¢ • .ò‰“!ÜGϰ´½¼Û~a: Ú‚.¨b”T‘³2Ø~sNNrUG¨ãÙdúŠ1 «xÇØ‰ÑÚ¤k°­>J™ŠèØyƒ‰}F’¾Æõ0uÓÈSBI‹SuÙù!¦XYìáp¨0zÿˆ}~ŸˆR'À5¶”™Oè¯*ûòÐ#BµÕ$ì¯v‚î½5!fêà ñ¨óðÔñ³ËwÜ)ÿ•ïª=Ζ{ SY–£À¦ÉwØuþþŽB¹÷'²‰Ì÷%qZ¬LŠ…~„I Ç™ü>kf#·ÚĬ|úb;Ò%Ûw5ôÿB–õ¶[Ÿý¦o¾O˜˜7ïøƒBSØ4ß¿çª/ùÇs½àúßë%©AZo˜1jåUÁ„×ÐÚì·Ìî‘ù `>IP׳¦.“Z d`ð»nòŒ8JßcËl)p›>îÙz$ËoúåæeãùùŸ†ïá³Þ>J>LÈ—ÅöyžêøÕ‘Eâùš»õ/D&‹^°™{eëdSÚ§2“ðîÃÒ“‡‡ï`@›s‘$Ī¢â4Ÿ7"­§ìËùС€žd[öÊèȈ­†‹üú›[•Ú¨‰¡ko³²,¨á ¹È“½˜·Ïeyø8S™™B„Yb> endobj 75 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 80 0 obj << /Length 1982 /Filter /FlateDecode >> stream xÚí]oÛ6ð½¿"ØK$ VEQÔÇ€ H²dëÚµAìÖ>(޳•%C’Û¦¿~÷AÚ’K·éÖ§¡/6EïŽ÷}w6{ôø2;a‡¹8š-ޤ2G©Š‚(êhvû·wºè}á•þDx­?‘RzEUñ¢Yð¿d)c¯/;í;>Z~”yïðÇÜø“(e|eÍ@óf…ŸkÀRÑÁ–â-,Nø^·Y­ŠöžoTº¶ †PÑüˆ¥…c 8@sß,›v忞ýþøRFCqLDä2QG“( ¤Èr’Ç¥®u·,¥Uè‰,ô'*Røð¾;áõ{ $xwYòbÑTUó^×wüٖݦ껑î‘A¬âÊ@ÅyDtƈy ‚ZWeÄñ³¨Íb]tÝÌúvSTæÆ²œ¿í‚ÃdNdQèêËDÂ/¡ïÞêõÚâ¹)çŦ3"Bõà¿®ªòÎâÖõzÓóò]QmJCU’‡Y‹4ó,&jÏaЗü?»˜Î¦Q®@ äJE¨\à3Ê‘ª”nÊíDÀÀk~.=`ÙÚW vÒXHá9¸FÍâÇt^<;+ºòóØ;õeäãñS>}ö’ï·¸×l覮ÁIˆ§‘ûíÌ- T'ÄØŒÈHƒhà`koîÚbÅßwdïõÎc…Wô;?>PŸÀÉŠ±"dSw÷+òÀ²oõÜ’¡K† =¹Ö|‡Ž,6— õÎW‰GJâC]\ìo¤·í#‡V²ç®­A‘B¦Þßq"^ƒgE°9'F*Œgìa¸˜Â&?;½ò3Ô =ʧ( ¿†?”>_møc@AWû¦e¢dõ¸èšÊF/¦ÆLâšôpßõ%IÇ0¸½8j›ÞR,\>2 Òb=nL´]sà ùþÈJÐ ‰5òZV„FpòE/ÛÖÞœ#RpMâí[ùþ·ôPŸp{q?Œ…ÑyYÐÍ¥‰kŽÛí–îÝYC܆uõb*8¹ãµó¦mK6ªñÞϤΗÇÂ;oÐo5i ‰ÄyîYs·ÙY¡¶§Ýà5Þêf¯í+ñ°-_…"®Ë•q²78Ë@¯4NÉ8[sG×ójs;H"N ¦E[ºÞ׬ ƒ˜&ñ},YãNÛ¬fB²Ë¦F–$$lo-› ŸDvbÔ=RR_n¶èI³ø@w6‰BúUÊ{c4°— ­yŒdCïhÌ{J»wã'‘~~äxÚâXaŒVA”d’^ïI]ÁÏ,CN²è¿—ÜgŠž÷ÁÐL†x²,ylМº‰ ‘%ô“ €(¡,^–ä!$}/´¾Zß´Ð’r\hE1„l‰‰# Ò4̸Nìv*ôžð”PBc|©+Š­üµ î3?W8KÔä`w¼Ï±ºMãÁé)^ùy‚£<±ƒ'y€§/yyM“¤á Í5Ï“2ÈR9çÖ¶×ÁÐÓôÍY©“G4èåÅhŽñÊέ s™Â;,o`ä¨L0]¸úåáÐPzk†í††z—UlÉý>ä~#]2sûjw8‡æÛW2Ã1ÍH¯Nq‡cYœeòH™f[M&Ý›÷.tU»F,Ç\=aU1_êº Ü`Õ&Eû“,CœþPóª.Vf…þ©vè}¿­]ö°&ðt¨þ•w[¾£Y(®±6²°õfuƒ9i8憈}S•+;×y(ü efèJIxI¾„±=wÝ•Ÿ£ÿüì¸;À4KCÕŸäyz€æ™s€bÓ|=ËRÎ B"裋٣Xàjendstream endobj 79 0 obj << /Type /Page /Contents 80 0 R /Resources 78 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 78 0 obj << /Font << /F8 9 0 R /F32 28 0 R /F31 25 0 R /F28 40 0 R /F26 65 0 R /F40 83 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 87 0 obj << /Length 1707 /Filter /FlateDecode >> stream xÚ•ÉnÛFôž¯z¨) b8C—9Xv’¦vÇVÒI´4²™J¤AR^òõ}%ÑÉÉi¶7oŸ·ÌxòâÕÛPTàgA¦“ù Šý$ËâAb´¯#e“ÙOGJ‰ñÊÕòÒÖÃ‘ÑÆ«æ<~8ÿóâ ~›ü5#ú&Ê4]S `à¶ `r›/V¶yzÛu7ìH~p‘?G3\“βdåñÒf雺šÚ¦©dûª.f‚÷k`‚ªžÙÚÎ4/êG4Ïxø‡pEí"âAó ›ép”¤;D:ÛƒL÷p¦kœ;‘}t"óƒÎšíum›ëj1sÁM:¢V&‚Xtö–ĦéÄ6-ÏŽªræóôMÓÊ,/E•ï[[ËÞ¹w³jÕ¥%77V¥üÌn ìèÌWI±7ÁEn¬ê0 =¢¬<:L"o^á¢ÆH†ókË‹‹i~z8T Û,ò‡¡öŽðø„OÇpB\âb=!up³"¿Â±*óÆÞ)\ùÄp5¬ˆÞF’tû=ªØÏÂØ€8o²(fí2swú‰Ç–¤ÁÙM è««:_òúÊ–¶Î[Kr¢„Ì/À@Y•ÍÃri‘‡‹)\æ%¬fk«8ó–y[S»Ö_àÝí5ŸÝMì±;¡_ŒÖl»GQGQ‚ ‹ó ÷4ìØZ0ðî!!õR}ƒùKذi¾X4 ±ƒŒu6LÑR:ñŽN˜L]­†…ïT<ÎQS–L‘ƒân'8Λjq‹|Ù§ÄH#Mk—8!Æ’î®ãIL+‚»ó©Sg &£ÚPÒ™+½Bžpù˜ÓâGÞUÉ×k6E7V¨9¼aëZdÂÕõ!Ì+\¥Þ|‚ºÀ‘‰×Rz#¼·‚/D²ºá·Ø¸Î!!¦‹Í«z‰!2 Cªp—E˜¶‹´žOxœ®s!èèjñ5PÑP1\yx?Dc ú ˸’GFì¿|ÏEÃÛÍê"‹ñ¾Ûµ¢‰+aE£¨¯Ì«R­Ì]¶¿êˆ‘IÚ?øMëÞ£Á[N`k¤¯ÍDI\@¦õœHµáæcRD÷pM@öǃ ª…ž÷ %ϯ$  "2ϧí–cá>›†£“ˆf0‚æŽÇâ½ó· ˆaþìÞ“nÈ;<‚ '‚¡Ë‚O´ÔÍGZûQ˜êŸWÓE…VÙ<§M¤ÑY²y¸8.…t¼-…â'Ö‰`;ü×AP+_±@¼v!td”éX»˜Ö~w«¦S2òxv<sNÎ/x•Œ™èþÝx½Çï;ø‰À“J)&! å%âç¶¼¡hh%>kxeQ”ö-´íãôÄ™û(è© êL²ÖÏßÀ­‹÷ÇQ£ÐÒ({¤¯t·¾¾wé4…­µÞÙ¥¥| ‚Ài™ž³Œ]XR?Õ*ûy^^;d6~d¢xÍ‹qòA™ Šd ÒÅJ fIA+nqTº-¤åx2pƒ,Ì:Žœ*Îü@~ž¥ßœÊ ÒDm”#! {!@ëȲëŸÔO’$“rNSA§° ƒ(ó¾¼Á"*„wþCž-,oÌ%ÅÀ"ÁÂ,ÆÊ ºFÑA ¥€€tÇ¥Þ o¹´ƒ™L•ÔxÍuT@ŠªwÎ5Þˆ^[˜úÔ£·ÆÜ8Ë¿QäGq úåßaÉ9†â|YµPÞIŠ¢ˆ†Á5_§‰…¼b,ëå'=¼Ëy„70•2pˆ9úi®Ûªá¤WhÂiÁ©‚ò(ÕÏœSïŽuM—.9w/6¾˜½×„¬Ÿ&a??°mN^rýÿÙÖ e*êd|#ÛP·Îºl%_.mGC¥š49{ N矲ÔÁÙç¿}÷ɰnÌo«¶‡ˆge¾´›¦°˜wÝÌî†/Æ©¡#˜Ù[*¬©_Ä.⢮5BÏA@†në¼lnªÆö›-pBbü4ÙÇ”ôzåMg¨â4œ½>t¿— ¬wI¡Åë,øôºù&Ë-=g(k¥»Î‚4ï:ì¶¢:H`O+`ÈÙÄïÕ¦Kt»bÖ^7Ï7ËÂI/ØU§;þ3ÆÿœîQ‰ê ‘àÛŸ`ÑäÆû‰óµËjŸÀÆîŸ’‘r™Ÿ>?â]Úo¼º»>ï<-_µÖØ>òMhå–EIÕ䞎=Œó'ÏšârÕ}\Š“«]xÓï¯|9=£ÅGÜWeUÛÙ³’íþzÚ…êI¡ ¡ úÑHSIÐöè‹7“ÿ—ÊÊendstream endobj 86 0 obj << /Type /Page /Contents 87 0 R /Resources 85 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 85 0 obj << /Font << /F32 28 0 R /F31 25 0 R /F8 9 0 R /F28 40 0 R /F26 65 0 R /F40 83 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 90 0 obj << /Length 2027 /Filter /FlateDecode >> stream xÚíÙnÛFð=_!äÅ`±Ü‹G°Ýqë:†­š>0m1¥Hƒ¤’8_ß9v)Q^».ê§¢/ÒsïÌììðxþê›S%'" ³(“ùÍDÇa’eñ$12”Z˜É|ù{ §3aõfý¡h§3#MÐÜðÿ]Û,Š®ãÉm[.»éó'Ñd&Tht&‰„à}ÉŠÿ4ÿþKø/å?aWelaív HJ€,ŸòjStcY^_x¼Üð¤©6ëº{í“Q…**¬_µE·jª%ÂÄDˆ03F¢MfadâÄ¢ª$`Χ™ Š®Ç5à©Ü48iq”q¼*xr½ÈϦ"¸œf:8š*œàöO¼{~ŽËsž´¸Ñl½¬‹ŽKwÏNÄa¦bÒE¡É4K7G^:ÒÁÉ 5UA‚ýy‹*èÑ‚…ÂÙ@´Ím›¯y~KÀ5¬¬ò¾°š!MZ©—HxÍ+Ýý÷×Eß– ǃ0,ÚÎ{Ä,»´>—ý G2ø45&È+€ ÆÍ²¦› *îž[¿b U–  Le Db$B«¿À y‰C‰?x5GÍ–<î§x*5OÖp`@õ*ßGB£´r¦è,»‘68@üƒDÉBÁü¶©I¹ê~»Ú¡FuÎ0:Gw@( :®ræsß¾ü4µ¼T<⠆宜k{´2 úŽ1€ oìlÑy_Ö·ŒLlïy½EF“Ñ!#øH<Ü„Ë/ ´"Z»ì­x넽âGœõƒVÈš™/òªd,|ªS]NS "à:‘JÀŸ7@4Ž×S> Ø¿É}Óò]™]Smm™Wt÷{óúŒŸ FІ‚Šv-Ö˜»Ý7Ž) à=Ñò+AV± Z6V‰»ÁCÅÅ!.[$‡+‹)œÃª ¿?y U•± Xˆ¶¼™âYÒÙ:ïyRäŒÌXÍ»n 2 ‡Ö DlZöŠ¥ÓIVиw‡óÁ³–Ò0¡t¤=ÊÛÌT®­ dGkn…~dåÚZÒÑàg|ÜL)‡Rœñ!é o-6­.ªÍÒá[^Cš€qsg%—d+"f?øVoÑ©Q'o¾ºæ`a»P‚xš¸¼ªànÏg=],Ȓ‿X{Xeº $Èž­Ñ÷Ta§WÛŒ©ðv"Udó™ÂäÖ1c•è¾åûFŽ.œÜ3 ,ͤš´}ßåd‡ŒÂÅ e%‚{/å{ÅCceÈʯœrqÈn¡ gmåÖp™‚Y.ßÁïA¯N½¢»ñLÊP¤Ú<_öë¦Úó;+¹²i¼`myÆ»dRj£cKæè7')B9ˆï|D 1P£XcŸ´ؤÊB÷7abgÃ!ÖꉙDC'ãìvúT Bäjc^4!3H2IÂ,‰œ¥S𠏓IS¶ß™"™‚;pÏ‹ÛÇ=ýÏÆí‹Eäð4AïÕãK^‘2†WÄWôQѨƒe»[…Ñ3—·ö÷Ò,Ìâĉ~ wÑõ¯¾‹PGa”EfW<âoK§ö†Šµ-èqgÑÐC´t³v%ÙPAâÝr8xXA"‘–plIVD¿p2‡ÏvÒPÚeöŸ¢ø†šÚg=\ÅN1ÌÆkÆ/iWBºÈ¦rºª$õÙ>Áà~^nÕü²Ý-Ò¼”Ÿ*VÉKh­++%· H«…“‰Õø´A†Ü¬ñ×¾‹[Þ¸^mXš\üÀl¬í»Øš\fïNÞ]„/\×ý‚zibçƒ_ÀÒ ñc®57ºüåìæ¼»:ÅŒo½¶è†”ÏZ³ò.W–ã˜o*~¼’1Ñõ…õ`cvNa﬈ãjGòÇ® ©ÿ¯Ôþ¯Ô^¶RSj\©Aæ‰2…MÉ4ŒMê:­Ü4Ø45QpVßagSÁ»÷´¬(²xvC]̧™Á–F½êoys<±»G¸y9Íbl§ È)ÜÜ=Ç9ϯ¨0ãNÀCõ” ÓD{©GõNG nzHáöÍDqˆ±Ÿ¡XÙd‰mh‹ÄOuÄåXåL)•M±¬ïÞ }Û¤Å;jbp“Öµ-ÜëŸzËü˜‡«ch¡)J·pmVÛÐw;Ï­é¾P¤õw׎N~âúùùœܹšT¸TòAÒnY¾ÆÎÁÏ—g ²Î«².B?XUa»#„c:@7‰°¥¿éGôyTçk;Â-mÛ?¯ï‡ªftŒá#,‹OÔ„6LÛ+ì#ß>>TÅÚ~[èʯ…÷㇄ÌÁ_: ~&PøÀ÷ùâÂûUâ).ŽºG¤ÞùÎb€„É#Lÿ±ÚWo¯žúÌcÜw/CÀ~RÑ ¯¢ÇóyºZÞÚ.>4}ØIS¨O‰°AÐWoæ¯þŽÂendstream endobj 89 0 obj << /Type /Page /Contents 90 0 R /Resources 88 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 88 0 obj << /Font << /F32 28 0 R /F31 25 0 R /F8 9 0 R /F28 40 0 R /F26 65 0 R /F40 83 0 R /F27 93 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 96 0 obj << /Length 1694 /Filter /FlateDecode >> stream xÚÙrÛ6ð=_¡ÉCMÍD,qðêL,7™¦q3Š¥3Ih‘’P„‡¤|ä뻋dʆ·/â{`/,v5]¼øù­à#…y”³Ñb5’I˜æy2JcrÉâÑ¢üÄã KxvÛ˪ObzEß«V/«®Óv{ݪ²#ðsGº-«¶*-i¡Úî{FŸŸèóXØøËâ÷Q4š0Æ2çFFxINŸŒ>€c<í®‹zW=’\?ÊöBKûè•FÀsÀ÷›¶ê6º.}t çEôË ¶^{k 7à¢êz‚ÎtS†¾éz u滾jíÞEµrÞõªµá|-1æqÌ1´ ÏCÎxfÔÀÂÐxÆðÔq.s2 Àh‘Ê`¥qÑâ,CxSÑb¾,ÎOÇ œ›Ëàt,xp†è÷„Æh‰‹=`hÒ•ªXãW7EÈ$8–s$XqkÐ;sè½9Ù0QYæ"‰Á¦(Œs™‹QCžgÁƒÇ‚o úŽ6ûÊX×Óê ­^·Å–Ökƒm`·"»Á ËAmÈR[z#ýv¶l«¾UK'y³@wªW€¾è•¥“þ9bàFõVáKN Ì ž§ˆD51é4Ù›<̪bMnäq240NƒmüÀ¾ÄMt³àFõ"¾ÃÍq$W }û1Úîqô[k‰i÷PLP({ž ‰$Y ilœ• d(êzL®- ’›dš3™‡½§mô¿If'¬óÝ©^£¯£`U,{ã5€÷žØI×Ã0d ޝè»®¯L _¡×#JYÄ.õöj×ߟox Ë©ë¡ÄvO¥ÊfNMdûTjvKäÚ˜}~£mgˆJ\jugÑ“Ž¡ ò9Ec¦Ã4VB!„6EGåhÕ®ÐÍíÖЖ„Zêvoβ¯ïÆ™„ã9ÞœÛÓL”•Mlr€_ kQt…Z§ µYõ¦Þ!÷©zKÕÑv·»„²_Iø>VQô™ùÚlB–•6y¦ñ˜“ƒïª%øE‘×áÁŸú¤daÆYþ|]^{lއ¹aÒÙ#EB»Ê¥%j|ª$P³²ÿà¿9,šorrDÔÈ\äN#¯‹ó0’ç«ôÒëœ(KÙ½sì•â Ñ8—a” l— "L ÛJÓbÍâ(x×à{7‘تڴ ¸J ª¶´ ]\‚m¢éã°êHÓõZ2û” 4… šBÓ"fj. B’ ¿Q®A$=Ø]Å5Ôüˆ,„;Ǽþ¤†·?œÈP&;ìOwu±ùÒ=4xö-1¯Þòb_Êk{}±ù·Lô:!/ÕzW)˜¬m™[¹’1,=µ¢–üÊ´6Ôx"VQ_d<ÓeÓû½ší¡é’ÙúÚ@Œo@f©8|ÃN(.§gï_Ñ”Íféf²úëÂN!©ÏvæQ” ®T]ø:“Ù_„~Ì ÎD)$4q‚jŠmu?%ª•nîŽM€‰›žÊêZ--/޾ÃÏOÜDVV+0¨ÏõÍ~xµß?¯®`Ë#"uÇ5þ‘÷²ÆžÑ8M}¯º'¦ÌØŽ—©:íÔ*ìšq7B$"м“ç‡'}âSƒ|£Ê~Ó=ެn›À~.c¿FÓ¿È=®Ò‡é‰W™‰=Ohƒÿ:ðä˜?¦÷ÙãØW.ªÅ®×ÐRâ<›€ f«Ó3=ñÊ_ü6?ü«±Ý¹ýÒæ;&7sr½V€ÜÿáÅÚ¨u£Ûªü¡e~?¢ Ï%p+ð áß@?L¼GÇÞ=q¿Â/}ÿA͆GŸéz·mº—¾ÓÅ‘ÿjõšðhHè|'ИŠ\¤¤z‚¤/Þ,^ü É&®’endstream endobj 95 0 obj << /Type /Page /Contents 96 0 R /Resources 94 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 94 0 obj << /Font << /F32 28 0 R /F31 25 0 R /F8 9 0 R /F28 40 0 R /F26 65 0 R /F40 83 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 99 0 obj << /Length 2477 /Filter /FlateDecode >> stream xÚ½KsÛ6úž_áÙÚš±X ²³=8Ù¸M£¶’ìCÇm´DYL$Ò%¥8ίßïP¤«Iwg/"ž¾÷K/o^|s¥Ä™a–$òìfuëÐd™>3‰ e,’³›åo Gc$ð#dÜŒ2í×ÚÑX™8XÕ8ip’´±.xr½È'—#LGY\Ž” ^áö[ÞÍ/x0)ÌìdÂÌ&veF×í$¯–<¸AH·m·x¦Ø5å‚Ïíz/¥íæ;W.à²à±Ü­qd‚£x°ûÂn–’{6î(*LâL…pŒ•Êâ,Ý«ø €Óö“à·±¸Ÿc‹|³iyýÃJt:Jˆ ½zËË ø_v,qWkü‚Šä‹]Ý|®Å³Oí® ÒíÓ9BYòE½}ØÃtׇšÁÑR~iüÄl,‰é  JMA/•(”=Ò‹‹EÓ N8\ 1ë>&xÏKˆ:~?àVǰrõd·×$tœÄA‘ÓÍ5oÕ£1ÌŠppÐé–OÛmrU7Ûbéð@EÆÕ¦ ñnó4Jã $н¾¶àèxù{$b¸MR¼¯Èxâž;2cÄh J‡ìúÈ¥E´Ý߯ oï,.N¸Ë!‡œ@ñʪÞà«5>óHêpïcM'9|Ë*:Ã4°Dš˜Œ¨zŒÏší0¢C¯jR4Ô=¾£" HÙÁlŽ_¬P )ŸœxàªkˆC§¿‡ÉëÙüꂜbâø¾¨PÀïXãºÁÖõ}]‘¦Áá­5ê–nݧåHg>rÁëH[rWu-Èð>ó¨÷-±×êÍòàŸzšÌ, k½b*:3$‡Tëô¿)‡EŽdY3”8ꃑ±{Í «ùk8}ýæßž'ãx•$öÉï|$@DKDâ°z÷ÎÇuFނ޸Àd!ÄÀÌžù=Š" ! .}€ÒP¤"ýrt¾ó„që—Ä‹K Qxɇ**˜÷˜EC”aœhQo‡yå=¼C,‚+ä aNhëžã†µ‡îú œ{{©-‰°Å¾mÙaÂÕGG*&&¸0GiÔßc®ã£; X‹ SþBL~ Ø%}A}W²ÓrQX—W8v‘ûæöâÈÑô=y¹ƒ£ÇD`(f<ï¢f<.O?Â>g˜óÛ«X©#·-Ó0RÆ&É ¥ÉÓä$ ÞT˜ŽÆ* ®@Æ6ã‰;Žð4dZc2MÊ+ÁDàŽÍ³²Ä¦Íoy%Èy’„vfv2q”Áħø€PæL¯Þ«U©bÈðùé,z‡qgž.‹¾´¼%³ªêDEËFÒ§6qæÀ°±\'äKœsà]޼€™ÌÆ‘•“v?Úí8§³ÇI!KÏm\,­•bÓ…DÔ¥G6ºtÇ©Óæ°áD-T+¦F Ò9‹èòÕ[ào•Ñ/Í® ú8· )k_Ëk%«m—›âܧóç?Mßð‘m¾Xƒ ü§fóduHCjaö=Í£*ßÚ†ŒrÅã¼zê\îX ð´„’jY| Ï£Nœ:[í·wEcÏ®¼Ôû)8ç³ç³‰Mfn4ïF³©ÝÜ"½‰9ÿ-ïSAIƒ‰Uà`X+8B*>EÓCSßmŠ­•e[~*¼„IÞOø#”ýͥ毊zû^þo¨¶ë¿üÓ‰w RÀdÏ®ËhøŽ:õÎÏ_Í“»M½xOþë$Obûø€5öc÷PD$ŸaÀËPxjÉ?õç—GEkáÜ7å²=ØWÝ,‹}ÌËÆîMùóO«ÏÙžè0&ù ˜#$mYöŒ§ :¼ ŽŸ}ôÌZ…Ú¤”XÙCL_cáÁNtÐ[âx å{¿«¤ÿNWi>‘ê[»9)r*d±áôç>oºÔǰX&“kb¹¯UÄ~M»Hš,ˆ80½þ4)êF×&Âc.Bµ¼½s×§Xj%ÃIƒ%%*ýV-»¶ ÆF{w5âô·¹[`$®rÅCm½Ùc®Ì‡P©ù EºaKc|"ån[–Q†Y ÊnÔeäÍö:/9 Œ…Vxq³7yõž!µOÌò£q$8€ “‰ò‹! îmÅXµ¾CfQ(’T¹JåW¥ „Ç£ZQ•ÖF c¼ô©ÔéT_Rz%ò鸆ObéJžÒK˜‹Î”Cfë’…‰éhß¡ ŸÆ^PihR“ kÁgKA´0H …Éö‹@ÃH)@>¡Ñ&jE¯dtÏXD¿•„Bö:^˜­§’-VAÆBHŠLr2„lêÒ°srF¼|F.B)ºù¿ÄEš3f&êw/$¦)÷¶ðò3Ò)„½Þ± ç–Ê $(CFÿYdašÚÎwÊ_¼¾yñ#ýcendstream endobj 98 0 obj << /Type /Page /Contents 99 0 R /Resources 97 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 97 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F28 40 0 R /F26 65 0 R /F40 83 0 R /F33 34 0 R /F32 28 0 R /F27 93 0 R >> /ProcSet [ /PDF /Text ] >> endobj 102 0 obj << /Length 2434 /Filter /FlateDecode >> stream xÚÍY[sÛ6~ϯÐÓššV(®°;}°Ó¤ÍÖÍ8Žw§3m‰–˜J¤KRŽýó{R¢JΦ3Û€sù¾ðâæÙ7/¹™0J,µlrs;Ia©­8á’©ÉÍâ—äWÊôô·›~ór Û5©Zæe^gm>qn’Ì?æÓKªrÊ’[E×hór í—ª¡»X®\Ïl•¡øÂ4Å"÷3ót0µ„k«ÃܱÕ1"¨áA¢ÙΧ\'+T«“v•µ1µR+y÷ÎÏ1­œC»=_fQvzÃök·‰¥ûg“æ.›‡fuÝM‰1ºSû<2±!Jëne_ƒ2i“yµAcÞÁTÛÖÙi2Ê¡¥šÌ˜ JZî^w!'žµÎÞQH˜F™c­‚Š ±m ·QT/“+h>|ÿ~/ß~ígÌÊ6xˆ„ÍÝvŠÞ÷òí þ„6„NQ…% #’£`ç°c¢¨anÞ÷ïc«O Mi–vþsL§D_)¥qg`sYüÀ¦’'¬äÛˆ‘J¦ýJÝdøˆ)ã†h£»mþ–LÓ¤Œh…ÝY-ÍN-‹-_BfsùôõŸÇmÀ ³½’÷àɨ&HÆÙAJ UYb¬°O_ÏI"¹a'ö>ã–)@× Ìj-OG¸WñÜe°6aޤ.bFÚ#Ñ*ÐÒ$›ªÆØ—:©+„¾˜!a¬s"ºáyµÞnÊ0‚A‚0Jr'Ó™4)¼ à“{¬Im‡µÕúó½\zm™Z{)€ƒ¦¹™ÿÀÔÌêêG²™ÐÂN%4'”I~„+öv©x.idây#wß±K%V9 }=†äÝή㚦LíQº°B·µ«jéØ2[û™Ú*<‘XP cÆOJ?ØÅ)‰G¨±„r#‡é|¸&©UgìwŽO,0-;pé€xDÜØ"ÔBN^~B†³q=Ñh4øH¤ZN¤OÚbz è1À~{rÏãøµÇ§ÿG3víXÝ$«&†mˆUê)¸®þ \7Ÿ‹ëM~!¬§„[¨\eÂÀ:’‘êª3°”Ýu.¬OÌ*@0—4Ô=ˆ¶ñÏÕ=ˆîøFÿ0äàäˆëL ®›NQV‡F€ÞU]n (rzîÝõ¢C÷6¯7E™/¼HóªïMÀs®L¸î…9*øÝç>À+Õâ ”ìÀo&)¸f·M¨ãÊ84"è§G«kaÃõ>4RáN åÁ|=e¢@‡€Ð«¯-QL²ã$mÓCô[s.ÌÖnKß÷L§“ ™MC²)Oî³ |¤šb0XÑñ¬Ùq¬m•í!`xj!YS1tdéÂVhg´éè¤;~Á!"¹­0zB¯Õ®`‡ö›kÿÄr s&jÂâcæ"À)ˆ»ƒ FïIò»8!1&E¾P’¤É/Qß*Hü…ϘƬÎÁ\¼¯~ó+,⣉J•>QÔ©T­`àö{8rš[A¹Ì>§™O(M‚wb_¹Ô@‡\¾ ^— ,“¤ïm6?a,ô‡Al¢»øhý %ÑRÚcÞ ·¯Ã¾Åä0#ÞtÒü_lçù=¹#®g¸.z×:­ µ'\á@_ˆÁ´ÅHÝ—\#ü‘R}¼†4a½YÏ£¡±ÇÖHô—øÛ€WñkÅÒîµñÅi ÷§/.–ÀÁÞî•GѵìF˨¥M繿Vêˆ ð2ïéþ«¶O;éë0O:ªQàE‘½ÊÀkÂFàIð~uGÎîCZëô³W:OÍôAÍ51…³¯‘zˆ…ÃjºîxÜc²î/ëü¸§ÁÆóc-Fá(a,?\x‡–ÚÐ Å•=\æ)ÁÕÙ~_ ×}ø}…•?ÎKÑXãK"û*ã©Ãú£Àa¸ÅtJ HÎÓ¿Ä2Gãmpíä"fY¹o¬³ Ò@ö+fðVF9fzI&Ñëü}A8wE¯g”è $À¾—XDxA‘Tãl$©ôùò>6—®Wb²'wz¾¿æ™r<ƒŽÐ>Ñ,9< IØ:€Þ»Žæ‚¤— آɫòn‹pcDò²XãHîÿA%ê7S«’¼ÁË’Â]‚,}¿1#“·óìFÏqðjjSh  Ì’½À¥»Ÿöo]»c Þaä1A°Í-¬™…E¦ãá²»ÚF‚­Ú¬Í¾ÇïÖÃ}Ü®Ãõ6TVÝKîéÈÙ¶ï@ÄõÂÝf…\·yÆ0r‡è\-kœ G×l:1|qw«&õ§–|½ ÁW|à«Ý¶Þ¢øp>CÞ_?ÿ/È ÌDb£ð>Ãæm±ÎÏ\žÒa‚žýtõÊ‹l²ù ¬—º|KÀgŽN)EÇ TûV™mBË–o};+{¢;P›b.p¨Œù}1ï‚èdËíæžrl˜¯D×ù¦ñÿšâ£šCJyH÷¬ÛÐ}¶ÞæÍPéOQ~L³= ‚A+¦áõgoäõÅYtåÖëÞ~ù¼ dDűýŽN,Œ¯+>fà‹‘Iå‰h™çMPµ¬‹E³‹Úª^ä5†ÌŠ:Œ]ùÇßüãÍXDïíâÌ?ä 3ØÒLgÖŽ˜òê´j9˜AN;ªúMÔZ„vVnWuÞ¬ªu`\Á°Ø°JqÄŠl—Î&p"•F†€7ÿh›¶ûXân)ðO®(ú…£°ƒ¬©ã‰<Àfž`ôÕkú·o×NÇ6pJ” XJ,rû€ nÜ_‰ßWö®ßwâ®ú'àrQhá.H‚ïê`½ñ‘îû[?<Üà Ð)Ö»Ïæ0¶ÉZwšÜÕÇø`ìÿøïókÿ§t‡þæqãˆ*÷zæ8Æ]|~_¼ö—„®@ó›?¸KèU 8ýl‘Íöf¯}wžkOH:”}¡&ö_Ê·ëÌuùôø× ^.pÁ²—S¤ÿëîKôì:+>vŸF6¸¥¹¹s%PÕmÑY»9Ú5î{4Ìæê,ˆ‚»°:Å7w,ê5úý"e;gå?ÓÀ¦B5ì6†ŸÈ¿ %AxãÒõ¡œcã ‡˜SRá7 ¼A¾RbŸ½¸yö_ˆñ²endstream endobj 101 0 obj << /Type /Page /Contents 102 0 R /Resources 100 0 R /MediaBox [0 0 612 792] /Parent 84 0 R >> endobj 100 0 obj << /Font << /F28 40 0 R /F8 9 0 R /F26 65 0 R /F27 93 0 R /F33 34 0 R /F32 28 0 R /F31 25 0 R >> /ProcSet [ /PDF /Text ] >> endobj 105 0 obj << /Length 1868 /Filter /FlateDecode >> stream xÚíXKsÛ6¾çWhz¨¨ !@€ÎøàøÝ(ŽìȽ$90e1¡HE9v~}w± HÊkgÒé¥q±X|ذØÕ›É«×'"ìq—EnÄ{“yÏ÷÷=¯(Á„äª7™}t>¹<|žüùú¤#[“®:Ÿ†BFÎé€;Ç{8é`Èby»f•@µ0Äèš¾óxZe "?P<®,ršÄ•+d§9P7Äß}z<¹:1ûÄùŒ7 §¸mr¸›raÆÐ0X"tîð')5z‚ö%Cî1%#¡Mú‚ƒ¡çyzIDKŠoi~CÜ1`Þ£"ç䟮3‡5=‚)®ü—yÓ­¯'WH€?JbŒ¡çL®öˆ»JPùÊÈŽGïAŸýþuŸ´†Ã°Gï÷ûøé¯$oÄ-øü0%$p4ù‡ pC½7cFàÛ¡EÐÚÇHÿZƒ´ÝˆóoøZz’ÜtD4ºNÂæ£ñÑQdBÕE¢«q–ä[#3Q&z!:9 §N+ãá \+ѵRE܉`ú¦ q¸(²dõíÁæÀ·}·^5nÄ;3~Ow›`MשZ˜[ë°j=LXnÅH[S®ÌD' ß„cIwåt8ÇxP'¸þy7çyè°X’Ã7xäHÜO=x¨ é¶a„ìž“«cþp~dÙRJhE[îÛLà¡j­¾~52~'Û2×w}#rp`D¢^À¢À%Ÿy¾ô<©04^p]#©Z’>S¡_kÄm LS½¡Ëâ‚ØÐóÁ¾ç›·oq£bRÉZà“«¬ºHxˆ 6$”ÛTÓeøGÛ0BÆC=ä7«9nðÆxNÛG[¸ T€g;d!—ž÷XÀðÜrøá^Ïs|½†d½“4£ô©GsÕ1"å@v|‘â¼¾èa}Ø!³|˜Æ#˜=ÀÉñ òò¸s¬·$p~ñWÅÚ`Þc»<…‡š3Iºy+‹äEW‰É%:-`²‰713ׯ˜o5y‚n*1à†ËÌ\Z2TßÞ&ýT”Nø-Þçâ¦Ätr¢bI-† LAßµq´è ¥â¬™`&N¢§ÕŠ)WJmu}z0>8| IT 弋«2½'ú<¿ƒ—.ÅÜ‹ÃI²ªtöÅAJáDržfIß–ÉûïÆç$²Œ§‹4O˜] ÂÆ `ðÀp8KʺNT/ …+çõÁÜÆõÏ€7KîÒ©Y Ø6YUËæëå—¤4²óZûÆ'ÕÃ-F¬V#Oî¢Ø·K“þéñEŸÄúPÕä¨!¯ÇG®&¹eNu݉Ük]!!5ÚP×ú)×¼ñ‘m[ù”Q·eñ%K–Æ–UúƒnÊ6† yEîšo=öè+ ß3_…ßv¾‹³uí¯zç‹ëzñ¦ÿ”rF ÙQÕo4ã(׮ʛŸñÛ4Yœ›2µNCQÎ’Ó2Æôù>—»Î7z‡vLéÚ†Q´Ã§ã' »˜¼ƒÙl»úÒ ÍЯJàX@IõÜÌ$Ž•B¤”À¤ËDÄ¡y0=…ø$`BD]W•®Ú!—r¤®Áp =¡“? t¢èÄ/1ñ HüCÊü0{3WGHûN©1ž|  ‰<,Ú/ÀD÷q¾pÎIø¼•œq„y½4ÉÅn’<)c-ƒC]Šå³bIÒy‘×ÞÃrIm ФSïbë3­¡tÇÜ/…ž¾(ß1±A47 ÒK‚nljljš–ªîJ„«=çãP?Ð{0Ë?cã@ƒg™Þ¤©’…ÞRWÒøZ`•€¸‡oI²,t•ŸæI½´0Él]¯LjÔšh9Às–TëkÝwAáÖô%±Îh-u‹ÚDó–7Ö–¢@\½…Θõ×2 ·Ú@•‘ëÄ85-å g¡ÍÁ—™(Õ|ô&2ÉÁú/iý[@# ~#Ú„_ÓFbŒçfj¡ky$±^µ è´´eÜè°ˆ­)±Õ—¡*`ù²®cÈTä–¦o£†MW =ëñù`àôÒ‹™Í9ÀV:ÛôÒX&mTÖ5޾×X¸è£=’)‘êzf !ƒükÓ¨_Óúâq­4/²Œ  ÀùÞ´`žî/á4T:*Xû/¸ç°:Åϳگ«Ö °´¦6°Tב`P]‹n¾ÝQ…<¬%j‹×xë€ëŒì³÷º´\ZÛ´Û€3›n ö×[­í)v˜-øÐ_rÆ£@tûÚK#´Ú¹ùZƒV¯6±*b®ƒížÎ³m@9 ¢Oô@@Bšæ8—ö8ùÐo5ÇþîæØæç Ú`0û\0WFÁOüå£ó™â£±¶Îk¹ûß½3ÿoÿ2¸ÜâÊP>'Ša7[8‚yM _r~ÝA~ÆŸ»»ùðÿ?'þéÏ 8¾>“:Ó+Á8çT­F8÷êxòêoh¿zendstream endobj 104 0 obj << /Type /Page /Contents 105 0 R /Resources 103 0 R /MediaBox [0 0 612 792] /Parent 109 0 R >> endobj 103 0 obj << /Font << /F28 40 0 R /F8 9 0 R /F40 83 0 R /F26 65 0 R /F29 108 0 R /F25 12 0 R /F33 34 0 R /F32 28 0 R /F31 25 0 R /F27 93 0 R >> /ProcSet [ /PDF /Text ] >> endobj 112 0 obj << /Length 1516 /Filter /FlateDecode >> stream xÚíXKsÛ6¾çWhz¨É™Á“$:“ƒóðÄ©›ú¡:I´DYL%QCRvÜ_ß],¨‡ yl'Ç^D<ö½‹X½¾xu¤Ô@pf¹ƒád S–Y›2#™Ô †ãÏ‘b9‹ ø†GNj媋•«è¨šáNI³IÝÐ`[•m,¢®ÂýÅ­wSG©£‹Qq»‡¸yÛFJDoaéw"øpþŽçõÊˈµñ×áÇWGù¶Ý lÌÔ ‘œ«Sgöá$(ȹ¨»¢+Ç´âÄ|‡ÕbŽ£%Œfh˜ÒQ=Y3¹o…¼Hà<†…/\h"îýUè‡:ð„zòe;õUƒšpåµ4tdÈXÇ2nÈGÇt'2‹ÊÙfƒ‘ÏJn;¬½6Lhi×ÚÃÓ÷H# e ‡“jV Ä$B1£­$æ?N‰d^Œ¦Õ¢da2Ê /q’噉`¸#›F‹bîG_¸áÕ„ÆÅâ¦"$6O%yãòºy^¢=íb5¿,Oëu,›úrVÎ[šµÕ¿e”Á9Qâr £ëb¶*Û]iŸöèwœx$øÆã“?@íñ‡ã';þéÍAÐS¹¶?~¦è£écâ$Í÷EàÍsR0*Û¶öËWM5n7ÅP7ã²ÁÓèH‹ª¹£ð”>¿Òçl_Á캎X›…=8}€]¯¥ìe? úÏxŸýnÚ”í´žý1!˜5Fâ16i™0©S<Ï9€à¨íוiœ °€1nÃ&LÜÉŽ`jL €™bÂîÐ!& \Äc8Âû S¤ÌªÔìbæÐé²¹—ƒŽt4C$lz8ƒùU¹(›ÂAž²6jÖŽk¿ÛÞÎÀ–]Szqkt„Ù¼€à9*/ä¦êbFGq›4òÁyµðrq1-ÉÚ“íì!€g×_ÜBGŸñR|…½—8ÏI1›ypfƒèÂÇý4Î1è€Ëˆ¬(Ä!;\N[“[ïj¿ïÇxwŒÊûR·"Ä.ß½w'8¢íRœ«›Œ«$T–ÅU½p~‚½”ÔÌÑK›ôŽiyT¯ï½å.`¦OÓ{C†wU½ ¦†ìušWxwΈ¡lºöA.†|ZŽ0JÿÐ’ P]ãVÙThÑ­ßšœ¾‚¸i¥^ˆ E‡ Á†€§Ó¢õ—¬'öîzóÝÖ¨nÖºÙ-fÑ]¦ƒ`Õ\,éêuQá]ßWgÅüÊ ¾" çèÔ?Ü)Æ{½/êïþÑ_÷+¸Ö‰¾‘ðngâ¯ï?%&µ+ÉÕܸ:»ê•Ñ¡tù·Ü9Ý ê –vÞàOB?äŽèüNÙâ“fË&§ò殄æýÑö~Ã:T1Ù“îèІe23^ÉaÈ Ár‘KOÑÇ㮾>vkµ¤tŒ_—¾›!SЖŒi©µ×4 ÛbdÚ[»j7Á׿ۋ¿!Âë%Ó6Ma$™²ô„>ó¤Ù c6ãŽ2g©Ôj à¢ÈómKv3i—y½ ÝÎUHwÏA Ý ±¼›ê”§žâA1¹': G'5ºÎë`Œ!€ÂˆB Z,ï³Í‚5,¸e67º¥x|ÖÁé[=_"duzF„©IœæÛò¤fœC¦vt¿Ö‹ãw´1w£’ïÊ·o¡È¥ŒoRt¶G ×¹~J‡{Š®õŒj¸_Çò™ũų“Ê`!çLX<>¯ 3L½9 &h‹†>Xö±^„LI!íO°d¿;öñB~ ºÃóLlÜþ1ªváIiH€²0ÊYf²¾Å·?½ÅÛ7ø÷:}Io( ¢g&°ûG+t\N\ó‚Ä'õͺ¿ôß¿–KX 7uNÙü/úŽ0ëûôí†]˜ì ÿ¨ÌÙ×­ßkÌM–=Ô˜ß9+Æpi×væyFu#º½x?|ñÛïÒÍendstream endobj 111 0 obj << /Type /Page /Contents 112 0 R /Resources 110 0 R /MediaBox [0 0 612 792] /Parent 109 0 R >> endobj 110 0 obj << /Font << /F33 34 0 R /F8 9 0 R /F32 28 0 R /F31 25 0 R /F28 40 0 R /F26 65 0 R /F27 93 0 R /F40 83 0 R >> /ProcSet [ /PDF /Text ] >> endobj 115 0 obj << /Length 1842 /Filter /FlateDecode >> stream xÚíXKsÛF ¾ëWhz¨©i´Ù7—ÉÁnœ6fìØ“ió8Ðm1•Ä””b§¿¾Àb)QòÊeÛC/½ˆËÀ ðA'—ƒÇÏ” Î2ž‰áåõP[–f™¦F2©…^Nß'j4N]j’×ëÅUQÆFš¤º¦ç纚MS…훺œ6´üÀ ¯êiQÓÀš—u³{úŒßÒ㎈ÑÇËC> ŌΤ7@]ÓCÀ#s¾äóuqOàáãb+åÐñóØqÁøhlW³ºhfÕ|Šlà:1‚eÆHtœ’Žçè”b£±HBÚär”©¤hV Pòkt˜J-ˆ,hçb’¿:>e:9)™üð’¶OÞ<¥Eâªõ å²ÀGCf¸n…e™²láÌdÚz[.Q D39Ác(_Vx{‘¬èíóH$uuSç z¿ñ*–°ë5,òUPëê‰Ó*ð/«eóu±ð+ä.'H°{jùª.:Á½¢!–Ûr5#ò—‘± ¦Ut•K˜ñævÝøÀYÏ­(¡ÍÒÖ:ÈBOÞaì#òH|„õ#Ø6:™äóyC[1¼ †S †ÁÐ2ÿ¬W^CÑž©è 9¾Þ\꾸à·½¡ Ð?w |ø<‚$Jð#Ã-H’H>Î+´éBhÆ*Ë’+”9Eóò›j‰÷ÆÝkx¯êÅ#r4…Œ(ÞØjZ×>Fšó$'Z÷>þÆÕ²%ø`âÖtÝê(êÚkiåÊ4™µœ ™¿oŠÏ/[ÓQÐõ×@žù 2‘œûÓ³˜*ôTÚÍÍÒÛ¨T2ËZ´Î >ð¼S"M*o3™¸š90¯'šbA›wHù Ýz| <¯0¥Pt‘ø7ð,ÞHéMæß¡lh»Y_Aí1É'¾j­ †zŸá3x\WsÔZ¡šÛCzÓ*£<ó1úžªƒÜ)c¨ )l¥`FfTßá>i¬”„%÷Lo‚U-“`G×&»» ÒvÅd’YnesS$˜®åho¼÷=àVž›/ Ý®|J–˜ú#¿ ev³IXÍ2›¹ ò$f”dÚ88ÖÍÖϺ „ñÇSøm jDÇXš”9è*ÞÙ"ãT‹Ïo:LY–rÏꘕZ ôç¨bÇ‚—1.]:wø>p®bÊA3´ì]ŸËýèBXÚèö’sÿJˆãIÔÙÀ"ŒxÀ׎9ÙŠ`Ñ´ðâ„Èv}Ù+qó¥ÿ”ä¦àae[ER¸ÆBâ4ïÊ“šq‘ÚÑýì ÆÿâùÓˆZƒSŒÙsŠ;ì”OŸb޳ŒCˆËù)\;Ý'ˆ{¹~OŽÔý’Áí&Ã^ᲘÇ<š;;¡Œ&²cªGG>‰ÌÀ·®íæú&j‹ÉÖע)Âþ7,9|¬¿o¢×á.ÝF“@5àUµ[Ÿ”a©ãÚãVˆ[p+ðx „ÈÕðäùò3¢NîY9ßð­A,籂ÆÚ'l áP‘×"Äɰõ’åÂâGXc‚X÷/¦¤ ÌvÑíqèûÜ,«ÀÔÐ<}˺Ã~°À6”yøØñ‡CÔŽñ,a!ÚÀ&OÌíM÷ºð×z¬á!Ág¤¢¼¶Ã#ÛŒ6v-«©iª˜o ¡úíeãíµ;]ûˆ@ê1‚Sœ]¼qQRÀpy]΋£|:úùì9±,òÉ æ g¡ BröZB`¹#›VË|Qlg¾2ÌRùòë¡yΆrZ|)'á(Ž6Övö\ÆgÏ«y±3\SþA9so~³aþ ó£4á½}]€áщðç˜Ä,*HäCâë#êÁk¾>9j¸¦3›ÆÚ×'¹öÿ±¾×X/ºc½ø—cýÅ)ÕBxÑû£}—nÇz© yóPFZ Š„™†ËÛ†X¥mÕ¡í½1 3ùpžÂçVЋ_Oß>ö¸÷'Ä=§oíÙ¡þBóúÆì®ã7€ììî˜áìÓö,~ûôû(0Ðö€0•2ØR^T8M„²—Ð…`VYM¡ŽDÍ,TÒW§)@FPrA¾"¤ò[Ì8Z¼wü. ±äBÍßB¬xç|¡"yg˜½QƒÓËžì U2žrhø0~·ìp²ü>xÿ‘§>|1àLeÎ oá…¨…Ð,iÝ]ؘ.ç‰ãÈqWæ þ‡¸{‡­n|\R»Ê¯˜ì Dzf§Ú[÷׎Yc*cC?äèÌv= —í *£2àûWLÉ¿I É'd¿ŒT¡v=œ‘êÉ!c†ELH.zÞ±ÍÈl¬C¬å°Ç0©Yª¤ÚÁëï”Ûœì—âÏc‚úæ¶qpÉL÷ÉmåÞñ¹Ý‘Ím˜ÕÁ±,àÓá¿Þÿ$ݘgij˧L(C BŠÖw"ߤ7endstream endobj 114 0 obj << /Type /Page /Contents 115 0 R /Resources 113 0 R /MediaBox [0 0 612 792] /Parent 109 0 R >> endobj 113 0 obj << /Font << /F32 28 0 R /F31 25 0 R /F8 9 0 R /F28 40 0 R /F26 65 0 R /F27 93 0 R /F40 83 0 R /F33 34 0 R /F25 12 0 R /F29 108 0 R >> /ProcSet [ /PDF /Text ] >> endobj 118 0 obj << /Length 1936 /Filter /FlateDecode >> stream xÚÍXYsÛ¶~ϯÐ[© !‚Kïô!M“i;‰ã&Jn§7} mZbM‰ IÕv}Ïp‘ 8wÚÎÔ ž gù¾]>yú2ÉPda&gË›Y‹$ËâYb”P‘4³åõÿ‚»uÑó_—?>}©â1µ–"Ë´™…Dö “LøI%´”‰¥(ÛùB'&èÖsü¼É»¦¼Çç(¨oø·(WÅv®Òàwü„2¸êê¦å݆:`±o¶ÌH×¼w·v [^‹¢õÞW–¢—på8ÖÕdוµe_XÅWå Š>óz%NEjTj~ås‹&LãÁ-@2[D*1üÍR eŠv{ LlP08 VÈg°D>‹²±ÏLäÕ¾h‘Ø«¡T1<›ÌÊßWó…’ÁΧh&B™HKØ»Æù·u>Ì‚®eU.î_={ ûÏqçû¡ Yµž/qåçå¿uñ?‰´kùöÚ§© cˆÃØP~Ùv5H¨<ªj%ÂD%ªê‘²ÐibT4õüÔ/jê˜DÄ*q¡ÿ1 5Ð*Üzý­…†?KüÌÇ/2•î@n=æ™ÈÉ0 ”ˆÃdªõrÁ« cãùã:†Ø©ªƒä,ÖP* öèÑ®ÜáÏÁ·üÖŸl8ÑQã~Í¿—`5å¾PÕì禮ªqnÂö ¼7õ†_r–ß–˜F+«)Õ¶;ÔÇJþʨ*Ä|a´ ^Àz~…¯Ù~ a¬cs/VS` 5©ÞÇ­ ŸS¾¾D_=ðsŸ‡ðšR;W–ÛS-»}Çìeð5iI9­ ä|‘Ž橆³ÀVXM(É9}¤¤Heª¦–3Å7üC95Ò©/¤ÝúGaCÄ1RJiÌcâ/æiœñóOö÷üÛÓ:]Lt:¿x‹.yƒÿþ{vì7*Pxw"ÏúúT5@ûÑÊ¢–OîÐÐÔ¯}R_bî«ê Ô$†)lFf88CÖxX=;æuðEtÙ¾åpDƒNâ/G€ŠáG_tޏð×éW¨‡R`é¢(øðêìÃ{„ A„VØ€ùêfhÓ^qв‘ª½ÿþ9‡”|â6u§Îø«<üK\RYvÿwÆ4‘ß•P5¹¦‘SJ.m–Ù*^7·í.¿²‘cë6Z]^ ýŸÓ ýž$hÃ*%9ñ–>Û1lYØÐ†Æâ´#AןiÔËõb×uu}º8.]1ÂS®Ö‡˜ç8Á û jâ ↗ÆÒ˜Y"²$äQdJ"豎#[š=¬"‘h¥g#²ÿ€Fqìeš‰$ Õ„©ò àTÀt•Œyž¤PRÉÿÏ ýE©‰çÖ¯weßt¢¡ÙáK~Åg¾ë†”¹K8’4Œ¦=óÙeË ¢³<ºº*šòjÞó½w¨³sù9ÒŒ mÓr%~Ûî›Q04dU½ª·¹¿i7ß¿ ÈAEÇjòö¨ó}i¿hh ñ"V¬Lãpù˜éA‚«GØÕÛÊrÁž—#ÖùEÛÙ2Ø×ÉÜ‚´ý†«û0Å5=k¼v?} ƒàxö„!N%0 4‡Dš#‘ |‘fëå<‹ƒ‚²pÈkÄú„Œñí†JUÃ/,Þ=°Ž’òÒ‹þH©äÇšî xï-1Ùwå¶h}Óµi¢AÉP˜,â£^R¥+¬C±w-è‡ ô*p.‡)N“*#¦½épfâ)²ÅeƲ´Ýrø¶a“mó¢:Û3q¡Fñe¨å >óQÁ½îA‘¸·.ø¹øÝ?­“˜ ïB õòf_ˆqˆ Æ‘ùeå.u ż‰8y) Ù‡¶K›È)P²“'cøf2^»ØJhNhÒä{7ùô¸“ÈŠØ·}oÇjþmŠ›¢™.!/O¢¹áS™d4g ŠˆYTH³û=ߎÇ{eÒ oñ7cì 8è ‹FeŬìƒ }8{üpá)®lëfãmK~¬l'xã-ñ±ŠT•cB‰° —7VPÜn”0ì>”˜E"ÍRw—ó(Ã,øŽÊñ{KŸúŠE,C3Ó"3iꮓBOÇŠ…Icw$=†$p ±„xXmHß|PQ¨> endobj 116 0 obj << /Font << /F8 9 0 R /F26 65 0 R /F28 40 0 R /F25 12 0 R /F33 34 0 R /F29 108 0 R >> /ProcSet [ /PDF /Text ] >> endobj 121 0 obj << /Length 2542 /Filter /FlateDecode >> stream xÚÝZ[oÛ8~÷¯ðÓÆYÔ,ï—})z]Ìl··d0ÀÎ̃+‰ÛòZrZwÿüžCR¶äH¶“¨Eg ¦(òð\¾s!E6¤ðdžJË©͉¥Ì /gƒÿ~ûƒ':üy@‰pV ?Ã%Ì9>œ $cÄ)#«žéàlðqÀ"űïÃ1DIxÝI1̦@16«M‚L)•…åMjÌuk‡c#Ðä)¾8<}c‡ŒGž_ ¹•t¡‡F "˜QÃóÉo£×Ù)]§§c6šŸr;º=Uz”LWÐ BÈÑ«¬(—8ê;Ë,ŸŸþqþóàõù†©-QÔ][ÿ‘¥sõîça@œ£ë°ê[*£½™¢=0‰ëqPÄP¡Úõ4HÙGŒ¶Â«ýu½^œŽá'ÝÕg´glOÙË(çœP#e“ÓçÞöåÍÌc¤Ä‡Ë; [A ³®O–+’‡˜¶ŽXðÛ&ÓÿLó²›äÞaXHI¬a¬G†7$0,#T¢·Ô~9]eºL'w|‹r"@=rº!y€SÉ1œ²&§ïË›ØhÙúãk”e÷ ß9’×â*L){ p…ÙI{mþ'*’ò®(P#ú¸Œã(ÄT0;íB{e–‚'Á ¡Y'nú’õÛã†õ‰j L±ün&8áO“Ôgë2+}"éÂNðˆ½ro±SœwB§/Q¿)t$”ûÊÑ»<Æ]ÐÑN@Ée„í œ‡ŠïU–\C Ç Êë€Mèãø¬ ýlr µ°a²É§è¬2zäoSeàÐ)¢vô(Ÿ`},Fù|ú†‹¡%Nk‰ã™PBÄj†¯Ã°&Ä ÅÒe\‰š6¢œ0Îxè×Φ?m ›ý:ŸqÄŽUgAÓ‡ZN ¿_lj—õ[û>ïÓõ…" âbe ®[}ßB«·Æ/PmP7zûc<‰34¨3—‘ÀqqÎï”Ò8RÕFj¢¬VÀSkF¯"IÇž"É®Ôw·0VÚ¦Ô6:c-^Å¡‚Ї»*ãÇ€¯ÇRí {ý)sööªSPJ´Ô6Ôé*ÁM§:e]pèŽS½ U‹Sû…R² lsB1ZIeÛ¤²DZéIµm}IÕslóqê‡>ÃÒZ÷–uø7³„* ÖŽ€³»`»Ögµ˜ìŽjjÚ]ÛEÂl¿k?»UN ¿ÓàaíÁ ¦§Gi^3ÌK¼‚_)JM”„ú¿B5¯h@Íþj^¶L¬¯ÎÖPÉû³®v]†3ÛÏO?%XâOð ;âv2äPteì ’êa[ƎеÙ2îפ°”V7UÄDåÎò¸Ð~Ïü¯î‘ÿÛ¶®½©üÏuZ¦´ô¨KJ —6ž™[1ò繓Ã[Ø ñGq [+ ÚÒ¿d˜­ûÕcHn N‚vܵ寻I7é„}?&ú“ExØ ÀììaÆŒ G^ÿÆÏnÓ2[àOü&÷Ò +ü™tÙËÃbêÑü?È ÚëÙ‡¸A+¥¹ÁÁôRÓX›#XÂ¥ÒMûp~œ#\t:B_fú®P}JÕm¶­Ëy;r‡º\ë=y¶çÕ±ÁÿBõåKŠ“/¡«ú í fÉÒ" >Y¦-ªv8lµÉ÷aèf;ö{øõŠÆªZµÕß[â0T+ìºØµƒÚÛQHêš9p/Þ Q½"Èl P1Õãî7éŠà¸NÑk·ÁmášvsÔÎ-–ué¶ *Çæg¯ Ì ½Øá–t“Ê»%~`™#MD5&ÍQ&rØÒ‡T£¹ÇHÊY¨=ãõŠÕ|²±Ž}F•ÝÓB›Võ(uM#ÕË¢†‰ ò‡Þ¦•|cáó"+³€¤ÈXã$q×j°õìHžúŽ<Þƒo[Â$‚€L¸Pòñ¾ ŽëÛ`³Y˜a6`Gùöή‡PFYÔA2Ÿ Fù^4„}à±hØ©VD(¼›Ó„iuø] ñ]Â}eX[Íûˆ%PÔhî…²„rçÇ’&"4‘Ô™o‡ˆf|`Ë'›ˆHZáµ–mÂá,ŸÅÕò«ÐSÝ0Á¾l.òLpß—”žã)4×q èCŒÛ›V÷Tf5xíJø5]æaÎ8t\¯ÔEÔ’ú©Ò¾ï‹ÒÍÊ2v¶L/fC-ê°D°MðŠêxY±Ž‚ªË!»\ÙWÿnïþq:–RŽØ“ðŠÇc‰êó$qZ‡S=U'‘‘{Sû®Ã4Ô¬ÆÚð ( /ká]a™À‘Í—"‚Ã0Q(f(¼ƒÂV9§¶»÷䢶‰a‘sÿ&J•ásÖe<®òS®ð¢Ì²BI~´žÅ jÈñ,Í®ÓÆ­Æ ¬r^ñ‚çg^>!V4”h§!ï A(…î¿J¨—q‡LÑÑOóÅ Ù6tô—öæKCÇ•çU;Ñ©x—¿@‡X¼ý%>x9àál=Û\Z» ^WQðàÛS-+9àõ'|•ûk™¦ÞTpyñjYQ¸ÅùiS‹5…¿b1Ó#‡óùh0:ò'‰ó¼ô‰° êBûK2[DOŒ¦ÈA˜ì½dYÄpXéõÃ*ýÀè2šãhð2ØÁ¾wŽ9áMpνXY¼]µ ´·gŸÞ¬v·ÚÚÆJЃzaÀ–E$7!'ÏC÷E¸É9 $â‰×ULñZ+ìYeÒÉÙeòöù‡ç/ÿu:V\yëGÑðñ5B·È§·`2ßqž¢w`ëM6MO¼´ÉüIxßþ®H´cÀ?Bxà⌫ -‚­y2‹-LœÙUh'óu•ivéj (èMÒ[ï«8>¯#&Bûoá‡Éð»L‹ty›N"y3›—érŽqŸÊ1qݵ¸ÄÏÙàƒ³äK6[Í¢«ÙE¥´< °Xæ—iQ¤E«jÞL²"¹¨t°øR¬ÓÛ-Å“ŠãËlYx—雘ÎÁ`i¾*BW±†ð=+Ƚítþúìy{rÔ8~•=›jÚBùCDÂÇøûîE2ÙV°ÝbÌ.šk~ØÒ}÷áÓû_»Hñ )¦:H}¬“zùþía®:I½{qOÕ-\®íª·¥-ƒ7Œiå~UeQ’«.1Àaé¼/z|±º(Ò²æ°ÏZy€mR ¥7|nòéd»úßCSí86ºô2™p'5òU7H«A)˜8Áâñàæ–Àÿ«âñendstream endobj 120 0 obj << /Type /Page /Contents 121 0 R /Resources 119 0 R /MediaBox [0 0 612 792] /Parent 109 0 R >> endobj 119 0 obj << /Font << /F8 9 0 R /F23 124 0 R /F26 65 0 R /F29 108 0 R /F25 12 0 R /F21 127 0 R /F28 40 0 R /F19 18 0 R /F27 93 0 R /F33 34 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 130 0 obj << /Length 1750 /Filter /FlateDecode >> stream xÚíYÛrÓH}÷Wèqæ*i¶Š‡°„›Ú5ØPÜöAØŠ£B¶²² ,_¿ÝÓ#GI´N`Mí-/Ö\ZgzºûŒdÓÁ½GZERp'œŒ¦'‘‰yâ\%Vqe¤¦ó·l$‡#)¬`ï×U¹iòáÈ*˦U™×Ùj–>D4’š[㔿çÙÜé;\Íiº:¡k“¯›u¯-¬ £à¦Œ¤äÎZ…n‚‘Jyj\ê4™Tð+U̦C§"G:±ì¤ª±³æ4§‘É,;>aC­Ø?ÑðÑäqÆØ3¬ÆvµiðR¬r¼¬É—´1s§c  n‰½CŠÕP²y?«Åp¤lB‹cÃ#áÌ·VC•²C³¬ÜÀhXÈ›f'O±Èƒ!üä3´i`[Á2õDÃõïË%NæMKÌhv™AwÖE>¸´Àƒ»týtš×ÁÏtoÑúR®+Ÿ ÑvÇÝóu˜!ÆœÆ}È뀷Y¾>á¾ñç ̼>šLï¡ñùñá\&Sî·Û›óé0Õ –ÓZC¥AúO[Ç|ïÍ4ÛÑ5†[6ËÊ٦̬Ðjµ¾K£Û¼CûB]x=RÞÆ {uXÓ-YÝ¢WË3Ìm³ èD[wy«%W6e˵J餫ƒ¥îAG"wJ&°sëØx3Ùw;u¬î°‹fŽÇF¼¦£ëCŸo– '‰ð@U'ìMœ9±•&X¾BôÁµM²B^ËðFJ§ìa¬\(ÑÂz‡Ó"ˆbm v6²žPÍÑl9ømðöWÍ"z:\;è'è.y9H7®í–ƒÉàÙnÔ⺀ðát)vÚrg¬ŽlC°…¾q{ö˜r™Êt»E_R½PX `ÁòM?–R‰î†ë2ˆã‰vµetSB W‹idRn„Ū3Ï#´7˜(àПçMV”k~sUÿEõçÃ15ê ñ6Zù…ÓP¼Ž?<~öœ:ázïE-©Ek:2ª†“Àìjt4Ù I–¡¤=ÒcZgÝV«cïÁ*¯^AWHEÙÊ¢ð…Œ^žyýÐ{˜Ý¯“fhØd([É ð±Pjš˜o%´jIò#éªES@1€‰ft5am‰ê­;ÿf‰VªÖ®©¿é¹`г5ífSR±.èîIëœc›ÏÎ|X.xÕ"š.³b5òAPpÿPÿxš&Ž„Ó>iÿ‡„Rendstream endobj 129 0 obj << /Type /Page /Contents 130 0 R /Resources 128 0 R /MediaBox [0 0 612 792] /Parent 109 0 R >> endobj 128 0 obj << /Font << /F32 28 0 R /F31 25 0 R /F8 9 0 R /F26 65 0 R /F25 12 0 R /F28 40 0 R /F33 34 0 R >> /ProcSet [ /PDF /Text ] >> endobj 133 0 obj << /Length 2125 /Filter /FlateDecode >> stream xÚÕXÝoÛÈ×_Á§šŽ›ýþ(ÐÇu_CŽ…C‘ë=Ð2-óBI.%ÅÉýõÙ]R¤DÙ ®}(ˆ»ËÙáÎo>wÞNGoÞÙ„Qâ¨cÉô!‘šçtb'\2•LïM7y¹³´\ÎÇ™"]—‹²ÊëÖ6cnÒïc+S2Τ”éô±@"™>ÀÛUU­ÆÜ¦ÏíÖM±g°m¦Ï8A>UîÆÐaò&µgU/ $¾¯VË𜌛þ<ºœŽXBáÇ!)ÑT¹³Åèߣ_£Éýˆ&?(Ϊä&”0çx²  ·Í¼ÝŽnZ†YË1ë²|{‚Xc÷°ûpþþæÓ_ñ”oÞqݥϘ„ Œ¥"Lá*l¨#­J q†­Ç™D -= d½0I´SI‡êoŠ%Ž8 ŸG"G´ ç„*é<Õ¿(SHòf >jôð²é-ïË£ˆAäšï#Yÿœc 0 Ì(bÖ ÕÍí8S&½‰”¦ —Di­Aœ²ÖSO…Ö A‰ˆñ§dîZ—ŽPmd*$šQyŠ¥i@‰Ãâ°©µ,³.Ooj}x¸QœI´sD9'ü!Á%˜6é\‰Spžð¹!œ*aý2¬F©*z\ƒ«’ú%™C—‘œG©að–Íÿ,ûj Þ¬~а,ÚͱCƒî3Š&uèn1‰Ì ç„ùo[¾ÖÀ_ vŠå+8(…=¯Y~—çåkI¸Ð"Ñ è?nù}ç`‘\@¬e–àÙ‚lŠrøó±¢0¼¢šj8 'ŒLŸ‹ºÂ_pP€s}EöVÁO*!ãI™ó#n!©Í0{>b†Ü¬/¾ŽC„×›U·­ðIwÛ·O°£Iaé}±öɵX"̤&ð©çaó'ù¦Æ\þmH&°o"-h§çýž¨”ˆ€Î²‘ ¸6¢5r¹tÛjæs¿ÏærÎjœÄdHÀiƒ:ó%dy©a/î«ÕâÉK¹òr—¬$°2@’ÕÓ¦lƸŽ@jžÎ˾XÔAfpÝHy=Œƒ³­—‘{‹ŒWõ}˜´§öJÔ¬GÕÓÉOƒC@M¦ÛOy#gGŒ\Ã4&YOus€u”4¥W€Þ‚#I¾ýälùpþñâ=:G¨±®pïÅôŸÓŸÂülrè:q-G,cm¶y,š"­Èœ¡~ uaÔ·ënIª¼=˸O¤ÂÑ 6¹×­gÔªPˆ.Ÿ%R˜PU R"1ƒ?¦hzµ|Ún€³黲ònfþ 0˜ŽJ¡\…Ï•^ŠyXgƒŠ÷v–€·çør2vF`é°ô@p}9Æ7~üiµlŠõ€êÖŽÎL!ŸË‘Ï—l—+ºˆPyØPS¹7 U,»ÑÆâ&ÿD¸—Hà¥õJc²Šè>ìð–‚š=ÖàhC«yªÏ“áÆPãM·U<Äžj÷¢Ñï• ­Ô «„³Û‹óç“ó Qq N òzµ\_,ŠM]ÎÂêe9/–_ój[„ù¤^ÝUÅÂGu¿PEã𡬊³&Çu­þìãä*|Ìgåò…€Áú¡€U) {¬Ãh™/ŠÝˇ0ΗßÛD³ÇV>*j_ËYÜ ¼‡hmC»Ü.î0î{Úø§ ú:ÌÖåÁÒöyD\xxˆðá¡ÃƒÑ榩‡zÝÿðõÐgÄKG½~{¶>‚Eø\<R"”Œk „rÖþÎÀ† at „°‚ð" ÄÈSô\\¾D.¿ø•°PûŽ.ƒÍ„˜†R«&NhÕ†¡¯@y:¹˜~ºüËÅ›Éçv^nÓM˜aØ«›Øåç»’(öP& ѰZÎ}¥÷·I ÆU“JƬyïçpO·ÂñQrV^ãSW•g!ÿ¶Rv5JŸ;}çþd”}/çG Û ( ’ۥϦ߂5S“,îpå{Ì áQ‡±ü~›WM‚ºÇÂ×[_ÖÝl›|,ÓY^ͶU'ømi±W¹ú˜Ÿ Êy¬K³ù^jÓÍsÙm™&]½Ú$â‡M"î*ᎃ—Å?Û$j9f]–‡M"nÂÁw_öp4VLÂÁ§Mf2ÞåkÌw—¶¤Ûô2K‡›LÊ¡ð^%ð€ú•“=­ÇÇ>¼ŠïµyÂeÜa){¥Û ÛûFfYúy°q ýCwëϦYú÷Áâ4gÝFÏ<ƒ`úç nÙLÛ“î·pé¶xe¶†eÖå9ØÙ@ Æ—l¼á+ó¿jíL‡˜bå}{±˜XŠîŒ'ÆÚ‹´xå"½]÷Ü»cÝ,ƒ*¦‹P(ú6òU,?óÊÛöj°{ÜF¥xÁÈ«ò¼B‡îvÒcíÎJ…´„sÓ\þÈ‘Þ-\\±—˜!îžػ'u¸DÏ­ôËnµÈ¿ µEQÆéž;ÿ>¤q VCE×_÷ÓÅ‘¾.¥m»ã4E½cô^ ]fÁü…u-?5ÈÏßaÕ©º|‘‹ŒD¿pQ„1Ý9 kŒý…ž|;^ äÓsÕ/ƒ'ê(¤9M/ˆA}G!L%ÌbHÒú” Æ$ØæXkxf]¦CQÌ@é)1f•:±MÇœ$|«iÓí‹܅(ï‰b Ï㈵&äPÞ¶)ÿ/Æ9hendstream endobj 132 0 obj << /Type /Page /Contents 133 0 R /Resources 131 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 131 0 obj << /Font << /F8 9 0 R /F26 65 0 R /F25 12 0 R /F1 136 0 R /F28 40 0 R /F27 93 0 R /F33 34 0 R /F32 28 0 R /F31 25 0 R >> /ProcSet [ /PDF /Text ] >> endobj 140 0 obj << /Length 2668 /Filter /FlateDecode >> stream xÚÕZKs7¾ëWLå°jÕfh>š¯lå0v”؉í’WZ×–ãZš‘Üñ<´Ó#KÞ_¿ANwKœ‡b]öâa³Ñ ~>P~~vðìg7œyîÅàìrPf½7«%“¥Ðƒ³ñïÅí§ÉrrôÇÙ¯Ï~–¦+­$3Ö xzI=u¢dVª2JÔÍѰT¶X}šÐàæúúh(m1YÒóK˜EÓ„Ÿù‘tÅy|E3|Q­–G¢¨ï¾Ï™$ n}Zrž3J0%ŒßdÔb9Nö,.Ó;X0¼vÅ –¯áqÃòJ3Á]Rþ!³¼cRZ>rÍszJð¾L›ø3£–i÷ZYú`7Áxø|0TV1ã• …bºô2|:©¯’·}ñ]/V‹å÷GC-yQÍa·cÐæT1Îèü>–ú¡¡7´ ŠTwCR(À•3š Žt²¨è‘Ö¹@±Uз^jœ…çŒK‘v{3Pˆâ:ãÉYiÖ‡¿œ\/éàÀÌv—«†¬?ÁS|=zóâe:pjðBõôêÅ~ýï3°³4®8ÄOiŒ¾g4< >0>ø uL„ýƒDE?‹äk˜»îh}6ìBÎÒhÀ¸KºÛ€ñR%‰zNúŽDÀ¦‹ª‰æ¦™5Ìi õ -¹™Ñ·°2¥·D§aF ÓwØçœÙš•º4élà q›Ó9S¼|T€ä<`$ÿ-uF‹bV»Ž-"„ZÆŽ¤ÒrŸ3š<ã0ŒÏ©’ )d_ÇÂùd¼§˜RÆì³o½Ë{îI¼Wîë=·¯÷ˆË6Lò1~0Êl1*sIàG¬3®ù’¼ñ6Jþ–4¦ø,ý¬Ÿâ»\ÑQ +OHb$Ú%á!–¼UŽ)iDU ª(îR¼š¯æ×7«£¡‚Dús=Å7zº„tgG^“¢yU‡´wEó!I+H}§Õkx;—'GÞÀH‰âLýFÇ0|OsáË.n¢*P›ÙbÎJ†s¦=  AÖQJQ™/VÕj2¦™˜ó ”÷ò ReÈ”ñ£ð‹%fŽaË0ñ‘‹’„Ó†•Zg.Ô° ±°Ð›PWK\ ߯’…à †<ÝR}EÖ1m_¤“½k·-™ò–¶}xúbôzt2zñ5]¼=>¡†ÑÛżù:›M€C\Ðì1àù—jz3¡ç“åâ|:™QIljšŽ‡—õtr ïçÛÃ7'¯HäMuñ©žo:~ÿ‚ÁABå²Îê†=Õ4šW³Ikp}Iãjþ5ï}µ$9èO¾Ôñ[ГIv~3;Gšdã×´õ†žšú¿„µ:8' ¤‘N#¹ž“ë9æ$ˆï66ìíóÃìG.D$„d8¿æþG¹Oʸ˜4QÏÕ²7íAZŠ«zyo¹²¿ÑÏ»M‡AÊèªè§Þ“l%-Ø›ÛâÉ–ÊÞBýú"nË ïr+@’óà‘¿œ4ŸÓH•@êãµ– &Ê Ê0¬1èùà¤|´XeLÕ°)[ÁÓ¶7ýð¤t¸taÐú“‚Wùã‚þÁ>M·92÷ÊÃÃ×gÎv|Kñ¦ Í9‚áYþ ;r»ÆCçǘVb¼²ßàü|(CRàx}¶G([Å”Ä ¤]‘ÜÑI‘ÜÏ¡%Œ-Ì)rRë;Z‘wžànÓÒø€vaÄžhWÔûì@»ú?Cû¶B´wÒߪ â/†G×É18,³ê›|œµt_ˆó=@í€[ã=ëLwUf1íú sBÅ–€0Ì©‡k*­‘ VA¯·´'®Ë¿ŒkÇÊÒ‚T(nþ^ž¼ß·p¬ÙCÏœ¶Tð UÔ—Ù¿X•L”kæ¼›Ê× ð"táH1«zYç)<½úpLRóE¸O\U«:]‚£PjxÖñj/<ãµæ˜ZíõßĪfÍvËqšÅ×,ãü¡2¬tV¡¤V¾ÍÐ=Ô)è…”À$óÞîÇŠàg7Â.©vuìºÖ. –ZîÖ„AÈü A”Àœ´›EjòÀ ·u“ý›÷Ðj&x€5YöžhÕAk?JÃ4‡*ò(j¿ybïí5Vû¥Ü Éo ØàE¦Ì~P2Ð(càíJ`•¹¶^šƒœ6.nr¨T©¡uÅøWÇXtº7éo®œ8ëvþ9ï^–›ÓDVÇЃŒä2üBÚòQØa’ê X CK& ”&‚'Mòéÿ“QÛËendstream endobj 139 0 obj << /Type /Page /Contents 140 0 R /Resources 138 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 138 0 obj << /Font << /F8 9 0 R /F26 65 0 R /F28 40 0 R /F33 34 0 R /F32 28 0 R /F31 25 0 R /F25 12 0 R /F1 136 0 R /F27 93 0 R >> /ProcSet [ /PDF /Text ] >> endobj 143 0 obj << /Length 2373 /Filter /FlateDecode >> stream xÚÝZ[s·~ׯØ7-g²(î—ÌôA–œ©SÇ–#Vé$Î%®¤\f¹´äôÏ÷\È]i©‹Ívš¾ˆXàà\?z5>øËw\gŒGËÆW™qDJe2# Ѫl<ý%oF¿Ž¿J•â õ„’8ÎLV0 Ó“Ù@f»ü˜$Ú©¬Cõ×ȬO或Ø ‹ìì·HÖÓN©¤Î¨§8œ‹|<$Uml¤cC ±ÆšHñ‘R:$.5Ÿ)/Š!^u)¼q¯Ç8…f,cÌC•ÌŒ¢„i­³ËùÁï¿üJ³é;? D8ˆÆ-|s<›hK5:uÌÎ>l8–E—竇!gbn‡˜R!æïÀZ£óõ¨`ù Ú4_¢ÒY¡)¡ˆ AÕ–¿!îËÂa‚½ !gâ€ÛDj74ì³ ñ” äNDè§~¦L‰g"ÂÀª}žCˆØÈf@G%VÊæoF…±ùÏð—å¯qE¨|ÝÁÇ#)QB÷pÿœñݾümÀÙŠp¦ÐHÈ$Ú ßÞ”M9$R0‹1iåõgIÿ>8-±Â¥<Ñ”ËfÄò×ÄÊÿ]Œ¸ËÛÕ¨Bæ§w0øöè‡ã¿}¤Š¾9F‚ñ?Çßà¨ÈOaôF ‚„虞‚|Ëa]Y07¸\Àz$ȆÁ¦($§órÕK+ò&mS]–«ðu…„u>Ú›24Ϊ '˯ÑN–O"Å9²šø®u¤=)/ëùÚ<¯WU‹CU½c?zîë¶Z€¼‡ŽVå!Þ¢écYÔ;˜®t>…fõ‘2l *à8›·a¬Åöç%&œÜ£L^_á¯Ê[ì`Råso@‹A¨.=§U`<Ÿ@yÜ,F G¯ËEÙLÚr>¯FÑO(û&Û^R]åÝŒ>ºi]níð‘OáO’b2Ÿ – Á Õk;J¾”"oê5|¶^TY áOO.fe ãáÇs@=ë0¼E“¥ÜjŽíû„>ï Ð^åŒÃ Ø&qˆH ý³zqDÜVÀηºl}PÕ9 ¾ì›Èû½fÞ­8³ŽÖ•W¥ß`Õôâ ´‹Ø44ŸÇPÞÅ~”òyļçl"žQŠç¯qm…/Û@>Y.èźõ†atÐ8ãÙ~ãBÊ.l¶²¢Ç‚ dT'ÛYÞ$M¡c²èª f²@AœvîV“ë5¹¦ÏÃJõ±†Pƒž3üiä:¥ 49MyïýÀª³DsÉ7[•¢ óK—˜7z2ézŒvx ìE¿.›rµJ«}h³°'Z–öÄ)uù‰wÞùP2`D2“RúÜË™,FÑv v{ù›\ŠYHrÝs¢§ðn»T ΧNñ´‘Ÿ ånAœciö`THø€£ íì%ÞÆìÞÙ šªŒÂܯŸY‹YÎ*¿P€ö"å)hã*ZTí}»³OO׺[uÓÞÔÉ;[¹}`yÂEø p-6Ëó^ú|4¢: ªw}ãÑBC4xQÃV©Œ„Ö>çœÁ“!Ûq ‰½wïäè(pŒÍ4£ÏPbÜ`œ¿–œUb\(8æ‚ vsBêF¹$ G™Ò’!:ûÝâzí³m à¹Ïè³uLªàú“jÕz—_¬Ó¦pÏçBÃú›u•ø:Ø„K8°qîsÂs¤Äü3Fp;,ÑG8š˜=( X3.ÀÛÆœö>ÓŽ8 —g)fÁQèýq7£ßwé&°]Î{ÃÊ£Ún±ÒÕöÈC ½ ÉÑgˆËJ[FŒ³ûÔ9r|BeA!ßk@KOå÷Ýíq'd÷¦+f©íË ûßÍ<]Ä ¢ìžB,ì¤\hãúsô%ìvâµÃ÷«tM-Å«á>Qô¨Ñ®bÔZÞ7ší†Î¾Lý³AüÄñ`ñt8¤p° øfOwU8Çî†O‡÷ÿ|5| ŸŽá|7zöeéŸ =Tn´z =Ô!Y(°ô/+ÃÈ |÷»Qvxl”°uôaOW±sWÜ—‚ÆaéQîÚÿIèÀ)B½98²éNäH®Ù®"`‚ŽtpÆÅbçðÝo=]Þ_§ï=ôí£.0ÿžº2ßT¼ú ¸Ä9 ÁF QFÙLàY' ?Üú$¡N™¬èE¦zˆ)'Œ3ÞeúÇ.ïÍI_„àðØ?‚7WHAkv‹¸ád%-%V þHL¬´©|û¯x¾WäÙu)ŸlJ¬70Áp#ûµpÏ–Î0Úvoÿч¬`a“w¨ø–±||ÇLV`¤%Ò¹!«“‡­àaÁ ^pÃçý’]ŽÞ½=ý;‚;Þ­Ñ;¡Õ|¤LÕØ¼õõ…B|Ø&Œ‚ŸzÅ’?þ_Cd$qŠŠg…ÈaK?¤ÏÇ‚¤Á.^ëÅt—ߢǶºƒ¯Ήr.]}Yë±Á¿RÆ‚s¨µ½m¯®$·UómmIôKæ£ø¸ý¡îŒ=ë(ñ~á.Ø®E*Súzÿl;®x/N«9Ô„[Þ!úôèôè<¨¸ÊϪE|SÁ¯óðž‚Íðä”`g iBŠÃWÕ¬<*¨k Õܨ|Z~ò8EjˆÌ­ £óÉÝ¢ž†ÀÝ'9 $ƒ¢Ç¯ÏÆaœ…Ÿ"ü´þ!γ.§ÕzžÄ¤G¹éd6‹s>/Ó¾"øFSþ¾V«Aé€% ¶JCi>¾iÊÕM=›²ä‘Åz~QFO#zJ M¤(@áKîÐÔ¦¾œÆÎ2¯¡Y—õl=_¬^¬è²©AÍUÝ$6‹«êzÝLÑo˜ØO¿ íñ÷Ý«”¨™¤}BЭúÒN·ßþøþ§/bò¡ËäøýÛ]LìcLÞ½z1,_'ü$íNC›‡¶’øï`µÔa¯à&m‡ÿ—–endstream endobj 142 0 obj << /Type /Page /Contents 143 0 R /Resources 141 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 141 0 obj << /Font << /F26 65 0 R /F25 12 0 R /F8 9 0 R /F28 40 0 R /F33 34 0 R /F23 124 0 R /F21 127 0 R /F19 18 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 146 0 obj << /Length 1405 /Filter /FlateDecode >> stream xÚ½WËVÛHÝó^¶æ`ZRë‘Yñ$LHðçf2‹¶,‘%"Éçë§-Y™e6¸U]ÛUu«›óÙÉïzrä:v¸j4[ŽüÀã8ah+W†£Ùâqöøh=)Òb‘=Ã*ôÅ™õïì/6öí0 %;£±Û‘ô\2»MôÕ™%ÅÔŠÑÀsÅ…5–âzPâ¦Ü4ø™)þÔì0IÇŽ˜üÉÀ޽@ÆÊ³½8ŽÈëea]åˆæ!³Ð >¤ÐˆÐ ¹*@Ž@¥8aŠ­Å[¨Y˜´ž`‘ÑjÛ:4*õf{â3ðPŠ^õšêX±Q‚ý?´Ž×Æ` ‚²âõ­…Z° ™Z¦ÃÄÅ'v¢‹/2†žè&ÅtŒÆ]¤g+?æ¼ÎÑvYT>¯§ÂSJ”KÞÑüÙhW™…GªÇÍL]†ZÛ¾ì]³/MŽ\AæÞ¾ø¨—þØ`¨' S·ç(P¥1Îý@˜´¢³:[?æ»7ú£1­è;ŽxÙn@£^'§,xÄD¤È 9ÞbÀÚÆÑ¾Ý`­ íÝš†‘)ß…â`øé“•ºa«`³¯³<…§f %»4M-ÛÃC¡Ù’ZÔjCd‚Í[ÜÚÔ²š¼4ieœfF¥MÊ–eµ6™Œ|[EÒ?Låtvwwˆ=Ǫ/aA`Ò=Ê‹ó´iRÅ…#ü6kCï®›:¯çï!ÁzÍ[ÔùÏl· Omt©ݸC‘6nhâäy‰[Âø®%ëÚJ*Ås no&gWC¤zSÓUâýõ×ó« C˜‚ë›ÉÅåíåõ—!› ´Q{qýU¯&wCZ#ê0êiý&ðÝQN¶äµÆ!¡³ÜÌ/Ø—‡Ôæ#ä˜+†¢HŸ^Qžhò—, êT5Ö"ŠÄÝÝ)‹û3½þÀõ‹dƒ>5ü®uÂÁ—Ùç²n·›î>¨û¾¶ø§Óm%%qp‘ÖoÐï:°Þq7%ÿÎ cÙß± Å*-ÒŠò‚†ºÇ\ì>"¦^•à‡ÕnÌ3s–Ú dæ9jþGcuºÖ|É#ä~ìx‡q¡kè  æ}0ß94?‘,ºãû8ü±ÐðÈQ yÈ`‚XÝX‰”6ÞÞ¾ï‹M7wk~w4X°,9åñcnÌn@–k3Ÿ]wÓ>«4it±Â›;×ÕÑ#|À#Èø±.á65SuÕ=úºÒyö³»Â@….¸²}Iœ±¯gæ+êê¬brÙÆècèMhåØ!>RZUD¡®&±7}&ò÷±ÃÍÕ¦ ”jÀßõ &úëž²:ß;++z@•ÔÇC.¦Ð®¼«¡ŠTÌhkü6"“Lš4|37üva*`ú)άFÖÑœºw°í¦×˜ï×€ŠO‚_— ¦LoÖ ÁºX®”r¬„à¸õ=VÞ™‡Öš³JKä=)÷üh³µÌÞ³Ú¼íÛ`óÇF×٢ѠYmpŽgø ŸÜò]›÷çO4#‡÷õ r ŠßäÔ†“®+~(»ðU¾‚× m—ÿ‘t#Ü:™ÌNþMê½Øendstream endobj 145 0 obj << /Type /Page /Contents 146 0 R /Resources 144 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 144 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 149 0 obj << /Length 1492 /Filter /FlateDecode >> stream xÚÅXKsÛ6¾ûWèHÍX ß{‹;v+µ–ÌdÆnz€DJæ”"eŒíüúbwŠ(Åmcå"‘\ûa{¼¹õ,sš¡Õ‹–=ߎoõÏÚ®åõ¢øO#ºOúÇòŒUœžø}ß2ÊD~_ôíÀ¸g¥øÆýeð¤¬NA···§4*â<†_ÃxÒZK¯$$ÅzS‹Oœñ´Èá»gÄâAš÷í‘ñLÏŒäVr1 Åù09hPR©/󲀿Aÿ¯è×ÞÀò‡¡ã{âÁznhãFy*µXư?p]׸ÊÌŠ“X,ç8FšÓ?‚N+xq…!²„¡¢„¤¬LHRÕs0TQsÜ}"'ð‚Æmú±•¤\ aQ®¥PÙC<£ Ë Øñcš¯hš²’ÖÒJmôÜÙØöг<·u6{/dÖ<×Êþ*Aq À™ÆðÃVEÎ2’nÓv‚œÍï‹êVÃyÉòj‰²5ù,òJÁj[ûÝ¿,'4ÄŽ&Z7¡¡7 âÅ…4Pê5üÎ¥ »TœŸÝ‰œQh, \™L&W'eèïWÆWеÂ'Ôô¥ï‰PÃHÀ mdžVr!pG–÷bö;1ĶN8¤x•H°)J&´§ŒœºÂ]Ñúë ºM ËqvËtõã\ý½0øßÆi b¸T»á"@Û¡ K°6,ɵ„íµáTH?Ê`Nj˜–¥sš£,@›ø+Ó§v¨;»Y õKÜí ƒÏévÐa ¾ƒ¸ Ônº°~‚áèpßí41¿—Ú“m¯“É¥E–Åzk‚cÜã‚MëRŽb0¥Ëté@öál&ôMáÅ<)µåÚÄS Ÿb~‹ä¶Œ75$嬔]3® »DŽ“,q-^ž>œ‹ß3"¸§ Â%B°F¯ árúñübZ¯¬~<½èT?Eýt¼G=ß _ÛÿÓY·úDùô(þŸAžit#¹’N1ªãldò^cò&¼ÔÕË N>›–Kˆ$`ÚJ¦sðå' ÌÞàæ™:cT5i,qp`w‘Ûe¼}:wlí’Î|q¸8óáûfC<&iä’Î7âæ\¯AÃÚ«VaÒ* Bbz÷@a (_NE’ï, kħû˜d’!…[Ä÷•5Dý˜b‰ªK–Y`|äÖ‚CÕ¡s‚œhŠ!XTÔòç&{Þ=鿉QÔÔŠQ<3~MN€&Ñ|P{ˆs‚ÄiúáñíáÒ d7Щ˜Çµ‡a¸Îû †ÚC»E»?%žñduºe{GÝ¡Ú="$Ë=*¤§§¨;ñ Ðp"-e…Z—µîévI¨9/Äš#AÂEµ“#§×è?Q¹£ƒ0f‡KRÒ¯Ea¯Ö]lV¯óó6;\©ì ¨± ËÚ4tõê€1»¸Ó¡cPêº@]-¨FHÕQXY-¯Åê:Âë»ñnlE³9ª YÐCè€É¶ð̃ç¦Ìă҆÷evwqàÚ­Ü5¥Ä+DkDÿï*;øÚNÑæÎ¨ýÄ<¬ˆx)»NXªá ^·z(²Ò‰ñéD˲|Õôµ¾ËŸMÏ<¥¸‚HJ5Zþ~î\ã´Uœrež¡˜iu#½‚fAÐ[iëöZ¶adΨ3"£›0Úb[)Ö„«ºÙÓN$`Q·$xSêêLn­Ô®¨”–7ûbIz2jÆ)µ8æ¡ÖÝlÙAok Fî·èuËцnè‡t¡@N`ÙÑH%ìEdP¬>­lÕƒ«kÚ²) æ.‹,ÃË‚ÈïGÝ%‚EÙ<“cR¼@¤ Æu µJe7º9;~{ Aò{¿Ñ°V+•ªë¦]Z)å¡öj뜬¶oêvò-S¬âÉZ7Û\-ÌÒ–çjøµ>mË =·UêÐÝUÛ!HOΣ“r’ðÃendstream endobj 148 0 obj << /Type /Page /Contents 149 0 R /Resources 147 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 147 0 obj << /Font << /F8 9 0 R >> /ProcSet [ /PDF /Text ] >> endobj 152 0 obj << /Length 1387 /Filter /FlateDecode >> stream xÚ½WësÓFÿÎ_áo•fðqOéÄ7yÐBlÃ4)©ˆD3¶•J2úÏw÷öΖ\åAñðÅZݾO»¿]ïMž<;°ÁYÆ31˜\T¢Y’Z5Hb©H“éÑÑ~©Àls]^•Ø®ùüÙ ŸE“W·Ð©` òeà#¢áB(8.L‰,ºF…X¦ë&G©ÃÛ§x|×ÿ3oæÆ!j|.¦J¦Äs€[ùx®‰‰žªÙt€´)\õ=çTó†ài儇d Éù›ÏÐ1ã[b7U¾ Î"G—5sÁ„Iqé,á‚tøoÃi#èÄþ[(µ$U[ßúhÔÛ¹PV>´¬©®ÅÉÎ-îíÎ"5Ññγ>ÞyÖÇ?”5~k.ÜxK„¥¬_Cýk¨å|:uGeüÿ¡Ih#Çså¸Zào«s*b–—ôœVØ•›ÁêÙö„%6Ê«7XÞ%5*LgSÒø‚Ú…ƒ:h® ¯[Íæ³œº†ÅÔ+73#ß’(±Ì~)¹»/J—Ð&T·t¸Ìaç áÖ%Ím„WS¤PÑ‘ ~åˆRã´wÕÄ^«ÖùÂw9¾åž…Dã5ßmcj³a¤O#“748ïÆ9Ïë¦o<*‘”PÁe;ªò‹Æ%Z˜€{vvFÄuÞ¾<¡½ˆËw:¿z†ïä6P`v9œ¢oà ã_wÃñ½l¼•|IÏ¿~ÿÅ/¬Ëö~ —ÈÝ1;á(E•uÆAOâ­©~Ƶ‹²jïת몡Ã)ÞKÑ.f—†xœH}¥ÉÉøCŒ+r_5§i¶Õõ¸@ŸyS_ÃÊßÚNÝdš·º§h}[SÖ Æv[‡ý7AÞ=:ÞhcVI–‰DQpd>ÙŸ<ù›åriendstream endobj 151 0 obj << /Type /Page /Contents 152 0 R /Resources 150 0 R /MediaBox [0 0 612 792] /Parent 137 0 R >> endobj 150 0 obj << /Font << /F8 9 0 R /F28 40 0 R >> /ProcSet [ /PDF /Text ] >> endobj 155 0 obj << /Length 837 /Filter /FlateDecode >> stream xÚµUMoÛ0 ½÷Wøh³+Éò‡vk×4ÅV`lÖœDM 8ñêØº_?R¶“¹X t—D&©§÷HŠ:ÏON/S‡’@AüÁáq;IÄÆiää«ïnv{çù4 CW>u¥Gݪ\4ðW´ìîÏGßZ–hhkôó-uõì±Ô•Ʀ¢êÍOˆê# ¹+Ü;UÄÂó!¾îÀ´]ÁÏNð¡üh«µ i"šS©ÒN4)ß=¡Áä¿äcZCQäŸ0D,´L‰—J@[n Ë´œG³^ÖŠ^Ù–õVƒ²p»Ç%"a ÜŸD¿Î †‘ÔÝÙiÜ×Ö •ùÆc2¤S#›•UØÊfSní—ÉS…ä¶²hFÅxÑùÄÙY‹Ú¿î‚Çz]£±¨L•‹%”·ü](Ÿ Å‘}U³[Q éQé²](ų\ëZ<{Q ®Îòï †Õ©-‡p›¤ó ‰ ðSÁß‹…é–qP}É­^¼4´½þ*=å½'évåv=´pù\šúíu7tÕ’•%»†œ«¾Û –‹—[iJ®ª®¬ %¹kÇ’«Õë)n¨Ñ‹cÊ0W¼(áoàeO C÷Uù{ÜØò°drºc J6û›ÙaÍQÃhpMÊÈT7…TLÊhí<ÈÀŠ‘&e=¾Y]eoâøT5÷ô©†DGŽÏ¢ "œ«³s‹Ž½Ò˜ùfp5#Ûáâ›4 ½FO‘ru0°âfZeËâúìÆK¹{†ëÓC½1ÂJ=ÞÏ¢=ÀY ü‹ÍÀÐÇøÔU¥NÿÕ÷{[,* ðQ‰e‚‚… –&A”†:ÓW3ˆ "r¯Îõàdð‘“²×W6l>W‹Ø½ôsµNÜùhö|dÏMüÍW»èƒa^ UÉox£ ½2€Ã$Sv¢§‰à–5‚ÎÓK¶ÿ®’ f"‚®Â˜{B˜çs1³¤)¡Ã2±¡­5á}@B¹^ªsöŽñí9½{w,'Ê“4y:쯷àý0i?pß›çü?`fÓÚ*´C@Ðë HdR¨^ “Y~òØEy™endstream endobj 154 0 obj << /Type /Page /Contents 155 0 R /Resources 153 0 R /MediaBox [0 0 612 792] /Parent 156 0 R >> endobj 153 0 obj << /Font << /F8 9 0 R /F28 40 0 R >> /ProcSet [ /PDF /Text ] >> endobj 159 0 obj << /Length 1830 /Filter /FlateDecode >> stream xÚÕ™KsÛ6€ïúº…š‰¼H½ÉNO¢¶Ö#‡:ÉŒi™¶9Ñá¨ØÉ¯ï.  Çb;*'žž‚‹Åî‡Å.@M;¯N$ï ƱˆºÓ뮊C¦q7Ö:Œ×ÝéÕÇ`pw×ëKäË«âZZG½ÏÓw4X…:ѳn_¥aÂ¥0Ã&³l8èñଗª`Г"8îõyð5D¾Ù<sì*²¾Ó›ñjS™Þe¾¦i’.gaÊR3 ÃTÆQ·ÉP¦ibæšÞkê »Å`¯ÐAŽZ–ðHf§q0/Ö•“›Ï©ouM¿Õmnßl`L1G«È(èÄ&ØU :£×ªùÄ"–?ÌLÎ\Q÷5Š—Ô® ít š.[^Qc88ë%,>~ú8:Ýí×~rF*%¦Õ-(Ëp ¿eNÎŽYÛP³fÁµ1z‚FæÔžôܘíä LNÚÊÕñ\ {ý(Ò¤oµxrØ‹j /;…°1ðIºèáZ˜¦oÊ¢.scMµ"àZÂ_t¶‹ar<ËÇï_MÆÇOĈV¡Š¤†a( æ+&ƒ1Îñ“oøNkŒ˜$ ;ÐÔOŒ«üŠ^Þ`±Q‘Q¾™r]Ñó ×άL™Í*£·¤gôs½Âµ™¯ó= ;Õ—=‘bˆa;[’0QË˵1fe¡*ÙšBù÷sh  b-·k dºÏNxcBÉ4Pä[/‚-ÐæìrnuÚ4B¸w°qmp–$peB3£7ºúÝR]“î ½—ôøzÛ<~i/­¥ç¸lç£í…òÆ×• FDò(>Yù @Ì’Ñ.-Ý’gs«\r”$r* Kz\B0£¹ÂÑf¾Š^’ ‰øŒ‚7·Š™q.]"‡‹É ìæ¼xý‚à%3¯ç©4fã\ä•Ûã ŒžÍV ÇgnÞ=˜FŸ`CKùØcè©7¦ôbÜ9 ïg8û-UB¦9¡cë1(¹8~A½n΋óáSF`Ú,gUaâuMÃkSÁÃ;»qo¨giö…™}ý›Ó(Dñ(2ϰ³ófÚáð†uy7Ra¢ÙUš… ÊÆlÑùÚùø™u¯:¬û®Ã ˜$Q÷XÈS°kÑ‘¡Ð"qÏóΤ3ªÕõ¾¾§ð+êN&Š’Gœ×³¢iÃÁoßôú±Šƒ·ù2/³¹ÏËE ‹Š%¸´ k¿kÔ •´é›§±Á9'e½;EïNÁi’Ðâçà¤%>ÍËEQ˜û$œ,\Û(s*ˆ[Dàil@à¤(ó ìÏwÞYÄFói¾¦Ý½Ä}sé½·Á½º/à1Õ¢ûžÆ÷”uò¹?ù¾X×hIg´èÆ¿†Â!.ÂÁH2.ZtÑÓØà¢“².Ná<G*˜ö”™·Ì?V…©…»Ûúɘ¦Œe£&ÿVgw;TÐÏvÑÉžHª\ï¤JsŠ5)<·yNh‘Éï_¨oçÄÆƒGy3næ'7 Éx{Ü}û¹×RhäMN8^¸ªe~µqÇY[u2òöfn%T+KʬV‘Ýž4@Ùüâ•‘µ`i‹¤< ¤œTMêÔPŠ’J믛úÜŽ[s?§»Yaô¹Lf ðÚÞ´¶ÙÌe²ëC*r ÷Ïöz:©áp“8 \!3[È1\’›Ã‘uw{4ç“âGw+»à6Aûžï ³¡W•9`ßDK ^À ¾=ZžÆZNª¦56Ñ"‡GC Ž‘«²ø‘U>¼,›† ‚sxpw†‡ëi)[ÄãilÀ㤶xÆv?6á7á¹¥ï­âa %T›‰ÝÓØ€ÇIÕxÆX«FÐ@JIܸåÆèöhHÙýf‰ ¸lÛû¼¿çTKàD’†©V-æy_ã~pµT nŠnž¸$ÕO‚ó/wÃ{cÌ$ž,¢ŸK…ŽC%¢󹯱•“BëNßL±&‚ý››I¹ýšÆƒÍlw“Rá3WLùË¿é¹ùûöœàcàœLñCEȵýî,…£÷7>ó‹òendstream endobj 158 0 obj << /Type /Page /Contents 159 0 R /Resources 157 0 R /MediaBox [0 0 612 792] /Parent 156 0 R >> endobj 157 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 162 0 obj << /Length 2444 /Filter /FlateDecode >> stream xÚíšIwÛF€ïú<‚vÐ –ž›lS²­…ñ¼8É"!c AÒ–òëSÕ ÐàË0ÞXófžÁRýUuuUu¿˜œüt¨O¤/é`r?!‰¤ QÀ4Lf¿{WÃ?'?ŸŒ''tàÂÄQÌk©éâäÓÉïúƒÙ‰?øùÄ'\ÆÁà |ñ •’ 'œ°ˆÅö{~r{r]éY…#W㋽· bBJwÞîíz“áˆzgpÁ†#é oŠß‹Åj ·6épĹð6à‹¹~ù¡ÈÓõÇGüƽûdº)Êì¯d“K}«¸×Ÿ‰þX G,òŠ5ªÍPgöyÈb«m¦SïŸ ¸^fú™Ü[àÝdSÂÍì1F”À»('íˆyLBÉxˆ-ˆ­¾Ýíoç0€[`KA FøešäðIõ0Ët†¤uÃÄ€[?*‹tSfÓÑ,EBË̱Ê<]¦e’g)€3¼É4Ì ~BÈŸ‡Aè%ù¶¶Þ ywyº0–-:¡e¡‚ÓÑ:[ÐZ)vò }Vݱr5u ¸èÏ2CÍ’y±Lrãóˆ°\tâä²GpŽÆpV ßnr3Œ¹7¹@§d‘üêŒÏŒ/á¬-׿æÎ,NðÉr¾Íñ³ìi‡2&¾Q¼\ÇyURøv¿¾9g—ÃQ a Íâ‚Aåj¶= ÕĦQlóÁðÚêà¦à<6èÀ­ÒºÝ½šžfn^ÁõÃùø>.Ï:Ñ‹2èq‘q5¶Ð³R–ÞÍü4üaônºÑ‹`d"ìqýp5¶Ð³R†Þ%û¯ô½ NQK„«±…ž•2ônØó½ ™×Ýð Nbõ¸P¸[ðY)ïŸøB<ßG»Í‘J¶Ê ©»!“Þ£YSà—¨ R{<º 7¡âã¾Ë%¹O˜Œû\L-L­”e á0ˆÅ3cz£ë€.\)”>ï{äêhláj¥ W ”ÏËW/l7¦>òý>Gc S+e˜bø|^¾úA5ˆ% q”½Au5‡ZIY¨üyAU-€÷7ïw¡2FàSϯ·¶"€5Oë¹µõUryz5„áœâ»¾üÅÔÛ‡,W•“]4ËBPM=ë¨çùöAáW¬' Y¶«ôµXNËà{]ž¾8}5‰Ø×©Xi‡Ñ,;78Èr¹6Å& |ÚªÜÁIÑŽU±i6L¶Hf––Pè²übP.gFЩÃv…2£¤ò¡•~FE[q扥®Õl}g^6OJ¸Õe^qJüÈ{´£±Å:VÊØçFÙ‡ÑýêŸ{o^üÚÚT±Æ—§E¾]X£5J[˜‚‡:z²­ts+5Z¬¶ÜÆùé®×6B@#0öÜÑØÜJiàj¾Ÿ¿S“Â×ѬPGG³‡z^è(‡¾øoä6WY6õÖÎLóŸõOÚ󱂯´Mž.ç‘Ôõ²BXèdÄD-­”ƒðõ/ã1Œz Þ$|q<¼wÓA˜z÷à>M„ëÌA¨™â#>¤SôÆÆó,M·Ës_=P‡°tÝlô˜zˆ{ð®có´M¢BVRκ˜@HŸÆ{ 鮯ã&¨¤¼}cB{ªBö»YfíÜ­—fÞ:t˜+ð_j¦–õv]·ÓŒ»/UL¨;çéz“-t'½ Î(„‹ÚQØÓ5XÞb° ?îÉy¡·>/«]Ò„ÎØ+vC‡²Lª¾o÷6ÖÚZ˜räÎÚ‹‚wj%Ü‹¿©j‚ƒF½2®;19áLô¸ò¹[¨[)û¦ê4€_¬2;{“L&?¼_×õŽe³šUÓ[[’bNôê%é2f.Dqƒv5u%å ›ša!ÿ 1#>“=UWc +åð`ÏGÈ÷zÌ–\-<¬”Ã?p!9í1>»[xX)‡Ç{ Ï‘ø±<ˆÇ}⨶Ð0BÆ;É¿®Ô0¡¶pÚ?NñìÖpi6·½éHbù¼Ÿ¢plQÏYŒ*=ÚÙEѶ­¦Î }šËA°Dœ©œÇZËÍ_ê’tú8ÕÙÎ@Tåæ)]rNa1ïq¿ÇÕØbE+¥Íøz|†mŽ©´wHš‡0æêU>SÕL•[B¾¢ ¢B·XçO= õ:-P„&Ë£FläòßfG„ÆAû®Æã6¨¤Œ ®o†XÏޙʫgûý[Ý(Qýö²îÐ%Ê0ØÏl¥¨æäº®(ïšÍšùÁn"º„r]Ï2sÎeöäûèc¦RÝëò/0’\á±Ó¤WŽ×É ¡Oü8ìqcÚÕØbP+e ªz½Q$Ûz½m]]sxîx‘ûZw‚Öu@Õ}„´œ×]4luX]˜IÀ£#“«±¢•Ò/Æ/'ˆ*æA[[,¨I,:ÊÙ· xpÃëૈy¢Úkn÷&µ§Cçõ^Eóð¢YïxÕ>ÃF¾?±Ð+Ç 4ˆŽòÔÈ4xÀeè‡Fâß7ŠveÔe%zZ„üc#tq@M€;A¥ÅÎ:îÃ+ÀJßð—/ÒòÀù»u6_Ú]„º ͆uÿ\{êv‘î¬ËÆcµÔj6ê8k–ñáS—J"¸ì3œ;[×JÕŽ{ #‘4þ¿ãþO9n(È®oŠˆˆ@ЕŒpáÿÊdŽ\|Ó'ƒ©’Ò¾ùæÖ·‘/ÛûU¿ïáô;S<­¯ØÿãN§ÒÔSߘqþh÷<iÝ‹šînlh/ ’Eü‰AƒF1‰$íqµs5•”6ÌÕÅ;0 ã‘— òU^õmw¶ÕT8Py¸Ãvggg£«·Ñk€SMYüýöÇi&XÇ­\-8­”Á©üÜ—±ràң˰Hg%ïqiq5¶ ËJÕâëó1îR¶hô<»SöܤO5ùþÉwxUx].1„B‘oN-pnyü ¶îÜendstream endobj 161 0 obj << /Type /Page /Contents 162 0 R /Resources 160 0 R /MediaBox [0 0 612 792] /Parent 156 0 R >> endobj 160 0 obj << /Font << /F8 9 0 R /F26 65 0 R /F28 40 0 R >> /ProcSet [ /PDF /Text ] >> endobj 165 0 obj << /Length 2394 /Filter /FlateDecode >> stream xÚÝZYsÛF ~ׯÐ[©Nµá¼m'iÚq›Ä–'“´DÛl(Q¡¨æøõöâRiEæ´3g"J ‚‹ßX,Ïg£g/ã1õIâ't<»‹DIŽ£€&h0ž-~÷ÞLþœýnó õŠü¶‚´Î&SÎ…—â÷6“)õ¾ÀÀ/–þ_Ã×*Ÿ+¹eZWxÿg4z<¥”ðž4¥œfÞI˜0> ŽÆ@Œ”äêÅsúÌ báýá>!-M øJWÙ"߀­·0¸­³†i¥FS¼§JÚ/ú Ýf»ÌW0t¯Ä뉱þ×n)´ÞN]2¡$Œ’6Œi%=4a<¦úƒzÐùû |Òk»üAkƒk=Á…¾Åxw~+Ÿ!­[è‰Î«r£¥«-øÔg¥ê5m/ 9E¿=»5ª/'S¸.ÕsüRÛre~+Wø :ÅÙ­Ì4›t™¹·®',1¨–wÚhyñú’hÞùœøÜ?–xa_ˆh8â¹»‰g¥,ñ˜!ÞÐüruÉ¥µU’F]„;…d{¢ïàî§²k^†0°²ÌræŒä’ë ¹) dÊÖC´k3¶áÕÊQÞðJ*S#–[ñ·ÐìX›MÔo3ƒe)-lÀm×Ä\ãéïB‰Cg——zÑ~Ê¡·ÒLã©… =Ê„w®"‘žãj'|x{Ù©†à$@„ᆣ±ga)½0^N¦A(|/EûÖ…öç%³-$4vàV¡O=¯ÍÊa Í"[:¨Jé;+ùû> òóº¬4yJ ¶ú¸×*´\•+§Êæ&S¦«û-¿T+•LÕrù|¢“(”"°Žp5ö8ÉH'c ?ÁKf|¤YŒ!çƒvŽÁú°Ìg^ëå/QMW›µÔTª•ð¬ÓK-þÂ…|UÏÔ±öýRí£Ã/§9<ˆ¢s¸«±ÛáVÊ®J€å.Béæy)éŒî/²ÏM±”®åOmBDžÄ¤ÎÝ1ʼnæN7`«@[7á/Óº’•ÉF=Ã'ºªËË&¬›TšA0yÀƒ/G‘­E$XeÖóOuoÈH€Ó†s¯£±Ç½FÊuïàÞ8æÚ@»èï¸HŠ x%S§tZöP6Ó¹K7ÈrÊn$ˆïóp@=0)ã W½;¬ˆ«¥Sš™€"-Ý w©ÚîZ!ç¶0Õ3ÄZ'¸ïÄx@D Ä€ˆ9{3R±ß0ÛÇì¿Íö©Ù™¡½SÙ8¿h=¼+ÚìaMƒì“qÀ(\ìÉ`[SWå>Ú4¤„Æ4jžŒœý†pSïåQ5Τ)=.T3z,ßüè9ämö°M ²Éª%ÿ‡ª¥Sœå7Z^ïyŽ3ˆmèR>‰ýðé9Êjœº*÷½«™ðGÍ“g/‹»ŒcÙ+bJX H_Wc7­”!°¬ÀBÿ¿ªÀå•`j‡|×ÝÁjªÕNü¶Ì:«·(ºÊŽ¢°ã±#ùñÓ³Õ8uU °ˆ0Í…Í“{)¬"j‘Ajl×®6•evgKeá“pÀíD£¯‡ÆJÆx¦IüïIL5€@nݨØÅÞ^–Ñúb,X„O”VãÔUy ÍG!)±æÉm–5»ˆ¼çGs¥(2Ü9{xb¤S®/.¡:¤,ôÚÙ9·-ÒGc¿éÆÈu¿mïÊåÙÅ^IwR§QµÑÝ8£âböúÙÅKÕ/ü唜' I`Á‡²«±e+¥Q~–²“ ˆC.càhì1ÀH9@L üˆª†w^ç)6Þò¯¦…ì°¥CÛX-(mزm÷]uO{‹<½—‰Ñ0Ãö³Ï.ßÀϯÎt6\uÆ7Ø•¦¸lnà¾XêN‹ÍIÝ$„S1`7ÁÕØã#¥ò r}sŽÇ‚wŸ«é ]~к³QZ¯¸¥^t«zæRË 0ªr¹ã»[U'×5Œ<Ö5r¤Û<»§êDD_ÎM³Iï2Sb©¦GíòLx_³ª<­_„¥1£á1ÃÑØãa#¥=üë9&q–pÏ©B›³+:S#¢T.#¸YQ ЙV~¯7ô·y}0Ù/d7é¶ÈÌq‘Šñö<*ïYxX”rñç«{sŒ"痻̑×[™uì! &–“\æ„Ò!×d£°ÇaZHûë ûf§y0a<àü]ÝX)mÁ5øì-–Â÷.Êåz[7Ó){3O w{»Ù.Û’ôøˆLM+CÕC«}'šìHjþcOêÁ±(&>M,³\=Ð) -ð¾å…|¢UU«47áX-‚Ü„ò,ÌÇÚsg–èãàJždÂ"zPUXÖœè~+`¡ "òŒ~®ÆÀŒ”~‹ŠƒÙÕîb|ñx„ ó<;H¡ÇÏЗ¦oq\˜Âè€õ™«±.#åÂõ*òÅÖmëþëùc¯ èIðFû"É–Uf õÇ^\2exn]d:«¿R\]ª•v§V)  Gµ™Ø+ ïp ’3ã$fC ¹{3Rm¦"> endobj 163 0 obj << /Font << /F8 9 0 R >> /ProcSet [ /PDF /Text ] >> endobj 168 0 obj << /Length 2622 /Filter /FlateDecode >> stream xÚÅYËrÛ¸Ýû+Tu¡ª"†ÌN¶'/Ûå$‹Ì,h –X¡D]’rÆÉÏO?‰rÇ‹©{1…Fã}ºûtçøúèÅ«T ¤Š5bp};PYlò<dFÆZ%jp=û—7Õp$¢²Æ¿óÿëÅ0•Ñýð¯ëw/^ÙHâ<Éi+­ô`¤Ó8ÍsKS|±¦Jûª"‹ó,7ƒ„”Îãá(Ui4ñßãªJM¿ÜeÕÍì9Èu½óý'‹ºô¢qU®h†YÝûŽß†"ò}§ÏpããoîÎÍê0U¯Û-—ÐrtTìVyô6ô-JWõê9ž6<)ÜP.iç¸#«MtZ¯æ*6Má§É¡Ym£7-ÁU¹¢ëô:¯±á§yãVÛCäÆÒéP~麲s—žyé¤CÝ‚f«ÜýÁ žâA`•OC! o úâ`/M8-tò3n\ù'ôµ82Z~¯.2k)sÿ–ÏY%Õ},δ “éPÃ]œqÎË!,5Jœá˘èCëX²}ÆÍ×h”3wheib#S»]y”2'Vi£i麡4T’ÞÂ,uÿßâÂ+Ìpî[.Y]á(M,¢³ý«,V3?^¯+TwáÖ@vQt ·,ºrÚ†‹MÓèrzU1C|ÑÂõ¢D,•¡M£±oˆ<71ï!XÈHŠØZ¶ÇÏò Æ„Th¸Þ¢Æ ø©l‹{¥Ôf©W^±ë÷.,GoPe )Þõf{ÝK<[€'Nrê7ðSÈáhºL~å§9‰Á÷Ó¢ ûÃá‡À§ÒØ ó ìåZŒŒÓëf¨átkbÝâˆø[ˆ¡=ã§‘z}ËÒ/h,k8WR9?þ†º[¼ñ)KÀÔÃãûÏÝMCB¿ÊæfMíÿÌyöe{à°i’Æyjdïp“'õr8î`G°×.’|ÿ6'`«)è©4·‡úk‰O óèšnzŠVƒ¸]•S¼Ô½rk0*Ô±àd #ÔÏõHæ_U«èZUy7ûsM[vøãžGàò"€w·Z‹hÉ^œ ØÐit†5nãoøqWV•ó=´Ã÷ÝEáóv¤= ÿLt2®Ú;Tt>{Sèp,;œqj1ÈÀR_ÊÕœeïk¼Nñ‘$0‡xÌ,S2ˣ߮l+ˆA–ÇR¦ù@Á>2#Ô`º<úïÑç¿’Áì(¼;J BZ=ø $†ÝÊÁòHÊ8JAu49úc;á(Ì8êOyŒ|#¹ŒEjpa'ƨÇìB-<–®x\e£™kÑ;ÍWÙç¼\’ÏsK·êˆte½â.²øvaŽ yŒ­•à›Ì;y˜ãŒµÎ?lã­‰þ¸ê5váÔpÄÛ5­ß~¹gùmˆ§Ó¿ŒÌceáh{¨èê¦üæ÷ªšÚ=#“óEèƒnxÐ AD†ŒnMFP5éwa´Œè£G?@+3ìmßM¢ZÝ~ñí¸a¶1B1Ù)ô°l÷±¬žbÀO1>0™)ˆø …¼;4®¼æ‰ÿþ’L ýq2aâ,ÍÄΡã-n€4<ã`ŽÍùýìü]:Qp|¾Eâ„»îD,~Fl¬Ð=@Hwüæ¦s=w›¤‰ú߸[+þÿîößô°¹úÁÁ>¥~"(S£¶ÌÞèø$·õœ{‚ò醿'Lw¾ùî·aÅÎl«}‹nÇ+mNj^å I‘ì/‹†“'8ñXÜ5É¥ükNÓqÑ:j‰lñoò»€‚sÈk*þ™z8¦): ¹ÿdÇEK¼Hõ9m /gGO MWÌMÈq£`òdn"2à&Fe}.žzàb,¸Fl4Ū¹´Ùß5´&õ-ú¬8°áŒ‚¼ïõœ¥Á‰’æw±åÍ6°ÜPödeÛ/ñcBMSøDiÛµáïlŒAFéÞîõL²2ŸdíÖzýø¼ªÀE©8‰úU’•˜-^ÀÇa€ÖYäþîâ]<à,ø,ËL<˜Ìy…êð}5”YôûFðkØÅéñ{îó„—PMJŠzˆQ±‡(Û}Z –€átççáež{@]`#a߆^…ñ„¢ž°x"ëCÐ&1ãÉZŽÈLÂj{—±^ï°Ù‡Ø!hž€° ˜KÃ.ý×» ~®9G pÚ,Q k6É{ Q!Z]\¾} îÙZŠpè¥CpµÅÜq›s™–Øúd”Ño¢w”‚ß”©ã›GmçÌ ÐìЛÁåe¹ìEL- ÝfÖ"Pƒá†)™ˆ(â¡Ô›†BœÏ¿Xó ÿ^¡²Î%ASçà.?kÊö]3Å4h·kŒÉIT%“5Z–,¬ånb·øã qÞ7.9žÞRÀ¢`_¬ÂŽUöHß 6iYÁ»,µƒXrZ@v¢ô;Ò?aŒ&»MƒÆc[à†e(V´Çdµ8ÁRз1 9ã/ʈî†àв*n†Hº¸ÇUtKÓ®¡W@æQÝ£kÆ €ïqáu?\ó*¥7ÓTößüÁ¹n»õË/¾~ý¯\W•7qÝÌ_,×%þ5n ”&^´w ˜‰½©I™RŠylßÖìlÊ@„^»²¥Ê0• ;vóóe8ßê"ÈÔ(ÛxÀl·ŠŸvŠïJ¦sßuåål(Ó…û2ÜŽÛÑbÐøè5'ÒBÈÌ5žK qšüªä–'&°‡ËƒEô’iÊeÁ%´(®JqCE˦ۄ܊d¥§ pÓžâŒùƒu:*ÒÂë ‚)7öô"' ±=]k¨³Qß¶(¿ß;Rø Y E¥)‘<`2?Ù½æl‘ÁaÖÙ1€tŸ¤\¼½ö¥¸Æ±}ùŽ“b‰¬ §)gT¹éÕö.zE;"CYz¶Öü `¥Z+¸þóð-†àq¿¶„B¿Ì•ï#Ê Å>eð:§^ç¬\±¿+gÛËa•W^õÚOwÆTœà¸ÁïL%âq¼‰\‡ø¨fò#)HöIû{¸ ¼ôaò)wäSù$˵:ü1ÔqïŠe›6x~°n¬4V å¿ÉUuŸªšüçT.à{ ¾.P K9ædDr3RÆ®ô’·Á-ûï§P()§ÖÆÄJ(óxf /mößžýá¢hØ©DFDjJ|6Ê’®¯»$âgo ÞH+_‚ð~®lý ä,aft-KÏ ¬:PýXWŽþ‡ì¡ •© ¶6åä“^ˆ-°³*8¢’ˆc½~]×ûr(KÑ•—a Qèiè+uÿ„ÀȈrÕGYþCµ„†¢>‡{®¤©}P/@ á‘ãö®f°e6þ¿.T[m%•ÀBå´vªC}ò^‚¢endstream endobj 167 0 obj << /Type /Page /Contents 168 0 R /Resources 166 0 R /MediaBox [0 0 612 792] /Parent 156 0 R >> endobj 166 0 obj << /Font << /F31 25 0 R /F8 9 0 R /F43 171 0 R /F35 43 0 R /F32 28 0 R >> /ProcSet [ /PDF /Text ] >> endobj 174 0 obj << /Length 560 /Filter /FlateDecode >> stream xÚmSMoÛ0 ½çWø(±ªOK>¦é:¬éŠ®ñ°C׃—(QÇ.l·hÿýHÉFd—˜!?ôxYÌ.®]"8Ëy.’b—d9“Rå‰5’I-LRlɃ§© ¯4•–´Ýß\’åš ’b שTzNSÅ9ùÙTïT:⻾ÐøŒðv¿uŠø¦äÀÚ÷øë}Hdþ¦Å¼÷ 캞"ÅÝ<2|/hEžk›‘ßÜðE 9±ñÛÅ=uš,0e‰þ}*nž¤B1£sü…˜¶{©°­g SšÜµCìÿXì‚…ÜTJ¦¥VÁ0ÂdâQˆ' _\Cäh¡˜ÔzÕܑˆdó%vTÂzÀè¶ÌIÙl#êfD_µÍs‰Íw]kœ¼™°ŠY­ÜXc!Êc,ã&“#âÇT« xª`îoM?”5M lü倕p÷mƒC¾‚ýVm}„ï0ÒEûvØ{j-nØå*úc¦1L\quÂqæNw¿þì¿ïÿ3—æ,ãvìyÙ—ÁC}©3²ÞTADøl› #ô[rå_!El(H÷/j«š 82BÞ±¸àTÜ:Gée8¦G}‡÷˜Ôv:0©ɹÔÃÇÏã.ÆñN–±Bp‹Í~`¡ª®§d¼¥â ±†ý‘Bëc?!6…Öêü(0~8 Ÿ |ç' ðFý;qпpŽ )d’šÌ2›ZWFg_ŠÙ?D¤³endstream endobj 173 0 obj << /Type /Page /Contents 174 0 R /Resources 172 0 R /MediaBox [0 0 612 792] /Parent 156 0 R >> endobj 172 0 obj << /Font << /F8 9 0 R /F43 171 0 R /F35 43 0 R >> /ProcSet [ /PDF /Text ] >> endobj 170 0 obj << /Length1 1399 /Length2 7981 /Length3 532 /Length 8814 /Filter /FlateDecode >> stream xÚí–eX\˶®!¸;MCpoÜÝÝ‚tã4NÐÜ5Hp î\ÜÝ“àîn pz­}÷Jî>?ïýuž3ûÏ|GßW£ªæÓ4”jš,â Gs°Œ#ÄÈ H*KjJÙ@VvTI°™›#DÊÌ ,òóâîV È+ÀÅ#ÀÅJttòr±±²vÐK2ü•Ä w»ØX˜AÊfnÖ`h 3{€¦£… ØÍ‹ noÐøk†+@ì vñƒXQ@ÈÆÂ `¶² ²ýeIbéàýWäîôï!°‹+Ô€j’µr„Ø{@`KT6G¨êäÿ‡©ÿ,.ãno¯bæðWùµé¿%˜9ØØ{ýŸG'w7° @Ùvügª.ø_î”Á w‡ÿ•w3³·±‡XÙƒìÿ Ù¸ÊØx‚Aj6nÖK3{Wðßq0ôŸ& ûÛ›´¤‚š¼.Ó¿÷ôïQ53ˆ›–—Ó?uÿJÿ›¿Ú!O€!;+;;šýýûÍø?Ô¤!Ž ˆ€ƒ›`æâbæ… ==Pâø6Øö„Zfc…8ºA§ ]ñX:º þµ¥œü6gwGhŸþntìï0€ÍÂÑÁÁìw„Àfíåd †üñØœ Aÿ„x Yâ¿ š ñ›xl’¿‰À&õ› 6¤ÿ!^v›ÌoØd€Mî7qØäÔµÂo‚zQüMP/J¿ êEù7AÕUÿ!>¨ºÚo‚êiü&¨žæo‚êiý&¨‚Îo‚*èþCÐãÏö»—üP=‹ÈýÐõ‚ÿ@¨Ë?êÀê„Z°þ¡k¶ù¡²v ´çö Ô†Ão„žP6ȵáøB…\þ@¨ë]»ÛÕuÿ¡º T÷íoä€ yýÐõzÿÿýZIH8zú@°pp³ÿµ:^è&³ûý_yî..`ˆÛßß,èÝü7[Ú@o2ì ¶@]šw´ ¶Mm -}']0Y†ÀàJ–V<ÚÒ&Èz¸"ˆ;ëÒ4¡¦¸À Á±µ)ÇN9V¬dYr¯7Šûøh²…,â `à3ä¥k"Z™¼ÌéïÆrÈŠØ]zµåÔÄtºüúzÕ£¡ZiÈ›O| _º0ÿxùÎÎlm í#èõv ªqN›R2Xgo5~äáHÖ±Àq©pÈ(ýïi‰¸ûÇ´ÝwŒ4þ¨e¸¹4á_(ì ž–ëc¨ì‡/>ðÆcNÔò£~éÉ•¦‡W¥µËóÙ ÈHz蟗“ îë:k};‹:E g’¼ ãL+;›a-Å—§¾IëóÉ‘,Õ¦°ºÞÄ ¶"'Ñ‘Ü-Ž\w©‡œ¦Š—ûeó«NÄ%{DmLá–;ÍHly™¢gžÁ—“^D¹Àss² !Wà6ÓŠ˜êj4j7‘_¼+5Cöþû=µ ¸±Fº6ɹ"Ï^ ɱ}OÍŽþréG#”J:åêˆÄ=¡¡ÑnÅÕþ¨Dlíò –~_§xGçîä·°ó»2Eèìb;#Ô-þc•ðaU|ù°?϶H˧_‹ï"ÝS«.:±¼ŒÕ™Ä_bóGÓ(ÿ–€½IÍ7Ýv¹ó Íæ v›¤'îpÿ1CÊ­©ÃyïãÒ3]`äªÔxß3b¡‘:`¿ŠƒLÇÕ]H²ÉjÞ˜Tš¬Ÿ#ª¾÷{ÀÞcdqÆþÊÐЋ,¡%‡¹°kñÜeR²žöV{?l›ï°W¤­K¶÷{V®jD8é|¯&8YD(œ><ÆÕŒÏD?fHà$ÑœyZÀŸ`_ÛÙ:9FL?$ ·ÔÖì$–ëØðá³A*ÒK»Û^î.fkÖ«‘==@I%W(>;Nk–ƒ¿ÌÊMö°tÊüÖpùXU©­â@d̤ËS“~Q¤T'È"Ê!™RöDÈ<.ýÀ”œX|—;I´8P“±}4\_t^äóÑ¥ü7¹yñò™*Òk‹Ð€}‹¤–ÝΩ)Mſ̢#'»²5»§gò =‘ ¥éKíT>® fŒyÂVd´9@Ô*ãÝ"Ÿ£o‡†ƒäQiÒð¸¥Wå4G|VaÅì¼<}gývg›ï JÉŽVY9ìZÍÃ’[×Ãbi^ãðÐF`|c“™Ì8FTÜ.,N!j-šˆrŽ’¢îÊûû5¶áHueõáŸZA<ûí„BSfâ-}Lª½Ì¸ °·°|uDv˜è©Ögß…p&föh{Z͹÷ÔuS¹\9…ì%~mï>Ô.c:R+­EÛê°žýòÏJÎ<éñÉH§rï~}Þñtò­ ~jÂ:f…©çÚH“ÞY-)Ä1Ó§;e¹!ƒ“×òVXÞ9˜zÕA ‚*¶u¢g’c6Mê}¡È±‰R_3»úLyö pÝËË¿ß? ÷l]{íG‹ økŠ ü™>&0:>ú¸{.Æ4hæl ™ugÇÄœ¹be3ˆàR¢¸Æª‰œ¬ÜätYžä"Ún<¥í­TC2E[”Å/^RŸ4{ßf’»zù7#w„ì.Ø#1—5;)o3œ?t6i•º¶ô3úoeôò’]Æ*ÝלmðF…ÃÈÐ «à¿nÂ錄ú™z.ì"Ït²ñ‘ºÌ‰S͈Ý9Ì,´oχÚÖ–º1M¿Â>Û{bp”ɯ˜1ä#¹xÕfÔB6î@ãû-öžž‘N>õø<p,í¶}”À£Ù"ÂÀ¹ºKüÉtJ46uòhwabå¬í™á¼üº(w~0=VÄ÷qH©t&ÅÀäZÎòÊ[Uc":X9œŒ.×÷KfÌšê§{,ÞÅö°õƒµ–„ aÉ_=½økŸµÓMûëÙªöZf| ²Ìä†PO·Wª_½ÛÀz‰€²ž’îÁÉn5‰ûÕ¤DfTù‹Y?ÛfñTã0ÀN+ÓK!Nðù×§×9âm– ›üËßígÖ7 NØÂ²dé¦Ò_C¤œÒng£ì¯èç?ì”;ºq¥c~CðL»¸e‡æÎôöpf7K·,‚\$SwQw+ 4Ãå09o4õôéKõw“+vQyÁLø˜g×=ðÈéUv‰ê;n¨H1Ö;ÃXBùÅ¡*ëys(Kâ‘£|Ä­ÊÙÆU•Ué§|˜6?røEaôµ*@_u|d‘kÏdX¶‚6™aB¶ låêÔ>ó/ðÒÛ1ɦ½¿x5·á¸&ÝMô}ø$cì•ôí ¡ C+¦Bu’%ÿ‰ÊÇ^O•p!·h¸nv”t­¿Lhí:²ßóÂq„êÞôýïhßs2¹šT2Å' ç²7•ϦãQM]¯ß1ä7\+îêlDõP8UMëåè|«t(±ªÄ~ «q)ä;:*[àM"tþ>¡±Bü¥åEôÄ;=&"8ÊÜÞœ ÐDçz^åBRZæÌ¾8çq¦àˆ—ý{)ÙÔF9F%Ÿ}Ü[Þé—щìaÌòQ¾!tƒ¯ež®f‚äs%?5d¼´®áÁ·mw·zÏÜO‘ßv+q÷†¬pU*ò 2$ŒXé@tÖÞ¨~¼;`Úªbµ!ødê»rò% =5¥)'1H]x&L*t‡óƒý!ö}%_g ‡Å½ÂmïÚÒϪ؅ÉáÌWŒÛgVèÆDWƒ9s 9º¼¯S6­ôY¾]kkøšÒPè1ÅØ(þkoN¸JÓÅD? §˜W|Y}˜ù›‚ЬÊH¢©Ðd-LX:åô˜Œ±½³#ã†ênD ‚Ù·ÖtKÈßôâÙž4ù0Ú)±Ýá£},_»E*ÙØáð'húÉ£ ¶š_©þÐXé_þ°~v)öB×38øB»ÁbêQþ!X6:±c‚¹D¶?díó¦=·¹†´Ì÷!‰.Þ³M¥øK1`Ö£ 9¯û›]‹È‚Édœb=z¡ÊwôOÊ&ïÛŽ.™üª!³'f)z:¶õÕÕ?nk?Õ3a?­ >`ÚQKt‘m6ƒ}½>uì¿`­ZWä1ÃÕ®œµÄÝJNàÜ=¼}i¦6fF+¶û;óν…< ÇG †ÖÈßY|'Aq@‡#Iõ!€øMv‹H>ýl° œ~ûáöTQß4’—_¯ØÆÈæmGêË–3úþp{ÞœÒ-Ë®áÎgH2gjÌšiÄà ÜHV"ŒkêÈÊ}n1]ßx¹õõÓŠÆjîm?ÅÀ›^q´èÚ1…wFáÞ ÃC—r›Æ¸Íñ®©=Œ=Ã]´g™ÝüJçh@ÀøMAÎ{\Œ;ýñ VÁ'³hGÔ$ûíÓØ\Uté ª¾ËuåÉYl„7¯Â¨–)ÖßzV®tõ}hÙrªÈ½TC~µsÒuÓ/év‰Z~ë÷È0ø-‰/ÃW0Ä ¯K¿T8}'<øENì³Ô•ͰÅK*ý°1ú%.ÕÞŸm^.u}³Ú¿^6Ì#:OK·mÈ®0¾ ÂŽ…oMƒ`ËeL´ü•È=öfjñ%åÆŽb™Úd$ÿÝl_x÷¢¹Z·áî‡àíšÕÐz)<³í3Ý&yÕU”KFñ@8®™O+Z3öªÝO¡È¢Q,®Âvbä‘j!£Åè›|ñ#¶Û’þîÅšÒËû6×Èôø£ìñ˜WçuÞû²ŒGt_æ5À¹„¹2œ¾Þä÷Egëˆëb48ð!ý[?ÒÔDÖ0{Ñ+ëR÷¾Ÿ±"T‰¥Ûl_þŠ>¢O¥"dËMׯVO‘ló™ ÝÈåͦ²{½ÙÞïË(™§“5ý•{Y³ÓA[OÝ©šêv¾ªn3m#¯me'A˜_:Ó:ÌEÀÖÖÕ½ïÛæ¿S&áx{ü5ö©À¤ à™­.–ÓÛó™qêôõûºgÈò1?Šh´ûæŽMnÏ7=Š~1®AÓÕ·ÊñÓÉa«õϨy¡(Q/pæƒøš8žîMÎo/ª§ºQ÷èË«whÃøòÍгrÏ:ÜÈ>~™õÖÄà#JlúXy~ºÇ{2$&6~ƒ'{ý“²B¦|Ñm o;ͦӴD<¾fùIãÈõýÇ$¥0㸴4sE˜XÂùƒ–KÂ-S—ds}"ÅǤbžÎ*²xÅÓ·‰ëét£gÙÁkjã äž*›Na-Ô—ë+çÎî] gKÛ6fûqŒ³#5cMuÓ›BP¨iR/ž< ªWŒ½’O¿BŠ–}ò²¾:zã†Ã¥‡ÁUÁÏ’K:ZèaTÅF.³,ƒeDŸ¢dH.å§cS‚w(ïÎvíÒŽ–@E _qØBCkÝ©vx-åXKvË~A卯4ƶò·RWœ ŸŒ›ïÇ2(”æßõù&0±E»6HÿÊ=Ëü<6lÀ>ýafÁÈC|·)Ñ&KG+õ€®É6½Öˆîvm{ˆ0ì0V[[µ\[‹ê˜1þ 4­æ(¢&ëÓc#Dö*\@sÿ¹Ÿ%Ä[Yîã ³Ýá4·«‚nàŽ !±ÑÉ™ÆV¥³“ÉìáñU Uñ'í—8‘É1¥itB´R[qTD˜«—7<Eâp)xê‘lœp› shàLdÃøœñг¥‰ qjïæf÷˜b‹ö¯©¦Æ¾pã0Ñü€Šâ±@EÛI¥ ÂÇùgþq$±:¥¦ü–š‹ ËרõûG«vdzêtÂC §Ú‘æäå?ýó²k&ë4{ïXe›÷IϽÖ!ÕeeÚ|sËêÍ1 ¯2YŠõ\c2E¿6Pph|ŠÌ†ïÛŠgç¾”[˜ J¿µ¿Z'×g~é]+ÖñaØSGÍ@DÔ/ʽ}gƒOA÷.¶ØŽ’£y1D"Gš¶ÐÛ¬'u»T žÁÌ(1è'àþ …Å9•ÓžÂAK¨¶¤qQâ û ¶~ µI¯ÍÚ5EùˆfÒb3b™¼r*À-l+iLÝÁrô™£Xr‰òñ€èÊïṩ¦Ûóñ-ˆ*æ$DXAyy}eä¤Í9u[Š­ùQ{p"‘ÂäÊö ©›2á%ÁBµg:ýÔLIŸ‚Æ€Y\‘È*7 ?Å$>|½L ¾û.‚¡ý¢Àûó¼Äqõ£ÒùoÆfÑåÞñãóAxÖèëAÚÖØ›{;Èãë…9Á4MY:ÕüßÑœ¶!L®äɨçå]º­CT‘ª·Àñ‡$É­F–Ùˑۮ™šR¼0í¾&ñÀòÞòy^Ìq$Ä0)+M&LJ1õ×üx9‘콞SÜ=Yçn,)7âô’Ø z¼ý…ó‹!ÄmûêWxR¹¶Òw÷Lç(‡/D}d €m¸Ùt%…ápg×ÔIÚXg{a]"390S!l“_„ð­¿p „gOÐ%z“¿'ðÂÿê1Â"Õ÷U«1ÖGÖƒUmÎÃ!Ãl%“œ@É>zz™˜àÞ‹v:§U |d§`?y‘.÷f¤äŒ»ÆækÎîq·n·.àÄÊæ©»úç^ÄÙÏ Hý {Þöc³mñrºØyºáéût³j*—Ö: ”Űó¦b.Š™»¾•þ‚rxFÓ®LÀZ`n"O¤ø+OÏ,ö)õXÀ”ïFæáGßÉÎ~fT¸<—FÔ#%&­ºT:x •r)0ÁSÍ˼1‰R­$_9cPÎ;ÃlþÌ•‰ËŸÍ`»b7Ts²?%íSÖÜú´5cÉÅ>¡™uzeX˜žÌæ`¥¿úrŽ«¿ÂØA¤¾ÿ£f1êq±!ùÍ2yœµ¼´/5ûA·õÞ­Sßî6{©ÇŽtÄùi’ÃEOÅð„Þtä‘üó°nª1.PïnÿY¨ƒþÔ¡¤ÁÅ4ê¨ÀÞÝ‚itôè¾1•{N‘Îû€Þ·gêç6åTÆ–”ÜÁÅõˆvQ»ýç¼àŽûgŸA °,U²6á\¸×ÜÄx¬u(/ŒÒ¢‘ýµÀ²x„aãlx)„Ó°¿]Eч«Açé‚Î_ÒnÂd³}*µn|xT,â»ë$^ñ]\%!4°o=ÜŠ*úÁ!žun^IðèóZoöŽ ¾_{8Is}›ªZ](ăå¶±‘„~a˜a(?•nj*}ª.ؽÅ7Vn•ð³Léú8ΩœôÚ\þ¼SNí¡{Ïf)E‘ºyòazÚ´“B¿¥DU}­¬Õô>wF V\·ä†dTüs[·zÀ‹ÛúOBBÝ}Ĭh*ØcO„é fÍxÙR߬1°¯Ë¶§»kti2gJÏü–¬ ]¸µ)pÚ`ìAû‡ƒ•~j£¸vÈÉ(¸ëׇÄí0 ØkßyŠ…w*óšå%mñwô6eEf\]½Ö5¼#6l—Y¥M¢if[ÕCƃpq$ÞRÐ~%DbÖíIÔYi³¯n~ þ`ˆ©Ø§ÂdÍÚºû0Ë_`²¥AHAF“2œ;—» )Õ0™Ö#ŽèmˆÈ» ° çè yOwÁp¿¤ç`êq÷œ˜ì’9H¢»MØ´Éú.hWQ2“y :ðc"ñ°@¾f[Ÿ”…%«ÞNXÇXkÝÈP8UÄ=UHc±Æ ˜Þ'3Ç\V-umÏæ½TŽ÷[†}çGÞŽå È–lIÏ€½ÑuLë_]üe}*i—wš‰wSW¯2ÍÌM³¹çK‰ ùPxféŒô3¯’Y3¥¤•ÖÀµ$„u6,‹6/‰=QºMO‚>-¦lñ§6Þ²ÚßúÕrb}Á¬Ö¦ýÌpíˆË9Ê…a6Ч±}R3ßnÚl#~M”~â}ó% ¶OüÈ7ªÈø”ˆÇp(çz®Z”ðLxš°ú^5_ˆjó&‰Ûßêåcº¾ º4l†åðîÞ™×Vh¿»«Fªgõ÷~ îîÃÛ®®6S°zè$ŒŠ`–23$ÙS¨fßVê:S0ìlUsqO¼Gâš)þ€"ÖN)Ålx¿Ç mêŽ6Mæ­óF J‹ [íôf°A>%ºœX³¾Ñ° T‰¢¤³º£ÿVÉXó*öÖ36`Ä–ŽH‘I’‚Zþúks³7äIÓs“)~F?þ­O£å¥è/>{2d©&]éB—<ª¥ñr¼ÛƒE ÆÑ²Ã°Ý)ºgö’=D·Ç9Ö3Ælù‘ÁÏ’3ªF¢ËŒeÝØ0úð¬wRøVË`ŒÑmàbÉð6÷Ü€.íÀ!49&“fC½¹ñYlé  A'MIF‘3x>ç‘Z8³Â"ãÞTÓ }¢•ïD.ó‚¡àmœì4ßêí÷¡ïM3â Ø «Þd(†òF¯*7UvÉÂ+ê/ª] ÏÓµõPÈ—-ÐÌ÷´\¡·ðJaX2ÔÆž¨v]ùÄ"4æi¼v!}g¯üÈ,³)»æ³…ª3üZ ^²ÎÕŸÔ:†&zkv(çG½è™ÓÞòþ®Ñ‘?…$¤­wÇ,öP%o¹±'âÉô™ŠEœ˜TAÙÇ:ÀWCÓꨕ奢6zû*Y9ð»“÷23ÒSL‚QǾ\¤—Ä9Éêµnä†t¡Q"LÚ„L=Zé:ÁfÆjlçTùÐ;fè¿\˘tÉÄ|‘ó,Ó–0|]ž¥/S¯‚½è%g:'…/o³˜1ðZ¼`JOô«2ȿӼŒÃdÓ¨â½Ê¥ìÛž8ÏÜÆ„ŽÀY†©&õƒ MÈšùG÷ŽŠã›­)ÿ‚5ÛÒüiör=IÆrÜJröš!Ä®$SMÇw>úñ$`¿Ž=gµT‘›ð*ÔX†±Êà¨ç<êGþºë…Á+µ¤}J& ð±8Ñqá’vhô!U?çÏ1`$©FX —°<»ƒbÓÜêáËn¥ !|‰ŸŸÅˆG‚ð¿YŸ§©t(É FG9)¿c_WŽäÝI;½™`&cWpUÉ`ž¤¾F¿æ0 ×e~vgŸÅ4Ò±‚ˆ¶_P¼ä£÷í²ëÁK/Øm4(¥Ûê ãWúÞÝ•Ò\x6ÚG2-ëLeKãc'eÛJÃÏ[[禋zåNê·¦Ÿ¶·Á²kIe«äÑà*~Áî%nœ%F¸NÕ¬œw”OC‡DíZ-è·eÝ3Rê7ø+F9º:“°ÛÚë 9Ç3̓b¦vMÃ+(UÁ"Ž^‚’RÝÏøÇà×x«¾ßZ"%wÕN~‘v¸$ømï3ÉÙ¨TI‰`š5¦¯±eu`¯ 0.xºîPyŸ`ûAHp$8è‹·,Zv_Èzµ¼ µ1zÅhwÕ¹&º|GFÆZ´)7 Ÿè!«>ý´"ºë©;¤Ã9lŒt¶}eóƒ©]TœVî–ªw–"&u;÷•8À†èצ3ØÅÜ’Û[öºÞŠY„Š^‡ŸWˆÅS% »eXîÛ¦ó! -I0³5zòÃÉ„†—ã6wQêm¯Ùî§šöÕDZÚÌ•Çr)Ó¥†qþбB.\Û‚ÎvŸ˜ iì>VÜwöñ 3!pù{—Ëí¦0LÚ@œ[H«y䓈>åœm^ëQV_~)CƒŠk×®I×ù×µÀRòÍ…‰“ûMùžJÅÏ ëºçfšŠƒ¥ oS÷ bÎ5¥Ï·y24?Ä„Y'Çù#¢ñú)Š1ìñ%Šì9!ÀövIdÒ=:Pw4]1ŸóÝV•™ã¸úbÊÍjOÚñf&ç)"N¨ÓѾ`«6†¹¢ÃzÔ ?DÚ|èiE]°Æªz?tÁÒþbù±¶ì ,äÀ¿µâ&¿X*c—B…®C&Ýõ)Õ¤ú±€&"#-oÛ‹oÁ[B´d=Ø9©IQò•åiNZöè3Ï»—_…«¯¾D¼Ìj£d1nÝ6íÉrAõ‚qŒÞž9N:¤ h"%3£uEâ¾\Wž}e®Þ3®ñ\\Š@\3¸e0BLÊÛÈÏ?á„ DùÄ?Ë#¦ƒ«mP['By\š¸z£}Ÿû<û¬éúð=0Ë•Óá—$Ö®÷¨ˆëå$Æß3ŸY7z°╈ë<á뫨ò}ŸBW;Œ3YYØgƒeæ¬Õ¾ƒÂå+¹ä0àzðfñsK9¬—d¥‰ÈxîvQN/[pÒ£Ë^õEõô1hŒ„´Ý&2¢Q{Ûp÷.=„'§9½Vóþ”pDEDÈŽì9koð<šÎ¥¯e¸ì] ‘ˆñÆÅgDZóĹýš=}7‘7C­ÂÅñ2yΚÑͯ¤Kõ•·#¾š–‡¬8Nb!Ò­áàË*‚œ¡VãæÏ=Ãù9ü±kôž17»æ%âÅ•¾“vkqÉÞϹ5O æXe:÷ kR ¤ïRékœÎð%Õ´óØaþ ûwýØ3XÀ?c›ƒØ ý-]M°7µów±KÈ¡k"­hÍ…èò½">yÔ•Á0ÕÕbšGn‚9‡¼C„N&…Õs«âæ3WŠŸÅ0åZ³(yoŒ­^~ÿrÑ4“Œÿ5w€Õ™"&“Eêsx{P³ÎÇYøä¹4Ÿ&òª£Èwn(ÅÄW>ûä%äÒžµ"cûZ˜RøIj×VœÑs{u´ßó¤® O ˆj> endobj 169 0 obj << /Ascent 514 /CapHeight 683 /Descent 0 /FontName /ECJPIW+CMCSC10 /ItalicAngle 0 /StemV 72 /XHeight 431 /FontBBox [14 -250 1077 750] /Flags 4 /CharSet (/quoteright/comma/hyphen/period/A/B/C/D/E/F/G/H/I/J/K/L/M/O/P/R/S/T/V/W/a/c/d/e/f/g/h/i/k/l/m/n/o/r/s/t/u/v/w/y/z) /FontFile 170 0 R >> endobj 176 0 obj [319 0 0 0 0 319 378 319 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 814 771 786 829 742 712 851 814 406 567 843 683 989 0 844 742 0 800 611 786 0 814 1106 0 0 0 0 0 0 0 0 0 613 0 591 624 558 536 641 613 302 0 636 513 747 613 636 0 0 602 458 591 613 613 836 0 613 502 ] endobj 175 0 obj << /Type /Encoding /Differences [ 0 /.notdef 39/quoteright 40/.notdef 44/comma/hyphen/period 47/.notdef 65/A/B/C/D/E/F/G/H/I/J/K/L/M 78/.notdef 79/O/P 81/.notdef 82/R/S/T 85/.notdef 86/V/W 88/.notdef 97/a 98/.notdef 99/c/d/e/f/g/h/i 106/.notdef 107/k/l/m/n/o 112/.notdef 114/r/s/t/u/v/w 120/.notdef 121/y/z 123/.notdef] >> endobj 135 0 obj << /Length1 780 /Length2 1258 /Length3 532 /Length 1838 /Filter /FlateDecode >> stream xÚíRiXSgX¶¢´lzC#È’dQÃe‹€ A´xInÈÅ$7„ƒlÊÀ!bQ4"Á ª %bSuPLa"%à†lN@¥¬3ÏП3¿æ™ûý¹ç¼ï÷ž÷œóa-ƒwÛ“èH4DF8|{¼Þðò¥â¼ƒ“‹õæA F8> òðîîÀ_ÀD€€÷ <œÝ0XÀá&òà&Àyo\$¹$6ăi ùLˆjÐ@°¡Á?Ñ ±X@èâx Їx ݃Çt˜Æ¢¡˜ƒq\´äÇa €ëÇ4]Àý%@¼xÔ€[t¹@=Ò+ C Œ#A‹A¨•ÿ†«åâd‹EÙ‹òKsú²aVâï „Íð!„Ð!g95úh.¢ÃörÔ²`‰§)8ž !z0̧1ÈŠ‡–ò‡¾Ü:¹% Ž{B(_»ßwºƒ0‡¿'‘ûOÙEöRŒÿWŒÎ‡ H't¾x”ˆžO–óåÐ:̉.›Ç1èëA# À:$ !êØÑƒðÑ+:”€ð0‹ÅtB>ÚD£Ý¢ðâì8Fó@Ä‚|/8&f ùc§^^ˆ0Éžà ØÜ7£¥]œWWBÊ¿1iâð—ZêS̀т$„h˜¿ýŒÐ¶dÆ–5‰jS}%=5mÕ¼bš ) í½YÑ_¤Æª–ÄÙ*¯QçêÊôTšªõ‡ß™ÅoM¹G~s,®P¬ø J8¨*?ò½uª<(q„kõkÆKý›Mf^¹¨íz¨ü©¶$¢êÇŠ‰Î—gƒq>{Fµ,WÈ"e§27»RÉ嬽ÙY×lˆ–¡â¸ÚçìÁÃ%ŃÍHÍÖn±{2‰´ >Ÿ“hWþùÃþ·-òΚ¶ë=V #† —mŸ™(µ x‹’ždi‡õôÁœ´ugÍkŠÓ¦ ¢÷WêžvlçZ*B^iǼ”û´‡…O y&9iœŸ~FL‚³?´úÎæv°ÛëÃS ÿ¤Ù£ðÒ®¼”†ëržt’3­Ú‰ÜÏ5Sµ§F þîrzÇÐŒ{ Š"½îÎÉÕ „¾h‹ˆ¼C_ˮȳ«Ó÷cPNX$›nû"÷všz—¦)M1öTcNºÃe]º|(Ñ¿ÞDeç×T×Ôë´Ô ·Z-{afcöm>=ÎW‰äÈú›’ˆÖ¿‚/fnYß´ <´ÊÞ ·>˜ÔÃÄŸñ3ÔÌHès#"'@‚Ï‹¼–9ul(¬ýÁµª[£´i5p…I‹”|‰³³Ug]NO63#øy}0|çίÌ‘väçµ~»­g4¨h ].g¨áLo¸ù—}#ÞéA{w›Ú3ܪzŸ¡—ªs剸È!Z3ÙíÝËóy̧M³¬Â¸}ß ŽHW~[["箎ҟœMëô|?d}Ìe¿è0_µb$tD¯1½(9qï Æoh1šš'‰ýÊNÞ¿Û}ñ6”. ìÅx¦ŸÈ)˜¿µ 3Øwãö¾ññ{ Gv\(€ûO‡Ûˤ°$ JWwlœòôŒ¿R=›©³¡_Ñì9A©Oó-K¾PÿhÐWi´`ÍÕEÆ.Ô£ƒ6:Ýz¥êDæÅüñ?â,(LmN+ÖOãœÉtÙÉüû1ƆOxÖ¤Ž:?lûÙz6^•¹:¿~ݦ­6ŒÁ ²;Vs/¾ïñIh,·] A“˜Â¤-DåÆË) Ó¸d„hոʂ¹i÷xöMOF¶©¡]e "Ùå¢ÒY¹°Rc[i,U2 b‹Š äºU³æqHH1ã׫Uvç¼ú]‘‰8ø2ù¤å;•¼*êá¦]F¼ BAmÀ)÷âx£Ê›R‘xŽ0Á¥Œ]¯s#:Ø]8²"繈"¨»j""5çG&¶ dë¬o®ìÛ_ÐzMàpûlçOå÷ʰ}_Þ-S¯Øß˜¥ÓW¡ñ6­m Ë±Ñû¬äñ¥ÉýÅ- gN?ˆ"œÑWòÜÝNµmG”\ï=?~¶»šsš’'¶~]ð²q`"ªƒÍ͵| >ê*±º‚¯úKö¿ú,ª³öÙîÆÂ±$ÙüåÕHÀZLZ@gÞô¤$@®Çt}6o^[;ÿ]iUë”ÖÚ«9õ¬ã·0¶²‡=jkW¨&’öPÏ¥µS1GgôM½šz6TßÓŠí¶Ø3è¤sž³Hã¶®6·ò̾\Ù›‡«KI‰óO/ßûÜ8¬ëÙp{)(¥Ò·§.T¬øAos¡cÞŒõëχÍÇËš “´oÿ‰)ïÚó\ܱïjåýî™Âä­WÒ×4S¾.ê¸];a Ó²þÃóÿ  y|„ òa~$UÖŒendstream endobj 136 0 obj << /Type /Font /Subtype /Type1 /Encoding 177 0 R /FirstChar 13 /LastChar 40 /Widths 178 0 R /BaseFont /UKQNNE+CMEX10 /FontDescriptor 134 0 R >> endobj 134 0 obj << /Ascent 40 /CapHeight 0 /Descent -600 /FontName /UKQNNE+CMEX10 /ItalicAngle 0 /StemV 47 /XHeight 431 /FontBBox [-24 -2960 1454 772] /Flags 4 /CharSet (/vextenddouble/braceleftBigg) /FontFile 135 0 R >> endobj 178 0 obj [556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 806 ] endobj 177 0 obj << /Type /Encoding /Differences [ 0 /.notdef 13/vextenddouble 14/.notdef 40/braceleftBigg 41/.notdef] >> endobj 126 0 obj << /Length1 790 /Length2 1615 /Length3 532 /Length 2191 /Filter /FlateDecode >> stream xÚíUgXSi•¤©4iWÊ "©&0â$€¬JB †ä$S4¡Ù(ÒÆ‚(EˆÊ ‹ˆJPP©"MJP DE‘!Œ´0×õYççî¯}öÞ?ßyßóïÜóÞçùÌ7º{ZãiP0¸bq­Qp”àH$îIJ% 37wdƒ.b9Q¸ €Âál€]<€ÆH[;,Æ‹…™ŽP¤€M ㎖K$[ÏÙt*…)Ü0)Ó R€'D¥ƒ\À3ii }¤Áa(@£S¹@0JgÁK–v²B ÀöK™Æ‹üÚ:²92S€Å²MK@f’±€†À{ Ùi ÌËÃÖ÷â;x Æ sI~)¨¿´)L:CðOÄŒäqA6@„h ›õ=ÕüâÒè<æ÷Ý\ ƒNųB `Ú GnýR§svÐù ÍÎ¥†!\®ƒ,Ú÷Ndñ-û@ø8ù:’}¬¾ v¹çN¡³¸^‚H@~#/cÔ7,‹ˆMç{‘p$%#ÊÞ¯«ÀïÎrfQ!  ±6…ͦ`H™‹¢QEùÈ—FÀYW¶ „@lØÒTm0‚Ã¥°—ªË[€pû’Í Aù†~ÁËè¯ßK @ühk4°Fce¶XÀ‹Œý7"•Çfƒ,îò/%Kí+¡Ë‚A>H…‰ú ª}|øùÊÄ¢8ç‚Îkò–ƒì«ñî%TdÛÃLJí×>aWuxF“š šáôbsv— h!X±^%>¦)Ú‘0 Ù¤ºÞòRާâGêxÀ¾•ÍÂÆuf^ò˜ÈøUd•ÕÇASÉÈÁ{ò7]ߢl/ë}HY]Ôß·ð{\Äáv±H%‹f:z(¬qÍ}ÞŒœj›}ï@¢Âf€É:©H¯a¨;â™j6.UH\Õ›½m^iÉyÞCåF‹üòq»<í” lú$­AÜrZJ^­ž™'ùµL‘ÏÜag:žúrçÓv4“‚>Q£ÙðüUÆp¦é#}©–üкS¥¹õg£äîØ7†ÝËݬF/ ó&àn§­¸)¬¸ÐÚS¦ìªyÑ%XšVÿÖp%Ì¢¼Þ¤{÷»ëŸM×MíwC|dé09°¤ÜWO²YfQã‹•m‰ÙL—r(ÓkVt¹ç±³þ¡Ó÷#óR ‡>ÖÆ¯Ñ'#’UÕÔµnU™ÜO:<•ìgxe6ýÊN'o¹qí?ŠUÄ­OÎxºÀYCʈ3;Òï‹cö(âªç;'+¼+½«$ÇêæÜœ²©%ˆ›Ã˜êm‡îu$¥Î_~±åAu»—•ÅFZÓù†Ú—º7óÜ .³BàQżÂIâ]¸ºN Ó›”Òœ±v?·Ìòd]‡ú¤½’ÍÛMë¤žÇ }Èz6† ÷´kUñš­dâ&ï-õ°UÉɽØ*^%„bƒQ¼r\²§³a{YƒùE%œ•Êaȧ gÕUbVT4ffW¼¿õ(²9`wͤ¢x%Ĉқ¼>µÿˆ[KÊîÅþß­2-_‹ž˜YYÓà}783•*ˆƒõ“ò‚FÅ?!ñrX±÷­ µÑ6éÊE‘¸ó½´3¥eU/ê÷Wxµ‡,¨ì[_q^¨‘•ÚÄêµ rFôn Þø™° î ¯Ú]ÝðcÑy2–²»-ÂÌ-ÖÚY£Ò‰Ÿû]£¨xd+öm‹c*aö–šKóIÍõˆ=R‰ÑCCGWø¼ŒUl³Ó4I¡Ÿi«Óo Z;n(¥Î} ÷Üà~í-ÆåBt×®57vXýow]JõbžUtåÀ3QbEñ»! ÉR.ªl]ü¼<Ð*ÉKð¯Õ‘ž_Ü77uîÄv‡À¶™&‘S_kf¥â¦¡:®dÛ§á0#RéÚâOê³jgsê_–\P¸Ü5kŠƒá%Z¬ ü^Ô©ðøi½êµÅ.píJüU®rܹŸ‚æc9¼Œ~¿ÕÓEî¯Ù™c£¼Ùin?׊ôcOŸF6¢ýT›ž·Ê?}Ü‚9Ýãôx¬Ï¾Ê2-c|Â"Äÿ´ö/þ'¶7\೜otµW»\ §D+ôqÿ&v̱ü»YG^·tÇá(çúØôy 87É 7X&– Í/YÇr$꘷r÷sqKž›ÃלÝŠçì4µ.zôAÙF ÖíÙe4›k %m¨sš<ÛÄ ¾-»Ž"ÍoÛœDöâ“ã„Uݵcù6WÃJ×ÌD`CFRgJ7ÔáÚ‡[5¤ÊÛOîoò8ôðÒ¯éRW;ö”ëC´fG“nåE=MµU&½iÑ)U9c¥MÂì²ëD}#ï@n¾rêýmJÄKÝòßLWiÖº:ÚŒ]ÂL Š˜¹ãºïô2çãK×öPoÇ `ë·—b ƒ÷-ŒAhÃþÉòÊÕñ™è…ô¨OžŸš]ü¶JK‹&"“Ì”/Ìõ?e¤¥òƒSšq~š7;³g³@K’—Ì5xlVul£úY¤‚¶Wõ³Â΂0³- YHϘijX£Ùóc[ß X/˜p ÷pv¨É¦|­;šw½ Ã>g8d‹Z\•´Êº3t+#,ŠýMYEœ•fX¼Ž²0Ä Î-À ¥¦­m?`¬*†ÔœïéU] ܲ'Éýa¸¯fSJŽsGCcʪ´ N5S8yF"ùÙ"JcÖß!½Xåa[T84Äo~;EØoÞ-õ½/ˆùptV‡×TcÞxìz$!CMGŠ+Ú#ÌóùÈÜKa]SIñD™º…†!gb­œï£LûÖÇ •g;MH^Èÿðý_àB€Ê)l.Ĥ°#`ä©‚endstream endobj 127 0 obj << /Type /Font /Subtype /Type1 /Encoding 179 0 R /FirstChar 63 /LastChar 98 /Widths 180 0 R /BaseFont /VDWCZV+CMMI9 /FontDescriptor 125 0 R >> endobj 125 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /VDWCZV+CMMI9 /ItalicAngle -14.04 /StemV 74 /XHeight 431 /FontBBox [-29 -250 1075 750] /Flags 4 /CharSet (/star/O/a/b) /FontFile 126 0 R >> endobj 180 0 obj [514 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 782 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 544 440 ] endobj 179 0 obj << /Type /Encoding /Differences [ 0 /.notdef 63/star 64/.notdef 79/O 80/.notdef 97/a/b 99/.notdef] >> endobj 123 0 obj << /Length1 835 /Length2 1546 /Length3 532 /Length 2149 /Filter /FlateDecode >> stream xÚíRy8”íVRL› ²¼Öh˜±'{)3ͧÅÒkæÞÌ¢™±L²L¹ôY*[e‰/CiA¢$Zˆ¾eô©,i4%)R”Ô:®Ó÷ç9ë¼Ï?ïsß÷s?÷uÿ#=ŠŸ¹3ypØ|s<o¸’ü¶Úx adäÊ…@>Ìa»|ÈÀÛÚâçÈPOpÖv‚Áa¸r"\84Œ˜¸šÎЬgÄ…i  ü0ˆ%ó LÀCƒ!¾83™€ïì à ñ nDÇ ðx€Óø@ ³ØÙ@žl°þÓ##~PQ—' ˜ÈBš²ˆt›)è%sdwA²$ÿP¿š{D2™d5k?[Òßh3ÿpX‘|ˆ 8tˆËþUê}ÏF‚èp$ëWÖ“2aš3;” æxK ΂ø€yp D§À|ZÀ™ignÝþŠx÷Òû• X¥O·ª^õ,jÞR¦Çù }ôBùeÃ%ƒ~¿Ž ïTßH³?¢Â­Õ8HNš8¡rúBZñÉ-™‰Ä«˜¿üP‚}i9K¿¢W{^ßõ[Þ±äÁUvW¹Ï ˜õL$ÜZRHé²­´i²¯{1roö}MëÑ¢Þy+Q‚^éCÝñú„ܶµ´Ñ;½<)(žGÚêN†ãâ ð£Ȉº/g1NÆžFË)U9ì£Ü™új ×Yª·©˜‚¼  MäÔ‘Ö„:¿àw/z™JâáeèËîßp‡Ã ãfüqÁ¢xJ‡…P¯M«¿?{÷±èoª·7j³‰œó(‡éÖÏX7‰Ùsíê‰C‰q•jõ)­»4 6ßh1Ýtøà—®F ¿áPb‰Fj±î0ƒ=µËÞïtµË…oœìÏj(z¦ÞP]è½p?w«¹ð¦j–ס82ÝHÞ­ÕyÉWPëÂO>)LòÑ&¹=´(«Kà. z[¤y’õ©E³ —‹º® !ýV™¦›¹û)ÅE.ا¿§ÛÊØN¬»¨T*¦ <² ϶Ԙ/ù×J*¢í“ýµWªÚW- \~þ2¯[Ðû Üáí±ØKåäOÛËã–”¨°£ørj›Uli­Â‘'u‘ä«ÐîéŸBR²»ù8/O7A኷»9Ÿ$‘Ž×.FBÛ–ÊE˜™íúüîmÍêò›ô–k}™ÙÀæÙncÉeÁK>çkŒÛ*7çÇ[ÍëJ{º´£A|nH;ô©f"øÙS±ÙÙgˆƒÅaõ…ÁµÈ»ç×£‘’jQRÅÊëM=µ¥¶íùî±W6}¼7j0çA?¯Mz¦J…Òh~ƒ¸/û›OspÓ^£?–éèK«¥eÙÛL2rÌ• ÏŸ±ÖOÂj«¦,7ýø.mÅ´ÁóÊî^XA½Ö‘†²¼½›ñáÙ&±ŸëQÉ•(¿ýÈy¦õ:’ÝAµ8%oo¥b!5”¾. â%bïÙœ<¬1ÕI÷9k2’s éV‘çR”ÏTžÚ€óKp¸Ü²³½zÁêáÉâ´Ó¶²|ÜÔER°s»ˆL^ËœÞí@¼°Rg襽b)Óƒ7혒}ìSV§†}:9ÄØ!Ö#m¨o;Vm'Õíè¼Ñ%ÝBÿ26QðÍõHÕÞðÞ¤Ü@_‡;f\9#tKO´Ð"©å!?Ч«d Ñì[t._Jƭݺ®¥mUá‘ôec^‰ª¸Š%_ŸD´xXîY±î85Ž×„ñÒx1¶ªœä†ª?1˜£uëfÊ©‹Ê]3hÁ¦ÎÐÔ‚Ø"¡Öèxå»S>ñçFï¾¾=ØбãÁêɺÜàSîÞJ fݱÌ—Û…}qÄ¢õUºbÅá)Äž¾51â3U(Šʦ½Ý©¦ uzZI|R² ­æTÙÆ¬|'鿍 Íœûˆ7¥·‚¯&jO½&+»_¹éŽÿ>&—š.áÌ঩Ðz}“’h5cçËj þéºdäé Ęەr\âÓ²…Å^_ç÷[Ù¬è¯M‘¿¦¶Dì’7%´´¢SL ‚¼áXÀáƒØJÅÝUÎ7Ú¾_ˆ1®q±?x?wÕhÞL*©»fDW~AÐÀÑæä?®g¿Wâs¨S0­÷²\–N–º*Kï#ãÇõÁZN|Ú2d¾£€Äao1)Ù:±º3/9mëŽCC‡ù¯zSëêÏílîpæºtm2µ¯1\?©>³G>iÅÀðÁžºDÝ % ©¸ÿðCüßà€Ƅ@.ŸÃ¹áˆ(e‰endstream endobj 124 0 obj << /Type /Font /Subtype /Type1 /Encoding 181 0 R /FirstChar 3 /LastChar 122 /Widths 182 0 R /BaseFont /JTSUND+CMSY9 /FontDescriptor 122 0 R >> endobj 122 0 obj << /Ascent 750 /CapHeight 683 /Descent -194 /FontName /JTSUND+CMSY9 /ItalicAngle -14.035 /StemV 87 /XHeight 431 /FontBBox [-30 -958 1146 777] /Flags 4 /CharSet (/asteriskmath/bullet/radical/dagger/daggerdbl) /FontFile 123 0 R >> endobj 182 0 obj [514 0 0 0 0 0 0 0 0 0 0 0 514 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 856 0 0 0 0 0 0 0 0 457 457 ] endobj 181 0 obj << /Type /Encoding /Differences [ 0 /.notdef 3/asteriskmath 4/.notdef 15/bullet 16/.notdef 112/radical 113/.notdef 121/dagger/daggerdbl 123/.notdef] >> endobj 107 0 obj << /Length1 769 /Length2 782 /Length3 532 /Length 1335 /Filter /FlateDecode >> stream xÚíRkPWD‚vð‰ÁX(¨¯/ ü5r€Šâ-ñô”  ÒÉZ†”+Xàè:Fòþ*È8FPŒU@§cJEã$dµà¯T‚ȱj ÕÙ EAâ,ˆ‡r’â Ç I©xOÀ„&ù]k+dÔœ)à™tœE‚¦”Z@Àž0ŒæfAÎÉ¿aj²xF© ÃTcòc!}ÐÆT¤RûV%kXÈ€Pš€ 5™ºNx …©QMîJYLIâþ”\ ºD€ˆÄ RD¦B"‚dqHÀ”j8ŽCŠ˜l…Ko܈04:bíº ÷‰­Ž÷"0’b×j“!@Þ“Çkô}ÍeÄ©  ʹóî-nÒ¬•N$%žb/€1 ¦å!œ”§X v €¤˜ `*gX( h–»¸dÒ@ÍðÆ–Š¡Š¤4ê1t!¦æ¢#ÕIÜ’ãø‡_@§îð!Àׄ¢>bàíí›ö7"®aH±ãÿ—Ñ»:är…0â¼öV_š™Xpñó3é+‹ï5Z¼¹u¾·Ý¼fã¥ô£$‘%.Zã gKú¾é¬Ü_gôÓ‚|é¡9Ìw6Yaû6½<1§¬rïñÒ9:ñuA[”µ6coþÌQ÷E}Òº”õ‡ wuÚI®3¾úÂiqûK§·f=1Å׌¿´ZÎv‹<èæhëÝ~ÚÜ`2ßZkxÒ2bç`‘Qùë·Ïìâö§ï=kÕØzú)VëÔ|´KŒžæ°¤-¤ÆÊdwiãBÝ+C»Þ±¬â†èãS0—…8Û7zÞùcÐËsÚ¥³^Íçn{%Åf=X÷ƒÔjjùs¦‹j šêîåº'Oå; ß·žß.X|»e@=r2\bÏóe-kãçú.¿³Y ýÜ+÷P^#~ Á~’ÚÜà˜ ^½ûÏ`A^WsŒáè@ƒìõIËõkË,ÜÏYV‰Wø¬¨BækUëæá{ªýÒ|E««·¥4Þ) r>2´|uÿ›‚fÄ-Ø'¤émíº¼ø¾Í[:Ìœ7eiëžSp¡&¯é©ísêaLcñ[wx¿ÿ¹ék ÅöaæUÙÍ3ú¯×5í8&Ý:ªñ(À§‡)²:êK"êW4oUº¦ ßÚÌúñ ßPù,"¤w¿ž}Û4ËP1ƒýú÷MúêÃ’¦g27ßð‹ ™¸ò’žó¡&¦¯?2]”É/™"ZË>ѹå¶7+Øùn‰+N ŠeÔ]Ío³ÝUt÷ÊÀ¨Ô¶ªÔQXQhò¬'¸d‹7« SJi,Z¦ß•Q¯Ë¾<¥›´ºjŸ~ßLÿ¢½,½¢ã°0GìȦV ôU:YÖí¡‘b›¼üÀ¡‚ÊâyŠÑêÂ%ïØà¾ýFg2G[÷2> endobj 106 0 obj << /Ascent 750 /CapHeight 683 /Descent -194 /FontName /MYPTUF+CMSY8 /ItalicAngle -14.035 /StemV 89 /XHeight 431 /FontBBox [-30 -955 1185 779] /Flags 4 /CharSet (/minus/asteriskmath) /FontFile 107 0 R >> endobj 184 0 obj [826 0 0 531 ] endobj 183 0 obj << /Type /Encoding /Differences [ 0 /minus 1/.notdef 3/asteriskmath 4/.notdef] >> endobj 92 0 obj << /Length1 834 /Length2 2426 /Length3 532 /Length 3030 /Filter /FlateDecode >> stream xÚíRi<Ôíf2d¤%ÿÙga¬-Æ`l#[E3Ì3š%£¡$*"²…’" !””lY‹¤,5–$KÄ›¢,-:SïyÏûžÞç|:¿ó<_žë¾¯çz®ßu?ê› z2ôaÐÙzH}¤€ÅãíLÁ€©«c™ ‘MaЭˆlÐ @ššö€2Æfh3´!LÀ2‚C™ÿ6 ‰ÕúA20A “B"Ò<‘ 4HD@`( ;TÀÐh€Ë,ÀdÌà Y†Dd ‰ ø‚þ: þÃ’Ýÿ^&s‚ÿh™,)@ó§M-@`’Ì ÓB2èƒ;1¯/ÿ [¿ŠÛph4'bÐùAý­M ¢ÐBÿI`sØ À3È “þ+ÕüÝ$S8A¿víØD…„¡ûÓ@@i¨0ü½NaÙP¸ Ù™Â&~D üYéä_âûéŽÙëbçŠÑù}°?{ÎD í ˆ?É?1òO,ˆˆIáûúR@ì?NÞ¿¼eM'1Ⱥ?€BD&“ C¤Ph4ÀC:ä W`®Og°WA0ဃ û1Uc·ýQú‰L ¸ë¿à”¿@#Ný 4à´?!R üh À9?áß³±´dpyz(C@…˜E"€1þoD‡ÉéìŸßOðØ" rAŒßÇ ™GQÏWž,8jÛUÕb)g\‹ê¨¾•a®?ýÒ\¦‡yç‰+¨2'-M¥I¥ýæP 'û¡_)VbÂZyxô éVIY­+™±YÒ´×H[v˃uAY1oøªcÁwtfûÕ> ®…–:N!s‹)xÞ÷mþhà±Ç¯øédµñ4˜wvµc*è>9œØþyÇÈeðóÚ¼2?­_á+>z8(™az&¿ªïúûý¸ÒÔYÍ‚fâÀ`zSØ'gáMŠSFôÊor;̤ÖúÊŒöòMYÇâQA†¼£ƒØs‡­ ÑU_å²Ö¦÷¨'4s“† dÝ^jg?à&l)ëÊ–=Ÿ2ôô†®ŽyËJs–/Ã2¬·ä¢këb‚o#îâÁø„ö÷Áw§¬öÉX6ØOlxº²QºàIv¡*ŠÈÛ7=%7‹ØóZsJYg\ÃÌáZ"Ä&⢦žvÕez‡„Ÿ¾ÖÝùì­ÿÇâä}‘Ô7ý&ú‘Cå7ŇH]öhíf·Ë²÷«©#O>?¤¿6‰q2í„(³j³Óo1‹ê0ì®)9ÁÍiytn,èôË­‘/›<ÞÆ.9yô¸Ûˆ¡œ_è:8·WKe ˆ'„ç{6ø†H™*N XöUË-ð²Ä"^ކU²(¶fùó“§Úõ£@貨#ÊÎr õÒ&¹½F+(þÜ,aî* í9»™Xž+roÑ¥vƨÉJÂ:;óÑõ^Xõ5¯]júÎÀ,&Æ“Æÿtép3—Ï^Ò Õ Ð±ß!ßz#¿>ø3¿¦èÎÜ¡ûÊ4[þ°Ê>F 7Á]üåvÛž#þâa®áé Åfa:!°}Ù'%¯B3ÙY¯/Žw(Úå_íoÊÜ.R]ݯ£{·õ”£ôn+ÑmîÛ%; vE¤™àådàls[ÛªJ]Ó”N›oKÁðó¦iª¯ié54rkÀõm%ÚKϼ®F Xbw¾«Ž½ç©p²ñÌÛŸâvx_–Ž®ˆ–±‘k(—ïÀvΧ”Š·»#¦)|ÛÑ®50;ퟗxo s9TÕ\©Nå`’É#Ìågu5oµíwº­sšïÉ|\2p7üÖ—’þâ!®“¢Ìzœüp’êÆ¾OnÆwìL,ʼn*6xPv¢ÆVè…õ`Ÿe7<\NJRyìáy¡Ê.䌹mΈiÒ:cÊå˲9·\‡“%v„­êp[‹óë'Be¿\¬ÁÙW,¨ó!‰¤4–QÐÉõ3™²Ö5Ûê\u©êïbÖ¦ft@–?¯ŒPºÝ „¦½BLU ,ŸþöfâL„ã•1ù°.çnöŒÙÀQ¿)ÃO— ìw¦¸3GÒ-=éoÚé„»rÊÖò*Søú«­Ö«,”= +*5iØg%˜xæáé{Æ—|ù»„«Luß-Åß9ú@+ßÇ:â ïaqãÔycw!Õ©"}ß,õŽ<ê¸sYÆu¡ —Á½=A’cž1(Öãƒ,¼qÑN ‘ Ú³—k4»Ú›|§’ÝÝ—j× hˆøš/ªVîå".ÞR{¨<—ÿxB}{¼âéJãd‘-¥™^KF䊋sjUO,¶bæoªŸð’H²”þ²"­1ôxŸ²”ßðÞ ¼/KxZއÊS˜`Ï¡ƒÏœoP‘‰[Šãö/ äwß9Îß¹A ÄÊ}n(Ÿt}žÈÏ~¥ûÕ÷Ü›Þ]òÕõ‹ b*åvB5凔fzÄkó©÷‡S«ž2ðÎòZŸvo¼~ÛË1–Þþ¬eV+q½ÛÁÕo7Ë,äó}ÛŒUâµ\Ò"$õb¼–SТöã\<ï»MÍÀ+›‹ý¬#©]ÚñßÕøÎëÚ>,ˆž"âõ%ý•÷—Œ¶›¨½êûÈÑw<µ¹5<µø>{,}DEêuJߥõRư÷ù”æBìk˜´§bŒ|«ôFìšÒŸ ZXû†ƒ¥)_ÈTP¾>-txÿHå5xl´™¡tßp晤¡»¼ÏpË ´Y§í±Á¥ül™êÈ>ÔàЖê{lxö‡Ú}kĨg£^„&s.2 ¦n¯ý iß2bË'èÌìyy*3î›òЪ„t£‡®sÊ)fQç¨ú+«uwQ•ËY.Âl»²aÑhê©y“ù#E¯…1’èò8ZŸM¡Á3ùyo5”òœçg0*ܶ£x£Þ¡>Êè k=‡÷‘‰ •Öù†Ž¯·÷ët?’»ŽÙ÷uÝ*ZFW¼ðÓµsžëüZåŸÝÝ"A,nA«”¶L¤„,^±ÑÉÒb7«ëŒ^ÿz\év)!ÝÛa¿„‹+ް‰só •ò±ŒÞªøqÍÁ;þ`¾¨Èë†Gœßn¤¾’)ÞÅB‰×si¹R‡ÑXQË~lΪmy|ƒMwÝ Ó[ÆlGõ8C9Ï:nŠ–A9ÉQ%W+âð"2Øç¼Œ€°+p›ë‹N9¿"?NÀ€hñÊÙÕ¼/J©-!ûWµ¡v¸ÊÈ—<Õi5þ]’/wNL¼UeéY¿ÌŽøx¾1¿FÍV´Záý—š©MÛH]9GRÜ«3Ç8Æ­B)ëu‡6ê •ÜÞã1ÉŸ‹É¨+Ë=ÚýâzÇxÚI©×šÛê ¢ éø¦Q¿Û YǸ_¦ºœ›“'¶¤¬Ã?sWììÛçÃIVß×Á޵8Ô¯LíÕ`ò¦©—x*9£D]{²z†ZâänJ}JÃÖÑXUïv,±kM_Ô‹±’›z#·³RVBWÚê½WDeέ‚( ûñÉb!fÇ·;CÃc7Ê|À„ÅÄäJ– ŸáÙï…8\#nˆײôK8RÓÞ¦-›€[½\lžÅ®½¨ÔŸêwÖvã®7³õõŸ²EVŸÁ³— ©Â…Ü!œmJp¬=§˜PY²Ü\|+R!DæÍÓO$áV^“cÏ >mÐÍbÌü½o=úˆÚ÷g!Ñ ¡IJBo+¢´˜MšU7³=ûµTÅ¿­¼1’Ï ³ØsºŸçèË+j’ëß=®3ì£a¨»{Õ/jüÆ*_>b‰x¦ÑÒ)»»û*+ó°¦Fš½­ö‰ Þ‡Û²²ú&³n‘_¥éŠ@.äl5€QÉéZŸ=ÑÚƒä%EêTk¿šAÍvÇ𯑶º}ö²Î(mB6æôù4T'#þÃû¿Àÿ„‰™lF‘ûdÍçendstream endobj 93 0 obj << /Type /Font /Subtype /Type1 /Encoding 185 0 R /FirstChar 72 /LastChar 117 /Widths 186 0 R /BaseFont /AYRITA+CMMI8 /FontDescriptor 91 0 R >> endobj 91 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /AYRITA+CMMI8 /ItalicAngle -14.04 /StemV 78 /XHeight 431 /FontBBox [-24 -250 1110 750] /Flags 4 /CharSet (/H/T/i/j/l/p/u) /FontFile 92 0 R >> endobj 186 0 obj [873 0 0 0 0 0 0 0 0 0 0 0 619 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 362 430 0 317 0 0 0 535 0 0 0 0 615 ] endobj 185 0 obj << /Type /Encoding /Differences [ 0 /.notdef 72/H 73/.notdef 84/T 85/.notdef 105/i/j 107/.notdef 108/l 109/.notdef 112/p 113/.notdef 117/u 118/.notdef] >> endobj 82 0 obj << /Length1 841 /Length2 2545 /Length3 532 /Length 3148 /Filter /FlateDecode >> stream xÚíRi<”ývl1H–’%=…„,3¶P²&»†lY3&c6c¢Œò¢²Ì›Ê^Ù#KöÐX‰’¦&j!JñÚ—ÎÔ{ÞÓït>žóéüÎó|y®ûºþ÷ÿz®ûV8àà¨jŒÂù€'qX’*T ª˜Ú:Ú@5¨šDAÁ”"HhÖ Aõ¨ž0ö`l^G_KCƦ8|(íçO›*}éÆ D`[É d÷@"0€#‰I¡j€1À¿Ÿà`H$ƒ(5  ÐHàú¡±õïŽ,±¾8@÷Ï2*ÿE‰AlSÀa¶I%€m…ÃbBè Q·Ã±ïÙNþ¦~m~2ƒ±C~oÿ#¥ãhLè?¸@|0 $¶8HÄþ*uÿ4g ¢ÐÁ¿²–$4Æúa@@UOMKçÏ2:è$:D9 IHÀ ÔA,êW#ìð~ØP·<ãîfg¬ò©þ h,É)?Õ?0ô'fgDD‡g5Ô44 l!ûýëËó—Ë̱H e¯…¶€ ¡ö~°‘6p  ±(0CØŽÕÕ°8ûÀ†øâˆïCÕ9 ¨›}/ý‰ôuó"] @ýäOÔOýDšì¿û':Êæà?›sü‰´u§èßS21Á…\RÕª0m¶k(LÐÕÖ ü‹L$‚XÒ5dgýöE³§‚! ÂdàQçSj¯†›ç>+âQ ’I+ˆêk‰®N3P›}g :D¬peD¢EΣ‹w%²~¨ŽòÕ•¬5£_rT~-BWÊNwÜ1œõðâìÎì¢ Þ‹™bîŸÀש̿‘[b‘[xÊlf º9’Ÿb¹ _1¶Ã.?e ¤¢ä&“!ž™M6I ó4+áÉúœ.Ǽß-ᑾ$¶Í”¤½ JÓ‹Ë´åz™v|“_C)Äwì)z'`?Ó5Q©aˆ_~„ç¶s¨Ëlq-¥0Óæç§=ûÔß½ä¦ ß›mz\µÔ¦ºÌ´]þØçzØn™£6…Ü»»9VK+L#ä¼,_¸ÅPÛPyÜiàZë…óédzVM±ÇØšñ<ÙÛNLl5ºè·×É`ÕDü„=eûƒi£Á£¸†ñ«æËVÜ}Á.ÃÒx»…Çï¾êw­jªÅñ7q­Wñû–u­Ìe¯ÊAµ1•^×fe÷ÁoÜ-˜gûL+{ñ®ôÇef cFöI‘Ç;·òÉ»‘ÓÝݺvËð炆S»Ìˆ=Ô.Êeªæ+¼âïeÇÛ[åXz´çUgMžÂSZZzìÃîu}í ·÷#:o3y­_LÔH*Ãò&‹2ö]¹òáM t‡ÈçÕ0£èÌBíŠ$ùÒHdr=2åÑ73˜â6‹RIi÷ŒÙñ2õÌB¤{\/£Ää°¼Èagè{.FAa\²m?¤S„"ÒÛq‹ê ±øE¥È&’î¸ÃŽ4–äbÃÁñ…Ÿzv&S­á1­Ãl+ þl$+µÐÚà+yI¤ìxÝüú§0‹š‡T Ž¥‹ÞÏ«“Î%ßYê°×µÔåEfYW,ß¾ªá%1aN&ëù÷¨–©ˆ=ºQumR—•æ™ cúGBNçLño¼¬q°2ûrÃù¾Éð(qp£“šò+ަ¶ÃÆ… ô1F'E¥V«e—5&¥"R³¢¬yœñö‰4 ¦8ÛÁ;)`¤Q&xêƒvzy‡êÓ2‘½ßâ8–ìJš•i>Å1ñ& j|‚»ÞÝ:¶[Ô=_ôP±¤ï±;;®e÷Eu7*©28rúüvUSgñÑawˆY9o$ÞU¶±_WìÈ¡Ô v¯-(ä¦j3§*ioÒelD`²åóU·É!’¢Âí«¬–ÏŒ=ÓÛC:Ϭ»JQ4;.yz4KI‚²û‰IÌîsG‹ï?ÝV.¶bc ¤L\Ê—àô”j‰äë šäiÊÅÏoÈ’Ò»…ýT*©.™göeG¸\—WN)eÑKQ³‰7®û·õÈÄnÈ狈ð1º!vf×?¼®ŸJ·*ršð]ð³Œ{g ›¼p©vx .+rZο` ¥ë2CcfR™P`~ú´|¡;j ï­‘uÌW¾õk,¼-÷PÏiÇX²àã­=>'{eJ²wä6ŠŒÊ@|Vƒ¤Ò/¯™g:UÈ9öÙ=~)žœ§pÿc\«ˆ¾›Îýùùy†gÊ)þúÙúœ×ËÀŽ&y1r“rŸýûÖY;ŽH¬…?éJñšÑtÕ¦lWÚ0ßï/Î7Š)ÔYޟ墀GfžÿÍw„³77ò–žûô‹õb7E7[ë®tó#=ÆÌ1'Ï’çþòbúUËÁ/J„’Ù7 5N–LM§ÏRq+1¼æ#gP1êŸVÏîtÑe–&új2¬R¶ï©ê#CÏ…[¢‚×ñá´.ûûgéë/ÓNWk²L ººX…Îy~/W;x‚ÜP–Þ¥²Ïå>s¨ r÷×o.›Npâ>ñ Þ‡ßza¹¸;e†–ô*GYå =:€¸>krã’‹'?`´”ŒºÙMM†ÕóR%rko#é³*‡u?qok@.öM·.¶rbg¢Ž‘¸l÷å5é£Þ¹A‹›Å“7ÅÍ*…8¿îܥ͊¬ŒL‚7õJÜÊëÒ ¯ˆ—z@*EŠ*±.v,ü÷°ë^;òšÕHŠÍ~ïq¯|<×`ShBö‰àJÞµ&ã”7ǹ‚pwû¤½’|¨ž§åÁæý†UÁÜSÚqA³£ˆÁÝ |¾b[ûVk•èsµß…¢*Æ<—5ú…/ïÉ`Ô×;ÈW—kY½*)¸ûPòØ Ù%ÐP£ã«MNL7ùré1̼їQß#¿²G™ã±RÛUšôIñâ…Ô‘¥KÆjR•F¡­úZø1^ŸÁ­ 4ùø« Êáð^ަkÜ}Þ×"•n´ÿ‘—s¸¿2N(#la•‘¾µøT]´wŠ8î¢5µ°†˜O=ÏaÕì¶1¼-Dɤj›G×03~ûj×5´—Gþ•¤áaøc[ ¾zûvc°LœV1y3û ìõ~µ±žÝI®$»XS$*%}kÉÈ›²=“-0˜ "Û·eõÑÀ6uë„ë[§¹<ùعJŠ›ZXNa]dJq.|N(rÑ U/`8·èÒø¡ÞÞ´¬ÛÕQ¡ÏR4_LÜþZ{s÷–ŽY°Y|®Zk+Gv‹Då­ÉЄªâpȲs­Àkû2O|wvòÅÜî0ç§Þƒ¥QVÁùúqµØÏnéï}âZy­/]¿É¯5G`e÷ø¥´žS®]õlºêno­m>}Ð’³qìëÙ–¶¡»œ¡%\†„€»ùêè&V¿þ£'7—XJ9{pì¾Ö[tRV/çÎÝÒ.9t@íFcÆ1ýåæ +§ÿ*üËÉë™´m^­œOr·ìÔ§9ÆÉºÌåiYÁµ¦»úùEþˆ¦€þadJ›“ö†õ]æ\Dž¿áêÌ`¢—ð1®~qÛ2Ƕ£ ÚÎ kðªM_ù›ÉÌŒ¢¨Y‹ž¦›‚W4Û…—ôò jaZ2fóHâÁxö}¿êjXìĺÆ‡æ ›ãU[÷üòÝý, ¬V鋃¨Ö{£Ëñm±æ¿ÃM;óOv œ¸Ú¡ÖJ¨?´§_‚°ïqÄ{VCÚ§’Âß4æ-FK)ãGÅÌ Rø¸“T‹×ùlí>xªéªAF·WuXhtõ†X#wEGÌ7ÑúW}{ŠM‡¥ó›ôðGæ®4{¬S90³èæ¹ßGÊíGUø‚å öI›_Ù2Ç¡¦Ì&EÚÚ«_¶)ü[ŸYÛ×z&ÙÿsëC+_ó"õ¬ñ6EGÿ`ù¦Ô%—ix,çÇÿáùƒÿ‰H ˆ ’pbäï®]endstream endobj 83 0 obj << /Type /Font /Subtype /Type1 /Encoding 187 0 R /FirstChar 68 /LastChar 84 /Widths 188 0 R /BaseFont /IUZYNA+CMSL10 /FontDescriptor 81 0 R >> endobj 81 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /IUZYNA+CMSL10 /ItalicAngle -9.46 /StemV 79 /XHeight 431 /FontBBox [-62 -250 1123 750] /Flags 4 /CharSet (/D/E/F/H/I/R/S/T) /FontFile 82 0 R >> endobj 188 0 obj [764 681 653 0 750 361 0 0 0 0 0 0 0 0 736 556 722 ] endobj 187 0 obj << /Type /Encoding /Differences [ 0 /.notdef 68/D/E/F 71/.notdef 72/H/I 74/.notdef 82/R/S/T 85/.notdef] >> endobj 67 0 obj << /Length1 788 /Length2 1551 /Length3 532 /Length 2116 /Filter /FlateDecode >> stream xÚíT{…yBÿÏ©  $Dj„Hu€ÈCüP€{’é6(qL(ùoˆúº¸©Ï·áø-•ÿdÓ_?„úO êç/Äa °Fy0&øšêRg ó¡ß×Y œÃG¸L4!]PK÷S 4EB`Á¹ÞŽ áå0,à}-„poYÝlŸµÍ~Ï}]β9ˆßêZ_èËú‚ —0$8¨jiA‘ø>ï\¿ºÍDÀEyˆÀ ÐÖÓ8Æ %D = àB1 8q œ Ô'žÎSI#»fWíI…÷ &µ.L˜¡çѾ¼&9—Ì9é} õw}û×e0Žf[¯°vGä/ˆ[ˆ¦Ë‹®M¾0\›¯?qÉì©nºCåÌú£§·¡:uÛŽ„‹ÉŒ-|”5r/éøÐwGŸbœrq¯®Iu·VRä¹ÚH묻Î+²—Ûl,¤#Š &©¿æ<9„òÒ×6ÓCw2ëªÖÅNú—©QR޼j¶¬ÅÒLnö渫uƒë²$RÇÅT¯žŠ>þ ¸ž¨B#Cj«­.–+_&Qï¶Ú L1V?7¸‘ÕǸ^7e´¢¯ŒÁkž PŸ²ÔÑË ½>u24‘fЯ$){´;?'ù[ÍFü“Vl;ÆnNՙĊҠ»¤_ôõ«|r +ùß;2 åú6ßûWO»È…8V©i¢BF2G™¥à¸U—3ýæ¢Dëþܰýá—CØW~ ¨JJ°+‰ß•^)ºh·9¸¬ëUêK4vâ[žx|ž i<8^[Üm{×ÊyÜÿ˜üŒq죂n¼f£ÊmIÛŠlÕookû´Ok®%?ö+2‘aúª×Ñ-¦ÀamÊËѶ@ e46> .x”ËÍF$äfëNä$ü&‘ÝÚæ[¥DRn›v üy¬*º¨Há¥kŽ‘¥I#gièZD0K{Ê«¾QtÏi Ë¿¿i»^/õoïDjÏm*i²ìºr5uXVLN3¼¿Icwº`ÍØˆñk·gÕnÙ©‡ow;Ï +Ȧ‚Øéw ý‰qF|ß¹–o7µ»ö~h³ÚºŠW««4y¯“±s:vWi‹;bÅhKÔ5ÆHênfAèÓÉèãÅLøÚ€æ8“Þ5wŸÄ¾¼50‰J±iÔ=ë±9éê›Ní7ú»ÚšH`”8wÒpY~:h+²ïØ5O‹7Ãm`ŠÆ íäóHv-·y§ù¢Ù­E¥ûبÇ;¿¥—´'ó*Ÿ¿EDõ :•)á2^]ýDŠRÐ_¾þ©ò6zÜXÉj›™1†bn)<z$OB»×¬¹ý˜S"3ãc~RÁ9²2qKfR…%%68ëà³.ý!öûþûÖ¾Ræ- ÏÔ0˜jÙ'QoB;±†™ ¤à¤ ßqléaϱ~˜j\\U(BÖÝZH?ù( óLäß’›G:#ügã”G2šrÏwP}pÌ¥ƒÜܽCRvàÓs¶ì­É_¸1ß’œè9+®õ.òÿ üOàòa†£~Ì—ü'leT&endstream endobj 68 0 obj << /Type /Font /Subtype /Type1 /Encoding 189 0 R /FirstChar 97 /LastChar 114 /Widths 190 0 R /BaseFont /GVMNXN+CMITT10 /FontDescriptor 66 0 R >> endobj 66 0 obj << /Ascent 611 /CapHeight 611 /Descent -222 /FontName /GVMNXN+CMITT10 /ItalicAngle -14.04 /StemV 69 /XHeight 431 /FontBBox [11 -233 669 696] /Flags 4 /CharSet (/a/c/h/r) /FontFile 67 0 R >> endobj 190 0 obj [525 0 525 0 0 0 0 525 0 0 0 0 0 0 0 0 0 525 ] endobj 189 0 obj << /Type /Encoding /Differences [ 0 /.notdef 97/a 98/.notdef 99/c 100/.notdef 104/h 105/.notdef 114/r 115/.notdef] >> endobj 64 0 obj << /Length1 1432 /Length2 9992 /Length3 532 /Length 10853 /Filter /FlateDecode >> stream xÚí”U\\[Óæƒ»Ch,¸»»w—¦qº¡qÁ‚»×à.!¸; Ü=Îùæ;9óÎåÌÕü¦ûfÿkÕ®ç©Zkm U fq ˆ9HvafgaH*)ɳ³^žÙØPih$¡ 3XÊÌ$`ççç(¸Ú88l¼Üœܼ¨4Iˆ£'ÔÆÊÚ@'IÿW/@ܵšJf.Ö ‡—@3{€hrñdˆÛÛÔÿzàrAÝ@,¨ìì   ÀdeFeýË“<Øàý¯°…«ã/¹ Î/¦tÛ¤¼˜´€€í= KTVeÈ‹èÅËÿ [ÿY\ÆÕÞ^ÙÌá¯òOê[7s°±÷üŸGW ±AÁÿ™ªú/sJ W‡ÿ\•w1³·Šƒ­ìAfv.6®ÿŠÛ8ËØx€,Tm\€ÖK3{gÐßqØâ?¼Ìïo¬Ò²šòZŒÿskÿ^T5³»hz:‚l²ÿfö?ü2$¨À€…ý%ñåÿßOFÿ!& B,lÀVn€jæ‰úrˆ^ˆàͰ[€< Ǭ,`ˆËË+€—Éø,!PÔ¿ö•À rt¶±‡€ÙÿZù;ÈÉõOðŸ7€Õñe!BüV ÄÁÁ쟀Õäìü'À`u¶7s¶þá°Zýu¶_vçŸçK–‹Ù¿/®Äÿ€UâñX%ÿЋ-©?ôâHúâ}‘’ûC/"òè¥æû?ôRSé½ÔTþC/5Uþ!¾—UÿÐK{jèEOý½èiü¡—‘jþ¡—þ´þЋí?ô¢®ûñ¿èéÿ¡Ÿ¦Íÿ’iþ½ÜG«Å¿ðÅè_øbÇê_øâÇú_øbÈæ_øâÈö_ø"dÿ/|™ˆÃ|9Ù¬àá‹.ä_ø2Çá‹.ô_ø¢ëü/|Ñuù¾´ëú9^„<þÆÿý¾IH@<¼™99ÌÜlµÇàåfóý_®P(ìò÷7íåÖþ7[Ú¼\tÈD]š‡ƒlSCJý¤ &ËèIÓŠƒF;ƒëÓY×qg¡Mš ² œ`[›rìäcÅJV Kî âFqŸo ÖàEœ |ú¼t ¤Sà¡¡1ì`N/–CVøîù¶cãé2Õõº['Bõû}vÞ|âãøÒ…ùß—~vþcKh©T;ɨF9mï“@Ú{ëq#¿Žd!¥Ïƒ„†é×xOKĽ«Óv+iü‘9JpsiÂ(lô–›c¨ìväKºÙü Ȥ<³Ú¹i€W$Âiº"-ÍŠJ'È[Sôü[ièÓò}“åtäµnÞ”åáKߨ Ë~&†Õôêw9ˆ€{B·kÔ>âÚÕLr'²;6%Ѩ›Þÿ¦¹ Ž†¿2zxX»}Ž$î:zS¼"ª‚ÕaäËu£c<¤e#F˜  Ð3³NØu•[C‹ìwÝôi¹SèòÏ}ÀŸ/ £Zâ@ݸwHÉeRÒgüÒ<ðÉPÚã¿^Xh3ÿì ¯+2ƒÃXFÒ`Í´]sq°¢¡»@ ¯"Þh YF¦€º²¸%AH!‚G*H€-n¸­ °ŽYê™öˆ;²îðÁåþuÿ¶\€'kúQpg"Yk é¸˜ç­ÂNà»–#Mo¾¸NÅ SÏI¾Šº¡×´)š>Ìj)v奊¥!žçü(Ieü5gìp¹>WâÍß,s q€j:Eÿkú~“áŠsì8Õ‡|S¨žh{W/ Þ®º÷vãhW1á4´zš)V/üìëz¾Àõ6ÜUmsq‹ŸØQÚ+Ùï9èfQÖ GºyÔ` ¼[–¶:´aÿØQˆãJ%¬[ùN¤‹¸º×OÁi2§&PëæÀe‡­Æ…]ò¢ÄÉ™Ù|‹»6gzà÷YB?Þ —xz¿ìHñÛ[iöضeʪѶ61?ÔQõŽ~ ˜üD¤Q•„ÃțّŒÝ9¾øˆAZ€¼ölü™&Xõ±ºèû$J<ÕõÑl9gtö÷÷U8œ^á1K9~±!W©»°ƒ³‘C¼nçID"dKøÆÜJî=Ö`…{@÷Óy4æØ”¢E˜ÃOc1*wæWB™N‡[–4â—ÎþDÞKäãŽZ\üÑŒês Î1•°@óîQC&B¶¸õÛ宯!¹M(¼;¸£Ñ¯ŒÔX eq–`ŽRO ¹¨1ÒŽ™|!ä ßD^¿3³€7âD\G(ca%ë¥nÓ`L åÑ)å`õΆ°£Þ(¡,œð+Y³ÍÇ’p ¡¯Íùmjp5ø)©ÖZ ”žË·ï}õ{Òúܲ6d •aBÌÃEüŽ.ßÇEÀé5ñžê«{—=œ2xÏ×q jªÕ’•VÚ#$Y„>U¬Ö-è%ÖÛH´¢À€1½tƒI°zC(Sã0nW£Ù'ÔëO„#¥ûÙoïÕüO=ÿA¬J¶7OÜâ)r*0N¢¹ü'7(ÐKG¶wYaqcT×Ѹ¹oÄlº½•N_S EÞå…Ð4ù¸H´®â§Œü¸n¿Á 68ž Ô?9•„ÂÀ -(Þÿzž±gýæÅC¬mm¾ôЈEh`ÄeŸàžÉÆs¥g—H–n„“í`í“Y¬9Cpo{Å­Q-ÎÏ7+gG>{g!ùªšõZÄ\4GŸv ö¤•“ÂŒ[†ÉÄ •Gð0¨f^1™s*½?ÃÉsk„ã9¤¼Lm|l–­jË"(ï>4Ÿ˜ÃoK+"Mþv°{*z„†fÑ8•æõoDÔg©vÍ>ÚU£\ÐPÒ/&µÌ&:}~_ë+\ˆiÅ>3yRj°KlÍ6ïõ{vqmg5žyì:Òï†1ç–dïÔ瑌Ä:ãÐÞ´=µÆšMœ#QëßR]Ÿ'|#×ßP™rÊ¥ÏÌ XRZÕ@û$r#}xGid-IKvªD¤…—dá~Òrû¤È6'fSóðk®òp•ŠÃz£séµêî‘[±@Ìð2¾J4$;OÂUì‹Ò_g$WÍk|jò ¦ \ißgÊ`o79ae7¾¶_Ùç¿<>}.ùཇFDÜ[BÁR¶žxõÉ££e½Iêmp­Ã<ñòï™ kÎuËF_zÙ—D‰|…ÄXtôá›ëåü~ Ñ¿ºV´¥`ζ'M«M Þ¼³Ý®c§ ¬{^êÐKYÛ!¤¿$€Ç«?%ôJ7Þ€­9L²˜Pšý˜øÉ~Ëb`ʉXz¸#ߘŽqø÷´·;ñû÷>46FêÖažZË.3E*xX‰¹™k‚H @´ã]ÒaIÒLί»~vP²³Ñ‰ÁGé½ïÁUºO#R¡a¬„oÖ#ht¼/ÏÁž,n¹Å¿Â|‘³U¨ 4W­ö²µ:Ä#)ß CWµ#Ÿóý Ù}ž9b¼%ÍßüF8Ò ²2.—£¿­Nnï"–‹¤~¸EvY"(O¦þ`ŒØ£yæAüi²Ì²åK\qvCIöj¿LLÝ¡^¥ö—º+©åÏýÏTÙ…$ÇŒ:Çy½“c°ÞI)ûr9Å1 ãtÑyÉ–Nð* ÔÆY~‹ ÷ËðÄ}˜î®"Šé ‚=ºOú\,†jcMBû˜IÓ®+/MØÕØ4ç-8™‰¯ Ç1?2¥JM»”ƒ °¡ì;Ìêà¤]zBÏßZ¡\ÇÜc±oŠ/V1'§ÃBN¨@´j[ƒÄ%'“H™—>±|Ó!o©ßê¢b˜¤ë¤6q%Q4«@xÃU`J©]t^.IÞrαÊ÷‰q‚hx¡7¬çèçÅZATHåm±¯ÞÅV '}í2¿ä˜(Ôõî£p:£¡ÇS²íS×Ó¢úMÓ›ùWa"6Åï^¿ÕÝ-#Án£k?¶ù¶{pã¶ÍsÔì=+óùZÔ22-iʵ­ÊeºÝfßmÀ1¸}ßK…Ž\ èv%ÈWì¹øqiО£ö$¸L¥{« é·–]oyiP§Ñ‰7|iÃ\¥v{'’|sIÐf>˜K^Œ«ÇÍ"nO%6h»û½‹†•Ð$WÃS@Ôvó*?SÆÍùê•&ÏhxEÓNW½ßXã¸\Ó º©dúLöËÇ©ss5û]Fà÷DÝ5UÊ5¸Ë|Ñ2@krÏ„Ìg^§FÕï`VŒîoL%mÇokÝKN žM33s.{E–Dbïa Edôæ4;g]ÀÄ>6·tûú.SËÚÌu±C_üTT¼ª¯ëÔ ¡-rjKê6<_g•ÁQNåÈE¢¶7L%îÓnÛc¹N‡Fc„_+¯jš°?àe Ýn¾_›Ø CúŠ|ÕÉOÕ'EÕB¦`¬[ì4Æ>­ ¶“:†&¼Ÿ¦à’‹¼kîD¬œh8•nCÃ?§UºØ Å‚’(¦‰ÒÍ—L‡‹òùæÄÍj6Ц±U'úˆ|ñiTÛÕBn‡frqŠp2¯ïõš$êµy;Gö>$ޏÑf*¡ø²}¨c(E^Eö}[(à€.úZ*ÓukIÒZpæšX6V„B}õQõámáˆþ,+–"R*A@9Òà» T¸…æ•“í°ùsôWõîç|ŠAGþNó« M§ãŸÁÅÆhšOY!n ÙÜ|ô+eiJÖ¤O [ŒGéåêHÑ —]vò›‘n<þ«à*ÆÍ!/Ê1 ââe»-ã–+ìY7Ûö,­íŸÄ÷¾ø)M¥²óöbM“h O­çUŽ@K^‡p@²Û0#Å ±ôEþïÒgˆoú Æ'r2 ¢k”òæK¤‚T@ýœÏ&»;µ~Ö…_lLËŒ5Å›_T‚J¿J‹Ä8¶qèj’|kd΄—ÔbYAóñ* xšíÖ)Jžw’,¹Ù3f•˜¹Ë.¯ÊÙgµë àEËg§n°©ËÏõÜ›ÃÈiFAä°= ºý«þj§¡W‘žyº0 [æÛoÊÊ«FëÐL¾ ˆPôÌÍözöL8÷°Ê¯ °Õñ©žUË=/áúñÎ'÷ÅL°UeYì;²Îéè“sj Ìúu‚>Ò,ä_×ëìMé¼ ¼õ‚¢Á‘²©÷.·_àLÙ j$ܾ¡­:€ÎL>ªóßÊÓh±«z<fÏV…KùqóLÅššþhP›z3‚áØØÚ‘ÉÂͯ]qÝŽU\GêK(§Ó_ã€Ó4;³W~Ű(ÊzŇ;à•¡ï·@EH¹‘SŸÝ5>Mì=Åd ®•ðHÓ ª£µ@§ÈØJ½É²REë¬3õ§Q5Kéñ¶y˜g$=êc5p–^jƒµ»©µšZ›[ˆ“ºN—»l~hÂëÕÏ"¸;Qóu@ÇoWÖžÇ%±¯¯‚Z¶‹¨F„ÇË—¾.t€zëÞb„ó®PÇU]=wîoó=˰ß,æqWpÅ!k›SbZ¤™ ýfaßÍ–E®$Åp÷8ÜÚKûÀ&{ñ™ª»Ó‚ƒµ·>†=rvÜ ¹Uß4*ð—cñOv·\›ð5âЛè ëªrIl82Iþ@ 7ƒ'ÅÃ= ’_i°_jMú3XÄ—Ö=,CINϽí›dnÀʆ Z~&‘þ£Ž@ì|u9ü=Sžýµò=â=›©ä¥t$³âØÕ‚â¯Øïƒ ÏOú¸÷>k¯Ê¸j:(÷î’JT›ˆVX$(¦Ä…ð1vš’N•¾P÷þãÊàÓ–î6hjuª¥*'ª½¦]š¾¤Ìƒ‚ÔlßÔof"n¥fà܋ӕÖNŽö´%çOòåC0'LF¥æžß§·´6Ê!K½Y¼âÊWlrä¥@^Š%§q@*^dI&ƒÛ2ý¨YdØ~žÁ\ľÁ Nø®Éñnš·á0†RŽÕÈ·t…h <úD>ëØÂrJRa²ôLF·!ß•¸ñØ·MPÀþ®Rø­BBZÖWÇnâþO`<îÈ6ðÖ>ÎfªÅ ÉÒ`Œ¥PµC5$ÊcMDÝkFd°bµÜb>lGP²@#%gïSˆÃùà‡¢Ó0T\H Ûõa¼±¿?¾|oÅ‹Ìè*´¶r&ƒdúè€äF-˜Mbª·ûp,–{líy™¬½HÓ^—<·8Ž+Z¤««F{­ø-ÜÒŸ.·*)"Whåg¹¿¬wD½_ ® SÙÐ\VûÞ†4ý=¼<ÌæÎóp!Ûâ©§;ºnÑpÎ(îxí— ^ØeGŒš¬—ÉU1¦ü"VÂbžÅ«"Ÿc•mæéQz*gI·êÎáZLS ¸ŠåÚ±ö y~$Q¿öOU?%úñÐûcé"T<‹„Û ´ÁϮ󲄪`ŽœÝ{Í8-QÏYè¥ Ú?(¶¥¨à†®Š{п‚2¾Sþä5¢¼’v¶¸k41™Û2›j}º»'áž³ 4³ìºÚ/.¢WÃ:µ[Ts.µÅÃô¡2Ûà o–„ÄnáxÆÜö‰2áÊô’†Ó·IÑ(‹Å²ªæŠ*tרÐj˜êrºuc}þEã÷ôqD4'®m¯1 s‚„?ˆÎ8½ÝFÈÐÜd‡N­îû®WÖõ"TÞÜ)#‡ 9ÿ.äÈ5“þÐu·ì¢¥i+`«8z—'Â)pE¤Ïó+Þh<.õ 1bJoh–Ôá8‘;Ê>PÙ^ƒå®8ÑÙrv«B÷ÚêçÈà-pŒ¾¢Ÿtzkk!‰<|>.T«·ÇSgÑ}-hEÚµ° ¡¤”?t±ÁAº.•(~».|K‘àj.¿b¦žvŽõÈb—ŽA9äŠÐtâ5#𸡂äRª/ðvfã‚¥YŒ÷€{¦1ág/IUm“M5ï´ûBwŸÂy®AF(­5^½Ú­.Ý®¢Ú·O­Æ±‘Ò+—Ùun&Aom“áì)4ŽÐ¶wÝ>ö¯§ÈF±TÒ¥³ÃÄ’°eñõ6äõÀJe¦‹±Fôž9ÅtK1Dä}d|«y?A²–ñPÀâ»eÅ”„(Üên±tN­æÆ2åxÜIœ•,œÑ±¯f– FÉÐŒ#W]8ûu÷ ºlhè z`÷¢û¯œnCÅžø›€÷¶¼lÓa¯…*ÀíÕ†9L*x:Ò8âïvf!.Ò ú \·¿ž Ç¿î‚Ë}œ<,Èå(ðå.Óˆ oÙP^³ë@3T!­FS®6è®ú–ìv¡»Šr&÷ªüwA> s-‹E:6]‰MD_ë 9ßïܱZ*wŸQ…Jè/%ÝÈ‹*éh°6¦J‡Ž’S>XFJ7GŠKç+å—cwV‚³©êÛàèª}xª÷tç‘F4sê<¬¯éZM1‹ô¾óbüjGù>ˆ`a[ÂJ®EŒ[r×JCµÈ=ŒS_Û&]&ÿ®*¶•¡w-ÖØáP”‘¢¼µp—?z yÛŠYÈ‹ó‡ÞÛ9L0“3ùËö¸B¼6¾io̬A®ÄÿüÝ-×r‚ÞÕ½G•šñs°ÖÕî^ëI$‹ÈiT¥Š¥òœP+¬ŒÛV å·œ7[Cäé’ _¡~§?}{¶íp´ôRùÔû˜´d)ß.ˆE릸+ûMÇvtt¯fz„Ç æíO‘N%—¨Ðèýœ '¢p jL_ÛÔŒ –¤íÙsk¡'\ôx5³¢ôêì©Ïæ´íè§78,K,»p9…®Ùi'…è«âþ&ô¶1f·Í, AÐо húÆÀ‘.#GJ2Xòíö 4¶X(®ˆ@Ó6‰g—Tv@eks×/œzÚgLÖ÷‘)ã×ìÕØak¥„ð+~^^üeKpÔ6âÖ•ÛŒKq ¹¡2™kb!ò¡àÒ·U’;Üæ3. žg )ÑöªJ‚PjîÓ/gŒǤý㊣þ°\TLA±°º'ê¶Bö1±È‘"±÷#àl¾¼Ì"ö7·ÖcÛO5¿7y§ŒŒ™©ÌJBYt¥oúšŸ´ Û[/æÇ&Û«³~^Y_¢×‹/©ÝGú4—ºÓÒu9ÊýÎBQ6“`±!™ ÷1]„Ó´³/ËÉݧü´a¢sôJ“óÅOžÇèw›nGG_"X£¬Á)V¢œ5œ¡NıøXHvqéM`}ÜìF9—ý][K¹„ýø”¢Ëë¨jK;Z:f™±ÉIú”W1ªÁ×»JãJ·šRª"™µ+33ƒo…Cµéõb\W™m„î 9P1©Š>Î`49?icÕ,Ï(“¼-i|'Íà%5âZ@¯|j‘eßqÖRS^YÁ› €ñ G«—NÀ²—í,²‰õ+p€Ñ•f—«ì?C^a¶5bì5-pÆ«ÛýP‡öúªo½Ú¹²ÏÖÅ 7Ë·1LåËÄž[v{õݰÇï‹}XDš«´ )WW?­¬‚f=CAÁêà͸¼[’>õÒàz —¨–ên>9fš®„ö‹<º·k|6½ŒDý{Héø‡'™ç‹w劷d?¾/b«Í¿'MìŠpÓ{ Øch×0¾R¼âDì4„~ïcéÔTˆ€þ¤ÞÉý±7‹6àŒ"àW6I€V2” ci¸àv`¼Cò},‘ÍàÃg‡•ˆ7㪲—<ÔžÁxrs1Ÿ«ÒÌÙX2ñ¨šˆ”0êÃ;éñž %X²[vxÕ…w$6YCBÐ)/Q¨^/ B‹a쿳–äcŒãаZ÷Œº’XdïMªùu™Íƈ'ÿ ­&ì¨Àèú~ èâc4²ÆYZõÒ̆Š0Ùü2ÀßZBíW[Zªé ¸gXÙÔùÔ²ð[C„sR— f´s,|nïfþb­¸ô§ƒ®èÍ»0oö”E*õÀ¡;Èãt‡¬—¬òHÖ¾ÕÕˆlçÕxÀF/ß –‹ ‘]›%ÑɧMÄ~‰õ‹Ò¹Îp8¸ÕBT+çܤ\»Ùc,Ÿ†Ê6ðúv2&rìŒeÉuúw$˜ï.²;ê¹–âðòÎÌüj©Vƒ.)”“¤äØXã[srê;}¨ª/'Ã}ã‚:Cê¤eéãýC¡ÊBÂî2*\±á5é¼!¨8¾½§µ*«Å|k|nA@Øš¸ü!2§kè‡9l";°%_†Fÿ½¤äÇ5æ~¤CîE)bÕ)UºÇ†áÓÜŠÿR.@6–cÁņ0ìw£««?Z8‚¢ÓoɶÛÍÄo ­ ‡ü¤]G¶Žö‹îUà«:•xûô ùVoñÍ™UCû—­ øóògÆoÛ4RSSåâˆ_¶Íu­mÅ‹17•ænÒôzØ ¥¢¹UæÞñh÷Ê,†Ãcoû#¯ª¶GõåýýMwäŸ 5dGÔz8';Ì;¦î‰ó®$ïsº&‰)!vÐô`eÛ×~?ŸizTЋ%Š.ËEÆÑóTŒ÷[ú@ÐéëÞ ËJ}ª-vÉŸ©Ë°yí¶c܉veibôïPîÄ;µÜnóÒsìó‚ï+IN1eHG´Õ«dêØà9šÙû_/Ûºo øhz"Þî56 ÊGN_DìR6?ÏÆ˜.׿Ú:tY:O’?Õq+–º~jM–dÞñ[fs=8s«J\\n³d¢/Ò™w3Éæ yK€„€³í’™Šl›äÊè*i¾ØØuÁDæ]*—Sƒ#¢´`D'“©‰>ž{’1¾ÙؽšË'»×4tü¶/7{*Ðé×ð:Ðò•hÁŠ13Nß Ý} q§Ïòè ㈴†‘EÅ- æ‰l¶“œ4sšD©]Ð[p€ß¸¸èõ|DšéŠ¿(xe?hnB¡”ÎÑ›q‡8[Ö‹ßÞ7.¤2Û.æó¨†Ý²õE{þšÍ~ >x¿Zš!{rE(P•àÈ’VÜšÉ~Eð4W~™”\‹ðžº¡8µ˜˜©Ey”³€g^²~·úûºÔ³æSÍT[o_DŽõ›ÜÛìX ŸÞß=Ã… GŒcH‹Bݼ›Œ“ôX<¹%“ûª…ƒ¦aI­Û “Mô‰,/êYÆTÂàV ßÚ/Fl®È÷T¢ 6ð®þ° Z|Úc-‡gd^”× óñaÚj,®ÀÜ‚)ÚO±èk³ ÄEƒ‰šyy£e§Pú¦z`á2Np t Î–žòZ›â¦Á۵Ȁ3À,½”Ø™”&ëœÊï\Ê8Êïí5PRHb¥+n£¬÷œ&ÔØŽ¤$4õM§‰¶0àŒõ#Ö3hd.»øcñÆO‡os/"Ò¿4ñ ¯ÍؼJxÞQ§&tzd ’MÌCqÖ«F¨Üo@KÖò?8à øe°§.+øýÜ|“€ JÛã ¡ÇŸ¼q¾õT*3X“¬Æd¨´€¬F›(ß&$~Er!ËÏ"G4§T¢Êæ!TRn˜¥ýÁRUyX)áóXÏ;Öâê ö ó{ÌÒåÉ¥+€`HHCž°K/W…ŽêŒar\IåYÃå%b~â %|'­ÚLµ÷A7q,òC‰½àé܈ÆtLz£0¤¶º±iKéUÈd˜ÎL…^Ð=pPÌOæV—ä_¥ØÝÃDÁy…Kds’å?b’åY÷㇅cöGTÍ"ù,…;á³|Ò‹µ wáðu*©§þ˜lµ™ ‹À+»›çq¶-î“ä=÷A€lšÖö\Î#{¦ƒ0-êYThz ¥FþWѧ=Zr@rÿŽ,ñHm{Ž„XtwÅuqÕ› ¾Šß_õš˜˜Ì­ WŒŽLªc€]1Ü=ñgÜȨb°ªi;˜á& }]é¦IÇx˜^øí O°?Ï›éðÓøwv¿ùÔÑâ>ï‹dNb[…9Ô0½žØbkG 8Êåã Ôsàms=á+¯vÒºRØ?ÃéÌ8äÃx»L*è"Àgµ•Ûdá,:†×'âöÎ’•ŠW¾ÕAéÁ  µd×@fxoËÎISO¤Îi|º¼KÐn'çÛР´ÚÑnËÇûZ¸€ÇTžÿý,ës‡ð²0üÇú°›eϺÇuå_6IÛΜGýÕFêú¾&48S\€ ¤Þ;+Ùð¶Úî,“EÞ•u|y¥Ì?ÍUÐÿ² $ŃçLi£„AeÙÑÎ]u[Êw¾§iOròÍ“O°ºEi£€ö˲ÖÛkxëÑ;Úÿ±IòWf%!Žwæ8¯vªôønD Ù6¹ìi—ãtB5Ð;(Ûo%ÞXò¸[ ÁÀp¥17ÉÃl“”®Ñ…d¡mÇÇ‚…$£kí‹ÉrèÌ“a$#ÛÚ¥Ö +¶]x/{ ˆ¾:Ö·À©4§JYz¨2É,]Ä;&òQSqSjCsõ“/Žæu UÏߟÛĈÐÑŠ¨µ|LÆöýnE&_'#I—E¥‰[RÌs3«ùP9׋÷DdòÞÙ‰.\ùí·m½4„ݞܾÛ=L8¡Ô§ zºWÂ>Ú>;g>n|¾ZÍ uúÒhÎOÐL¹„xšÐ’YÛYÉ»—!&ÃUÿдÞ}Ý㪉ç<€1¥|O»™^j²ÅÇ^©‚Z¤ «Ïgì[³Ñ ¯*ÊÅc)GÛUT?«4ìP;Ûï a´¼Ÿ…>€‹qŸ0{|›ÖkTŒÙ®üµ*o¦‘Áž‘ܰã$!„gµµE¨Ó+}l<Û„@5M|¾n|)'\X3-¬ —Þ+½¹ÃOí*{úïsr€À‘oºQèý,Ós~µhPÏ2Žsa<\¨-mKîø‡ &›’¬ å–Öñ|ÁÏ–òÑZÓç(UN5g†ð®h¿í<üÌiw)i¶½ 87DGe€¢¿«zª"¨RH‘¹–Lž×6å5üÕRM*(~:›ß ßã‘¡ñ,.B¿§Õ~X/ñ¾b‹Î9y+±Yq¥(О㤒ZJ—jÏZ[È&>evA“QËû\HÂ?ê“5%k5O°+JÒ¤o¶õ ž‘-î îÆÓøMlÆb¬>k<0Æ1JÝ]/ æè³þÐÇR1%×˦»?)œò+VTó[¬ØÙ¥r”Ã8˲öó}Y$¢±Ôêr­†kùh6%3ì]ÊKóø,’ûrnËh¿ùsh¡©¨ ä–ÐÜuôØö÷Ôœƒ{Ðcx±äv×ÍÉ|€wªO;ÄneÓß0÷AÖ³³Z+O­Ü¶¾‰KÔáªHwX'³"p/l(ÚªÂbý‹Í&æì)=ÞÍUÁXøí/æi "~¿§>Gs>È$òϸÈçÕä •¥º|Ѧú¸­¿NÖ¬{­m_Ëhé|”š’yï‹¿õYY*uò…,?ÖÃùŒ$*èÎîPås¤©+=.ž®‹SJöç —k 5ÕŠ®@A&ÂÐo›Ó{;ÿo»× Fž–CQ\"jèWÊoSì׃Fª>½ÈTk›Äñ£al•åÅ<Ó±Â=LáÂ*t…A†­¬L>Ÿ;4½VFð ÐZºôK¥×“IŽ{ |Î$ò¯¨šÃ•Us9ŽQãŹ'œmn—õ I—l(>ñq¬ŸLaÙÐPð¸'9Îh©b~•ììoE+ß–—}rªd3† ª¦ìõ®+‰*"àCúlƒ×8-‰™ ‰áeÒç«”TÇÅAÖ«±se¥lŸ¼Í¯n­/%æ>ò€ÔŸ‹‚Lˆ­%?Ê¿›«Þ\­w­¥Ê4à17ìàÆ"4ÑVv¡Ü’›ÙÊT WWG&®³ýþPÿÿ' íAfPˆƒÔõ˨‹Mendstream endobj 65 0 obj << /Type /Font /Subtype /Type1 /Encoding 191 0 R /FirstChar 15 /LastChar 120 /Widths 192 0 R /BaseFont /ESGTIU+CMMI10 /FontDescriptor 63 0 R >> endobj 63 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /ESGTIU+CMMI10 /ItalicAngle -14.04 /StemV 72 /XHeight 431 /FontBBox [-32 -250 1048 750] /Flags 4 /CharSet (/epsilon1/epsilon/period/comma/less/slash/greater/star/A/B/C/D/E/H/I/L/M/N/O/P/Q/R/S/T/U/V/X/Z/a/b/d/e/g/h/i/j/l/m/n/o/p/r/s/t/u/x) /FontFile 64 0 R >> endobj 192 0 obj [406 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 466 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 278 778 500 778 500 0 750 759 715 828 738 0 0 831 440 0 0 681 970 803 763 642 791 759 613 584 683 583 0 828 0 683 0 0 0 0 0 0 529 429 0 520 466 0 477 576 345 412 0 298 878 600 485 503 0 451 469 361 572 0 0 572 ] endobj 191 0 obj << /Type /Encoding /Differences [ 0 /.notdef 15/epsilon1 16/.notdef 34/epsilon 35/.notdef 58/period/comma/less/slash/greater/star 64/.notdef 65/A/B/C/D/E 70/.notdef 72/H/I 74/.notdef 76/L/M/N/O/P/Q/R/S/T/U/V 87/.notdef 88/X 89/.notdef 90/Z 91/.notdef 97/a/b 99/.notdef 100/d/e 102/.notdef 103/g/h/i/j 107/.notdef 108/l/m/n/o/p 113/.notdef 114/r/s/t/u 118/.notdef 120/x 121/.notdef] >> endobj 48 0 obj << /Length1 1065 /Length2 4382 /Length3 532 /Length 5083 /Filter /FlateDecode >> stream xÚí—W\k·ÆAФ)"ÒAB¨J¯b MŠC€$! HUiRUªtéM@z“‚TiÒ;¨ (Š"r¢ûû¶žý]žsu~gæfþk­yÖó¾ïš‹0Ó´ÇÜBêaЄsiˆ   53ƒÈi°°6 ' 0h8©@”•!€¦»# +@UäTäA€6KÄ¡€˜¶øÏ"%@Ó‰C!àh '8!]ɸ à PHQÐtqL¾L‘x$Îi/ ‚@{‚ÜB:¢Ð ðOG†h  ôWØÞû'›ÄÈ&ŲE{ Ú…Ø#@`c ¹’ìäÃÔ?ÅõÜ]\Œá®?åíÒäá®(â¿*0®Xw@1öHúŸ¥–È¿ÌA‘ö(w×f pBíè‚dþ ¡ðz(O¤½ Š€p8wä¯0mÿOä}ûålm¡m©k.ù¯ý•4£Ð3"öoÕŸÕ¿ò›ÉÛƒCy62Ò22r!ùþ÷“Ý?šé¢{š< ЇƒAäÑ “@‚(´=Ò@z’ ƒ¥Ñù€¼'>€úyžòŠK>ŒýÏø_!%ŒwãþŽ(*`ÍßDÎkÿMJäœÑo"Ë]þMäJèßt^›ü&9 û›È³†ÿ&eŒø› ä…ƒ‘ ,vøÉBŽ Ùê$ ;ÿç°ËHnäú!d‡è?Üû’•ñ y©„ß(K.öú…ÿ9 ZZOÒ9pNVNP••|þ[‡C¢ ¿>3ò@ý›PäéC"=‘Ðä8¡x;¡:¸ÀW7k¨F‚R˱æqEËÈsú{)]r_¹IÌ•[í%±1®Ñ¬ñß9àŇ5z_í×ûàö qìÇšÇ͵$¯:^«OIPâ*Vp÷ÞSCõèÎ{Ê+Ãs½±×²;S?vm<1Ó1{s|F€¢ÕÆ£²59PQÉJ/ÉÅ<$¨\DNÀ”9Ñ­ T>dösì#êÙÁ{¾·CNÔJNocÝY÷³NdÄü¸AeßoqóÛe#Àb@ßÏØÍgsàðÑ0fÿ± ôLö¬žøã<슣$g‹ó²ðÕÞkÉûŒ75¿Yéðã1ׄ“D×£5´m¢ÂŒ´Ú‹ºóì«ÄFbyª”–5„ÖúÖ“¾Q…ÉÞÌPˆ{6 ³3y͉FÖÓ=+ÐÃÍïqà?G&òX.ÂWÏ=©d—넯øû|¥Š±“àòŠÛÍ´c®h«.”uN”’í*·r 9óº"Šƒ’5g©É#®—ÒÛ®u>ï`÷µ•ôwšWêñöàŒÝ†ÜÞS“;ò¥ó ‹* 0¤“Øœi·â½šNF~‚ö6?Ñ?¶),éè§á]ïÙ4{Öí4­÷Õ´â¼þ ó¾cY¢c¤bÃ_ofV©<¿ýÙ¾öƒm–UQqÖ_ë—ãÍváóíƒtõçÅùt³[ol?Ä,½×?ì¦cÙH1ošï™>e¤ï¡ÒùñYëðQ…ô ª_Ù€©”e39¤¹Hñw™Óûe©IvÔáúNxƒ¹:Œz lÀbv„µî¢2Wÿ£RØ÷Xx0”¥–òüÃHzVl§¾ƒRjêúŒw Ãè˜ZƒöÂP†óa߉ºi§HnO¨ Gþ±©™n¶ õ³iÔ×£±TÓio|‡©q=ŸŸik7©°®W¯f~+ÖÙ‡yGömÉ83Š©dëh©']ù%i1óÕKy ¼K{ê­òª•[oH6×¥$R±| •ºYcí͹ŸhrÜ.Û2ªË ÖÇßÉ:–zèÕ7tyo~43HËôê>|¯‹s÷3nùÖƒH[í2–JYC^:AŸ”ÒhŽ“.‚u¼0 ŠR1š>·Ë\håò…ƒ®­5vR* .yTp»!«[o&ÑçFâ/ü^%É|}P~K¨½´æ:¸wçîÕÜùAÅòqÈÔ–¥ýK.Ýù(±m©kã×oHF ç´ç; ¥à…”®kÖ0u7Ξr¬ñ xžy¯ìá\ªµÏÉhl¯ÑP˜ÿÏ [3ÌÔzÞ»† ­ °ÄÐ॰V ì]©RÐö™[ÐÖÔ4R‹d³lN.Ös™ï…ÏÚý—7ƒªœíüíMÙÎÕâbxºK6ôœWýtåœVc¯‰éZo½FTRr×´i|YŠf8² ÞµÝT¸`t×èÊÀ–ñv]™Á·q½d#î}¾F¯ßRƒˆIaÝtû¬×I¹o_DÙš»õÙ¤ôs}M;"‚¬ r;úн±ÇwÊ>שO*ÊŒ|‹ù|Á:‹‹µÔI"à{ òÒÑR£YÜÖ›ƒ¼e ŽÅa²‚:2)üðŠ~…ü¾õ §Q¬ '¯úCKÛÃÊ=kwN͘hpöJ,` ¥s¢myãj eiÚü+/„ÂnëoÆökÒЖ‚šûJNèð˜Äc{¢Þm° ûí•ûEØqÖâ†oewßöx¼ÄF€¶‚ÜÏEŒDˆìó¶}£‚ê_a]æÉ“ƒ1±JY…ö»P´q¯÷¾µWÂk%@÷7«*n­gIYu_K.dE'зVúÖ§©Á’û¦‰îC!3óº1ô £¢Êt­Ú5ïøÇÑ 9@XhH½5£¢ùÆÑç{AÏ`'Çßgr>¿ÓYóÜÍWmrôhÒ3?Þ9î*N¿ä\Ù1ï…÷5bâ³;3yÚà˜J®ª¡òZqPÙ{õ³MY•Šº}c_/Æ©ŸÉ俸ÙÐ÷•†…9¨;¸,-œ•jIMû²¬çË‘Ñ"LjÅE6™ªÓ£/7LBÀ5ÝJÒ^þ…+”ƒ|°ÐøzýìÈìV ­·]J iÀù!c.Ú²"‘ÉïI P©wK:Òg¤¿¡ ô‘ûsNã ç?»Xq3p&мZéåÙÑœÀáÇB¸ç÷Z2kéíy¤ó%†³4-¼¯`DRW’lðnýL‰|+lõô]•»ï̸º§ì³u:Ô¿?·¤1Þ±g8oe{ï‚¿yLf0èiÎ:«›}½Ûdo¼ÛÓæíi͇¹LzgyÄS®w-'óò² Œ;¡Þr4gÂV˜ÞF*·¤s·°<¶‡v"o5•k¤á²’o„†ÍŠŽM±«E,íš­‰aÆbä¥yT£G3L…§=3+§JתޤŸ(¾U¢õØE¸÷²vøÍk¬ëµ'5Ùã`~d¥Yއ`Ù8kͼ†VÙL)Æ4¹î#xÂþ)dA2-:º.4½oU_B‰¶m§ÏK¾~ÄtÌw†Æ2:ÒOM}™¾¢Ì±hŠæ+¼†½Jìd=ðK¼£Aøë„¦ OÍ,§ÇvYuLGo@Ë-—Ët'„çñÎ65´áf‘s=èëû£ÆÜúOOr>Äôa’^‚ïó>JwtÏmGŒ_D Ùu»2«¬kȸÝ-“»oW‚«E®Îì²1…§ˆUϪ¿¿¥Bê ë‘…ûÛ‡¾ª(æÏ8 •4åø¼GCír”ç£?P½8îxgäeùäë,ÁÞZÚÀE¶ç<Î.¾)'PJCþ†Ó}ül ÖÕ»½²Eíj&ƒfKêý˜¯Ao¨¨|Í™˜òà¾âÿÑg)ÒópUÌ{N©+1ŒBá^y+<³ìÌý$ÖP\wJÍ=F%ÒÉ7'ã>vG«¯yyË&ª0*$¿ØÙ¤† ?½¡áÄÙüDžÖ: ‡¢Æ [üGø6JÙhM‡<äQ\3„é æþ‘ñã¡m†ƒ ï7(t &ƱWßIU ‰ Èþ° 'g_WvhpH1E_·)iñžÊxC÷Õä3²™tYvÚWGýƒ‘Ñ军—¸nœÈTÛò¼;Hé;ˆèçu}ñ ÕÇðV?ÜÆë®Å$kv£é•Êkdní(*ï>áÑÝÑñÔðó×5•ÛOu™ª¨-NOŸÒ]Z+e¿D'¤0ÉóvÅå¹FÇŸK·ûjäƒmí˜èU¤¦F”¹£/µ[~ªmR «8POÝ€h'ð±%m[‹ Kã3E¿@™4x…W»š.]íúÒ7¬¾¤U~wÄG“'EÊ[g%‹ÁšU¯äгÈy7L×éh¿ªÖ£¶|­$1°ùˆQß­¿¼Q3Þ»õÃï4Gåùx›ïAOîŽ_—éçÇ¥Šë™°PÍ_1QºaXt¯ômIgøá§žu-–/‡Ñotµø"ˆÁžëó€mjcÕp›þýDk»¸DÛäÌg4—%ùøÍ¶›šyè#ÊBÛß^Ý¡‘À^£*ñ_X¼LíúmjÂëfdqEà‡‹èwÁœó¥ ëi{~ÅSe^uÉsƒfçÎBHöݸ@BiLÊiËsc5RT,·Í™ž‹¡rÞ ;îß|WÌ"¹¼Fó®¼#ÛŒ´Kb;íOô:¯€æH…úPøô2 æÖgáëU–ãWµU²%YO¥Žƒ<¹‚¶ÔèÅ7.yg‹[nÓœVIÖk8øD!¤ä Võίqi ]k×Ä+ÙÚ¹ö¾%A5²Š¯_Ãi»Åðy‡Vu­o•Ò³b3ß$ÛÝ ññè’¤Vik¬Í.lÓ–t:|¶#F @®Kš‹fy f„K/¨˜ªéÚx ¿;_ÌÐkò•j)̼íè—ßÜÒÊט^?ÖÉàÖyÁv*{R2³O}ÒcRˆ늇ÓGÛÂé6YÆ„y°­Qâtç.§úÂé–Þ©¶ñAß`HXcò¹—þÂúe÷/-ht ¦.ESè²óõnøìöZ5Ú®|‰Õâ¦Z]Ï·]6$UääÊ…£rq†ÁGqX ®ÉLûGói…“ZØ(Š”]¤™ôpê³òª]®"6°êò"Á!h£jž »Hè:3Ï—7…¶‡MlØÙÇÜÚ{wD·Øãzâá7l‡õ8ï*ôá@Z}Qè‰7#=ítíòÄE×@ßgQ5›€%vÑöãq—ôWBž[Ö½Ob|}­=³,YR­†³C•›FâÙØ9î,ìK…Ôw£™ †œZ"Âb œÍü—õ:‹ÀÂd%*MÍ_I÷‚H+b\É Íw)2#›¢r«# £O¥Ï|t¡EF-gòy:–<„Rï2_ÐzñƱšRn„Æ“@g<ñ<#¶@(Ù?GÉcBHkIn{ÐC_+p;Ã~úŒ°‹ª3UòhÝâ_nŸº»GôøÞMjŸgݬîh+’ȇ©WÖA ælåW¶s¹+éOF_<ÀÅSL{@­À±'YÛ¸ƒUåÏÌÖíl~ôI}͉|BHžŒ{X¢ÞK|J Ä[1ó‚öÅ =Hb}šµ­Ð±X!¬ žöÉJEl§4¯œRgT·Ò¦ž'D…—c¹ï:"¤‚Ûg¼¤_åh´_µ•ÁÛ­¶¿x›çŸiÅhb”Μ½åãn}êlóNËá`Ð$Ó{†n²s£ùàÂYb;ý,¿5jù^MKÕ®Fv…¹òðy|»¿öód\m.ë;奒JÉU¯ªúÁþ— Êõà‚ï‘°­èè/J< ÆÌü›Š°ö1ú1³ôå㌠gyð0X/Ûû’æà0ÅHd›XÔãRØYl\9îž.±ZEõLfÑÛÂ)ÃÛŸ.Ë4K;SÄ08ÜOÊ+SˆWñ÷ë2Ídó­É~Ó¿ŠE½S;“cÝØ'bî}õÍ—ˆŠK‰ˆ ¯tÙÒï*ûºÖ™):CÉÿ#²a Wˆ‡;yK°¢³_Ò7ŠwtÞ<ì‘[ûîà@y`Œy™ñZ{V(}xÃ7§=•¯gà±ùÎj’§QL<§|§¼ê l—ðÔ `ŸœîœÙ1Ê¡a‰©ƒ^ õŒÙÓ][ 8˜bÿ~-åq`ó‚-ªèÆé"`Ðã´ˆ¨(´å4ãÆ=0e}ÎâX®ÒÞ4yKÓµ÷!Ï(õÇÍLaƒ1©BŸ7,®o0¿ê_˜>ðÍõ³¼º3È2ä$u\d cïÈ,ﯵyXÆt3eŽóKËñ³òãÝ9OŸ/ù¶ÄÅMœ]y*ŸD Åê!wW£o!Œ‡¦ cð{º[°°½‚ˆi~õ´†n¿…znrHÙ–tiá×[öð‚}mÇòÐQ?¼[eƒume_ÜåÁ«2UÙmrsÇO­Ò”&µ4ÀR7'¸h“Ø K¹FêÒHWéU{hˆ•òÓÏ·H•Ô©µ¨Ž—_öW`NwD¤ÛuSv®Ûg(ܳ=©LÓ"›èÿü¼¼ÜŸ:x’ý±r‹÷> Í,WÔmůBóy Hë¬;9MT›^‚iú¼?”ôªP6‘nê«lDiÉü/Ðÿ üŸ@¸ á8ÆŽsý.c2Ïendstream endobj 49 0 obj << /Type /Font /Subtype /Type1 /Encoding 193 0 R /FirstChar 46 /LastChar 122 /Widths 194 0 R /BaseFont /ZVCWEU+CMTT12 /FontDescriptor 47 0 R >> endobj 47 0 obj << /Ascent 611 /CapHeight 611 /Descent -222 /FontName /ZVCWEU+CMTT12 /ItalicAngle 0 /StemV 65 /XHeight 431 /FontBBox [-1 -234 524 695] /Flags 4 /CharSet (/period/slash/A/C/K/L/M/P/S/a/c/e/f/g/i/k/l/m/n/p/s/t/z) /FontFile 48 0 R >> endobj 194 0 obj [515 515 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 515 0 515 0 0 0 0 0 0 0 515 515 515 0 0 515 0 0 515 0 0 0 0 0 0 0 0 0 0 0 0 0 515 0 515 0 515 515 515 0 515 0 515 515 515 515 0 515 0 0 515 515 0 0 0 0 0 515 ] endobj 193 0 obj << /Type /Encoding /Differences [ 0 /.notdef 46/period/slash 48/.notdef 65/A 66/.notdef 67/C 68/.notdef 75/K/L/M 78/.notdef 80/P 81/.notdef 83/S 84/.notdef 97/a 98/.notdef 99/c 100/.notdef 101/e/f/g 104/.notdef 105/i 106/.notdef 107/k/l/m/n 111/.notdef 112/p 113/.notdef 115/s/t 117/.notdef 122/z 123/.notdef] >> endobj 42 0 obj << /Length1 1555 /Length2 11815 /Length3 532 /Length 12723 /Filter /FlateDecode >> stream xÚí¶UX\Ͷ¨»»ÓXpwww'¸5îî‚'¸»www îîî‡oí½WrÖ¹üÿ«óœî¾˜ï¨ª1Þ9jÎzš‚DI•^ØÔÞ(aoçBÏÌÀÌ•W“ff0301‰ÀQPˆ:\,ííÄŒ\€<fnn€ÐøãâãÇÃÎÁÃÌGµwðt²4·pP‰Rÿ3‰ l t²41²È¹Xm?r˜ÙTíM,.ž a€Ê?+œ*@g “ДŽ™`jiâ0š[ÚÁ1þã$mgfàü¯°©«Ãÿ ¹œ?¤TÿÒ¤|HšÚÛÙxLfpŒ öÕ€.ÿhýgr W#ÛÒÿ«SÿǸ‘­¥çϰ·upu:äíMNvÿ9Uø_rò@SKWÛÿ•v1²±4¶3·è™Ù˜Øþ+né,aé4U²t1±˜Ù8ÿÚ™þ§ÉGÿþåÁ(®ôY[T‰ö¿·ö_ƒJF–v.jž@ÓŸÙÿbæ?üÑ$'K€ÓG—™?&~|ÿçJï?Š‰Û™Ø›ZÚ™XØ9FNNFžpѱ¼™–v¦@ÐãØ‘ÁÎÞåc à£3¾3{'¸ö•™ÀhfùOì_ÈÊ`ttµwšÛü×þ{„û¿Gþ÷0ÛÇ{[[£?v£…§ƒÐîOˆÀèð±ÐÞôOè#½ðßÌÎ `t±pþáú'³ýŸ4™…ÿÐGR‘?Ä `ý7q2%þÐGnÉ?ôqÇRˆÀ(ý‡>*Èþ¡ r裂üú°SøCw£øoâú¨®ô‡>ª+ÿ¡ê*裺êúè¥ÚúpQÿC.ÿ¦ÃàßÛd4û³o㟭àþ4þC’&ÿ&f¦KÓ¿ðCøþó\ü…¢æᇩÅ_ø¡jù~HXÿ…6ᇆíüx/íþ û¿ðCÃá/ü¨ëô~Ôuþ ?zäò~h¸þ…nᇆûdùÐðø ?4<ÿ ¯áÿù¦‹ˆØ{xÓ3s°èYØ™þqäpspûþo3M\œ€v.ÿ:N?Œÿa3Ë3ôšÀ-/Ø›ðY%7†”ø‰çO—BR;¤w×§ò2œnð¢Í95M©‰nPƒQ­,ËP’Îe+MÍØ·p…}†¼Uƒ—P‡1¨sÓT¡/MNuõÁ†³{‘m3׉÷šh/WÈî7Ý:!«äŽ™9ópÏ# J~ßúYûOl-瘒í'Áée·É%5Ž6cÆ^Î$íóí— ‡±uÓîÑß–q{×g¬×S¹¿e˃ϧò¿Â2Q{˜mOÀ1§[/keqK½W×"“0Œ¸-(¥.) r9ö¡ZÍXA¢sF“œqˆBgm”©Õ»3èNÏ•Ý1ÖÇ—BäwÉà/@m‡0ëufüÃ?š=b¢œày‰†Ÿ3+>Er4R¯cy™c t‰œË#Ö7Ç¥ ÄÊYzcz¾Ò¹ÂºoG‡“…Nº€RKalUï<îZ΄OñêŸáì…y GePŸôæ Ì—íeàßÅV_®ºÎ’FˆmÖÇ>;ò/nÁþŠE>£ÈÙ—þœLïrp&ääÕ Ÿ!Ã3»B@Úaiä·r¯FzC3ª/?!ÿüú3´Ð¬é—!$“¸<âÂàœ—ëàØÓ™»å- ± O·SëÆˆ{ÂÜ1; 6»vÆrÙ\?Ê×=­¢Oq*wjm¦*uYóò¬¶ð¸qQ?Ë>U|=d1 )¡æ™r¡CòêµIÀúH‘4ÜŒP¨cÀïêÐwM¤|I‹yEÞÈB ÷q(¯Ö8}F&—ï.l›SßÖ³cþÂL¨2¥TÝå çoz›HwÞÎà]Ù ýF¨ ó“3up+¼a~AŸ@žVa“ø…l¼V/QW~)˜ÎÁš®V7҅Ξ:‘EíEm¥òiÎ=Ô·}&*­Á­ú&Àñ7¼á"/Ÿæ£Z;ôéH—+¾/-‹xf*—QSõòwÔÎÉG{…W¤Ñ‹p'›sÕ]óHgVú+‹Ê«7ß_šÜ¼+Ü? ú’ž YJ¾Àžl“{ûñòîÙÞ„§¼ëÛüHd¦¥IòSœ#X7\ØC‡LÓzQ·»eŠõhðÁ*2™é<£´ë8¡Ð ¿T šØ>½XMTG¿cXŸâ®T¡!ì]S„°;“ÝÑŒkë sfÔAo? KÙ¬–Šø©n8)–ŸÐa­® „%ІFC+™'²íÑ Ä"•´m—HPìøŠØMÍâ$íª¨{­ú´£¹GFG@9æ†Ò¹Œ†?eê©tÔ’b"#ð;Ò®å†ê®ñ‰qÈÚS:@SÌ—Š¨2rE5|˜ÄƒÂy+MþyY²eŒ7t\Ì7zÈ)èÂuLÒ1G!ŠÉT¥‘½ô™>5²à*8ÚÈÂe‘ÐòÞ ×CV‡ubŠnéÊIb«r‰Éä÷Õ¾wùѨt®Ô­†Ëtz÷Ö&[„œe–šì¬ôv›æŒ]=/>; o#ZËê[…f1P±ŒŇÉJf°YÊŠ•a[—7b|ö,jÏkL+™GeZ äOÏú²(“׿«¡1~³ úä Ö1°¼øæeû"éi´í)G‰òcÈú(lz¿n/)Åú·TÝ×’oí6àDœ»+(<ñ:×ã1Ÿ¥Em5?¿.…»r Œa­tòȻۑ€8û­Èm4¶Ù ¡st¥ÌÇ 9ÑŸ<×÷ŠãvÊÒm§Ð_«²1£Ë;qkÑ¡ÕõDÈ I;WF~_®.^U„€JÍÀº“ìÍ,²Mšö}j©{ß²uUru{r8°Á¶IÆ­Ê—…Bh+‰ÔcìÎ6Ñž6òùÔù”[SÑCex¸jÈä@·‚˜™ü—ç…o–KI°]é£ö÷ðÍëCùåu†·Š¾¢ýø=¢*§>‘·M¹Æýç¯U Gœ+0ÊMúÌU/d.Õ†¯¿ôõÖp9G&Ü„DòïñŒVá˜íU#<~ÆÆ-5Ôqk¶•²@,ϯüƒ)û«l—L5{lmœgë¢S\’á%UaÛ]jé>ý(i õã¡DByCù‘µ¡UC%LdKÛ· U,èéóæó分>²3û»²éõÂO˯åXñªˆù¬j§ 4H,ƒcâÄkŸ‡Àj•µå&M)F-#~ ÷2ÓätmN¹ÚéÂ\ßOh#¸Ý"­g4–i™<£¹¿¶Re€v+Š2¨£;ç>‡ÖÃg͘ÓG«ÔÈ;~ÇÉÅ?äÈ©¥ xæD(IS(Â{MM£¢fõÌ#!‘yÔÆ°c߃2JIÚŠÛÈ¥·ð«>¢ ðóM¶›Š{W¼é»7ÉvÕp™ÐNÙâÏ) wÞ3}6®µ›¨Ð¢ R2LvÇPž‹Í_æYÁ¼7&é2Õy  ÃîÿBrÞW:`%=êŠ_9†ÄÔK¥Á:öJü<¤Æ Ž.2©ÑëŠv¯÷Æ¥3 ¡ÐJá,î¬1o¾ ikÅ·;‘¦¹8l3…ÏK"‹aÍe;Þ=Ûø„ÛïèHšý5æ<<ÊÄ:{p®òþLÕîýI˜[»E¼wÒ¦®ÇKÎã- ¡0軩TQê´E«Ü³á"¿`»B½Ø€÷xz'D­1›ôn.b_û 5:4¤«DÓ; Þ åì§Éª{2dÊÅezôP(q÷úìGüú ]{!Îv½õ—¨¢Ì—…@ÀÆWAëìFÄØ:LºÞV@®{ŽÒQã›Fº*‰qN<éÓ ZÕ³!ѾŽpj1DW¢øÎ¼‰ùs~-ŠIiÎf¥•hiÁÂpUô ¦+¶Ðº“Ǽ¹'Ú¯Š9çôN¡Iʦ—^Bk0Ó.Ãük¾nXà)ñE]½ =Áâw•£ªùrðÝ >‡Xª•V½=)Ô$Ò÷ú,ÀŽJ[LߤjõñÏ2PáA3×OjÙÔ?¾|©-µ¾²*òJóâ©c­ñù\el{wMf­í¯«V¸¾ãh¨Tèû-v·åÞ¬\ì Vxc˜ Xìm±\™»R(™ëgÂB t‘Y…D¡Ÿ§ƒŠüTâ£Ü[ö>ýd«‚Ô5ó¦·W{¸&‘«<-×½Å%[‚ÿèû~ž[¨âÙÒ&éÌgLÿE6èËî<«¢„†ŸC‰U7ý žO ËÈD¦zªIq§Oî¶ZX÷2doˆ¯ïî²½«ÑyÀØÊÞ¾uÀa¯öþx¡_oÔ.•Ÿ ó<£…aPs‡Öb­½3~†L ®Ì93¡tiDé92¸q¡ ®Ê2è&Æïæ¨Oº 6óüè½8‹…óÞxeÁ˜sëÉCûæUÁÞ¹Ío;°)·J>šÐ>¥Á‹x¬Àz€d·yú4\®h+ßX‡¦¡GÄAÎ6õjbUì;7Q| Õ4†sndZÓÝ>N*ÿ4µ Cmg'Ž(‡l{%·DÇÖ#]zSÈnQ"þ§(ÂHxÇ# 3ÅHú*=è<°¼7bÁцnty¾>^÷é´¥¹žÖψâ1—Òy(Í3!X&‘L<$Í3k;{ø`ʆ±HZþ?ñ5 xgÔ¥Ú™´÷vÔÖg¾Ÿ/†š,Ÿ” " ¯ò“.–r„¡¥¶ŸDXoz³¶Ì²åt³ùtó˜x"º=^õ€cP?k ‡‰j…ô®n b–µ¸¾ª|Õ;lo—Uòí+OÒNFzä ­˜´mòþ1ŽçUQmôË-fn|`íÄdnÔèÊ­1«»Q\Ì/ c¹þDç«~1­«±`oqö—A2ÓÐTt/(Æ7ÝÆ|>©ìÓ„h2Bìmו3ÍÐæ)àÛz¾Á#•\(#BÇ ÎWD ÜÊS2˜tK«Ìþ¶f Ÿõž¬vEq‰ÌL3Ä Ýôöè]W‹YÚÄO¸‰°š¤ª—)¶°ñuÕp÷†ÈsÐàŸ‰Ð*|{˜Ë?ßÜ*K8ôCòÝz„*g~‘VîNð¹˜g_Sé¤Ò©]1h?u 4ÄH‡B‘>Cö‹’4nÉðÓßœ‚‹‡õpWv?ð„”ÐìG—Lþ<?Õ¤â¥ÄNÕ™)•û^*hŸ>À{Ùû}.Á¬QŸŽa t·¿L:‘ä4sµ¤K {+×/N¼Zuäóf JI½þÁœÖ:·Ç Däw¡,`MUóÚ¡ïÎæo‰BEqcÀÊz/çª šŸ‹Œæ¹á€Tƒû ¯ÁÈYÔ·ä['Ž nžŸ…òèèNšt)2O$òÏÜJKž:³æ¿+’µ¦úœ5híR;áVP “¢ûv´žíºk¡[«Õ¹~Ÿ W!È++˜/šÇ©2fÊØ«7Í¡ %̪BUPpìS ‚¹Í0ÐçœtÎx!óvÿE³gR¦`g8\E› ÿš nÉ]èÊÓš*ÖþKÙ(–5ÛP3œû툷^ù[CòL[P¯HÌÖÌAÙÏáÖ¸Ót[êiv,¸Üjw¦hRÞjbXTwkì„5D‚1 'XgsÊþPqºÈ8â­“aÞ5íÖJ.g˜]”^‹Œ¸Èðçu"½èäù{ÍM§ )2÷­~»>ÊÅëÞÚ ª­ ALþM ÖÓ¼ÛbšœjÎ8Îj¡`3g¦qY¸@¹%Z¦*çQ§‚(¢‘\vGXy,Ô¿4Èå›±™&OA¿se.MzîØ/!Ùž¶:$ö§)ïҿ˜¢]¡2ÛÏaÖ­|7ˆíñ'¯Ü„bÜÝ=<^Ÿ§bÝD ½ž‘õB`ýÖz N´—?Jëƒå4À³ÿ¬‡Ú»GÔ?àñàsfÙTÙ´)—ͯˆ($¢®ñõøÚßÖIÊšO>¨1Tæç,/>L~ÊðêÜBÛCm`Ç4,9ùƒ½ZKàÃh—ò1µñI„}yO´&ó ÿ÷dôÃé 7ç©^`EÕB$InhÖÓ1Ô›/Š¡ §ò<®× N©"qÚÔìè܃‘>J´§³¥ë`+ô;ZsåküÅ}m'“Im1ô鉵 ?õKÇÌ3¼Ÿ"Ì%†ãm¤r³­2#³ªiÃÖAQã=Ke‹Qv ¹Å¢Ó\Ÿãó@Ó/Á’ì§/+qÞ[F£šŒ§=­º¿}³ñ[Î2¡gðš`>þÙé*öÜDC§»ßjýž ›뻽֪å*úÜÙ—ÒäfQŸåI°6m¾¶c­Òs1˜@)¦?îõEŠ'wíÑJ¦ïÑ>é&|§Ã}NÞMô¶ «°I2J $¶ nô ùœðÄÊ…£ ô º  Óù±›íêN`Ï¾Š³K]Û=›oj‚‰ÞÍ×­´J’·ùwÁâ { Ú¢²mú”OY-óð¶ØâËvk–PW¼Õp¿› ÔqÌføÅÐU?,œ—5'sY뫞 4 î´Ÿ d¾ØXãÃü¬ö ]M\ªqǰ°šŒs¶×®!¥Â†¾bz—²ŸÙš¦ÞNâLžÍÌT* ´˜fÆÑhZÜýƇ¾Uè¾¹x)’מÉÄü0w¾V$:u«âk Á¸«$%RßH„9™,–ç2c 3èD]/JFÅkeíQ©¦Nÿ–ßÝdÔ__Z_Šƒx×û*øD©¢N,‹Õ”nÞMgÐ3Ì^P|ô&2=ÈêúTÔØoÏÚ'ehF³$¾Wº™0‘O›lQÞ·Cܬ;ñéùîN¼bΟV$¼.v(H ÖÓ3™›Ñºù¬N@=Û½kô£ Xº D}*Ñ"ìvì1ZÌØeh›šŽIµQSGä'gÝsuF¯NQc0-ê Õ;79î²l9¦,YØkCWÙâðÑøarŠ O„–%Æ%Å4Ž„ÎêœÓtù!ã„¢¼2 ¥€\Jöú̧f÷¶— ä B†Ë ^&ü죤 Q›r+m$Dv, Q÷ÜWõãÞt\×bÁ‘h( ™—<„v@´(þ>ÿŒÇ™2nbˆXmÝ|iÏ<ú%SsÌôÝ¿—Ÿº–;q?þÉÌé:¶-â×4—íC²LššÐdâšU`òÃ’o8¹,ºÍ:¦rÒýð«W»´ý` ¿½2§aE_ŠöRøš<<î)†ñ0ó€W-¼%!b¯ýþaÈ3€¨"sBV–¯»X¡õÓ Êô†Î:(£ž ½öØ(QëŠKf™Ç¸ 5ýhX}þó&èwRÕù,åA(Z=Í3m*T(¿Ê­Àòx)«—0vÍÇ|ÔÀjïSž/Wણ™±Ñ;\Š-9ìùRZ[ËD&.I#ŸQŠgµ’ ½ÊyöÕVn6¯šßøk{ºFCݺÉ™”hÖóÒ¡aOäx럣AF ÁZêž&§øù¨ðÍ êÍ,-3úÂ$ã;K<“$Ý]›&g£Ïn_g"°Ãzÿ±m1¡Ÿ;Kñ-•Œª.´”Æ›ÀÍ_ ®ÈI!Ö˜ŸèÁ³ÌeföxãþxOÜL£Uáô‹¿J*Þœú­/:‹~cÒ–$Õ™óþµt°é¦cx}´m!D~±¬—„,‰èf˜ºùF0´›V}\ ‰¬—‚Ý;ô&ëÔl÷$›÷*!…òÑyÓCúTý`Y£úpN ‡ûo¾Gä%mºâ0èü†Œ3¹­Cá/óG‚Eî‹ZÍEˆªÕýE}K³DþTŒ `NØ»:ˆ›!Á\ØEÛúñq™/ôwÓçN„»;Àêз‹K Ûé0N-…J w lGÁQ’Rb+‡uÍF4µÎÏ·‚|”ffo(ý½9æo„´ž“_´!aïZDæQ7kŒ–Ú—úê¿wê}}Éäÿá ë5é.MF^ƒ®5„Þ&«Vùuàq˜u›™«ÿéêGá¢×ÒyjG Œ§‚‡øÔ¨¢Ôû¢‡¾ÜPʰRºíµ±ó­%\çXªF¤NXMdißÓ¡~-ÿ2¿ãª[E^3”fè Ï_|ò¢1j}Íae?Üøìlei¨¯5µ°@ħW&'\¥t×Mu?fàMnÅ)A*(—a}Ü䤊àØi¨§÷Ÿ;xBÞÉú|Ó©JwλV/}ªæ›Eò4SA°ÎšH¨~ÍbL3—ΜçfÖsÈ´³Bê³ñu°_yC`§EΆù*¼äÍo •­é’nd¢eBzc9ÛÓÖã,Жý•ÈMWÐÜýÆpçÓæ'LÃú&´B±VñÈÌ–ðF’ü`R»ñ¬pgg0BŒuŠ+nb­–°X˜ãi ‰Šªy£ªNú˜u¢Te´arjq“®ïx7ÝÏ›ÅÈ#äÌý¤<ÍÜSMÇÙ=tïß}‚tT•{`ךáÔk½ëÌñ.'KrÃI«Ô®È±.*YŰ©ägftèV~¦ˆL·ÜHU~)ŒÎÐï)háNè"MiŠ% ÌziVõ͹ÂÙeâÖ0OÙcz¦õ ƒM §½dX¬”×UýǽaÉÑñ¤€úØ•ù¢Mí%õs:.ît$¹àÆàžÂ%µè²'R² #Æv!Áì¶X. oôïñ²™ç€Ë”£•,½8µX+"y (?‘"(Vñ }QŰƙ©àªjÞB>å•d‰ÿ×n;^¼á‰Q’ØJ1­}kÎoÈ4zúGçÆ2>†›%÷GOÿÛéÇ»nýKyg¾m"c´…¶tø¹,1‡AÞîíI_G†M´x¯Š-7Îõ4êµl¥2÷¼bh`„˜·+|è¸R/S©€á®NªÊ"'riöK¯—#¾=_õ±/ËôŒ³7üø…vËùßg1>B1¹RCO ³žÁQiô㙹ŸëESR9÷ŠºÅú{ÁŠÐ|Ž¢ÅRç¡™BEñC'ÕXŒD/ )ƳÒ=¿Ñ#©žJjØXÁæ÷Ç¡gd® Å™fâ”ò?¬ Œþ,èíE°…éÚaáçQ-Éß¾¬GI{t +äÄ7ÕìïIœ8ÛŽhPÉD—³´(]Œ0<ƒæ(p©ü »¦5fÄ4"ñó cqZæ Q«ù%°`²•Ç/äp8Ä÷†TlñIB‰D Ã|E˜Ñ^R`Ó&´Ô€y°R¾{ÔŸj ¡ª\¼4e u“»+K¢2†0‹ =ðík-uªÈÜ}÷Ãþ4 ›RW¶È³Ý—^ú5Uox.V{²j?q7Û!‰ìÒç^[ªtªˆíûîK­.Çòç A Ôˆ¬Ä:)ÛÙoæãÝmwÎOŽ€{—/`aè‹%Îĵ) D•ÀNFsˆJDÚ:V_$ÆâèµoU’â§«pÓ(\i“Ï}~&—ÓÇ¥c“^êXçºý¸dÒê0,‰Ø2¦lA0èF`[÷-øûµ¹ü,ÐÙ^bÚ…ÀyhböÊO *Í6~£Ìo±¡k™Ù¯B“ñí!Ôö¶¬ºDV_á¸o=×Éå&ï3!“±[Š(…}|[ðçÅLÇ;ƒÅ×l¯™}Ò%ýɤŽU@tJ/ÎÎŽh£uå©WŸyrÂdrÇ\5!Ý=<¾0E{õí®`I—Újì›xªsâ«om}!ò*˜8@Ô,íHèì³µlæðªeÆ~ÁlððýHZ:8œƒŠÖkú-kú›½['âhXTˆ-EÎí}¨nó…NkSÙ-˜úÉÕ&vwXÊnѪO’9ím0ÇuÚ›5O†ÝP ©.cò­db¹OyÒR’û×Jâ¿‚“ÍâCõÔá«:¸Ÿ‚Ÿ}Ûî¦Å,«¤Nba *¹”q·¡Xàƒ¸3•–\I¯ªSAš½ }½ñÎÁ;œ1yY6 Âzœý nÅ £«z×!1 ŒåiýúS"za¹ú¶¿wß¼}¨~õÝ€=¹9‡œ¹å4‰Ö$¦’™ .Ãà#,´#i *Ë©¾hx+Á5‘èô2üÏ`%ûuG:„öQÑü‡5õ1bLrpÚox;<!ÐÌ”lêö04jÆŸt{^«ÂwÒY½…83Môˆ)a7!£ë'ŒaUî7ò3G%L<- 7¥gttujy%<]9qÜÅšË[ô°jwy` tý.;¡Dö™{šRrht­îÎbâö_€oôo¾ÆòàÌgŸ)G€€üÚ»¿ço‹1ž¤Ë>Oá ëx…Àdç½¢ƒåÒÇz]ÕwWT‘Ú\‡u ÿ2|é#·—QX“’1.UåˆÚIåî9QøÝ.‰bsz2<í9¯Ó 1#_;çP¢ IðvU[ŠÙ÷¥I¬ÕÞ»€rI}ÚŸ¶ª7©^¥÷Ék¥øyÔµãk:ý…˜ßù¨k†¯ù{à ™„Ûo÷›Úë’çq¬˜mÈZár»[VÐúÄÐÆÓšƒÀÀ b8ÌÜ‘ƒjô•ö—¤â=JÞ|Îï9,߉è|/´ñëÇJ5h ’i[2›Vt‚9YŽ´Ub—\0úOÚ‹ûVÃܯKü©ë_w¸îwf`š[¦Açr$ ÎØm¿:zÔ.ŽiÐ […8º3«ëÔ=`Vsõ@Êö$!¸­†ÅËþ­j¢Q°õnÏq” =•Ê*«ž•¹´!#4æXDV¬šÍ3‚ºRÁÛ8?˼t(ާ’ÊÝ´7Ö¦AáF ²m{Ê ÖƒŠ{í¦Àp³|D»óùû`ßW³[5>£ånj1ùÜú+Éø]-Ž&G³Ë ÏCv¿Ã4æÑvÚé㣖ÚÏ'™’RíD××9-õ;¶úD4gô!'ñœDœý[î^¤-m8•çk7'úhZ~3¤h­O/þ­‚7ø&¥æ~wçöò”ƒ×j/äㆾóû–pºÚaaƒ[Þ+zgf0³œÍ¤ŽkµUÞ¢ñ]kýrX½ Ç¶õ“íTõýÀV–x½ù›ª†´óç˜M‹¶[ñ Mœ©ÄM!ÍLï¦ l:ÿ?s·H¨o#ó1ì—”Oò5`.>ôÔŸN¸&[MG¶Jv™¿1+;å°¥è¢ñç#k°jžà£Ly3¨ãí[lÞDÕ7¬D¢ þŽÄªBt£ÑZ5?|ÛRþÓL© òÍzKJÒlÆ1–䉯²ZyÕWN£<õù‚ìÂAgt|Ëunñ³/¾iß­K¼Ø³ö>Ç5Y²ØaŠºß×ÄOßçî&îTNvÝ¢¡s½jüüYR¦åw‚_:‡št …¤Z‚­.Ö,Ø“>k¿DjåûŠÇz+@²Óiéœ<œ1œ0>°ÁTßœ¼H%,ŠÌGÎy|Ò~/©O€m¿ÅJéryÇPˆÝùáåre´ÄUW"{¬ÙÚ’à ê=&†GUì‹–[éˆ=nÊŽþ‚"±Æº Ÿ“féÉ5¢‚QŽV!`TÏO0"·U_SãÍWUø\öPØiNÒ„ÅÉ^Åf9¸…lSfß6äÅ…xüsLü©õjN”nK“|AuŠ\™©Dá÷ ÜÃò^š&Κǚ-k1Ѽ ñÆx½¥X²K'€©é -Qú™ND<Å[0lÃÇ ÕnU¢©xæ)]%NÕ6æ µæ…‚ãkaù¯eK#(Šé¸Opœµå{Âì¸ZßM7Ô •á\›Äò£ä`ÝüØr¿wJÏç,Úòk'>ŽžhP+‰9>4÷‰…€$>mŠ|ªÛC²«›É÷¹˜Î7vÍ0˜æ/^ž¼ Õš¹9ÿ†ïÑ~ý6âP)¿%úy“zj̹Ï …Ça´3ú…ø×vÁ @Tɦ€¢Ëe[Õ¿ÁH;Ãwšš¨KG4ï…š¡„z4ïPÎÔ£6þ(Îþd÷1ïvn‡i{-Z:(DƒgO”Ú»ƒ¯ƒËÔ°b=üÜ5—Ìo{Kg%Öe‚ÁVyw•§÷:¬¿ß,Ó|õ²­¢Ûê'k ³c‹«•`®]|1Oì¶VY9äö ùE'“£Ò'>‰ýRIðE‘ÎL t¬–fŒÝ–¥ú Då®Z‘g¨µ}Î,«~‰±˜»ðà€b0ZKIJüoƶ! ¥, I‘p“$e²Èý¨5BƒóŽAàú+ \â/þÏb¦çH+@¿ƒ«‹ßQFæWD÷xQæŸÓÔl Æyv”íëå'§ˆ6a}Æ‚*¿RG5ž‡ÜÙÊØK¥×6Ï‹3=b²¨xØ} G’®‰yÜ`_p$‹~š\×3È/Çœz}‡âŸµ=ºúm‘-{ûœ©M}˜A—¸uÏ#ÿŽÔK°Ô…FE€ôW`Å”¦IOîsî0R~/ºü` EF_„e6Q ¢X‘KÇ·~Á(S&z‡‹i>²éfHyb¾YÆóYZ08–c®¨VÖ´°¯9÷°²´'xúžÈ éÖ.ô‹õròt9¤/p¦Ç’ñß/ú\kü‘ØRø[%Ñ[lÞâí[.‘ÿÂÍ[íÃFáZæ~Ø\â•ì1Ð]èõÑ ¶@bŽ™°¥éº÷_uð±Äè3õ°½Å6Æ€­BO…ÄåWùú9ª’Üß}/ñªuæÊÔ®#Ê3–+ɲú 5æ¾ý°ž$«mZ}ïa*•ÇŠigë ¶ ?ÁÌyž€vA,ÚØ«½ä¼þ-€„†ciÉd̺sÁM*{R)‹òî¦goBC‰›½'› Ø‚ŽÁØ;P‰ØLÓ‰nÍŽþ@´—Q Äo&â@´6xrä§—ÒD²ÚܘlqÁd2¤ã`òr.7ûüîUb³1Šg×­K*“ø²iu:-YDuŸ•©´U½Õ¬8'Àñ¼SŸúÅŸRx=ça“é„JJš{ó©PŸ¼+@àeu:Þ¬J=Ð+S¿AC#O-uÈuS$ù§ê…dUX1%»ëó‡ë=l¨=F³…]^9% ]?è'ŠQXø~ø yÝ€[è3(ƒÎ:’è|½|,df8è§/>Œâ¤€ SwAÕÅ÷2¨¹cÚI-Ù#¼}D0!Û¨ƒÏ ZÌ´ä qôØç±®ó9¨¸ ÷;òAüôÏ¢¥Ô@|‚CP0eSγ„®5ÌcŒâOÇßIŸsÎcoMܨWé&·¢e ã¾Ñy„!hăù° ¢Øó»íÐu ‰5Ìið{/Ñð‡A?G·s{Ú–$ų‘õ¿òNîKa‚íçå*óâäOwŸõ{ IÈqÏÍ(DÂ4àGÈáòÇ9ƒ,X•·K•0œYª5ƒ1ÅU_â‡xŒá¥‡&Ž3sô£•×Múw°~µ÷Æ¡x†d_á̯5@k{ÇÐ\ÉÔC°jï/-8„Úd»û:ƒ´S=R4!ÖŒª1Aârí¿Ä‘nGUë&/àã)ÀÉuºo»3ø/C.— ëCÙ?{ö¾»Ê |± ŠÖ Ǹéõ,—­‚‚Äð„L`ÌÜ&¦îNð(f_|¦OòûRDè§O£ò ŽÎÓ7± Rfvc‰{ J­€›öà]}õî¼v( ï ÄQ©wcÞçýn"¢ 0ß~Ì¡î·c7¹1¥¾q¿°îç1ßSg2™±Å‹ â8*’O•U»Ê$§…ÐÇ}zwˆ5ƒ'doþé¶Â„#ž«•Í9ÑÃÈN?µÝ¥8'³ð$DÉ%hS°qE.}ýÍ>áh³IÜ bàE!·ÃšöTÈ5DÆý§“Ä[fþ=”C FÚÏyÀ×í÷PˆÄ¨ÒÈ$z+ÏÉ»†!èÊñx2(»7þ¥HBÀÒxæp‡tmD^!ÂB5Õ«sµQùçâ0êä“öèr§K–ŸrÔ˜ŽÇÙ€Æï}^ß›y¹Å³‰ˆ&Ê#1¶á¶•4!Y«Ð]Š?U×´ö>'àw´4ƒí`cŠSB'ºŒm¶2óžø¿c¨løN£Ë‹Á])ªVÂÊKÑsé{‡èqo0'Ø2 Ïô2'Ú1õg%dY8“KŽZĸå}‚”‰m&,Cà@ã ø:·íþÎDÂ# ÃË-Sq‹ÀMd;³N+ÁšóÃdßÅWˆê~Ý£á‹"Þ}ÌÔóýˆ|ìpøÎì péÂà ˆZ?Ò)Ã4F«P·¿ÛÀ%­&T„khR³ >~© ÜžC/ÿç@TUñjJ=`å“7EÜþrý¯Ö×l×/FÎV&DÝT rûÂÒ´®”)`^ˆW&nÚv@h¼» c‰~CƲ7e­¬TÂðlL%âuW&3gD‡ÖÁ–bU(>‚| ~›PO¦-y»aC·óH>8.Ì`ëg1¦ÿ¸ÿ—àÿŠ&6@#'{[#'k¸ÿ ù> endobj 41 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /EPYZCP+CMTI10 /ItalicAngle -14.04 /StemV 68 /XHeight 431 /FontBBox [-163 -250 1146 969] /Flags 4 /CharSet (/fi/quotedblright/quoteright/comma/hyphen/period/one/three/colon/A/B/C/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/quotedblleft/a/b/c/d/e/f/g/h/i/k/l/m/n/o/p/r/s/t/u/v/w/x/y/z) /FontFile 42 0 R >> endobj 196 0 obj [562 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 514 0 0 0 0 307 0 0 0 0 307 358 307 0 0 511 0 511 0 0 0 0 0 0 307 0 0 0 0 0 0 743 704 716 0 0 653 774 743 386 0 769 627 897 743 767 678 767 729 562 716 743 743 0 0 0 0 0 514 0 0 0 0 511 460 460 511 460 307 460 511 307 0 460 256 818 562 511 511 0 422 409 332 537 460 664 464 486 409 ] endobj 195 0 obj << /Type /Encoding /Differences [ 0 /.notdef 12/fi 13/.notdef 34/quotedblright 35/.notdef 39/quoteright 40/.notdef 44/comma/hyphen/period 47/.notdef 49/one 50/.notdef 51/three 52/.notdef 58/colon 59/.notdef 65/A/B/C 68/.notdef 70/F/G/H/I 74/.notdef 75/K/L/M/N/O/P/Q/R/S/T/U/V 87/.notdef 92/quotedblleft 93/.notdef 97/a/b/c/d/e/f/g/h/i 106/.notdef 107/k/l/m/n/o/p 113/.notdef 114/r/s/t/u/v/w/x/y/z 123/.notdef] >> endobj 39 0 obj << /Length1 916 /Length2 1957 /Length3 532 /Length 2596 /Filter /FlateDecode >> stream xÚíR{<” î&)›ä^†–dn.ÃP1nKfÜr¶1ó“¹0fwmn‰\")eB¤äºDnú—U*j±(l„Hßж»_ûç÷ýõý¾÷ýç=ÏyÎ9Ï{ž£ªdë iLbx‚æ :K Cbœƒ+ aˆªª $°( º)b¤¾>0f“¤€@cPŒ¢ ˜0|ƒ˜²7 P3Q_'¡cȤ tG`yƒ4A" 80ˆŒ©TÀ~½Â°ýAfH‚AH€D!²OL¡CàëŠ,é^ ý&±}¿¦@¦¿@ &©$’tj@½ põ ²,z°@xhó>© ¡ñ>Åß«#2Úß=ynmi+åoÿ}×QÏy‡Qˆ‡W@O†ÝãÆþ ~ 4iUÜ&¡ágZ>N9Vu®ÅËYˆ— yÚá+,Õ7•˜»j#iǶñš£Úøð¤H‹zyëýí#ZÞ+²íøK{Ɉa­-eoªe:O:Þ+EÆ~ž–Í_â|²ªÒäM§ƒýnÕ^t”J ƒ«¼ì:s³¶Yt V)q´Ð$i9Gßµ=…cW|¸Tí}a .5öV»k€gqs$’¶i§Lv¨G$ц`¾¶=±qlø|«¯ö‡Jnp¥×¯vÛœ#´áÞOÞÜÆ/ÙQðÚQ#@>O,޳t­Àí ζêYõ1¨¯²¨ò Ÿ6˜Ðjxüc]Y&&£; §f6ÇÉßL"De%ÊÂÔø—.ë= k V3Mé;·Á¬VĹH8$³.ÿ\ÿ]-µSêh£l•˜ìÈ7Fgeæa2?q±Dû®ZªÜ» ’îDË•¶&mÚYøÎ*=88b*û…žG¾‡wJ¿Çu¹ZóJl×o˜lIOÔ=ae CüñЬúÞ…€µöÏâXl%£û2 ÿ6iN4Ü Ë×ÍŸ]¢ôENILõÍל˜ÆB;â-¸SùÑY†‹ÂiàÓw™ã¢.NòЬyUiL™Bþ¼¾Šp\|ùˆïœ¬Wþ¬‹vàŠæãY£h¤\\†ÄÍéèSŸü”i:÷ï¢óë#ÔzZüùkØ¡Ñ9†»ì{~ªEëùÝ©qÆŠ©>ï~Ä>Å5ñGõQz÷~gáSÃ÷Y ïSý<ê¶É±Ñ:ÿ½GËœ C±0š(¹æ²Ò±yµ7°³ë¸ }áàºcêØgG§ÈK¥ŠiŸ¿¿»*ú]d€7$ W~Œêy`^ר ½mf¿´aÑvz«wn—Yg³·ýPdíÖÚÙÒÆÁíÝR ^¼ßŒSÌí´V1¡ºÉ;X6òu²Cœ[7®ÔU-Ãe[úBYg–)(Q=.ýd7°`îUÞro¥{baMÓ…~š¥³×s$;Ø/ªwOólE"Ý~aÑ·ï±ïXwiaç’éýÂJsé±<’á¢ûB«\ 9à©ñvg¯|-¬Xvy/þbÌû×G_ÿà5¥xHûàµCYk> ?a’6~UiÖå…jaa)}‡[ŠƒÑ‚½ô¿øÙ„º]ȇÏEb/΀›r™î#añm Cã²Q´ZIÎ~\•Èx¨ÓƒK穟Îb”º€ù¬{é0hÛãΆB•]áò%‹—»$›(yõ yñqÜóéPy'ÒåšVg~ÏéÓØÿÌ[ñdè1©i1üè âN^ù™€Ìc2÷w½.}"ºÇÏõƒOòœ‹D%㇪ #T"“¾—ÿN¢&Èî·fÞÔ>ÿK>&'96SO¬B"ìmø–ÌËÁ'Rš}Wt\A{z:õÚáàQÏI®xYȲ/gí„zÇ™ö}N¹Èñ|Wp;i^ÿæ¶4;ëOW0âÑf FøÕ•F-Ú“È_”Cè’¹á0šnü›In?óop?êì'¯‰eIå´ýTš]Në|<`êQšH§@îÂâó "aÅ/é+;DE+·TÅÅ ÃT6Yéç΢¢ªšbhÒ?ÏÞUuÜš;2?~rê£Éh`Iâ§´_wWJ×5H£B3ÜšDƒØ/6Cª-o…fuœ9):kúféô?8ª$Š`–cw8düt¡è`-Ph =¢L,‡Š]{¨îeUúÚÝPÏZ¶ß*J …RÀÍYн¥…‹Tœ3ê2”--½£ó[z…\÷áȇçÊü:RävÊ€KÖ]F“"ŠÃG n‘mbÕµÄíϵ> endobj 38 0 obj << /Ascent 750 /CapHeight 683 /Descent -194 /FontName /LVEUCA+CMSY10 /ItalicAngle -14.035 /StemV 85 /XHeight 431 /FontBBox [-29 -960 1116 775] /Flags 4 /CharSet (/minus/multiply/asteriskmath/bullet/greaterequal/bar/bardbl/dagger/daggerdbl) /FontFile 39 0 R >> endobj 198 0 obj [778 0 778 500 0 0 0 0 0 0 0 0 0 0 0 500 0 0 0 0 0 778 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278 500 0 0 0 0 0 0 0 0 0 0 0 0 0 444 444 ] endobj 197 0 obj << /Type /Encoding /Differences [ 0 /minus 1/.notdef 2/multiply/asteriskmath 4/.notdef 15/bullet 16/.notdef 21/greaterequal 22/.notdef 106/bar/bardbl 108/.notdef 121/dagger/daggerdbl 123/.notdef] >> endobj 33 0 obj << /Length1 1605 /Length2 11108 /Length3 532 /Length 12032 /Filter /FlateDecode >> stream xÚí¶UX\Ͷ°‹»;Áw÷à.Áݽqw  hp îîîÜ Ü .Á%kí½’ïËs®Îsºûb¾cŽªñÖ¨9ëijruf1 G3 ´#È™…ý=@BQ\‡ ÀÎÂÆ&ŽDM-á4u³qIšºßØùù9Ò@3ðø÷ž›ë=5@ÂÑÉÛÅÆÊÚ @'AÿO/@Ìèbcn (šºYÀs˜›ÚÔÍm€nÞ,1{{€Ú?#\j@W ‹Ђ‰`acî0ZÙ€Xÿq’Y:xÿ¶pwúï[@W°€î_šô°¤…#ÈÞ`´DbUrW‚]þßÐúŸ“K»ÛÛ+™:ü3ý¿:õ¿î›:ØØ{ÿW†£ƒ“»Ð èhtýÏTmà¿åÄíÿW97S{s1•=Àöï«´ÐBÅÆÍÜ`ijï üW²øŸ àÆýK€U\FUFNšñ¿öô_7ULm@nÞNÿ™öŸì1ûwÇÅÆ  Ïn/;8üýï+ÃÿQL dîha²ppóL]\L½‘ÀO˜¸¾ìÐ ô³²€ÝÀCà–ø,]þÙP..«¹£ƒƒé?áG¸¬ÖÞNÖ@П€Õ ¼qŽB|V ‹ãŸ?€Õüs³XÝ<ÿÜçf³µ 北¥£»ËŸ'8`ãñWXÎ܇ÿ0XÍèñ—xͬÿÞÅÿDx¬ ›¿DxÀƒÄþx€ø'Kü!ðš$ÿxARÿ!^ðr¤ÿx12¼Ù?^…ÜWÿð‡ÀÕþ¸ºâWWúñë©ü!p=Õ?®§ö‡ÀõÔÿ¸e\]ó«ký‡øÁôþØåÏ3Àv1ûCàN˜ÿ‡ØÙÀ-þB°ð/ügWÿB°Õ_Ö³þ Á~6!XÂî/[Øÿ…` ‡?~¡XA!XÃñ/k8ý…àº.á?Ò_nŒÛ_Öpÿ Á!XÃór€5¼þB°†÷_Öðùþï#B\ÜÑË—™Ü?fðÛ¶àðsñøÿ_™æî..@Û¿`ðIóßli>—€@/ 9ÒÊ¢£¹À'ÛÔæ°²©‚™rXHq«–8¥†Þ¹.äåxHûâ±Î õ:OØh°dž/D®‘~ª“ÒAÎqi o&>mD:׊ÞûN÷!'èÍó¿N¹!•¿oŒ—%éf]ä©ÐIj¯‘Côé{4ö}ýÄë#a¯ZOÃI®†‘æ\ö™+|Ý#)f}:$À6¡•qõʱÓë©!7ñÍâÑ.*ÊõÝç%7dÔgD"Áþm›¼ÌüÜýa»Ù‡•9ÍeMSЬÕD½ûz[Œ/Ò^žmw”»í¤O»’ë£D‚㽦ÁqËBÃZ Ýoã̵»é©×ó]ðŽÀ"´€¤Ñ‘˜Oõ?°Óx•“ñ¿Õä«\Ìê UÑÚg¾4Nj~ßµ¹;âaÒŃÛÞÙ)ûî2ägß#Ñì¸×jå_sž—ðY<þ­îQ¹µO•Ãꇳ’wæ“Ür^Ý K,Ug ØÕÒ”¨ú@Ò©M†{+-KNr°CÃç—Ð@LˆXÎȈ8@ ¹Ç´e1Ëû ‡ø‘èðZÁƒ‰MåfÕtt´°Y7Ä+rO!kÏMˆÐÍz›‚^ŸbKÂ.t´›A¡|ö¨¼0¤%ºö¡#æUb‰}ÀûÑÍOÄ/,â½÷]ÐC+ÈfY?=Ù¨ôcc# Û(TÒà35 ”t«²yš¹rò{¿ÖÝþ’Vº6Ic;A0 p즱¬Ôãf¾¦ƒc˜ÎÓÒâG ñ“O?å]v¾`¡Š’ÖN, ÖUœ±aTì!Ø<-N@“|í¾‡ÁÀ$èT×Ö¿¦e?¼wöæªrÖû&`ÈãÝòiTTú`RimöN‹¦N‰sú¨>Øp@òÉ ÑT¿7Èõ7l×ËØÌ[T»ù×0þÇ£*}i!`îÍHé9=†»GÊ·+vÉåc-¹ Û*Ó2¹o%”M¡é°‰×HÜ“vè¹U¡ªS{d¦ Šßl¥å1òê8ŸžaÂX؆쉵€ð+Ѩº ŒìºòFïû§5ïªFëbªnšÚžÛ³-+ÝX¢(/Ò¦d›Ý™Åا­›³3oOÛv\.RñTSp·ãV­¸\á'ÌGãÈß.øÂÆ õwLâúkh¤•q¶F—äpä\%WˆZä™:2½Ë¬haæX+æ[X›¨Ëéñ¤ƒËˆŽ™’ WôÊ=Ú›•£3ÄÒ7í¬BÄ8§lãÂÚôW½nóW–°£åO«Q{¤çåx) &”‰PAÄk©ŠWièrÍã5$ÝrPŽNòÓÍÜSÈÙÐŽ ¾:ÒázéçF+Ž$»cãð£äÊ3œ›~åÞ§´î™ýŒŽ‚L‹]1ƒàqÏŸóÙ}úWœÆ—<ª~fÔ¿°Œ}Æ|>Q¢*1Ú—`¿Ú }8Ñ@•†Yˆ áîÀR‰Äñ†½ sqOñä‰ÀÞcÒÜÂûTVcŒûÁú1kÞ|rÒ¹x_òs«!ËVÆjBŸCûÇS%±`QnÊÇŠ§ó EÌÜ]¿#Cö`ø4=òw«Úëäæ“9rÞXfot{ìü°¯½Oµ”Úö'=üavæ ù5F°+å¦êŽ™ÆÎ‡ÞvûwÝò»Ê ¦xûuŒƒã€ùÄ>ʪï:w÷=^lÛ•çuøLV逸n˜]éŽ-r ' *‚~ù3ÚÍ…ã­˜xê í¯P ‰Ï¬«‹M•YM<Ø4}²g9†ù-X­5"o¶&˜)Y‚É«ï‚ü*¢% ò•Éü-9çO5 &†ÏÚ©Ô×"†Jü¿^ßo¢êGu‰vèë{7ðïà­!Œ ÝRáL%yùê9§bV6Iú‹Ι×VðáyȾa,¹BøŠU·Ð†!f㥩jbо³ðR&4…J[­;![9@Ûò>A™£Ó’ާ—‹7®V¡þ„PÎŽ …ä¨B‡\’ÿJ…f¼?j?) ×uÖUƒ(&÷ã¶óu6cá@r¦Nýæ}ÿÔQÅÙK5J×Ê(sNÇŒ6Úúr2tb3Âd‡{Ì5É·ú;§ÍºŒ*ql˜öiÉ•ˆÎêñÈ+¾Õ´¨I0‘AZP†D<Íd¸—çmk›ÕBÈ3jNx³¹e~ég Ok6 Æ—µYuqeâðêq=AÆ“UˆirÆCÈrÍG35ë ͨ>ù³ç\Pæ=ÇÃF-}#kOÏÂ󘿔 ÄÒö(p4NüÇá×¾½v²ßÞZ×cÈÞP?ê5ÝÖsþX+xÇ4ïö,µÎ÷SZ¹of"3ØdÄÿü^:¦ðë×þ…æô:Ü/Ò‰Ïßp§øÒË\óN¨ÓÛÎØ@1åÅ µ W¢GÌ+Žø™á÷"e÷Úà?|Êœ® \f–¼ÜÐí7õGÝZ3\;µ ¶{Ú Âêišâ 9k³e'{–ª¦Ú&¿’·­$P‘Ü"SŸ/™rÀýô.Žœ°—V2õ~ËÂEµaQ’2qoÑWF‡*Ž{G°ïácµ¢ ÄÒáJZjq2³ÕdÓÌ—_»2P' O¡È.¼ýwÛÈ»L-”Í<¥N:âˆUt¾éh(¯Ä-ŒÊoÙØ­è²ó7ìÇç!,'€¼ÒÏl…º[}‡ã6O·Þã†P¶ŸVÏr ~e¼-˵p×ä“çîÃÅæzAXgdͽë½6Êl–¿þ…½úZ¯·Ûí6WÕ@åbùU¯—%³p"7yÇ?Ï{éš•hI°_š‘æ&º±w ãÓùL—ùš„ÎvƒhRåãƒ8þ»êä~¶5Ô¨j;ŽìMM¯ÄOËgjÞ‚o‡a2š‘Ø3F¦ósD¸¤¢¥¥ýÄ*cn,êa-k4Œß„Œ~·`róïôæ+À$%¦FÃÉx!x‹¬9[^Ïùt ˆ³‰û”A(--ÙÖÍ8J ý QJí9þ/òQ¡U2°¥÷RQ‘¡‘²m9ˆ ÿ^8’OÈ?ËßÈ‚Œšïsó¥§U¤í¥Üa  –f±w,5i©d&‹ÏîOS,úÖñj–÷wÚÿ”\ÐÔÆ°¿aHµHÑè°‚×)&E𗤮\<Í/·Õ‡]ˆk ¸ï\˱C(*m@!ñeO²LäŒæD°h§žôÜ¥p”ãå ×-NÚ‹MC`M/¤ÏtÕæZ*=“ã´‹^äÑŠ‚.ÎWÊ™½f³|’"6¤ËÈÎÅÌM›¾Y°þ–Ï5#pÃõe¬öýÃNÎÜTÖX- 5•Q=ÙõV|@"ù]äiÃ`„T"AqGv»âI4¹ËJTÅñ¶ÓIýLð'ê’Áfómª¸]>åÌg‹Ö/ÜA…¡)}îwG¢2._'} Ìn.~MQ‰;룗ÍÎLˆ!aÈØyêÌàn/ÇXÌÄÜÐŒe–‘ië±Zì­©¬Z®ž|<€O±Za„Îh‘¼RŒLœŠ¦FJ9ë›|#ÒóàH’×ß„½aoæ&§Ã‡‰vî ìKʦ~ öYú]Dä|Žu` ¬p®¸¼"yݪvkpÛƒ¦PÔ™¢‰úoRT0Ü\Áí4}7É3±v£íQY$_´U‘8•‚ês`ŠmÊ✂žáZç,Eªµçwâ½Ù¾'ôR!RCÉæ •Wš¡9lúaª~þ£MÂßFå”S¼'vJ>ÆC!Ø‘•ª2M"|¿z¤ÔÙ1‚ùÜ㋘™î³qÖºº:4í’5­êËj|èÛË@XÔÌèúã¸í s²›™/É!q+N.K1_x™‹K"óãEeÐFwì,œ_¿°Ãe=Ë™ÊÚ •o˜½Å³?áýn¿ý™j·¨ÛŒÀ&.BÜ÷ +å$&ÜÎë4#ªähÍj™xk±:~‰øÄO—)žäweüX´4ëjj,FŃRˆ±Íu?i–ÂÔô(-Éd³`«z®â~ æNÍ‹ÝOB…cÄØ}Cé°~‹ühÖW üboZšÍw´s¡©ïÅêÌŒÌN›V_lPÕ­2 LKàï~±šÖä!Ó“µ÷Ф–õŸpP ìóñððDa¥*ŸâH9@îÉh^XÏTKs•/á†]²J¼;ökKºhMÛqÌMéæ:øV·àsÿb™ì÷K9±3®A²ÎÝ]>õ È~Xü奞å÷Ìð~ɪڻÞÚãà·ˆ¶Fß…xžê}uy…YMˆ´“èAô<¼Ÿ0¥Q™évdWñ¤øbl¯£ ¾žõ'ýãËd^?¹wCnÈ8'ãlú‹òj°¯ÞPF YÖ [Ú”wpÇT¾K/Ù.Á,Áa®"*ýÕ{‹4Ò¤ÑïýÒ£+&ª;?¥ñš|6¤²~ΟUXSs:“g Í€cÉ š—¹g†B¸*~d¨ŒX ̪ó9)÷¹~ÂùŠ®où뉛ìyæ[R¹ì¾u-ËJ³£gv›0c\÷ ¶È}sÕ÷>Dä‡Áa“˜3-ÍÒ^î׈DZ½V1sÅåìBà¾Î/#-#üQÁ„¬¯ÐE}ÆìÇñXÒ!H2ï!ץ迵½Z½© ¡2ÖY.¦å^÷†7Ž– ¡Î§Š¿GáaÃÆ¦â‘^2ßÄàáÀ&¶Ý$Aë9ª^hu]Ëp’zúTÝë&â0!‡ÂÙ’û;ÞÊJX˜)à`ý[ÛwûéÅâK§ê0îÞKý]¹0ƒx4VdEÐm¶y„Hy«ËôÚEs/>áo¦=¹Šæíå~mƒÉ ßn5Ýn›k é6&;åM»ß„déžÝè£/:c¢ÉÓN¯ô—F[*L™Ú[¼¡ÉüÑ=”Ëád©˜¿W¢î”ÅÙ¹«H§)hZÌ™'°3ñ1QpxÕ‰OSûæ£0‹Ë†ùâÅ"ùšÐWàefü‘Ú¼oILƒ}~zW yœ“päÿd‹­>‘q@…EŒ@©dÆ.>`üÌ_og-Þ;Q·Ðj[0¹:©—«ÿE¦DðVÚ¢¹Î·Ë/›G¦Lî%_B¹dä$ƒÄ¦€ZÎCIÊ[õÌ$ ˜À5/ü¦q“Äö~Ósû¦Pcôòg¨Ÿ9Wƒ’Ü5;îÜ‚ü÷‰ìÍ!®ï΄[£Ü²ðè]+{ÛÇÒ·#ú½^ÆpºîE¡‚'åñ²?P¹Ö¹ñbñ©¼£¼ç´ñ(inˆ¿ÚålŸŽs{ì‡N$~%˜7ÿá•í¥$³•‘Ô5e†]bìà—ŽÈð­w" ¡ƒR€_zk0_"ÇÚûóñ_Ñžô¬ç %Áݬ;fÉ]ĺÙèiíAOS~ðê±ôઠ=i•“›DÓ¶Z)Üé ?º¹ì*}¥µæÖ3 ÛËþU$´_EC#y›á-U û<Â=–ð¢0Ï2‚ñçÁ+¯Ï'É; …Ž=ª±F’O8á¾Ý7ÎŽ¸”nÈþB䱆û÷=·FÕ« ´>‹¢îzUÓï{eÃå“$ÞcA–Ð'Æ×ì™ÔêýIƒieeÏ›­ "ÒLÖ->“„ãL¹-¼ëÓ¤ðÕ™·ùBjkͺbÌ®& ¤ˆù,T£EËt\^*yêúøý‚º“ÿb0…ñŽ'¥y&›…jo·—·w$7p b¾Ã¤`‚fýbá£v”^)6QèûµŽƒ×*Û/ŽÉÎ>³û¤©Ëº—Œú_Vøf©8!/³²>! ΰæéc`í·Ú×úøÐŠ™zkùÅ90õ QäøÖ ÜŽ@ëÂ`ß?l„B9GÐÜF© ¾0Êï÷É$Õ¼vî‰Âv¯Ã¢ëí•ÉZö×~ÏŸOv,¶Ò/´¶+x ÃèÀÞA$ëÿäv„ÜV%åH”®7oL ÖàÐ)ÀŽWY”is‡ˆ4{18¯ˆ‹“o Fék𞣬;7{D ÓÐnWÙòë%êÏN5Ï·¦ªh›s?ŠÉ- `a¸©¢¢éÕK¬¬íŠmh€&Ιêôv¿­”úŽø[æ)ÙNøÐÅx~÷5Y¥Ù~æÜPèæ‘ s¶‡UYÜj› 8¤8%w„帄óø…ôJàåGoñ;a¾³k¾ ~Ï!ÞK§-J‰øud[Ö°Zz*!ço¶j@å²óYI聯„$=Éþay[ÕIÙ€ õÒZ¿*Žð¼Í¤Ž ãw§Ô´Íº0«—æ×#>xŠÕë”Õã1ÊÝwÚæ—˜ïlË)è2udxtäSê„×åè›#?Þfñ"M_•L¾^Ë–ãRÉD0·šFѾ‘D5Ói—L–×ÒAІŽ$ ­ ˆ;žœÞèéflFÌ«‹Œi¢Ë;cù´=‘¸gГ״2oY›Ú¦.ÉTò[ÜæªT(¬Áßb|ÖbFº¨B.jNqq[¤òå#%V RgɳG›Ïø |aøöƒèkÇúŒÝÜ· ÂCçCè0“ËÅxµì .? ÷qT;#ñdŠ*–Ⱥ„˜ Å;î»ãÏ {½ôä”`CQ±Ôiœ2ë$±ÖòÚÊIJZ3YâÆùM¨‘%ÑûÍ[/u©‰âCžç4†ÑäÕï]x¥ôs# ¾é¥¨t¥ìU­ï£3~ç¢XoãqªBåÀíAÿò7ˆÁË]š/ƒêÍ}І™¦– ¹íºWÉj‰‚îŠçîÂÀÊ5;C2‘ u¨ éÛš¼ìñÜÿå¥&Rêž Y^£(t††Ø%¬Šþ áÅ”òÒð|U&Tpˆ’8§Gz±Údz8Á ×¼,æÔ…Eõ=L®¶£dnâŸ3Œv²Mmê‘~$ñUcƒT{»Å~ļK ûe ¹,þy"‘¼qí[¦eú^ò.$Φy¯ß guzÑ sÐÉÛÅ̦ïNÛgyb“|µü'qä5•ŸÍH\?: )B¿'ŽˆÌcŽ¿>\AkP¶[ o­ÑÙîPa0¬¸1a_Fߎ„?¾WD½Y±à(rÃ^ìì*©Ë†D©Ÿó‘ÊWF5îÇÖU$2µ³ªÊTüEN‰(y2kdz”O¦ö\i ÓyØ?òå׈îð)Â~cÜC¤zsªR6Ø«à“@ñ“ ȬÒoNÓ©YÜá.°4/eÆ—\&¿]¼žWEækOaµjí¢‚vÓý9Ý–r—;Ø §;¶Ý³í/ Õm)p(DœÜDé•¢Í&3M5Œ¸¨{ ªÉ®¥ÕëüÉ¥~ʬÄÙá®VqȧÈFÊ•d")D{Ç55ç.Ñ^ââ*žD½'–ƒ Ø-ý˜zxê‰ù­ÏW*WRê´U%&ÕÆ{"íø*ƒ.±){Ô-3 @®pÄ¿;HÏv<ïãd÷†@Ž4Xoò.¸¾+(Þr¦ë÷-µl·§1 áüÕ^k?ôîe™r]Àé…jêÛè–ßü޲ýºÄ,ÿn¨ôY­|lž2ºŒü{eï< ùŠCÜûy^-ôÊ©™»0©‡r’Õ¯1aüS/[Ÿ€°~ÒV±÷×<ô’.Ðòóþ‹Ðª=£uHÀrž‘'Êä1þL}¥äd´ë<8!ñ]ˆ3“ÍüqR}ñ1èyÝÜéµöSñ‘çϘè.ZúŽ2¶gñëakSØÄy™–]ª©sì9ýˆD¢È÷|î@DQèóžÄü¤h¶èo|Ò¢]ueU£éYóAL£D@ä ÏK~j¶ã³÷â‹æ¨LGø¬ðVƾ¬“ü…-½ê{!öæØ:¤¥ù F䮀F¨nW}„$ÁQݱÕU}<©i3Úìì6÷*Ào3Úç˜ú²’‘<#,Ý9}›…µ†4À‡+¡z†»D¸ˆ²é¢Æâ5­«uJè“€!é“Ï ¬KËZ"I}]Ä/l#§DO¡û–Vê²î“/¾=£ä[}è¨c·Ô*¹ËESÂÇ*OuâQ(¯“D¶‘Œ¬Ä# *yBj¬’U(µè­;˜›úïÿÖB¸oV¨£(é(fÙµGcGa£¼ôH3[¹Iïxdä¦ Ð8šgO–jpŠY'‚2ÜW̼mLÈn†³P©«ŒXJãN¸;¨:ºš˜§7ü¥Û<\AÖ ä&l7§íÉHÑö§Z’Ê9w¸Ö¿®’Í!PÅHÑNjº68’Y¾óñ3™!Œ/ª†?Ï©ùH”½òY6k¨{S5ƒ+Ã_ï„Ò$h -fùþJan‚l…{A˜Rkîr¬´ët/³tè¬côs¡+~Øí¿A2 Shú¨÷àüöªŠÑýá’7sþB/°€öùÇ9OeflBšý)&!· $’92 -j°ß¼•,üÃWlª`öô5 ®yqd–þ& f%˜‡djýwÖò_Ôß~"J@!2óDê|}nߣ$s'oÅf Ãs-3øDœµ+‡^‹*‚cAF¤D'â][L—LôÅr÷fùÁÓ§'….2M9’Õ;=ËN›ç²³YÕpK*ïwÓjíM³Æ²aFó¹8KMUv „åI¤á¶þy†£ücQU9õ}‚Ì€ü"R’kõ•$&‹-9vxY‹yô´¥™+„™ásÃâ.Õ­hÃ?(†Çè®\èlÚ! íKž(qÇWîˆñ‘(Xѩէ”NÍÖ[“£Jt~²"ùV•Ðæ(Ú­eöt é{ÜÄò6Ì¥¸Œ=µÜ¢¯ZŠè+-*‡‹€c +wYˆ}</>μì(a9c~§'/E¯ôŽ2 ü$ÿ¢¹ÓÑ¢^VTŽU 2'Ì•®˜nF÷Ï_À…ã¬ýoð«35¥á çR<9¬v_ úu-Š|­M\6‡Xh:¦H¼Ø%QùüÌËáÿ“ƒŒÖÕe¿=¢xç+C4ÉŽ’ãQQºµ€ »d“U5ýØ1ƒ•YÝÈ„D¡v¯Y<äVG yê’d»ÆLóOÌB#_Ãh&½®EB]ö~+È(OÒæ•r £ØìÚ$ã¶Ý%ËBçŠ|W;D^pÂÌÔ&Zïè4¢ØEÀv/}>ñ¬BÐǃ§\'uÊš>ZÞV47H_`zsr^ ~¹Ýij-*¼d3‹­z~p ìÉÆ(Æ‘×Æ–õ—ÂËbøÎÿþì»twCR7K&(Ÿy è˜7¥;]ÓZPû‹Ìú'›DB6½ þb–(zˆqáì–h£]ãŒ5Bn·A—WŒò“ôh]SlwEŠ¥! º Ã6kðÁß8‚‚äQ¥K£zá±àƒÅ×[æµÙGpbUµY¶{ÄOÑãúî ãk®p“ëo„'µ¿‡Ü}Sè]óÀj+6Šæ¬ü²¥z?ÓÜw—ë9+qñöE‘Rî œÃ âWô ÷îW ~QQÌ'/ƒÅδ"ù!“.üï#/þ(ûÓŠá+ z2 ¾n•Ý&2?´v+)½f±[ ï`ßš`XP7 • cÿ&˜¸f»‘Ô*¶Å§7)‚°,EÀQ™‡Z½bœÄáHÔè9ÀÌV¶=îSPµÛA²+—EûѼ›œ›©%²x‡ý‚[½~ÝTªNÌ$ƒv¾èœþ]ƒo“C]¿éç«btÓJšíÞ>9[ÉXê ÂYíåh‚Þ§ŠfùIÿOqåÚ4¸ @š°Ýºk¾g{¤+äÍ6 ‚5/Ú#³HœÀŸá£³hü×Ð9½¹V˜—¼Føi5mi¼IÄo «,øüÒ^5!´<ÛäæA¡Ö—ֆéqæD=bérúªîfQfZ*ÛÐäMÙ°´ñ.œ— Ä{åÖy¤IVG ¬ŠmYÍiòv¬Äˆç+“ë§ÁŒFb>‹µZ*r˜š=¬Ý¦¼‘à\@AÕM®EÃröÔV£Î§6½Å[ɤ‹¬,$F›á•w=¿¸Å±Š÷®Ƀ/!”®ÅJeA½./0GFŽƒ­%bhŒ6é™õTx:‚Uœˆö40ñÐs=ùc¤"£±87:—¢P–Š^ GgÙ?Ô|ïÛKÔÐðí1§­ÿBõ¦+‘8W°;ÖUåKoN65šOåiÄ<]ˆ²çl’üŠŒÁ¦VMŽ ”†3±núðœqÙz+Ä^ÍŸkÙ²(þ¤?«ÔDÑàZ`3'Ÿaïi<‰ÕÒ÷>×× %ß+À¢77ÔV¢by¥9¦õ\å÷†¡ü:'úGÛ#cÀ÷ÏUbÔ|¨šLb¬Ë·CUå ë‚ªA'+vå!uàúVæj‰V‚Âe“žâR"+>×.0_×po –ð±­Y2±[ ‘m&Í÷c›zæÝ޽ñ[aS†‹û’hPÕØ™Sµãû[A˜Rï忇wû2Ö¶Ò8tž·Ô<©9…öôìz¥¿UþF†þh€UµûZöQGqÌ`s¾UÖGhÔoAgMAÙ‘ ¾ªàcr*éŇMKÂXD.øÛ;ŽŒ‰õú:ëXJƒçfè˜LÅz;¡¥Æ£0žžŒêíJE äzÞH¸÷ƒ»‘<_¶å¢–ºXÑÞ/W*îG¡dâœð´ ÞRà5šhÜ *R—r͉Ó³âŶàg3Ð5¢z:ô ˜Át¾Ú©àëÜS²?ËæÏj¸N]øF\nù˜HðT ÂPô7t°ØX>ôÀ@~^ŽF]ÑAìDÀÈé mak›+D¸&ÂÝ "ãµÞ’Õ… þ óxëSdöê/2 ÝëDçnm2æ†U»|W5ÉeúɺUP¬[>é1¨?Í¿)®^`B>{Ö8F ‹j[Cùík.íÔ.²Ë-ÕÙîà¼v¶Ý‡b½ƒÓÑùÐRÖÌúâ7Cü®Ë©gºíkÚi¤ô°uQd> »BÐnù¼HÔpOê UØäCQ‡÷fa÷½ ²;åõ÷ÂÜOB­/pé€ïžÝW…ƒ÷)a}4{ÉgXÈ,Ý’ gïŠÊozÕšæÞ‘ÓÒ ¾2jfxÔžá!•§åYSRSéw­uG!q^•¡5`ØU2P’2‹º†²(u&±Õ½¡øîŒí/ÂtôÃÀò{¬À5Xé75þI|ó~N"6†×j@-U¥Þr†YÈOaäM—m¥9ú+mQ§ò/|QI©Ê¿?xš»% z³>}f5Yxÿà ·vsc{‚ãÇÁnô‹[s×ãWÑÆÈë[†ª*\vUˆŸË€Ï"Pmg½õÙW”Êhì¹eóðÒéþÄÎèÛ3*)͘<ûÜÌ”SŒE£öþo˜î;较9 .ayÖ@¶Ò‘­õhMC¶87N^”Ë>F„>qùNùßL˜"Š.IÞˆ/òÖï5„—‚ùL³­wyÕïÒ¾O <ϲ۵Ã@˜ÂðÌpŸX… ÒÎ{šy*>ËyAÿøˆIb=õAÞÑ$v~ÝLÅ@â—–ú’]EWtIÚ6=Ú§p¬¨_O¼+,}Ž¢ì|l¦°ù^ªñby^Ö‡}_îZ¬ûØ«õL•Ó4àØ¼$'Eƒ ·•M[Ú÷AÄárš\CæËÛæ:SJ9³„ iÚ0¤ Ðæ´˜Ï^0®àÈ>ƒåü3ó{¿Ð*n³ØÍ (¹ÞÈTÓ ØSU¯˜¼{6œ”SF£Ú{b œ?RJÒ 2úëÒ u¹BœÚâŽü.²'ßVrC}}~r`'.mŠ'E÷å¼ìR‘gK¸Ù¾´  ̓3ôJ-b²&õJïIø˜gRx4ä÷ÿ8=뇹Üåt¤í“âT-æ¢õÕñÒM1)îîåùm"$•ÂOœ…}O±U×­0a™£rû;‡³•8 ó³ƒx¸Rs{ÅáÆi›ëÉFHW¿kù»…rÃK'Ô‹ÈÛíÄ£[êΓòÅ¥ZÌ$´¸dÿ3èü~íÒJJ¥ìBtÕG5ÄL„CÔãà…H[»L©1\&“aŸY¦«Ûe‹ö!FÓJuä ô¬üs…©|ŽÒšÖ¤ñ{"¶ÿ‡¤ÿ‚ÿOL`n4uqst0u±Cú?%]öŸendstream endobj 34 0 obj << /Type /Font /Subtype /Type1 /Encoding 199 0 R /FirstChar 44 /LastChar 122 /Widths 200 0 R /BaseFont /BGQGIF+CMBX10 /FontDescriptor 32 0 R >> endobj 32 0 obj << /Ascent 694 /CapHeight 686 /Descent -194 /FontName /BGQGIF+CMBX10 /ItalicAngle 0 /StemV 114 /XHeight 444 /FontBBox [-301 -250 1164 946] /Flags 4 /CharSet (/comma/hyphen/period/zero/one/two/three/four/five/six/seven/eight/nine/A/B/C/D/E/F/G/H/I/K/L/M/N/P/Q/R/S/T/U/V/Z/a/b/c/d/e/f/g/h/i/k/l/m/n/o/p/r/s/t/u/v/w/x/y/z) /FontFile 33 0 R >> endobj 200 0 obj [319 383 319 0 575 575 575 575 575 575 575 575 575 575 0 0 0 0 0 0 0 869 818 831 882 756 724 904 900 436 0 901 692 1092 900 0 786 864 862 639 800 885 869 0 0 0 703 0 0 0 0 0 0 559 639 511 639 527 351 575 639 319 0 607 319 958 639 575 639 0 474 454 447 639 607 831 607 607 511 ] endobj 199 0 obj << /Type /Encoding /Differences [ 0 /.notdef 44/comma/hyphen/period 47/.notdef 48/zero/one/two/three/four/five/six/seven/eight/nine 58/.notdef 65/A/B/C/D/E/F/G/H/I 74/.notdef 75/K/L/M/N 79/.notdef 80/P/Q/R/S/T/U/V 87/.notdef 90/Z 91/.notdef 97/a/b/c/d/e/f/g/h/i 106/.notdef 107/k/l/m/n/o/p 113/.notdef 114/r/s/t/u/v/w/x/y/z 123/.notdef] >> endobj 27 0 obj << /Length1 1946 /Length2 12885 /Length3 532 /Length 13965 /Filter /FlateDecode >> stream xÚí¶UXœÍÖ°‰»»Ó¸»\ÜÝhÜ‚÷àÁ ÜCpÁÝÝ‚gúÝûû6ù÷ÎÍ5À÷ªUµîgUu=MC¡¢Î"nîh zïèàÆÂÁÊ!TÔÐà`p°²³K ÑÐHº€€nÖŽR@7 €C@€ îäàäp° rs‚ÿh’ŽNÞ.Ö–VnzI†’øâö k3 @èf²¯a´¨;šYƒÜ¼Yâvvµf¸Ô@® 9+ÀÜÚÌ ` ²´v@bûÇIÖÁÂÀ÷ï°¹»Óÿy€\\ÁRúi2À’æŽvÞs›’#¸ìòÿ†Ö/þÞÝÎN hÿÏòÿêÔÿmhomçý?ŽöNîn €¢£9ÈÅá¿SµAÿ–S™[»Ûÿ÷¨¬ÐÎÚLÜÁÒ`ÿwÈÚõ½µÈ\ÅÚÍÌ àæâúWä`þßàÎýË€MQR^[WŽé6õ_ƒ*@k7 o§ÿ¬úOö¿˜ãÁíq±ö賃ûËNÿþï†ÿULÚÁÌÑÜÚÁÀÉà º¸½‘ÀÇL<€ksäfcuptO€{â°ptAúgG¹¸lÎîŽn sS»Fþä°9¸Û›þ³Í–oa~ÐÞ ‚Ÿù?QÿYáß{ý¿anv›Ðä`²ø+Êñ?ÑÿJæ¯ìúÏ®¶oA°š™£½=ð-ö²òv²½9qó‚Ïs|âæ°¹Ú]­Þ"`q‹ã[ììèúó€eÝ<߯yÀšnV. ¿2À†Žî.o.pÀÚ㯠°®+xÓþÃ`YWÇ_®à býÏÍVu°þ[„ÿŸg¶s|›Ä V9»ßv‡—럆ƒ\ÿ¹Þ‚àêÀ·…yÁÅÅß\XâÀE%ß\QêÀ‘þñÛòþÀ&ÞÜ™7;ɾ¸ºü«+¼¸ºâ«+½¸ºòˆ\]åÀÕUß\]íÀÕÕßÜ 7»h¾ØEëÀ.ÚovÑù €«ë½8óí €3Mßlmöâ`O4ÿ ÿÙ¿¿ðŸsô‚Í-ÿB°ºÕ_v·þ Á¶!ØÂî/kØ¿!ø¾asø ÁŽ!XÃé/üçHý…` —¿ðŸ“ü‚{èö‚­ÜÿB°•Ç_¶ò|CN°•×_¶òþ ÁV>!XÃø—øµ¾&̬­Ý¬íÌÿÝÔÿûÍ+!áèõ‘…À ¾Æø¸8ÀçˆÝïÿH3swß@nÿz«oïÿe kðUyÌçÍޅؤ5…•ùKN”Ã2BJX6Ç+ÕwMÿ@^H€´+’wf\«Óy¨ÈÄFÛƒÝ#÷|"vj÷U}èŸ>û²ça²—éÓJ¬s™©è½ëDù'øý{ÓÌõ ¤òÔÚpY²nQÎÅÏã|z)}ø ˆn}†î¬^>÷™všá¡u´\jéÎeÜ᫞ɉ0«ãÁþ6á-LË¿Ûݱ ò’^Œ!îÍࢃ8%¾,4Û‡g²ò¯HYÞ¯Æ êK:ÙhL³ªº3ª¢gÚ¿ÿYêœØTo«íö†˜yÚB4e%ÜP ‡ŸpÕ>…¯xªs,(Ý\©ˆaë ý-X&”ó«ô5~²h–ò)ˆ)X¯(®´Óç•T~,˜¬£)?u¢Ã:¨Ù0,´ÑÃ\¹*t)Õ­t‚¦ç+×G•xGÙ.¡ý‚UÍuÝy”€ðŠ9ž†£8wâTÐ×½sEŸL³ÃCóH†×åO?“sK.aë'~dƒ²jF§žR ]~ˆ°9ÈŸ;ÉHq`áý%€º,ùk>Z•'¢%b*fù”û¢n1© Òã;Œ©ÊØî|€ü‹Äüv™c†W;!Ÿ÷¢0Šd®w¯òý•÷0f.™~GÑ:·9 ƒŒiÂÌæiù µgÖÞâÂ¾Žš_šžrøèBäð¨T ŽÎ¡k‘1 Õ«Û v™MÎï±Øq®:Ìé˜Wr-nè±äg՛Ǖ Í;VQ]ÜUIClm(û#’¼âüÀ:iû4´!j˜’ò ®)WÆ/áýEW¹¥%Ón̹Y‘B}í݉Æ'dÀ'åòç§Ùi4Üû6våRoÖö®Í2·úü'Aa|(‚yjéOå¨? ÝÈ‹_c:MhFè¤å;Ò†ä㳃‹-\RÂÕNå¼7ÃL<”qÂ.¾<äz„^Þ@ùêyª…×£¬×”µ±ö'¸dˆÔ œ˜¾j0;ÃQ¸BdzÓ/´þ|§™ZÖÀĵ¿H™,ÇïÌ-±¬‰ÄTu¨Ôð™1Q* 5§— `õÐÓߺÃAˆ½nYÚø/B{Þn±:gŸnΜæO@½âr,r|ºµÁ>°úµùŠOùrÛw“HœÉG²9”ÄýHŒáÜñÓt—Úä0*u¶Nô!Nq@þѤ³°­¯R!|½ðÜÉyjsÕÐi9=ØžCÍyI8QkÑñ%6:ïÚ‹ØMÅÌÊ,O¬& áq$‹|¢”bl=Cá—OmæI¸ôMIµ$ å¾Ðë°t KÓg«vQ—mj¿ ‡ëæ —k?ȰxÌÀQðÿ6Õ¬%íuæÕèÙßV2²HNÖ( ͸sQzíãÁû Ö fݳ«q 7þšwXÍjìLžôdóÝ:ý%1°_’>ƒ/F-H9Të®ñ{e[1º[X̹ôÁ'ß·u&çiŸ[Ü@Y¿pxK±’*Â\ÁÎ9ÕºdI8|GŸ&P‚†Ï©ÛZ¤‹{´F4y‚z-v­.-„\xÝ¡¸aßö«$ߦ5j*HGçôİŸÅVwB $©§tÆ<„þ­®\ïËZ÷ðAÖWr¡kÒ¬¬Ëß6ÔmZªÎ°.àÕâÍsªúˆ“]P1<Š ®¶üR}}Nô;±Fœ'1þ!\ü‡ ï!©?¡³€lÐéܯM§‰Øš"âa™_Ÿ·óì¡Ú] ÅrIæl)Y¨”+]!r¿&Þ•ú3ú’ÿ‚þfQÓçK‰ªÄ,¡™!¿ ‹Ž”õž " ¦pÐ>téÒ\#ô¤AíÇ‡Û ·¹×(ã£Ê6ÕÆï?°›‹ÄÑ/(êÿø½k/®ùLÜþKœ§ôJœµ¤é—¼0A³’j¡Ý Çá{?|· áÁóݺ/qCdGE³S¤*íÛ?ª6û‚Ó±8õ'4ž@µškþkßW‡¡²¹²ŽpýGîûä“}žÚb¶P ZflB1¾zæ|ª5.0Yê«®¯X l’+âtU+¾žçŠUfÛ’6È­m|ÎGF»Ô^bû-Ôâî<ªAÝf]—aÞ =ä]?bR…š1žÕ5ÃAc2]Æ©ïCºìÖG ¢õA¹µAÒ'o_j®ßj .S™ë6§™ÚHV•‘Ü;H.ìy;eÑĽrpr§ÍecÁ03ö=o½{ÑeUW€WñÏC˜‰>§NàíKkbw’ê¦Å w‰ûEÏ/vK‹ú‚”0*1:å1±°{áôô M/CÚf†P‡‹•›Á>9»š@wvBa6=éÂÀK-NoÌ8û¢ 8Ü$}·Æ¨ëæóe^oÝn¸h°qfÙ{@Àºù$§|Phì99 s„øËà[¸Í^“À›)™aßGgbÑ<ë?FÏ”Õ!Ø”OŒ¢ÈlnÒª1ŠJ2ŒÖY™¹ÛðOl)ïiXñÑÚwßH8Ó ±TV2XbLžãŸ¯µêÏ …÷}æ<šÓÞ'zZCãÖíp÷gÁo'ÅžõêÖ^Ébï¡É2–­T¶žÎ†U:Éú¶‰6‰| ™ÝÞÅKyÚ¾ë9N&þ.Öò¾›QÑ Õ€oþÒ<ûòÒ9޲‹Irt}ªšž—yji¿‘à³ÜO‹É–g8é”ÄèB˜Šï±K(1?íú~j §7 ˜N.(|P‚d°8ü8=×܉ⶎY˜«Ìšl?gYò ê„Á_B1•íCrˆØ°¶àºyÝÍIjGÈ—‚kö4›ùåÁæîXÊñàÈÉoÁ±´ ÉKRé.dì¦ g‰Ë¸#§ÿ½ Pîó|Û*q[¼lǽŒ$“ØÇT•îaÁg¨> 㦊©P6ü¾“ùéH%fx69ˆSï9Ëoœªjl?ž]™ 2ñ}£[  »Y®Z!¬j~.©Üo…p‚ky^6qs%Açû,¿Ô¤)G-hÎÕ£ Ú­Ül+?ø×s ä2lŽG ¥”m¾äKÄ3Ç–'q#é¼>ÒÀl[P9á¯ÄŒCšÔŠCL`ú(­/kM…¿aÓ'¿£ÒÉ]ÚÊx€Ã™;± Æ:ùÚ€åÏéÓ.èÖÊ”øã¶¾ Ùjxÿ]6ýµwøÇ ì2nÿµøaHùô ëþ¸W:4p+Ü]ã×B".‹ê\H¥7w‹üÕj{ñdP^^‡T¿ÅsÈìx*Ü–~“ƒ–rT…‹\@”ÂRÑדhç³½_¥†¥wÚï Ûâ_?["ìòÒÁz+Õc¡]hì áR-ÏèÁÎîåªôD ¡F 5*h‚÷c¤7TS`õŸüNI½¿;'Ÿó~ÓlÉ uËÚ€ä*sJ¶»4´mŠ–y–ý‹ƒ”ØUn+ÌJ’™"Tënö‹g ì°\Â¥&×=2ÛG¨lœ“OçYK¡©°JÆ(¿yv‚oß•Ç8‘óc·¹öœÙ ¢NÌJT–̬–®§Ç#JŸf(ö¢îóT†Éš tÞß1Lë„ ~ñêx™´©KØèHÃÌ:ÂP­3,T<ÓrÓÑ]Ê;æ`òm:ÓÀ^1WmÖÔöôîV±Q<&Ýçdîžâ\øžV—ò›òįïC–´„pè~ü+Û¥ n«rý¤•GF”„·{»£osníù©ùnëyöW˜_Ó²óÅí4/ÑMÒÜ_T»’’[~œß`z³@•¹@¬ :š®:<é*Ûu£lve|Lãµä7;+†ò)!ß /[®Ô8ËÌÁ=YðóÁäØ9ªÈÈ[¾¾ájé«Õ¿yNM0LÆN$g*§ÛPƒå<¯ÚZÑ[}H0C[K1±}εóåj’*Ò…¨däKÅvFh’#eJçb)À(~RéDp™·Ì×™Âgô²òzR«BNpœò þ¡¨õ‚¿;cx§¥0·ªÞTÁl–lµP—ÉÞ¨øst»a»”ÙÉuTW:gå ©îQâóõµ²w­7þª¼Pû\“}Ëàôtlê BøÕw¥Á/ø­Ç‚B!)›] ¿S^yǔך( gXWHWŸ]‡zoGà¶„®YuÿÆÑ%"õüÒ’ŠNm£ ·…˜úªÃ;,$5.cúËþÆB&ºY>ƒo$zõL6z¦YÛ3GÒY—`Õý˜MÉNÅBœ°b<â/1)´/Kㆷ«W²÷ˆÂ©¾Ò÷ð “ Zp¼ÿSjd^še›Ó–Ÿ¨R›r"C„Þ`Í¢ºÝ¸w[[Žý öìÔBÉX¥…6µ5V^%I5µé±â¤¥òaâw¤›çgfúí­Œ5TŒm½_Í4ð—Ÿž_†¿êá–ZÙþ¾².{ÌÌEÄ-"ÿ©<üu”WÊe,(ãà;,–ØžmÚ¥Òd@]É}¹“˜¾Î~t¼é‹ÄÓ?| AMcj»ä±h#Qš1õ1`ñÑÙ›®ì¹XÙ¯[S Õ/žÂÿG\²Èe¤$[þÊzrÓ^ð nâÓàHY»?–}D\^¬s`ˆ’:EYÕž-*iYL® È’ãϤ<‡“2S›¼FÍlAr’ <ãÏr O3ºRL“üäs­»dI¥i©}ÆýÏ ‡u)—£Wkå/ab÷ÙÄîMIïiµLq’cæ¤è~¹ü Ð1o¶ùU"!)˜ËðÀŒCÒÐY¼¼Ë(#þF¸üã2»Eêäù¨­oT럞z1õU¿h®ñÿ÷âBD§B$;¿Lòn•©ß=®Ò9kµ@úݤXªa,h¡ßŒG¢Ì“‘›Û -ƨ긭u1È?(爗*ÑšXw¥¶ áßâŒÒ'w>Õ|GƒÈ.Y3§uávý¤g%XéöTmò^®C÷Ú §Ÿ3úÙ›•%¶C­Úžy«…ÿúÇ¢pøeŽ«£áÇeu²ÑŒZŠÒRìD<‘»|Ÿ€ß 0ÛhzUGc ŠßUƒÕh"yN5£…ø õwã÷hSD Ï-:œÕ,¦3^Ô%NÈ2 HheÝ×íÍÎâfsóí¦¯ Ô~^ZÜé®öëEgj @Qi>§”=Õ¤ø«|>?ø ëcfcQgRisy:O²;-à[êFhÚ¹°ø5%ü´•@`‹±9ŸIBÍÙ’ŸàËahLÖšV[žmOì@‹©™»kåN4wÛTýcl56ÜÄxÂHy«Ô¦3®ËÂàÐ Mri9~ëŒÀ#‰ñ_ ÌÇ—=QÑC~ÞtâUÀñÛ±vuâz\;Ûnk-”1îhïðßNCƒIÛΧÆ²¬Œ§ïý&„åtè}’÷y§ cÊà°Ÿ„ÀÉŒ9Y^oaÆV;ê×Ñ]ö†ŸݶÝ?aî1ªÎM˜6÷*]áf‚ò3P¯Öž{Ú¼‡úxê!ÀôþRM¢KÑ©3Õàc“¾cžžñš8±†ê%z8p&ÅOõnkŽJµtw™ÎœÅEš­N[ÓŠjj¤ÕfŠ×-1ÞbÝ/:£@ää{=íãñúvÜì´Ô‰2ò†}öú>I*³pg’9uÉÕc€BýÄbDZ·!¡yÚApˆ1ÆàW=2¨¢ÎŸ!¤\Z)¼hŠ’7¢MPÚ%¼¬šQÛZB¿›ub,š®ž56¡Ó¢ƒTJ»ÆžÅì9\ãÛœGHå ß”(ä9“Úôþ (Bf?$¿/BÍÖç"W†ÑŽòüÉ'yR‡R/µðkj…B^-< ;õIÖõauõ®š2yQÓÂgD3.Éô¼ %"`‰Côó9ÆpR¸«Í'e¥~ÝùULç°¯8É·—Ó¯\±•²Ö–*“4¸½3&I€9OT´ßìaêÂd°Áí1ñîä_f»~`»å,®ÛG”Ò®èf²åÆ1e[Õ¶€Ö'ÅnM“ºs™§¨ 3Õ'â—ªÖ}"ôF?Häñ¶¬Kõnj¯™ã‹^{—sÍê}v5§TmeüR¼ÑP¤fÞÌù¥5<ÔSd*qd¤ ¢…ó‡­«0HPd€ÇN_†ò2–¬&„B—$ÞHmÔÛl'WÈÙþ! Ð ÕLÿôö‘¼OùÝAZ!::i ò—ì\ÝsyÌ$þ¥³¦p“Ýâj„Yfï{É-üÓ©õ·˜¹îTvlBƨ%P=9浊à‘h#Í\á—;|o¨³4…Q/,<òIÓ£7“ûN”ú6yŒ–/ýÆêVqÿJ+Bâ7¥Üy2ªøx挣ÙÔã è…ö¶}øÖÑÕ{½üÍ÷ÊωdëÔávDW6ËÙû¿C\qÅ;iž”¸;ƒ ‘üõ‹ó(T\L¹¼Â 7–ãéixJ]ñ‚Dz±¡2íõe"?+bÿI`6ä'I Z<Ë‚?îh¬;"Æs„ Ô$Wá£!":Ù·ñAáöÞC® ðÏ~w_s±ÉÂøXaõˆòò§ueˆky…7zÆFr®’Y[sòèµy\é €G^N .S LüЭÉÁ»qÞ¾ÚªãDQΘÈ}å'È0‡öê%g¨•ŠS79êc;'û¸Ø¿PVýw„ËKu•fãK[ÒÞ”®Ó6S9Æ+P1>ã·  Ÿøv|ÅÃ/ v¹‚Õàyô&I·X"¢–ÝI‰ÖmÍ`¸ªjLó»¥üî“J¸E»Ö‡ôŒ{„Ít>îaï«[í;£ê¸wónKŸ ,“͸t!nHá‡,ŠìѾ6ɸ®Jø/læîCR ×o¥e,/ñƒ~ïÏØÂ…MÎQï*óÓ6Z>YŽb¸po”ÝnR†?%ÿô¦åÚüY³s•¥$¼ŸÚSñF•¼r©>«’[;óúÀwÇ:¶ª‘-ûÃ倄}Ž> mް=?r›ÞI"G´M9Ì[Kí˜Ã«'¼™“LB|Ư“²û‘-pa¨s­õõ¼Î6#ˆìö$lOvø«S0Û®‹p„Ì}ç©9ÒÙ‚QOvRªFÇ7G0l'¦'½‡½ÎÕëÓÅW…˜—©c{G,JUgÖ‹T‡^›Ü ‚4{Dícµïì©]Þo"„¶^k› °ñl= <öËÍ{ù‡» ¢ò÷}öRh üm9å Ò+NtqÈŽš¿¹ñÑX€+öN‹–&èñ?òÈïþcjS†ù…ù‹Ÿ Ö µ¬.µºÂ‰Oªw€úzTý>ò‡@g»Mô‚Ä,ÈÇ“€*Tv™&Ê{"Ð šÞ° › ”Ut’Ìíé®õ&ãSÖ}oGÅqà2¶.kviœz¼z»¢0Ö^B²=,&SˆN³ŒÌßcöC4TÄ6眎5°”Ç~sHE7³«•ί}xþüë¯ÉÇm¯bÕÑÙ?Ù³,¬qiï± ~ëë9‡õ[5 ¿Ã„ºt A_Y²ÏÍfL áóŸž<ÙlíQŽK…@(9ù:û{Mº®©BF:šüÚ=T«FUw¼·E£áSÍ%|ÒÍmú;Ô•cë߯.-ØŠù"›ÀKø\¿ƒ %G–Nn÷;¨n¦'äÏZÇéb¯MgL¹Ší)EóÄZzL]ª'ˆ¶)þöhÎï6 Q$I ù#}víšúpÈ|je²ýb‚ëõì:º`ŽjOVk5•ÖX¸ ç×)"Ñ+´¶Ëlu›oµÅ{Ô›™ƒ¹N/¦[KFÔD6†2Äíôxò˜æŸžX¯ƒ W÷þÂPWOVl)ê€4,¤]/ÍOHÖK&FÂl¬ºQ†ÝDÒׂRR"±Èi IßWBÒ;RzM¢Æ$›:\y‡>öáñ±FeÒÕ)ä—{@U¯ÿÌZwf ­OµEñ^±¼áŠÁ:Ú3æ}hi¶R¬¡?x…ÕP¸ït@^ZìE§ùèN2˜í|$­kÞUä:4£bwaÇNuÛÏUeË—ã¸§ÉžŽŒçÄ—Ä;3âC ËÀSL_Vr±åÕ ’®1"““õ]³j«Y¿fÃb¸À„1‹iÈj ÞÔJqˆš´h»Äd/¨yÒ/:›r„C~D c“Û )†ÀÍ ssgîYFO®w/1£~ëvtËry±ª÷š”DÖÊÙ©k¬>Wƒ §µZÐXXóÛCgGÎP6¥÷%¬üKCd¢Å¡vZ';ÄGm{úùÑßr+ÜÅÞב[Ÿ O¶õ¨A ý–Ù‚•žø‰€»˜_:«t)>?î‰ ­©°¯«¦B~+4„±¶)âŠD©õpeCÿBÇèІÌ÷öúJÿ#œòþ~ø©¨£ŽÊ¢¬ã/ÐÇöƒ¤qÄdD–®„=O­G«ªyþëÚô´€ƒT1Âé4§Ôµ™+¹}ö _§÷Å`KéõŽJŠfÿ@&©ï —ª¢¨É?u~|ynˆI“Q¼µN”2ä{ûsWG!läas±Ta à±»¹‹±cHA¥bÿ‰V+í+ô¨ ù}âî ý–‚DøçÈŸ‹@;J/¢ö?a¢Â"py@C¯!% â—¢GqÂl§iÈ7QþûQ-Jv¶¦×¥€MíHLøtKwöÔåOÓÐêÌѯä<嘘Ô*z¦>t°ø‘yyk¥ROµ ‰}>ÍÀÜ!þDì˜ ¯ûmXÔ÷©Â{Î%Æ33³ZrÕ©¼Ê‘OLÏàíЉˆÒ놀S–5xáh!(Ã'~CËÖ#Û­ ÞMTÏ«uàŠ#_.Çá.ù@Ãm CÚÕÔ‚»€n4Ð2ÍRá{µ‚Çã÷YŠ©Z­\5± Ű®Çˆ²úÁœ0ÎÂaü@·ðRbÌ)yI|„õñƒØQ.Äâ¯K—4nEùftJå€GÆEÖÞ‰sZØ!cu”¦¤r«µ^0›í1³ä¢÷_–>¤@Ã5²ÛÛùÓ›ÆÃäáÔV>%Ï[mY4€t!ëœ?ñ©¾pf¹É-Ù‰fTž{Žmåc ªRn$³3h)¶—Ç:êy¼XWkííº ó$£ÃàjÈÍ I6½!:xÚ¿‘>ôiCŸ¦Þ.Fi£9E "ˆ  Æ 73¤Ot![9Ó”NóR˜ºkE—ÄX;ñX¢5ý1Â}°%íìZ¶—Uœ„1ÑKìß|’’ÿhDë$¾eî?=v&yéµ|Gè“ÀæãHæüMÏçë0æ/´Á‹>Œú å…† rAzö 4t•¾òqäÝZ5JF¸³JéÄ•Òí#ŒÔ”2{ï®ñ³‹_×ð“ÒûÞÒ§ºÏÆ{FÓ‹[÷7·öV{ó—ét„Nçà¦ÊØëŒP_± l73)ÝÛërh{­3êc°¸û€T¤Go4ͼ| ’SöÃÚjὑx±UMçYÊ?~¹¨YxŽ©aIÕ–£©T!•ÿÈîÒiOFê¡mFºÎÙC‚Ç…¥]Ž-Ä þ·ŽB’ìY•Õÿä€P۟΃d¯x×ùÎê„}™ÝbçUžåÒ]Lj4ÃÔ”8Oë€À7ûóÒi¸Õ§>_mZêU½¯g¥ÆØ»Ýb)öÐOeÃÍʯ2 —õ*vø@¯/.EŠz3Ϭ,.KJîúð.M%bŽÎ_%Ãm‘ÌUœ—ùMI_ØD¾iÏ8öXõ'îm§ö@HÆ<4¾â\4]´”yÍÂÂò$ü¤‰u–v›ýÓXH-3L^÷ŠÍøƒXùù˜/=}oUpqÑ~kFÊSéà"»+' 6Ýã $ºº ¨¸§ôœ+4ú™Œº—uÔáׯ»õYª¼f½í˜#Ê®)¼=£Ç´°€ £úÃgÎ^þ^(l5á~Ñ=x ÍL—ì¯`Ô^U/ …„ ÷ì´W–cGQyŸnµbHÅ÷·@’cPvšÚ)¬XéÎVÐ)FCË6IæÖ*Ò©gÏÏôs›[ƒtª=ÌVHmX «‹öA´/JéÝå½®®©Ð –[ÅfObó`]9«ñ5ôl~«Y`KǶ£Áò· ?L:W›cD܉¤þ·ÀAÏ¡Ûà§r ,ò(kKâIù4eê?¨ß>/ùÈYa¡6à…Q%êÏ_;»!“ Áo½ãÿYºÒ9_ªñˆªÅÚÞΨpfÚ{à+§¥”~œÉ¾A…6¸É SéðÕ¥f5½)âR6šUì†$æ"¹ŽÏüJ°N?{Õûø…xqE—¢î,ÕÅÉÅ`–(<Wa°Ê{=ÒPÚV¶UŒÖ yl-âFxKÎäU…3Ê Ÿ´ÓnÄKd%ƒõýÊÃÓò¢ÙkÉó¥OŽdS‹ÌýÊÑq¨‰JõãIñäI 6â^@ÚÁÓöp"‡  ÛNù Öjõô·’s‚’šŠ ˜0îóp¯¥t‘Zou¤)½Í{Gä‡ðp˜i;¾±pmUÈéµÛ`žWgý²ïhùøýkÆ–…Ø÷Þh$*MÆÙ#ºŠ‘ž¥2®ï'_‰Úëˆéf¼šž›D;º%oMáž_³nî›U4(ƒ¨Ü§Œƒ¥%ä¼xɦ˜3jðð‡I$¹W)rH+¡$õO[´â˜Bú ÈÌé•Z¬Ëú ŒM®ÒºÉ^²©ÖÃèVÊr†”¯QÕpº¿’>\™¡QYžFn­^sXôˆ9ÇØ)E0t`U™È–{¡@X¸ø|óÁa¶ü©nK&L™ËüMFøáScÞ|ʸþDŸª^ô8‰…}¹ªBãphG•2«GƒV?lå«`ûÄÍ31³ûýt€ ¥îäWÜ­=Cáç}¾džäPÒ-Ýf£ÇÊ"äJ%joƒrXÒ3·8‘æuŽzb5^·Yƒ`ð"ÒíZJ P˜ÌìݳßK¨Ë¬é¡²¶²ìê9(Ìçý F~­–Tn®ã ô•œ ôûS½/Gš’°ÊJù¼ ?(©í‹©…}û}áÆRºÀNÕž|ÄຉN¸.€|AAú.К‘j„WiÙ™GÊSÔÅ¿‘”ý]ÞãIËÚ¾JÔ÷ ì·ìúhP‘³@ 7µQCšà´K°ŸqNMaÊï% N¸jh¤K¯PE¯9:4dA°¬ÛæK›#zf¡T£¯-âòbuÜ“¸U^M“§~”’ÎÜH°ÓøŽrœÕ49è©ðþ½Ýæ5ªËÃû…Ÿ÷‚¸úx™ò²Ñú¾ÁHÝŠªRÐjM1YuDU¢ºë[¥<Šrì­+°Û·Oãd\6žgfùüQ =ØÕÈ—FCk"ÚÃß¶7dm¤$,éÝž±I·FÆtrÌ Vÿà!Èøºñûÿš‘–Mø…H ~.4ØeMü5ÁÁ!— ñ©R<®;w‘ÅÉÖMu^hÞg‚ŒÉq=Y]ɨbþ@©cŽ‘Ûg††u.Úýü08l¾¤ñ÷³öÝ{nRŸê’IÅ8Ÿù~3Ưào¿¬2ùù‚{mˆ1=R “—ìõfŸïï¸m(ˆC0Ô>†zÀ?òýìÈä¼NùÜ1|½­&a¶Ÿ|ŒvíBÊaëQõ¥‚ðg}¬iâS^¸ÃäÆBtÝÂY%Rd,’zÎD„º6_‰FÌæÞÓéœïØÃ/æIÌÍEø5 êw,¦6vþö^™¿º_ó†q63Slv‘Ëþäkpõ”ÏÝæYa݆²câ¥r—Ùm“JÊ…á t 2*#¨?ÛÑ2×§…B’ºŽø´º±Ñ>Üç¦,.BeL]ƒ¢ªã7()ퟒ?$ɺfÄXMüLÕŒS;ÛÍgÈtleÆ ôÆQn?h=õXIÿiF o¥5CŒQîšðžžV¦Â‘ÁFŒZ7õê[…ï·»ÚÃk¸ÊK _ó˜°• 4À"yš&°6\Š¥ÙKKvê6M³Ÿ€ôÒâi5‘!Ę¥‰ñ„„§Y‘¿(ñ+m〒‡RÖ\?B°¯ ‘Žr3=¬ËÁ¦à4I ±j1Ìs\|EQºð™†|±(%[ø5ÍýYp¹È46ÊŸÈvw>îïÓ(ÿ®M*ç!,b´7Ó~a7C@‹â÷­¬>븅CЦ(ÖGŽÛÄB VÇ”•W µ˜…uQUxí5¬¹«4Š`å"ã97 –¥›-`2‡ÐÄ! ¬”¡`õ ¹Æ ¹"é÷ž{£}Ûm<ª ^xj¦9Œ2i\žªÐ×òs¶q>Ç‚tуÇ[4¿ò3ûªj„Ÿ VòÊv¨Ô)w#ŒßœäŠ ¾¼¨ö'¬7kO0v¶q²Ÿži@È&’þÍM]nF)ûÔ{š œ-³Z„,Ïl*÷æ"Ç9füWÞÐÑŒ“LëîLR=åcÄØ*6†œ{†‰e‡éMÛ¹²3ذ.ðøo9B_¡—sœ±R?X9YŽLŽ*jŒâpÑë2ËQ JɦF>7Tï,k äãÓ‰M}}Ƙٺ:rÛfÍa‚š¢û9Õ«2ybÅ:É:Ü“ƒüN ÷×…GÁ:NšI_Œ¹ÑÉ"òs4³vH]?me7¼’×)Ý®€OÎÑÚ“)<óÐþŒµúÂÕ2ëJc4Ñ zÃ^ò+))Ú3˜QE6¡ ß ¤ðAºíÃ$Žš–múÚ[øjš É_fÙÚB…éEêh.óv PO¸"C=@ Æ3Ž÷ÔLnööE‘Ë#0DâWÅNØ!o¼‹-Ûºb8ÒcÛp|w‚gqªjm5&ã‹Ø&ÖØB?ªÆç[ãÖdÒ“zÇïèqeöåûšm»Õz§T™¥¤dïÈ(â³KýþíëJ¨%à{±\—c‚TTÙ–g(«7¾NïA‚ű½|¢ª“=D抽›òï§UÌêi£?5#ú±g!ˆ¡Ñõ ’q¯ú$YjÒçà ?‡}sY%ûº.Ê%³t‡šnæ¡xáI;ìóþþ›ZZ5Áùþ7Ðl E•CÈ7;žêGã¤Ñi¨!m}ѹÚ ž™óߘ‘èì5?úžlóXŦ_bÕmÅò˜Úé"ng¼ÌÞ(\¿®R©N³~‡ºÙ=ÅhšÜ7<]æâ)çü Qô7ö|iõ%}¡ÇÏ,¡&5[(ñY†È6 ´˜1‘ß0`yÐôHõÍ„Pð5?+û\:úJ½yçºÔú]-³xËÔϺ•AçKßz^Ðä9‚ÆæP<; œ÷¦äÐ:ýî‘#yŽšÞA’Sçv ÕõŸT½üQ|6adOe*v$&b‹wÊa‘¶ñ†½È÷5äíÎX›•#OK¡!Ìï»ä¶±KM\à?ªšë¯>ò2¡ý€„熂Ê饒6§ï_‹Þšè:÷áóÚÞ)ß5<^Z8H‰:cÉCSÂb”¬Z0ìdxÈ8ÿŠK­öf ¹2å+Œ3‘¿˜ëÝùˆôüÊQ(×Èý¤ÓKÓ¦°âˆ–á“¥ørˆHa~¿pPâPË`ýG**€K·YSùhï¸Ð^£ å€Õþ£©²Õò0iZq"7ØÁAžcx *'Uãé4ôî6'mç¥âÒeÿËñ.qj&Á×.ªëg“ "sÒ 5Íj9~ƒTCásåd ÛZ½—ò·%#]ƒ“‘š°¼6šÌ—Æ¢/ýb^DÈ_“»Jú’&ôü<´ï—ôóê¦ïíš¡üó?;4š/*—|{¢ÛyçÝ嘞ÌA±!yj…+rÂǨU Œo]¥w»7êwâ,ämG6„á«‹A¨GA¯ ±þ”ûílA@XÆ´3)ÃMFàžY“Ÿ¬ ê5c å€;BÏ8an¼k/‹P­¬ŒòWâ“Ö ÕÈõ˦o*\ð°¹—Ôöh1'=Žf>…~v̼ð¿PMy¶0Zp²Äk¯'úÙÿþ ýÿ üb3;ÐÅÍÑèb‹ô¿Oá;endstream endobj 28 0 obj << /Type /Font /Subtype /Type1 /Encoding 201 0 R /FirstChar 34 /LastChar 126 /Widths 202 0 R /BaseFont /MCKWYJ+CMTT10 /FontDescriptor 26 0 R >> endobj 26 0 obj << /Ascent 611 /CapHeight 611 /Descent -222 /FontName /MCKWYJ+CMTT10 /ItalicAngle 0 /StemV 69 /XHeight 431 /FontBBox [-4 -235 731 800] /Flags 4 /CharSet (/quotedbl/numbersign/ampersand/quoteright/parenleft/parenright/asterisk/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/equal/question/at/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Z/a/b/c/d/e/f/g/h/i/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/bar/asciitilde) /FontFile 27 0 R >> endobj 202 0 obj [525 525 0 0 525 525 525 525 525 0 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 0 0 525 0 525 525 525 525 525 525 525 525 525 525 525 0 525 525 525 525 525 525 525 525 525 525 525 525 525 525 0 525 0 0 0 0 0 0 525 525 525 525 525 525 525 525 525 0 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 0 525 0 525 ] endobj 201 0 obj << /Type /Encoding /Differences [ 0 /.notdef 34/quotedbl/numbersign 36/.notdef 38/ampersand/quoteright/parenleft/parenright/asterisk 43/.notdef 44/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon 59/.notdef 61/equal 62/.notdef 63/question/at/A/B/C/D/E/F/G/H/I 74/.notdef 75/K/L/M/N/O/P/Q/R/S/T/U/V/W/X 89/.notdef 90/Z 91/.notdef 97/a/b/c/d/e/f/g/h/i 106/.notdef 107/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 123/.notdef 124/bar 125/.notdef 126/asciitilde 127/.notdef] >> endobj 24 0 obj << /Length1 1631 /Length2 9712 /Length3 532 /Length 10645 /Filter /FlateDecode >> stream xÚí—UTÚ²®qw –Ð îî®Á݃44îîÜÝ ‡à@p· ÁÝÝrz­½ÏJÎ>÷>Ýq›—þjVÍÿŸU³çP‘+«1ŠšL€R {0#++@\AL›• ÀÊÄ‚BE%î4[ì%ŒÁ@>+//+@ÔÅÀÆ`åâcçàãä@¡ˆƒ<œ¬,,ÁqÚ¿’¸¢v@'+Sc{€‚1ØhÙÃÔØ 2µ‚=˜¢¶¶Õ¿*œª@g “+ÐŒ …•`fe ˜-¬ìQ˜ÿr$kopÿ+læâðßK®@'gˆ) Ä$-bÑ doë0š£0+‚ Z@ˆ“ÿ¦þss)[[Ec»¿¶ÿ»KÿkÝØÎÊÖãß ;0Ð  2:Ùÿgªð_æÄ@¶ÿKFllke*joa °ü+då,eå4S¶›ZÌmÇöfÿiÒ¶¿ 0++Ê+ªjÑÿ{ž/*[ÙƒÕ=þÙö¯ì¿™õ7CºãdåÐcabaa…$Bþþû›ÁˆIÚ›‚̬ì!‚“ `ìädì¹âx±¬ìÍ€î ;Ä13“= )@Zâ09¡ü5Nˆ-fs«¿b# €ÙÁØ ho 4ÿ޲þ;ú¯iþæ0›‚ììŒG8Ì––@ûß!.H-dè ³ß!³'Ð ô;À `ÙÿaNˆ °ÛïuNˆ>ØÒ øGÆ_¶A.N¿ìÃõ ˆ9gHÿaˆ5g ëÎ ýbþqr˜í­þ0Â)ýM±ßIÿM3Iü&È$ÿ!nˆ}éß±.ó› ¾eDOþ7AôÞÿ&ˆžÂo‚è)þC<†)ÿ&ˆžÊo‚è©þ&ˆžÚo‚4Iý7AÔ5~D]óâ…(èþ&ˆ—ßSç…x1ùM³›þC¬,B³?â øþ5Ç?âÎâ„سü!þ¬þ@ˆ¬íѵû¬]û?¢ ú!º D×ñ„è:ý]?Òðé…Ëqåú!Ï4³û±áñBlxþÿû¹{1Bî5#äþC,òB®‹ÏÿH4uq‚ü*Á?Îwè¿ÙÜ òjî@S”Å9)°ujSh¹¯dáD<´˜Es¼b}÷tjÐB´mɼ#ÝjöCe.Æü™Û‰sd»·Ê¨Ôi€c|ÚìËŽë‡ ÏVí‹ m‡··A‡˜mM3WGœÐJS«ÃåI:Eß²Ïúó•i$Ôw—É¡zô\z2ƒ¹¸µ¥2l5ÂBê¨ÙÉU±ÒËÃ9ÂVܰ’>­ŒùZ‡!µÐ/ƒÚ]p ‘ò_Œ`ÍF5?ÜÊŸ¤x oê>ÄÜâÔú”Enºä)¤ëÄ~z2Ñ[&ËbÕ³½ˆÎœywÉÖO¯îtøQ=“S¤{¯„DÄQ!!Çnù¥£ˆ\ª6¾†Úo¼y¥ Ýòƒ®âÍ›‘Â9RKyé_ážüÇë Ó±_ºih¬Ô„‡54™´”¿äØåCãæB$Ÿ?5ø¥¿qޝŨ³–*® 7ïÆz«L9»Ê(»Ú¸p4œ¼(ô …Kв(ÞQBëL`ó²$v¤Mw}…d"–"äî–“¾Cž÷}áÅpÓtY%+ÉEr¿NV£ñîzsÄ%ØB^ˆš$ËF¢¯a@©­±ËÈj^øm^¤¾²öݸ«Ý½V˜`Þû³%ªRNpÑ*“£b,ª8‚ú¸1É’µ[O’«â'ío!W9çHâ/0˜¹k‹P1æèÛ¨™óeNéí¬9æóX$­š›âù™‰ßS|‰·sYP_´ÍÏæ~~3|EL9ÝŒxZH5ã'Ím¶Ÿù½+&@ÿ¢êÓƒ­”éKˆ…À~dsœQ`®_u¥Ö™—¢÷&Ætó!: ç7¼˜²Å¼ç²Íå[é¡“ 위 FYXrïmÑ—]Åùâ1_÷–ŸíER³œçŒYµËf‡bf¾Ú) ¶ºQýyùnSJÇÙ¹Ÿm˜Šòj¡«–"Ö±EѺ¸NæýÙJäp|§¡,¹(‹6ã·vêŠþcŠ YJ~ìQòíÎ v&K#õ8B½ýƒAC˜þ¥;ôs4˜Áto¢nÚÚû<½ù…»Îý3—¿ú•T¤Œ*…dIÀ–âU\ ª«=¶"[Ñ.EJÓö\똥ºŒÛb†ð9ÿz5 gÉP‡±[>éQûPX@‘–ûçÜs¸nOâ°h÷¡4èàï¨ÁÉH´2h—Ÿ´¼y\NIeYÐA«æú毊g(+«ò ç*SÓDªU< ¿Îba è}±EœÝu6|L«âo£pxLéâRߺZÀT" ÙyÙî Œè%H=OÿàÌ"e~7b›™˜7¦¯êe­-é3Üù^PR±Î/M¤æÁ ˸oe˜nYNE&pvý³EEW1mð£´nÕ;ò§êDzîoÌÁœW¯WùËÃæäs¨’»ÓÎE‚µÜBwÿ:ÉL;¨mMºø¹²‰PñwðCòŸ,(Þ{’è R/ï|ûvÍŸ,E[Òôë#aIu;+øùM <)}Adíƒ×/$ûãàlf¢fÓ»^#±÷͘ÞäâÓSrÉù`—¨D)_>ž÷öBŸ-ÄÇÆ’äú_7Aÿ@?“clø~ZÃ\dñšÑtQ%1È7Ìm Ë]ƒl”¢_í«èh¶‘Š f‡Pà²oRIzƒí©‰E+öƒêçÎLšç0©Þ¯DdÅäªæbnŠ¥¬xl|}`tK~ ¸ E\TÒzl8°-¡Ì£žáöyV¦öw»qÇ|«­+Æõª ‡Åk›yðƒº ÏË$ #Ûï¼^qE’¹$©syW$çë\c8í“È‹ÂU©5Òþ!/–„¿Å/ž^Œoè-£ ×g‰þ+‡—øx ŠáN¥‚Iëù ºIƒÿ !9´sÝÙÈ7OtÂÓ¨ËI·þß’¦h} Ú™®o±²˜‹!ºÂ*`óÌ›î[Z¨ò"WÏ­Îõ)¥Œ¨@îÙ‰=G.†þ©èn±u´ÿKï·=ílZpÏØ_À‚“——ï¯^ðW€–.b£§ÙrM:`v$'ŸÂïñuÜ—YÖpú&V½L×ešè€/+Šx‡™*èÒ×É6‰ ±¹Oóí*Yû!‡ˆLve:±°xø_ n£cB zÚº8õ>uŸÉ5n„̇0¯tQ­[_‰oëhaIÎ3#‡(5ãs¦Èý|fÏh‡¸[ß Æ¿Œú^iÝÚVÛ—iá¯bŸuìøšÄZë ]æ3æZùýÀaY–ô I~}3=Un€Ë0é^ü ޹%ô]l² §ëë’sÖ>$ᢵԢ-VXn—·î‘ÙÄw’bï…E¿xf˜OUô 0 »OÊØÊ ³*Í\¶r´Ã3»çX~>wdE&wµ½Ê€öâ|ÿz‘,pk°§nökÆÌãÇ0¬¤¶Ï¥o{üªŽ4­kÖD¹œ‰T¿îìÞŠ»Üv7»†‹|–˾vš¥Ò_ëõb Õ©Hƒ gö2”¾ÙòùÞ")„^£FR7 µm:<üõcW)ŽbbåÄmûÃàaC /-®Z¼¿ô7Ý @½”IðH°.Û¯…+á~6Ï/-ãÙ«)=õïF™1¯¼p) W¤ëÝ3TºØ•s|_¬R„œ¸­g2°Wz†ä”½µ¢-ù‰wj?]Žkaâ-½’©fÔ?° >1“†w*~9ZpO«C׾ŽÂ(kûµÄJÿô= æ4a#ëï–Ý5Æä@lµR7<½Õ%·‰ÕÑË>£|ßÄô·þZa2¹3åjÆHšÚ©¯0Åp.nÌÛz7Iò6̧D® ¹®ŽöÙ;ï1ÜœNag˜,÷òý¤Â}‰\óÕ&¾YÓcp¹¹{ÅN¤ðºÍ†L³êiºw8 Ÿ@oä"ÐD8ó©ªÊÑ¡´5]’3Stû¦g9w[ƒÁË‹©Ò<üÜ ÍÞ&¯Ëv‰ªH‚\kUׯ"áyµ%[y}AÞ3°±Â„áKLGSu”e dØ=N î8ÿRñ¢ò۷qÚ¼ºM©E^ cǵu©þ…½W¡uwâx“éjp"bü®Ëèpv„ú$f|CqÕäë‚,ïà·˜´On®Ô˜¬BÉ\ê®Ìý⣂×I8¤4ßÖ¸+æ6pwí¨H‰¤T…í$ò¬*Œ‰ðKÄ~šeÅû5 “¿o~4ƒ²ïF$@•«]{­À#ð¤Ÿêo)C`€Å´/j8§Í :^Ýþj½^¯Ãò³)'÷5ìw#Oצ€Á9WSú(+ç²ë‘ß3|á{bëÌGâ8™{eêôÂY/ð|ù-……ûmóÝ®?ñÞðžŒpÎj-ØKÒÊRPe¾q] €˜ÏÛzIt§“ N\ÔÞpæ£]ë·mãïi}´\M¹‘d_Þó—ã·`0þ°E ;O)‡Ø•–á‘/ž’'˜d#zóK‹v™2<Ïà—sÀ:›Þ§ù™~7ô0[³í&©†ÉáZXvÇ&cLyf~ï@”‘yfG…uWÀs¤ÊtýPÉF/ëyDJØ—Þ}aò(¬=š‰ ?¨ç ƹ=^¾ãô+úÍà^ìn]î-f«´wü G5­ÑQPðÅ¥’øü‘ Ñš)DïÀªzÃX]žïL@Si¡VØÈ Üø3ÐG,ù‡5,' …«ì30õˆ˜v‹i4ÍW0{þPW8Œ?³5®øNŠ9>Å`º_MüSTxì+Ë׌Bk‚?ÜsTªGù/¾LèTGÁªGNÒœžŽ ’ÄÓã’_½b7Pv´OļEI-³ž–•¦wñtК¤’f}šÏ¿›?²³zZÍ9ƒxëÜ «òlЖÖ.åW¥E>ø?˵d ã“Ûž“ÀMŸ‹SR³æŒÃGt«~ž»‹23Ÿê"R¾V«ª@õ¡¢77­Ãù¢ˆ»úë"AP<ÿ.m7`—àSÂ9¿èÁ”¤êˆÐ+„Ѱ¦u2<«h³ðD‹Îq¸ž°Îu0ÝŠ:dCAùHXm_¶$Þ„ÙHM°»W£‰úPs69vù®ÂgÇó¾úíy*8Ó®§g#M d»~¯u ¾h!j]˜g-ðŠôí”#Á™Ôé¹í°H©¢Uîð”Vg^{Únîšê6ª[`Žô¡[wHê £«™t¥®¡õ‡žªg üàÈv}+N-cþ¼_AÕïÛ6¸çÝžÏp]°ÅzN(Ý2,/¥£®ê‡VOï<¹v­[â»5@L×Õ.­!fV¡¢âìÍÚˆP£´+J¨ø‘jÎýõinZªÊO&õÒÞ­x´/ë#Ï«ñCQ0ŒRPC ئùpú›¨ÏmEñ$°çBé0GRç±b«É`ŒKøøÎ•̽4œOc~ˆ„­T¡ž d_ø‘Xøà[“ÄB«÷vïOr”Ы­ÄR¡å•q(O úÌfè{Í‘Y´ð:_¸NyW=.C¾!?‡XŠÅ üÂ&’—.2qãàÌÈz4’^·4„£é`´@%%¼ µò÷7#Ð[Y …(`6¢Ý‘§øÅ@/®\à—ü~Á=”‰{‡vùká+Õ #^OCLìý!4;Ý­5,Ÿ6xÃçFô`—1Ñ'ùž'™<Õœ¬X%ówãï>¦^âä‰á»'¦®íWï$–FW›ä^KvÚw]ÿ5v½èç%ÈM‚fœ£°/¹¼õw§˜ð(_@>opw9’1ÈX9è"RømÞèÊQ%(µ^Ó–óM‹2ë¶>m»ëè­ Ù·a({š(Ã}ŽzÀÚN¨Âk‰½íE§’eÈ¡ž—œõa5/*}«rfm%}ã'¨Â}þ 1žü3xvñMùI ñÑ"; ãš]ðâîñ—˜cèý¥ÉWDüͯ Ô?z’ó¢r k.-µYz¹@µK ;ÑæYF®ê’Ÿw:Ê™A?Ìj®Œ¥ASµÆtº¼$Ðè¸Èőޥ/Úšî?Þ1sy•M¨%©í3SÈ\ÂÓðIJJ÷Æ®Rca¬°wG™„±Ï-¡ñpšygÔróÜî¬NB¤:¡ˆrÞ¯a+﹉ùŶPùíXʽÛ›0X@ Òy\4T T—Ц==î )d+op FÚ‡qC•™8J‹aƒ95ñV Ðޤßnßû‘Þ…sG=3Í]"æé©ñ©)Ý 2—·X[m±áSmœgÎ×-L?-ÆJ¤«Äuw·+go™ø8¼ùáq2lkb»Ú{ºôE1na2u—×ÿµŸì7Ÿ^NDÏ8í}ÄÐAjYtX£°ëCÝ­îz@Ásß—¨}©¯jŸà†o9,÷÷ÆÍf%{Ög_(<d¦°²ÃÂvG%iÖ¡î¶—sãË2ÑÀM2\âi5Ô´˜_$ÂM!ð·e'K^àcìj¶¦Ì7„‰TÑ8a–з¿¶kÆœPPôZ:Boj¼zŸ5ç$`UKF€'c¦ú–=õ§5^ʈ•„_ç(Ž~ïn§ÆŽ3Ôf«Ê¡¶[GǤ’쓊¯À]zÜ7Ü ™£Ý](ÏPU–Ëd ´ã¢‘1${V‚qÔ‹1,`’®ÑS£Ì0Ü ßÕÉŠ¬qÆýœßUI‘ð^½S'©Ãw‰¬ù‚\‡Bë½ÚlS/Vvf“x1ì_ 937g…)´E£Ú²8Øó=€LŒdpõ€ e­»…dÝž2`ɦ»¹ æ ˜Lçæx‘ý.Sù'?‰\Þ}`ií h-N)äÔö®ÕjèÃ/mYñ­oB_ó¶”sÁhw~+¼ˆÓ“„_~¥ž±~çÏÒp•r~õ™‘ñ6QyÚ˜£ÓŠƒÕ ¹,lÒ’0þeG8±‘°Ö,OÁV@QÑe´µu¦oz[½š’厯áY/€m³9&3a… œ¾!õ!Y`Ìæ¼†J¶yrÜý:Æ {¿‚©Q–n‘ '[ïTæîÙÂsȇoì¤ÈB–=`ÆýÚ™†–¿ý •Z¦ öÑêîR¨R…ˆNÓIï>}BxóNöûúU QþÀ†ðC4åô6Ô¬zäÞ¯-â×|Q[¯Ï[ Û. Á®éh#wúæHG>_ÛÕüÐøüäJ}ÕäŒÌÞDQÀPwnS½(Û]ºN}¥ ýC;\¾WÛó8ÒO»Ü"G¹e'üŠºÅïÄÓpºjãú"áná›i#=!'Ë¥Î"æ$Œû gÂþkôË̬ սr*yið‘®?Û<Ó:WÚËŠØÕ{’OoËõv{°Z0i7”]'3Óm´¡[ ŸŒÂ8Ø>·%0wjP¥‹Ý@1¬ð\‘X3¨I5Ñw=@/pµ¼8LmÖM[Úà„lj÷V‰Èñ}t3ÅØà¤T·iyu++!^ ¾ÆK*™‰“¹·­|àÖz¯µ+Y«í½±–MÝ5–š¦RaE†CßqTVûzTxê½¥GÔõ:«*j…ð[F (g(I&]´fY´Pvz=ØIåg»Œª…ðÒû±~ª PñE°ñR°ä¶ðGþRdŸNW|Èÿç3³vËG̱v×>Çê°Õì$k§ÇX¢RNš0K¾©ëàôi﯄nŸ¿ƒ\(CAx%O»Ù‰²Á¢IÛå°@D¡Wé$h\„sJ ÃCoÐH+ÑÁ7uiï—›òÚ8Ë ,Éçöì<í* K/Ì¢2ð6ð?O"t¦cö«ml}…Y£ýêÐkøÖI…­ß3^–™]%í[:ìªÇç…RiçLÑÌ›¦Y]mº;†î!ñáœ'Õl½Ëi,Ý™‹ÕR™ÄIn¤Nû{¸³³Bì*k¢…x.§93XÆáÎqäj÷oRTÛi6ëFÊš{i⺆Î:Y·e~¤Ü;¼¨l6ÑšF¨|„îgÇЊ@Ȃƨ1!Gž·bT(ó‡ɽ;in7L&ËC˜ÃÚõf*³rʸðï´6AbÝ©V‰=6ˆÙ¥‡å™DèÏ´¶ò8eWcéì“©šïÓ[¢±ëæ•bx +U–áÆ?®r Úoâ0qN‰§àyW' ã&½Ða!¾ ÷J,û R¬[]² Nz‡y˨R*qpÃRðh{æçÐ\:`Ç,x´P%¨Ù¥Ìsµæ1Ì~5×ÐÿÔ ø~|Ó¤œzPy£–Pju-hLt©ªÀ %úZ3»á3Kœ5f]-²¦”öYp1·62Îý:t˜¯y›ø+ú6‘¡”ýÞÒ„5üîûLvòðqJ¾ªœJÖ•Ë ÆÏÑûeÜ<ð/h²ð0XŸý^=i-`º…Á8lôf{x;8,&p=Þ)jøß!LƒZôv”I'оùß!Üzø'È^ÊùQˆƒ  EÃo[¥tß…¹erÚ¯#>yåM ª'áM“%’ekÉÖ5òÉoõïhw ZžÓV¼_ä#ª€R¼8ç4¦ió'ŽÚqc5ÀþNsãˆÜðì û’bèj¶Žbæi'^wÊC-S¼ uÇzAPõº‚ÍÈ÷Dƒ…£3ôz“Žo²™´;‚³©é˜î‚ÑiR _.úW¸n¹LW;î:5f¥%EÍû•³V/Ф¸à35]1Q¢°£âÜizÓYº#—¬¼Ahï›iuQ]? nP7)ù#Ôžßí^Ç¡ôè@8Pôn‡˜Þ¨¾éL4÷þÂqn ôK”.j±?Vã…õtÛxáócï€Î6š£'û¢úrƒoËÁg"è„ÈILQ.ƒ ûÚËçž/Ÿø…NqÚZ_ i“É/ŽÔ{üB•ìq‹}ñ_ícæCM´G)ˆÂ¦îÖHY¥$Ÿ@Ì¢“ødDÓaÄÈ”üÓ Œg¬P3[U5רHþSg¿Cxå"ö#xŸÌ¾…^µîc•ìÀõ*.óèžç7õ}à ©ËkŽZªZT‘7Kkö­°Ã4ý̹[þËCÙô*“Ãy‰Gzºâi*âÇE&²G…åKüIè¥"ö[i>RdœÚsÈ8*¼Pª‚À»½‡€z²9ždï–Á¸nŸñ¸i¯+^t¤HÕÑq».ÍOB"7Ÿp#Ûù4•WKD¸ß$ܦò½Vú¹ŠsƒH2§ëØÜÃöL_­Œìs£6ýÅiñã*ßZ÷Î+º ÿ ëyù¸/*‡„õÑ[9%x?.•ôYÝTFØÜc<ŒænKs!yQ&¯û»¼OA7Šb{ýãҡ‡{Ñ÷ÏÚ¤º‰©1<=¦ …É!$?ùý̲HÑj¤ÕûJ±çö¸öÈ>'ÓÒš$u¡kƒ­޽¹ø,©[óßñ)Ñù÷ÎSÞ­ß\šÒ-½cÍDjí4«¾/T³HÊê1<¥î­í<É#P³6©quô¾Ì1-ê–…Yè~óJXç‚Q¬ÚÝ[PªoÈhÈCw>˜iqøp¾*oW4—·w‹¦ vRÙà²}Ói«h3z’ߣ’(&‰îå5Ùò÷ûÅûükà댨ö›˜ i܆FYÇ¥ï\PI¦;œÊ‚%Be©»QyÅL…¨–3½:„¿ÊÑ¢è™ÂHZ;+æø¸­1¸’'1ŽÈ–M,_ªèûÏ÷פÍÓkѪÈ"¯û~fÄQõË?èÊsö ÑTy-qs¥Ç‡@*ø|gr·ÇaýêãÙªBѨ Gï¶Fÿzý1›m퇤•Ô›EÎfP "/”Æì`VÄW(i.J˜ÝúÎuÇû{åT»öºU¼ºî†ótVU7ôZyéű3ËùŸ+4ü#=|H­/ Ìpk5ººúO%Ñ ·u'4'¤«as*×¢âCoÑÉÇò¶PÇ–­IܤŠ_ÅËìBûHä ÉIÙH-¨ýªlJrK³;Š_ï-ù^…&ãH·…æü\ÌREy mÚ¶œ­VìÊi0À©ï†p©EoY›Õd àu í7~(Ó~ÞŸ°àF±÷NZÞ5:Wc:o‘Ísú‹SŸÕj<8=+Ï/”*±‰…Ú[Í»PèÕz.Ôu9Ø¢ AsvÉ=‹ÃXs¸¶^-ò¤ßLjŽ‚Ð¥1A'ECuÓ:£ÙûÍŧfü·6EÓ^ÉÝeMôdošwÄ^[¼r›ˆß7ôm`GÿéÖ.ÃTª< B!þ¼¼Ä3C÷s¶ÃI2òL(ØÖ7´þuŒS æà±PŠHÎòÈkK¤…P#ñ$ßà é¶WÅÝbDJ‚Xp»¤Iñº_?ÌJXÁHGž±ê•Ð"7†‰,'YçëØ\Öšlî® y;6ÕŒç9NjT醥D){·PÁáôU<¤þäÎ-Ö@téòŽO÷² eÓf$Ÿö*‘HÆ]í Í3y/()[)U–ïGJª|ðTxL*OopÉ’ÚoƒÓÛƒ[íÚMå½·èGMJ·²kü¸ƒÙMQuämåAܘ¹*’€Âª¦Ù³¢¬JÖn{;ö}FŸ˜2­2üÚ!­L™O³/!+«†ê¶ÈSaòÜLnÿSN Zb\S‡&,%‘©ñuŸ+£,xš€Âd9L<›˜"³ø5sÎ2®þCÿÑÑ<"íú ÿ»;åìœújæ*Y©¥£#š‰þíµÏ¥ ›ZNžwIGSûß·Õ/¼2&×HǸ/çëêM Öa„x¦ZGŒŠ^%HYîL£T†`Ï¥ÿtÉnsfã¾¹ñËe:Öôø-,s¨­SªºËüGÈëi#¿>*pÅè œà|Ñ­1ÁÉî¡AΥŤ֧i¼þÖÿ¡T€êwÇuO}ä²%SÏ™=‘äÆiÓžÞ4ʯó¼ ÓP Õ6Âþp¦{»¼4°ÛtÌÍ,ƒÙtEœ¡¶Î[kß3MÜ8âÿŠþˆwuFÛë³Fèg?1GêH«€ª¯_߯ÊÝ‘»3>Gö2|ñ ‹¼ä«Ö~äëY®ˆù+¨zÂ#:–ƒÎ´[+¦˜†€¨ntŽb™|Çܽ¢˜§½r54ŠíÌ:µœ6 `1Á&€¢®Ñ|tôö¡O¦çª&Ÿú­,3ACïMå“Vº´ZJî—Åü¬ê÷Ä Â¥G<(ÚL~¥®¢>Ú{ê„_‰¡>ޏ¼a^…DÃDœFÂŒ™ÞtCºõÈ´õ#XÊñÍYlfÕ®Ö {0è¶Æìx-d:ÙÒ/ov‰}óhVÏåÞ;¤n‘®U¸ß+5õXÝj'¶,Œù‰ùD>¢vÄ­måvùz$\ŸQ—}Q'à±…R67m—L5¼¾Sn½ð”A}”åÊ(W å!1A"CÎ(Îæ˜õ@T˜ú¾A±LWk\ukÚdã†Z?×M‡ÎŸÜS‡æÏ1¾†uO5?íl¾ ò}u>$w?ª}¸Q”Ã#œXº="~¤à*wñX(id+¾ÊY¬Î(x«QÑé岘f]‘o¤°kè‡F¹SzÏen™~ý+O#bÍÆv.)+Sph-è[ÂyZéKlãcYؼû‰¡¿5Ûa_žwïÈNΗŠlQ:ÙôGËEÔ®Ísp‡ÒÑTŸBë¢÷)> endobj 23 0 obj << /Ascent 694 /CapHeight 686 /Descent -194 /FontName /PNKNRW+CMBX12 /ItalicAngle 0 /StemV 109 /XHeight 444 /FontBBox [-53 -251 1139 750] /Flags 4 /CharSet (/fi/parenleft/parenright/comma/hyphen/period/zero/one/two/three/four/five/six/seven/eight/nine/A/B/C/D/E/G/H/I/K/L/M/N/P/Q/R/S/T/U/V/Z/a/b/c/d/e/f/g/h/i/l/m/n/o/p/q/r/s/t/u/v/x/y/z) /FontFile 24 0 R >> endobj 204 0 obj [625 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 437 437 0 0 312 375 312 0 562 562 562 562 562 562 562 562 562 562 0 0 0 0 0 0 0 850 800 812 862 738 0 884 880 419 0 881 676 1067 880 0 769 845 839 625 782 865 850 0 0 0 687 0 0 0 0 0 0 547 625 500 625 513 344 562 625 312 0 0 312 937 625 562 625 594 459 444 437 625 594 0 594 594 500 ] endobj 203 0 obj << /Type /Encoding /Differences [ 0 /.notdef 12/fi 13/.notdef 40/parenleft/parenright 42/.notdef 44/comma/hyphen/period 47/.notdef 48/zero/one/two/three/four/five/six/seven/eight/nine 58/.notdef 65/A/B/C/D/E 70/.notdef 71/G/H/I 74/.notdef 75/K/L/M/N 79/.notdef 80/P/Q/R/S/T/U/V 87/.notdef 90/Z 91/.notdef 97/a/b/c/d/e/f/g/h/i 106/.notdef 108/l/m/n/o/p/q/r/s/t/u/v 119/.notdef 120/x/y/z 123/.notdef] >> endobj 17 0 obj << /Length1 1791 /Length2 11778 /Length3 532 /Length 12778 /Filter /FlateDecode >> stream xÚíµUXœï²æMp ÜÝÝ5ÁÝÝ‚Ó@ã4ÁÝÝÝÝ‚w'$Xp'¸»Ãô­Ù+Ù{¿ïh®é>é_=õTÝUo½ÕTdÊjL"fö&I{;g&6f6~R1U>R6fVD**1ÀØho'nì à'eããc#q± eg%eãæçàãçâC¤"³wð-,IiÅèþqâ!±€€¦Æv¤ ÆÎ–[p ScR5{S ÀÙƒ™TÄÆ†TõŸN¤ª'È`ÆŒÈÆFj4u&5XíYþÑ#cgnOÊóo³™‹Ã¹@N`Q¤´`‘t¤`‰föv6¤fsDE{p.XÉÿ¢þgpIEcÛƒ{ôœÛm<þ÷¹½­ƒ‹3Dª`oÙýOW-À¿¥)Ì€.¶ÿóTÆÙØh*bga eý· è$ t˜)M-IÍmœÿ²ìÌþ§pÛþ%EEYMSK‰á_Oó_GÊÆ@;gu‡ÿýÇ÷_Ìö‡Á½ÝIõX™YYÙÀŽàïýÒÿ©$ìLíÍ€vàqàâ&5Œ=Ás&.R/6R ÀàÖËÂlgï ¾B n‰7©¹=ñŸ‡ ~Þ,æÿ²ýÙÁüƒ`´ùƒœÿ8ÿ9æd%eq0ìlæÎ¬lÿÛúï'ÿ3ø²©½­­ñ )‹¥‡ƒ%Àî‰|< öfL¼¤,žý)‹½à?ÌVáìöçœ œßÙøË㟪ì]@ ÿÔtýË,Î Üñÿ0XšÀõ/eàî²þ[A\<¤,vÀ¿…ðþS¡ý_—øþ c üïVnpp‘?,ú‡ÀAÅþ8¢øG“øñ€Ë–üCࢥþ¸`é?®Væ³Ëý!pvù?ήð‡ÀÙÿ8»Òˆœ]ùó©þ!p>µ?n­úg×øCàìšœ]ë?Þr,& cSk€ó/>ŽÿØÿû€— ËŸáâ‹7ùC`ñ¦æ˜¬Þì/§ü…ÿŒË_Nhñ‚ë±ü Áýõ°‚+²ú Áš¬ÿB°¨¿^'V°*Û?ÈVe÷‚UÙÿ…`U!X•ã_Vú ÿ™ß¿¬Êù/«rù Áª\ÿB°*·¿6X†Ç¿ðÿÜq¢¢öî^L|¤Lìà·Ü)nðx±zÿ7GSx8ÿëï¼)ÿ‹Íའ¸LçìM‚¬R›CÊ}$ '+`è߉Z|‹Slè™éD \ˆgS2&çH¿öUû±2ef—Äí™À)¢ã‹Ê„䙿c\Ú¯×]W£Ý ÏVíË ò»À#ÔöæÙëc®wJÓkãåI:EƒÙçCGùÊ´âê{p+d½z®½™AÜ<Ú’6¡Á_©9ÈTÑÒËÃ8CWÝÐ’ WúX…·0,_Øw¸|x,„ÏK|ý|‡‹nÚA¡ºE©º't\VІZÿ60R+œIj¡"Ý…Y‚W“ ÌÁ©ÍöÄ—0œ:Çù!¦àzÆØÚ¡MöBÅ­¿VòÛƒÁвHv¡˜õ<¡Ð¾ºÙã§vÛËÌÑË f+¾‘Ö<Ý)éÌ‹"<—`6·fT‡íŒê9yD6 •Ê¡ÅÙÚ/v(hûÚ‡úñ D Œ:f«ëÓ––.p†#¸lå"—(ƒqЈÌ¹¶D(™ßp8tt7Bxfî 4)§"OÆS]Ûgás4mJòÙ"´."wVãx±¨q¾²q:odZUô09…çlk,F2?Àiº dŽp$Ȩbi×.aøÐ/¾3E̽žƒ< ²’U†YTõ·~71î"À€®ÙB7ƒ>*å Ùx¬bžpÆ#œCÏ\­ÉR¥–&g­l÷!µf«òg (BØóž‚rgþL´™7Ö¸,'ÔQMKk áì0â?ì¡©n5Ôbs"q)„Å=}$]ö[Rß `<uT™JX áÛfºÃV¢NÒoi»Õ²iHt®Sœ1ÄØøè3›uÇNmãT¯NãÍœ<æ¸ Jþø;×F·Ø÷DAx¥îøûl åˆãàZ¸ºk”›оƒ™šÛÖר|þR ·ÂOÇÌ÷!`ED>w­À¸âGôˆ£¨-‹^Õ[˜½îB€MñæQw¥©ƒQr‘#×|, ù -ö^Õ2k]ñÙµu-ŠFzZ¡¾Yصç,Ãd~éÚP—ƒuâ>ÓkC›WÔ“Å}„¾nŸ°Ÿ_PG³R~½³ÓÝjÆ\™þâÅOú™±…½OÈM-QÝu!ȵ¬­^,.¢ð~ð‹*2+ñùÖ𽸃eþˆþórouQ‡|˜ƒ¶ “ËïŽÒ j!îe“%&^Õr纪U±3ZzTÒÍ£6‹­ñŽÔN¤ðø«cFoøôHÞZÞ8ó i…phä<ŒÐ•’÷™*œ‹2ùžrŠ_9¹_Ç ìß©ïâ}ixŒmÄÚ’²¼Æ¾ü‚Câs®úË]Ú—¹{ßÚ k´‰Äš¹‹5$¿'Z*Š ‰…µŸ~J$p-[})óLµ¶<$þ$4©¿úŽZö>̬Ò)3FàÓJã}ÚÀè¢ø³ƒ~²ÕüwÂÎÁ­5=QóHx´H*3ÀÅF!Ýj‹FvÆh²B¦Âý©Ž_[Ø•A¹Þ8w5é±ü­Q_ÐÖš¿;Û“܇%èö|ù.0Þ×fötäëH?.à™ÍƒáW¡ÛòÓyëkŠyÿ |WiÚNx¿å»tw¯ B¸ßìXjB¨ ŒHÏrzup}qý‰ð?©Òê¨Ø›Î®—àÀs‚èX$“òmJì[ß³2–ÎkDÞžOÛ¦ûï*QÆ…¡ a !±½ü*§¨T®îˆtp&ÉiŒkaC:γŸ4˜QqÛäï1¶#Cוîá¬,´»·#iªÞÛðØØBy5ÈŽ ¾wŠå„vï6ç)•žãÈ ân*äöëZ¢T'u§ûE5ÒÁë}Íù¬gp‡ˆÀPE­øÐìR~ï‘oºk€§÷¥Ý7ÏjwpwzEnÆqSYÒneå˜}»w[ ˆÁ6ÃssÉèÆÌÄ»öã× ÓJ»ÏΘAà9ÊàÐÆ×åq0…-]æê „ mÒ1³Z¸ä»1({ò\"b(5®í¸GfRâÝ‘¶%¼àŽñÔ¤ÝÔj¡_úlñ¶˜[¼©O‹ì¿ºðð‡¤Ùß·b– Z÷Á*ˆ0hô:»ÚNã´€JæhʵnyQ'ô¡?à›}[+·Ðûx “åÿ!™™Nv2=biþy æaf‰1?CH¹9À€ÊËʸ$œ–"Ĩ’ú ¤Ó2JÿÙØñ‡Höµª Ù4pø£ÉNžà)Y³rJ&ȦöwþžŸÁ‡Ëê½XÆg*ÏÈÓëÈQ~ûÔóQ–<ò²2ÊN &StJ ƒá÷àP#•LJQ·Ñ8V޶ôÊ® ½'ˈ´cJÔØs}FV…5l©Ÿ¶·ܽ„ñ‡yêP¼Øùe•ÐS¢Aßôýå¾"F<'$ÒÉ·¶Q¤ñÍŒ…æ0\^×þO°TUŠ'%ÞQ4¸¾p,iÌýí>Ka™„„Drõs™)í5ZÕ/Jg¦ VäädGíõ-­2˜_u8qJj3BµÉW>l)Ë33sp ^”Á¾æ£¦fÌŸ„ºåÜïõì©püÈyòt%ŸøœoÕ.5W5®6i»lÓ¯=+cøÉQÇe3˜Æô£šä$“žpœ{—Ôo1™éГ·¬r ðÉŠ´ûM75ÊV†»µœϤ_\’öÇÇb¦«"¹Šá,©àgé ɱikù¢ ¬Ç&:PcNØ‚N4†©½~} 2êîïd ÂË8öe'x¤}ŽýZh‹Ÿ€žušl?D|û¥ys¼­mÂEyÓ<Í,Te:Wk$êo(}Øì¨èh8W‹eªºKóUÝ>3!³ÒÌB-%> †cÒ«ÏŸrÈ’e\=²LÜãtD¾MÐè6ËàBÅæî+XØ­_o¦#ê(˜[¼¿*ùæ­?Á 'ÊŸ»˜‰›6„‹Ähb¿÷bZÅ0ÀC–'hmŠÛ£.‡¡7[ÑŠL¾Ù€Îî9þ“[·ßE¤gò<6фӴ¦[šºpÒùÐʆ’€ÅA…à¤9ïsÊkEÿÝkÕ¨ÝB¶`¶wâ¢` r¸yîˆgY@Kyïß;6$×IÕJH¬È(ºC2•ëÉcÄÊaÛ¢ç– (°!*uÖš¤®Èô‡ÜOŽ4‚&:EÑ7£ŒRªîªOcñìù?h|“-/bÜrcÓûÑ›!•>k"Ö¥¼ü!™/öFíB¢s~JÉöËí‡ v—êÖH¯‚‰†×cµíˆŸL5ÀJš>"bjO²ù„‰öpOŒó>4K„@Þ ÏÕŸ€ ¡“7x¼aªÎz¡'©„£±*èËÑö­¡ÔÜKosÞw¡‚¼Þ³qOaWqœ›_äÓ”S–;…mi)ÈÀ½~¼ûmÍq‚éU·'™+=jVbƒóãŠ+mtC<ß„¦5Z—ÚkóŠcÞOUß2K7O… Ñø@¯þðíœRªg7súïl€f®‡\ziY«7¢7lçX#žD—6~ñ‹éAÛ×gÜš°ÅáþSõrɖ뢟ËéÁ^^Ð#¥ÀïÈßÖ;µš'*õ-Yùd¡q`»r®œÃªi¥1¾ÞÍ ûo>h4FJï?–íže 0ÎéXÞ¤b9SxIpž$àâÑ‹?!V%È]ÁŸâ»," {`fhRŒ¨žÇ?¾*ܼ@J†‘EñŒÊŸýqºéû|è%ø‹TSS6©''tÃ…p6KV©†5;µáædM…„Xrx’þáï¿i¦D³ÍõÄÆ°áö[( Õ}Ýúú"ý²Û°iqÖÖ<Àé6ïãíD×hr¸§%u}HOb o½u´ÈÒtVíÛNïx­+›ÒV¨Zseægò™ßè–Ü9UjÀ×óïÞÍ“ ¦Mâ67Í“ ®ÑzJ¬)yj¤Znܸ?^Ã)uA²dR[ƒìx>ÔWÿp÷z+«˜rî•wÑ<ð>NÆ8ux'GU­ NÉ:€~‘eX‡P!(uh7aÁ±|KqW¦ß*š–­„Â. ?û…¢uBÈÓwWùGFÑVÒI-H9¸2ÍðüKƒß·m¢T§X§J\[Д_Üô×°Ö QÏrx˜ƒ&¿¼úsìy”q3.ˆ€¬Uh—`»ÈYÎ{„†'¼0…(…UÞV€uý ùlÇÄy}4ð=æ§'°¬ƒÏ#µÞìx V×ÎFV¼~8ü9ƒ¦šR‰,©Ë|lT 6"^—1—>¶ÛÍ÷¯\´F)•E÷¾²å‡1BjK CßÎþýëÇV|Æ¡0Ѓ°Yb{¨«c¶2㘡 ²T#Äü¸Î•™w¿E~IÓ½‰¹¬Ø¶üˆM†–zKPøÐeéÄšž¾UœË—VãI͸ÖJe_lÐW±åÚüªÐ%Ê2÷}'§eFª®šß¼â %!nIϺZºmÇ«Š:ɲ 5'ÞO/Ýjm0½ÏÖ«¥ÿ}¾“‰Ç‡ZWamƒòO!C6ª‘³EÄïᲃ¯3ÃM±u¤£¬JæG°sl‘ÑÙÅÒÐ+înm´ÑŠð²a‡C¶ ãÞÞ ”NvA2Ýœ5¡¢û#è¥,¸$r8~ì—êª2¬û(FïÛ‰ÌXZZáR‚`ŠLغa6Wâ¢â1+u¨ ¾ÃX @iŸtÜ’åp®#§/q/¹Q5ÅbKyè÷ÿf¢ ƒÑãîÏZ—¾â²¥ùzýTí:|纅»3ü*Úg+ÊW¥°¸7Á۶ǾÁrmàë¹`yAýT§’Ô'ÑÌ®ì_‡âÇ@Çça'Ni+9ÃR&0«V±cÔŠ8ÅÝï»0A‰JM}5IPÛrðŽd…W1ý– ¤!1>$sÏ$Y¥>9@ßÿ•–\Ï—ÊÉõ›Áz+§"‘‚µI{‘βÊ@Ó@ÿQß)º÷ŽoÝ¡‚Ÿ)ÉšŠÌê`±-†äL*FMü,ô±4ŽÜ÷Ns±t š¡h^oº5K@k_évîê!ŽKàêÙ„yâ›ah§ñ³ö!³Àýó»]^|ÌyëŽ-= 6‘²‘ØG"î·ÃîqÌ$Éyz½]v¢ nna†5zÇ0Zo£Ô©.››AÐ · X¶&D^hŸ–e0Ö°ŽpÏ.Ì/†™œ‡ÝK'<º+!Ç+C5Öï+iñéžçƒK§ž£ŒŒÜ#»ŒìK‡>éX„ŸXi·sµ1ïs²E¬  W=Alxu|FÓøÞÙœIU†ÚÕWÝ aˆë>|¾oÑ-Ɉ„Ï_‹=Åš;7(9m!0'Ö&ça³×ÿøz¼ñçPmCó E·qºÄX¶ˆ O§ÚAÜ^—C§»ØS¨<÷ðá¦_e܈«K'…òw¼m$úºItâ/Nä2GÝÊ»K5j‹­xjƒ²¬ß—+5Bë+{PEBèUÝ!øÅ© ’;j³dŸœÔ®è¤>gæÊOO™øȨC¤¹MR¹ úäù›Î'é¾ûõ‚kNñ½e&d:¥öˆ‡( êÄSø±U;úTþMâ ÕÜ…O½vÃfQ],èðuPF[œŸ¦²Çh¹;jŸë§SVù¬îB±úê5"…òÄœ «.ý†Øç˜¤‰avÀƒ˜­ßÙ”ƒZ1¢&äˆê3œÂfôC÷ ç:‚ayÒ•ÒÜÅh•xÙ‡L“Q´—«¥傳t[óÛ'ÓP+!” ¶•{ÂÑßLÔ”ƒh¸r“[˜È¼þ5㲋MÊòÜAÞÓ [3ޏ^¢ö-2]€»‘qW't‘öDâB ì…9ÔGn¢F²êÈ&‘шq–Œ3x2LelýW'ó®b„y{ƒ[M§‹!†j`¸M’¦:`Y½¢ úè²ä»Ç|ƒCSq€·á&i;œ’R3ˆåëÍžM¥:äp¨ –Ù.Ó»5W¤ÄŠêlX…»ibžöj¦hÙr²<Å7¼“q£ü¬YüÅ-߆Žw˽(:*ŒÌÏG¿[ÉRàX´±“éåÊï~3þ2å K*ú}³›Ág -¢YО5®Ô è#æ·™iŸIºzÌ‹B„åâÜy§ògÆ_¡\› Ÿ[lj58âh±¥FiHàWGAª±º«F:„ØLg¡T"2áÏùÞ&xÂ…jÓÿA”½UOé­LRÚ¿‡Ý†\¹eìGåîlÈG3ÓÊY@+wA`´»£Ý—9jÚN)\ÜÅ4×nz È_õ­ Jûòñ Q™’bõoaTäQ¡öêðɘ:ˆ«½ýˆÓK $‡f´yD«D³¶ˆ&¹žŽ&FGŠODfhNñ„Ÿd¢Y¿„ÔÓ=››ÇJDçRÇÆþ(f¿ÑåÓÌÙò ¤AÅÎÖŒ¡<äŸ>­¾¼Ð(¬PËŠ]r§¢6¨ª=)…ªÊ›~xÎï¬ÒQov9 ñÞí ó÷jÃß,:A˜Þ†°€òÛUƒͺŠðP 2IYp9 îd –Ç5p‘^gP4àðà̺øòõG¶Çq ƒbGZ®ÅiP.aª\Õ¦äó¾:Nƒ&ÝS3$Ùð°O¡²¡}ó\†ÑGS÷iÙxwá•°áèïÌ€×åqÉ™r7Á0U * ÛÁñÅ]ÍtÊ9Tü/B™µØ‚ó]ªgžN²Å–¨ùy¢žj<&D®e#|Kó¬ ôD†WVÑH‰“5P£ªX¬òJD¬$Ìע `ǯ¡«BïõUÔü¦Aê ½&~;ÛßRë_Ðñ(xëa!Vp×W÷0Óàb¿~Î>¢}XŽÊ(¾5=ÊKDóñ$挠A®çÕ–fçh•÷¬û‰ y†Þr£IÉE@³`¬¢³Šíþäô¥A [ÿJaƒa-×Ê{UüÛiMÁ±áçt¾ûajz£ÜÀÆ”Æ7i£¥:^”c¤ú¤ùèÏÒã€:-QV”šÞ }Ü:/ü×ݱ—”–vK×ûX³ÇÌšóÓùGF^6‘mGx â·Ëe››)4o³ðæÖî®h^ÑHk dÍ”)_Pi‰›ssøå\êù2M¨ç‡Y—‘Ó¹îT÷!ÿUlŸÈ+¸ýc‘HoSE¸Êyé”}qØ}l(j_‘©Ë{(»­ A)…-7LÃyº7=ìŒÐ˜(¹ð¸Ú;T3¼õȸ("¶Íö'„ þØc¤ÏG¡”ݦåÛ[‘%R6““ýùÉgœõR¦?ë\g$z/Íù‘%³ðåwí6¿b lµ¤trUQÃ{ô·‚Z‚ʺ,¡m§–9ÞÛzàï¾Ìl4Å5Å[£5º\7GdÅÕ<*’EU“ÎÑ-éð´YEŽ7î1uuª!8ϦËô ‘¹£ÇAhÞIÌáxà!  ï×mÎ/5{!-ü¥”&Oš¢Iñ2>VÝåa[ˆ²£JìŸ޹cºV£;6“Æ©ð?¸¼!àÔP¹Ó·7o„sÚ­û‚ÈM˜èu 7=`¬dŒƒ¢bÃȸ à²à˜Y?&KX%¡_šªñŠï0‰Ž^ÑáÙÍÆ'ÿù)ôD®5Ç©Á´:Ú…×hšiäFC¿s^š8؇¡üù§f`Ñ:¹ó D5æ?§7{p¥9Ö7ÀšsQÑiˆ¬ÓmÏ Ï /KçsëM!#y°ðzß\¶ „ŠOjã@,ö^·êò±ÿ( Üuã«™n;'kìuÓNÉïôæ x>ÛÔœ.+ ÓŽQQ¨ÔD2„•È™¿òSxt@V_¨_–¿JÉ·;®N°u‚"ŒK®öŸ½ˆÞ]ˆ­fâbÅó(Xž©Ëÿ„õij~fm ]Í« ‹#ä€ôös޶“e®G¹%¤¡ ùr<´¤lÓ;gzÊ~ïŸÄ1'?äØKÝÆÜöÊ—Å”î¨ÎïíùZh¹;%k–À‹“¦3½­œ¬;L@;ë&ð{wÐA²¶í)ï4:,=Ka |KÛ¼W‹ÎOÈõg8<”鎙>¾±±-—æ”ì>†Óû¯±œ‡QË>Ñ;-TáÇnO ±‚Ï;FT%DÄdɹÎDàA),ð"“xzÒ=ŒWLøL05â ¿Þ¥`u…á×íñ²'«•¹¨W`~$<ʆgPRì’èÂOæäÀÅ<þ”´ç`+ _à¬È€ž³1o@4»w³Iï«O{¤ÍøÞW‚sñê¯>òú5Aq¢ô|ßкèÂejDˆZÂpÒÙã?ð0T&öVÀÔ+å¤-—@gÝ“T—{ªÿÃ:êç.± m ª«-öÃbq-Û©žO3’T¼Ú,¦ß„iÈyªì]#”iãjLUí[5©ë1mÝíroo|ŸÛ懫ËMõÝ1qàÆ¬“Ü øE^È}׿ªÚ°ýÊP ‡7Ü÷í7ÉŸ(óè¦,Óçó$+ãÉrêuìL“[S¹Èg½)ôþÞãÚnŽÞIü@_ãý†ÿ‚‘àY"Q{HØ…wº¬d ¹sÈÄt¬Àn®ô?R:[„?š€ÇÐÄÊóqT3hÁÖRAÑ.P-‡·@@V48vç™D•’˸åVN–ÞËÒ:Eðvmh2 .Ï£aÈšUd>>s ñ4ªHà(eÙàs¿k-”CœlÁ½Èž5…¾Kt}‡Y<1NÕÝ)Ã9©Œ$ÄÏØÕ¾ñP:H›M<{¬cË-¤ÒéÁ]É>$’C ‡>”&Ó=Ýj7Ю;Z1¨0Bކ8ßC¹yiÂGåñN:ýW² šA;3ÔÚ‚d•l½ƒ*aéeü‚©Eïfÿ’Øj #”³½_KeZ˜NR<:¬ºMûHœ(Šr,c þhCjïkÔïH¿œÝITD·ÏGÉO,k–Þ±¦GG&Sµ *ÅŽ¿ëRwí6mäêûrƧmû c“Uôe1lÞy§7V´LwxaÕÍêŸHÃÂw³L–ëåßAËnÁø-&žhiHóòª§B¬?‡â¡ž–o¦oj+|be¤Cœ~ùÌ2eø`8^ßy¶‘©%RÁ0=.Üà—ßIããð+Ô.n5b÷ÛŸ&sv¯ªöñËúƒð¨É;¨1L yº®‚i]¥þGÅp‰aç§òGëÈØvxÇŸ­nÉk„ ›ÍÖ»²Â6]½E]ìê.°çgîIº”)±òáÐÌ~üú[¢&eSöÚîÓ€HjsLUñNÕrÇÈ ólß{WkM« å;ÈŽFÛv?_ï ]ײ°¤Ã!‡Œò-Þñç™3 aTB´ †Q—a‘Sx]õ×uÁ7ÔúªÈÓ$Êã©øu™ ÌgiJ¦k¼,òûD•œõJ÷êjéwŸ)oH,;Un©¦“Ò‹{ú åif…³ä  4™(¿}‡u*Âýé¨Nþ±§P³ ä›Èÿé[¶_xË"¶ò2â\虿aj7÷•]ŽßÖ³HqùpCÛį«¥Kw#g:p]y´Û‘@µ(˜“>!ÀWòÅ\åcÿüÝ%sŠ•UqTã›sèܾ(Iqr×Î[Ò‘†pü{ûúN1Íf”ž¾¶u=;{ˤ‡GQ%šm£uN1zjC(ŒJÒ@jÔNÖð®S:õäêóÝ Ž`,)^ïÙê[O×= ®€sê°2<˜ÿ …ñ¤½¸ãÓ£ìù+ê­S&?G‘c`ðCž•d¯÷úgÖnhü +ÀZ+"ÚdtÝÞÔí8ýNv¯~܆§^Äãù¤±=”&¯RÚ:&Ã'€¦âX?ÂeMK’®Ò¼Y×€}²„'tmýÈKáÛ"o†ˆJn§ê8¬×+GlC/¿þãG¼/œAÆMZ!Ú£ãlfâR“s%<žÁ4Ÿ ¦Ú/ Ñ6ˆ:GÐ-Ÿ:Þ×µ ²¤ä9go@ѧý|{½{S¦o¿f^ØôÂÚDZ#Ä¡GÈ‹«Äs?½÷œš53ï6¿Ür‘ÉÒ*zœr0 ëÕW6{Àrœ.ÇYªÉ¶s`†Ð"âZé®àu5o®)åÝLv2kpûê±}ü1¤Q%áXî秦KÇFXùI¦{dòkë6ƒç¾–az5áÙèçöü{)€ÃÔ›ƒ¼ïP2ÆŽ……):íÍ“+AØ6ºš¨âá5ö€%>ù³µ®[å‘ë˜M›ó‹ŽÍ«ßÍ~L/V2v_(¨Èói™;qWÅ´i—¯³¿¶É“Ÿ ô õS9¾ îp¡™Óçd¦’×Õ3&Ï‹·ÔÆA5nÔÐo?é ½"~»z'QPË® PªñNTäs9\Ü{¯-¶¯Ú¤ãnm\o«;¹Ÿ¥ðOv—ltØý½0zí¦„r‰ŠÝÕ™„Ö c* ;`®ßHÒÜ™I…Q†Ìù7·«Í)Ü6뢲W‰:¥¶U 6”á:øM=²ÒÓ»|0èò$Â[Ck.è}™ehâMš¨Öç±ÝÊeÑœzÌ ÆÒÍíoK°ë2µU@ÂGÄŒZXó€@mÙ$„ŸqчpXÒ #òÜ®(¶7˜iá†w O¹_×d3 /ޱ¾ŒZSa_î®—Sr¹%Z]Íjª;qì Ê·¡!º¯õ#Õ'pÆçFÍV‚¨Éd rßFgyP*h«tê‚„†Ã¶¨$ƒQÛøá!‹)™Þxš|‰¦m(D2‹ÖÉ©ºxa=@l3ëmÄ©¦ûÜj•Oí!ûU"Ù¶FÍ;e)!*ŒÇmz»)ÔWIý&b¸CÁC}€ØHBcX¦ýÐíœ -Éò­'ÈBŠf³šøÔfTÝúîU©E Óoò™.É#5q¨c ¨7€3cDVRIŠ€<‡•J*èƒ/jr¡*Û T Ó¹js=»` °¸ Œìšý×z¦Tn’˜[lm ¯øpãúK¿Ÿd9Õ¥¤YÑjkð7œÖIdŸ©ZžìDzŽ†Ê¾Ó ÍÍ^@¶S5W†Iz¶(¶nº+5… ù?Ý4á¬+®øåò»ªEl‡ìÊÚl_‰s™/n߸…Qó9̼§ûÈúzŸ0)×çU”.š³«wEYà;Ogw¡ 1íûo_FaÓ7¨'¡z†"áÉò”'Ž6BR J›sa Ù§;e˜eÖp!û > c·îÖ5¨È£wâ=–‰Sp<éý•£Žõ˜%?ždZ Z„å&ºŠúѯxâ_=3,;îãDÏ—>&«ÕRcxñ>iºXdi«T²Ô…EH\rÜ®» ÃEo´; ܨp¾s-RÉ=¤p6œ´Ë^¥Ä|ðx³ÔÈÆÀšÑFWό׷·5ƒRk0V½8òxŠòhÉö•Ï6Ä?ûdèù®qóÖœ}nF…”ÛÏWÏ”—‰VS­ »ˆ20TSêr©TˆP-ðûåg‹g¨sêh³óûÓÚDNzCp¥ ~3Rlgö¾É•ºƒ×4ŽšS~q¿)TáIg çÓÆôïÀ‰€Ü{lÊ2¢ÀŠÕÄÉyÙg^¸GܶRÁ@@§T­+òÏU ¤¼›E¢â‘[—öøÓªègË¡àO³*-`øžÙX¿~´B¹Cbj`÷(’ÒªÙŒW#R,ï^Æ<|Ð<Å3«*Î K.>ùDl `ª”(äÊ[—]|q*h"øøÒ +öÁNå‘ Ø‹{XÌסŒ:mÓÊʼnóáPk~Îñ̆¼vYEzûé«i/Pe’(vEÁ—XWs˜Ó%Õ+0áJÁó-¸ Ž$•™ö>:ÐgÑØŠFþŠ.{¢Sÿ³Hg}!ÔÈ+wÞ3ÊëÆO>’ôÔ¯;ÚÐîB=ã¥\yI‚00ÿý¬CèP=qÚa߯¿Î³óå¾Ú>Œð¡l0±Í ¾Ç£vqÁÌ–2Xòô†¯vV`­lvqâ ®>Å>’W˜Õñé gÄ(9ìôRØt¨G"ù8Œ˜L+³ÊͳHs~sÊ!û¼r…N‡68ÃÀ¬ž÷Mò 1þ‹ëÿÇâÿ ðES€1ÈÙÞÖdø¿‰7endstream endobj 18 0 obj << /Type /Font /Subtype /Type1 /Encoding 205 0 R /FirstChar 11 /LastChar 121 /Widths 206 0 R /BaseFont /QPSVWO+CMR9 /FontDescriptor 16 0 R >> endobj 16 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /QPSVWO+CMR9 /ItalicAngle 0 /StemV 74 /XHeight 431 /FontBBox [-39 -250 1036 750] /Flags 4 /CharSet (/ff/fi/fl/ffi/parenleft/parenright/comma/hyphen/period/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/R/S/T/U/V/W/bracketleft/bracketright/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/y) /FontFile 17 0 R >> endobj 206 0 obj [600 571 571 856 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 400 400 0 0 285 343 285 0 514 514 514 514 514 514 514 514 514 514 285 285 0 0 0 0 0 771 728 742 785 699 671 806 771 371 0 799 642 942 771 799 699 0 756 571 742 771 771 1056 0 0 0 285 0 285 0 0 0 514 571 457 571 457 314 514 571 285 314 542 285 856 571 514 571 542 402 405 400 571 542 742 0 542 ] endobj 205 0 obj << /Type /Encoding /Differences [ 0 /.notdef 11/ff/fi/fl/ffi 15/.notdef 40/parenleft/parenright 42/.notdef 44/comma/hyphen/period 47/.notdef 48/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 60/.notdef 65/A/B/C/D/E/F/G/H/I 74/.notdef 75/K/L/M/N/O/P 81/.notdef 82/R/S/T/U/V/W 88/.notdef 91/bracketleft 92/.notdef 93/bracketright 94/.notdef 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w 120/.notdef 121/y 122/.notdef] >> endobj 14 0 obj << /Length1 889 /Length2 2612 /Length3 532 /Length 3240 /Filter /FlateDecode >> stream xÚíSi<”íŽ"FY“²ä)”3–±WÖJv:*KÆÌƒcfÌ‚^û.D0²f+Š¢dÏ’}•}×Q(ë™ê¼ï{Nçã9ŸÎï<Ï—çºþ×}Ý×óÿß·ÄqS -ÞÔÇãÈ20Y˜ cd`²Pˆ„„DÑxœ.‚ ª0UU Eqä¡ ®¦ ª•‡H:x•ˆvr&’:§¿‹”-WˆF"p€‚ì º2<,`G¢A2UÐÂbóï+H€9H‰î Jƒ(4’ 8€NhDî{žK8G< ü“FQ”ÜA"‰ d„< 0"¢ð8,@Ž9cÿ"DRˆDGþqp3ú;¢AO À#Õƒ0´Ò<_½Ì®|)&m§²Xã’šÞJŽÀÁ8&샖ËnR#ÅW7 ’ø8gXfD=¶„HÞfíúKþn±‰ý;3îö3I^/„®®$Q§ 'Ö?p½,íûò»“IÏHk^µ¬ú§Æ馒º–³ûßßSkíþ´69®|U? {%4¸ø¤ÂqsîD·¼0ÅÐaî„Ûû†;}1¡lÏ¥ß-ã+(¼™liñ;6_æ{Î[Né‡Ûâ:úß8¸b‘Q¥E£2o%¿(ßÚx”"w®K¡ôdl"®O3%×{.v»¿e\Ðû)ÝW6à}_pkºàî·Úâ¾S6Õe¯›¼O¤ +šzöDŸmýº¯½nÇTbã K[ÃRMýWßÖ|ã1ŠSû¾¸5Ëê(ˆæ’+Üf«Ãî;ÄSÏ-]oV¾ýI~Ò nHˆ»¨³žÓåë:Q(¯BâІøÒ¬vM˜˜2ûÖþû8d}ÜdÙÆ„÷óV–ÏP»†×B4¿“Ÿ0ûGnCÀÒœÙ9kC©RŽ]6Ôœ,_YP:›¸äà1.²kテzûJë~ù‚&òĹ ­ÖBûMšKüèÓýIï½?‘ö~SaÎ[3[j[¿s…%I·µ((‡Ç5‘McéªØAËG±äú «-‚Zg\ÀÑ®lZöš_)ëÔR£¯ÕqÙì?×RãfGx^ ­GŠ/[¦ˆI¹l7oѪᗼ¼%™E¸ü6º¾[ɬmGúTPUäª󇼢¾dªØÍ”¿ÁXL+ óÜ‘q)¶˜¬™°¥‰´ü'¿;{Ú9v…RJ+Ó6ÔNgòÂJ­ÒM;PÇ·mµ¹L~{öømzùÑ'\Êòµ‡$‰%ëÆlÒK”q¬‰I˜kã^­Ã2ÙFO,ºkcØå^ÂC´¤Ì§¡cïº¢ÆØT÷=rî/Tï¾Ö%4úEæ0PM¶@£‹ ð6QOšÝäy¬ÔáfÃ{¤ø Õ]$Ú—þaÂÃB¨?v¶òÔ¤ú½Á´)@0²Býþh8Ý7Ÿ#á’:­h¡aï»79 »FÒ”¸·w²½®ñÖÌsîèe6[MJÔbEø¶«~þÖàõ¯WÕ—.öåùtäW•ynoIÕyÒñÆ¢o+vÜáç<*NêE{—XMp4ª·*|«œÀ·)ù¸©ùA~³÷ú 7©²âëhÂíZM@;Í8cˆÓ¶yÜìuÚH‰X•50Øe1<Ôdêe[†×Í8ßC¸Òß@¼žešnÆžš§¾Q¾Ãk}õã:yÌ~õ^·kö¬br¥Gœ©uz¾VÍgvÕ•+Ÿc#*ô¼—çX é©ÝŠV>iïó〲 «CzD¡©¬¤°Z<êլƬsÇ}-5¦jSs_OŽ÷ÊFN”vAû.ƒÓð„f3̪ø”'ïBk%—]}ò8W)S.P²¹=ïTvÔÞíè…•šƒëº“t§ÀÞåƒw˜Îg—Jzä«…Suœ{CÐÜZÈøºcůû"á™v=z½*©¸ºžO©Ùáj%Úë´V×ë±tšÝkÁ××ÜõVu'ç“æ¤€!{Ó*=°/hâ5¿Hq¶¸dÂë%ÈÄuĶ‘ÚI‹Ö˜Ê’ä9ó(kûoGòK[xh(f«éUÝ0äÓǼа¹'u’ë°rí¨»~¥¼O¨2ºäï…”#|}#Ïæ›:gÄÞƒmN%þÛG,nöúïDr>ž±ÜßßLOE°ÉZ7Æ™ïÄ)k”xhä…’%Bf>FA3 ˆO®žÄ´ù#2g8<îßk”L޹Q f”,1ãMÍ“`~¤UqIà¤9„^¼‹½ç"0™j%?/;+’P”ëbÍi[h£9}ý0Ewè3ÿ›·"µ#‘{ƵP/´–á~‚þÛå‰-IwýD/ëíªºX®W~g ÑtøœTÐj­÷ÎNdSë·Têë ÝL™ ïcZy1Ê‚îÇ;£9U´,tî݇¹Ÿ»#¦4ù×è ¶£ïX£š ­Ùb/œ§LÙ7Sù²Y9zêí‚o˜~fi±ÂЂåMAi…ókÁQ\ñ˜6f¿‘ hÐØ«ªœðÁéN?uâÊ<¦×(Á§¨)Ôç¢GyGy­¿”-ïÍóiÀx˜S«gv® I¶oÊC²µË8#§ž©a|Uhìü.]²Ê¨‹¹õ3pŒ„­IiïÙßræ1Kióί²Ós;^Åfè6¶Æ?`cVæžj.‘,»ùÅh>âíI©5¾3©#niaš ãF50™îð$¸CgÕgäòœ©ÛñÙÅ«ƒçö#²¿N!‘û*Ø‚VË¿ͼ(c?OîÉe/ÑsKœåÖ-E=~ áiý»ž& }y×®56 ÄΞ}‰Æù«IM¶tÝ?kBÝo>47S©õjÁÌxW°Ef°›GhFJ€"ð Úû)5æIÄVeµÓ;#½Lç[nl*\#‡âÚ_Èç䶦×Ú: ç˜Ã7\±Ëúø=ùtNuªAâ*½± 7•s„®Œï‹Ï娋PÂZØý\IâL‘g‹RýDÙZ Ž̽~0n°ÿÌ‹§´ä¬X¾1§‹{¯¦3•çáã쯖F %OÃéÞ¯žè´×LEgstCïQˆ¸~Î>nnwDµÙ¬ô΂d6ƒÝ†@í³±'*ÞÆO¶Ô™yih…–H²[ —½m#¡»ç­=ô™f ¹]¨à¾øZOwÅ5â]áyõ²æŒpv¯³¡S¨î‰wœžÁ…‘wukbêÏL›ûWÄŠ5ü¶%N±}¦7¸;vŽ—ñæ¦_Örƒ;,qˆ¾S –»= ù¡‹²Wý~fïÅŒØÚô¥ÓÑ5Ïâ…7á™Mi)yƒ% ÁªÚxôB¨]ÿµ%”ù‘ö¹|Ÿ~[¿êXQ“ ïaL©iTÙÝgûëœëY«Ò®u-*gMhOèèܽRÍùáÈ”ëj%¥WÈŽ‰wðó÷×?n$ˆ^ÃËЬïíl:÷ÄÊ/@²…$¹×pÄÜŽºòLΰ i5è"Ö1}ÔU›´„Ù`ÿ]8­Ê˜_ÑÏï?Ñžr,Þ¢Z() [-hÄ;ˆÜxÔžÁ—ê´ÝñßíŽ=ÓphN×(ŠöÛ‡+vò5·‹®‡SŠÇÞ7ÎG”~ZÂe¯¶é¾-7|JQ)8·t‘~~74yDí ›÷±Ãño)>òÞ°ìÌÛ»Zt›tÊÅi•éç»Õ½UÁÛÛ[i©Y—„-èÔ¾uèø@þoð?a€Ä‚"ïŠ º@þøá°¹endstream endobj 15 0 obj << /Type /Font /Subtype /Type1 /Encoding 207 0 R /FirstChar 48 /LastChar 57 /Widths 208 0 R /BaseFont /HNFYBA+CMR6 /FontDescriptor 13 0 R >> endobj 13 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /HNFYBA+CMR6 /ItalicAngle 0 /StemV 83 /XHeight 431 /FontBBox [-20 -250 1193 750] /Flags 4 /CharSet (/zero/one/two/three/four/five/six/seven/eight/nine) /FontFile 14 0 R >> endobj 208 0 obj [611 611 611 611 611 611 611 611 611 611 ] endobj 207 0 obj << /Type /Encoding /Differences [ 0 /.notdef 48/zero/one/two/three/four/five/six/seven/eight/nine 58/.notdef] >> endobj 11 0 obj << /Length1 889 /Length2 2616 /Length3 532 /Length 3239 /Filter /FlateDecode >> stream xÚíRy<”ûÛ £"²+D ³XÆž†ìÛPÙN³<ÌdÍŒ}‰8ˆ’=ëA¤Ð”ƒÊ®,‡sì;Y³UÖÒBÖwªß9ç÷öûó}ÿz?ïóüó\×}}¯ïõÜ÷-+¾ „ÂS± •ÂPB@Z€¥­€€Â!²²4à R)ç1 P @hj"”—;  H-M-U–0 zúшî opú«H@‘A‡¡–$³Õ7@I ()³"àêp@] ôß„8/ ¤0¾-.kFa7"k¢ è â #ƒTœvø•;O# ƒ óº‹8ϰé»?‹³*{ÞWË6ÏF*øÓüꙉR‡ÍâtÁÃóœóR>Ûôèš@›£ÕЫq©»óÞ—çÓý+%ÖÒ-ýæâƒ°¥cEg¥~íôŠì‚ܰo™*:[\Õ—"ÖSßeFÆ{G¢– N‡·êUÇê C?¥ø ?åkúùnzÐèÞÎ~·bž@‡½•¦lš®¶>.í¡Ñè2ÿ…ãÎçóÛÌ;ìÅÅ/§ð Í¢“¢]ŽŽ«ð- àh¬¸Të¡ËÊz½¤#ÚûÅ£Z£¥A_ä¼Û&ÿ9Ý“Õw„ΘVè¿™íuø£ÿ¶t·½¯R!Æ›—Æ^Ö¼¶6Îf }È3íyPí4¡{oÙ†XÓ­kRúΉv¿‰%·ä}°RL«J9ûåçèžc%}6Ò‘Ù†fÇá_Ÿ¢h‰.:@^ꇕ•F!›”+ U½}E²§Š70ð°å‰Ârªù&¹š0b8»fQf¸;6²½´Ía=ïyþ½ $üh-Ù›Þœ¯à¹¼¥‚Èù^k×n‘þéz˜PdbEûi[¯ÆºÐ;öm‡9œƒ_jïKXN:¤žs1ÚlÅS¾_ͽ^ó^Nëîù’àç q’\þœÙžÙpƒð¶Ÿï›¬qZ3Óý úôýDkQÖÞ BÐãr£q ›rÝ®Æ'7ÅN]‹É5ÊÙô© ä©\„‡ Ìä¿­íBeî¿/­Uêøq:±¨s¤ ¶%óRRßähýe[5ŽžŒÌ‰!K’š„ì³ÞÞuøHUoìÒl?~–£/1óx9)(-#ƒÚ5Bï-ìÚRÔè² ›àˆZ­ׂæ”÷ع,?…Ž’Æ5Ž…Y}ò¹?–´W܇“á4œàÔ-ÙŒMåOç—Ïñ¾/ܾ ûSzš }¹{nÑíÈKÝ¡ð]┾«ýˆçÌŽÝœJSïH¢‚skç¥AT÷œj/’Ï?"”YÍf’lb]»Ãw6Vh¬¼`Š÷,ì‘]_‰ÒN.xï×ûw{¾^FÛ¡ðÝi~î8¹ÅÝäI¬”† 9›Ïµq:>·¾¿!ZògüVcZ|EÌ }'óBˆ ³&gÙ"”L{k§…1le„;¦c(.r‚bÝäx.Xhh<¥ܾ’Èœ’×É8gØ(|ñáÌ›(dVÆtžŠ²òìÝ×l b‡ž)Owaö­­6•fuB«G8(ÕÈ×Í/{ÌZû±æÕ4êñ’‡ûrËÙ¥œ*ñô‰›²!;:CSÊ &뇔TnÖP2TgQóa7Ü´™ Ω#ê(WØ©ÏE>†ðmÎjÝÏÂjdëzÙ3 ƒÅaâØlZOù©0´òq“$‡Ã˜’‡ri¢›õÌê”#$o=M\[o ¥-’û'/n~vºé¯hcƨs(çò}:¦ÑR™,+ÙTððæ ¿ X.vÃ/‰Jc¯/÷å*òßêçmõ€Èì«´òqXH{¤Åa-ýövýÝaéQóUw‹ú>©A~iÑp‹Óí:Ë•…ÕЊ®‰fu´Tn;yífì‘ÓÐö!Nš| ™Zζds¤cÕR¯‹ÅÕÎòär»aäÅè]ŽhŽO¡šB­)š.E£gëâµÔ;9¹?¹l¨SΦ}ck&C¹³¾¿ÞôöhêE®âêÆá[¹‹é¨Òʺ¹—±‡„C)O<˜÷¥KçæÍBˆ¿õ '—¶ð8Bœ÷U ˆ—gÞï­fëliè¯U5šFØöµІ!w–£õjyÙØÎø»ÒÌéáˆf¥x‚ÍÆjä†çýžtÉeA]‰þÜíF¾#3¦¼!ãÆ’ÃrÌߤ–þùÌë”b?½‚è¿ÝÚFa <Ä®BTeî8tqº¬š“N¦wœ=IᘎkS`ž_é=È«¹çiíîi—£óvaCÑà„±׆ƒ‡ô&›Gïƒ8£Õµg'iìÖ^Î<&«Z_Dš†¶Eu·ß)ÑËêÎUOâæ¥rü©ÍÍ=IÔ ŸÇû\ä.í³¸qêÝüí í†æ&ÿé ÅÞ[Wƒ…œ{+¦¢xLÞ˜étÂKއ†Y—õ€vS›M‡ôè6¨±gq V_Ø×§.T‚ç´S*ŠœZ¸½'¨…ã¯ÛXtmü²çºBFŒ=ad µë¸¯0ßE^+b+Ò܇YíÚn2ž¶E©ÊC{Vä*ô[ë³õÞ?·÷þHz‡ ”Žn‘¬¾=0±«^„.üµØDoÞ,àU›úùcOÛf»sÛ‚” \¥WòÏè`c›å‚:}d6 ±—4Ø.TˆDdb?5Õx6>á5bؽVŒ8¦>O5–³«žLæ ÛXðdñ©ÆÜT’K²ùí`7m#Uü§«¸É ‘*¾®Õkg¹Êé¦þ>9û5eçòä_p’c‡Ÿ´à#Øët˜ê)"/‚sQE=ñ^ë)h¢Od›kôÅòð”5§d{\‚ª-½ÝåªQжûeìã`ÜT¯QhY«+²3_ÔèË–œy{ÆÜ&z^èñŒ¯¥ŸôÄÂbkõøñÓô¨Î–¦’ÔFX™bLòÛÆ=ò¹,GþÔ‡få a™Ze†Ò¹XŽ&Y´ºÖÒÑÇùë¯.­–ªº7x–¶ÃøÃQÖ@_¼Ì¦­bgý`æJeÔ®¶}Ã=_©µõ&¿ËãŽef¼y$;cýf›u}uá]ÝcªŒ§ílY*SüâÝõ-\cUIh¦RÌ—ª¼VÍíw§ô®Û9ß;¡ õ½ !Í3*é oTٸŘ³¼*'Ó¦g”*—Ÿý~ ߯Öõ¼ö| üø@þßàÿ„Žbh *Có€üuÛ˜÷endstream endobj 12 0 obj << /Type /Font /Subtype /Type1 /Encoding 209 0 R /FirstChar 48 /LastChar 57 /Widths 210 0 R /BaseFont /UPPGON+CMR8 /FontDescriptor 10 0 R >> endobj 10 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /UPPGON+CMR8 /ItalicAngle 0 /StemV 76 /XHeight 431 /FontBBox [-36 -250 1070 750] /Flags 4 /CharSet (/zero/one/two/three/four/five/six/seven/eight/nine) /FontFile 11 0 R >> endobj 210 0 obj [531 531 531 531 531 531 531 531 531 531 ] endobj 209 0 obj << /Type /Encoding /Differences [ 0 /.notdef 48/zero/one/two/three/four/five/six/seven/eight/nine 58/.notdef] >> endobj 8 0 obj << /Length1 2113 /Length2 15483 /Length3 532 /Length 16621 /Filter /FlateDecode >> stream xÚíµct¥Íº°'Û§cÛ¶=cÛ¶m«£ŽmÛêØêØüò®½÷ê^ûûyί3ÎÌóºë®ª«îª§RB9E~#ckGZNAizZzzhRRA{c}G€µ¾£1'#ˆ±ÁçÏ/' 3' #4) ­›=ÀÔÌ‘€\âŸ$6~+c{€¡¾5´¾£™±Õç†ú–Š6†cG7Z~KK…z8(;Û;ÑB30  ŒMÖÐtÿ(‰[›Ø°ýWØÈÉöšœí>¥Èÿ¥IAð)idcméF`dlM'có9›ñ§ËÿZÿ{p'KK}«†ÿ§PÿW³¾ÀÒí¿l¬lí ¤mŒŒí­ÿwªªñ¹Iœ¬þw«¸£¾%ÀßÚÔÒ˜€þ¿B€«±‘ÀÑÐŒÀDßÒÁø_qck£ÿ-ñY¹)Ð)I++«Pýמþ«MN`í¨äfûïQÿIþ3üáÏêØ\ 4é?ËËð™øù÷?¿´ÿ×\ÂÖ†6FkSFV}{{}7èÏÓóI, k#cWc×Oa:ZkÇÏ.Ÿ5ñ"0±±‡þgCY è¦VúÿDÿø<t&&ñé-ÿLÌtvN6ŽÆF–ÿµÓÿna! ³v²2øç ˜Zÿ süw‡ÿÌf¦' ³Õ·7¶¶46ù+ÊðßÑÿ•ü©¤ïðÏ‚Ÿb¶–NŸj†6V­ŒùSÉÌÍÖÌøóçòm?²1úb# s°Ôw0ûa' s7¶·ùø\„µñ¿™åÓÞÑåO;˧·£™½ñ_ÿTÑÆÉþOàŸ:œÿÊøÔuøÜò󧬃±ó_®ŸÛKgü…`ùTµü-ÂþÏš-mþêÄñÏ0V€ÿŒ²~ Û9éÿÙHÖÏéøÿÐçTèsÁ?ô9‡Ðú_øßÄöY‘?ô9‹èú,Øú\¿øú\¼Äút‘üCŸ.RèÓEú}ºÈü¡OÙû§‹Üút‘ÿCŸ. èÓEñ}º(ý¡Oå?ôé¢ò‡>]TÿЧ‹ÚútQÿ7q|ºhü¡O{}C cÇÿ8îŸWû¿¦ÿl`úw‡ÿ|8Xÿ»Ç¦zý9õŸ^èÓËðÏÓLÿ)fôþs.þÂNí_ø©aú~Êì/ü¬Ô_÷ý§šù_øédñ~JYþ…ŸVVÝ@ŸVÖá§•Í_øieû~ZÙý…ŸVöá?Ñ_øiåø~Z9ý…ŸVÎá§•Ë_á§•ë_øiåö~Z¹ÿ…ŸVŸï…ÿ¹Fþï׃€€« ãçUñùþŸàøÜN¯ÿÈ4t²ÿ¼ýÿõöýíØðùN26v56„^Y´1ä 4Oi .ó.˜.§0mŠ•©ëžë€ X޶,•´£Ü¨U{þ‘Ž~€ïòŠãÞî)?!rág›ºð~à¬wîÞ‚£v.í¶oKôpŠÐÖ8{Æ,;»1V–¨^8u9xš'G.¤t¹FÔ£é\ß“Èʦ&’n©Tû‰P1Õ®,”9dÝ11l}*ÀÛ<ª™êוM»òsTn»ÖZ ’a;>“Â%ªÂ!ïYia"ÂÏþáj¾ SE{‚ðU(…Qñ€l”àê,wlaÝ™KŒIáPiç ê–v5Â7RVíâ=]&*]³B‰å"úËø›Š(²'(÷÷‚äGŽüvÉ’-ß O„Ǽ(- pB¸sb ôŒiTwøÛLel~oh"[ÞÄ'¾tm~¯bhÀ.⳹ū·¬ Bÿ ·Üq6Fvž”d¿†=^4ßÕ¶@´mõ––†Ü\¤’ô무ˆg)ò:’ ˜áŸBI¥b#¾·ßž_¢kà$‹ Ž8o^64e0¬‡¾r°ý÷ƒ*ˆRxax g烶zÊt’ŠvžšQw®÷RÙßã/VJšµíýÙ:è)F?²•-ç/¡H.T3Ì€×8«axýÉíY£¹×!FY«c Ezp‡ìÇI踩%—€£ìòJ¤Ò%½«Þn9!ŠV/¤ž7W™!!¤²%ïû]A ŠËoj:1æÓ¿48i°7¦Pþè!†ÈÅN{}Ä̲kþu›$×z’yÕâÙ54ž,=žØhkìuL\¬þ+DÕZ Âüp÷©Ï 3´60VXv«íüh/rÂ0ôæ•ëby¥XÓNÑ TåÛÏhÀãP^€•¡}·ÅoA2[ŠýV5ú–£p>’>FóÙYÑQŽúPÒJ“´‡Œ)‰3!¾.«r ´7F–d-C#7BÿˆkÀÙ°;nd¥¥kã¾~ dJ'ÕÐ(c½"ŸÓ½8αúÌ=‹i¶‚¬°‹$åãûÍò ]DÚ;‚C«PÝZ1Š(^ÁPÞ„ëwžz|‰É}§ s)!“eø‡¡`‹JˆýM0æÙAÈWÿ‘ÓùåXÚ=hgŒ¾ªŠ1ÏI †,—r«¦-RùؘØj~§Æ®Z7™*°%ÂâÙE’ÀÇÄ êãf¯Ìs°ÍGª+q†ºQÐr][ß¶‰ë~²~hyuœgÒßK“ðÕ´ƒÎ]@êMhëJ­ô®¹Q LkðQAO«¯ºË·ߔȶ®š0™g»¬ŸûÆûáxnjw Pù[…÷§»–ùMé«%÷^ð’•·Xeádñú}Á&;V_ðá·J‰ˆÕNºïi-ž¹sDBtêGÒ3âPÝÅ‹ÍäTLAa7™“̨ܺïQËÏÍVœ/&ÆÀ=áHó ñÄ',¹õö3Oƒ1>íNþ –u Àú e®ß§£Œ¼ÃZe^C€ÛÇ̵@N6KôÏÿ#Њ,©Öyò! ½¨iBu¨DЩ¼5àQ¼+žÙÌ1Nö”?idň)pùE³¡{ÁŸwÓÙþ2WÚ€˜»é#5¨L5ƒ¸£‚ü“C`)À¦`nØØäk­óùeXxœ…,äòp–@Á(3¼"Î5OºûµqŶšvL„Æ–ÍePÏ8¡²ß«—ÙŒaÒ ³[]G°¹ä ÊFƒ¤4Ðo&”—™ƒ ApÓª–ëjlÞ0"“É ´[®±€_€“¥ÙEz^ã-)þ¯¿_Ñô ¾¤÷"·®_§á'ûq¾ô [ÝqB»˜?,b[ihTà@¿ýRˆ'ÌS9_²æÁ›Æ·^¹ÇQEZ°Í¾+˜œW!¯À ß<ŽdOqÙ’ðæóæ²Å±±>œ~í°”v€AŒ™í¬y±BôÔh>íS“E)„W†ªÌÒ€Bîšüª]­û=X^A¨~zÉýÉ:¡iŒàC2ÊPŠZ%ewK dgÂ#½™5;«u*ŒóÕ£ t¤f_|âR˜Þ6jšÉtY]ù§ºð•·’'nbÔ‰»ê5-åWe+KåÅ6™ãøÌÌsÿþÅ<+Û÷—vcù6ê¬ÀÝêŽö}/¦ûäO¬s½Ö»Ã¾´;Ø¡½!»[θÙmì Úƒ‘¿¡”QÏGòG˜’Ÿ‘&`èb…Õ¼åg½ÁãIJÃê àUXV¼9§¦Ö%©èûKÏW¥{F==J²æ(ìˆØs{Á|ë^…~Ä XÍCåmë äçGÝ•òL“FÊÒ 24=çmkcDÓÚgá'·ÆŸ½ ²ë}(Ñ£sñD •¾¸(ÒcbV['ò6¼y7+1­)¡U©M þþLê­Ø¡‹ÜdL-Ü?€€zå­^¢Öá |^š7¢Ôl"Ðìã'›`ì")â1|ì.†7£ó©Ò!f}wCnûL0lè .Fl¯N`—¥Ï‰Î¹È{Äm…Û¸êW<Œ>$ªD›˜@o!T²vÌÜì²ÎbÃ>íŪ<†šqðÛÈ:ðWV#¦úý«pû6ωÚLvŸD•u 1Ï ³%ûh¢3æon‘>ƒï1åã–OT18|®¹ó¾1òEí)÷ ñwð6ͱïùè‡+_Šûùú=¦ Sþ"öV‹óÎeo.+í9é>GQŸâàþÒorbCž£bΔ‡˜¤ÎÁ¡¸u¤úx[ÊMíd¡ˆ“Ã*ï:kEét¼Í¼~Â"¹ÂÏ„&ëW.*† š±ÀPd'QP8vÍ“êVºÖˆº%ÚÌ.jIÌYá7}3 é˜;;™ùˆ=tsO£-¾s²™<§+–}ˆ•DyQT庚f0õÒPf¸ÌSïºq`ï„7ãBìÏ =`M¼¼Órbcœ„ÙÿqÜ$ÑPcOv­¯õšOœÄ@·ö!´/ž´…dÃ~ÃÜæQÚá)–à$–"å,áý…pÓgTÙÚ×ÝC¥K/h²Ý!’“Ξ ãúåÑ6¼O€F€4êuVDÊòÞ[E¥ñD8ÝAžÊ»šÄ$$NOèb‚˯ëIMYíhïqñ{ Ž\Áïk÷«\Çû|nŠË¡š±Êe±·ÑAm^Rà¥8ºûMJwC ¢lÀPýqÿQ$D[݉ž–PИºÃ©è4V¨¾_GØä‰Ì÷î­^"±ýx3M‡øŒ{Jɸ¡MÛNžK¬›N6qÊZcjÐ&ÌP¦N“ú}1lj0v=Ã<÷ºa޹SÙ+6~‘x*a8KQ:½9rCö:¯_5&X®|Idw<ëßœÁ(¼«ñ¹ ˆJ,¼e|\×´t9ç:ä¥ebnµÐלwŽíi†ñÔè„ {ñ1/CõP^J“ɽž„Ìú í)ÖhPéà„È(+–º 4‘"Ý2ù~7yŠ_nå«Aøk[Þ©4Wªnž á%”¨û?¤CËžJÈ¢ FôH­ÞÎmjlÞ…xÚ‚žø¹$SsIìåÕTw[©ýðÿŽr Ûìúõ—úP°æ‚r½²¥ KÏ.´ °W(%ÓÑŽ”Ak™þø„–cy5’SZLQÈ.ÛTq„Ñ€`ßF¬ê6Uè<-ßDéóêéÄ |åfMÃxA;ñƒòŒÆ×$2ÒÈd°ˆÜR~§Õä´ÊcZ† Õ[Þß¾=q³qÍÓ×—-»|#Ã!éÑYtP.ÿÙ_‡êvå©nSÙˆkÓ›Ùã?¤CòÔz)¸`T–}É|m~Ó«¾¶/£ÜL/ÅÇ_¬^äQÅKÜÐ.5ÇÃLJ’;ùU¨qc“‘˜ÒoP_¢“÷ü*Gþ^ÎR/DͰ ¢×[ý„˜Yx*xôšh5ÚÄ™ðu„ñPŒl\DÙêÆ:ʯ“>±k2n§\,´ÈÁXL7¨‚-°’}ô\¯È2FÐã”ð‰ŽÛjŒjŠÍZ?Ê­¯üˆo³e:v¡ãîUðì×,Qk±<Â’P€ Ú.Âw¨*[½ïÉêjò{åù9¾µ“ùäkÔ>œJ zøµù–2½¿%ÄsŒ—³j¢Ôl”r@VOÑ‹Óý mŠü–Í}ݰÙKzb³ˆ?Xû˜|ê*îw0¢käɇ¼Ýˆ&ÑÓÌo¹`!€Û Ý¡yÞ<¯ÖJhÁ$Rð¥\4á¶ó&RÞVwZYBLÇiâYa>sضÇSVÅŒwË´#ïFª™c¶Mƒ6Úß9*EÄÇ)÷š^û©a £ê–µ¼9.WùË{ö&m¹«3³%?¶@èÇ¢½a)İäÐ~æÉ`Û` ´°R†ÔC¯J–û¼ÌH­‹Hã‚›æÄò3ý°¢HîÝÅ&eÖŶ:«jœÊªbqÕp6ª§áúÁÎ*jcð\@-›[ôoüš9ª·­¥àvN®"éb[,ìú¾÷ ¥} S0ò,t„(¡8âop‹^î´D†ˆ·1ˆÊ3Æ|½Î¹b$›‹W‘%m ˜€§@ÖFÈØb—ØÍ,’¦A#W:ŒñMÊc†ÁäÝ«Oãssh€àÂqÚß`ç«S¥>JG FrŒVt@[£ xVÕ“læTï$A»²ŸÕ«+®T¥ÇL¯`uCL«ÁsɇÂï-—HäŒtaÆv¦_QPIZ89sß“¢È3MÀÔ¦î«Ë ƒÇýR»ƒ=<åX#pËýŒÂoCwÀ0m7âV»z¢Éév ƒOãÎk/#I´ Ìu?Ò´Èe)ìlŸ CI-zìrwóôëîŠÛ;ت» ÃQ°í€‘R1‹›>Ü1Zw[%0±¤"<@Í@²Aw‰¨¨äÃgzŽ-ìÝùFö¹¦òù ©ïÁÏ9àK:Ü¢@™X¬.ÒšàØ‚[æô»òD Ô,Ï{›\_’¨ËѧÚQ)óŽK¼'µ&¬›`†©7sêïL÷órÔK޲ßTó]º)”ºï §GôJYø‰4°²Öš ž&׳&\ ˜GÒk :‡Ô,q2«©³¹óÊ,* 9,xÉ‹‡‰`:'=$º.z(øÛœ€ ‘ò²Ìê«)™Ÿ´Y+i24¨; ôLBµí+‡º¡À†®â8gÐÙø‘̓Õbî Ï-\XÀ6a"œ½IÕrIC?¾çÖ¼(à{M¨_«À³¼bbÐ]‘þâpxGÎ8EÚC ±iŸ0ÑjLdðSŸ:ñ!ßep†Ä®‘Þ‹·¨*‘ãì°’‡6hÇXµœÓh€ð¿ÎÀ3RD«âN¥æ…ó¹ÊŒmÙŽ0(ºE夛ëýîl \ep®¡™YøDµc>ÿ¼õZ9ʚל»PMã~Zü}•ÖD€ÒXGåVγ>"¨i9ìÁiì&±ÚÙAs?ÚF‚2Ò#ãK!¢Àþ€À‹DìÁÚžB/¾ß~f¡:¢|‘-6/J6Î6¿¯™îˆYDZý¼éËbSù#ÔõXÝOôY§÷%µAwMw']¨1Œ¤Ö‘U€ÞµŠüžh§ ˜”b"brr‚~9'OêK·ã÷˜Íkåüt6Ý 4 É¢Ûº›¼ÊúûKç¡V&»³l úìAQ쮹ðL„BÏͰ½Q½ý¶£¼þ²†UEË{P?ÓìÆBÊvß:D_nÅL†_RÂBn°{V1J¿^Öȧ¶¸›Þí|ßéUÈu‡’à×gÃÈ£•á3ë‰!AÉ ýÆD,Û¬¦¤“ËAlý¨°ëŒò“Cdš ‘Ç—Q·ËÌ[§Z`ôÂ6³Ïç‚Â>ŠÊýb3(´f°™f¶Fª\í@®¶‡/bFqXe/¡ëB³ßJ[©îD]*èØ–’֮Ǝ,©AúF·ÆÜgR¾%–—“ 7üþ ìá",ž@ú®P÷~¤×äW F*MÇŠÚÊêb©*CŸ×Ú -°ÄeÉt<×a4L'«ò† ÛøºÓ]e{ÕT¢¯ÇôÀªZŸ0µ~â3mþK­Çrôº@:àõçéB!<…jûêµ®ØvƒºZ°­Ýs™Î” šUÊôö>^JÉ9œfˆŸ H4º[³Zn×ëg.oPŸP~ïÆs2÷„Öžkô›0+0A<ö¹YÎ7Ûˆ7ñuÎñyRüô‘E >j/»]šaéý|GL'f~°;c¹€ë3 ²ÂœËíSÕ¨“âƒ;pŽLDgݸÝï³2ηI·KnËž+?Ëõ½*Šæ~œm,xXp¦yQô‚ã[^U¾|WG9ˆžÞoöB˜æÖSÿHɓƜ•ÔPå «ü~‹¿šÌSš]PfiŒÚ¢\@Öcc(Läß×:™ÅѺ€Z×"Ž 9ÑÍeCû.[™Üeæì·óÌŽ:Weïn­‡yüòmÜì…÷v—@š—;D0KöíÊ'‡s§(W¿”&Ö¢›TlU,N>wy!&N9ñvÞf ­>D&yÙÊj#½à½£;èœu|¬LUÁ¯ŒâÔš‚?˜€í‹EàVCP`¦ðý“L}ŸÍ.‹Tç*ÚôP,þ`ƒ qØ}hM¢sïx%,…ê†Ê„7Ê)ž"€6Ö º ŸdˆÈ\bôU!‚VVdÀâ~Á/pÄÃŒò7+­UžÅ_ŽL㬨D«'76º§™¹*Êo -àƒ±¶‚P4•Š[Ž<‡Zç‡ÅÁ´\$¿„Ü4ŸîeÆK!u‡ nêY”zX#y¤}{!oçÊ·¾‚.1çä–j„VqÙƒÃø#î‘ÉwvåÛîêÚ ô­f ܶœu·= Dêz+2'ãE1ÞîEQ•mp']9 Í2‡›žfrP—D³ {XVuÀ™¦y±|Õ[•-}]É qH£æì•‚›÷™Úgƒr$>ÿ-.*Å>Ée]‹çï}±ÉÇ5¡óBI¥—zreúkSÿM®Bç|о6!^ÅVA’lp ŽEÿÞD¨I5` GLa¬ÝùWYuÈc‚¶z1·øÔ_«ÝI}ÝFcŒ’z¾*â/›¶L!ñì«WÛxšîpEêQ¢†‚xײ{€³Ž¸ÐÛ3ª©ðˆÂÏJýld Ð!ò³7wÈ’/,Ïþ\•DIx~þ“ÚÏX¨|­ k®Mšdj=4l'\°j ÔQº(H\Z+m§S'ÜK%FÛÉZòˆ³³BOmk±$ЂÞô1ô2UdMWÈÝMW4„Ú9Aµè€’ÚêÁ©².'Ã2‘k A+ Þº‰ÚKVÙp*q;á ]zkß5ñ´@Në¼}̃?HìžåŒq{Á̦p9‡ñP£ÄvñË›a=B$y˜*Ä[ ·ÛZ‘EmZµäè9K‘¬¢& GDXÀþË´CÁãMùž•UtªúâƒmNH[V[§ØåÌKô0v`›w+¶°ì “Dw–±Nì²`p œ=b@ê#ÉÏAiåäDku¡Ëü¶ÝóßÁ³³ž¿‡¿HLŸô?) ÿ:‘´ê467£´X™o…¼Ü)¼ôLÍùBˆ-#ÿ;ŠÖJ…g…•VÉÎ8LËú<”ë£6£õ BÔ²À4–Ta8¦øô'ùr(Þð‘ïûQ锬ŽÚ7‰€B%½\ã8FlRaÍ õ]2!ø¸5iS,ÃÚ*¯ÁwÉ¥š&73¡ÒmUKøcÕiX¤«|¨õ²³¨_1<4‹œ$M©!LîÕ§¿ŽÜŠMPŠMu·¢¸ß ®V¹{Ûà ÄS*ÚéÒ¼röÌDºç¡dŸ$3Ñ€{ºÌ×FĪe Oy®ŽMÌÏ4^^ïDëåzù73éanv`›|ôüS4¬e\‘a¨I]‚>ï{ýwü¼ w*cÀ^+¶—߯÷S!¶–Щ%~?ôZ\Œâ sôUߤäÞ[Û)/mž”êU¬Õ.©½YHpß™ùº…:H NK_Ñ£(:†òúèúMÅ>¶ev™–À UùFŽÊØÕÙ{#ˆu]h#³oWßÑžá²ÝvÂAˆ2•ÊÑÍ(Ý8j'¸†¿¤Œ)~®€z¹%ê“4²„”iÂËæõ¸×c‡‘:ðƒýã.ž[O>å‰ûÙôV*õ)dÕ¨=%3¤’w„ çB=â¢bж±ÀŸï†G™rðxͦ¼îÂúUÙËçêgfTo“f™ÓMÏŽç§}Ë~ÂkËè×ð[mÁ{gn8-+u¼)h¯d?570A«ë^±ùÂäcŒYv›•¾hy¶ò*êš¶³ïè8éÖz]èûÑ6¨X÷ xt9™=Œª½ ØGë}×¢ÕSÍg  3Y,|‹ÐHÚÜ[aƒF4“’Pû¶³ÜÔª*¨ÖÍ„ue¤œ{Øàpì`¾KÓ._Ö‚ÏRgVï¤o]õ†G 6.ÂÝ×{ç/R•Ñ»ø?þwJ1P¿ A_¥ÅF ·:`L³´Ì^¨IíõÑé½A{óí‚:šj;B|ƒwr¸¦bûê1çm¨öf86z–aí„Öà+›êìt=N4y1g}*$[Ÿ]­(|„e ¡fò ;q‘tG[j“—¶]¤zb8=„Ë<ÓO2n§¨m›ë¾¡í$K(òm˜’îÎØ~`ÿr 3‰xT\`LþÆ(vo¶Ý§ïÀÙ¸“²¡º]+† T ¯ÀNLÒÚƒäåÂÅ`â‹W !­ 8C@à/³1«ô®;u¼‘ÿ0¸Î︹õIŠ$18ú©^›e£ EÈÀÁz)Ï·|G ŸFçyíÒάÖÁ4.د׶Á½ç~8ï“ÌóiSã@æÉˆýÂr¡¨eb9S†Ç‹slïòRÚ.X­ØÔ§Hä7h¬ö%Ø‚×çHK¥0_—É'ŸuV3Š­˜2N¬sBr‚»k©‰`©*»sÜ>ìÑ óß•N&m a¦Ðo¿)…Bdl/¹4LìDIJ]™WÖ´l³ño¤`c:¼ùªøE8ÜcçªÞBç“‘5°É–'^çqu™¨~`炽"v C¿û{[ÏŠ~¶6Ò3ÛïÄÂNx£]qØi°·ˆ2æëÂÈoÈùh¿˜r²S“Õá9` ½ÁSÌ‚DÉãb€ìþÔ©²ŠêÅ¥8â4§PaáZÜy%ÀX5·VB<3‡s¡T)-X†£ŸÖðæ§œYÉ0ìQ#gÿ„ÞÛ õ•UÙCNËbÊÊÿ"·ùÃÉíCøÛBì™xÃj '’DüÀÆXÎ$0pô ¤dêûz쾩r ¬éJJšjrö+$.£Üú¶þêýQ/õŽÀŠú{Z¼ÎÁ¤¨kè¼oW˜¨Ãg»í]É$ŒK‡ÉA9éœ0¸âižà1²>ž#˪—­œZ;g·(‚üL:q²ñæÖìf2;²¨ßº*Ñí½22Dö€à~´l{`9°8ù8˜õéõ‘.µ7ÎuÏqô6eNóB§G*C™3$6ä£HÑcΉ¶ÏÍQß«ëvÌ%wEíbüf™Ã·B¢u!›riUõi”ë ªÊoÐñgðó“·6å+ðÁ3kMžÀ—+¤€Ö˜)‹ÊÔÐR*+©ÐŒt¿w»”²ÂäÓ™_r¸Ÿ¹+ÝXªàҧe}çÒú“-?÷ÃÒWQ$6ÞuªzošÚû‚§i©{rd„:¼žk“]Y\&ŽÕŽ˜»‹›·!Æ ‡`‹ߦ±ò{»cž™–·áÓdñ€‡»“âºSB„vɈ4òéYêg'ªUÍûò eOó@0„òHš2¹º»'0U.‰¼ý.æòÇi Ø^—.ªC‰„ kï G’†["WA÷HéÇ.WÁ‘¨Ù­ŽÜMðÓé…^«f*Û]¥Òl­7Þ´r¿ÏÞ´‰˜5÷x ŠQG¸4w§ÀnÙ”jár¬Û¾nÑÖÖ×ÊÙUÉ™0gFóQdáh¶aÍ:| cΧHæ§øµÞ÷ËŠ÷mmˆÁˆ<ç*r¯ûò¼(·¹¥k¸ˆí vî£8¼¤€#^Z NНùCÇ1ÃÙ ‚Þ ùäM‡K˜Iô÷Â2 þ,Í}ôƒv ëoÒ[ž–àÈÇ=öTòb}ž*Þ°?k®OëËg,7&˜†~æó]ËŒ.“"ÑjÒ“ðnÉÉë¹—‹Íøö“9¯9 E^?Ä÷Jv§½˜êcŽQ2?Ö¬š¢ü®Òª‡Dü¹ôõDÍÑŠ›«CÛLИZÊPÇ> Øhv øÐ಑¨Á»>øáyyØ! ßïßaKGŽ«10y,óñ²Û«/ÎÄpТý5£vStXèâ²É%OÜ#G ¡¥/=4e¯Þå¬-.‚ [é-¯ñšk€þ¡¯Ü–C–þ‚ð„ÿTM^÷«à$E¦ õd¸ n¬Ý#>1HOÈO]Ÿ¦k÷DEÑ÷ëþœ 2)!žK3M³oVSø5¯&]`3·Òš"Ø<‚B,%ìðŠ긙‡C»À¥g» ƸŘ: §wöÙmm;èŸL7‡Pí“üäh¦O!¼ýµYÂl ¹XK.e„4“Àxftoƃ5³lgq1FœÓ!ý£^,¹Å§½Éš@úM²0‹{SË«µZ'ì0iaͨE’V5M ƒpS¾æ~ µÂvyôZêÔc©_ÑZ*´“;Q¼•²í­©×÷À® JI >S*°Ñ.à0C« ´Í÷ßòRîtó2ÔÑøùñÏ/{[„rÖ"±s'¦a¡‘-qwt]?0¨ÉÙmˆ‹®¸Á-ÒéhGú«ÈãÚzÙ¾¸Þz5¼al®id4ž>ƒ‘¶}{¸TN +Üúz™ä9;W | ©z;ß2¦h~Ô SÑo=Ž¢,]É8…<ÖIã^]p‡–TA„ñªd4l†O—ê¿÷X©‰<µ¬b(a\D¤Ÿk´FF*Ý'¯ ž0¿Ð³—•bó º:*|© Yó߯µh[…²éÏ:ÕMñèçÓiõ\‘@[{âÜF9â ø¦ˆqoêkôm+¶Æ©ˆA‚Tª§ëˆÅ€šˆ›\Hxa›©*%üCR¸(}¯¤E3ÖV VJ)@ذ¿NŸnV—HnнÒAá¤zfsj£* 9á Ø’Î:wÚì6ŽX>»¨{š)F¹Ú^»›|ù\X5<}y÷âµêùš¹'ŠÂ{J?DËu)º4¡6tߨóÙø«Þ7ÑŠÁˆTón ªã,4±bhKEyË Æ*&°&@ÀK" dâŸXmkÓ5–tyI[BxšÐÙµËfóS¾‚(b<•xÖø–šgýÀ f'(üƒ5-*ßÔ‹ÁÈÖ—ô5·±ç@çÓv˜lˆd0a‰SbâÑ5ÖèyIT3¡ëXVs˜f‰XŽDER­dÂCým‡·ü Az±&®è/tÈ ÌöáŽC kYŒ¸0]Ó›ùòj[pª\DûîXš›[Â#†¶8Ja3RDHûa«ùK>ìÃ8Þ%qÖ£è {!U\ü&çqØý,8°Ëñ oÜ_e¥aJ¼PòIhV°vµ/Íø“iÂ9¥cæªXÒD+4”†¬¬õm¾Ñ¶ª¹g–>4,¼å·@JˆÇóçƒÌCRzÕ´šÐ0DL?>Ìòhà ÿŠlw‰fŽÃÕ‚IX˜DÌA¼1M?rTZE˜é÷,c”±ÿv®´}Ì#ù_U["û;u­ºûu¿w=²Æ&Õ\4~¼ÇîÑ+‚ˆ dŒè±Ã´BàþÖ"*×ͱ0™Í£÷ð$GðÞeeœ TÃшˆ¯ÁYª/.'0íÞVyç Œ(nÇiÛï%G^9NÐQª¡¡ *ÃôGÓ€§² «—$ädQ͆Jà=)î%X6™ ­"×}ˆù”×™/³ÝÓŽc˜¯nüçò·êÏ4Z¢ZWq>ì” ‘{h¼˜>Q‹åœ»)2••0z ¸>À–óGØ;wW%þmnw3ÀcAQRùzEcr\ÕRA@BÙ5@Mü‚ßdÿùzØÏQß^œ v#®±‚‘¤8Ä1Õ©zý¹³*ËOÏÃ{rÛíóáQ´¯öW™ÌS¤ðVäϨp9å’Mœ­]ÔAΣ–¹ØI{í#Ë-ª_UÿnnOo,ÃíÍéY‚Œ$ í5W? ^­k²¹¸:2!x. ã!æÜ,85%Ìãu®ræËgorEØD ÕÉùq×îÏ$i¯£™Éw{ WHY=éW}C!¼50-ûà8}µ¦²rHf©Ï¡À* ÁÛÜýKÿgÃ×ZÄùŒe¢ó¼›tM“ô€\`P™£¨ˆCÅëvã A'¾z‚:PlÓhÌ1q"×òô¦´¼þ/ñàUYT,žeê¹Ë@Ü]„ìmgÎσ»}¡0Ž•7Œ³…> C[×U=8G?ͯ034‰$ÈG«.¨ „?´zSæZÌ‹ÓT7'²¦ç·TDÌ “›ä ¬u(UË]2±#5XåC´Ù[ëÐ )lÞ Œåý"WˆºŽýš”oÛ·»x­‚i‚^¹Ï¹g ˆEÐÎǦ(Ó˜–(¬9¸kƒ«ÕÑõï°©ñ–3dÛqs°s¨sË+Гå—bæüvyB‡8ÒâG´“ÝdǬˆƒà–ç'B´Ê›¶`>Ží¨0­6PóSsæjvµ•µ˜·¼ =à ‡¹©)RõÓ¶1CFͰ¸«óÜšÀã\éÇ%7?j”Á:LT¢±[š©WÃZLöø‰Ýä›Õ©†žoiô徤P„éz•œ5Àüõ&°§0ï§ýܯ®Sú(Ö„B¬¦±“ïûzØW–i臰¢TLp™nÍ%¹ˆµ‡¯CŒ/ZÏM‘Q$Ò ­®Žs<%jxÍûaŠßA&>BY*j] jf•¼ ¶º¨Ë{¤0÷¡Ë|¼!¸õøÈÏXšYðd7,6Ñ(ýox’óèúÁƒ´«W~s¼÷¡^ MOϸYçT„ÍN0ɯᓖÒ­œ§t~‰|]ÞsHÃ5ç³·ùÙHy=~¡>5g¯)µ‹R>çÆâäžÖø9¹nÍ&©qŸ0ÞGí^ß9ñ¨ÕØ!i¿oUÎÜýŽÊòž’ÃtiïAº½ÊÞÜEÓû[Õ29ó‘©ckÍ¡Ìøë+L‰Æô!`/*5Hûv[ÃwŒlL?µ¬å^^µ)³êÐU¥ÛŒ&dd‰ßG(1‘´»Á'á® dÇL šö­Uû~•ëtjâç^'SN’œH»Ú^–=,Œà‹죊‰¯ö©Bö&D15þÁú¶åú\…ØÏÍoƒñ +ÃV°ÃÏÍ×K}¨ÌãdÒµ’[ðmmõË$oQÜäÄr‰¶–ÀF:Z/LÏFq •ÌNf¢šš¸W{ŠÚ ô 8séf%ܶñ÷p`ß#`Ŷ˜YáWCP–Ì<áÕò¼hUR„÷dewçìÒ(Ðn‰[ùÆßbˆ¢¸"˜{j3!`—MH–Tþµ®Y@‰º´öòï…wŠëÈæ,Ûi7ªp;†>Qsì–[/¬çC?JÕ®Ð*no{kXðÇ[’ À–†[w“77œsÚýÙ§&Pj,hÎ- .8Ût|nïx˜9×Ò失i–¨„ÆrÖƒ©ßº—ÊZÆñä¤p;ê¥XºïùJnÑÑÊ÷Rá»…å=|Zƒ;ôº,7Ö°aÃë77Ó|}ïNù¾ü€< É,G¼Þ·Ôn&òX@S0ÜÛ]!q|†„gó¨3R®²É*Ûˆæ»Ú¥Ç~‡MrLív!ä£Ýp­b2=~†Bj- käK;RýÁÔÇa„O¹5‹êÿjœ»ú¡ŽgÁõö\ü40´IZp½cŸ4o=snØ—Ç©˜•7®(¿ó]{φ0Åø% õ|wzšÝ¼vÁJµ 1¦HÆ•Ù;wðÜÍ…pRõˆÅçQûm™”‚5Üb÷lÇ' q-Xâ°BÄå¡àãÉ×pŸ›&|àK{JI!ÿ.ÄÍ^\gx.þMZ(|B§¥#Á‰¹©v&u6ç²"9]ºTZH>˜x+x“Z"±˜@ÚyáwKì¬gcCµ3T©µŠo¦Åíl2i'‘dÈý*ßÀ»à™Ôiô´ÄuÆ+“U|l€²ÏPR€19mÊx^03­tQ'¢ì²åï‘úªw·G ¹žì&ƒ¹ŽD¿'«¯Ýîýå$½4Eo´ÝS¶W葜W®£Îh©'‹®3¥n9B’ÿÇ*5ïOµ âÆïéï:Vøš þéÙ• ‰]Ëp7ÄÐ -s“?VÏVÑ¥œtY‰Â×Çë•M͹D  ª2Psú‡r¶Êƪ×H}ÏÀ¨¢öÍG¹Ž~²^# npXË·ÁtÑY„Õ· 5’[4Û/Wå«Ü¼†[±d±–cŠ—…ò/ÒưTR×tˆÄ°­wágÛˆ7ô÷ ±øº¾(÷Ž 0’щ엽·Q/÷þ¼ë[Ì“zC@FìÙ†ñ2ÃU{«Š®ØJ•®V˜ÍŽ›‹W*ŒßÆä5ç…„2ÈýT X:L²I#Áú½O,§+¨@ñV€ˆI1©P`Õ”•"vÍfOІ¯R²$–Ü”ÖO÷‹þ‚k…:Dv.¹ª€_DÖŽ#Ñ`ìƒåÔCG_ÒeÚ€¨&5ƆzB^éÎyè^¼Œ½“ƒê^ ]ÄwMýUÝ<®9¦Ôˆ 4];ÞââÉ(rF<ÿÁoÏ;»v«‹è*‰½2˜’‘ÒáÇǽ+õÿfjNÖm·*È̼a$i¿¾T3)[$GgÆLïóÓÄI ÝV\Ò! ‡°Š“A€­lºÿ +ÔñÚËûã)bƒ+´Ìæn\±Ö\²å€ ËÊbSÙÍ,´•ÚÁ‚æ1½Þr¹8Á‹¼ätñ*ð“xư`U¤Âº8$•÷Dx¿!£0?$+«ÐïuWÌÆŸrQâ—L©ßû›Ó· Ã8‰]!l 4¹%‚t›L¡Ž€\ lÈigŽ”1ƒ­Óq®qQh ‘¨|K6àÀü¸_ dXË3:ZðIšÝÙ•§cF^uñᢔ˜uµU´[VõW¨?ŠÎT÷²·t¡\üG)wÞÛ¥ÀHÓuôuÇáÝÑ¿ÒEd;‡—làjaLÓUM•NWÏd¡sÁ¤v—ÒÆnX¶ëÿzhçØåÿÀÈ7›üðû>Ñ95ªR0 ®Q÷X_-·õ²Û`ŸKé³±l¦)¼GêëÔ¢3Â?ãÈtÅÍëà‹Ùñ&ŒzL€•K®¼[LgtÁÚþ@‘j±´¸1i#F~Óê¶æ·‹b,l‰â ³c¯¹íÝ8ž©L™&YŠ;Âuß@Þ)þqÞµ©Ï 8ÎyÜ 6¶*qùÙÜ„Ž,{¤’îrtèY¼ò;2r,z±j6úí‹[5ã}ӘϚ? XHD€&¶“ÛVè`6vØÜÉÔê¼Ûc üËrÕýíÙ•›`œ);¥æ¸J'BÜ[Í+Œ&q½…úeé;3T³AïýwRÜÖ[ø² l‹-ë{L¿=¶û±è𤴨à"‚"`ìƒ(é—Û~£"ñÕ~¹ß2ÊÁ •³š1¶+UŒn5+¹Y\òѹ"MÕÑŽˆ3^³Ü(ɰnö‹ û¼³?@÷è„X(¢YŠO:€&«V ’+–™šRÇEê"¤2᤹ó%sÂ¥ y¿W=d>난teLïÕÈGù7paÙ9÷ÉJøA¥‚q®Ð0'ÌL>}È,MÀ ‹|Àpç¬%!ÚS7>~íØ²äÍÖø^„6„ ‰îð™m¶3L¨ÛO)”¸¡ƒ\ÎóÑîxT±”“Moóä YÈ µ¯öŽóqµ¸]xê‡&Ô3€á=¬°½Ì^æõ|‘ þq—!]Ü-+TE\½ëÁ·69zQкÙŸÛ»4{·‹=}ÁœoÌÍãÚdSÓ…Q•bfžŠœ”ÅÒ_sž't¦©&jU Kï"V0ÑUç…oµcÇM–SÐJœùÅ»6Ȱñ…„œíû+ðe¢L9Êî¢9Žê'¯ ý$“ÑÈUòØÈl¼²ç®ºÚŒIý%‚ÈI@rÊ¿OÄIü‚Ë·©ÏKswð*làÃŒ™‰ÏAèpÎòMg™1Lî,‚Vî‘ÀÂCjà խuo"#¯¥òé$;”ÿn¡Ü©cV±ÄÃcS0‰|VN8!i5ù?Á <§Ú뎤Ñ—ª¢Áˆwûå¼àGVxPkôWD$”•h(YÔ¢0?ãVÜQ©µ`Ÿéñà~F§­á ÀŒ±£èNÞŘ»¥IúöÕÝ dDLIw˽t½}ú‰%¶•šG‰ªì+‚éuýÛ¡Vf’1¤.F@àðh×஼-ßoåÇkY¾™ oíŠa'»ùFß “ Š’»°ñ¥DSÖ:€?H âkŽ4Äãy$u•U˜=Jê[çƒ(‘xI¢µ%d3=Ø`ÿ~« $¶[†B^0y AU¼×P'þ‘a°??eïõ,àÙmîÝ¥<õþ-­.¯S^B-VsßçRŸpdâ(û´;änüD¼ù4g>s2Нë@X§˜VT®{7þÎ~ÆÃµD‚4iO­VÌ­=œ&[%€¦Ôô{¡iß÷mhDÄ`ð÷ÑÌSðiÙuÿY•ëyߪZ„4hÆ¢ >µLÂ'L{°ƒ(DdÙù/Ъƒ/ñM‡×³ª‘ÌÇÕ_†rŽÍlí¬†Í%\É–˜®q-n<{eO~iZµô™#¤?!6©_:Ãqu¸ñõÞA»®øì†çãêçsx«Má*Y £ÜÒMi…{ÖúbWý R“;Ì']„Ž(hMok‰û¢Û+=ºˆ`®µÕ8Ìæ(‘ü¸5ç2+Ñþ÷^[£ƒ_¢déâáñ¥@lË/00b&wï@¹|ÿt½©|l±¶/ FǵªP¨˜è·GåYyX…ü‰KS®X_C{R…*+ßAr; ÁÈF/÷,Á€@$)¸Ë…!GVöWŒbs¢Ðª{åÏ“C4,â¡{bnu_ªÖ LÍDf•…¾ä0Ù·Ð +ÑÃ8_-|àoFS×ïÊn ^±<ÖY°<ÈFiKAõO·¸¤êJsîºYº^ø&—†åÂTÍ[šû -ÇúU i­y²»Kæ‹Ä«5ˆé4¾õ–ÅÁÈ|ˆ mD}‘+{eχ^½¾„;¨Ç=â· ð«)e˜,fÒK·>ÑLôp(ÜÐã•/°b·²a.G”h%XÖŠòLHŠ™hq„³Œ¼µäjž\|üË’´(ýK |è[…pþÇ!Åà/N!ÁôˆzŒúGÜÎM–nÊ4w'…kr¦¶­wžÕ J¯K8Q ZʸÁLb·ºP/YnËxùx ?dqcRÇéÅ_ LÚD¢ƒ¦Üôqs—Ûj~i¼)ÊŠ_«>=ÖÅÆõxgMäô§:z;¿`DæØb’âó¿?‡a‹G›™œ 6‘««}]ù‰fòÁAeõœÁAþ GeÈŒ˜ÙÖŽ)³!‘ÀÔ8æQîõ»b>»¨^?лãWÄýÊØÂïÎwäéŠë°ÌY_ò‚&6ýç”ÛðS4ùÌYÐ‹ÐØ’½ºéq®‰Abå ‚?2*츱ºëÒùl¾DˆéS(ÓZÑ‚•«4}IA0eðövLýJ4ž'lAl(&£>œº."ì8¨%z´Dè|3ü½¡‚&njæ_ñ ]axLKÅbúÛ×â·)`+HT!þ2ÐÍo;ÐDŽg¥@Ø=ÏûQ2&o½ñ?Ÿm~Þ:«•‘ƒX“gá§Ïµ& l¨ÖÈ x ½­ÌñT˜Û;OcP/&ÓÇb1‡YI{"G€Š”ÅãU™¦OD1 Ï Í׺-È$a{93kÑWÈó\ÞÞ=Qcê­Ý¦tnøê9$Ù¸hyÅt{û$…`òÐ/9Ý{| qåG[{¹-Çb7àš\ð²jÉBÅ> endobj 7 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /TMUREV+CMR10 /ItalicAngle 0 /StemV 69 /XHeight 431 /FontBBox [-251 -250 1009 969] /Flags 4 /CharSet (/Sigma/ff/fi/fl/quotedblright/numbersign/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon/equal/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/quotedblleft/bracketright/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/endash) /FontFile 8 0 R >> endobj 212 0 obj [722 0 0 0 0 583 556 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 833 0 0 0 278 389 389 500 778 278 333 278 500 500 500 500 500 500 500 500 500 500 500 278 278 0 778 0 0 0 750 708 722 764 681 653 785 750 361 514 778 625 917 750 778 681 778 736 556 722 750 750 1028 750 750 611 278 500 278 0 0 278 500 556 444 556 444 306 500 556 278 306 528 278 833 556 500 556 528 392 394 389 556 528 722 528 528 444 500 ] endobj 211 0 obj << /Type /Encoding /Differences [ 0 /.notdef 6/Sigma 7/.notdef 11/ff/fi/fl 14/.notdef 34/quotedblright/numbersign 36/.notdef 39/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 60/.notdef 61/equal 62/.notdef 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/quotedblleft/bracketright 94/.notdef 96/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/endash 124/.notdef] >> endobj 5 0 obj << /Length1 1119 /Length2 5360 /Length3 532 /Length 6077 /Filter /FlateDecode >> stream xÚí—u\ü×÷é˜HH)(‘ŽÑ-Ò ‚ÀL`ƒm”äh–” éNi¤KºDº¥ñž×õü.}®ûÏçùë~ÝÛ?{Ÿs¾ŸóÙùž½¶13jëqË‚áV%8 ÅÍÇÃ'”×Ðåãòñð˜™åKS°DAÄ|bb|@Y[ ?/OX\@TœWÀ ”‡;y  ¶v( ›<û¯" ¬#µ¶„5,QvGŒ†µ¥Pn … ›¶bѸè2¯z öŸÄÞÆ?S,G ¯L{ñù3Ä%¼ud 1•MŽ~A“õÄI»£‡^SÞ QäÈ`²Bd(ñHßöŒ,+ƒ,ÈÝâ$Æ.>mÀãè¼Ähâ,µ¾kØ|ïi²Q}¢wjâà0rbÒâÌä¢Ñ)s»á ØüøféÁâªú gĽj‡”6"$z’œp?{¡üNIh—Ïðšó*@0áÞ¼]¾z/“ó»0:rq:A'íy»g1Ò¶c”÷H¼”Ucô‚ai¡ëæPþ†wÂÁÛeدÂÎ5¨“‰;“^ðîÅóTP”ÝüpÚÔ<ùâÚ!ÊW³ÝXæ.Oúœ¸QÄ›×2ûC}HbÝ3ŸÝ&h¼âv¾MGÎ’ñéÞ¸;¯8¡@=©»Pká1g¦µÍ4 C~Ú§Q¹é—±7®Â}qµ§æçW,2ÅÅY†ß†£yj,É|6CTYú7© õ'{®¡öÒ5?«SkºLΈ¹°™Ôi&DËV0¦¯­uU“س—D™ ®7ÔÿÈMÒ,JÑSk¸09]òy ­Lôiר—Op£[ŒnÝY¦hÊ>|o¤éPoYóÊÈÊWø9­kíèg{÷yt¥¦Ü.ÓóÞÙ»mcø2GRx£:p-3rìó@×±9=ÖèKµ³¦B* µì‹ñ\%c@¤tSk”b \kܶbžæÂý|<¹¶Z÷phôùKœŸÏ_oøL½TVÛ >(ª|H!®ž-ðáºá ‡Sn¿Yô` :sSNNøÕ54ЩˆŒ“°¦:–ОomÃgùÉ–Éu( ÎG—Û3½–« žÔÁ»8qÔ=n)Zk ³¿×èáÉ¬Ž‡Ðøäo_KWq;+`ÛJM|ÚÕÓ LN¼$ò˜òH/¶¦ÞŸ›2:Øô㢺§="eïÊ!Øh,Ñv)³¼à¿¯/¬…Ó¸OüAû¥t’+5-Ãe5Ù›#uèŸWxM¦>GÇ@ô-<=áÍAŽÝP·`,“‹ïX; onÑÞuÙJ¹ýàÎg ŸÈeüMæeAú-É¡5×/³·{?X÷±Š×X¾\\2×ç1¯üö¤}Fhúí°¯§)­]ÝìQ1²Aþ×2i†soemváp³>ÁµÐ›CåÔ§áž“·SB4„­Þ𧨵“ÄÑÍ[uÐǪ½× ýï,›³“«m-d¬*Ú–OQ~;i.œwŸßŽ2õ™2oaÄœ¥Å…»ß³{¸C±ò. `Küžn|ïâ’&™Ã‡¤ƒ0F .±û&>]©’ÖPWmFÖV¬šq›Ë·÷Ç0‘ž Ï·ußzdÈÕ·¡2áœP¾-Zž¡£d/ !bÌŸË_&ÿ¿þ¤ÅœÁ#¤–ÅþÔXð¹–Æü»f’€"p–DÒ#‘Í 9kÓšïº&òKÝxž>Dú’¶’>ã=:dŠL˜__`Ó-ƒÂæ²É׫¥\ÔwX»’Ãî¨3ÛÑÞw0}x/=vÝŸaî0ÛÉÔú´Z ‚ý¬öˆ3æâƒ™ñKeU›É÷ÍÕë9rÓ|…Ž›?ÇB—+0Ó&þ»©$•qo‰_Æ?^w4J’@Ü:|h9’ÊV‰ƒ§a(MUýf­û¹´/¥Ùib…þV†Ö–¾7aK+ް¾.€ú÷²—£4£">|YÅïé¾gžd3bÉEŸ#ý6zL `­îù}ꥅ8.[„}NYDŠþrÐ1þ¬=0Àå«%¹»òwËÁcœ;¦¤ÞýšN`,Ž'Æ4ãîDœ%m/i•zûdíúZUC¥(F…cßA½x±ãÆrÓ•G AP®oTºê*yG0龪uŠYö¡.çlÖ}%aøI£<ÆfƒªÑ¥%ì–Ô¶Þ–ª‡ dP*BW¶â3A5WÄœÖ\¯2M\bµ2ý+‘)T`g´ù–£kŽîØ;IŽRO™.Q„·!e:a»#‡kåÙ#$æîCT„jóõcn">˜PïÙ¼œlÂÍRïµh³¥$šÉ%ÛQ ©`DÉMÖjìPÀ·²ª­­ðÙŠçð)ÎP»½éŸ'b6Å3*7oú§†ÑÙÝ<‘ªêi˜Ë»¦ªŠ6+î‰Ýí(~3î°Ò¾ëOå¡Àóš¹ìä Ëö«ª@íe·X>l+Xùî0CKÎý|ÞAÖ•UW›F'ð5º‡4D•P5Ü“g¸f l‘Ïpj§µ58€l—RL­ŠÞñÜeÞôÌy4·È²ZͪÐÇ—Np0„ìs>IymÁŠtl\zA,ðEr:u—ÀQºÚ>º#Ò#9vkÄ$³qýF¢ó—÷î‹… ®ºÛÇtSú•qŠ*jZªêî)*dÓÃàl—.°Ûº¿æõ¡¥ÓMÏ;[IãëÇ O¦jóô¿¹?lè-ýd:§3fQìíXt‘¨=1)_nûÚ÷`?Cnä.2rõ#Cþö\¾”ºô`NN?Á¬Û”`® _G—QÔÙ–G;ù]±2OcÚùn›ëCÍ9üÒ7¥÷~î Ñ 7ú–jñC¼J‘ÁI•®K ›V7eYf­©bÚÝméÀ X¹‚‡šE:9ÃijÚÁg£U-ÍC­ï”#±Ö0 šY¢òŒ††€6þÕG:"°|?5–‹ÂŒ_Ð-éh#ó$tÛ\Šð»÷Áñ†ø â7ÜÖ¾ºøE…϶¼¨•–¾`%½ÛÜ¢ÛøÆ.>’^îq–§…>Ù~s1Ûª>È17ú”) ’•§Ã$Ø{­ÊÚÎ"'uìbì…t1‰Ôx•V@/d‚è³´#asBÝAiq žèÕóGš}Ê¡íôÎ[â6õ¹RJ©‹ÀâïµÄö¥$šØ,ø$”gãÈÒšÌP˜ÏLµNd Ç_Á?ïõÐ+I3޲=x+‘7–ôª§Ak=GÛ£±veÛ{r@'ÖžçR* ¥/R¨7EÝÕ¦kEñ:zUÛè[çÏ ù‚\×SÔg°˜}ôÓ௷ž·†¾ö:Àƒ³âñ¿€Që7aO)±åW^¦ÞëLo‘Qâb/Ôl’ó.–ÒYë䍨ëÔÞÓìR[zi2¬Tà‰²…q¿u]šÛÊ_}0§sO˜Øï!HްT×Ò€Ù.7\éŽÔÈYQéEò '9š¢žìá‚àÒ÷â¿Ô{ÚínÔd™œâ~ÁCŒ×–ºš–«ÌtÃÅ aÝeÞÕéwöR àJ9÷§I8Žm÷ÊŠ‹ikTÉv=ó©ghpÏJC—Îí“ä3`þÒ.«\oƒéK'“šSv ¾íMÝVyæ~Œ·,|EñàÅâ%ݰ dYÚ8Ï¡*7êV$vê Ö”½U×ß<5xñslC’PË;£ùå*t¾Ÿ3Û|7€£±—v5 ÄÎå)È®Ûu•l!Ž=)>xƱÔðù¼«˜¸Ü\4ºöp|1ë„/ÑsÜW#ò[Úà*ÏF±¿Ú_H [QÜ«ó’4²¥gZ™[<#¥€‡¾8™01&l>ÝÛ6µÒTÎZQ$ã(“XÙX~0ä‘À´r+\´ãÇùý@Ó¸Þ`¸í/ÊŒ¨÷ÙiëaàìøíóPmþ&‡§AtÖÈ©P· ‘Ë<«@æÒ·Ã§8˜¯ã¥±±JÈåªïêoG†ùAwŸ&•ˆÂÆmÍý¶«²oF˜§¹£Ûx$)©Æ8ëc¦»ÓÖI4—+ì£5«‰éŒà–ärxójã—^ÏÄúb?™ê,sÆ^ÍäÑé5SEÂCXfl>¡Õð*yXv«¿)bsÙx>޽qƒáý,©gÃÍC#¯o!`RÏF©uÂáðiðV Š|`«ÓÖ†J3UÏšVÎýÎ$ÂûÄå»Jþ¬íÑ€íôƒw3€‰1’ÓôÌìñ . Æ8acŒVöù&wjdÕ¡‹¯hÅ•*—èTxñ†š¸„s$Lš  öÀ·¸ÚjñêÁ“(ñ«79µ©«žlæê– /T÷yk½ÄC4OVxŸÙö¶Éyf=+ÍÿŒ1~jÏ^9û <ï½ï0tU…©j²x8¬OÏZ€°g±%vUÚ+*ø}ÏŸyáBÈ]N¡†®å¬Ü4±Ä.I„[Ù}½žºÂ+¢¼·.Ÿ¾šìlñŸ!¶ëýïÝZ©ê(+ÔdŸ¯¯||ÌvÊòïmŸoÙ/¶½JLÑ Ò‡,/ŽLµ h`¯­)è«PB6HešEé8V$&7qÉd÷ï~ªìùºPÑ(8Ð)!ÎÖvÁñ4Þ¤†ú˜:(*[ÜÔÚØL«8îQöÖ6ÞFæÓ|Λ‚ÑÑ‘8½j z’lïÆó©¶dQG0nL}&+mׄïh.|ä®êl‡’*j1·A-d.ÆÖ òý”žù\$täß_,(OÒÀMfJ\€æ+šyEK.”qøJtº|ªmYË"6…°ë\ÒÔAA#öÉIÍ“IEöv˜Hš}”e¤¦Àé0Ó.Ûžµx¾¹wsr¡TÁõl@•ZF¨Ýû‘eìvûÌžrúÇ»ùa91ßg–áØOïh\ü`˜¥r;îéEdƒë“ N\AR0?¥rÖ¢ÏÅŸ÷úF§]~_íQTs$¸‘Fß|õ\ªÕ¶®¼ïrÐÆ­¾S‰J ¶Sþà³àAƒ&ß¼† NÒ»$רÇÌA»ÒXÅŠÒ‡S§Nîkb¤âªAÓf ?/lïéã«Þ›šŽ¡L wü"^Ë3$Pö§)_Ù%‹g«CCOË&œßbm×GK^fÑŒÄèÓlq¯½KdÄåϽX H¾ðZÈpYiÊ´Iù¨Ùã2 ° jª— ÛìÄ>BAÇe ”±¾Ï ™%¾¯×&,²¹úY*öÝ#ËVÕ÷);¯ºü*ÜÝYrñvan£î„q¸»xIúmŽÓJ¾m Ñ ¦Ö‹DýZ¶ë&+G ²Q)Ãä;d*ËfxYšŽWÁ’ÒÁdá×§ð‹˜X½¡B²ÉLÑ·ÝüÝCòš=äsS‘:çmÄv‘Õu³çm—¹tMÉŽ„ëyÏ´ñ‚SR˜ÂȘHÂ;5BÂéÒ> endobj 4 0 obj << /Ascent 694 /CapHeight 683 /Descent -194 /FontName /LTSMGM+CMR12 /ItalicAngle 0 /StemV 65 /XHeight 431 /FontBBox [-34 -251 988 750] /Flags 4 /CharSet (/three/nine/A/C/G/I/K/L/N/P/S/W/a/c/d/e/f/g/i/k/l/n/o/r/s/t/u) /FontFile 5 0 R >> endobj 214 0 obj [490 0 0 0 0 0 490 0 0 0 0 0 0 0 734 0 707 0 0 0 768 0 353 0 761 612 0 734 0 666 0 0 544 0 0 0 1006 0 0 0 0 0 0 0 0 0 490 0 435 544 435 299 490 0 272 0 517 272 0 544 490 0 0 381 386 381 544 ] endobj 213 0 obj << /Type /Encoding /Differences [ 0 /.notdef 51/three 52/.notdef 57/nine 58/.notdef 65/A 66/.notdef 67/C 68/.notdef 71/G 72/.notdef 73/I 74/.notdef 75/K/L 77/.notdef 78/N 79/.notdef 80/P 81/.notdef 83/S 84/.notdef 87/W 88/.notdef 97/a 98/.notdef 99/c/d/e/f/g 104/.notdef 105/i 106/.notdef 107/k/l 109/.notdef 110/n/o 112/.notdef 114/r/s/t/u 118/.notdef] >> endobj 19 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [2 0 R 21 0 R 30 0 R 36 0 R 45 0 R 51 0 R] >> endobj 56 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [54 0 R 58 0 R 61 0 R 70 0 R 73 0 R 76 0 R] >> endobj 84 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [79 0 R 86 0 R 89 0 R 95 0 R 98 0 R 101 0 R] >> endobj 109 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [104 0 R 111 0 R 114 0 R 117 0 R 120 0 R 129 0 R] >> endobj 137 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [132 0 R 139 0 R 142 0 R 145 0 R 148 0 R 151 0 R] >> endobj 156 0 obj << /Type /Pages /Count 6 /Parent 215 0 R /Kids [154 0 R 158 0 R 161 0 R 164 0 R 167 0 R 173 0 R] >> endobj 215 0 obj << /Type /Pages /Count 36 /Kids [19 0 R 56 0 R 84 0 R 109 0 R 137 0 R 156 0 R] >> endobj 216 0 obj << /Type /Catalog /Pages 215 0 R >> endobj 217 0 obj << /Producer (pdfeTeX-1.21a) /Creator (TeX) /CreationDate (D:20070405140427-04'00') /PTEX.Fullbanner (This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) kpathsea version 3.5.4) >> endobj xref 0 218 0000000000 65535 f 0000002393 00000 n 0000002288 00000 n 0000000009 00000 n 0000241821 00000 n 0000235468 00000 n 0000241664 00000 n 0000234000 00000 n 0000217103 00000 n 0000233844 00000 n 0000216671 00000 n 0000213156 00000 n 0000216514 00000 n 0000212724 00000 n 0000209208 00000 n 0000212567 00000 n 0000207944 00000 n 0000194887 00000 n 0000207786 00000 n 0000242655 00000 n 0000004063 00000 n 0000003955 00000 n 0000002507 00000 n 0000193732 00000 n 0000182807 00000 n 0000193572 00000 n 0000181462 00000 n 0000167216 00000 n 0000181302 00000 n 0000005126 00000 n 0000005018 00000 n 0000004155 00000 n 0000166208 00000 n 0000153895 00000 n 0000166048 00000 n 0000007873 00000 n 0000007765 00000 n 0000005206 00000 n 0000153119 00000 n 0000150245 00000 n 0000152960 00000 n 0000149104 00000 n 0000136100 00000 n 0000148944 00000 n 0000010720 00000 n 0000010612 00000 n 0000008025 00000 n 0000135303 00000 n 0000129940 00000 n 0000135143 00000 n 0000014051 00000 n 0000013943 00000 n 0000010824 00000 n 0000016299 00000 n 0000016191 00000 n 0000014179 00000 n 0000242764 00000 n 0000019527 00000 n 0000019419 00000 n 0000016403 00000 n 0000023162 00000 n 0000023054 00000 n 0000019631 00000 n 0000128885 00000 n 0000117752 00000 n 0000128725 00000 n 0000117347 00000 n 0000114951 00000 n 0000117186 00000 n 0000026481 00000 n 0000026373 00000 n 0000023290 00000 n 0000029339 00000 n 0000029231 00000 n 0000026597 00000 n 0000033044 00000 n 0000032936 00000 n 0000029455 00000 n 0000035305 00000 n 0000035197 00000 n 0000033136 00000 n 0000114543 00000 n 0000111117 00000 n 0000114384 00000 n 0000242874 00000 n 0000037339 00000 n 0000037231 00000 n 0000035445 00000 n 0000039693 00000 n 0000039585 00000 n 0000037479 00000 n 0000110607 00000 n 0000107299 00000 n 0000110448 00000 n 0000041726 00000 n 0000041618 00000 n 0000039845 00000 n 0000044530 00000 n 0000044422 00000 n 0000041866 00000 n 0000047307 00000 n 0000047196 00000 n 0000044682 00000 n 0000049508 00000 n 0000049396 00000 n 0000047448 00000 n 0000106948 00000 n 0000105336 00000 n 0000106790 00000 n 0000242985 00000 n 0000051394 00000 n 0000051282 00000 n 0000049686 00000 n 0000053581 00000 n 0000053469 00000 n 0000051547 00000 n 0000055887 00000 n 0000055775 00000 n 0000053759 00000 n 0000058751 00000 n 0000058639 00000 n 0000056017 00000 n 0000104651 00000 n 0000102222 00000 n 0000104491 00000 n 0000101792 00000 n 0000099321 00000 n 0000101632 00000 n 0000060885 00000 n 0000060773 00000 n 0000058943 00000 n 0000063343 00000 n 0000063231 00000 n 0000061026 00000 n 0000098897 00000 n 0000096778 00000 n 0000098736 00000 n 0000243102 00000 n 0000066368 00000 n 0000066256 00000 n 0000063508 00000 n 0000069098 00000 n 0000068986 00000 n 0000066533 00000 n 0000070862 00000 n 0000070750 00000 n 0000069265 00000 n 0000072639 00000 n 0000072527 00000 n 0000070955 00000 n 0000074287 00000 n 0000074175 00000 n 0000072708 00000 n 0000075397 00000 n 0000075285 00000 n 0000074368 00000 n 0000243219 00000 n 0000077500 00000 n 0000077388 00000 n 0000075478 00000 n 0000080229 00000 n 0000080117 00000 n 0000077593 00000 n 0000082908 00000 n 0000082796 00000 n 0000080322 00000 n 0000085791 00000 n 0000085679 00000 n 0000082977 00000 n 0000095853 00000 n 0000086755 00000 n 0000095690 00000 n 0000086661 00000 n 0000086549 00000 n 0000085909 00000 n 0000096438 00000 n 0000096159 00000 n 0000099199 00000 n 0000099119 00000 n 0000102103 00000 n 0000102003 00000 n 0000105167 00000 n 0000104897 00000 n 0000107200 00000 n 0000107168 00000 n 0000110945 00000 n 0000110819 00000 n 0000114827 00000 n 0000114757 00000 n 0000117617 00000 n 0000117553 00000 n 0000129538 00000 n 0000129214 00000 n 0000135770 00000 n 0000135550 00000 n 0000149815 00000 n 0000149467 00000 n 0000153679 00000 n 0000153395 00000 n 0000166859 00000 n 0000166564 00000 n 0000182296 00000 n 0000181938 00000 n 0000194468 00000 n 0000194107 00000 n 0000208750 00000 n 0000208371 00000 n 0000213026 00000 n 0000212966 00000 n 0000216973 00000 n 0000216913 00000 n 0000234962 00000 n 0000234535 00000 n 0000242281 00000 n 0000242072 00000 n 0000243336 00000 n 0000243435 00000 n 0000243488 00000 n trailer << /Size 218 /Root 216 0 R /Info 217 0 R /ID [<9E547859013DA727FF3790C8D22C44A7> <9E547859013DA727FF3790C8D22C44A7>] >> startxref 243692 %%EOF scalapack-1.8.0/INSTALL/SLmake.IRIX640000640000175000017500000000577410604316307016464 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: March 20, 1995 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = IRIX64 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs #SMPLIB = /usr/local/mpich/lib/libmpich.a SMPLIB = -lmpi BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = -64 -mips4 -r12000 #NOOPT = -g -DEBUG:subscript_check=ON -trapuv F77FLAGS = -O3 $(NOOPT) #F77FLAGS = $(NOOPT) DRVOPTS = $(F77FLAGS) -static CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = $(F77FLAGS) CCLOADFLAGS = $(CCFLAGS) # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lblas LAPACKLIB = -llapack # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.pghpf.SUN4SOL20000640000175000017500000000562310604316307020024 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = pghpf_SUN4SOL2 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = @(MPIdir)/libmpi.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a -lnsl -lsocket #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = pghpf CC = gcc NOOPT = -Mnohpfc -Mdclchk F77FLAGS = -O4 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = -O4 SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = /usr/lib/libblas.a LAPACKLIB = /usr/lib/liblapack.a # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.SP20000640000175000017500000000553310604316307016134 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = SP2 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using MPL # USEMPI = -DUsingMpiBlacs SMPLIB = BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # system primitive MPL BLACS setup, uncomment next 6 lines if using MPL # #SMPLIB = #USEMPI = #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(HOME)/BLACS/LIB/blacs_MPL-$(PLAT)-$(BLACSDBGLVL).a #CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) #FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(PBLASdir)/TESTING TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(REDISTdir)/TESTING # # The fortran and C compilers, loaders, and their flags # F77 = mpxlf CC = mpcc NOOPT = -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 F77FLAGS = -O3 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = $(F77FLAGS) CCLOADFLAGS = $(F77FLAGS) # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DNoChange $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lesslp2 LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.I8600000640000175000017500000000553510604316307016160 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = I860 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; uncomment and tailor to your system if using MPIBLACS # Will need to comment out the default native BLACS setup below below # #USEMPI = -DUsingMpiBlacs #SMPLIB = $(HOME)/mpich/lib/intelnx/ch_nx/libmpi.a #BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # system primitive NX BLACS setup, comment out if using MPI # SMPLIB = USEMPI = BLACSFINIT = BLACSCINIT = BLACSLIB = $(HOME)/BLACS/LIB/blacs_NX-$(PLAT)-$(BLACSDBGLVL).a CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(PBLASdir)/TESTING TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(REDISTdir)/TESTING # # The fortran and C compilers, loaders, and their flags # F77 = if77 CC = icc NOOPT = -nx F77FLAGS = -O4 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = -nx CCLOADFLAGS = -nx # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar860 ARCHFLAGS = r RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lkmath LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(CBLACSLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/scalapack_install.ps0000640000175000017500000146242410605235131020451 0ustar muammarmuammar%!PS-Adobe-2.0 %%Creator: dvips(k) 5.95a Copyright 2005 Radical Eye Software %%Title: lawn93.dvi %%Pages: 36 %%PageOrder: Ascend %%BoundingBox: 0 0 595 842 %%DocumentFonts: CMR12 CMR10 CMR8 CMR6 CMR9 CMBX12 CMTT10 CMBX10 CMSY10 %%+ CMTI10 CMTT12 CMMI10 CMITT10 CMSL10 CMMI8 CMSY8 CMSY9 CMMI9 CMEX10 %%+ CMCSC10 %%DocumentPaperSizes: a4 %%EndComments %DVIPSWebPage: (www.radicaleye.com) %DVIPSCommandLine: dvips lawn93.dvi -o %DVIPSParameters: dpi=600 %DVIPSSource: TeX output 2007.04.05:1405 %%BeginProcSet: tex.pro 0 0 %! /TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin /FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array /BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get }B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr 1 add N}if}B/CharBuilder{save 3 1 roll S A/base get 2 index get S /BitMaps get S get/Cd X pop/ctr 0 N Cdx 0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx sub Cy .1 sub]{Ci}imagemask restore}B/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put }if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X 1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N /p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ /Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) (LaserWriter 16/600)]{A length product length le{A length product exch 0 exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end %%EndProcSet %%BeginProcSet: texps.pro 0 0 %! TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]FontType 0 ne{/Metrics exch def dict begin Encoding{exch dup type/integertype ne{ pop pop 1 sub dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}ifelse}forall Metrics/Metrics currentdict end def}{{1 index type /nametype eq{exit}if exch pop}loop}ifelse[2 index currentdict end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1 roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def dup[ exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}if} forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}def end %%EndProcSet %%BeginFont: CMCSC10 %!PS-AdobeFont-1.1: CMCSC10 1.0 %%CreationDate: 1991 Aug 18 17:46:49 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMCSC10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMCSC10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 39 /quoteright put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 74 /J put dup 75 /K put dup 76 /L put dup 77 /M put dup 79 /O put dup 80 /P put dup 82 /R put dup 83 /S put dup 84 /T put dup 86 /V put dup 87 /W put dup 97 /a put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 121 /y put dup 122 /z put readonly def /FontBBox{14 -250 1077 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D5993DFC0930297866E1CD0A30EB76029337 900ECFB1390CA5C0C3A04528044F266BA17BE487C79B94FAC6D6484684C5BFEA 87BCCC77D40AD11552035E95E3007126418ED49B68468B38A14E88E68A267B98 076F1C9769A5AFBC285E5B158EAC9F926F1D6C0B8F1D57D9C31D25AE27123518 9D2CD92E5689E0213089BD268DA5E47525CB8EABAA4B78A15AEA34705889AB3A FFB8953B5B3482E52BFA0940630ADF8C0AC2177D907324299EE980E850F203CD B627962F43D5A678C44243CDE97853BDC6AB45FD5C09AD274DAF89929F583CC9 CCC24BDFC68B92111055ABA5F26D2DC67C70906F71C2957701D65AE746A60C30 40E6CB24B97FCDAD0487AE38A201FBF0E41BABD2181981A71940F1E707F91E5D C8CA50CB16D8702D188E56D014D92F76CE0B52ABDB9110E32438D2BBF3E6A40B 7B005F10BB437812CAC6ED2996F7606DC962C4FDE207FF322782C343DF44CEC5 FF06A55C630C20E9AE1B0D1C5673753C43BA0767D65D1B451CC6380D8BB3C4DC 81E8FD8AA79BE993218686F29D3CD925566DD587F541A0DA1B1CC3BCEA2E6C7D 5E1016F6917A871F1BBAD96AF9E867735017119A381FCF33EB2D3E1E7093FD90 CDB0CED4818CFD9E201A03430CEC713620BE0D3254158931FB657C6AD4B2482A 0E7D070D7497892E9E942DF58E88CAF0C8221BF36BF7C435BF2C683A4A2EF4CB E85820A8AD3486155A40143011BA9D76297F46DEF69ECA4596D6E4CAABF84091 22A96A4BC78A8DD072FEB759A68A44BE1164638B6D952147EE3C628F9A022060 1D1941E73310943FA782532ABCCC88593FD63E7E2CECF6665C04DB48D641ABD3 AE1BB468638681E96394B9E7BED1D13E534B897DB90EA6EC706BAAE06BE2FEFF 1DFA5258113E9B4CD2D36655973864574E0E8D6291E48A8C62203F679B7ED838 6F7CBD523C2943ECA994CACE6679ADD23290C50FA5ACCD01D64928BEB3E5BEBB 7CEE09A8E654AAAB01E0B0FAD974BFFAD8A9A1B6CAC9E28290B6B76146CE0D8B 8FE70111A50AB10B3E4D9F4C4B57D10980F9E02FCA8A51294D5651CAFB548336 E9FDBC163CD16141B9C52B4FC42C122901F70138B2176B760E0C9768F0DC3C11 CFD3E826C3BA6234EBE8515797347333923C6CED42FCE5E716FAB1DA0E826F24 4CE08C6A562EF0FC7F9D959F3BEEC37B9B9A2275C223F1BDFEEEDBBB04D1CF68 8DDE2BC3F55C5328752E085094926432D3C557D166E24611A1B9773D49630884 24DF6D3A89228EBACFC3D032D4994480A53EED5E7F8507C5B83051D3AB9ED331 E179797FC67F9912F0E155E0DDB93C2565C1964E04F0590E318C908CEDC2D58D 6083D3F0C7B8320AE6E65E622CACDEE38315F342A7B067B71D1CAB5EEFC82C49 1AF28C6BCE96BB7744B4AE081EBB8B63FC373E1B7B996C7CB81AF8B309495FCB 05F816307210CB8DF3B0E69677B69B7193BEB754A973B9C62A7FE49BC4371CF3 8E4CF9B320716913CC3D871C0BA5AE03D8C1E304E7C79A1FFB97F13D72492BEE E29824AA7033505C3071876186C5E86211246A6A24B69959AC6C9E7ACFC8CC2B 7DA72A91647B43D8F4BB5CB92687CE6D257F01DB8EF9282A274997EDF1A00013 20ED4576656AFD42104025E4170D11F4F892FB2BEF17CDD1F909FD6B03F25E67 7155789B71DA23A6A0D6C89A8E3E7CFDC94CA9D3965A5EF54866F47A4F52CF8C 844D881C2789127CC09C8DE04FA4F90F6D37D8BC87E1EAE0B991AC363D0543FC C3C414E0B5559A60C6B32FAEE8B9D37CA39D6148C90709D8EFE5DEAF1F7EE20F 180509E1969A76333067D012C175644C24995CA68C0EFC2C599E69D8EF52EB82 20302646C41E4115B5DB34F67879F106F79D3B745404DD66CB7CE5FF252E5F64 BE01A5AA9545B7A9FCC91B334574F0CC301F28D693E6AB6F75CD349A0EDB0A20 36CF84752035472076EB6271286C03619BF6D9F72D3B341AD1F88BC7E2F8DE4A 25CA82AB15009F3A5F53585928A959E7FD95ACE70A37652B140EF0F5C304089A AE6B9251E6741E0A6D078D68E6CA0F3C32A2A7E2864EE1A1D509D9418ACC3819 BA4D9E5D82AEADAE9AEF380E6982DD9F07393F71005955D7AC64C1567B4708B1 F0462DE483E32C0085E5A36A48B250B539D737286B2B485B9980F21FD5E26D89 20F51BE7927CFAA4465C847A9B8B9F83DBF005A3BD548DC26E22EE9D49A4227B 6C584F16D7A0A6DF31E161EC94F559DA166873CBE9C3026FCB22F96080E637CC C5FF951D5044951EA41E7671AA5FADFF60CE8BB7BF237E29A2B4F54BE63B59EA E28BC5EA1E70AED2589F5693C8CDAD6DA5642E4E8E9327B072A538ECECAAD737 941671DCCF52DE1D84C0B9028CCF7E582B17090321A0C49FE21564CFBEEA51A1 ADD70594999CD3E94133ED9C3BCB796CE430BF1B111CD1E29F341D0DAD385028 8F7737D218CA8A9230872C498B7C8527C82346FE6F3AF4D38349A043A417B49B 1868B036146ABC7567802CC61EA2BBF742F638E0084AF4A93EA46EC93D06AD6D 17F0BCB651EDE7C75B64E4B38B2BCC6915A4607612DEEEC081BC9796B79F9283 513DD3874486E63393521328EB10F9C36638BE827663F94AF7C4E0CF254CFBAE 8ED7D0CA9C1F632AE5F0670C5DC717F4C89FD5919F57372396E367592EFB77E0 B887B4C196B4A576D12AB641FCE038D53DAE53725E598711A73741DA51CA2CDB 4A3CD44ECB92603CDD08540E01274D9A8D859BE640BD58F0EC5DB4AFBD350642 15D477681527D908DB58902FFE53380DBC210F2A7503D88C7B6623748A4AEC69 6B113991537D4909C8BA62F44FDD52DEC6DA93E1F0F3400257788484F255B463 D1FD49FA84478C92BDCF2CA847C685E0B5E36C3562524546DCC942BF37F0E34C 02AC14D98D652EECCF114879F9F6E7638AA39395A31C3340C35832864D5ABDF8 28FE4D5EEA80BBECF32B7F81AF6ED4EE619658566A42B3AFAFDDF7B198C9B25D 2B1019FEE03BFA0E6C832442BD1C1CE3B8657C79A4BDE9022EAEE14B36611259 FFADD46612E4959134C8E7EBF7186150CD6126420BE7DB359CF875B91D8E55D5 EF62EA0BCFE42DF6C272DED8E7444F218C70E0EC1AD477AE81DDD08363E5A877 C6A91C0407033E76F5AA57C956B6F8DEE834A405568767F7E36EC7ABB4F4EC81 56708D2404320B3444A59078B4697773C08E0BD6C6BE6E93FACA01198889C93B 86B048CEF863DC1A096D0C031A977B156E7C83D2C8C23E43FEFBB46682D27793 F7EF4B59608A373958A75B005C69F20BE644F30F6A338CF9EBE5D66221749D6B B4F87BC9439FD1CD681D07CB766165B61CA83D12607A822E35F163D2B3B982F3 E4C1A4DE52DF9B12EA26C61EC75FC4410B8CB1CD4A7E5C887A645BEB72AB69B6 8FE3CEB934D185E8250DE85BE70BFF46E7A2AD718C208B29205D5FA39F806912 0DF859814183D72E3BFE618C6F0A946CE5EF8EA04F460C45AC22A838F3E14DD0 D47F1052915F1F8722DA1EE17778ADDEC203BB93B9E470ACA0F350081FE6EEBF F6C64374F30AABF77DFD29C8DB94389B7C3B857F20814B04572590D9A382D2F8 3DC8C04840DB2C44F469CA791E18225987CD2528D9344F987AB56918D9BFDB67 E9F5DA5BA117F1999A6AB49EAC5DF683108E04BA9900C72FA02A9266FC9235CD 5FD1E02A022121E2E64BAA50D08A39F8D4C588C2D8506508AF4A37AF05F8DD40 2077E067C9E1811B042C5626FF27E31DAEF48B729BA7C93A8812617B8F2E0BB2 3C6D367110DF6BFB3C8AA6CC0F348116BC1908CBAF3C5CD88CF6EA147241BD5C 2FC36AA8C6C2B9294BB0A9A366F9BB36738A9A90EC9ECE8D1FF1B27AE947762A EC27C0D65265A016A046337C7A1DF9A6F0E106E140812511046EC959F77D1B97 922E876C3FF41A68A975C5817BCB1140A63A9742D9F79E7C39AC937B3FFEA93E 1CDAB7F55D67FE8A2FD69CD7C208ABB86047E8C4899E7A80AA212CD295537F4D C42E9E3B9A64E4FEC29753516B7C4F6D6374D3BBCB1F2A5D6A47D0640EC0EABE 99BEDDD58901B1B2B280C5E539F84D1A3277EDC18EFEA35EBB20789E5790F140 9FC4C3B52AD1EF2380B2FF6ED708ED39093F8C75E3E67425A0C3E835280959A7 125AB7F4DBADCEEFD0CA5A67E1A10C515D5001D8CDE628A7F29D6DF58FB5F437 AAB992D735A0892F56A6C1ADBB4996B863EDA50A75D6B26421AB0472CDC75549 FEF749AB834E742C3FEFEA566ECDA97CF8C2C4C0E9907CFD457E40BD7F623A9C A34870D149B29BBD3985BBEB7D5CAFA06304A8B5D3501779094C24D733A26438 B732FEF82A5EF1F7F2AFD1C270920AE89128ABEDAFE626889138A2610C9DA0F0 BD741C98C0D47A530D381792B798ADF1EFE837EEC94040CEF61347F5FB39D0AC 46ABD874C7A513E5996200BE60A84190B0DAFE520264738098944C875D8F99B4 0B624B008E16D6EAB9B06616E46072956259174B8D6F94A73698714E4713A7D2 F792E19A27CCF09E84E050CE6408C3AE69BE3D540A18E1DEF193F0F97316D4A9 BBE2D4C611EA5DB69E8A538E604B579AB7850AB424267097F2FE78D8A3AFDE5C 167995EFC16EC7A6DA7BA19DC1A56F7A1288039A8703AE04D41D436F665A580D AE8E20CBF2924B9E3A6684463FFE8B461AF38249D28E9684E621F8F0E76B99C7 ECD964A664FC8F103CC9E0C2976B13B942AB0FD96B2066DD7D83AB0A5D81255D 6A49F744F43364FB2AE380CD9B1E4CD67EC57C912CA62F89ED73B445FCA0EA87 9CB5CDCA5A30D293D3D77B313619F8E342BB47158F26445F200C532FD2E0B60C 74F56AEB05CA6DCDB1B1B38BDAA58EB10A6F9689CEFF658260B3CA32A6241CC5 582F160879A5D70B75FBE99D3C3512ADAB7A9F7A00E5BD70D5E74C850C5A7510 016E1A634246569FD210CA4F276B46009CC3367D3F50864E7F1BC642EE469F60 25B67006674A40DE6F4B05D5B3495BEF8EC7860732703EA3FEC53DCB43B6CF74 D3876DB433922F0DB59B1F2933B9B72206D1E8F61F399CC2CD4BB955FCB044B7 B6A816C5885DF66517D6C74E4BF234193AACBBEEAD3B4A98D6FF39CE0740B24C B7A2B9B0F29B662310B3E9ECDF6B1C5851273D52F2C9B4EF558A621DABFB7FA2 C79EB0D0B253C4F82E47B8E91BF13B79E16EAFAAAA5538D5DA51B88DD71F9C2D A758738D9C3FC1B41E3252A48A9F3D04C63FD5E4903035F348D7CF229AF76CF4 E11D592C187AB13C87BD93CA7856505A3E3F7D8B75BCE6E2384A57F88EA76B21 32B8D885429F451D9151EC93FAC44E1BAFAEE2A26FC0A8F0CE25069E4FE5C428 E144B9C82A5E7EFAE9319DABDB2F4AFA101212D9C18F0C2B05DFBB0E143309EF B6054624D897EE87D0CE3CBDBA3C0F6BACDB281A2E55129057273BA33E290ACC ABA916122B6DF94E42D830E9700DFAEC18289E92A1E846DF937F66DC207ECF08 8E90A554041222379D83F35D285A113527725FA3C978631907EFA9444E174EE3 6307C33B8B59103C779DFE516B9731135AD3FA649283EB3681F99329091E63F1 2270E84AEAB99D64B1A8B6D842F410EA8EB3E40AB74113BB68750A964DEC25D0 63E389DA1DADD1817487E494CD516C0566CCFF32A750296EF30998EA17F47DFA FFB7B0C278EF067764228DF564403105DE0918F5F45C705533D1F7A910539856 10C8CF921E5EF46AEAB7C23021911815D7AF789A28D1D3C51AC54A52C7618FA6 3EE00A3500391ED01404B346A8365975E7055B3E28D83A80F1A192CE1151ECA9 D6DB5DBE618CABCA7890CED68313680CE18355FA6810E3E8E608CE1EE1A59F84 25B7982DBE507F7E25EFBC8307EED0DEA1495A51A1A1F374E1CA6E4F318A4F1E F731CEFA9443DE54B62DD4F3CBE2EA6ABFD3B0A9138755C5B7419831E2028949 7A49A1792C8F1A190096DEB7FA5EEDEB8DB3F539139F8A30C478D135C39DF174 2D96F6413ABD28438110D75837C6A5E2D6D88519BBE951F41344A06A45F8F92B F109EB023F7B475A31CFBB129E1527A8A58803F0633AF52494550FF0E8EA87BF 3ED39F8200D1AC85322FD0C03C1468C03209889ECF27927A1D80157914C176CB 2D44C5C154B68E7B47762E50D5766DA00D61DE9C1D154C6A048CD2E2081915F9 793501D2A211BA463101018D31FD30213A30EE029A485FCBA8F035B069C19FE7 EDC257C25720EE67691F1B7551B5C42D06D4B52907C664F937BCED7261E541AB 278ED627CAD2F99A61AF2272BAB24209A83A4B87F1B7A7341ED3966BB97238DE C68515AB042A0960BF9C3A82B13E31A09236AA6EAF31FCA158618EFE97ED3B15 6038F646FD053965CE5BBB59E89D8A621CA565EE390CE69E9EFF2A4AF3F0CE45 06581D8E191BAEF34CEB20783CA934F04A29A60422ECD6465F5E8B4FB3FD1DD9 CBF0294DA1F00EB8B5349C12399E296ADE6BC9B3F6EEE9D1457BAAB8BAFEDA8B D3663430CF539DEFF45BA9829A952F6D6759DF18D5320FC6BE086E2ABD1751E9 980DB0D88BF506E206B49F085FDA1D8F6849454C37246510FA57BAF974BE596B 02255530A976E6697F8C9B6E71FED0360D7251384E5BEE2860CBFD3365FBEBB2 D1B68F8258F8E9FF3CB2060D28EF6DA8B472608BECA36C75632BCCCCECF9B697 1E10D54B277AEA3909047CC3D1FBC02F4D972AB98509A0C8D8E189BC3F6BE9FF A184BDF9FF7BC80D654722955516D58879D5CFCE9401688637004CD85C6CF53A DA4183425BB6D488A96E335BB30914BC4E4B7B34B44611FEF2277F3A6EBC5E2B 5DCDF8272B4C14AB8C92390CD3E5F064A968BA3BFAFD4BA638F8BE19DE7CDFC8 DF1A165A6CBAEBE48BABC9DD32419DF162AD15FD7B2358D4F82F04C655CA9453 D70CE522AF513C85762D02103A195C4259A5875B8639225797972897576575B9 7C82264A27DF16B55A46A9EC0CCE11EF4845E0D5A2D6709FB1EB75C3C90EA91E A6516D1D95EC169A99D2521C2B4AC66642BB229B28B1476322D2E9359B82548F 873112A8F61ACC41B5BBC2518102F7B3A4FD3C3CC2C5942C5B0B4E10CDFE169B AC9A2961B8139E4436F895C7B101D198BD568ECE617EEF5D14E3A00631463B65 510A0D25324764C641E644079745FB55D42D30F3E95E14A3813DDC47B435DA18 43970F2874AD4AF049C4318F978EB8E1A8CB143150BDA27BDB680D106FF5AAE5 D2C2B05725EA9CD3A9F07DD967A573053555217011BB01296C64E9EBC8AD7D50 CC89126B08950D0912E1F5EB19BC002910E0DC36A73CD8E6ADA1B849436A14E6 58165CE26F473ED3737379E1527A89E26ADA2E455E818C256DD4BA5185CE8312 1142771E26C116072C57C39256DEBB6CAFB8FA19935B0E4BC554640E2E9DE4F8 93D439A5202B6A250A8996080B445BD5F14340F685A964B02B60B306CB58E240 15497209303D9F58C97AD2BEEC0035182839892BC37578CFD0BF46EA9427F791 E369A28BDC3B6BA989A8D39CEF64EA7D2B1A7A816308F52FE1D047011CAFE591 81E10DE0BAE29B4AEF4B12EF4A992D67CA1281D2E91640608B658DF3AED9BFE5 D45BA1184D907DDA017E7D1DBC0F6D3B2F9566282910E2BFED267FAFA7C0E1D1 94E737D22B37E35758AD995E653525E3E87C210E6E93A5F0667107FBA1AD2C4D 0E96A8BA2C015A6D0AA8991A2D0AD4879B0F26A194309245BB584A1E28998DAA D8FB5513DA50C07CB3DFAB19593B9DBAB7E99C885589F3A1280332D4985168BD 005066B1F60BE70E0B83CF8F4F8EB16C1E8F7DB1995AD495F7BBA93589B824C9 75A1416EE7B9628FC1F7F79B765643126311D226BECC37A37402A46F48A68883 0EB428992FDC3C5062E5B7E3B448192322839AEE7AF6DC14098B01C53E65FD29 A1CC4B2AFE921329EB497C075873F14FA691FF2406EF91D864DCAED6A5109769 5F8C07383566146718FD9A595E0C45019B66CAE7E8F079E486C67573529778AF DCEA4E393112E7FA77BFBFBB3A60655186D0004E6B20D4219C956443C3A5506C BBA973D3A3CA7167B0F2F919D7848F1685739C41FA0940BB2031D14C8DE27A80 0E86B7C28C609537B27A0A83998D87E13FBE7A296908EACC17F3CFE068F65266 824E8B24C82767F828DBAD2AB01F8EF7760E8E81CB6A271756062B431E2449F5 C1B8B87A6EFE59E378E32B90D3451490777BB666F33FFC386C5A1C0844B75745 A572A122D9CEAB13F7EAD8632ACCAAEB87E7D127FF30A8E8360674FDD52EF02A 9E49CBC8B543D3B6C75A4F5C3FDA2AAAC2100059042EF86D4414F267DA3A650D CCE53A84A74388BB758D1281BF5582C90B482B005EB85B58D5CEC7D400102945 83CF3B7053436D2A3EF0043871CBE6260361A5A65DC4976083E917DEDC17AC72 D63B8665E572119E997C8124D755808680609B8FA3305BAE80159B4B8639168C DF4C19B7ADBF47EB044B59D85B50F33DD627BCB5BE256EAAC31E49AAD725D6C3 B9F40CB937440DA36629B18EEE4FBFF3C97CC78E05B6A15223721B7E6C4DFD2C 46E347E07BE40A56CA233A9043B2752E7F1B688D258CE4D4CC31487D24D87F6E D555E47ADC52BDA2C98B073C5558F82A31A7764E011DB9F66C17378D9C7BD18E 06CFD04E09E90FEAC712CE50074054BDB1473722245C771F954D82DC707ADA2C 07FE8D915CBDE9488A7942F11AFAC94F68578AD7E245A55C922900AE0BCF5400 B30B992F1BE115E39BDF8EC378D1ABC97E2C9B1E4C668D9A4392A23EF19D60B9 DE6504B2A1667C2BC44A6C3C3FF3339A331E613737B8405D31F541A3D13C133F EE0FAD837FBE62AA2E0D5EE35CACFF0CAE72AADBE8411336D5E2CF563AF0890D 611B400E0B24FA4625A664682CDDC2E64E6FDD3AF6DE54D17FA3E06AA9A29899 6C48C3945D48354C486C53818519731A989760CEE6987D36921683EDC710238E F12E664E48D165002E4AB0EA66872E461511519FFD598AFCF8C40013A96626FE 951B817B63EE567234483F3D6DB67B1B51EFD6399F5A2A1AAF05E0884266F0F8 03311025F7581447E499C905C08941D6D40D900CC96E395D5FE1D625B18A6DA8 9C840C09EEABF82CCEE14D8A37E699EFF6D03A2C1C304A734E9B2CD024F505CE F584326188572C3DCBCA5F87F86C9D2BCBBDDE060BE9A3A7E0837B58DBF3BFFA 79588474CC8396C267EA9BFC9AC958F8F44CDD6E038883550B981C545E68022B AAD9CECDEEFDAAE54CCAFFE4E4F160D858AB7051F760A4E5E56547E094AA89DF 1D8C65AE393BC2F64C3511914C4A2A03BE054F9D9F7E21FEC9EB17BC5489B90C F7AAC2D3445181F68214DE5C9F5756D001E555E1C73DB4E29FEDD3B83DC8C740 606BB7F2CADE09AE843E6F793B43B644C2FF149BED65230D4213DF7CDBB98A43 5D92E750EEFC1BBFCB72917DE5E92B48694EAE443E0E61B7829AE02D3C9DBD10 DE052AD77873E6227AEE107D6E1A1143D8C73228A7E4D763B9E7024779B91F86 6C8ED35C1F2A6BF4BEE03FE584F81C1C2EA6E348D4318441CFC31CB33DEFA4DE 3FE77857C95633CA5D07F0E5F469DD3207BC3F412648F722C4D41E8D97E5A01F 41206917FCC02BF001D8A0A8F7F730B2C4ACFA76477F05ACC3376DFBF406A7D1 A864F8DA0135F7B7F1C9425E5443652C68290C95FAEECF52796FE535A697F826 C45A61E7A4B0BCA34F922D2632269CDEFDAB4460914C8929CE7B3986CDA53412 6AA385A333BC7BBD00859B4510C52E127E6C902929D38503A2E8F3DA5ABC6000 2B553111F76EBA3E1D8AFE526428A12DF12FD63B34FD47594A04E30C2B9B5222 34D727126BB7491271FCF53AD99B08F6F25E7075F6ABE84EACFBCF68B2F671B8 25ACEAD9837A6075AC1933EA504CB5F7A14625DD403D2ED0CE39898C5C8213C6 FC4B4029E8CDF038923EE870051A01C5EBBF429C27FD6D24BDB7F42CF138F7AE AA621381737C0E48D455D06B379C95A0834B06CF51ED2726022FAF623187A674 84160C0176B33DFA8A69C06E58548BF22E8E4F7AEDFABF0126FC2DDDE0DA6D4D 3C6D3404E4DE7449D8A9466B96220C561C45BFA49E0A5EAFFDA325899B998313 E1357938D77A423FA8E030847194B74B431F66EF9F999ECCFF367E18C13DAFF4 C089189DBB212D5DBAE560C39D720A79006F8CE5D3ED94EB2181205307952C5D 0BF0BF653E8FC0F384ADEFECC12CAFE85DB07BC8D8D98906E05AF72FC75C0694 A1E2A2A2EE33008209A439D43640560901555AB1B23E21EDA992DFF655F9A0FF D4FF5373FADC829F0F73336DFC430FC034800A06FAE1AB1A5DDC9CFF2EE2C387 0FB4904C1782B27804B3AE24C881A27CFE86DFBD5D9C2E2D30D48446D5685087 91DC6488480592AD3581484A0D03C313D414EDA0A93268D9474519FA1C36F8E7 09EFF3B9EA119A8CAA1FC58BC3C52952CB85BBF7922AEBA5EB0B247A6935C4D9 FA88959AC9705FCA507AA491EC221716300878D45B906C81FFCC9A3459545BDA 7AA34489920D5F6A097BCB45537E42F16CE06C2887363517793529B14AF2EDDA 1D9FE40B4417F6FC94F351DE8574EC38B0B9FA9DBD000333193C3E1BE4CAC784 F39122159FC9BA5DB8B5C3CAA29F398EE028788DBEF6FDE762A841A7ACAD7CD0 6BE08F95327AF2FFA0B0FEB4620FAA56F96456681B296307DC89449AE033895A 7104A8AF26D610FA877F9198C64BE77D309B2D65FB8EB8832F6EC6B9BFB701F6 B1D6F88ED90886E03E3AFDBACF0BB8A50C49C43E7BA124EAAD8400AFAF4053EC 743B9FEB3782C916705EA5AFD50DDF4162D3FDF44BB5400EFD48BA9D2137F65D 68B418DCC0F2B7C90095148014DFA0C7302E711E8D9C2D44B588BC83B85698D4 0495D50225A4CFFAA1AF8B1DE6F6214041DEB57BA19215A028C8F0BA0810DBC1 A59E0F85A5A444E76A412A78E5DFCCC678EEB2A3993850310A5DCB1531B6B3E2 7338CC84686E84D601020E0F9C2E4C12578DC5B48E76DC53E9324E864654F130 4E192D0B798865B0D15C063C8E42A847031A7D4EC69647A3A45768C3C047A5EA EEA2DA6943D2A933A156EA51BEE6955D31B9B7D2A999EB3EED12D6378E3E0971 1C280F204CDD8021A2E860235BCAB3CE278D14B9A00981B2AFA3F23B32BFA909 D66164FF88535FF1EB74B03705 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMSY10 %!PS-AdobeFont-1.1: CMSY10 1.0 %%CreationDate: 1991 Aug 15 07:20:57 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMSY10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.035 def /isFixedPitch false def end readonly def /FontName /CMSY10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 0 /minus put dup 2 /multiply put dup 3 /asteriskmath put dup 15 /bullet put dup 21 /greaterequal put dup 106 /bar put dup 107 /bardbl put dup 121 /dagger put dup 122 /daggerdbl put readonly def /FontBBox{-29 -960 1116 775}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052F09F9C8ADE9D907C058B87E9B6964 7D53359E51216774A4EAA1E2B58EC3176BD1184A633B951372B4198D4E8C5EF4 A213ACB58AA0A658908035BF2ED8531779838A960DFE2B27EA49C37156989C85 E21B3ABF72E39A89232CD9F4237FC80C9E64E8425AA3BEF7DED60B122A52922A 221A37D9A807DD01161779DDE7D31FF2B87F97C73D63EECDDA4C49501773468A 27D1663E0B62F461F6E40A5D6676D1D12B51E641C1D4E8E2771864FC104F8CBF 5B78EC1D88228725F1C453A678F58A7E1B7BD7CA700717D288EB8DA1F57C4F09 0ABF1D42C5DDD0C384C7E22F8F8047BE1D4C1CC8E33368FB1AC82B4E96146730 DE3302B2E6B819CB6AE455B1AF3187FFE8071AA57EF8A6616B9CB7941D44EC7A 71A7BB3DF755178D7D2E4BB69859EFA4BBC30BD6BB1531133FD4D9438FF99F09 4ECC068A324D75B5F696B8688EEB2F17E5ED34CCD6D047A4E3806D000C199D7C 515DB70A8D4F6146FE068DC1E5DE8BC57034F6B6A67AB666E15104577F342F68 CDE6AB4EF53E08A8E0552B76072FF46E80AC884959B54D147A9D5105D1FC6B3B BEB747B75D6E6A3BE833BFC95FBCB2A23A8697CF76D1B9F09F567345137B24FB 84474441D5CC4C121EFE1E4AEFC2950C042F8C4C40FB10A631242F0A99BCA589 D4B03328602937409D24869D80E6408319F42E1982A6426352CCBB6C1BEE79AD 8CA6E8C49A7B752D6DEC5707AEFC6E7A7A7FEB9DD51ED1C6703D083C18F34D83 589BB9DF0D0EE93A9D15623226127DB28CDDE276107C9CBED2F576FEC8FF1042 42B66FCF98644EED8FF119653C032EA2E736A5F0F869D380EB13EBD3F4B95BEC 4223CA8A47265C6CE2E8859C3CF7079465D7CA5CD067785829187E645D2DBEE3 DFB64169EB31BA062FFC377655F7DCC6E2579038B4D8CC6D269A2DC9F040C423 801B889713A4EC856097FD71223094CB35B3B037A5BE7F28D1C473E2FE42DDE4 F16F5C1AF3E29247C58B0F9288411F926BEE5F3A05A9D74DC2E2E438308438B1 F2744E927E164C05070EE525FFE45A0055C14CA5F35DC4F1246F1A300461E48D 15FE58FBCA01FCD277CBCC544F6EF52C205908EB4BFF55568026F8AF1F9453FF 27B0FC0B1180D47621A67B944DB43D6584D1BD46B788C13423AB4552F8163047 C76A0368A1CC45B962C36852DD80BB03BBF0AFC1D906CF176FD08A71EF56B919 AB94C50D05BCEAAE427531E09053D4885ACF4DAF592897580484378EB2CB9C44 6513B8E718CD0F20F54666B4C4B1FBCFE8F5FE2D3C3237D79C24861462E3A142 D9D40B38B12D83B58D6E52F5F771D016C970E5CFC264F50CF844B3A9B6463B18 E5C44E316F5836DB33B40B2E2E760926622BED0CD21DBB2EAC1AF9144E93868E 52E0473FE0485C84EB1F2C342A8E282C9CFE6B6F710764344EFC21F058DB25A9 A9AF6E085A915340F5521881718AF08CBC0EC06F31C0DA098793EF6500A17277 5CE37D8AC753BD0ADDE71A846DBB15781C4DB709E77C56BD968BD5BEEC9C6FAF 5865A574B1952E23C7C9CBFA3C05B6CC7E1DADD00AB73F4D1B14BCE59373C7BE DCD98F44C0136040E899C38FD6370F4439C832A6BF4388325ECA3ADE7BA67D0B 551EC5D347DC0E15C269A2BE1EA28A7688A8A68B95CB13231D566498B9DB4A81 F29FD32B1CD8681F5E7C4B17EC0D4EE4DB2BACCBD0B48176995F8D8782467359 A0185B85D17159F66B90F158510A1F32197132B56E302480902AF31D1113B979 51E9C3D0EB16739F49E23A9F908799380DB5137DED7E0299987A5B918CC370FB 616E4DA8C895956C9E3D7A910B62EAA610B27BF97078FE5B29B778576E65D3FF 91A0777862EEA8AB8FF439A4049435AEDEFCD7FB3A1085458C404EFCFBC13332 37553A8DE2F71B303667A67E2E246D368A0FC2B2A48F5ABAE2A3A61FFA351C67 FE0D9C1779A1A8C782AF9723F994CBC9D64C32D121E895690AB02E8AA2A880C6 420D1F963922562A327F63B55502B78888DD637D24004A39A1F0DEFD6C32B7C2 866D18BAF0B0255503A1E3F4E79A0A60FA43E477AD8DFD94E10F3E102195363B 946C23975AC20B7975DB010AB849A77C9CCA815E0BF044E61CAF0837E6712F32 AD8461459F513D7BE79623F5AA2ABB20A94C233E2263B4230D9EC02966844AA8 907CE05C3C384C1AD44A842832321E4DF1476E57681807AA245797BC993EB2F8 F8EE6EE2C4D2C67BA06B781D7EA11DE3E1E3AA5A603BA66736F937647FAC7EE3 A8A7674F872933105289C4D7C7F89F02771DBD555CAC91EAC8F96F31EBA543AC 349BF0DED00ADC9F9E52BB8BF7C8BCB8EE48914ED4214620D13BE7300ACA4F93 45A8A648A21A319D1A97B8183CD0E8AA37D874B6E85995BC880124D885D80D68 6285C2F9EA352DE347BE224C233861596ADAD04AFA12870A50C292F4CD526345 8624ECEF4C9C973C8A2DDDF172F6C9B0C25356DFBFE35423B7FA803609F03832 F0B89BEFAF5EDCACAE074E77AA5466427ABB0E75F940631E100FE52B9B6D08A7 677FAEFD848FDE552E57B63E26763ABC0314AACF5AC2E4221FD1C52E1F74633B 2D42D66DF8D21BFE933C5D7087BCD0F67A341D61ADD6EC5689C26C4442CB1EBE C579FC92DEDF50E358C2FC6675CDEFAABB3D933F5C058F35579D4BA3913715DA B7A8A16DDA9D644D2C190CB307F900D071117D12F34FA84F7AD241413AD31581 A9B96E5015A288D0AE21BBDF866486534D99FAB9C17098F3587241FB8C2CB384 E71CA1EAAD 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMBX12 %!PS-AdobeFont-1.1: CMBX12 1.0 %%CreationDate: 1991 Aug 20 16:34:54 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMBX12) readonly def /FamilyName (Computer Modern) readonly def /Weight (Bold) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMBX12 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 12 /fi put dup 40 /parenleft put dup 41 /parenright put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 71 /G put dup 72 /H put dup 73 /I put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 90 /Z put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 113 /q put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 120 /x put dup 121 /y put dup 122 /z put readonly def /FontBBox{-53 -251 1139 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5F0364CD5660F74BEE96790DE35AFA90CCF712 B1805DA88AE375A04D99598EADFC625BDC1F9C315B6CF28C9BD427F32C745C99 AEBE70DAAED49EA45AF94F081934AA47894A370D698ABABDA4215500B190AF26 7FCFB7DDA2BC68605A4EF61ECCA3D61C684B47FFB5887A3BEDE0B4D30E8EBABF 20980C23312618EB0EAF289B2924FF4A334B85D98FD68545FDADB47F991E7390 B10EE86A46A5AF8866C010225024D5E5862D49DEB5D8ECCB95D94283C50A363D 68A49071445610F03CE3600945118A6BC0B3AA4593104E727261C68C4A47F809 D77E4CF27B3681F6B6F3AC498E45361BF9E01FAF5527F5E3CC790D3084674B3E 26296F3E03321B5C555D2458578A89E72D3166A3C5D740B3ABB127CF420C316D F957873DA04CF0DB25A73574A4DE2E4F2D5D4E8E0B430654CF7F341A1BDB3E26 77C194764EAD58C585F49EF10843FE020F9FDFD9008D660DE50B9BD7A2A87299 BC319E66D781101BB956E30643A19B93C8967E1AE4719F300BFE5866F0D6DA5E C55E171A24D3B707EFA325D47F473764E99BC8B1108D815CF2ACADFA6C4663E8 30855D673CE98AB78F5F829F7FA226AB57F07B3E7D4E7CE30ED3B7EB0D3035C5 148DA8D9FA34483414FDA8E3DC9E6C479E3EEE9A11A0547FC9085FA4631AD19C E936E0598E3197207FA7BB6E55CFD5EF72AEC12D9A9675241C7A71316B2E148D E2A1732B3627109EA446CB320EBBE2E78281CDF0890E2E72B6711335857F1E23 337C75E729701E93D5BEC0630CDC7F4E957233EC09F917E5CA703C7E93841598 0E73843FC6619DE017C8473A6D1B2BE5142DEBA285B98FA1CC5E64D2ADB981E6 472971848451A245DDF6AA3B8225E9AC8E4630B0FF32D679EC27ACAD85C6394E A6F71023B660EE883D8B676837E9EBA4E42BA8F365433A900F1DC3A9F0E88A26 30F19E1BCE69BC499D860F9B17E6A78469F652E1F327CC0F46597373C632CB29 0024F4D106BFB1238E71B84E6AA5B2484CF0DD8ACB90BE5E4916214130582DC5 BC26EE23C6ED239A49243B7911EA4121E5BEF60E3379B6685F548F185BE9EA5D C95E517502C6DBF8590DF15D0C02C9DA2A1E8B4DF6BC91BCC47A7CC93B3FBFF1 809BDC20FFC64F51465FAB3E491B746843AC44366FACF4E832A46C41BF4E3C24 63BC9F166A8DDBB29AF7A52C88854A80B3535573458670CEF7803A6247CD6E86 0320A45778B09FF104C07A1A878C6B78CA980184C80B8495AA0829480CF3AD57 7C3875EF1C49300D6FDE665C6617A5D424ABACA15ED6AB979840AE517A5EC2D5 100E065B60486C07D3CDE77361209298AF003BB6CE23753CFB96BF3654E4F4D8 0F4F1F85E6FEE5BE8289C31697F19960768430466888F8C76F32D4D0D7612BAC C3D531B1A6C45EE6C8A481A6AAE03B4719537935032DE9695D829968A9251F3C F0F5E1A60A590A5A4E98C992475513AC2721FCAEFBA8C0C52F84352012767B17 A170DC205ED42F95F100851B9E6DEFBED916DF9FA37880A62A459B6D00E5562A 90D6A8D08643275D05CA4BAD67234CC5FC44BF8354A988BEFA4AC5F53B954629 AF85B6FF9218A6AEBC31651AFD1E8D051C2BA28AB1FA7BFF083CF9ED849D2F80 166F0756B627C35F424CA2B70F897C2143D3D24A95A1167462758B933CD27E3A 384C6E3EADC96743CECE944AC61DB601CFDA0D90F04A2DB4C8F486AF2FA4671D 2D63D9518107837E8777D40C1078551FCD23C653C241CD9D5F51482C0FBD3EA4 20F3C4D14F47D60E390CFCB630578E6052B0BEC70F28FD87623C024046C3C219 1FA521528182AA6642774EA7311432D03AC4740D683B550483D940130B1CB311 04746C1848FB5B5EE5F1BD109B58C86DF64371A169E4DADD61B352D8EBA5DFFB E3A005C95A91FDDCC71BB4CCE5277B1734FB8C9F3293A33D3556159803648F15 7534CDECAB9D1FA36A964AB92B380E4E5014086A6691916594EA438AB18A4210 5A155C5BE25484F148ACD95EB59E939F86E79CCB86141A09306FD2808F5A9916 2B2281C96B8DD77DE45EBCFB93697F832C36A682DFE4806A057B67D831D971F1 22DF3A9F62974A042AE53DD74FF8F83DB9C4131B9CAC4D83501A89EE401B49EE E5FB974B00318CE77A115A8971312EA7D68F5C4D43FB70A90999CB95650D5949 96C20650200F0098C69C2CE4C4499AB449732007D634A826840FBC4274F29600 7ED2DCEDEDA74792832E3D059505B422384A96422BDDA3127259F34C5C97B042 FB4E6745F3BB77A9657679F8042EAF0E5C2649AA23F800DDF7B4D38843B2C8D0 AAA56EAA482599CBDCF0CD0848F31BB27527A444147E7984A38D5ECF017D9339 0A36AB57CCBC60A08E2C6F15E43B4399A5155E7C484D4AC4681388FF9E33131A EDE9DB002CF851AA2E57FDFC01B6553B1E18210173B2F0CCC57A0D18EF8BF3D1 77C6817EC7A6BD2E8C5C4429D4BFC58E490F82D9855A3F5174669BF6C0F72900 A9A4767AE4BEE0D24F9A8B8237D59E44E871362CC6D28CC0513842E00CC780F3 7C22C1BC325704FD11BA743DD1F3F3B54CF4FE15DD1665687542CDEF9D4AB659 744D3308727DA3C890B237F3069C6AC7045C6269C32EF582A8560D20BADD4E14 EB9B510D47F595F26B93B48E9FFC81D7BC519CE985EB072E6DA8598E031415C2 16F78C8D855DC1BBBF355BADC0F04AF0B5E285D6ED852FDDBF25E06AF443E559 5710EC45D6EBA52F09854FB71535964AE1FD2E74D458CF87CC786AF6C990FE81 CD7EF456EAECF76CAE6EA9835715DE11F0BDE67E628E6A5B3EF3A12D9F697FDA 158D30DC457929CF40951DF6D3D2A95D132CD178A5FF042FB886278E95401B08 35761DA6F10631C4083FA4DF97A4E43103377522788A9D1AF845424C3F41BA7A A39A66D2AAC43C162CCB78D1486C480931A34FD4F3A3B934BC052F789E6820E1 FDBD9C8A95BFBB179A017B354C1DD91F9118E4C9C1B2D5C29AD4FB92871094BB B0A722C17FACEDC7566AAFE0EBEA4136731A7252C2E6E7F74375F8EFC0B77581 3FA3188840B04A9DF570CAD3CD255CDFC37B308659AA9803882F7B5E47F6E47D C8B83C9B3E0DAF531BB2CE8C00E563CBD484C292BFBF1C124E93ABD0F7BCFAC9 EBB49639291353908047C55A9A2C20B3466284CC845A32FFD8F43FC6327ABABD B8CF9DDE96C7C1B5B327CD2F0FF47C70120724A3DD47B3773D9A51BF334F20D6 3ABA69C7963E72376AD49BEA11DDC1CA4A70507C578C683B1AE6B160FDACF86A 6BD9BFA9D8D39B14B6D7A41F08C4CACD1299B622D1D6A98E9F7FA9CD1AA7F6BB ACD4A26D57FD6AF6649E877F6827C46D08BF29C6FB3084F30159590266F10696 4A196F931582C488A4D820E9EB827F8051C939C71658767BE31C62425580F3F6 75B6E352DC01F8F5C8467DAB29531B8277DF273118F3FBAEED7EBD57AFD24DB7 AD1D2B18C20A7627E7ACD44F8A7C7AE1A6193B3E6CFC2C40C70F018126A45F14 907C6DE640AFC0D8A6D0F4F3031A9BFA5C804F593D7811614F4DF98ABD83477D 3868E8BF0C89B9074E34BB9C0387F634B749941C3D16ADF3CF570F14DB1748AE 2D5CEA6B84EE64470571CAA5FEECD87897CF745401B12213F2CA0EA774587FB8 8E4705FEC4FA063E5E9EB610ED095CE7A8F8B58DA15D00CCE283B926B4FC225A 216B8E595B9DF0244CB62EFD8FE0C5264DD195BE96F4D45F472672C4A3965ED6 8FCE75116AE79445F7877DA64AA809B2B229FD7CA0FB88662A4DE6CB1F9FF34C D14DF9A64AB7F4E31564B6FB84A96678AAE68A3FE06BE23E48B752EF997CAA28 43303A3CC38AD965623F9BFCACAC717115D8A7B99945359B41CA11F6C1E9099F CA0911552C7B7B2EAB703174C90565B0BE0133BB4B558E32F30B5B821B09B15D 9CB2C65144758AF4E449ABF5F221C1C7E2DD3265023A93990C5271492D40822C E7ED9126BDECCFBAAAFE5127DA325C77FC375F1A98D780996BA767A0962D8334 F56AA75CF26E7BA36AC093D72989299CC1D0B20008E33AF515CAE12F899B5A64 C948906154B7CDEB5105F228076BC0D54B202E7DE03483B6D3E1D0D29CEFC939 293B0C438AD9B1FF8546BEC3A484D1BA4CD5B9A238D65AEE7EA484EE0D8428E9 41DC1B6846644FBA6FF066788AB15F0EAA730FA116E914B339CA765A6A8B93D6 F1129E0759E53FA4451730A75FAC0A4EF3771519F9C6273E0439E20DFA978196 69BFEF108FB14735887E0AD363CB3DDFD4212CCD976F35FE0704FA7CCC5B496C 6953643536803BF212CB1DA8050C331FEC075AA060E0C3604A8B649542D72FB6 2401DAADB6F04EB44F0959B9BB090955C7F5847CFE616DA502E42522D7B6D32C 0CBDA0450E16D1972C780DABDBA36281E46F2A6669648A47948B29A6EF898C10 CB602ED449D9B3339C0263FC24A987A4B3437F67627DA20F201974040C54F95B 84D0426ECD8A5EDDEA670E50C969FC139B94838AA85D7A1C5E6516BD4A389446 90AF8CDA506C91E202999DCDC5197D1482D17099530440423B76B5DC0D046D18 5C1B643ACA8FAE21E03D30672705E42E58EEF7EAF572532E6986731F75B2B4F4 6680FA9072888B84E01CD92C3447D856712220DF37AAD6E33C13E76D70251C19 4652712D3F6D44A069AA611915A73C42E1649C907FB53F211DCFB7FB64006EC0 07160B4AB1DFFA41AA14213C7A2BD2C1CFB8A8062C030F6EA4B4F129A083EDDE E5DA866AE0B35930E1B68C129F1D03C85F4D7A76B681C9D676631B2B8B6973A8 816AF640E67EF0153FF91A6A9BFB1A8F48F9502699A3D54D7B74D7A9F7252367 78EA29913880B7F8E7801AE8CBE8483F9EC70BB1747B456B00683D51D7B5E03C A21A3A77376AE9DB41E6BE190C0443416E5ED492462AB9C5E561C80E98C42936 B69F8A1FBA4C3BC847423B672C6160E4261103D7244A07BC8B958C989A79A143 A06F4B16D132191EFF5729DC2488A05DFF7381E02A775BADD7D4C6770DE67432 B76CE31C5562EB6029033371B64861027D0F4B482F3B7C820ACCD72F714DE076 5E81F108AB995C8645FE122D7B6AD7072403FEC7C3E93F2191001085DAC15523 569391DAD826D7A2CF377A3CFEC812EAC64E7FD415B0D9C0B9DCC3A52DCEAB67 4B71604A2B8BD355BDA2A2BA610AAB1C65FDE10716936D012E855BEA69AEE261 544B3AF03C564FD8B13F5F5D8861B0657D4295DA7D6A03351B237649FD1F6597 EC1A1C03E42ECD9868133D9DD717655A3F873B9BB98FC06527462F90965DD3C6 5343ADBB00888E17681D2D3EDF3DDA789E51AECD3BF2BAD0591BAE8B03548AD1 28EFF018CCEC16071B902B1321F417335D50716E930FF70A936FDC30A3EFA8A7 991ABF38BD2820B64694D57DB7F1C866FD1833317A69D6736F39B2785F5EACA0 706B0CDBDFF34BDE47406080A5FA5E844AB89A0915216CF11B0483D3F1432424 C961319ECF0589C052B0D6F8908B6466D2BF1952CBF553ACAA0B7D252B5DCF63 8EB212BA4E13DEFFF2913D43A1F898E781E716AD91F116083B41CA16D24552CC 3E1706CD87B6E01F14698C64889367BECF0485C1920373B202C04E591FCA83A1 0810BBBAE4441EA288046246D0EA3378AE0C417D263720E334E73AF488D5CFA0 C27FE75F7A4D7329E0999D08D23C9D76C8B17584F2BC8519BCFCCDD8D731A27B 8A7EBE4A194012D159C1F7BD6796AC2950BD7A3D47542FDFFCE5B7BFD5D2C05F B2D82F8A7D2A7469EDBD4559090D528362BF97DF0CC686EF549D3CDA349D7622 43264866FDE915AA25C67EC387C9E87CBCA12CF5A2E431F118498CB8CB4B4659 54527F0CB32B73C79BDFF55AA67EE4C71A2AB58BBC47554264AA514D96737C31 B506AA8D4FE74126074E9225D6781DEF9FA253975D51E12E5485A7C3E4900CBA E0CCFDAADE90CA8B02162D4600CA4D1163A1045CE3C70BFDBBA490551B03F13E 9902EC46F18E42DE95740ECC680590BEDDA60FD41B5B9812ADCE7F071881B925 867A6F957E05CC8E0584F76293A3694C6D4CEE9E4F0DAE694297014A4E501224 EF5DC464D4162BC36488093057155FBEFE36EF39DE79F385C509FD86A268428E 810981FF11194B47A4627734359A9CFB5F47E0685E0428BD2D670B218618F229 697A438064165BDDDB8817073C64CC58BDA04EBA5D5B8C04A93CFF4BE9A2F900 441B78BD58A91D3FF452AA5F397A5E3C9B11E9CA7D0C6D5ABD57DF1018E16B5D 14CBF15F2B5D7475CE41FA72384BC1FC48A0529E9CC78E4F6627929088F49297 F312A04215789399B0B1D78941C654BF3D1A470D53D137DF9576D13EE976E0FF CEF5D97F7B3D371B0C619F818D53206EA36F7522807823D0793CA9D809F1B478 75ECCA485D819ADDED65202A89963B6B1E5A4A25914FB85EF529A93AD3412F5A B9FCCEE578595BDF613E6EBCEC0E507A625004B0901E86EA31BC1851396E0F11 D88C7B2A519C5E3454A1F8A1099C60DEA08B99CC10ABD4DFC93099E2E13A6FAA 78D7FF2093901974600F84D5D91EC421EE832093FB679D980EC50C6D84D9E7ED BAE28D966101D85CDBD1179A193BB717A254927A21390B353F56DBDBBB68795F 776F6FB1DB6F6D41B7D5342D36AEA6E1F859509B83DA6406AFF45F71476FD2B1 612A5A391B010D1309A58A7CA7FE585678DA272F367BA8D0539453E92F2348F3 208898151BA6A7E8CEF4978E026911E7CC3D2E3F11F168011412B739D4539FA0 77F869EE850646BE93001A709E4CDF1150E877427F8EB8257FE6684AFE5E616C ED622C67013DC928EDCE79F2B4AC3CAEA629995FFCFBE78109EC4D5D9CCF50E2 363D8A6DA585A43700A862714742118474355614DD000CEC4722E5F6A5847F1C F888A53665CD7A9B667589D7FF5B533A513E4F19F93D2FA9B86A67656C1104B5 963D6C8173D4F181EBDF186763ADD98E4499518FC0CFBFF084BC509DE4627D70 5D1EDA79EECB6B8E626CDEC3EFDBBA5C078FDF2CD197E73980F80BC61FF113A1 2B4A405B701A984C89CAEA97A88C87B53F76FAB2F75AE0819416FDC4BA8BE946 C25323AD04CBF73468E9E8CF64D545C1E0D5FE23792D4D48D2109D8787E7CD45 28E000F8E4934EDC9F90A89A91208C65E31F136B1AE6D3530B988DC77E0818E3 3E74C5DC72683965FB8703DE9D7D5648C506D0974D418F3F308622E2FFE5AFCE FA720A0A5BB82CBE653E223CAF707BC3FD56D6440352A6CD7065EECE635C68C1 44B392D355FEA81A4F3F1D9E4ECDC8C0B48826118F5E5832AC9E233F6DE00D0F 2545C44690AA13DB1D09C4CBB4459E58C0A33E052CACA89D9F398361656D3628 485E1FFD4F027020B3422CD80F1CDF283F260AD42CF93FF8B24940DF358FE1D7 E75196447CDEF8541BB215788D8AAFBA1084901184B20885297CDEB68586846B B3440CA8F066029C20FD6F2CE9F33E732FEDB79C874DBB8C0B6C33A2706E3AFA 6F625F48135B65D8FB91F2A8B9A2E7A3891FC07A79487445F6802EF8D69A2FF2 029B739266148AC675D2A1EEE1194D75276030B9BD16B9A596A335587CB157B4 C415A249B54240DD7EB343B60DD97439B8A93605EA58BD18C5A3F28F5B4505DC 17549AE0F88030B4F59C96F1EB00B02D2DF7934A605C6698CE71692EDD7069F6 2149038F08DB3F15F3BD881A5F3F57D538A2E481A4A4F38C6A6A9B7E99BBB353 5D4F68E67E7718F0C32032E3B78D93040F91DD2E7499E24660955020613273DF 0B1FE3EEEDC01D8D5D9DE9AA2EB5492AD936129D5BEF48F8FD677ACA7D3ACEEE A467493381D478F57328292415BCBA25539B6211ECDEE7DB867D4FAA404F1298 2EEEC3E92B9188B7E69DCA33E0F4B819A1C7E23FFA8C24D3E500D5590D8AE8FF E41AD476A1416C7BB0E44DA2E575446F3DDA76990CCCF85C60D61B913DCD91B0 F56DAFDA55B0FC7626F4B6730933B623A22CB2F16BAED949C0A7E01801F44D47 DA5B37884A05C3587AED8A7F58A9679E50B8E688F426B87FEE7A5ED3ACE2F5F2 91F8D8C5639E2D2B183530F359D90FD108140278FE3591E9FC318347FED4D56B 25F95AD951509721A784EC5A800832D72EE03698FEDD42F44C1BAD22A95BE7C1 10B8D9EA0F29E24FEB76D0829B996B5801B9A3FD705F873432B0BB912FBE5525 9942F6002CDD38F41B6A2C5346B62BCBBFFA0184D98136B8FE70D2E3B2D3686B 1285E358C3AA1B3E404A3A9277630EE23524546BB817F7494443A254DF391C45 9B0E029BE8E550F5852037574C57E745B1587CE2DF9D26BFCE979851AA691F12 2BC689ECA8B19201CD3FD24C3E24798BF5E031520BBD233F222D23007300452E 5A0CB7490C86332B5B03D150FD6D9AACD888A7F9CEC625836FA5F28461DB8445 E53F923BA21C097DBE7615ADC0F0D4D56DDCEC2F8E6DF57DED5403AE331BDFEF ED104146725602DB7E97E07499D37CC2161877B0C86F7524866F14A670FCE79D 9349844194E5A90365073E17991B0C3618D64F4DCBCA1E0C1CAB4E0D74F6B298 4CDCB6A0BB3588A4006821D6E86D7A6DAB5EA62CEBF2648B9A14E215B0D106BE 990FC653E2E4C202DF29C270C35E22725132C43B7A90492F33480A98C5B40D03 DE79B092A34698F12E8C2FE3639CBFBB5AE65E7885FB908835EEAE8132C3DC98 30C09BD9AE469B9A1A9477D01C72E9E813D5D53E335551DF7041A31475EF9ED4 106188738F2153F922B49652BB4E28E3B2CCA8B62D3DFBC3288F88E17686D572 6ABB9B9224E8EBF2ABB75E6F296389519201C62F8814C75789069C010EAF6221 09D76A3E2D4DA88001859FF8EEB7BC5E951FA006D610E77C2EA86972A181F280 BE6A620831E6AF01518FCDED5D8DE72B033785D106C69D3D6A6979EF335330C3 20E63F1F5266E92B680C32615AA0A70E79CB4FABA81390FBDE363D6EE313EA2E 345ED24396147CAE933F1394FE2A0E5F07FE09C344DC7D0B23E069F333EB3EC9 D17C877C481652244D1F3437032338587AA19E28F4A320BD03CE702C67380657 0B44263346669F57D7C90E64FFB584AF2BE15981C8EDF6B65097EAA68AF65391 A769F9F53F826119F3524D3100411D569DB4B030AC046A0FB2A26C09564658F0 84A537580912F9E001877E66BB43172BBB0A40C73EF60AE9C3A791DF15C0F99B 793321888543243A81259EAB31DDF383B5B08CE989A8373805FE0C49050E0210 B07F17FB5F57D80FC977870270E5CCC39D7AEA7C7070D99136FBF6EC505E5580 F806D36FC7B85BE6501CD00CC580F80761EAF779809149F34A7F234374189318 4188F7B9465A278719779B356EE007FC7BA0D23D549414D31F931F9D5749B2B5 3A4BE4C6E658BF3ECD68F129AA4CD93A19AA0054C9F2F1356128BB801A8BE677 315D11C828F67109B4FE65BD2ABA965E7C918664E00A647A6D43A301A0EF3826 48A5DE00F831F216AC1DAA325F5E39EE553034BE86F5E32A3AD1B71CC08935B6 B6ED2AF22D72D146154A8CFF885AA948BFBC13E0260FB4246823AF4CDDF0C731 7B2394EE04CC7565FD262AA519933F6B714ED6EF99B6D01F997136494B5D86C3 1ED354415A7F59E2376FC01C21FB00E87FF7E8BDEB47CDE4203F880320A427E6 1A2B5FB3B308F0419FF96006ED9F890A01FEAA23005A0BA580320E10B5F26AD3 BBCFA3FDF02DC3C759E50C717A33D954DCB47EB8E416B0196B0191C15E89D10F 413C75365DF26EB1B468FDA3C1BAAD3B3EEF12BBB91D3E581F4BD9CCB379FF0B 45C1778EFE80DEC0422F3B8F0B936E0AA28B1126C0573C96DE2421D0079C2A44 AD5F2312BD5F2D2E951D0F647414615B81AFD5ACACD6B55B05828221E14A12E9 BD3FDDF291039274E91F7F05B82B52B292AC49C7F5DE132FCDE85B35C554E95E 8538234685F3F534B125B10B401EDBB5DF6EB903CB28C62F069FE480DCCA9D2B 51D1CBA093FBEC5B5A43985143EDA46249ECA3A989BEDB3B940DA7D0406EE498 7D461F3558D6095D6712513900523D65F8E8FA81B3791FD638DD090A7CB8C98F C07DCF8FD37BF4390D08C78A52CDCC436DBF56AD3E40F6AD2C132D8ABC3A55FB 50DEA640371E91F7973A1D4FE1DE12EB02071BD65A71B7C132FD2BAE50097DF6 53D3BA72D992DE74156A5AF87B8CBB15FC10FDDCEDC48B9E166F109278115098 3084E999332A971CFD510D63483F6679CF8728F8C93656DD0695D948A0C0C84B 7238A2C00A4E9833B3FBDB08EB4DE60C346B78A4C8B0FA581C5A93978D38C163 2CA395851BE13B7F649C4D1C0CAF4754C4A711D6E836E81FB09529296294BF0D 57F1746970ED7C363B726826B9A12738EA3E4F2A80C3D724F8E0F6F3632ADB27 319B08B9BE64AEF9A34D81CE67949CC15EEF26C3B1BEEEA016536ECA65B65336 BDC3FED62ED95AA887675AC57B91E03602523DACE7E8D850CAB390B49AB4A00D 73F13CD4B4857060F1DE4B6DA4D6A0E8F70E8752747251E2366C1EBE6C5C826B CDEEA1C1519342451074C33962E7CA807FFF39FDFFC7C2D441581E8D834713B4 B54971DBC836009463D0E635503DA63EA897E78BA09242623D3E25DC9B7BBD3F C24A290B5D815B630247A36A734E9E7D77DF2C7621942DCEEE49A8D0DCF3515C 80D661C6AF98D72B5729514909DD3F7FCDB02D12527FA1EABFA0737D8556511D 44F9888D8DEE8B5B3A061C3DFDF85D96F8DCD224954AFAE62C132F7B71DA9B6B BC3B54057B2B7E0455E6A80296733B10725A14BDD28243C2824FF37A248CA220 777B0933689F2FCCF69BE43F36379461CC0373155D7CB683B8C0FE2D1BF9DEDD 259C75D85967AF0887EBEA511B9C1D0B537E96C7B91E84F5616D1E2AB6150C14 55AB8F33F1EF517849541029FE560D3A03EF5345DEC0C977E5CDD26270628694 650B208D09DCB9E2972B227C7CFF5706DED731899301CF5B3C3AB4E88D4EECD8 4B4E00ACFD8482D62B837248E5CE89BB7BD24864D732CC13A39A4D281EF86DAC C305E8CC30E8E30BC4D53598F57790A23524B1547394C60121572943FB4A85C5 5EB3F9617FCDABB96E1B24BFFEDF6E0BC6757CB74EFED9E28D14D431A0EC990E 8DA7B5F9BFCD845D946AEC72C980C03B58FD7501423DA04D6D5AEAC0EA81AFB4 CB18CC92EA90C226A2A58DC08C4C54C7F8ECF6D4B6A5C40ECE286CE9F19D65AB ACB4AECA08175AD9B9DA900B74399F40B3C2DDBA939AFDFEEA5756C119CD1DF7 B92B2BBB0DDC10ABF94B4E0E3DB498C774D8BBF8F1280837EB7DA17AA5F98C7A EADAB40350D733F6D745768A5DF8808E76003A3B2233C094A1DEB61FA33DF151 906BE978F8FD783C618B1895EC38FCF5512AFD467DCB3A987F056C62B08D8310 FFC438E4AC0A55AF880DC06A0DDA1086F5929D32E20CDA4569461ED935B76F8D 07390055D5C99C89C20047363C2402E7B3BEE04D71F9F950976DBCB2DE14B2D8 37B4F1993152770DE2B14B47D9CEF068D7E1DDEA283BCCC13A08B9FE6FEA0F88 6A533C2AB2B470342BADA6AA8CD870C7F7B2EE285DEE1CDE87D651F54143CA22 0D2095ED4B6C00ED68926A1B77462DECA5179048E7017D449ECA4A466B5D46D8 53FFABB69477986DEC90E0C3A6C8A28B0C487C822AE40C73FDA530AC24ED0163 BBDC9D53A576355DC7355C77102075B10DB485DFD5562D853C39701CC661FAA8 58FDE9D06737096F33C3D168F8B6A373AA610F73E41F5FD735804326F1C95FB1 5665EC79357AABF1F24FAB111A3EBCEC2DB966BF0A2B53C1F25465B29E11E241 064D4D83D66D95C1D982CB10D6136C7BB84B1CC5623CAFEC830D470F6FEEA4CA BDB2D359CD9DE9B7A5EF643BF76BA4D37B95C0A8B62B1F1EB693E6421D671777 D090E95E7EB4330DE177BC482EA750D36F0A1AB0DCDB38D42AE1D5BD72458AF0 3E846C7E86BC2B1D8D72A60FC9ED3E96409EDCCC1D6808D8865F43947E84AA16 47BB17B5A5C042194F3D1004E71C94905AC260D5446902478AF0315BA62909B5 8740DC946AA1591B6E6182A2F5EBB162E3E75EDD0C7C71B55FAFCFA071D155AE CE5A87968B507CB8250412C4AAFA97E137A141A5550775BF7C90997B6D863258 2EC919CFD37BA940449513DEC181CDA0624C3D4549AB46A8A17F1C1C5184EAFC A3ED1C4B2B5DF33097C66B12C3C113AE76C0257CE8D97F1F8C1C2ADDE761FBE6 0FC023ACEC718C69791AB59BDE4A85910A3153598AC1DD5B90818965AC9558BB 22CEC8D45C1A24860CAB88F57098A8503A56C4919C9CB10225F7A47A4DD1F164 4AE9AD9EB80CC14E8FB6BD5603242E287D196361F5853A4D312D4974D339200A 2E68CB2684D593964402D9C8319BD6688FB360C6ECDACDA0701958EB893C2AD0 80F8F8509D9EB3AE2FAC4946DBECEC4028D0C6E5DFB87ADB9A6BB135382794EC D2E9C8E554F27B9AD1DF1CCE37F3D7EBB387AC3D63A2DF2D03D83E38D2B961CC 3C5FA417914668E65DD30AAB8511D699E1759DBB733237F6F8CC7F9F2EED567F 207FD8DC3453E0659608ACC0A880CC21B3298AC2FB4D368D5A3CEE12BA5AAF62 0ECC9DC1266F00CC82D6988D97B398551DC5C660A755200B7FF836F926EC75E4 48B3359DD09477356202BC99B6D389863B59FDDC02C091A500BBCC347460F05A F7DCDBD306E7B6ED372F480FB6F41A9A53E039B16EC1D31AB5CC80171605EC39 CD0BD4587BB055C16FE1C61A653846EC57A225C4C65C7FAC787137BE909E3F03 F388D91E304B45179631C609F5D53689D7F400B318EC2A30CC285DBE98F7697D 2EEA02B4818BC00D34A548A1719FE8A48DFCE8A9534D0C23E5960FB2539EBB65 20D9916B8123B2B580CFCDCDF7FAC448C1F4AFA12622492F16B4C3F6ABFC5799 4753969FBAD9A19CA6264C1A2C4D3FA72071140029890F3B0A4FBFA4F64D3A29 F92691FF93867D67FA711339877BA319CB1935B55F3F2D9B7C2A79CA5AB30929 B3CC105090B79C3264AE766A91E8FB020D6C61338214B81F599D47EAFE1EE6FB 1A7D09A028DE61F14B064C7097E41CDFAA77CFF94FB6C169F75893B8D8A2CE5D 5F7F427D8AFB0BBD8FDFE49E3DC0BAC2EC3FB39AB29DF2B23C79E40AA674665E F0940062A909A8EB09563E42D0EE8A56AA440CA269460755E8679244C91C3D6F 102781B4FD6ADC4A0DD3670496842B00CBE0E202A96AD8FC9A26EC9C7E863A20 A94F86381B6208164A099AA59D34D579074DD2C8E223DC2AB161A800F763BC65 323700E0F15A9901A1D1E854013B9E61F43F0378996FCEB7FCE9A2E3FEC93AC2 73EB2178ECB1FAF64E4A1418D381DBF7EC1AFB2336A97579D8A6B532A5F49EDC 82AE9AC7A22255AE0EBE778B75D9986AAAA16E834DE75E7F040C24E6A7F93666 6899F51E10A0430E89DF6B6CD682089C9B5C20EB6A3D7EA635EFAB7E1A6B7D68 A3B7E003AD2CC6B99D6206E94B39E8EC6D4A7E251FD9479563AD34B8F28BCE3B 9F4F28FD84BD4FECD2C44DB9D97CEF05348690C7A3894E6739ACF7AB6DCC93CD 1AA544C471175C4024A37779121FE3ECAD42D00C335974454A53290CD2214733 72DB9B951D37E3004B18F41BC01C88B4AA90BC4FC66444271C754039D92936DB E6FCCE7CE68008AA02FA2A83DE7447ED 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMEX10 %!PS-AdobeFont-1.1: CMEX10 1.00 %%CreationDate: 1992 Jul 23 21:22:48 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.00) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMEX10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMEX10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 13 /vextenddouble put dup 40 /braceleftBigg put readonly def /FontBBox{-24 -2960 1454 772}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF5B8CAC6A7BEB5D02276E511FFAF2AE11910 DE076F24311D94D07CACC323F360887F1EA11BDDA7927FF3325986FDB0ABDFC8 8E4B40E7988921D551EC0867EBCA44C05657F0DC913E7B3004A5F3E1337B6987 FEBC45F989C8DC6DC0AD577E903F05D0D54208A0AE7F28C734F130C133B48422 BED48639A2B74E4C08F2E710E24A99F347E0F4394CE64EACB549576E89044E52 EABE595BC964156D9D8C2BAB0F49664E951D7C1A3D1789C47F03C7051A63D5E8 DF04FAAC47351E82CAE0794AA9692C6452688A74A7A6A7AD09B8A9783C235EC1 EA2156261B8FB331827145DE315B6EC1B3D8B67B3323F761EAF4C223BB214C4C 6B062D1B281F5041D068319F4911058376D8EFBA59884BA3318C5BC95684F281 E0591BC0D1B2A4592A137FF301610019B8AC46AE6E48BC091E888E4487688350 E9AD5074EE4848271CE4ACC38D8CBC8F3DB32813DDD5B341AF9A6601281ABA38 4A978B98483A63FCC458D0E3BCE6FD830E7E09B0DB987A6B63B74638FC9F21A5 8C68479E1A85225670D79CDDE5AC0B77F5A994CA700B5F0FF1F97FC63EFDE023 8135F04A9D20C31998B12AE06676C362141AAAA395CDEF0A49E0141D335965F2 FB4198499799CECCC8AA5D255264784CD30A3E8295888EFBC2060ADDD7BAC45A EEEECDFF7A47A88E69D84C9E572616C1AC69A34B5F0D0DE8EE4EDF9F4ADE0387 680924D8D5B73EF04EAD7F45977CA8AD73D4DD45DE1966A3B8251C0386164C35 5880DD2609C80E96D1AB861C9259748E98F6711D4E241A269ED51FF328344664 3AF9F18DCE671611DB2F5D3EA77EE734D2BED623F973E6840B8DAD1E2C3C2666 DD4DD1C1C9C622FAEAB9D3E54476B49A2A026565F10A907B3B33DED2B3AF7DD5 1A717C6F3322B4061D682CF928DAF9EFD083871A112BA0A76FA34F35E60902FF 02043D966A58A3E052612591918ECA0DA4F91C716FA786D5F449D30797C9A503 CC3A862619988DE3BF464699211760C6DE2E72A409A6E3B64F1372A87875A94B 9B39927313A0BBAC8698FA32DC59706310E8B5AB38332E2BA87A0088E9864651 75ABB116518641B3928D5B79B86F4623858BB05A98268AFF07ED326E70D72AFB 9B8891EED3C1252ED718CC96E8039D5CB48509D79D04F57FBEDCC72FB443A1A3 DAAEF15C92B8D26BE384F3D15F46DC9F0FD5418C39389BBE406FDE704354C50C 53A66EAA65F2CA79BC5E914BEBB4DCF05FC36D708921F561DA780DDE250D7527 D8945CEC0C5FC6A9E153B490E87BC1FBAFC3E6E54B150A7F4BC68CF3F1A34BCA 0E6837E1FB1161B21F6AFB9C96A4BCF20715B1A3CA58A12F1438566D1B6F5A28 25802A2B900186ACD058A28063802BE668E7F40F1A42B6D024A6CD0F4D6A612B 3B2D666441A23E29168541EE2A3726C28C1B1802D38C28AB7D7D714A828E5755 E94AE356C7E4C1E3C09661AC5864407EFF9D00BF0E36902F8CF423ED12E31CEE 97B7907B0816C2A5CB68CAC754E998C35AB1A0CEC8F4907C3CB08215601BDEED 91C3C4A9F0236964FF87 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMMI9 %!PS-AdobeFont-1.1: CMMI9 1.100 %%CreationDate: 1996 Jul 23 07:53:55 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.100) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMMI9) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.04 def /isFixedPitch false def end readonly def /FontName /CMMI9 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 63 /star put dup 79 /O put dup 97 /a put dup 98 /b put readonly def /FontBBox{-29 -250 1075 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D5993DFC0930297866E1CD0A319B6B1FD958 9E394A533A081C36D6F5CA5FED4F9AC9ADE41E04F9FC52E758C9F45A92BED935 86F9CFDB57732045913A6422AD4206418610C81D882EE493DE9523CC1BFE1505 DD1390B19BC1947A01B93BC668BE9B2A0E69A968554239B88C00AF9FBDF09CCD 67D3B2094C11A04762FE8CC1E91D020A28B3C122D24BEAACF82313F4604F2FEF 6E176D730A879BE45DD0D4996EF0247AEB1CA0AB08FF374D99F06D47B36F9554 FAD9A2D3CE451B7791C3709D8A1DDDEFBD840C1B42AB824D5A0DFF0E0F15B0B7 22AEEB877FF489581DA6FA8DA64944555101EB16F7AB0B717E148B7B98D8DBFD 730C52937E226545CF8DC3E07C5BA30739BAFCD0F2B44275A6D503F582C0FB4F 449963D0AD2FAFDE33BA3D77BCA9D1DF878DDAFCA2E22CC4BACD542B282164C7 97C2BDE318AF9D501CA21F6E662E7AAB75A5F24D2C182E598D175D44E88AB19A E7CD59584F95B389183EE21B525BF52A3F23C0FE5383A5565A19361D716F508C AAB78411CA5A4D27552CC1C435760D5A89D535B71C593E755C616661363308DA A683F54ED0C23FB2C225A008392B0B719F66F11A946A090B7C00B662A3C69599 B4ECB0CC70C85C4BBBF207E0026F6C7A19F2ACFB7A60804FC98A4BFFD7BFFF2B 9529E6D9D4238002BBC255BC62959D6F3381FE06E0621B879D5FE5B541D45A1E 759A6E7DC32B1D1632368D09A97039DF255B6492B1B2B7E2C1434E8306ECA7D3 5A79B6D614B4979F10988BC76ED53A5F45315CD7DA216221F842FD0F3E050DD2 BAC23C984D506D8F7D614BCB6B244F5F41321549BB0BD041FBF3053307168680 3435E9C9438B42FAB00E47C88F11FD0F6B4EFE51F0E577DD810056E37D07C868 3A15DA228A6992CBC01BCA5F12EB1D79E563FBF8DD6A531A596AAAE93347A442 7BD14A0CAE46526ED7E8C51FD1A79FFE07197D5CDCB6764C74D82E9531E007E0 EADDF55229017AB21384FC0520CAF59AE78559BD17FE97FF5EFBF496883F3E5D CBF9C7D944C058CA95B60727F99F0F39AD3DF1DE681E52A352AAFF400FFA0E94 9AC1E3AD9C06A2D1FA23390A39C6F5156E008578D531906A84F619BA12AB472E 16B641A7743A09C02E45D761419F9E35737593D75804F6A950E672D095E7E575 3C728C4F8EBDD91B7D8F1F8FDE30C8D8A532580DC7E1CA05DBCEC93391D344CE E7D63BB7298C93EBF02866599116A459882C7979AE47C4419845AED1CDBA479C 5A8A7B06D67426E0439A294824CF9DD2586FB93E8173ACE7F69720629B872B39 DAB2E001679EA3DF136E434D1725CA745E82474F89FBFB41BB946065E0FC6A71 02232DA9CCED093679E018D34A1EF00AFBC264871AC044F294C77562B8F52E4C E530257F3D2A875A54785A7E9FB7D2BDE79E36A768B10CF96B3566DF8BF94208 AF060F2E167FCA10FE093F83F260C75177C5A1B58D5DFE4C513A72F44CC53211 CFC718B6A019110E0322D51C33FEA9900D011F08279F99B2C22D861B1E6F413E 5AC8A690ECB8616BE318B3F38079230311BD4C4336E7A135D8F65CDAD96D939B EB18EA199343FC84B112D363B87ED835C13FB135A5625EFDE76F320B1DD7F2B3 B60484A06D32FD8D7AF153F1C847583FDAF5CEFEA9F0708724099CFA4D1BF193 8B20B105DAF4112FE09E8C557295D32A7915F59D89741CCE24BD7A82215132DC 70061654BADCA5D0A36851C4FAC9859830537CF66368C624E1823407EAD82DFD 22736F17D8506A726722279E15B9CF11BC551D68F8933E99D9C94C0815B340BC 7CDDB07C33FE70A97DBF0F6DB74DFAFE9328E67A281D357996FD0A6F08C734CB 38332BB4DE1E0E45B95F19B79C42B03B5201F71E5D5EBB278A9A45A9731010F4 2BB1B72E6324068F92F5F58E287A10FA3E78CFFEAA20710351227DA9A5DDFC20 842AE9F4426025D2FE57C3796B65782E3298CF54F4C69DEBC065203152306723 DFABB4CBFF6B259E78309BA1C50AD1F48773E0F0B20F281006429AF012560157 B91EA8F657C17D9F0886FAD0225254 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMSY9 %!PS-AdobeFont-1.1: CMSY9 1.0 %%CreationDate: 1991 Aug 15 07:22:27 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMSY9) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.035 def /isFixedPitch false def end readonly def /FontName /CMSY9 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 3 /asteriskmath put dup 15 /bullet put dup 112 /radical put dup 121 /dagger put dup 122 /daggerdbl put readonly def /FontBBox{-30 -958 1146 777}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052F09F9C8ADE9D907C058B87E9B6964 7D53359E51216774A4EAA1E2B58EC3176BD1184A633B951372B4198D4E8C5EF4 A213ACB58AA0A658908035BF2ED8531779838A960DFE2B27EA49C37156989C85 E21B3ABF72E39A89232CD9F4237FC80C9E64E8425AA3BEF7DED60B122A52922A 221A37D9A807DD01161779DDE7D31FF2B87F97C73D63EECDDA4C49501773468A 27D1663E0B62F461F6E40A5D6676D0037D33F24E2FAC2B0009AD3C8350CDF8CC 65BCA87979C36D14CB552E9A985E48BE4E88ECA16DF418749AF04FDD2B0E1380 D281BB2476BB45FF30946B247DFD7F57305FA87E50CA338121C71CDFDF927A9C 77FF14CB4A1D6D80356FB1171ED38C37702350497B44E42CE31DB2F493807DAA 15B887C671199A54C4C1294BC520F5538C15556BC43C9F62342B121C6DCD6C5F 491DA47FF360201EE21C08A781ED0589A6DF91B99FE118B9B29E4F068672E52F 1A06C514D91C4C937D4E642503392B1CD1BA5279B4457B627485A2758184511D EA4D44D333A4B67F720D5EEF9A5E1AA66DF9C11AA72B65F2A7A8D4C2796662DF 369048FBBE329786BC50A09FF5D30483227CD636263AD51F07A3E7A8EA503223 46929A32E158B32C9A35BA57B632A1A9773B85571D16143BAD0432BFEAE3E898 D679DDD2A73CEF9C7BBAA7194EF95BA77D0CA1136D3E767400155413383E63C6 81ECB34018A8E4FE813ADCCC814D8545054E9D73981030F45072D55B4441A64D 4D9DB40B12655A0D0077EC2C2C71FAF0EFB36028550FC548210F1CFFA82C3869 2D9244F185A45F190CFA9919F2389E10C0997E3601D48ADC0DCAB9D5B0E51D67 DC1A8061FA4908B96179816ED83B8FD55FD568B89E5FB45C12CEB13DE5326B77 12E4B2A884A916C3BED9B45DA339C999457BBF3C4B9BDA8276E296D2DF73C7E7 AEAD1350BC2DC435835D725651C05FBE8225A50E1E22E7B2E7A4925A288F962D 0924B1AE3722842E4DB30B14870F29F6F08A11FB23E3E0AAD6DD690518B82B68 CA8A1734CB7A66F5E14BD553439BE4BD7653861201E029B81EE47F1FE2A855A0 879882AA087E2B14193F79A3C6E799CEBC47F2BC1C658F902EE2C002D7F70BB8 18B5F93C6A3E87DF6A08524139B6A191008297FCB119A8B223DA5C963F8F8420 A996C710905A7F4C916D4CAD20551F861052F3F4A7C24C108F9FD63A9846F8C6 E421E64F2825E5CDC123050317C0A8ECCB712A0A999125173C9A99E5D44017AA 4EC9DE8ADBE7ED1798027B2780B85C4E25F5EB4FACC199AF2214089E9B8FB9D3 4E0276EE405D1200AFDB14DEE909F1FFDE7A7449E0CE93E9BAD2BB1ADB4DB9A3 E9EA9A79177EE4897BCF3BF6F45D9365EC2A291E8C51AF28EC96AB1244A99842 9F996C10F84830537F3CBBD9616E924CDD61B2EAF7A05C2B405A5C4E037DF8B7 4D5F41C9A84E4E3D6CFB7A4F7F35B5161EE5E83B08964D904673FB3E5D87929C F991D1193BD92B8562263C7B5165D5123855EFCAE0119F5B4DB660D1C4D4E758 64FCF1F49A5CFF4517D7143B24CFF700E0DECA45556966BDAE814269A3A2C163 757988A762E4536151D4C94D23E41C798307B099E75577303D593FC1C71B9E95 8C0EF1E04C801430A90CFEDB7023C146347C113FF0D5887D73BE2E4C19E6F11B A74D416817B4F2A2E2961576C8C587ABB710D4FD2B794BD167E0889A7B6AA881 1CEEF2AADB3AAB517EB0EECEEBCBE2D65CCA60D227F7B6975FAB454F09F404FD D7CD3490F200712FE69533C6E98C08A994F80A7CDE2A78D5AEAD1750441738C9 C93412150522BFFBFB09D5A6E4048AB3ABA44A919940E75476F41A96D00AEDA3 C85EBCBD26EE801DF8EB4E10432635E625441FF28EF100888CE46FFD30FB5565 472228A177152641BB157F578C1F4E12AC5E57130AF144BDA7423080DCA406A0 4CFE02DF363811DFB48703C2150CD54298F88134366450666C500A4E489C5C94 628D2FAA087AAD41C4C7FC9D062E26B3423B8DD0971BEE98FD884DD6B3EC1F03 045EE09BC085A5C392F309746F554A872EC6CF91420DF7A34310E7D0127EF112 B85F1C4074630E12993E575CE43C82A04D853780B2D198858A5960F5E532243C CA03FE21F8B7B6B8B06AC0CA408197E242D44B293BB32447F718FD7C038411E0 EA8DD9B6801FB5A10588 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMSY8 %!PS-AdobeFont-1.1: CMSY8 1.0 %%CreationDate: 1991 Aug 15 07:22:10 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMSY8) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.035 def /isFixedPitch false def end readonly def /FontName /CMSY8 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 0 /minus put dup 3 /asteriskmath put readonly def /FontBBox{-30 -955 1185 779}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052F09F9C8ADE9D907C058B87E9B6964 7D53359E51216774A4EAA1E2B58EC3176BD1184A633B951372B4198D4E8C5EF4 A213ACB58AA0A658908035BF2ED8531779838A960DFE2B27EA49C37156989C85 E21B3ABF72E39A89232CD9F4237FC80C9E64E8425AA3BEF7DED60B122A52922A 221A37D9A807DD01161779DDE7D5FC1B2109839E5B52DFBB2A7C1B5D8E7E8AA0 5B10EA43D6A8ED61AF5B23D49920D8F79DAB6A59062134D84AC0100187A6CD1F 80F5DDD9D222ACB1C23326A7656A635C4A241CCD32CBFDF8363206B8AA36E107 1477F5496111E055C7491002AFF272E46ECC46422F0380D093284870022523FB DA1716CC4F2E2CCAD5F173FCBE6EDDB874AD255CD5E5C0F86214393FCB5F5C20 9C3C2BB5886E36FC3CCC21483C3AC193485A46E9D22BD7201894E4D45ADD9BF1 CC5CF6A5010B5654AC0BE0DA903DB563B13840BA3015F72E51E3BC80156388BA F83C7D393392BCBC227771CDCB976E93302530FA3F4BEF341997D4302A48384A CEFFC1559462EA5F60DC05245E8499D8E61397B2C094CEED1AF26EE15A837209 ECE64FEF41ABE8DDA7BE1F351CF14E07BA8FD40CEFBFC3CE7B9D4912D6FE752D 9CF163084E688DDCC4A450C440D47668A3F7CCE40030B01911C9A925DD42B5EE 504AE98ED274FFCE11DDB10C749FF05ED2BC983ACEEE5C2A394FB61F725DAFA4 E8AD4D01E203F60E03278425AF330330790BEA33AB3F1AC5B174162A6A40A2F8 35278FFCC3BD96D81A859ECFBBF1FE491ABAA6222FB19C01EEE848A460377468 6FCD71A6E58BF79E3DD28583C4808FB900E66910BD1C7EDA05D2F3D9AC7EB1DC 982F9035227478BA2ED2BA805C8DCAFAFCA32819949643FA97B5A315684A334B 976428949DF88C66D25824699079C3F44F7D5C0D8DA329C63ABA3AC0E9F7DC6B 08A5A3363B62471C9347FF96D6D52CD63D21F7C15997965DC1B21C0BF7F8925C 25C5EDB952A4431B75A63024E44969A9EF3C29C20571FE84C5513CC2C4FB684C CF0938E787E7A6494964F30D70FA29A75A7A0ADDFBE2D467B191F9E67A95758F B920D99157F37F32A5807FAACDFB 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMMI8 %!PS-AdobeFont-1.1: CMMI8 1.100 %%CreationDate: 1996 Jul 23 07:53:54 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.100) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMMI8) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.04 def /isFixedPitch false def end readonly def /FontName /CMMI8 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 72 /H put dup 84 /T put dup 105 /i put dup 106 /j put dup 108 /l put dup 112 /p put dup 117 /u put readonly def /FontBBox{-24 -250 1110 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D6A8F05B47AF95EF28A9C561DBDC98C47CF5 5250011D19E9366EB6FD153D3A100CAA6212E3D5D93990737F8D326D347B7EDC 4391C9DF440285B8FC159D0E98D4258FC57892DDF753642CD526A96ACEDA4120 788F22B1D09F149794E66DD1AC2C2B3BC6FEC59D626F427CD5AE9C54C7F78F62 C36F49B3C2E5E62AFB56DCEE87445A12A942C14AE618D1FE1B11A9CF9FAA1F32 617B598CE5058715EF3051E228F72F651040AD99A741F247C68007E68C84E9D1 D0BF99AA5D777D88A7D3CED2EA67F4AE61E8BC0495E7DA382E82DDB2B009DD63 532C74E3BE5EC555A014BCBB6AB31B8286D7712E0E926F8696830672B8214E9B 5D0740C16ADF0AFD47C4938F373575C6CA91E46D88DE24E682DEC44B57EA8AF8 4E57D45646073250D82C4B50CBBB0B369932618301F3D4186277103B53B3C9E6 DB42D6B30115F67B9D078220D5752644930643BDF9FACF684EBE13E39B65055E B1BD054C324962025EC79E1D155936FE32D9F2224353F2A46C3558EF216F6BB2 A304BAF752BEEC36C4440B556AEFECF454BA7CBBA7537BCB10EBC21047333A89 8936419D857CD9F59EBA20B0A3D9BA4A0D3395336B4CDA4BA6451B6E4D1370FA D9BDABB7F271BC1C6C48D9DF1E5A6FAE788F5609DE3C48D47A67097C547D9817 AD3A7CCE2B771843D69F860DA4059A71494281C0AD8D4BAB3F67BB6739723C04 AE05F9E35B2B2CB9C7874C114F57A185C8563C0DCCA93F8096384D71A2994748 A3C7C8B8AF54961A8838AD279441D9A5EB6C1FE26C98BD025F353124DA68A827 AE2AF8D25CA48031C242AA433EEEBB8ABA4B96821786C38BACB5F58C3D5DA011 85B385124615C1B216CC43CEF394B2DC098149B7072801B54C49DA0820EFEB67 A590BADBCEF9791F3B1ABF1E609238CA41A0D2BFBDEA2A4A3E550F4EF3D49ACD AEDBB97DB4FB830D670977546092843AD44E66475E92BFE4FFE8214DCC5B6B90 AF470619C1576A4E3E32E4FE6EAAC0F10AFA1418C9F992921ECD575468829BB8 49778B3B48A2E139925EBF3769A0A014A2B454DF930B3D7C03CC55030A8C66DA 610514FBA4E8BD474AB3F70A533297771B6163967349366D7A0D13EC9A1445BD 27BF542C6A25EE8AF60E9599CC02F9FAFEAB5336322CD3F10553C45900CA391F 3342D1EDE7E68B804CA1E4167CD050D326CB99EF6BD40666F61034F59E0574EE 39478B7A9842586EE7CB6E53B9151C45161EE94DC0A4C7450881A9F6B1C26E19 1AC4264A9D8F3858A54D376E51E5F862D93F01B8392CEEF88DB77EC229A65F45 80CF7BC9ADC3E9973756001FE9AB2E629D25CCA56AE550B199A8009BA06559D4 6D0DE45889263273CD60734D37AB3E0B049B2AEFA0BD28D0CBC4C162E9935656 F8BE0CDB2604623BF71FB659756B6B309CB423C91CF2A6CDE6253C8D1988B637 930422AF9A5CF83664B39CF24B23B8CF402441F3B025835C0B0B924211FBFE11 26F33157F0BC69F8A6C1F0337DD69D01EB157B32A517E674F235709186E67DC1 1E128CF8840778DAF6DBA6D3B78178F33E1829476D6D73A3D734102FF3DC4C65 4C628FCF16EFF481387EA60A3FD464C6388ED534FC450B584D94D3E0FFA3B05D D016BBC0F7F6071EB24900DBBDB2711AECD409BEA66ABCDF95B8D16F4D501629 F54F1BA8B55C4C8A6ECBD2C6EF29901355E1535466EB47EF75204016A662C837 1E8D295296800D2D895CF9F79435064AE5784D7BFF46BDDBE046EC405BCD8F44 30E19526D02A8DFFDC23D9500FC8F1F60687614D2E0D671CDC5BAFDBE3CB3823 E0D6F46F042E4C8721C77D95ADBC74E498E12E3263D54E983F29DE1234D5D7E9 28F68AE25F0A1158198916C7111B430CBD7B7802DA8F06AAB347FD470A961C9B 9A171CA8EB00765BE1B6A72F8AD605C56F1A60752F18762F7C589219026E52F0 9955A56B460F2A571852B28EB11F7A025A32DCDD22BBF6124A437D9FF1BE5A0C 076A8BCD32D8799378DB9C72A9E9B51306F10D332F1D77E148D9532BEC51E082 879A8CFD1CDD038F9836C954F2F4F6B2693A84916A2EFE02A4BFB932601CB273 52017449B1F17C85356AE710F338F37AABE20141CE0D35B28C2B3E29FA390570 ECA6A5EA261AA550F3EC411E78C8E1368D6736D5DDD669E70583452D4BF0E682 908FB645A6344CE23CDA2BD3CA15A8415AFC0F036C99D08D01D10EF2580F66C7 16D2B9220B61ADC6351EAFC6E69477F7A1462B9D2974C5252BE3A8FC812E3154 85310152B2D811D3473D4AF8478CB2D730468D7C556679105F42852419F40C60 A249D9DCA60604E173AC578C664F5302C12FFEA69E8D52401A7BD49E5A6A31FE 77BD0265DF35430642DA43A20327A5D9331DB95502EB2417E448E32D75607975 C3D2CCB006B105759384AEA4B38C4D57041243D77B99687CA1CA4721C0AB8750 E064E5CD8F41653509B6F61408017BFB1A95F49042025B03C8354B3D55341216 61F858256CBDCE6867D00DDE3EE6E6EA1F732CDD45DE9F80F497C3A6BD234B0A AB0817F0FBBDE91D2763D0A27A94C2BABF3B414C8C03321C2DD379C32DAD00AE B55157E8D9F28999BFB1A37ED3D8A8CCE5968610E22827C0DB0506C38F40984D C4E3668327AA737F78FBE9D050C5924CE8C9C6940F4DD25619CED65A5F759320 08815ACC748A4071DA1C6AD52DDB727BEB6A9E7B1EA2DBE3612C4A5253F92599 2390E84F69C094C124E38A1F5DCB4361D00CD684D8E4AEB02DE1532B5F558E10 44004BA1C8C05DFE0612910E00021C0166D95064070B773A813C4CDD37057D8A 1B12F1417C8989A30DB1018B7B4A59024BA76122DB85650E732D17F7AF5E4E34 6A96EA967EB1E62F44DEF7C5EF430E9C1ADA95668E481B3FE70BEFC0C0F59FF7 04088B4D7F4A1077C0D1777802B7538EC48F4CBE91198FB6AEF9C5B870B48217 7712E7D1AC40CF0D2F445CE8E4D7535FC805210772A659EA57D6612ABCEF0285 8F79921A00EAB3842972C428B8B09F58DA291E6B09FB7EB6ACE1A6757C405188 DA7B4E6B627BABC41680E4DA4FE52BDF5F26342C4FEB32D37D772FABC3FD73AA 09F97A4230CA897C26C177CE144FD3A4739A762826964A482A839B5D76C89D9D D6E89DB464E0962C04029BA224330A6A649829FA58352A9C6D160D04C0681FBE FC3A053A4F8D71EB47632455FA5C4599AFC431C3A2D65FC1BB93 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMSL10 %!PS-AdobeFont-1.1: CMSL10 1.0 %%CreationDate: 1991 Aug 20 16:40:20 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMSL10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -9.46 def /isFixedPitch false def end readonly def /FontName /CMSL10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 68 /D put dup 69 /E put dup 70 /F put dup 72 /H put dup 73 /I put dup 82 /R put dup 83 /S put dup 84 /T put readonly def /FontBBox{-62 -250 1123 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D5993DFC0930297866E1CD0A319B6B1FD958 9429B9D40924DC059325D9D4CC0344F3F997A99E6CC0676735EBCD685AAC9142 08DAFEC78BB41AFC2F1C219910BDF41D6279284EF600B69776CA15BC8A34347C 30783C52AFA60FBE3E353E2AE354CF87B558776A22C776C7A0B5AB5CE1F941EF C2D9CAC37294BF407A671F10E4743BF842143F4F7DFEE643BA3BBD8BB9E3F24A BCCF7F0ADF8BA500620C81033EAE8C4EF2C1DEF13AC575F1B3BBB66F093D3B78 5412B82B67FFA087AF57182B2230F9F2137180CA58A7D9B2C822FF04BE6CD01D 43B2CA7058C7B953F6D9B5D6E91ECBAA5CDE1159B0E59C83DBAD96D6C8C8BAB1 374EF652D10C0F3EE7104472C98DD3572AAF2D45A70BF7061447E21EE3C3BF23 DF39C2D1B35B42CD5297BEBE6BC94F7C9DC6E61EC67E4F677256FED9064BD3E4 B51A71B1D27CA4E5AA9E1D8080E6DAB5310711EEF87C40859FA935B19524AE83 63B163FA8397BDFF443227FEDF7DB27DC35D89FB1C5E435DA0619A5C88AFC73B 89A2DF5E767C5B536BC7167A840A0C32BD57A14DE69A7D0D819AC36FF32F908A 5070F32983BB007437E3500799DF5E0AD3710A4C0000F0098D5BE99F2EB9C1C2 C444FD9552D0DCA098A94B3BF176F511CEE13DB7EFFAED7C47B5ADCF8D4700F5 7A5FD1B49560969BF5C44F3749370663A04776F749DDD7B50674D93254426C4B EFE264BEE7810EC93784B7C01A7F29EFD92547E13A2C7851A2E709FBD5B87850 4A44F08F56A542DBE072D2FBC58D9E6468E1AB858DC35240E30D31C7AC13D6C5 7D2BB634BEE96FA0E10F842B11A789F72A333DD6DDCB1BC23227EBC406E50B40 30AF0C48E6359AB0C46898CDAF1118E46BFF8B00F54EACBC2AC262AB898C42B9 2E080C10DE923C15125AA61226AB19663C9B0787A1CC83385A40ACB33B2C516A FEB3BBC5CC72A87261C44E26D9B2EF4014C6AFFB1F37132C550D9877FB4ED826 F225A6E09835D9E7B2C2DA9AD21C4C11321EB0EFB393767819120FC3F8DFBE3F D050415D9AC378C555FA581A7D33C47B5D5CBC1A19657D765A2117E56C156038 AB3D1F51C0B013F74C3B292A8DE5F5A617025D1ABE830513C48D42A37333238C EFFB1E74711B150F672BB28D579F550B1DA075815788242B0F97A9E0DFC7AE0C 64EB948F8868C3D4C91C8AFB24A6AB111108D6C80A4E4488E6DDE5D2DBB8E79A 4AAA54E466FA8167498BDE3B1E96F27BB6DBCF8BA083E82368A7CF97C67FD630 E9E52A71A74551261B5952F2C884CFA4DD404B89F108FA87DF704D04D4C95153 8A760CC1FD166246CA1CACA107A3BA11E01E6B0A62F8731A9A7FF945A0404848 A902E1EE5A8CF58C7690A425A5EA995234AAC715C704EED16AD65D97480990C7 DA1EB8A2D847F62007BB241376BB2AB7084FE2BFEB4E00836E47687480AB9BB5 E09A2DBBA158C2DB088ED36ABA1325B74AA9A5EB037D652CE9D10166DC02CAA3 8392395AE8D3FAAB5927594D4BC69A452CC941D9E1545DACD1613BD66152D57A 9A8649D2F02971ACEBDA90B55421F54343E85B8D6FF7890645DC5BA88D6C2FED F85B6D0E5737D9AE9632C6B59E4A97FE9D2D3A6379607EE3496475F3FA70B17E C2C64FA555CFF3C7FAD5990B51B433DF42A7C6C6DFA956A467D5F8C405735964 495FAE1ED123EE002F0C04CEB7ABFD717F43E4026FED05D2A55292D349F31597 E9C295D7A22A2B80C7856B72FAEB428FE66303C28B0D8CCF4C73F5966491C88D 9632B8068D17A382C3C0743ADF90222A7CC8CB5AC0B963F3CCE8BFF2BDBAA4B1 D536B645F59AD9C56EF30C88BF0532D91C701C08C251BFE271B60E2EF82719AF B825A0B83AA95E6CCE653769AA28984A2D741B8D54202C501C52A641AE736F1D E4F83AEBD9C51AAC24E4736155B243D68D14C5CB24838F31DB2351361D3127E7 B221EEF54A12F663753FE9968319AAF6C081984DC6B462C1E28AB2E40E943640 141EC87FF91B385FA373F3FCABE5911444B20D2156D8C51035DF8CD2B2839552 BBCA177159B0B79AA98014AF26E2211A971A8D17FA133C7E8F04DB58184E76F9 4ADC974C1F625FE35EC4708C03D2A4D90DE41ECB0CF7A77058B59E485F0060A7 7E5A4F9518190864C9DC54F6D2BC1F3EB375F49D250D077733C4AAD604B90F46 200F803E837832F9FFB64A945D8986CE577D2D13D95B8BA0E0F008A4638C5DD6 D6B8B7AB5024B4B0344973D7ACA79CAD193CF776576D3E1B6C366635769497E3 42F07B60727C45BA66D6B0521624F7162AD670E1054A2E10AE95ED277A7798DC F57B77E1B59829E094315CBF3A346D70E10662D2EA5CBFB9C224A752D7B17D28 7EBE06000E43F904CC5F8783298FC3F4A4A228CEB28B0D9E7CF9D64A9E9AFDF3 CD2F33654F54813D5734E7F2F961EF986A004ABC59FBDBFE0D7D9F8D354585B5 6BD99E8EE1F14EC6D4180524D7193E2852C14D6C70B4FE93BA751C8B34AB76FC A1F032D81F2EE1C9159558744E8A657489AA121A1B92F5405F7DFEE9A10BD290 111ECCFD4A84EA3B4D98FD3F58DD54ECA4248AECEA2927FC34E46E02570F71C7 11E727EC7EEC0D83F33B98396BDBA3AA7BE326CACA99A093B48479D09733D3E4 93F1B69C6C9C92364475448CA32EBFBF00A1BE17B292E57990B3AB7E0AF656B6 0BD84FAF5D70C8A1967AA3C87C56CD5FD2AE844A75A63A6D8BB66EEE599AE262 8BBE265EF7ABF588910925A0EC71DFA1CEDB71091A34CB3C2AB6F85DBB865A4F 4B348545E8224902BAE1F1071EBEC0EA7915F2610279AC00033E713F205A45F8 E0FC6E3AB8EA5D04031397B0716E22E1DE70B65F12E51E39A29B9CAEC6235769 6BB6FBBA9E3C3AF690D9F9C26E233AD77EF046889FC2FE0634A2EACBA3FE22CF 0E378CC2538B1E57ECE1A4344A5234E8C6CE0911F485C241712021687C767DC0 5435A8D64BD44E9FEC81A40B09627186E9D40C945E0F3C03CE144DAF53C038F2 3556B9F952B3FC6600249196D99EAA7884E91C1E47C90E43FC0C86D6C24DF2D5 C7B0F764DF52794743ACE14CBD17A8E3A1A567B4B4328AE4FA689EE6BCFB4C3D B3FD9D67A65A674731DFBF1B7A47AD738DC54B1885B070C08A458E329C127CE8 5609E5C60B3F86C42EBF71B82616CE17711DC181E2DFB999EDACA98230EF47E0 AEBA7DE3381344A797095004952DABFA08A8FD0ED4DB2248BB863B9EC85EB47C 7985B4FB13BA04B1C489FF1278B8D7CC16B9AB43DB1BA694BB39702CEC80BC5C FA8D006CEB69BCEC8EDCB0564FDCBDB13D8CADD37D84502EED35092A49A250B5 AF4C971B4B3E6E3AAFFC0F82CC44C0F1E9C79F7668EEC958AD4A6645AA2FA0E3 C0502753687524BB98F557E8528A02EA8F 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMITT10 %!PS-AdobeFont-1.1: CMITT10 1.0 %%CreationDate: 1991 Aug 18 17:48:50 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMITT10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.04 def /isFixedPitch true def end readonly def /FontName /CMITT10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 97 /a put dup 99 /c put dup 104 /h put dup 114 /r put readonly def /FontBBox{11 -233 669 696}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D004D606918A40B8D7BFA821B73E118040992A4 E1BF99740F8FAA47E4349853C8149C0F8BE2F23C6F332BC0373C867D0715E8FA FF163A60AFD0FED665D5829739975C5DE12EB30895604D211F645D4E13330DB7 64B6E35463C93B752F691FDDC44595B0A0E9E57C6F649809C4DBC7DB58102A60 46349E9A5740893A1BD4536B99ECE72B147B713619037400669C07291022F84F 4F3302F8244D2F0F1380466E81E0B5E00AF33E021A55620A7A93F3BD49C7040A 67C096167F502EF2051B526405B9391B4340A3FFEC103E317E315A88D31661E1 7E4104A2B925D1DDA9586861904FF6FFCE6A8E808385E4C4014F5A494874E2FB C3758D6989AB68C4CEF82F92B9439794FC404A29D086ED6B27997735BC3A24F0 473FFD74BAECF5282E2EBFCB92D69B81C568D394055E2E30A7E3F448796E4EB8 019AC2E075377F777183BD87FDD194E855ABFA35AFA73304DBB181C267431B16 70456FD8470B525011891C1E140B8FF24A474B89F1CEAAB509F91FCAF512E16D 8413BAC0C664FDCD31245C5996F4883305D3EDF1C8D1E6F0B1E79A06028BBDDF 6AA5B515DF33BA8FFF2394262F3FE1DF95AD661322BFA5179E325BD1B1EECE49 69F64789FF1BE8DE5CD7485571A07471BD6CAB4891BAB122BE4C4A1B7176F33E A1A434F745811B71EA8AF73407F32E9F4EAAE1C1FAA979523C18A24F754C307C CE056DCB71B20292D4FBCBF9AB9E9B81DADAB90E60BE926315049E5BF0F50315 66D82E4963CB556F19461F43EF80302912AC1168884A1692AC59BFBC431B14AC A5FC06C4AB595F9DF66CE5EB69568038445A9EDDE20CF92BA308A235A872B15E 8EE6356F4041C28B24C085EF70B228149486F1E0C94ABF729645BCD8A06028D5 2E0D9F5A8E1BEB0724B397828DDAACC2432023A931E5E028064BA6B61D18B20E C25E0637B21F856529A1DF1A43D939B77DC0F03A00D9B23964C9F47129F04A33 35A579B7F07C31384513392F55AE902262CE554E2B4318C5C93A6C934B505139 4461A84B9C8EB9B1DD76D205913636BB6AA2573738BA6C3D55413817DC394E3D 1D2A67D6CE019C198843CC05C8ECB91510A022431955273461F2F8A60ECB58A2 7B587DAD7850AEB071BB908A51AF894298BA04FC511E77B2D4F195EE3396508E F9640B89A55C2E803BDFEBBFACD54FD24B59EB708C18F33B85DEA8D574BE1A23 C66E45CC009F241DC6326ACEF2EB2D06E5013139AF7ADC1F4169AF759803F782 CA46207F3214EEE7CC8A20076FF439C7C7E677A8DEA241E79F69FA0E17F4C08F A08AB475E5100427CC6BBB210522C97BF2577387F52EBBABA628731464988CC9 84162511A10A9E2B34497E41B1D6B6BB0C1F60A0CB76E8A7D71C3E35D81B2CFB 01BFA11CAFC84AD4AEB395E11607172D7DDCC82B44986E08E8E53BF75EE6BD5E 9F957CC7C6D5598DA27BA89F1B2E729BFB8ADCEC8E883A6C6BF5CAB8F91CCE5D D8FECC52754F5D0BBD2923C8680D1551578CD2212657507E00E7CA81B539E595 4441A879E4EF828DAC4165B5DD2DEB41257DD4F5D70550AD2773901B144EC734 9D621E90B3F8D1623EB8DC7F5C734F485A8A50DDAF59D12574166C992E2E5332 1F50CF51C9F20BC965CC2E942BDF2593E98050BF63C94048FC4E47C5FC218452 857831D668A7CAD8054C9AB36C6C2601245B516FF09A0E5CEBBDBDE31114A8DC B60FE422EAFE1A626E5E4392281E418438075E9464256F86A50E32D847C9CE8C 0E410E9A9962D0F6EF195980BA8E269A90B94A1485779E5BE6D436E050FDDCD7 4D6B14059AC50E66D62531C48ADF954AD90EC245258F08418A211957322EE1CF 55CAD650F5435FF0C7FC03A9010A3427A92F93DE9E726695806CC517F669D17E 70F48822E599C8A2A3D01B6A74725CD02577C9D9823F10166553638C35594AD5 79BEA7FAB8F6DC9341778E66F4107D 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMMI10 %!PS-AdobeFont-1.1: CMMI10 1.100 %%CreationDate: 1996 Jul 23 07:53:57 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.100) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMMI10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.04 def /isFixedPitch false def end readonly def /FontName /CMMI10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 15 /epsilon1 put dup 34 /epsilon put dup 58 /period put dup 59 /comma put dup 60 /less put dup 61 /slash put dup 62 /greater put dup 63 /star put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 72 /H put dup 73 /I put dup 76 /L put dup 77 /M put dup 78 /N put dup 79 /O put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 88 /X put dup 90 /Z put dup 97 /a put dup 98 /b put dup 100 /d put dup 101 /e put dup 103 /g put dup 104 /h put dup 105 /i put dup 106 /j put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 120 /x put readonly def /FontBBox{-32 -250 1048 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D5993DFC0930297866E1CD0A319B6B1FD958 9E394A533A081C36D456A09920001A3D2199583EB9B84B4DEE08E3D12939E321 990CD249827D9648574955F61BAAA11263A91B6C3D47A5190165B0C25ABF6D3E 6EC187E4B05182126BB0D0323D943170B795255260F9FD25F2248D04F45DFBFB DEF7FF8B19BFEF637B210018AE02572B389B3F76282BEB29CC301905D388C721 59616893E774413F48DE0B408BC66DCE3FE17CB9F84D205839D58014D6A88823 D9320AE93AF96D97A02C4D5A2BB2B8C7925C4578003959C46E3CE1A2F0EAC4BF 8B9B325E46435BDE60BC54D72BC8ACB5C0A34413AC87045DC7B84646A324B808 6FD8E34217213E131C3B1510415CE45420688ED9C1D27890EC68BD7C1235FAF9 1DAB3A369DD2FC3BE5CF9655C7B7EDA7361D7E05E5831B6B8E2EEC542A7B38EE 03BE4BAC6079D038ACB3C7C916279764547C2D51976BABA94BA9866D79F13909 95AA39B0F03103A07CBDF441B8C5669F729020AF284B7FF52A29C6255FCAACF1 74109050FBA2602E72593FBCBFC26E726EE4AEF97B7632BC4F5F353B5C67FED2 3EA752A4A57B8F7FEFF1D7341D895F0A3A0BE1D8E3391970457A967EFF84F6D8 47750B1145B8CC5BD96EE7AA99DDC9E06939E383BDA41175233D58AD263EBF19 AFC27E4A7E07D09FB08355F6EA74E530B0743143F2A871732D62D80F35B19FD2 C7FDF08105847F13D50934419AC647CBA71DF74F4531DC02BBDA22AEEA3FBBBB 407E0ACC52BDC60D01A29407CC4F93EB8BF6D4813E9BA858D54F38918AC82720 4956D50291F0546E50FCAFA6DBD0099123F5ECD4AB338DB310DB4CAE11337A89 8ED99B6F483940C97544F888EAF0CBEB11094A13C073D0061808662A04A82BA0 AD35E8782F854AF66C20C0FEF18D0ECDD1646321B93D327E53D88CA0E825FA95 05AA57BD7684554506B2BEC1399437F308EDEE32B11A1214CC9BAFF1634211ED 233D3AB8E7ECB59C05059E41681DDABFC386A0B70937E5BD125BCC8D005D5293 2FE2C99D33A80ECC24CED23C74512DC8251CCD5FB2173789E108DFF6CD616404 5D3306DF05AA2CCB2F1EC224BB532B94873F3655CBA9322F7B9E6F310AF64D09 D7EE394D66EA1F30D68F1A34C90CDED57EE1531A34B518817E4D50B16409A9EE 5E49BCE8C37EFE55A5B9DEC96872462C08C8A10D8706DB0C45A2B6033415337A CF260423C377BF6C034E6EE87D70CF780B726055B99D99A9C13C43A60CD1AE63 AF58A3C21959E09467210D0160F73B760193671B1746B07A81CE6BB054C43CC2 7F18114458A57EE800D6E6597FC80FAD301B42FE354197484F01713FA039EE6D F60972F3702A30269D8890CD2332F58D12A01D5D4069C07B4DEA2AEF1621C9CD 8DA5E73920848625B7EC7C77CB42BADD1497CBE2F5BCF60D245BEDD5835AEEED 65A88701043CD74EEBF9FAFFD36C2FC57A3619566862DF84FBB60F175B5D346C 93779C3036F4596B941E9A5D119E7B81687C9CA754D315F96AF43553AF413938 D4486B1FD4F864446FC35A0A9D163E623F9F5A27E302EEBA332161354622075F 710A363BEB84B0D64B9633D18A0CC69B7179E3CB32FF9F654EDA245EED619E50 6A2E3B3B9177C9D6949F396A273E451F1DC8C0D18DCB252501D85E982516FD06 780AFF7C44BC54C427977F505DA065C995FA2C519CE5B7285A397D3AF668F412 6F609F21E936731F959885591E2E8FB0D11037F54C314156735136D610B31CDB 5C8EF1661E2652D53B075D40BE900B4C15BBFEBA8F6184CFF107245AF723F21F F193C51F5AE04FD171A0299C9B15D94DDD530B923EF645EBF8225D6843271EEF 4A6306253F04432E35FE5576924B30D54069B0FBFAD5ADEBDD233268E0BED916 50E235CBF740830E89B01E38AD0B074F6B7917DD10F2D31CB07D7307755486F5 14244A1F80B560D73499DBD3AA02F7E0209FB688AAB6F67D6AC331D6B25BFCFE DB85A8807BE80B1819C2A8212EAADF94F49278BDB9DFB7441D85B16DD619DAFD D3CF683382B3DA5DB2C231D93F187D3C402E575A04B6B8B4487EE2C92900F54B 6A212D9E6C1C996E51D13BF85B266AE4B3312117B3FFD9BD5997DEE5154329F3 150413B4EF177B3C9A5EE002B0F0C89564CF4DD48294926CE364C7D1711945CA BD85145E22032A2BCAEE2090FEE8E912CFFDFDE9728F8E44EC026DD624556B9C 974485CA4D2DB834CF18512018E57BA8BD661AB81F3958E49689F0EAA8E5E0BE 0D3790227BE97834A9E9253EAC535E4D1461BD113B5DEDE9493231964166E339 04DC2DF296CE698665B5C64970D95838BEE04DDA0F37683EBC89DE3672B28D1C 0C48F6E4017287675B87D7F6696FB64C3B5DA42FAF5FF6650F3335C69E00BCC3 CEBF665EA7B318B29FDC887E273FEF5B65BA1727CF1DF8B365ED1F8BF321DD1C 6599FB30DAC0F87BC458CB0FFB8E2CF0FE383D6E3B683DDAA9C2FC40C77770E7 AAB65C3A0B042E175F7B5D7103BB372946B8464CF6144FAA816AC2965E07B0BC ADE20F97AB1BE7D1FB36D4AAD47BF2FD334776398E8CABB59269B3032F48467B F3334497A8E823229A4F604B7A8236EA7680D55DD21045CDC9F7C36C9CD07084 472B918E7CC45BC39C18C7B09A754CD096CEAA6767168557B7577FAC3EDEBF1B 94E1FD7F64ED91B437491540736AB7AD35B6C5E906DD26C3DE97EBD306550972 D162EAFF3AC576CF9E6EC8F57FF56961C4D7D0086A71B4A974C8077ADF6C91F9 0A868951B07CD4AE0AD725F4BA6C478A21B157213A19E0CB97DB5C87DD568BFF A27FA431ED7CFF328E7A3B43D8621BFD05EC593A18675EAACA878BEF0B4FE137 82FA73F07B04A0240532C6BBB348E8F28F341AB4CAB947DA4284A146B4EADF1E 1978B8D3FB41FED0AA66B9F20F413D06D4E04D47F4E9AA40526DB44FA402FA52 552F1C66E6E9E2FF239EA41AED805EFBBECDC8377B968E0F26444E7D3471902C 3D11BF287A47A9CF3A1A374A05B4EB9066C58A297E9B7912FBD2C0B33FD1188E BD1802ECF995A5A7400A618E68248627FC9C25C28958494517BFB6B7D57B4683 D09C1451201F1162EAC960239899F348038164CB22C4F18B9DBD3327D9FE0CA2 E3DC22DF0EE82E6CBCFE91A74F6218CE280F0893AC0667BBDE048A2090931996 1A4462A9FE2EC5571FB9B4E3BF232AD028BE25CA5F751A4B61AC051B34A36022 56A6F1AB431FB9F132DD38922BCF18CAD7C288AF11ECE6F2DEA38C86ADF7A77D 59F2D10F833329B1DA394A6FED188775F8823D9A23232B5C78FE966AFEBFFEE8 0652F6B71BD600883E69A726161D58E7AA1A10BACD28BCED69C5E7EAF676E436 ECB87BD43E8EA5EB243F668B9995D0127234E4503460E4BB6CC0608132C8E4ED 91C2230C08A720C0751581A24BC1F2E2F35BBC9F51FE3BDA2358F75285636557 63E3DA75F737250AD20C19E07D2788755E4505E7C2CF957DA01A0BD3806243F2 CE5290D40615A4E4D194B6A356777E268D0242541F51134A065673887AA8CEED 85404E129F01C37A99492B5CF425C09A2490AFE9B6B070DAB0B865F6AD2CA51E FA7C71BEE1DD9E269B83DB9458DE5022DE03F3A23FAA20BA96C1CF46F233A18C 3353AEFD852F0DC0C52CA8BBED1DB03B77A8C7EFA3FF609C4B9CA0EAF3C23EDA 6318BE404C015C3E4659D554BE2BD4746E197C69F728E95A74D18ADA56BD2DB3 8FC9B27E4F506337501516B352FB7287B94851D95269365BFBC3D44E6E8C71AB 08BEA63F6AF62CA877E0D276E4CDA071EB5DCD17C3ADF4B0B71080139DC9F7E1 4CDECFE01C9007C308F4BE39278CC44423B91A884A5E58A39671CD31D24E016E 6AD544ED03CA17F9D6852134488BF8BB63E58B81DCEE5371634DF78788FB703B 732BA51999AD7469C7078F659521C7D2189A62D960CA4BA6A1D8396DD54161B0 E00B88D21050CFC419EC7D142550BC5186F7C981D3A011A6119B08F5F95993B8 EA0542B45637BECBE88094CB76279C4D097D3080B32AA908DD087D1DA43A6D14 893F16449C75E3D943683BD3F519478F3E2152DDFC50FB1DA4CB6114D42F0F4B 079815843AAB07C826840A03D7B8DCEE8156888A49E0F10C00B477F1384B84EC FB7F71D615DD93B7EFCEA56EA75E0B54FE9D86764A9E353829DCAABD994D681C FE656A0DFC45DAAF8B3FA35374E7311FF6CBC03E1339A36785AE2BE1C9577A7C 7C22CD0D19A7DA6BE35EB9F410D4766AA8319D55E4E619F97D1497B7A947D66C C7BE40B7D00B93FEBAF1AE706366376D893A831EF7885D4BAC409ADC4B39DB45 F0061BC4156F5EEE6FAB1E3A014218F509ABB8F307A39820B4D57CE13177245A AA0FF27E8F8EB9E80168211BFA05233B4DFAA9A64032E41128B095FB7DB046F0 3DD9518F2F65D632914FB51354BC68974D79F8432EA09ED361AD0EA0DA7400AB 30D8D456B3726ED866FF71C06E6074E6DFA0F6EB8BD22B3B0888E82128E9F4C6 AF71C9008B79A16415BE5801B5E362E41BAAABAECCB30B5FC33B3A3E21C1D5D4 C279C1CF73C12F49DD635B10521423FEB63A0650AB79F303C67313F1D0E9405F 104F20AA9DFB31E547739A0CD0D551932DB6425A2078DF15C41C9D08FAF5DF31 B79A37B56437B43B3F85058B47983ABDF974F7B2036031A481B04276C50BDD6D 65F05F825239F7492980B96BAEFCF1CA319E500393CBE2F6FF232F989AFC5BB1 98F6CF65046511696A20BC081E88D862F563BF90C0F270AE34EC52D909EFD2C3 9063D22532E5D4BDF47E82400F21AAC241CABD044E724CE36581A63A07A0BD8B D1A57754637C64B731C1A71EB58A75AD04E007D24A15509BCCB163718B8FAD59 D0AA861C2A7853D8689DF03E52FECC516645CEBBD601FF075924ED516E9D5D13 98B56877711C6B68517A0BBB7686715257BF7747A28793E52FF4E6A603F853CC 62DF20BDFD752FC1FCD940C3AC0085E666BC3E508A899149A916A46D727B58F7 0D893617DC2490AEF4FFBEE9E438FBED4631F6D8A135AC349008566224A71817 0E64973C6E2DA47ED40226D4AA8BDC958E35C4156DF76C44857C0294ED7A39EC 6051CE77276E3B8556E38288FC33BDF6C976AFF6B64A39AB0F39D0C0B9F55F38 B6112961EE57CA58503442E381702C43E2063C89617263D041FD03FB0A1FF453 3119F355D07F2A1A6491A983B3FBDA5DA7721AEFF17B6CB74605F66E4E5C9355 7E5F8B7FCC706310A2524814E86036E9DEABE819E869D196D99A077F61C7A78F DDA3A7C310B64C8517F1EEC6EC35E8A556DD4675546D4EC683E71C42AF5F3FAC 64934B979086FC0EDCC73A9F6ED229896D616A0969DAECE368FB94BEE22AE6CE EC297471087F7D57C4374888DF5354D5190853C46BF3EBB7F90FF12D100AD052 FF24FB27E3C585A16DC33F234C26FB9AC886FBF950429B425F8CDB71A470FBD7 9F44B6F05DAF13540426A43E67E57B7C62A94B09053FC81EBF68BC3D6F5ACBD9 E4753B3050A25D0F30C7DD714188F5A2F34E6171CD10C5CD784CFDAAF1046037 8435E1A05C8AC81402362B19941070175EE0D8B7B6E058669E3F1803325BF396 82EC8EBED5C18ADEC40EDEBA88D766635F76CBFA092162360D806FF721E1DF49 65B64860BB3C14D889F5F9E7724AAB0D496C67EDD5DDE9A86BF99BF89BF4FFB8 08C816E79287F382025F118D8B8E0428DC3A6171C181B2C9A45D4EDFEBE40BD7 A387810248887C0371D64BBA1FA33FB9C27277A372C92FAEED68322B304ACA3E 30467A05C9D73331FCD9CF5E8CDEAE2FEA2F76AD0DE6494608746106D6E8EACF FB72869141C7828FD218026F9A239AD7B3160482802285EFB9C0BB538D245223 B5364DF6237123E8B7E725C4959CEAA3989E7D98C5613E76A9616EA0F2EFDCBA BE32BC99D9739249ABC901EE3A2C5DA96279DBD183E355E0AA4B6FD9C29D3741 4EF4CF3048F21FA9E06353C221D971CE2098138BA89C2A76DA29CC61B34388E9 A15BD58AE95BC811BF6EF8B770F8D237B5EB8E22482F5D7DA9DC18CD6ECCFE1F D470B92EEF1AAC5FD9FF1E28E58A49BF94E0FCC4E42581A33126AD3D1D4A9505 999DC33AFCC019C692326E13358BBB6EE3E911E19864DC1AFE07C88E3C223C3C AF3E2AAF6F8C78E90D0B3E420C7AD33ECF20DC67D985B962CA68A115A8D72544 486C7CA4112A9CFF817E095A8EB5C9A7A320988588E03F7C2BF66CF9EE72211B FAF741A66128AC55E39FD08446C428631F5DEAB90EB74050F7E9030D2E66E830 BCA19C2FC207D27AF3FB36F71170DC551CDDDD5DE8BA14B9354592F3E3AD218B DC2749D435B2DFBD8C4CD8C885B3884FE054DA51DBBB07D2DB89AB8869F879EB D79E64FEC1C08DB3D85CD55D90EDDEFA3B381388F422BD8E51477A5FF223A70E 49D80F93D8A17E2100A67C80CDADA3BB629A8C594FF095F7527389552CD1FC93 124BAB568F6C15D7A17E1A8CFAE9EF5092947E36297F0F5805ACFF3E896BC7BB 04D4DF39EB2E874F5EA30ECBF0F9A77AD371D924D564599A3B6CFB4A5CBB9751 A31287DD41782900722B264E927A725D22F78C457472B9775565540D3AFC2DBB 9B4BB7B17CC0C0D003FFC89D6F25D4315880BCFD83A7D8D8EB0CAF0271BC68EA AF33D9BB9101C40A2D6911CAE1A8868FE311798EF7C43F2C1246C21C892885BB 44254E408F2F50A03F4AC0B06927536058345C76C00FA5FA257EFE82CB3F9FA1 03BBE8B6819CCF1A6514C80C2BD2E4BB15832AB8B76B0C24527D26C24775F287 4962719B9F3008A2F886EC759C29EB75D212AAF2B9B7E381C834A78DE7368A6C 0783DC3EC479C313CCB0491224D92E1906AC09C2843F9D44EBCE187709E91E63 4E6C532EF8A79473626FF0F74F28167C72769070322037A43A708DF429D0774C 0AF76A644308CAF789D4893CB159E836B4F864C0F53B671CB98CDC81D7424D4D A2C9F2E0321CDF449E8341F7DF3DF721988A05756249DC6C8852A288F10FFC2E 6B9A0D22C97505B7EE7AD33AFCE04F0F8474A95A3A1DD3E0F22EB84037EA35D3 B693E6C21A9B23B1B52C69AF37D277D7C0C44AF1A05B9B87276813952951F76B 1228E74B51C592BA5E8F8B45DCF39EB3765F841D6A96036C2153EC0B88E4E776 82C6DF97478C2EAD289A31018F1CC8EA3A2F189D38ECC2B5A1C6CB0283449C98 19403020CB59FF334058A8830D3E4881707C47C77DAE1737926F9DDA13096E5B 41E7AAA72217098A3552769B8F2871BA62E05EA346AB13359533AD2EEAA3338D 8F00D3DA5B6F5D96729BEC2E7558036CDF779B2528B9107287E20D46294F5F63 9935D877FA9FC05C4BC191F6814C6A3730D288163DECAC6EBCAF5C219F2CFC4F 13574511413644952B9F481274D0C78421AC5F313C800958FBBCB5716BCC40AD 90580C6C46CA8EB4CA53F39AAF2596687F027546EB226CB664EAC600E9BADC70 A89D526FEB51F6FB86C6793D551B680ABE57756FEFB5FEDD79408821B5F4ACA0 8A72B7D14A735B4CD72EFA121701E1013D41C03F4CE966A319103277B14AC5F4 442BEC4391E443AEEFDFE63F5F203C9A3C73DEFF3C679CD55B12B10738743315 0F0D5300AB89CC3CBC60FBD9940BFAA891EFD0CD483881EECF9C311D31D708CA 92723EB7F744685B639E28DB289D013914E683EA1DBA7C945AEC4F67C4D7ABB1 AF70CAE9B693F74D02D6EDEC25508F6F29DDECC13CD962C9122B42BE66444F7B B7625E945DD0DBF846DE09F7867CBC3B3DC40ED55648ECBBBFB6960201B415D4 0A5067A5E7BE9D06D825C87D3DA3F5BC63C3F03D9116BF03A0FCD0EBA3A032A3 7D35AA538A1FB9E04EDE6BBD0B5C4F1CAF0B4EB41699C877501D1EF7D7C0AE22 D3A0FB3823ABFDA3A21A2DB1112E649A1028A8698AC4A62FD30D1F38FDE2358F 554FF8A50A3CA829F343761FA6AD28250FE0D1A9C9EC969780668B45B7A78B41 45A24DA2AB10BEAD6E4B9E5E23B4BB2A0328AF7C3615AFE8572BD50807CC9354 9FC7B394B57868F44B28BA600EA659DB36830DFABC09DBC805646AA82F1F5519 12A8F8BA2523D8368DCA11B4B1BB45B1884926AE9B02BA2AA812DE8F5E6DEB3F 2B21ABBAA4E9E7398DE308CF10672D3C7A33FC1131297B73883B9C3396B2E4CE 4A9156146093650513CD61B548A87FF126F734F64E9359F4F978AE515EFF8555 F4E7E8842FEE8B2E3EEF8CAD4F664ED53CBA30027C4676E3B022C59F1BE3C91F 9A4393B24B727EEFE67DC1E46B1155599839FB528320E90E271E9781C5F2183B 3E2BB3D135ADC660CDE55758EDF4D3C1B53CEBCE5BD66CEF07EF424BD9230B29 7E71A4CF8C125BB05F831669B0CECD3CEFA899E86C35550C93F2C17B84B82F09 C257E8505ED45C9FBBE55A9AB56DDA42D9ED74347187DE6B5695865A5012FD17 7B695E316A9CA4860556633F0081B7C52A32A28946481C1AC8A8C5F7EAA98FA7 3C90A615546AA10736E71C47C74FE3D733E77E8924CE72C45F91ED1EC4DB062C 9BFAE302F4CDEBBAAD423D0039373714DA09666E8CE42C3606E3F476D374A78E 1F5C4E1E7594A408EB3BD9EA3567957703E1FF12011379A81C18E4EC0A4FC105 3C988412FEFA730DDC7095E9EDD721ECC688A00A0E093B4088B3FE24BBA431CD 408BCBA6404CCB6E9E38A19CA6311BF768CDE4FE6F517E1BA1E0EF2B2B9C982D 4D3C6FD40C4DC5155A549256FD3C196C68D8A291E5EF8560966C522FA5DCDFD9 295991CEA8B1E5410792D598E5B9BFF3CC352677214B479C15D8B596346E9082 9A426511B7E5E9473335C44EFEE05F57EC0054331B4821E636FC8D26E176ECEC B28A2F8C686E97673F33B03387710D198F140F0780E71229945F027C766CB5CC 33DAFC7F75BAA9343D897AC7384D8DABB323B1A9BDB9BDD42D101F1CEAC44EDD 0DAF6EF5E74DCE4DF75444503E9C82564BAD9C9C85F73D8755EB29598E75DD2D 693CF8A4320A0E24EAA682D30DB773FE53190FB0D7A7D34E1A1DA8B626452A7F 0C721C5D34D70CADFEB94731E57355514EDCDFCB1140D4810179890BB445930F 6C47BEA66B3E8F7EA36D015846CA3148AF83C6F008DC2D6A5D2BC260A37313AF CDE7FD93B30B16F4C113C8320CAF73ADA4136A745BA09D7DB6884FB2CFE8769E BCAFF8CA78E2E5DBD8C40F1854DDBE276544757565392753024A54B42AA3A3DD C8F6CE4976955A24D9C8DF8E721218BA0223F8D6968ED22703A827B2490C7B75 919E294642B4FB3186A9CE80E57146FFF226AB4BF71EE2DBD81051D55E4C1C94 BF8A70CD59FBC881E82ABC535EF44BF43306BE5C72DBBE402EBE58C84A8A72E6 24E5A0E2E8D6078F0BC773093A7EAAD028810BA8C99601665B83D776EA5EE51A C30CCD94305B80A56DDC8A1BCE5047F3362479851348D58EA5AE996230EA2E9C 1323B7184D0DB489BE2913FE5C422E9EBAFBE53A37523DE54281E12F86860C22 F2C809230516D70A72A608016CDB2FA8A20DCE123E2F68C1CC751A649EE8D051 7EBF61D48E4196E2C987AF17BF0DAC0DBFDBF520BF382BC85D693D11D9DD7A54 9E723E2C69FA5B14E3D9083FC626B1B40AA9546D4BDE905CADB7BEFEB9D7FD53 3E33D03E1215B0CCBECD89D5E8000EA2D8B1414592EA81348DE1F8887B3197D8 01235283C9F8746FFCF072E51EC21EADEC476CBA755D1EC81100CE81E0C238C8 0F745F186BBB6639A6D91A92E106C642E0DFF2E245A04273031112AF3C23BEDC F1351C34E79ECDE02E92CAC7AAE01488BFE59BCF9F6B5E471F57C6E5426D2DF8 F29EBDB434D99013A26DF0617EB123DD85E6D94AAB1A979F6AA368146833335A 7180AEB4AB1E121B75860AD31C7155667C7BE24AAE64426BF38C894BB5161CD6 5C253F41146C7968502F6D21D6BA91A0A38188DE4149EB0833758A65E2620294 3163B9A246255AFB31A8A8E2F50EE98B3E2AA0F2A906AEEF239A78E05CFED5DC 3C39D9A020478F32D774692420CA7EF658585ACCA4CB0957C66630E7C02C3968 72AC1F6FE627DF08688D7E8DC24A38AEEF421DFEFA1108F759D81BF01E67B5BC B2E3F204F1ABFF2BC17B6A991C60604EA78AFA6AB8B3BABBA7D88EF632AD6276 4529591093FB83A93FA0AE627B910BF94ECF2E5CCACD1DC68BFC4BF4AEE4CBEB 5A497F7F60E549FF5C530547CB51C1334B06D0BE2A62BDD1F80619A1F443F99F BFD019226F6B729A854E6A167EE6FF25C1EA4F0CA742C7A6F3AB3ECE0CA14F5E E9B9C46572D2F5C284F3AD78E917AEE33143E698DA02A1BC6ACD35946BAA9940 81292609F841BE5576F7A19A9FC7056CA185F9AD1CEB3AEF0E461CCB5652AE46 B3300432B831C616DA6A77E1183A7C547906F7E8B6B6A73B498BD2F28AE5EB22 B8FFD48E60DAB1A06C63B5BD582EBEEE9692B4A6354BA97592B665D0A89CF839 669E321EFFC773355042F2F3E1852E9C0CD82862C02C465EEB9F0A847B445B65 3C63D4BC4305A39C4B30E11F4ECF0C80AD25B2696BBF018BA1F34EAA6E98843B 8A8C55013FD096241882F1791ECD2B1BE0E7F4D5A21EF9545CC87080896E6CAB 61FEA357C6537AEA557D48CA81B90F8E5B485F3D9AE787E7166F772489A2C978 BF88328B6B3D0147096590D93A83A1EC41CBE3EBBC1AEEB8335F8C51F2251F64 326E1B7477CB59C18207D3BF91F9C7A9C726722A89EAB8B70922459F59D3776F 7366B3ED41F7F9B6D7982D6C63E6D85CA237A35088F73084C48D79FAD48F9EFB 19EA4CDDA99B47EEF4173AAE9D817E209D454BE3D39E0015FEC7A1ABF39596B1 054C24B5A798A7192CB94ECC33A336D643B4E7AFDBDF44D72D54EA2CB0D1BBC2 C48A9F681BA0F79E8F537CC2FDC1CAA465FC0DED6FB94AB3D67671017145CD91 0B0B1F662C7C77DD646D545CA8B16430EE2C698DEE47370AF09BC94493C8E3A8 38DEFA2B106F123EDBDC0A18C7F230747C6EE75AE288A8635191B051FCB13279 8BA18C7ADC200EFB0506FBD267CD90A37E1F642C8D77384BA6C3AC2D746F7453 2C242DDAE0B9E5A445F6AFC7D205F3903BCE6E636D249E45D17ADD40D1352513 E7649B035B0EA9F342E5D0451EBED1A2BED99BECA2C2C03F8F5B4D4A952F28A7 BB243BB479D21753E48B22175F65C4D299843FB92B1511CDC606B03D2A539CD7 19747F0F378E8585D289E43363F28A9AB2B818360C5EDED3690093FFE5652498 93BE83789DC81ED2C9EAD6724B11DFAECB2377F66515C7D96849E23389E2C77E 9E5B3198DA4A7E84E6E11B930E6599E832252914D0F673F3CDCC79508CAA5BDE 483BAF0E2AAD64C76FDD8D62C74EF79394C307741EA29D1C410662224D239E36 174D4EB5D4216E8047E22EAEADEBAD427CFCB437CDB9757B65857C887EFC9D5A 8736A028A36F0D423A456FFE10A9F2FCDD3CED2AB34A5E01A0203495AB9DCAAB A8407E41B5A838EEBA562CB1C7F984F6902E49C95394C26EFB45A08A812B9A0E 57B45D8807BBB3CDD26645DD6FD08857D3AC421384F963C8405F638D911E76B3 957FAE4BC672E80E091100121869EE9D7FCB5F9D79B3E2E2645B2BB356820A3E 54A63E499D4ADC6B3DF0A2781CD364DEAC773D89DF2344FEC68E9E51D3E06483 F46BF6FFCE30E335EE96E877C857819E25BABCA033EC6C2CEA885524D40A729A B92253A2B2753FFEE8271F2096C6E54719CBB1BC9F42408DC0ACF5A7AE1B9B38 ACFD6405E0C359B72C2C626772F42B702C50B6CD208158401286E841FF05128B A13F3381C8B25D46E6FDACD10BD35EE9DE9611722D720C4F54CBF1CC8784C62A 584A8C68C23D348EFB7D977C0A40025099E50E89215F4AC4BF9A6095ED130E7A 14BC2AFE02E6F1B829EC149939E781E7DB187CB327EB12FFE5193E9CD00E1967 886DB02C16CFE330BC070D21ECA037CD321756886D37BB75EE38ABAF75632775 216CFEED021857D36DA2017B74D04A5805049DBBAB699D11D87089B49413EFC2 D41EA9C841AD1D5709C10D1527663153082A4C6A313325B41852335EEFDAE715 BC6B487DB5B8814DDDBDBC6A3837163DA3366049394CD42FFFBD3DDA3D0482B4 88F6DA79B3FCDF5B4EFA6995E47333ECBDC6AF80CBB3C4C393B56D63977494E2 65A3444C734D69375CBA6AE9CBDA1E8BF8AABDB2005963A2EFA04A5AB2E40284 4436137322694D0D2366BDBC35AEF7B40A38F1E8546C1AEEC579383BAFB94DC8 2A1F8F45FCB999A06B3A9007CF55DFFD563217F43E861A6405D75B3CB79A7F4B 44EE3CD2FB15EF0DD10CB3831E24C3EBFEC47F695D81BCF3AABAF7F50468CCF8 27C77FCDD01F0061A88670F8621100E5D7AE5938F62A3F1730E1346C2783DA90 5787530CBD22BC2BF742C71B66367764C9010134985D2DB74901E41AA9DE27A3 EAEC869D0BE4918F6E3D71438DB16CA71E9F28629601438BBBBC44DFA4ACE474 37F3C172067D5768F7A363D1B00B71AD4729724E1AD4288A26152C1FB93F6041 60245C6252EEB27062B381B5AEFFDBFE3BBB700540C7180C0BA624557C5FCDE9 7EF75E3ED016960745AA8C99416621D634192D50847C211873B441DB79073B2C 7BE5E58DA4DCFD7EE4668DD9C9068DBC96BCDE63CDE93FEB93EE225A21231D8D 9465F9528C10ECE4F0F12F3DEDCAB6FFF4DDA08771B2B6623915B81A6FF36F78 5C6215271E686B6749743DAA069603AFE2563B68E716C175541373D3810DD14E F927E19489A95FE33831AD4F0AA64A475A385E7DB0E0BA04503F3436664827C1 724B52F0AD613BBD51F0E9C1862B66F9D472FB6EEBA71106FE0EC17DB8FBDFB0 4F5E302AF4937F55ADF6D2086E798B3502CE1A8617F0B1B1A60AD2DCC43036E4 53176351541438C005143E4471120261550FCF74C2F40959D5EB92BC4FE8FB5A 4C9F9F6363CBC53A588C0CC62ED2D57EB10B7279AA32F13D1312726A27B9A0CE 80AC5F2C69A89DCF4EB9BACEA23BA566498D5355D2F109E0AE5B71B0F05C0475 0BFD6B787E6227E7FC2225E47A5B33E03FCC46633FFDAEC1AE8A243A971C0834 D95FFFDEE149537E2B51985FAC21E67362F6EB3B85F9131E0B375BDAA7A5A60C F927596380DFA87CCAF4308D9FF03E1D42E56FDC7521A356CAD5332344D99A44 BC9DDED79E19EF2D740B4651A1A5A41A2E04CC7C9DCD8C4767D615E73F1AB75A 61E3920418063090FE03F6690E5E1BC1479BD88F5A287591639401708C527759 9901ECF10EB4EB7C66522B97A0620F5B01C0C695891722C30F8CB81D0F8FD4D9 8A1F09EB9066811EBCF1E9470718CD98AFAB55B5F51A498D304D614610E722F3 62A8049D1AF9AB35668D7ED6F172A4603F746FF7176275CCFB196AFDD1D56D77 84FCBB6ED8A8F7E776780762C737D1C456866BDCC8337F5CA0FB4779BEAF55A1 51BB1F6AB4B7343F6DF4A658CA579CAC83E888DD0E633FB9874F2E68FA30698E F0FE9A9176754A5E3D1DFA2DD2645F18397EFEC47062386FB2071FA51208F1AF 1FE02AAD0DA958388D605A12BAFDE6D0B0DF7ADEE4F54655578244D33C464CF9 7D14E3A54E4498D0FF091EA28F680D73F01A8C84F86BEB4FA58B607529121358 7E2E7197B056A5CF74F5215150ACBF833B2C1787C505E178D2E8206B7FC5E7F5 2A5D79202EEB3F12182472C322C5D131F5C85D507CC2BE08538323DEE11991CC 8830ADABA736D28F3DC12C893D4F28A4845CBA6D2F2C7CA5BD547ADD2ACB136F 72E527289AF34D16D09570E832834AE038731A49830D0023B8894E50A032ED0A 914135CF712B69F7B6DA5AA3D81CD96921923832DFEECFA45D840D1E69252136 779570D355500D1E17C343BEC6BA0BA66EF7ABAA9271AD305E6F0A50971AE8C2 DF4D3F4B05136E86C46A85FCDE3366192DA41904469AD6AE4D50CDB640C86568 DD8FD5AAA930927B6294F4F768F342D582375B6552FFA7E6655F196843824926 D5AFE1DDB475B1239C5C20132DB5E5128F05B61827ADBF0C18EAF71E5E616A46 AE18CAAFAF8B94DF 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMTT12 %!PS-AdobeFont-1.1: CMTT12 1.0 %%CreationDate: 1991 Aug 20 16:45:46 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMTT12) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch true def end readonly def /FontName /CMTT12 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 46 /period put dup 47 /slash put dup 65 /A put dup 67 /C put dup 75 /K put dup 76 /L put dup 77 /M put dup 80 /P put dup 83 /S put dup 97 /a put dup 99 /c put dup 101 /e put dup 102 /f put dup 103 /g put dup 105 /i put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 112 /p put dup 115 /s put dup 116 /t put dup 122 /z put readonly def /FontBBox{-1 -234 524 695}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5F0364CD5660FE13FF01BC20148F9C480BCD0E C81D5BFC66F04993DD73F0BE0AB13F53B1BA79FE5F618A4F672B16C06BE3251E 3BCB599BFA0E6041FBD558475370D693A959259A2699BA6E97CF40435B8E8A4B 426343E145DF14E59028D4E0941AB537E34024E6CDE0EA9AF8038A3260A0358D D5B1DB53582F0DAB7ADE29CF8DBA0992D5A94672DFF91573F38D9BFD1A57E161 E52DA1B41433C82261E47F79997DF603935D2A187A95F7A25D148FB3C2B6AA32 6B982C32C6B25867871ED7B38E150031A3DE568C8D3731A779EAAF09AC5CE6C5 A129C4147E56882B8068DF37C97C761694F1316AF93E33FF7E0B2F1F252735CE 0D9F7BCE136B06EE967ABE0C8DF24DCBBF99874702ED252B677F407CB39678CC 85DDFC2F45C552BA967E4158165ED16FECC4E32AC4D3B3EB8046DCDD37C92FDF F1F3710BB8EF5CA358ABACA33C7E5ACAD6BF5DC58BDFC3CF09BA2A38291D45A4 C15FF1916FE2EC47FDC80911EB9C61F5D355BEDFC9DB17588547763AC5F0B1CC 12D2FFB32E0803D37E3281DA9CE36C5433655526ACFB3A301C56FAB09DF07B5D 048B47687348DEB96F3F9C53CE56DDD312B93D3918CD92AF53FB9461864D11B8 0138918D0B1270C54873C4012CDE6F886DB11BCEA04B023EBB43E0D0A06BE725 741D08B9DB688731A6C9886C15A83C28DADCC81385EA239E045E8F3670CE03DB 9EE77ED067036595C9F3B1854343BE3A12E486B6E5A2F8AC44FA5378D28DCCEE 306B0E283AA444423F9A4FF38E2B56DCF67A39CEB2C643DAE86865517D5D0371 CB8797208ADEC637330A3A57902C9A88EDB75A7C16FA9850075D9F19578EC666 1353CC1FC512D59DFF847ACCD04CF9DFD4A2852A700FD18961F9C616F7F372E3 62908D305C43B011B4324935801B09227D9CAF8F150C6C22B91B53500000AF28 05CC714C186E39B2E0FCC6EEE6147B9D05729BD422F1BBA3C846DC9C63CC717B 1FAAFBB5218D105ECF34EE24B62F8FEB85E66DD884F9778251A6DFCF7636B2D6 31DAEE5764CA1845DF8E28F12C59D62A5E5F2B498F25A5C3A868D09C7324375E 41B70FFFC8BCDD1767B77E2781BDA283B091DE9D68047D0C8F70851B4BD08A80 E21ADC13BF53660ABB0ABDCBE66306B581576F05732CBB700058C40D14B3A9C6 9D2C4DC308A2EE4DC19D9E7BC02BBF32A5A67078E31DC77D7706FACA6085B56B 5D806452132320B6292873094A5B0B16517F45336881E59459284674C162D1D2 63B40119B7C243747E11ABDCE015A348E9C260353C4B824B4FCEEE4EF1B9B048 E2194EBA2BBC72ECA840CB7FE2BB26337B35EA52F1237A0C33F8F5273613BFC8 47132C47A67E52C48C85583133F44736CB9407F4B0F3B93B4E790C3630D3F893 F33C0D5AA31812AF682A81FB9C654AFFE2BC5495EEE7FCA7A5392A1590AC8A8C 322221761544309C1F63448BB3CD35A8CCEA85AD8E120DEF630C51804DAFC38A 0DB278B8F417DEA9504018FDCB2C745653E0AFDE27C2A7D68C3E0DB09E431FE4 C7248A6A45ACED94CD41094806AF0ABFCCAE08206E8B93988702F9277CC253A0 64F1E489AB70D612ACBBF8B082E949C976D122708C0AEE85752D8CD6FC8C26FA 1BC2F8034D44254F12E31AA730D55310D0122C5888CD6C00C219EACBE8643773 42974DF984EDB5B362EAA32C58C8599BAA126E970BC1B47EB8869E3E539BCC84 20BED887C8FD24544438459321C50B97D4BC6379356952E6B66DA707F0D4EB33 202524D02214C1A0B3BF5FFFF38385B1530CCFD6ECA216BD77C5B7BD71711B3E D9D4FFD978A89761C5D66D290BF4856BB4C4DF7A6E8B7E4B0F1D56281ED91C48 023AA63B4939FE0650290AB0EC3F23BE74323AB3FE45CCD5F63D953ED61EA21F 3DF1A2BBCCF605111085C886B09E8B1203E23E434C32787757D3D4AB678CE1E1 1330B51CD48CCAEB50872FB7C8372E7A80AAE401CFCE1D9063538896BA47A48D A4C16F067C5D9CBE202B4816EFA0DE8F57B3980F7FA1B70320B446622B4D88D3 CC4B47EB3B20CC7B2050642D68D69738F22D3DB36084DC8455B637CA23D4A584 D299240072BD1583C0A2B8090D67641A2EA82AD2A341567C590810CF6504B384 7B5B7371CD840F981DE413BA8C0BA9C6C4B4F7E9558118C684DA64A44421C43F FBBD57054EF4640D38585C833C805593A2860AADA5EA707045A45EC850F9D6C8 785BE8EF2E4191A60F46B3231A2863DACE6D8257D9A7A7A331B30EF48840FC7C 33051E8AE40FE88D39C09C20DE62226B4B6353A74813F4915D26E84139B72CE1 61322BE724D254AB02DA12863E8C48E2F754E6286FD593342E1A3B8FD4A05225 DB78A2B4DAAFE6B6CEFF2EB108AC62AE42996C25A972CAB8D2E7D7706D59A1B7 A4D62F1F219B37DD15912F32816112B7DF4042B0DCAF6F529BB9F02FD864AD31 E02B9E8F9C4DEA24DBFA648EBAAE74878F5CC30BA79B5EFF0F027EDC05578F8D 7F3E3FE30BB3B067ABDA05F661B714C20328F231EA84F5966748A92FF6D84150 1AB7DD1676F1B0B693C4CB818A11626C4C096F4868637807C5C23E6E49EDAB6B 8E0B0AF6ECFFC00EA6BAF2A1A5EF93CC6F9ACA2F891B929F6775A6C363D6AA20 F0C78E245BFFC86D103FD3EA40307182B033895695AE72B82765E5DCF7130F8B 9C28B6DD3FEC623A7BC409C6206765AAFAF1FD7E3B2A69105FB116BC49374133 971D7CD4885DA539DF8F4EEF04F63DD66777D3CAB2D831F677A322CBB80684E1 13BD1A6B6C7E9C086937D08049DBCC211F133500815AB6F7CB32ABC33E3CD985 DDAEBA7F10594D5F3A3AF6A5D3D8DA76194F80F07DE28D78FDDB69AC1B2D37C6 988A003583B2C161A2B01E899A12880A33964D26637909D0E8B53B284AA2F894 274C91F38144E576AC614B465A8AE629E6DC379EE54311604AE63B9E109431ED A388F20E374011C319E2DA115534ED4C575C5B88AF2821975BE9ED8D2F697BB8 0A0525253782198523B62BF7220E7153AEAAD5403AE232F266298EAA5F376CD7 CE7F714D66DF9834E8390A2B2BA5967BD5A70ECBEDB7C19EC3E906751BF96258 D08AD753D00F4658D761DE234D0419D50326C51DDE3075D57742CCBFE3EE496E 958A278835D42434E1C5BE44961E748D611CF1A0406816BFA134065A81C900B7 48C829FE8BF11F9CB0D4B7A91534319295C67416870FFCB789FF4EFE4ED0F10D FC3CECEB00664431504E9451E9852CB5242721A932FE58858B79D2A45E3FB1FD 48FD7430526E5E5BAA26C07CDAA0E709F650F365BF7B4C32DB7E443F90664B4B 4CBB604A185F08A2223EEE7882CF017ECF63CD8CD61B6DC7AD4D8142CC0DE847 8B5B7A8256D912A4BC52F94D0A8E9D1B4B555A67273475D876C8C4C4AD49F3F6 E63A6AAD450FB504566368ADAD094AE6AF144A092435D91AE8E4766978E6091F A609F7D1F70A7D70C1C4D8CB36040463B0756E4AC357F2B8BE3B8AB3FC3F9DEB 3143971D139AF15A2665AF96A227FBD34D0F401B25E5C6BE4A51CDD0F5CCD243 8BE242B282EA140A1D05A72C7BA629E8B958326F7050793B4FA62911337CBBDB B947EFC351439EC93EE359AE9884BFFF0E4771CDB2BC41967CC1EF1E7C9E6739 DF965BFB85A182D65E3226C961A8959FAA2946501103DF4F50375F49AB83AFE8 AEC58BFDF2C9EA4211F5FDC48FFCE745421D8C798678EADF2044069DBCB5D2C2 4789985A5D9598765C9BA2B1054A929A7D7FDDC352547644799965E0B088C3E8 51F4052A705903AE80E0E14C046DF36E52501B714B564F21FE6C27FB22A538AF 35EA9EF97FACDA00B07AB99BDECF31BF666BC5AA31A482958474AF939C1C1A3A 572DD5B72C03116A550FBD2869BBA5E92567F407EDE9AC13872BE3E67BD57CB4 772C53ED8D5D9A131C80797A38356E159D4D7D007DCB0DCFA6BAA373BA3AE396 E5433AA42B12179DD60A781885EE3E0B29EB4A7CA42957F106811C3A9B46BBFC F200244420A622B5CBDFE61E6CC12788E6C34173375C5D6DCBC7277B4D40A3AC 51865970AB4079C892931D7C88B5C6EAEEAF057936A394A2E79B5D62877A0A1A 457B2CE506B75A543CF106AEC566F2D25D798F21315E2B5527A376CFA08B2EE0 3A523E455B7C2179FB6B3DA5D6CE46BE7E2CC0AAA26A677FA8BFC0C11DBC9FBA D5C51FFC71C53C4B5317A4D92BA2CC3FD976D92A4B184F762A0FF05CAABB2EC2 320EC774DF2F5C4B98DBC5F7163FE01CC0CBDAC29329CF7E86318ABC9B2DCA80 254700B0894AE040C6CF9DE28F0045141DCBEB7DF7CB58BC5CE4F594421903E5 E129A85CE3497BB3A5A5FD338B69A6724986FF95705618D9A26492DF9EAAD942 21708E009CF765542ED29DB1B2B5F718AB132F3BE3EE63312A15225B3B559D61 90A6E17BCEC6FC3355A834524DC38AD8EB5D649362EB33E9FF09E1C96D0891F8 70C44804D61BE56E0AEFFC2E3FE124927B1BA09F9E1CB8E3D83D6D847EB11548 8EB7ED2A20680170E15CF0076C9FD12478EE5ACBA1937E7E5A78A357119D58D2 A48839BED44B961314981577E021FA2C87BAC8690E0F4997D068C08D8A859349 976B5480E346C498ACCE2F928A322AB4525465F5E14E7B83857BE428189B0DBF 8200A28DBE8EA6B68C48678F179FDCF0663006658E7957DCA81AC5BBE3FD0D76 249104F7104C2142C7E767B60133D3057874094ED8BDA094A9249B80A53776D8 2442E233F1CF7647429420711E141C1E254B113B6B1FB59BD4B9E1EE1DA6BE1D 3F7546158FD6F960047DB1C812758C6E587B26EFF4DAD15A850D5513B24FF1A6 19B40B0C8F3DFC729600DB76A94D582F940C12C219863B341EDDB9F4EB7BFECC 2CF6A58C2BA1749BD99591A99C436EF998AD7B847358101FF24DCC93D0909A12 ADA3F135C4E1B325452FDB7DA39D63F19CBFE4173F0E3534584304DF748E8BB2 6F9275F767AA632C86C3DC7A2ED1A540C3515C30736449FC85E5C3C7E8A780A2 580E504B9F10A4EE7D755A1723BFF4C0FDCF85EE31A27CDC09D9C5EBBFFC3C23 79C30BEB53A8C18EE383B7A511B5F740A4B35539D23873C324488043BD9B72CC 06A612E939E2AEB42BE57AB5BACFCD976C85B4EA86A9FB8D53EE8F8FF5371A0A BCA2A8603653C3BB280BD5549FE3070EE0231A735353CB13ECAEBB10868A368D 7D65C2288E99877B8A237095C95572834579B63A3B1EA2ABE8AADA496AF24C30 BF2E6B00930D66899AA7B035963A807FC652A2137EB7A4E7CD96E573D104F4B8 DCA6935ABCC39526588B7C3C12E7F58CCE5FBC2A36D8E298633CD145139F83D5 49397E6DB950853667D24D78D28D32C48AE0C4A94F79FDF4A700E253AB23F59F EBACF444E791C933E6FB666679CF26EC814E6329D287CAA0D743DD249FD2EB7E A47243DAA835A74876EB31FB1250A740AC792D3977AC2FE566395D97AD48A9FA 1A7307092D540201D0D22ADAFCCB403FA0DD1CC6EE354872535B36CDFAB80107 20ED3C5C69AB5F1CAB20CF761C262727310AEE68DC7119CEDA5A0C6111759484 B23AC3BEDA964CEE416DCB911AD4055AF0D341A22548D52CAA7DE7763C565EEB 10D1CDE0DBFC7EA3D47F5751F4CF11D068258E0726CEC4E573662030E34807CD B85B91B00F609CDE16F5C00723EFA89675161C38AEF8E2181D4D98DDE4AD2296 9A7B7322D0944665F7E47431FFF8870EFD520DD5618345EE538AF9A98CF4DB1F 3F9EBBC87FE0EF4671336532C09F2E25D73D9957C91B2F7E92F1941A0904902B 82B54B93706DC114E1F4271926733B0FB55DED191996DAE505AF9AC148BB539D EDD819FC069A130E49AECF18D381B99E7B510B3BC981058179B434DBBDEE7BB4 049DB869C4CAF5FAE4536877262EC3459CF45E64A03589D55C0C3905C032987A 61F3BDB82FE3CD9E88CF0C149939C07CEB7381C254A62771E47FB33A6FF3D520 9EC5B90CDB793EDBE253417DECFD2B7D4D3A01BE7B608444253AEE0C6942 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMTI10 %!PS-AdobeFont-1.1: CMTI10 1.00B %%CreationDate: 1992 Feb 19 19:56:16 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.00B) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMTI10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle -14.04 def /isFixedPitch false def end readonly def /FontName /CMTI10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 12 /fi put dup 34 /quotedblright put dup 39 /quoteright put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 49 /one put dup 51 /three put dup 58 /colon put dup 65 /A put dup 66 /B put dup 67 /C put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 79 /O put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 92 /quotedblleft put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put readonly def /FontBBox{-163 -250 1146 969}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA0529731C99A784CCBE85B4993B2EEBDE 3B12D472B7CF54651EF21185116A69AB1096ED4BAD2F646635E019B6417CC77B 532F85D811C70D1429A19A5307EF63EB5C5E02C89FC6C20F6D9D89E7D91FE470 B72BEFDA23F5DF76BE05AF4CE93137A219ED8A04A9D7D6FDF37E6B7FCDE0D90B 986423E5960A5D9FBB4C956556E8DF90CBFAEC476FA36FD9A5C8175C9AF513FE D919C2DDD26BDC0D99398B9F4D03D5993DFC0930297866E1CD0A319B6B1FD958 9E3948FFB0B4E70F212EC976D65099D84E0D37A7A771C3101D6AD26A0513378F 21EC3643079EECE0C9AB54B4772E5DCA82D0D4ACC7F42FB493AA04A3BF4A1BD6 06ECE186315DBE9CFDCB1A0303E8D3E83027CD3AFA8F0BD466A8E8CA0E7164CF 55B332FAD43482748DD4A1CB3F40CB1F5E67192B8216A0D8FE30F9F05BF016F5 B5CC130A4B0796EE065495422FBA55BEE9BFD99D04464D987AC4D237C208FA86 0B112E55CE7B3782A34BC22E3DE31755D9AFF19E490C8E43B85E17ECE87FA8B9 1485831624D24F37C39BF9972D74E6EC4784727AC00B9C4A3AD3DA1C22BD6961 7E0ADAF55422F22ACA5E4DCD4DF9FCD187A566B7FB661D0530454D0DD6C6C50A 7A3875C6CBF8EC7769F32A1F3F7FC1C072BADEC97794D4E90E0035282A170402 356E5A9CD9ABD80AC4342A5283E458A7269252F4541CBB6452B39ED54D336D0B 19928E9CD1AB26AD83EB209E2EC75011A2643813053B5DBB0246097C4821B5F2 C92554E9140BE35B2DBFCD98809A8EC9FC910FDE9E0D86457C70ACB056EBF90F 244DC0A5BBD455E15D6E3180311D52CF50B0BF7D0A7F64F3A1821E0AEDBC2E7B AEB549FE1D51088C153799C6E089B5D5D65E1C4E2D2B430CDF1FFA23CCB25D95 5C4DD885310A706B320AB25C8D742C6F29953254FA54DAAEE60ED477877D19BC D28E9AB576B0EA088171FD000B60D73B3C57F754BC07EBC9BF751B7D2B32459D 993861B7C4B0D98C422A11BECEF76F4EFC0ECAEE89723E6CED53E3678D733363 2DF068AEF0FE7DFB57393BDAA439A6A4C396F86032A98009EAE1247B7DE83B3B E46DF2898598FF5E6CA6953127432A967E4FD41CDD60D6E413059A58FA556EF3 309178B57C16A763CFC9BEEC276944BDEA255789EF4E1ECDE1EA43EEDB955513 F42EDDCF39AE522A1DC2DC523F046EEC4CCAE25792B702C288732F5B13B5CCE7 E8B6A1A1DB86B1EA38883E481BEAB54023EDD9BB94E7780DEEA577ADAA169E66 AB7D8607B409619E79F242CF52E618AC0DAE43317C507CDB27EA8A1472D4E8D9 17E62C98DFB049C78AD15560CE44A39581BD6B555165091C5D41071212A9D51E 6DC3005C821622476DB88946944FBD830DC0293272497557825CF153F8E257E4 1C9A2C1C27CB7610BED918021BCF64795D6571584830231C7EE85AB23906C085 B17C9070C59EE450E6124FA3488AAF61751160F6CE1A0618FEA94397D1EFA9E0 621AC7E9AC853CCAE73DEF3F7438E921BDD42D7E139C431530AF569FD8592D99 8DA4F085A207616874D71D6005F57219784B5B33CDCF2CD8F061090E46E0AED8 3063FDF0E57BACE8CA49A148F35674D09BC0E0DF348A4C699E4C124BD349E1BB 57D26EB43B1B35213BB612B9E011FEAD57A8654671AD663C327C3077284AC6BA 130F486A332EE15C3B8AD603D3EC8EE45F16CB58973A52D05020653D9B9EFD10 D74AA2C4E35656CCF2D901F87B041B88BDC5D9D7FE7A6DFA47708D61E1794C27 10A6C76BE888E107E31BB3E496986BF219AA557D692B071ADCF381EA4539BF84 8995F1A178F7A332B954DBA3A3B39839AECA02022EDCAACEE87BA717711C2765 D38BA776CA91B85FB10CBECAE410C4CB7C660F95751B24A17DF036EF09CF6976 E2A57F58AEB69CBC167B95E0D770A91175EA47653B052BD4AB24F5AC5545AFA7 0C11292E3887D990977B338542E1229A8CCFD8C0E569BADB5605DA11E1CEC882 B56DB7410C0876AF7D322540656D470EC16278878934CDBC9AC70059EB738B06 76B7A7F6AE8A03895386CEC5C04E4466234DA674E673D68B69D8960409BF9BCA 6FF50BB8DDC7A3ACB3EE60F34F7D43C41BC14352EB7C8D9CF3B7A162C4EDFCAF 4E14E837DA0851B75E31AFFA052374B060FCFDC55E5DDC193765E863391D1E4D FF932F6A8915BC5636AFF782C93A7E6262B9ADB11C2992DC6DAE4B651679833D C698202035FC4BBF4AB03591B29279BAD772929660EF28C160BBF499AAE52DCA 22B129F7E74694ACB5ACE86B606AB5B3691C088DB9BB8B84531607D0EDB8EDD9 8AB5C323E2666FF4C5ABD0B555A526BDD9FCAC1693530DA33354EB942A0E32C6 CB68451FDC59C7021CB2515A4CCE6425CA698AFBC8C13F312AA0BFDF84CACF02 6538BC2C8875FFEA2B8A39768A0F6BD256D92B30798F398B60BA289C01C04F43 2E55137362A1F987B40B039ED2672D858F52B14D718C18A11BE736A0B22A81F9 370CA99A50204F08247ACFD011119EC1D50E0E2378B2903517F5012F48476D45 6C5F4C9BB93D53F712EA103D3CCEBC64457BADFCE85E7441B74BC0835674CD5A 98E03DA0A78973E464F962B26ED4C5AD58C72B8450609777145341CD841A91A3 93D93ADA15F56221BF645BF622CB8817C48021EDC41AAC01C56A49CA7C89871B A98DCB0E30B5989128563999CBFC1FF9C7542E451342CE5404C27512F55DFE38 5BCA044EBA25734570819756D567D6576D6A3CE3CE8A988857E520E7BB640BA2 9624FE93C933A134E9E3ECDE2619C47E20BD96D3FBCB73C8CA3B8433D3E6F113 52FFF928BC7B2641395AA81045C2CE6CB3C17A4C78FE9E0CA5A3C78C6448A7B5 11D068BA4CF960D73D3F84774EB444C57BCC9BBE04A02B481F3449F25CA10DC3 BCC66B1307057546B7FF211AF227D326CEAFF5230F27D7D92D1387064577B49F F71BB4CD5C6F4037BC5DDDFAC58EA79DFAD68220DE833F6B9FB60D91B3152CC2 A93320A177A0610EE8B6FE569B53232162A0189322F8D212AFDC31601EE41B5B 41A011A804BF9545E2D56367F9A3B21063AAA0DFAE402843AAA4D6C8AF8FF215 751740DD7278D5677912FBADD43273B113BE40C94727B7FAC2362A1D56F10264 BF60A3F13CC0095B85CF45D75C5DCD5D3FA8FF52E8AFD5AC03E3843C70DE9128 DABA5DE448119622FFB49E20E252BB90C3CE53B0E9D1AB0141C6667526549F36 29A68080B2ACFC6BF06AA77A99927A3AB333B17C59AF626DF4F4058F2DED886B 5A7F5C54A5DDE271605384A57D8B91E3B9F566AC44FE0941DEC83465A87B668A D9AEA1DAA547A1770A6B86321C50EE9DB4160E255EA25B110FF8A97C4E85C2AB FFD0F86D58860EBFD2FE5DE4B2EB82DC4D14A151CB82074CC0E08E384BA91BF7 7DFFEDA1A55279B9BB3E05733C622D804B8480E3D5334F46567E70A96AC02DF2 1A7C840CFA32C9CD9D559963A8BE7A253AC4A1E15488C0D905C2867D7DC719D9 6F503361ED81CBDAE4E56B81E7847D5AE5CCA57EC28EE3287E682A31D5142F68 6084B8BD7016D7B2422D731B9C4A98194AED311D49AA8AAAF4240F0362A56C19 52D901DF14FDB88E265C68006CD53D7813C218D7F7A5D57B1AAB851546D414DD 970CC7E567AF857BA1B8FEE20C0998F3507C57077CAA563E85D71A3365B42047 AB39679BD00AA143BA3CD83A449C365760788AE6A0E129DB95AF91FF2A7B1E93 8228D01473A18D9AB7F4E518993DD0294A2EB2BEBE1843364B9435AE39469E36 E0F72C9BB78A0A00E3A7951B26868E321DDE8D0B71A470746146664F8D2DAF5D 07A202A2FE1F3FA443B5C004137F4D3CC33B3E77D09AD8D4C1BA590D4590EF49 A210B8D28616638D30703A21B8D2DCE2E41B025160910E58057FD11B57A43BD2 5548BC9530D65AE4E254DDE7D28CEDD78763D9EAA91EC60EC8DB3D22D7AA3784 C9881299BCEA8A6BDF7AE933B9D33584A0C0347CC03A63790D76F7F0C1031429 F9584188435889C9C2DBE03F15ABB9A475FC555F835DE7BCBC4B507DC2CBAC96 5A970EFCC63787B4FAC5CE6DB77BA6CC1A7AADB061FB7690DD0ACCC5DCEA63D4 CA61F076B69EC0B645447E81BDD9B4EA5BA6335EA82B75623FC2A89E1780C623 6487E611137A062FFE5CB6A33C489FEB948F231D17E175DAEC5787B8CF65FEDD A35FF72C6A4C872F0CBDDA18830D4619AEEB23088113696A9DC4BBB8687CDDC1 9EBC6C084F45469D9D660DAD5C9BBC8FE37568D32B9526199509572253EF986D 0993B3B00AF560AD0FD40703591F2007523CE415D9D1FE76AEAA81365E8772A3 76C140AED2FB22B009DBCFC93C74679FF1E9285B992C54F02E5AF8BE3EB59049 870622F905C44321B6E04A3D2DF2EB034588C139AEC0F63A86AA5F2DA4E58FA9 CED1ED03EB57283B2717995BD2AA4C8CAA3F6F9BC53BEFC28CD4A22E66B65E3D D4350CDC01E3C4AB499521F5019DDBA9BF5017E0A16E1A9245C4D053C959DF82 10A9BD2D5EE6D458DD3978C6048D8C40A63202B7AFD55A87FF7367FE9540A792 CB65AEB47A73AFCD2AD1D72F8A67A170CD8920995FF584839C850F9E29F324E0 EA714E76795F5940A22C2CEAB7705C3F27D131CD953DD55FF34947EB607333A3 FF50EB23BA60997CECB558E328ED7219AD25132210247DBD21BAECE37758136B 54B3758CD389521CA2ABA4D5AC84D518AF62309CE4B464A02A871D9EAF114E4E 71C4884E2808F39C42C55F5C0337CE739CFF00237B77FB29A0E463AB4E6C0A60 C8AF1D919A82050BFC9D03693507A57F113ABA9944BCFB5D316191339F605789 39FEE83BAA0B518BB597D2BB84C26C42B708E0D2E6BB23D1C8BA92EB9B6D29D0 35160AA1B077308F223BB01F4A1C0911776B1794DC0D1CCB7884720070097073 670427C487452C8D925F701FE08A63C83B8A545ABAAE387308D25C10C2689C92 8DD203E28173DD1E185D8FB10FD5F557DF729C482377E0C46EDA3C27D7F1C2B2 84AFA058EE3F153DDF4633EBBD1AF3A82AA0B03792379478B04085667330CC4B 0A824CD82B7C30AF73CA72A63F10438D386EE816A23229F6FB56244DB81730CE EB0020FDA14AEF63C1F435EF0534F894E0BD46E5D027F49B83086412F011316F D415B3DA8C5F9343C17F24AEDF062FE3E3E7E9DDD52B4033DF0C2BF1D24B7A0C 338BBAE6551EE4A393CA2B7C1672C53AE5F95D11C2E41EC4C578F67CEC69B7AE B7DF4A0A9F3D5EE70D4042554F820483E9F17F7F09780E486B3C4D84B6874AD1 D33B1B08A398C8DBF03A280AD69234F6B282BDD0092424F66F54B982F6CABF51 E92962EA8A6FAC931FB0867964FC6170E597A4FA80CD3DC73BC48384E17D0F64 3AE4B59879B97353ADE232E5CE6165079D091CB30EA8D9BFD710EA7DA0A26F9A 1E48990959978DF5424EE13E48B4110C97BB0BCED0292F0AEFB3D7389B2C1940 FF933DBC8D06B27F1D5E468B2A516C21B95F476402B1049FA057C819AA8AB444 092C013EACE3DF02DEDF0FE7741B87D41DEEF6E28972D82C279237AA183AB0FD 5840EA184430273F29E9141DA210DA2B40F76D47AE25CF31E693ED04F65C1D73 219E44216DD709887D96F7C0AD501E031738E83E41D810B0C76792DB933C2A4B 2611C3CF12D4EC6EC3BC5A873AA18C4E37A9B0B878C4DDBB753A4B5D6634EE35 D51EDC4AFB1BFDCEAD07F6EBF23937F8225D82ADAFD68D21A1879EF8E906FE7D 10607C103751D5197AF61C6318AA4F1FB712CFD3D801D4F470615E0E108F795D 38316975C6BA07F83512B8AEFCB5D568F5B2BE3063C16DA807EBEA6B633D29FA BDD2C53A730B7E610D08EF1471F38D51B86D512F3168116E6A22B5E0E6A7B6F5 32AEB9619FB938142468D74230D4C371F9C5B7FB323F81219FF880DADE927BE0 61CA572FEBC1DEBA5CFD7D9F621BB9EC9D07D21AB7082A0E325C4FC1F28F079B 77F358FDCD2CD544C3F3F158AE0138A759BEC398B77668B49E791CDCD067DCE2 6B52C1EEC694273E885ECC7A80483AA1DCF76A4F08C3F76F96F289E2BD77D44D 7643F3B716A5B7478E440091A492CAE6810FED1DEA6A7436AB01F807A4AF1573 F7B53934F0F43EE4772DAF37BF555A779F3CCFCD302D767D76A522AF472E19B8 FDA4D7D235B112D7516D9BCF7CABB231C8E144D7FAF017BA9E40BFA00B6AC8C4 76346C06E990DF89A6984F87DB7E009E18FADC72674B6BF0C1841225399AD1B5 239C7D1716CCC83D336FD600D69CBFB73853B692E9AD090957B625B62FD46DF1 7586AA4160817D445DEF4B5934B7CFFEEA9263F9666652AEA45830D2CC122AB9 777EF6CCF7C78BAE5F5CFE747A4F24B2EC4A90A66773F14F1EEB76AF3CB52A14 F4AE221E55DE9508EACEAAC84D38662F885FBE11DDA797111ABA33BD4252B029 9B8B9D93C0B761C4B4AAB4AE8201180DF4C2FC3FF6962752551F459116B797CA F085D987848AE3D92AA3EE9598D182DA5487EBC5B156A53008E66EF492C8AE5D A0D75EF8E324F8195EC8D5A10AD5765BC679886F3F45CAF28D9B846AF1A6068E CBFD79F22BC57251072F0C66963CA6F05CAD9D088DF8DF349C8E7BF3C739330C E2939BA377C5723B44737026DA467AEEE225221CDD9B9D31B812C03C6AEA0179 BC7BB15E8E5F3281BF04437D81AE128A6EE26F14B966354A2BB7B83FD6E9E922 5A1199C94DCED377A15BCAF0EB11901568F7D15DC784B473B771E332E071991C D8686F5A07AD236D183C2A7E1537420FCD8982AB95929690B6C987D35BED2ABF 7E0F18861D10FC30C79800EF47C2C67CD5999E03C2BCAA3EA01105894B3FEF94 D1C402253F288654D08476AAB61D0F71A90A865579E54E877B023941DF7044C6 DC1B48314216004D3EB5AB86D547E707700D68A88CD5F91A3798CC6686600DB0 6BC097EF6F31CA809D57CB64FF7C1BC23D29B23995E593E7306672F191BB8AFB D0386DF6974A9A5440CE95DC6A82AD0FF6D87D89244B00136CDD155196F5C8FC 7ABC496FC6B93D6F511A3760ADBB3C985AD889DC4D0B19EB361462C831C57A38 B20B691D6504E4B41413F5F6C779811EAD9DCD4B5C093CC0A847070CBA26F24A 10D0DE5BDD012F5D6C2D5ACBCA1EBADA749DAB78CC6E6C292DCAA402B4A3F9DF 018C2253D59E51C6062B78E457EC5A2811067EAEE082AC93486AFA883557F7A3 3E9982B04E7BEB3A80F00353CA9D918FE21C384FB9A035A3485805E0D9F61E63 74969C3EC95910A8D35AD696A57AAC3AE554DAF2DFF0B8FE3DB2C1BFCA8776C0 240D30513E2ADDA29BD1650709FDCDA07B5D925F7712058AC70968225CEB97C8 987E7C523CD2C65DD2D8D92F7D08971B7346F921A496F4F164979F1385D3DEA8 F19D0C34858B16FFA6E168C68A5EA1D3258BA3D3112328B387AA2A74171C767F 44070A0AA7724E91623D1EF679C32374D2D3E9DEF5E9E4456656BA4EEB807F52 991AF24C55F3683B13326E1BB696E04728EC73E5F1498564C7157189B48F6DA5 04A3A41F4B6138464B362143D50281DF039B77F0D5EF65CA7CE1021AC046D452 7684486FA59E2B7210E7152472796E016CD2A9895FA90BA3D2C829B8EB083F87 C4E12B55CC3EB723C225357B87EE089EEB66E3A605347BEE111D2551E8FE24B7 F69BCFB4C6B723B655C8A0983677FD3CF70FD85A2CA88807A3F62E9C9874664C E2E0DA604E4180D50CE83FA777D758B8A70D53B0C4A7C3D868D31E817F282F4E 027217E35BC50DDFCD868538B617A7E15E93929DFA2DF4D0EDB672361DE3E265 B087FE9468EF566DD0940237584EAE657B9009BDA4E89650960DBA366BB1DE8F 294E18D1FEB51CA310B8317B74065EE4D421318B1D2B795FCE805A0509F4B942 D511DFB161D8BCD8C3B48CBF1D5D83FA9D3DA6704B7ACE77492324B11358C713 BB4B54AE83C5F7C833E1343138A53DF8F0A6A5D77AD8ED8811BF368208794E78 4580118EA799F5E2A7F6C3A1872788AA2CE1C554CBEDE0E6501973914A28B60E 8652374A4E09570F497DEBC7C4587FAB7EE9AFF34F3B6610B807DE79FB3C4D43 E69054C3B888ABA6763C6E6D4B2A29F1575AC5160045D0DACECEA6F098BF1FB3 CE1EC0F7609C1ACFA5E09250054E51AB15C6E883C6E33F3728938136E2B5B42C CC7FD4E6F80FE2C99E59F2BE531F92E69B3BDCB449EB547D9E21F8D2AD1CDD32 01951D55F132622AD49B3231A27666C1E730E2DA227CDE83C6C4460FDE3EE2B9 4B0A6C31F089A9FE7ED81134B7EF2CC9E304B9CD49DED99F796D78EC826D35FF 111E765C3F6777F260E226DF26151860B4B712A544BA10458D9DB989B621A91B 85226ECCD6087C168973735F021D14DD25F0391F58B9889108E9D06846ADAFD5 56E8AFBE2D90DD7BADF2A7995112C8241829453EEABF8C1AF2C0F9DFA80FC924 31C4223A05B839CFC5B6B7E99FC12CFF8C7C85945B5351C109DCB8D70A55B27A 82B494203E319374EAD84CC896DB29750F0C9174529EA856099924EC662C136A 7E640D30F3390EAF7E982F73C507FF981289721342B75F9A4FA94E315D9A6AB4 B8A1419FAB45816B08F7F89FCBC55D64B4CD94C6C5B72EEBC223163327E6F1AF C4E9C288978F78932220B491DAD5A79032575B5AD855ED2C3808399B0E4C85B6 85C1A5A450548FABF82223EE6C612FA25510627A5739D5AAAFED56133CB9C335 403F9FBB9138567B138C934B9DED20EF98E8DA9E5D925491186A1E4D25067E42 A706333E2030341FFA2DA7ADC856EC643FC7421153A2D6A36532FC21DB0E3DE6 196EC6179345B5F82F4E051F6A4F9AB535E0D47EC7648FD0A6AD37DEA6233C2E F2D877F7797FF3D0F7F4C05EEF4D733CE11E6212D6BB9B5B93D49E4470C6743B C0E1CE7D3E712EDF12937AADE07637DD9A3E29DC9F50AB77A2A8073A658A447B 750B87CC50C23830AA3E60E35B9952D7370FAA9FFAC27A711B6F3CB0E97D32D5 1EC0346FB0DF80CCBA7A170C4378953403A60E753E53E3F84AD091FC5A18538D BF9E835123F20D6EE5F7F99B855086467E18AC2098B981EAC35B8A97EBC00A31 C64DB3B1C5E6FA3AAD820FF451A2D4309FC1A5540923994FC6001F8810B4BDE5 31D65CBDB711F766ECCE71BAF8811FC25760008485721D94C28EEE2DBCE0CFAE 4EB166AB627A3A9E1279DEB5D5DBA64545D43C16DDB7A11D91C1FAFE50A75872 A10F7BB7CC881B49F401CFDD8649DFE8F957B61541C8FDA471AE4D9167862AC6 EFD74E9D7C5F18A0477757AE2F80E27C0B982F994F235F89EA3AC2098A9C6349 E3CE99935D7F22E39E6EB9BAC2627323A95102AC8AA7A619A665C08FDE765E20 CBB8F5B1F72996B674561139CD8ECCBADF147083695360D1FAB8372104D2BFA9 B72ED3D69D4D4431AEE1F7B378DC87DA1DAFE262C0D32A6C284F148BEC02B7D2 DE043C77CF438D9FADB23E00EE11CACF60B1DC9BD8D2821F65631B5852CE00C4 74018BE22F51EACB4263ECD4BF4460797EFEBBB2D33E83C666B37EC1B9AD82AF A4218550D46F948D9961370B247616DAFC5F8ADEFF1D2CAE6BC313D614ECFE1B 93D6BC69864DFDEC907C4090A148C725F83E01D304A40579858E9A2DCC9DA159 0DB4439599D12873C2437690E77F7A09422A598E2B16553A2ACF11107E131E43 56D78D13A421A81A6A5CEDF25D21B0D08496B5DEDA173D7C0B19002BCC2F31B3 8718D0303D3F487FE7B1828F5944827B77061CE00E8D75BD687E78B0473DBCD9 5D272DF3E8EB0940723CCD16B87F758D1F95D3BC0D5FAE302CA0D8D749A80CC8 D21B12713EEF51F64BDC58CB6130C946D1F814D7D04AE6B658BE0FFA82A4CEBA 3A7E86E7C8867DF62A2244D7F8465021420E1467E510412F6F473EDF6C87AA7B 2015E6DAACE3EB9284C4996D045351A8D8CF6D06F2A1E34B2152CB0CD30C5AC5 8B83B20E299942D4D42DC0F6E5D0651617275C4BA779BC7D320B49FC995DC8D4 915ACEAFCC1B95E39F70860E23EF7CF55A48AACFA7348CE3FAB25C36D9599C42 3F7868118A9E95B3486DD38B678ACCC0BBF47305F87120F5217480028813D7A9 731FB2982E1EAE65BE2F6704AE181C0D2BB3337D0E2FA88FDC8BAF47458AEBDB 0A81D010389A5FCE81F9C37E63BDEFD0E9AAF6CB04CE7A5516ED5CC4DE192349 5508325F95174ABE64348408136102E0F5EEB91BE5B2A13D3205079F7A445AA5 65D5071F35AE26D652B86C7ECA31FE913E87DC9D9FFC40CE93BC86296F6D335C 1E6A830A39F379DD244CCD20FFD286CE91A4E04F10A5C33C3E36A4E00BEDA830 E914E2C6A8F13446FC9DC349A9C497227178AF6513277A37BEBD8F61DD51CFFC 7CD5241DE20F4A2471A154405CE41A3C884F6F55E1BF854774C92BB0178A6C93 28ED1FF08BBBC3864D521518651C119E215A8D079FECDC9F31C589AFE2D92FC4 3F9F0389E5C5052B2C0A18E6ADBAF12DE06B2DF2E4F37200A88F091104B9584F 3637E1E50681390C67E2D6BE6B644BE03A6308B4CEDBB735770927C043AF2647 A0BCBDBB0846E9B35A206CDC792EE3871448BF4E6224E0971F4C26A2494847E5 F15045FB85976693872F5D550BAFBD39F885F97DBBF4D02F4469B0E8494EEA91 09A4AE3850E719E106320B84399D50D8756222F0B098F700B87A062F7D7B1AED 03BDB573153B32DE0688C23E73FBA61FB6431A882CAFF8CC564A216682913ABA 83D1468FD6D9B3016501FEE5E4F5B885E1E755F0FD135F3597B8A02E241F0FB9 FB0E07215D0016CE1550664A192EC6F73609077C162F22823E1132EB5E4389BA 1CF11E43D0D90BD185506FDD712C0C0CBCC16C4343A3F6DC55CB1F1524032B8B 1AE23ABD86A40731273455CD6F08702A5462265B2077FB73C1FCAF89E2709B33 7B4036B69D63B55D1F2709DFFB610F135E94CB16AEE5B63D318E94CDF8D8C8B7 9B9CBDBFBE587A94F8F0CB7236F491674D68C1335ABF7AEA140CBFFDABEA270F 7900D4F8CF5070B675B0C09E303C176FD6883E0BB67FB8F1167837A335D2AC0C 040583C2E5E47F6D118ACC965C35D525730971F00C306E7BDA5B1674C333F19F AFC3F4B365539048DFDD09758980C8EF7BA04CD80A9CA5B1989CCC48AF710C87 BE287779CDA58C6E950B0E25DFD0CE899AF9A2BE760D9C9F1A171B550317A0E7 46B70E3FF3DB5A48317DFAB744BA6F7BA427D855D07F1E09AFC296B452C2FB3A 21806B5093904D11DCE9F12C5ED690FDA311DC60FCA3E4034E9D1DE1FE70F5F7 DF5ADD47ED9233EE15BB0F58894CE3E06A07E65E1F0762D057C66582AD1F0A15 A1C9E6B013DABCFA96A8E4273B981B4A378CA0328C1E2C7DEE5A1BB3C6CBAA56 2A06972BBA619DB7DA5B853732E85A5291D87414C4EABCA8C3DB8877F1A97F29 B4FCE238F571E2D2083EB8B9D001D4A047A4EC356D83717AA4B2D7CB585F2A0F 6A86717731555BB3F615B038C1054BC1960C76DDCA88934D09FEDB57122534C2 E1D439A4ABC7F8AA4B5153ECAB74BB0F0CB6A0688DAF206AB8679C84BF9903BB 37594CE6FEEDE981BD4FCC99224CF2BCB6B29A06198AC50D34E1794D41C6B401 43857B656ECF4E0219D79E7C43C0A33DC309FFDB9FBA56260265952FB50E1D8F 4CF2B2153C76C4AE233D7568E8CECA9FAB4FA520F9C76E68FD8856D57112E2BC 2D938ED8BCEDCE3096996E43DDDD37695EBDE0C342D657D25E05CE1A7242737F B9F4FCE76149696C3751D5B2DFCDC3F5837457698B6025BAB49BA8FD5E001ACD 0DFE485477F4F476FAF8A01ABA6F45847C087613E28B39F31D20EBDBBD6493EE 6C19ACC243F4665FD2D99F30652992BA6D51E02A7DDCBA80E7B0D65D34E0FC90 9F72AF8CE66C094B3B65E707DD8DFD3528B122BED1DE026CB95A76A8EA2A844C 6446F2A52AF24AC2DF52099B0A3DF6383D31C042877D862F31E909808610264D B102747C5F2D2926EA38CEBA64CAE9E6E0A9E27991D0DAEFE0077C38953EE185 8CCF6B988588678944905A3E18E71855DDDE2AC47D5833752BC6478408E8DB0D A549EEEFA5D0D5E0BF323C7E9129C0E29972F46B3EEFE7EE4D6D083444A0FFFD 9002D48AC000F610742079A6E4A28F8A1746BF0B3D118AE50610FF8E9C1089D3 8FCB65114E260A20003CFBCAC6E1EAD303F266ABEB366D25BF2A7F3B441F44BB 3E6AB8CD3C2798FE5FB33BC3D7B8DF115E60AE0E27858C0E33521EED904E8AC4 1201AFD5C8F3B9B489D1680250A4AEFE6BE04847701902E208718A6D21F83CAE B051DB7D4C56AC99F9EE23EE705BD501CCE075D4D7597D1B64C3F3749344F95A E536CB29239744E798557E8395268C3FD4F5C5CDF452EAE377B58F07A17AB17E 7F3298D04DE285FABEC7B75CA4D7412E48B9856AEEDC683596595AFA8D58A37D 45917B4E05352C585BEAF6EC2EEA2FF633E808B0F2EAFA4894D8EB42D58DD478 265AFFACFAB4940B4301BCF31698BF74FF1B3E4E8A43E2A68A7A74F061D838B3 A94BE957BAB9947B017BCB441A28A87D12A1AE7117CC643513FA1046DC33DF0B A09A5F9D7938C95214AC128488AD3E61B43D2881CA864CE0B4B1B17B3CAFA5F9 ABF681A5BE6721B716328D35AF3469C6E00F6CAC886F93BBC77A380DE9D1CB45 F8BAF0D4432CE0561524E40C53CF245130A95D0D0CFDF20AFE2ED9E49A5718F0 2C1278DC6D33A81ED5520B16B91F0092E2C75F9734980842B27DF50A7A363B73 06C29D7B97995712A5F622B4378DA9CBCC56A60C4BD48E7AE0B5EEF9C00BB218 A2C6E96BED088CE74118B3C1E626F3F4F4B6B71293CFE711746CFEAF2EE7589D 603A3AB09489419FF46AC8C4A59AAAC6B435D3B6303CAAF50C4B07BAC9FE263B 907AFE986A1D35499402545DC7D8277FA764CD8D1CF893685F2EE11F0B90C7B2 E0AE125279A250F095CFB2B6A340BAA287031B5815847FDC9F49CC0C5120A82C 39EA19E8735A4D3FC1C5886E38BAFF64DE551D50950A75B744A38E4C09767E34 A18CBE49D5A0D71D6D3D5A95F7CAA5EA56295044A771F6B8C344860095F8DF42 26B3E40E6EB2CBD2A3937FC9EED0A362759C5FD03DA8D9CEF08758D3D2F2ED8B 1B78BCF1FEC970AE4DE04359DF29CFCB73C3710E103A6C0CCABE8FFA1FFBE1B3 5FEA2043506CA42104BF74E1537FA50C615A9C7DD03C071EB816541D3A43A2FA 292EA929CAA2E74C646E14B293E8926FEAE3F7A2F3E42CCA1830E1DC8F498486 5C143AE443297BBD3CBD38780160A804DDA303ED6D11A1237EE1E05BCA109174 CD2E6D530FE3DB2E123AC2F188C4C4669D67DBEFBBADC06AD133B93171B9756A 3FA15F2C93A83A91E17E16ABAC05E1FA24EEBE30717A0E263C46E5AA4703D742 BE9D141316B9B814E3B9D8B484900D51BFB242F9068154E5379E6AC446CB90F4 F6038581F61F5B022B690DDA23A38B626D86D6AA3201968D0ACE21AB4B0FC411 B140C6D525711C84035EDAB53845FA7FF99C4464ED3A8E099331E5F55F034350 FF47B6A1F03B33A33F178B79ACA9633EEE8D3AB8B337220CD76CBC8E877CDE29 F1B5417725FE3F86D0F6A1FBA84B9932AD363BBB89021A095F25C09CA4117B55 97EB0F5E3E0758812DB6AD8454ABEFFAB92A69EAED5782504BEF2C5D041919C2 A8993E1BD9CC4D811D2A155AACAEECE21412FA3DD6B3C136169C322D053D1D66 C4D4CA5B0FD34B256E1E2787356722C1A65F74B4E91D327E0AA956B7D65C1B38 45A8E53F483E112DAF377F14A6CF329B4D5509E9CBACF84914CCEEF14AFABB3C 5591B1A7D81A200294DF86BBA169FAD546C1B566F61A22B093A48423130A81EB 3C85DD03E411AC49481F286F86165B7FB35CBA33FBA71DBD29C26F5235978175 E722A38F7D71F07CC356C310CA2E16A7E8D3BA2E11D68F761144F6929BFE8595 3BFA867307496176CF59B55A9739F9E60683AD6C5A5C3894086BAD8644FA3677 FB04D47F413DF63359DA7AF269EFF011B2316339745E87059DB8403670C09143 B0A8C78DDA27B0F6E00DDE08830FB22FF8A0467F707AE5D668C8333BB1456333 114C71A1059427270C2077D7CE0A46CA3AB2D90E00D0919A54EA5FDF7C0EDA65 7EE6F0EEFD8E6167F0BD1EF51A8E67599A546CA4CC3BF6E2516FB44DCECF1EDF 097CCB84AEE983298EB791FF86F46D4A6F489BB2B8D54530F7153252786E8089 0E49B190F7DE35D671238A85F8CEDD5D5FA3AC19C6CFFCFECC063DD36DE8F0FD 689F4BEB91F99D5A29E79C2C95E0F53A4DFF0EC21CD8BF12281C188483193EDA 643DD2B75D4C59A1C80EA3C2135B0E0BC69825AD14C38A699F1E900D7602A774 BD8BC43F8E64302D70EED03C1C23D0B8054D1F3CD3632FEDD35B2B020A7C32E9 388EDEBA339A8883A0F5094B2BEA1A5E790F949B6B742DD7CDB44C4D2CA022FA 140A64CDE947938BB6EEC375B17F0D0E34983DBA4713B96CFE92956FB9EFFB24 FB19A2DBC3171038D939F6DFD83B47C15F5CD6C27C5D89F9680E304190CD6D2A BD48F57FDB707C6914C364786DF317621409AF139905193C035283598EAE247F 77DF1A942F0DF0BAD2F07C01770ABD8E7991389669B01107F81190F5BCE7B421 9EBBD055E4F6CF993A6A30E2E21680173F3D1CD2EDA23E5A9C1C913C12CBFC5F 2D47D55EF31820212A36D8DC3063CB6BBED676489FCE509E27F4F2C1E4CD5650 3935C19F3465B913142FC2C5AE1B82B82ABE136B3513F61EE49CAA651BB81E36 0D6B5FFEC7778D3B8ACC29CC48570C4B406ADFDECEB976C915305F822C36307A 73A8036659FF7816A96CDECA1A6EC05C224A95FADF6AEBD047A728EC0B9E9D50 E0B0E0B84FD43E36F9E2B499807F2741DDA0F4090CDF30EA28484939DFF8A55E 24BF813EEB0C32DBD09366AF55827A9B36155EB55656A25499C775DF429D0FD1 5011EE720405AF88A8273575F9E67875F509876F14B858256E9C1AAC27082CC4 012625CA090BC40BCD4D745C81F36C07EC065FBEB3218107A35DA3160F310A07 F8807C2F45222084EB773F53D7FFAB06D4E97C2BCE586C4BE81AE50D0208406D 8EE65905945868319C2B244E922D17ED9175D5A0111901F5E24D843D20135943 AAF62946651B1CE7017002516437EC94BFDC15FA6214A826E98C22F80DA0ED91 F3637629DB2CCEE08F4BA5928B2C78880C56931F027C353F106F3D76E22CBF40 44B5D45AC93D7BD82A3D8807F98FBC39796DA996933423C4FA3F3BE7CEE54815 02E5A2A1513B18A0A8D0C0ECC21F780E464C39D4D24E8FC608B51BCC8A4C1A20 A3CC376541C63216AEF3D8AE08379E53B1C66890A8AD5FAFDE8093C73A620B49 C7CDE99DA05E8F51DD63C4E216FBBCC2921079869FF018D5DCB5075A7B902AF0 4AB4043526BB03C345AAA4E706E5209F777D7300BC28F725B7E80DB1CA543005 19364184E5FE1C9222DFE81F8D61B05C97D61B1A4E0A4CBE77E1772E7FD96205 D9AC65A5B487355979C2FF754CC559006D848F562414F2C279AC4BAF06051479 0A3F942F9DE11F29C09478AB45C94135D7F92D967E80A71D7E5E2A52F655133A C3D5EEDB00AB66F26919F301294E199AF67BB0F0FF7485DCE7470B7B65EA4E40 E2C591DC9D03AB4F1782560BC04036EA7DD06FA0A91B1806791A1DAA52770296 BF5DF0714AAEDAC6E34BCE80B0D634AFF088B6680DF491BB2A61195DF3D20C38 230C80126A3EE21CA4C7752E6E05A6BFA11009C4A15C7C95FF8D89CA71AC3573 8F7D2F4E363B5B4CD050CC5F6ADE7131D2143715DDF9FE5A1C5D482D11416E69 8375AB7F2252F588E20F87C86A70C3B58A243306296C5AE7CDFB62C401B84F6A 85EA9ADDEBE3F5A7A924CD4C68624BCDA20ACD64C244C0F7D0DF74A248FF44F7 A9743572185FDEDB07724A0DC04CFDFE1CED92C8B15CC872582333ECDDEC1DC8 343F07B0F06DCFCAD7C93F23B5D80AF37C8AC6349F6BF0F945D1A60C13B8D858 52773276E3E89A39AD96D26A4367DC408D8CCB8436AE5DF59067C37BFF63425C 2067E441F6C69F3E4229C469CB3563F2642CC641F277EE6BC43A67E4553063D2 6D1B92908436E8A74D994B54F09D242B045E19F8FA037091660B1D35B8D17FEC DA39961845A1589F37CDC12F352DCFE1BF4FD20A4ADB8881F8402732063F6CAD 81DEF02449F18B6F94E8DFB751035FADE5C5FA4EA1BD6B2BEB40758A004A77D1 724204CD349DA3F5067098149AE67CED8E206D1976FDC78646CA50C9CE13BA3C 3AEAFF566001DAE9CC9701357B341BEF429481D8CC9DC8BD49A72B8AA2811D0C D6B028FC73B061AC59A8868829E60FEABC8FAC72EF32D14C291571E99F20A92F 8CC37A8CB83B39459FF71E1ECDAC8D0D15D1BB19BBA9B70E69E9522CF1C645F8 B0B1BAC2F9941BBEA4B9B802E21715436227079574CBDFBA313BEA7FFF1452DE 7DD0134D440AF050E753AE094D482D385E7B865F5D39DE31946D30C92113D2C2 2FC862941D6E30C49E949E68732447CA6892E676A226054A91B81DAB0C361237 8183D4E177FF3584213A43083B368F4AADF30C391E6DD2DD2B4633A0A663E574 7FE3551E28F5E31378B5804F1AC52D90CFF9E66FC94D91C889E2D809D265D8EE 5F800054C40EEE222ED014BA4E18655CC4C05F749AB1874238C7CE4E1D15BF0A 0B93D8A49F036F365DA3D1C58EAF8462DB98B420DA267B342592E5D9B4FBBAFC 9F75F67A2F4237BA0C306C0D5C9925246FA5AA9ABFC110CF65941A866A2C389D 3A5C9AE18187940B773F2FA95EA42E8ACB517B27B24B4894C8D390AE0DF1BF9D 31378AC13658E6E04FAF871B0C4DB11BBB40C19D6D24BC60B5C073CB1200A303 1908C6BA595B44 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMBX10 %!PS-AdobeFont-1.1: CMBX10 1.00B %%CreationDate: 1992 Feb 19 19:54:06 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.00B) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMBX10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Bold) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMBX10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 90 /Z put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put readonly def /FontBBox{-301 -250 1164 946}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5F00F963068B8B731A88D7740B0DDAED1B3F82 7DB9DFB4372D3935C286E39EE7AC9FB6A9B5CE4D2FAE1BC0E55AE02BFC464378 77B9F65C23E3BAB41EFAE344DDC9AB1B3CCBC0618290D83DC756F9D5BEFECB18 2DB0E39997F264D408BD076F65A50E7E94C9C88D849AB2E92005CFA316ACCD91 FF524AAD7262B10351C50EBAD08FB4CD55D2E369F6E836C82C591606E1E5C73F DE3FA3CAD272C67C6CBF43B66FE4B8677DAFEEA19288428D07FEB1F4001BAA68 7AAD6DDBE432714E799CFA49D8A1A128F32E8B280524BC8041F1E64ECE4053C4 9F0AEC699A75B827002E9F95826DB3F643338F858011008E338A899020962176 CF66A62E3AEF046D91C88C87DEB03CE6CCDF4FB651990F0E86D17409F121773D 6877DF0085DFB269A3C07AA6660419BD0F0EF3C53DA2318BA1860AB34E28BAC6 E82DDB1C43E5203AC9DF9277098F2E42C0F7BD03C6D90B629DE97730245B8E8E 8903B9225098079C55A37E4E59AE2A9E36B6349FA2C09BB1F5F4433E4EEFC75E 3F9830EB085E7E6FBE2666AC5A398C2DF228062ACF9FCA5656390A15837C4A99 EC3740D873CFEF2E248B44CA134693A782594DD0692B4DBF1F16C4CDECA692C4 0E44FDBEF704101118BC53575BF22731E7F7717934AD715AC33B5D3679B784C9 4046E6CD3C0AD80ED1F65626B14E33CFDA6EB2825DC444FA6209615BC08173FF 1805BDFCCA4B11F50D6BD483FD8639F9E8D0245B463D65A0F12C26C8A8EE2910 757696C3F13144D8EA5649816AAD61A949C3A723ABB585990593F20A35CD6B7E 0FA0AD8551CEE41F61924DC36A464A10A1B14C33FAFB04862E30C66C1BC55665 6D07D93B8C0D596E109EE2B1AAB479F7FAA35279ADB468A624BE26D527BFF5ED E067598E1B8B781EB59569E3D0D54D8EFAE0F3EDE26279776ABA15341E42E636 6E02817082BE6FE0B04249A4840C11F95F8ADEFF72173E9A5F2AB2F62C427E5B DC010E18641EAC906A5EF0F9BC2108062134A7F10956219C5847C0D82F0E8663 12D963E012DF0DD899911EC5D8096F80B49CA3444CF1294FBFAB57DFACC9D01C 46F3BA2F3D1C14EC30CBF83E5729F1C074D4F1665405C9AAFADB8BE41EEE43AA 16966E2C0CCC853C4C09F245ABFD4603C4AA55EADC0A59AA6E9F5895FAF3D3FA 83EDC6E2540417530AE7DDA8EF33DEB81444316FB3F93EF944D9FB06745BACE4 848398BEB747E58310BBA39C64E341185C82CB77E9D4439EC15BEFF1335F22F8 F036517C436225F4125ED67ACA7A84230D4E2B6CA713FD6B3CA54BEB540D4604 D58A8335BC20052440C4903786FE3E335E331CCE36A13F05F71126F680077AAD ECAE10CB7C057C2D55F384723D58EF3AAE83E9E0B39D6A522667CC5B3257DAFA AC1C3C981B9415967F7F4DECD492A52D35BDFF8A83E40438B3B35576A12BE3AD BAB149D495329FFE2ED1F3587EB4BF6B7C1209F279EC564EB9E63BE5CD767D04 E2D4193954C813AC49CEB1E84CE72CCAEEC6FEE3C2E374A2F9BDAA5DB3CE37C1 09585F829065A21C7A4F51EF1ABCE3E576FC6AD6BA4FD4583860DB2865177411 A3BD996FB749D5EB4C503C7CA2D05701B6A4CB7EB81116532ABA80FD024C93FB 2F64DBD6B5AC9DB5361326C148ED9F5DA2B712B8AF9A3FFE6A603B11969D3C95 DB1A817CAB8C445CA24F1F7D6633D4EC54A3CCC7EDBA2453DDB389C6A77C199B F2A6F7DF0D5B8BBD40BB5B5B79B339E216DC091CE6C8C6F52414CE9479817B5A 719711AD80B545A47D4119D363B0AB38167648FE10D77380007B41AEB7278609 9E1698ED5180568011401A64784F19610298DBB1EB1FD9E60EE5FA79EB0CD328 5646912949915FAE50258417EB7EAA3117838C020A6F50280BA7A2FD240E5FE5 C96CC8014006BDE686BDAF094149DAF305BCFDD19C10C508E644D0B153F33AC2 DC02E87E22EDFCAE0CBDD9C92D9FBBD0570EDDD8A90B0393B608CDBB43318DF2 1DC3C429F670DFB19AAD421304BACF5044D989BCAECB8AF16D38DAAE54270D94 048A813B56025D1B36B61F15A3F0EEB9DCDFAE3D0BD0527016D1A0A97C99EDB3 FADC694C5FF0B09DB190A94207C45390BF815FCDADDE0998495FC60B49B66DE8 D0AFDD2AB60D7A00A2D1EE340263BFEAB4C75EB7C1C8DCFAFB3ED411F44E0041 98E4221454EB7DCB07DCC3FDBC30EAF5F568B21048F325F95AB5F5B11C33DADC A31A2CD474FB45DD38FE8DA8ACD5916260000C47C5B55E39FB3A468DA49B9BC2 D5B699B1158F4693FBC314E7CE38984DA973A10AEB2599B9ED306E8DAAA64CB0 6EFD691BF97D11F132914B9A9C7F07E48A48E44CB9074B7E849CCFB180D831C7 44F0DE5448C2617D0AC9E0DC5DDCEC6A826BFAE3868112C2C8B5CE3883EDB96A 311FFB45AE24E121F147CB6AAC217E5044E01F53D03F07A7CE6D15841A902119 B337274497F7E064724F0DB92AD6442393E4D67B4758249035E23CC1F87FAE4D 4700D7E7D99897A6890B2D67CDB5D08FF4E34702EBD5FA852001BD2E16E51AE1 0BE32CB723B636DEA87054584209AD287B990E0CFD1CABB7432BB44FABFE9E13 B8BC0F48D4F3C431EAEE832EC4EB20A1A8C48830A459E0C1E7CBC469FAF57900 CB7E5D026A84DBEDA018F49AA3FED849BA7E35AFA2EA21A0E5068EA07800689A 9DD31ACAC0F25E9CB64AF2F413DBFDA4075AE3BE74D3A48BB3247262019B5AC0 2E9CA4CCA095E4367D923879D7F22F1BD73CC246382B26F38CB4C0E01084EED0 BD63DC4358E1B34094ACF9F842171AAE95C230DC0D8BD0256B327F31B5B5FDF8 1CF8FAD8ED8952A3793CFEE78647558A1318D0435E61D4D31B151E40A8A8C21C 50CA74642E5386B7DC26802BC33D5EFFB7A311353982E2C0A24C04949220CEC9 87213B37007E37A56833B7FDED1938580BA31C9E1CC14F6E0CB7B79E68B6144E 4CC26E402357D3398FB44A8B85AD4705A8F7458B8ABC858A48B99F096E7DC4C0 061D840BE9AAFEC41F815EB5E7F7A0A246CF50466C45750401E93DE7A89D31BB D7B556509C9542D1E5CF22D67BB13755AAC2BC6CE944DBC45557106CF32A9764 97D054BF00670758A61E087D473CC524C7ACD6ECA2AA64405B05D590B318F7BC DC9D3F6B08A5A8B30C1D7B31946693338C330864BA25CD7B8AE3226F49373229 59A694E48E4098082F99A4299C735734DC3DA8E6878649398A6B8CD636568B03 A6A24E9FD1F23066FA451C5D289A9EA011A098CFF3D568C74B8330A234D03F7E F3348FCAB03AF8E29FD3CE9DCAB0043825245EB21FF2E0917E9321F68AECB3EF 221420C81E43924DE248E1ABFA4095BD6740506FF5BC70530FED3CE90DD75CDF B6F597426B831723D08864B88F3581A48596EFC175F6E84047729BCD7A004CE6 62F3F0B3F4CE2442715B0FA9D1D0CC410A127E476B7758D015E1D88D7E64D08D F326CA9CA91F575A2F64E581DC50DB66DBEB7FE6079667D96E1085D0561DFD22 CB206070A5CEC8A833B2B57B899893A1061E9295DDFE3E7905F3D3CDCF87C71B E2F7186CA7A9CEE9C73CC1667D5EEF899F888C406D2B3D80AB71ABF0F11DFDE0 AE74B374E403242022B1610E407DDF22AB2AF3F106E2B5D260A160687427BE6E 2281649365FC57510A334E81B29F04A669A99070822AFB06B720EE082EA551B0 EEF64218C09E7BEB8029458345C69563D5ACF180E655859F305B86517C7DC9B5 3EC3C9494F96793CCCE2A77F9102086B1FA8512CCD08D2F1FC3BC52358E263FA 0488BF7B099C997ADEEDB8DBDBC6CF729DCF517B2F5FE77BC02A19A5BF822B73 DAEAB9E69CD901BE2D38946D93E090499D4DA23ED8341C123C2E009C7FEFAC81 DEBE8ED1067CC23E6DF0B22EED50DCD9247037866C64FB7D19F7E3C26CF66251 98354074D03BDF150890D2AB674FB30A2C475D9FBCE2F2E2985E24958C559F25 47916841A3283943F3079384F0967721F6A939122726DD526D522E5EA6C54509 648EB6B1E9B426D78652E65A23665F1F489D052BADBFF190DAC5A0CED15F41F7 CD035E87108D6BC30CBB68FE8A7F62C1A34FFC6C61A89E38E8D78DEF555B782F 7602659C91C59E98CF53D6B35159AD5C19989239BEFC67CF55361F5A486C788A 2547C979CBCC73CC4E80C17A767BC4770C2DC924AACE3296E60BBF9AB6D5D6CB 9C5298734AA73505D76793F6EA7D41B994EFB898E26FA096BE34E6C3B1D57AF7 FC66957CF44F93BC8706B344BF067575480797ED6531E7A68FFCB22EFFD0C7E5 A7DB521AC0B0EA82FE89B9B4367BD59136AEE5534A6D4C7E48550098EB8CC83C 720FA116E904A88B9C994BE2D548DB1645388EF86AF28C6518B37B77B2EBC2CB D81FE678E935E383F31F335E0BCD906A08C2A5A1AF13F1FE0CC95D2EDDA4B7B9 4F09E214C5CA50D246D76AD58106A3D7046504F189AC7D53C0A5B32A5498460F F77C98E78CABCCAEBCE784983760885D2468FBA2D14CD6ECB4C796BF4BCE0765 EF41B4133013572FBC3C3234CC9327AD68982871CBFFEB8AD9876E2F103D39F2 A221E09642D6AFBE7DCEE2DF7B1FCB8A394632EE637D8B86575321AEFBA829B8 CE3CC20B2A82939823DF3CE3AB759D65B054E16991BAB9C7A4F4FB6ADCFA8427 5AB50B35BAD0AFECD0A15F032A14662F6ED447F72D020CC4F1A6F92AAC89DC80 A165533981704FC17384149B0F5B66F4FA351FFBD0C394AA48E54925B02ED9B6 6F779EBF303E2B90BED52E133FF7B6ADD2AF38090BF8C5C7608DED5655A8E237 35FD8993275AB841634C4DD89EA465E558F4605E565E17C93C929D9B03A5C15F 31EA911246830AF8473A0093E7DD4529C3B9FD67FE533D0D2BB166D698A0F2C0 87B4C9AA3D0DD497423A0C333F301313243646D763DF103632131C6AE33C1D0E BFE8AED5B873DC9A7045FA84AEB505743F6DCC490C33B7A0FF9167673E3E2C7E E6DDC3B9D26CFC46D6A6F070AE8635C0F02A5BE349865C910E2F0B4D6EF59E63 893FAABF7372CFDCEFB6C01719FF2CE449ABB6E1AF4FC23257585CCDE24C6D43 43147BD5355259BE69F24346B92C6B4FDF6BFF195C1F9977BE0FC976FC58CA40 95CF70FD29F05EE0502C9C576DEFE0378595398CBF23D8871F9711FFDB708BF6 4F423135AD1E9371CF2226B7632DCC139C17110C15C437531CEC97C1D4718B11 A6A9C738E691418A38B50FD90747D1E87D0AB9783CF51D61827AE979ADC676EE 1D32A2E9308EAEEB49C98D6D0D050880890CAC2D13A3C7E62B8839B26B6A858C 42C0CCB1D5B86AA3CDDBCD5AA051918F47A73CF54664B6B17BBD7C9E365904A9 4706FC9917434FA7C8EB9A1D69A3254976580A457951ED607EA61873A187F3B4 D9111D6AF7DF77D113F3A4590F32C9F0EE14C6C22DA0542321739E90EE6421FF EB48F3E7157BE658CC74B449D83EC9F6B831B9E832D7C3E829E4C02B8728BED6 3DA37FEBAA9148DA0D34DD35168E09172479F98BD579D357162326F31C8F3C6B 9FE1ECCB3576E585CC939B18D45F87DA789E784E47E09A0094BDCB0264898613 A75F6D7C99092AC3C0CC9408BA32233B251746E0C5A2438014687988EAC638A5 E446D177C56618F6D17511A7F6A343E00F62C9EF1EB0E6BFB5A282DBEA46C5DB 6029381EA124707443B5E152A806ECFC390F6348DB29AC6855D312FB08DE2E30 FCE5F18A36467A38ABB3B421B954285DF5248031DE0708F7123ED63E36D8085F 88BB3CF17888EB95E2D5D564A46FBFDE518E5E44FA14877BBEF3716F1523740B BB7D3D216D8E5DE5F7BFF55EAE18DBB3277AD640755AADCFDEBB3AC048874A94 433A1201A729938091B16E6CC52C52B27D1E82274848EEB667028A92089860DD 64881D8714CE74D51AC155227C1547D4698F1F856A69032FD95F31523B3D4D13 11FB3DA58D4C8B1D052CEAAAA844EC73F9D2E60025BC2717EFC5962BF63696B6 D09E2E24E4E3AE152D15C0C8A080DC00D4BB60A3CC26DDEFD57F578B5AA8131B 853ADCBBE6FDAD6A8F6F7F1FED88317A98534875C7A7C97DF020D938D1712433 01EF4D3C9D9D840A3CD02EFBAC10F00469D56BC1AAC506E694EEEA99EC5E49F7 FE2E26BE95127764D6B6263E86B5FFF4325B73018E00C9C3531FD38BE368A476 BE362270E1109EC79B5434BCC545504D41FC116F4DB89751CD8E1B0F22126951 300890D49A5C6EB5C84F2C51FFD3CFFEC8509BDF002006BCFA0A08F40C618D70 5E42A43AA625028AAB0C05E81644ACED42AFAFCCC0E0C5D754BB7E533B8C2F1D CB4F9DCC4F7D301C22A9434EC019A5A978A40D378A22818322CF31B06D624223 D4759BD350CA8ABCDBBC3BDF0F1FD6A8E5C177EDBA5829BD86813BFE5856EDBE BFE78214A512B97FA15881CB1012E5B86CB07A7A273F81E61B6179EA567C906D 2CC03D229F7BB1C4F5E7C803590413F7F8DE8502718926F66EC8B4252B5C8FE6 2B4AE5C14794AFFDBCE4400719BC1CDD050FE8EA5B5EE4AA06406620E5DCE9C7 D4EE82CDBBD6B829D5B8BA4C787EC7045A7E05F600955BE9356F01E1511E343B 9346B263B49482543258A3139150D644C8B975008A62FC5CEEAB90904AB820C5 8B5B5437EE0EC8DD71B6768BC72627E05F82AD6A395A935BD1CCFA34B1C0ED18 941C0164D4C3BAECA3295449BBE65900D185519624C8DCE48191815BFB927B6E C50CCB5A4F35777D1BB3D8F01B4ACF1CCB4F5916CA1BB7BF001E414BB60E1A15 C2664F93D8628EF59913D3B2D5528FC4F23505A451ADC1E02A788049CF44D225 6CE3824A0229EFF3B9B2AFF95BE8D6A9D6A9B4D05E1A0C425A49E54926A46D1C BDC325AF7CE0B1490BEB2D23B8696E1E7991B7E1CF5E84F77F9C4D3853519827 2169CF1715E0C1CA3A4E4463062B14B9638E38CD6A2C5D7459F426870B8BD0A0 7F5F6250078C1304A556FF24C9C4D01933597B97A6A295101DB0D57449E62BF8 6B3CE19C1045D3351FFA33EF48555DD5C7617AE0A125B5486EA1A472115C99AC 37988DD49DA7A7729779154C9F93776216AD00537F72820420B0270EAACED438 DC514D6A9FF7C9113540202E2AF3AD2426C05A93ACB0BD8EB3B3031C9FCEBC76 4675F5AC45D20A80FF47FA956B5965F8BD2BEEF69B9550B66CD0EE5D3DF3F924 11D1B8142F0D4F4267E13065C64DCF9D49E8126FD714F98F1EF13BFCDAC0A61A 3E38EDF2389A3977C637F070E0234391AF530B6A2F86B029243D2033DA7930AE 20ACD871EDA7C0DF784344291DE5FE19AAB9ADEBA9C44725D7DCC251143ED469 CD58472BD2709798DFB18667FCB6FDF85E380722DBF296DBEAD94123F8591A57 63F043111A6AAA22289C584736584A97B3B13EDD492893B68A7FF59D370ACFF1 A7CEA4FDF248AA152447892D1CB8618B27FE1D8BB62857A7CDAAB029CA014085 C894C6DB3B426D5DEEC4F925F158B23DA69E2197827454D8152521DB43253899 7F98C2DC5D077AD6EEEF23E45694952B34B5EE2F1B344A2C0EF7526540BA57AE FA340F67FBD39A371A3EFDE6826DDC2B362B2B75C47DB61F9127239D0276FB76 52E026477E161C4269D0B1522E7B287F0FEA328AE93946AB92F48FB4F8A298C9 FE97974176F463E9F84D4396F15C0A6346BB2994623C015B6D44A63F918EDBF8 4BECF7185A599AE11989D4533FC81F550F481C71127AB9FA1D7E759A2921AFB8 2DE068616A97D747AC375F64F5A050AB4CDCCA07F5109B65B0118919725101D7 6EAFEC140B144DB745A24A8A43DB5C97D1219EC9B5C5ED39A37A7E2B040838E5 8129B0CAC25F4C5F7475E1A38785D483294C1195AA4137566BA3F0843EC2E832 7EAE710A369C404DA748C57283603BC6916FE3077516CBD1803E78FCBFA0E6A7 111C02242EB154EC2D58942FB0376A494444682C6615907CCC521F94297CD468 EAD7BDED52401701EE9FB6198626A1B2E4EF7896C2350BC80DFE5A9650BD96E4 AB81DDE50C3E2BD23422DDB936706551029F06E403F47D5C8D16A0D7D4A902C0 A0FA8C04CF254326F5BDF7509DB78B03BD91333EBD1012A062ED0A61D043276D AF83C1E0CDF23CBF77E5F478523FA87687A001B2EEAF4D3DED0E09BD3E510FF8 43782C9283FCB3FBF1A93DA3E7130C93D35A1EEFDB85C136D6CC2A3F06B682D8 41780381BD1224FA8CC72152E24E63607D9FC70EEBE1189E61798999E8439B54 DE1E57E1B76CC863AA17123BBEFCC4B6A991FBCC890921B4DCC39C412399EF26 6B1695E30114DF63C07CF333AE99A5EB2D81EBFEEFD0DF7DC0E2B9884A1C60A2 52A2FA420BDC6050E976B60A34DABC5D192285D293C83FD411CBFC1EF8F10354 23BA67C7E0DC286AE224102BC6E6D9742C13F0893EF5C887F93A4D0DF3D96432 A57413D6BCBDA7B19E04C50CB2D37A45A250C70D5FC213594D1B616B67B0C69C 518AF42387230944EBD16B36D7A21F52FBAC6104BCE7C2C88FEF155E28E7FA85 006CDECB76406E51F3EC50A9C5C04C7A9222E930652D50C26326BB0155A6BB75 3BD7D445623895A94AE1A6DDA1AD1F7B6906072267DB6872500EF3BE88996A23 E3359765C00659CAE1BE9FE17D3B45BE663BE73D1C95B5237896528C3047D413 24C70F8680D659F27E5155315656070A8BC0718472B2CE62A7716DF6B04DC67A A5694702731DCC948357E25E080D910D9FF698312B6C43F600F1911D53BF8E32 2E00204DBE46F997E7EC7711C3C17B45A044431702ECB8508D976979CC98EAF1 9A2893B59EC97A0E9C9A0A202287507EE839E3C52930EAD47A706BFE17200B8D 20B64BDDB579A3F2F6A3A6E07128C27BA866B7C9CB6C26618333F4BAB06CC61A FCD823DD3B70FC0C24CEDA30C9E07CD4E24F6CDD43D139E38546EDB04ABF48A1 4F0F474A3A4F79C4A1224AC4ABE74C15F7D437560FACCED0F68645F8AA1DDB9B 008D86B439CE08FCE0008465057C46678EF7F236294472034AD47DD60351BFC9 7E10B1BB0A65AA36C8FA2395BB3BCA4C399C5B50184E95950E8368A1063D42E3 00ED60DFA28D421E5B42CA03C4D459A0C446B752AF164869D727639D7E904C69 9C95421E0635884306A278A5FB535F14C045B19577CC7A6CCE918AEEFB110F72 565B6F476A3B3BED91DD86DC5CCE131CA19C66BD5197D3319FC2091B400BF738 7565094003EEBF93A294DE8C30C48CC36D384640BDB176A980ADC9999DF9D481 2C89C91B650BCC77EB66392530EAED3A42D6630D3B2CBC9AE8172F07675F7B9A 2FC4CD39A4B7C053E4836C6313581EE6A8A22A5E217320B402BE735B08943CC9 59CADBDB5B1645CF62279E9EB975AD20FF621827FBDA115B48448A36C82EBE9F C100DFA4B05D2665F8F13DB22AB36B9306894BDE4899405F42AFDEB8DBB1644E 0FCD20C646EB887F1868A8A9B78A1E7B7241B4EF6A8ACE40EC03C208D7D9B19D F795EFC3FAEA0CA268CA4BBBB1316656A7F6A00E4E17113CC6AA977036044CAA B1440B6C8D448AAC1621200D4F1B770154DBC5215EAB4EB75ADD6D63617DE5FF 5608F7B64CB122A7BBA62EE36C0E310C3023EF3C769862D9F399BBF92D3C3523 CBC57E54E8D4319545B37442FA8DDD1B025DE54D9CF5B4929EB6066450AE3E86 AC89D7983592F6E6ADE8F1CCD4295DF459B6C7AB1FBF8A653519BA5E4227BF9A 96577D24D7459FEE6F2073B2FDDB1FCB5ED308200DC5411E3C0EEBAFBE6DDE32 952ED238392C6209A3CBD65187FBD3527A44C4A9FD3866B654537924B66E4DFF 4F04A6F2E2852692553DD69D7BF4962DB501B806FC08CE52B6BD6FAC6BBC75A9 666DBB44B12E20C2340F4D4BE3C2F5C80A61864CBA8B7F5AF871FEFD896EAD8D 55C9DA729D06D0EEFC293BD50E88DAEE36AC9C8E92985829EC1B11193569010A 630B20988BC5C263AB0D1F874B9B13248231BF164F2FA78AD4E6DC724C16DC1E AEEFFBBEFE731AA478A6990A0CF4DFCAEEAE984511396FE3FDAA3CD7D5793C7B 06746EF588DF2DABBA4204EC0312735923B7A853B412E53FE38DEE28C3710611 46EADD8D3795ED768C48BDEF95B3027E1F04E16422C8F95623742DA45DE0D9D9 9F92598BCCA9FB8AB7ECA452D16FD6980A0EAE534A7647FD7F3FCAE82637FB48 92D9EBA06CC8BED180F5B3B818450F58BBC4DD260A0B4AB17E9843DC3BE8C1E3 32523322E68A907C4226D3DC99D536C948A039C3F857623527D5A3707DCD6957 5C2DD266AB5A0BF5053069CE43D01A1703DA521034D46D4258952E8D39B5542D 4E04F895255B1A684A8F53FEE90945E602092D36895D589B6E88BAE441231F75 21B8132A1F1673A95C841CA202E3490FB00D3F145C001F1B4E283F79B0A62895 29818F6801E3F3D8F8776014BF9630288A984F8A2F79999D6409065736F0BCB2 0B515DE045A1FFB54B2557B75562B4A9C72B141463D390665551311008D8CD0A C7B9C2D42AE8A2EA40514953E518839C7EA2A51E1DF253D9942C64E049310748 64D40F98F84ED0F16E43625DD422BDEB1DA47E48D5C74F4794BD4A7DB6BFC040 6D6326349CE704F0E91C1C0F9C1BBD0EC2BC50A412F2D054E234011CEF11331F 525E05D4090C80621F52EC5DD428138A17EF4A2824ABC6755B9A10AD239ACD59 E6776B6AE9D830FCB408730D9BCD9195A8EF3BAA887598BF169132C70B5D3CE1 343E7D45A8888ADD14E6D0905BFDFD4AEACF17B93950212C764312DFB69F9CB5 6E4F23AEAF5E02EEF87086BEF127ED93E779DE216AE73489F5866E826A88E532 4611B04BDD6E3C3C276A4635121406F5516D62EE17AF6E15BD57DD7C8316ECA4 51549E62982D6E12E59585B493C6CA1DFFD500879C3BA7EF295714F15CA1E1A8 072CC471856856B3E3CB8F521831D41B644273460227A8B70590B9AC314B9C9A 65E627CFCC46E99303CD79FF1835E6A840DF13D76F9A0B71F9BE11061519ED85 BF5DF978464FD80DE51FCC328F0686DDF05B9D86587649289321EEFEB8AF74C1 345E276797123D7455932B6D3F46C7A606B903CF75A537534168783D2D9A334B 056BD75C590D9543552CA8612FDD102B31542E81E81F0422EF4062D1940C5419 B55123E2E54E5BF566D69523065A41DE92FE06CFFCE6A0F0E54EDEBBE1339033 96FDD46C1B59990DFAA56B1BECCE73937859092E811253A05E66248103D1CE64 BEC99B21F3AAC2F6F0CF14C06C3B0C6829C804040CED09D4CE0790493311762C AA9936FFB9F7DF9FEDFC9E816E4D53E98030C422CCD885F813C6940667D7FD8E DE2D685C435A67226FDD3ECF15B0007FC46C75E913A91488B843C568A055AF1B 425EDA80F0AC3BF025DDF193594CCC8E62F942A6F0042BB5FD823768EE3B2060 A3BB775085FBC7807B274107FAC9DB957731B11EB6015CFC1AB9027B60E28D0D B46FF0B8F8F307403488C8AD37F0BBB510C2B0B073591992A5DC2372426282EE 28750022F56A30D45F274021CB2823B686A15E765F5CBA165DA8B4E2DCB49437 B2BF3B5380C22246359A02BE2C3EAFE3CA777287DE5C83FBC4CE9DA5CA17F345 8353AE48B86BF4010E6BBFC0B3B1103F56580F01C7CF03ABEFDCBCFA563BF0C7 43F4E6683D81BDAAD6346F9EAA415101D8DDB79B802F576AA575BE0DD5FE62E1 A3D8AF9168D209FC13BE69EBE4CF61B824F9C80455B467C6E95D187C97B39022 961DAE47C56177BD2E76513A795097E0B31B1AC983A80A19238D34733E1434D3 BBF63C7C7E73C8AB5ED730260846EB84DE770EF22B0809EC96D3A99AA866C9E8 5997C29372CFA03C1ED63927B21248F10B0F2D1ECCB2D1B1310C55A8CCE0A077 32E04127A86BDF6B1569C4D80A516E0410A65E1F0F21BC957C0E8C21328C79FF B9F91117FAD7CA85AFF138583B7202609C94DD6B7816CB8C8C43D04F1DC806D6 2D49AB01B88BD1B8C943EBAC5972A43354C2D61D14E8CE87CFB09371299CCB10 64F6930D74A5DE7D818D365EF6AA3DF81FCF6D7E988FA1401C7E329B3589CBC8 8B0540CEDBEB7096BC55B8F560E8AC0F391FA54AB85085B68B27E0D83199107A 46F71593AAD53011D7909AE2E452A0A59B56AB4D0FCCD9B750E7D620EA5C4367 754800FC5B163B78763833A1EB138B3E49695F3970DAEBFCF8A5FD29896F476E 17947DA59249ECC9A5C02F760D32DB541F7011D9BD8DBE2B758977F465D5EA92 12E5C307DBD0AFA887D5EE4501369F2F6B8F02C25F59F4D63F9BB093D86300D5 B5EA2293EFE31D504B82FE8237327DE9321F2B057C2F72E5BA89A6E29B2A8C1D E24E6FE8A599683B60314901B55824AE29CAEA2A6762B1C8CC43A457C0629101 E0BB830C2197D744BA4310D0B6E911A45E7B475D8C2C7A5A8334D6195931C267 018B771EB6D9AA645E8E9EB04B1DCB6ABEF9A7D83DEE4D38F1A3EFE21B78063E 2D2560561A28548B3140DA05BED788EB77AD085B16034223DD1DC9709DCFE8D8 E14D635C9791D52CFE7071DC82FCF5E2B5B8D1A5A4F030628EADFBF8733C80BF 9E10A6144A5713487D45169D2AD2393AEDD246BEB394BE0618729C2A6EA22DC4 81EA3796BE9955683C57DA2E47DD8436699319305ADE39A62E8B2900CB3E9EB7 8C5EE35F9AC852193574C57D724BF12B4ACD80290EBDCE30F6A54DA883009259 B3C769DC0782C33281814A0D46A88BC00712078242DDB7D4569331C8148E5157 D62EE1BF42EC0F90C1F6A34D91AFF11595B2F33ECD57D283F6C34CC0DC7612B9 A65E8C33AC8F2EE051ED07CF8DB6C1F6A077D143EFFE8F4D2359068F066DC509 9B0FDE75BEFD64392A404011FA785CD6BCD90EA54AC660BD17D2C8FC7D0CE5CF 4D87D96ED9291F2238E62E3550E3B51F007C0EBA6E2F1E2955A674A737C58138 DC5820C73C59A00E0D3010D01317F3821CAFE1B41E678EB77AC0948919A9810C 3B7D094F26142553ABCB1D6DE840606577CF082D30A9E1CB7AA3ADE3BB1DE349 9D277F63BE2136132CB78265896E641A138CFC15AEDDF2B5A8531C2C470EEED6 70829387D23B1038DF2D10535BBE0AE9FD4D8CB5D9986AE4E52130A7CA97FC08 ED9427F0C9925A84AB83B6724AB7447D8490AA57F82615B3652686E3B1F23883 716C0AF10BDFB9107A18DC7827E854ED8A1480E987C9D10E39F9F2039FC0A067 EF11F0375E1798AFB9549837941CFEB3DB2E17394678AF83892736E12100BB63 AB81856880F075733C685D999063F81BBF4199495B5175628B625650E10321B5 9E0527917233F04C1CE4AA687EA11E946708E86E9DABE14855CF21BA129389FB F160F2BF70C59AB41C3864DCB0242104AFE412E3B5A1C882A020A3ADF3A064B3 D89ECEE0B4588496305AD6B758F544BB0AEF9D9D0A2B3B69C7D91ABFF4083542 12A6E4F1B42182F0004EF241A8486EC072FC04E85E6FC5B8A7410E9A2B69999C B22416583CAD33096C892604914B0FA077398D458A8C1233DEBCD78B0CD7A5FD 2A5DC9D1314BAFD2C1E589419354547BBF6327B1CB8F24FE593F8993D3A3E3CA BDAD7B29631FCEC99A072477646511ECD58983C465319FDF1DF48C3C8D132582 51958A804600CA6068B54BFB9AF0B8F53D31AE39A066B7E28042FA5BC8055581 B522C20873A3648DD34A9A6C775FCD12B7C13AA07B70040C8A031760127E64C0 A0856A5B00ABD8D9B68DB8EE50FFDE5D3D4907DD330F7F6AE85F20D288AD4125 380D552C412FD8F5C6ADAA6EF2A3D924DEBB043C70673158D8C65365DDE09CE8 0DA756924CF0B55A4DD7932F1734E30865A2595DE4E00519E73909B9B92D4493 BE450930D0987BE710E13503D0EA0F7A0631BF09C3A4B59A7275D74081ADCAED 70AE6F3AF53C04A879DAAAFFC7E3C19ADCE0983258FBE0527725333DE45A6BC0 46C3AE80FF0B037F5C12ADE3FDA5867F584DCA5CDFD4B8487A3DC97CDD0858DC 4C3E0B6F18B2ADA37FB14F971EEF4BDF66198E09723407F5D56B6D329ACCDDAF 29B168AF0523475CFBB6038D9C1D4DB26B3DD7B4E88636BF9AAED057D0234D64 0BB214378A6D063AC5E38A368FE1498BD7BD2F0EFE19EFC5E7D9ABF7C9024790 2BC470165602FE960754B526755CAB96D6D79F6F631C99FF2F168EB7179E2AA6 C828B55D0D776DBF087E6204BCFC806BE8501758F72331FB48A2D15473CEEF7B 89F0E07A604336AC3C04257F7D5D6D64DED5423EE776100BFBD88C0DD8835809 BC08109FBF0485B730B9D3A408F21B15E0811F36C96A6EECF51FB1863CE98676 FEC14D2D78C2A55E85DE49BC0F35B8B55F350552F01AADCD34618468B83C41BE 4A94F981C2987DB59E06B0073BE5004A9ED15F8D411224E14A105471B99BA027 F802E30B72F524EDE3C5D4579E6B4BA65AE6ECC9D485A82F9CB2A6FE5D17C834 F0CEFB28E1F227CF0A9986DD400B38546BA46EE3AAD43F8BE63B65E445E6973E 604BA5BB79DFA4BEF73C0B7523F2D2A4A084CC3DB8FC067E9920D23E77BEF1A4 C5F79686C15426E495ABED120BF9802EBE442AED181AA5AAF3C0523BB5D3FEE7 0B9899A39B2B559A76B0ED160AAA98A167C9C72325245BBDDCBE8B0A33F2E8A9 0EB313A36BAC2A223C801E2D4073852E4EBC94C91B319B59F322D271137D3F2C E8DA5C66D28E3BDADC0546FE5239CD1763C233181B302AF373AE20B024AC4C16 D89A6283F10E1836A48AF39F264C8FE14ED329C4F1574070AA8F388B94974FFF 4B776374920D792FFA882A828560D53AC318ED15B0DFDEE1CC32DAE6E38CFC74 B69A1234F99B40B48AF2F52AAD962415315100E9D820883F02B9EDC0B29EF123 4F0E31A0A9D40746997D1C710FE1D0A35096B6181136E5352D23DECE2BA5CCC9 6C7DFE1175E20F7BC59F6E343E4A2F8030A8C8E0C4538C555D30907433370CF0 C12B08C1424ABC5DD3FF2C113F804D7294790943FC4A4BDDF7543ED78238619E 68B4E33753F698D2CC0E18FBD4209D3CBA57866EC422113E072D06842F3DA398 EEC0E8B636AB88497803DA7F111D68CEFA8921BBB541EEF2B6242A1D7C989C85 FC9EAD28A5F01EB9CFC9C14CEA4D5BB21CE33E46EE0C4F71EA624C92DE113A45 B4EFD8D4487A360FE55106EC0341BEEA78B8ED5133617EEA63A79314965418C0 C10D3057467BF83F6DF0CF2154478FFEDFDD2C96AA2D43501E98E63E01606569 ECA6386C3C90A3E86F079A2EEE882D3A7C85AD3F35628EDAB6B30CA0790B24CF 2A13CEADFD1137E4693323CE5EC95C57E441B333DA96A798A39AC22BB1985D59 3485C4700AB90A90E87CEF9ECD3A5DAC35029BFBFB816D311CA8B5911E0A3FCB 7B4033F0BD504A3092F3DF3EF06786C61280A1495D7D6EEC45D613192F94C043 21E23D437A635F9C22791BC8B3A2C2DAECEDC206C8EF3CE3336F27C1453351A6 34277B5878599660A6BEC036FF603D4596391C9D13F722E0B1BE4CCC669F50E7 8EFF9F33B81C7E9C886D42874EB6BAABE7DEECB973EBDE83BDC273AA1AD5AA5D F0700DEFD901F5E193A75EF525C4BCEBEB21D6D7B01194549890957DED03A2C2 57A8AC234E9E96190F51F936526D099C08E7260DEA82D58A6A6B9C45CA152C60 833E7AD12CF1F5D864BAC62B61AC530BD0540F9DA2EE4CCEA22620144EDC56E2 015F3A1B 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMTT10 %!PS-AdobeFont-1.1: CMTT10 1.00B %%CreationDate: 1992 Apr 26 10:42:42 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.00B) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMTT10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch true def end readonly def /FontName /CMTT10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 34 /quotedbl put dup 35 /numbersign put dup 38 /ampersand put dup 39 /quoteright put dup 40 /parenleft put dup 41 /parenright put dup 42 /asterisk put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 47 /slash put dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put dup 58 /colon put dup 61 /equal put dup 63 /question put dup 64 /at put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 79 /O put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 87 /W put dup 88 /X put dup 90 /Z put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 113 /q put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put dup 124 /bar put dup 126 /asciitilde put readonly def /FontBBox{-4 -235 731 800}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5F00F963068B8232429ED8B7CF6A3D879A2D19 38DD5C4467F9DD8C5D1A2000B3A6BF2F25629BAEC199AE8BD4BA6ED9BBF7DABF D0E153BAB1C17900D4FCE209622ACD19E7C74C2807D0397357ED07AB460D5204 EB3A45B7AC4D106B7303AD8348853032A745F417943F9B4FED652B835AA49727 A8B4117AFF1D4BCE831EB510B6851796D0BE6982B76620CB3CE0C22CACDD4593 F244C14EEC0E5A7C4AC42392F81C01BC4257FE12AF33F4BFEA9108FF11CF9714 4DD6EC70A2C4C1E4F328A1EB25E43525FB1E16C07E28CC359DF61F426B7D41EA 6A0C84DD63275395A503AAE908E1C82D389FD12A21E86999799E7F24A994472E A10EAE77096709BE0D11AAD24A30D96E15A51D720AFB3B10D2E0AC8DC1A1204B E8725E00D7E3A96F9978BC19377034D93D080C4391E579C34FF9FC2379CB119F 1E5BBEA91AE20F343C6420BE1E2BD0636B04FCCC0BEE0DC2D56D66F06DB22438 452822CBEAF03EE9EAA8398F276EC0D92A7FB978C17805DB2F4A7DFBA56FD6AF 8670EB364F01DE8FCAFBAF657D68C3A03112915736CEABAA8BA5C0AC25288369 5D49BD891FABEFE8699A0AE3ED85B48ACB22229E15623399C93DE7D935734ADA DA7A1462C111D44AD53EA35B57E5D0B5FC0B481820E43222DB8EFCD5D30E15F9 BA304FA879392EE0BCC0E1A61E74B3A1FC3A3D170218D7244580C7AA0DC65D19 741FA5FE6F8CBF60250ACC27454BBF0897CA4B909C83A56672958752ED4B5E79 E18660764F155E86F09EFA9F7685F2F5027EC85A775287B30E2069DE4E4D5712 E7D033481A53A2702BA7542C71062173039030CF28D8B9C63B5596A9B42B33E7 D922944A38713383D3648A4AF160A3B0C8F3379BA4372BE2E7EA49AABA75AEEE C5DDE1D8BF68483C3D21271280ABB91D54CC819680322EAB72E1250A760BC8DC FF798F2ABFC4F3539392985C4CB324B00072295FC160818BB0355FDC4F12E39B 984826450553E3D271F03D8DC2D12A92A4D32034FD16DA13B876D88C8C097384 46D8D7E41CA1A8979F9B07EC3337E70CBBE3A377235B04C79BBBDB66CE1C1A41 89DAB7CE91F2FC0CAF6DDAD09992D56F72299068192610EE3DE5DB7CF6366B4C D74F414484DCCDBA449BFAADA39D0F27574E604E31CB513B18E3821A33076151 C2BCB6E957C77A0AECA48C587ABB5E8C7624D56B32F80BBCFDC874AAD6EA5119 C9B06886F08CC7DE5400E0F52B07483FD4BAF26C1556CA27B259FF3DDF71131F DFC05D8B14C28F2073C460B5011B76D84F7917E919E50FEF563B5DEBC5CE6923 ADB72392C98D03CD978D3FC207A52B91E267E7ED8BB4531E8BBAC113DA68765E E23FA502BC71CFB91E4FDCA39BDAEB7FEEC3588B1108CE4A1652B770375724A6 508376586216289093485CDDBBE68956210B6FFF3953D097D66BA31D19CEF2A4 35A33AE97547B81426E58F9FFECAB633C6433E86C32130665210F44F10F3A2F4 EA31540D0BC08EA4DA2DDE3E8CAEBE52A3E8B037632B235D4ECE3CB797A5A939 12C45C282783F675060040FFE2676A7ED903798EE3B86644EF30D3B461D4EC3A A1D2E95C02FF1531D93180F66A13E868C9E1FF1722FEF6C4F304921961D4A10A 6AE943157B1B0E8871BEA71162E5246080618A96D5B23FFA8F420F2AC74BFB60 BFA3BAC4AC3A320887D4090FA3EF7071D2E1DD5D70DB98A01B6D315271D10F2B 3D9256D96FFE8D8BA0F4781B74490C63686397241640B08A08FBE7CC9B1FD0A8 21CECF0F994CC97AB18411EC8745F5A6AF56010C22E73CFFCB45B82DB68E6552 2E57A4C06B96C55031442EE1F53373C50E14657ED320D9EB3820144C7EADD2B7 564578EE778AB577C5BAA6CB7F9884D91F1EB53F032AE4F0A8F47A7636AD0573 00083304E10F77C0B5C7C390F436CB4C0E68CEEE4B1DECCA113BDF28F21B61C5 432899378C52824F854212F8B53B75ACBAA50F74868CEF45E8807CF574DF2B71 D37AF61581497D87076740A67F6023199F3ABFD651B2944306176F7AB6659154 7AED74DE897275A2033C35108B1F9153B113B15926004A87B2E9415DC4E3FF43 37E1690D9608655858EF65FC29E1909B2FB2EC1D611A14B3227111E903F1534F B37C2EB3064720BB08497C43D8C0D163A9C07E6B8574D344B27920DF3978B879 308CED51A761149CA2ABCBCD1503985786DBFEAFF4EF1AF192A50441359290FA 8741BD4E2536E91D7E1971394982EDD6C9E170D09105B0A41BBD869AA7FDE2E9 A06D02BC72A3409F1CD66B21B52D234FAC72D9009F61F16ED04682A87E2A7C1F C9063DAD66B0C47C220D4E2C4255994BD8590F0A9B462F009704A3C86D85DAF2 645485ECB452BD47F68274914AFF8A5FEAAC476AAFC75F08464730643E8F28D8 4D5BBDF1E56D39998755FD1BBCC94135A8F3412EA6B6C94B3D18B74E51C5A36C CA4831E9467D177E74AB213DC8EFE5B29E8FCA1EEAE8A4D5D21D50BCE3BDAE08 E1C57F839812325BD054FC65B155DE7EDEBBDC6FCB029C339B18EA157D09D9CC F9C5B44B947AFDE8BA8CE20CC7A2B8D46A8510A7779D80B15FA260DAC4AFB3AB 7FDE8781B64AA420E914C0AF4740A5F4D441338D4F2FE24563C79FB1B5FDA10B 0EF257DA2FF13CB87571CA89855424BA69B2998864B445CA79B3CC60AE0D99CF 9BC0CC8C312560D3AF86325B7A1DDB74C4185056474FB9B4437AA0D62090A864 5BB95C5348AC9F7E86D65596B51EAE9994C3C824874B8377F6D266B634DCE8EC F857336A6629D4CEBBA0CD0FC1D972DD5939364DEA16763D2B289DB24C20797C 26606CEC9674562C06F2159E5A7D8DE25624F01D198123928B95FBA02AA3C1D8 EDED3B6820975A8210456AD00419583338658A9F9930D1D5DF859127EEEC2163 D3D17229362781001341285F2AD5FE5B2021EFFBE11D3BDE2A59747A9ED75ADF 6CCA61D95CE0EE67C3E8182EE1FC4A4FE8A35F77D1D504EA09A7125CAD876ACA 5AB639476AD272487B86E0E20E88BBCDD48D0E77B37B8BD4A9B2086AAAD0CD0C 4839E1E126522A3F4329CDB267EA63756A38D0E24FE6546837266A1FC11BF542 7127CA2DAB3AAC2968658C60FD90FDF456B3EFA33DE77AD676B79746927767A5 021B15B2E434C3CB9B07E3938DEEC3E959B1F348A313E60E4BA3291AED5DA9DC AEEDB9EDD586AC704A197EC8BA3FB63E7B84D5E23DE51695FCE783F8C2EB941B BB40B846C12A4D5C965C3516D7F264A013C9184FDAEF144F7260A4354F8BB396 525A786496A8C55E3AFD4ABD80D994FA67D0EE146E9842221B0F3C114D4613A6 221180E57C80B5858728627FD394A2A3FA4E012966E97BD3D08FB7BF0C74DF11 A39F4F2E946DD667A68002EC297E424D963174BE43CA301B5DB1A2F4B7DF7470 44E4396137951563FCD59A9EFA6AF8EB446FE8EA707DD86FA8BA0A78434EF884 CEF5B614DA335FBE9DC54648614A8ED7BADD1BBA9049BEF948432B407B9650C1 843D083AFD085802C4105FB6474D0196026A477DBB1E7D584492199977B66E41 24E6714BE0ED5154E3A5FBEEF3623A2B37AD27DC863927F5DBAEDC19AEB7FDA6 ACE7B9007006B8FDDB6074731C0F378E4BDAB6554A243A6433C25165E5ACE1BA AAEE10F9905A6F653C7FA1805DB7900D852320A9E2CDFEA1428F1B2CBB134B1C 741CBFF2CBEA5411BAD850EC38AC118F0AD1568F09912BF192455B49692317E0 6AC44BE450BF34A8B92AE8089D329FD0D8522ED1FFB4127E327604BC3A74B92B 92BDF6B3A2B768CBE73BA257610FFFC3CBBDDC3305DB15C5F441E984841FD3C8 2EE7CF78980361E287753C10C9D8BE09152D51D684AC7934B84BF3F54A0DBCA5 D182A0A0BE44C566FD84D5CF9606E25BB66E565A0BB044AB724A7F8A4CFC0A97 0E1AA7EC8B71EE845AC9A85DA8F8C7573B03BA90FF8E670BE8E45D362705794E B3139E26D939B5C2843351AA99C2139CC336CA12AC770984240A855EA2B6077D 2A285D524C2E7ED1BB2224E7E5CA064B9D46E163DA5CB9E2695C1CF39AE00249 C1A885BAD2262CD530FF1235101D1B734AE28668432C218556F8D708269E7781 49CB4A91F25533F90B082F7B029C14EC80EF9BDA8596054E5F0CF135E483F63B AA8C701F3813BA73C2EE6A090CA3D0D544A2ACA6D4DDA8A3DF98900945ED994D C30DE735AC86496439BFF9F55C3C29D0EB5887391D9E6536587860BA53426A58 4504D571CD0423DF29D8ABFD2634272682F24B6F9D1137E2A3712505F32CAEE1 B0B1C2C3E5AE2F21FB94A7E7322CC1D232D8BD8797AA8F95F122EC7DC4479B45 423D85E790FF2FF25015B94FB3D16876998A425C7975BC6F7CB79FB1EFED64E5 B9EF9CA704C9D349D7A5BC25FE8BB6455499349E51C09394B8BDEFF511792D02 A97200DC096F62DD6EFC594F6CC10CE1C0997B9736673863EEA5027984A83E1F E028A9DBAC54EE9A9D14CDE884D87D7A1131E4EAAB999FCEDBF63DF533B8C4B1 CD5BF5FD913F915D92CB13921F2BAA27E0520532EFAEE6CADC5ADDFA9639630E D840968C606BFD9FD465BC379F551D4D27845099A1431A8DBF88551F0A9A459F 11824C044D8050BF0872D767A158D2172A78126878FC52AE0B916FD7FC7783BD 4D56FE17E5CA045DF84E908A5BE8CBCE1F5EF40FC20BE8A5159BFF08B5BA5F64 C53A14CAC4E43D5020EF632608BDD4DE3FCA54CFDC1A6BD6BEE999EBF8F6A0F5 89AB9ACA92763B4AF8CA49A93800A706531BA591FCA03DE3C0C514AB15CB18CD 09ADA954FD08B538492DFB03C2C640DCBB64A4D4B36D151EF86E67547F75DE87 8689592D83DFFF32EA4B786620758E98921F1CE4D68D3A1053A8722067FED386 9020B120CC1E6DD982014D259348FAEC050BC08B8CB891268B54B109E60BB866 18BDAD39CC4C50A39B29387C79353016F1F07882DD9520404102A9CE5EEA2C14 2B9E9E64014A41D2F5CAB91EAC0650D7455548F963A0A178F4CFE0B2DA5FD65A 866DBE06EB648BB8607E71880321BF5BE133E43831C5C20B14ADA034EF68AFE1 5011B743B96653489CE0ABF7EAF6E06BDA9D329A3812EAB2A8D66877015376A6 7AA7FFACC3DEC390F3AAA357EF552D0F30E25C4703D38D96F50887F3BB4EC89E 17B9EB3A3C8495E1C029F195FF36CE4FDEB622925DD42EDC91765052FA70B545 FA5CD8F301D54AD50E2003452101FBAFA5B46828BB8CE68DB59CDC3C7044E1BC 79252F644FC74079B5C8631CC310205764CD1D8831E2B8284F59FE7EF46F1D02 87D15501771F6D0E2E71B139F981FA53E5DF584573B94A2ED6C04A81B2FEA834 841C371A6B03C1BC2EB6D39421612A4F5B0608A66042DA09D68EF4ADD27062A7 BB09C290EFEF751AFF9B24E1783705B47C2D52E0999FCD4A7531F0D6583D8DE6 2A4AA43FE4F7C3BB86DDA92DB675721C5A19D9CE03080AB35F1E16E75CD1F012 6DFC8661C9BE0D7CF3DBDC0767936EBE664F308ECECE8D19A165595B305824E6 E439F6A627B751E7F57D2A72950096EFF267232752E0A915B81B24F3BE798684 980D8F99D3F4127D28F5664A048BB74B8B1037CC8BD30DEE498BD4B759163477 9D436A0B59212068C1FB9C2230232D1B967FDC5FCC7E42D13CE749257416E55A 8FAA84E6FB0C3253DCC4FA801118AF3ACFC58096BE0E01A0AE0E67BA70E27D3F 4EBA4F9229885AC8A925D9536CCF79B62FBA9D6DC857D5D2D8A6CEAC66572469 12A0AC1CAF2462E01B4DD1674FE992BB0AA26AFDE447EEB5635BBCB92AB07623 2ABAC3A763915417DBFCFDFECBA75A15A8686CE9F1F2CACA6BD3A9FB9A9F0915 A41FC64FCBA7CD364472CE8299E8BB3549389B40E66B97F24ED17F9D27A6F9AA 70405B58E78B9062CA2D42FCCE381807B07A20B8085254D2E3A6FBA4E0A02209 452A850DFB7FD9FB717E1327A9FDA54F7DC15539B97DA1D730217EBD8F943EF2 89432FA1DCDF94B6F81683C8661592FCC8CCA9BC7E126A6D97008FA0C58D7181 844E5321B03C1EC01623E666260D1DA98C9F4865720131F7D14B31704F162BBA 4B549B25D5A294933A072AC6AA6135B70F3E0A8CB64BD1FDB17543A6AC661DA8 6D99F9C6D06EDF4472EAE0FF694F9E8640F97B2F40F9621DF92656600B14948C D74C4428EA0BFAC972F785885864B76D651A7DAC0B5E0A4C631B483504CF0125 2766FE3B9AC7AA861BAD19DBBDF74F306696D1EFCD7A6B7C8AB9F7C2B34053A6 2D517E7D8B338441BE7E46413C1AED3C1CE4C960FAA0F64F243BFBDD277156B8 017DA4F55C9569E95210D8560FF5CF890C1CD71FCA1F60EB6B3AE882D98C5158 74DEC0294BFA4DEB9D41A84E266069C096BA3C17F6145CCD2894C081FCB0BBCF 0E009CA6DE642673D434738D805A69123AA289748E65AF60465106BE59F46314 C5328BFD79220D220C910405BE52AF6D2CE2B838F4BDD93D87F29D736F5DCC31 F2AF943C1AD452954E471D1392163EF8D8177A7FC7F12904E30E5AAEEACE294D BB51708183535E26F1810A77D2B70E87CC7D3CC5C117C2572188CBD55ED96FCA CCEEB0D9982AF0B38183EA92D0485D2B3D4791B92EE759C39C9B343086747CC3 CB28AE8157FDDB6166F859DF19C55A8B9BCD54C7022355FD955CA9FCB2EC957E A5508EEFE9EC461E981C3B2294EC4B839E1E2EFB9AB5A4C23293A8B7AA983594 752620AD96E08597EF3D41F42207D3683981A22D3C1B64376091B0EEDA7D3AFE E9858C9BDE56BA8A166BC2900C9A4AD4298F9EFFCF8552478AD33C371D89886C 412DC7D3214F3F3D3C71D277ACDAE756A469FF297A66306166D2D4F8F44FBF76 0E756B965BEB134C522F15929019894B6896B6EE33F2A38385E0551F45484A38 69997B61EA42D81441C94C2CEBF2C28A8BCA7D001079392741AE61B841F6CE9D 30531BB3156C6BC169567E0CCE348B7987F170CAC8937F76E3BE71ED5FF79B68 5FD3F97DD0E8C8BD869DBE5A804B7A3B550462296ED8A19308322C11D1AAB366 632FB28DCD7DBEC1A9461104381D0F6BBC7E8808D6FB8AB2743D32059FF3984C F5D0228E615AB16D34D364C224EBD23B192062F9F25242C04D720ABF965C7BB6 5B6FA05A5FDE41211B546C51F20F876119D4957D51F8E2D62351A8E5DB276429 0C72452FB257556823D2CCB96AD2191574929066DF307D8B8310A23EECBBB326 FBEBDFE38FFC05D344EC4F0BE06D9CDFE71C962C3DBF936424A6F3FB7F4CB3D0 D98897C15D196497E883845F10C8A75A1E02A4BFC6841D335695360E4D7F43F5 5C66083FB60257A6362E558AE3563CF1B7588D09C766B6F3FD54E103978B8250 A8C0C8205F77A530999D33CFE33299EA0AD0AA657C958A0BD660B198BD7EB40C 3C63BD201CF1F08563DFD689DCCB0EEAA07E1FD13596CA96F25617B0B13C84FF 667601BF5B1CDF5BC805595AF3B69A360D9787D43F99F21E5BEA78034288051B 845BA131CE149534AF64D14944C9F87DABD39D30DC704BBCAA28DCF32FA9812E B381A84F1CB4743B5F1C9104EF924D6DE1C22BC011307F6A15C26079BF38DB2F F4FF392CCEF632A583B2E7094457ABC12B6B3410CC652A5E9C68B1B865DFD140 F66293C0E99F2CD2245C099A2A53D090DAAE4CDF7A885B5ECD4742A036B8DF44 C3E157DE60EB173FF479AA32CAD55A8E73642251B92A9EA5E0B4A45264B7329E B98785773ED292CCCCB600B00C32BD6B733D653A3EE720163028A91482657860 61AA5CDCA3915C8503A692F544E0B36AE394AB4A6BF71826C75685559880FE86 7B1FC44F3BE897A30F0F1DB23A4F9E9C951559EF4B119338DAEEC22B8760CC59 A61BAF08D4492C5879D86D43E217EDD2DFE8748CD6C19630761376A32A8AC74E 65B3121F2004E92EB80218FB1AE097644CC97507C38571450A00215EC305070B ECB6FB792B75E48A53E31F8C567C28E0DDE2A5BE38DCB90892AD4E9FD71E2390 902C99885ED596EBE003D8BCBAE707B9CDDDF95AA1E1464FFD921EDF24876C1A 7FF36ADB9CE7F184E8731540B5BF25FC4E34BF825D0B86795BA5A021507260FB 3391788603E191DB90282535A873174CD8823EC313029A816D5B48898E4DC8E9 13CBF7912C5D381C9282D9EE9B07EBBEB5B2EA1B166E810019551F503727CC1A 1AECE76A7A0C3479E60BAB187E9C3BF9B0F0E12D2AFBAB68FB0CFEF7B9DCCA33 DBDC30368BD46A43D6AC2C5755D1EAEF0DB7CF4EE87F354B4A3981069A398641 E974B66E79B5D77107AF68BEEC4D4A5F1A75CB0CE814825D9D26F3F20C9DCAB9 233259948A7B7F302014913813EBA57EA34953F1AE0072FEAFAE55B5FEBA93E6 D25970E32BAA3710D22E6158099099F13A723741E390ABFAC942304AA2DD83D7 0FB6435C748D1A1AB87F183159CC4342B9E3558306AEAF5C1164F8DAA1C1ECAC 06D96CCC2EFA9899F97608E1983734CB79F38C6B577B54E4CDBE5315F8D774DA 805C67930963E834D85900F51D07CA67FAA57F6D0EA7C8B64873DD427EDA7FE1 A120F98495A3DFF6454867F2172A82D8BD1FE7D46B0686D1D624E54F3826B567 FC67CD107234E0A9F6E12287FC915FC6792633E1C6B0E4F39B4E3DE796C2AA00 AD25511FACDAB39BAC34579AF4E87CD614698DAEB52F9C47F2C7426D9D5B1A04 269D3D6D380BBA5AECB04D318B6AAA2104F76B670457EC04E9F3FC16D4EC9A19 18172AA7D14910E70889B81006860DD6B9B3B336716ACCFB09306DBF1C2FFC6C 17DDD204E35C73D906192CC4EF969D459C3A8AFC6C44515EEBF5EA04212FEC62 ECC3E9C371AFDFD3A5F3A311F296CEE6EA2D4E81AE88EE69D923E97866BFE134 886519A2976D0957EB52BB3096FAC0F746E109203CE20818FFB1B6085C120917 09706BD9760276E7AA64C9A5787E8772C80D38C48E784CB9C722F16B89D27965 5AA592726E9C8AD7F5F57A54DEC706A53D79978B4518C27EEE1266A1C1F7626A A486119E2C9E7D3A56F5245C055924534CEC7A96797F700FDF9902B3E70B4781 71506CE14C0FA292B81201FBEC7FAE0D3048B622F91A65EC0E5ACB502F6A0268 91039348E826EDE569E12AFC9BF9C382182F31ABE21B81DB13596133C826E50A CF24CFAFE3ABA38D6D4243BC868C9A3C2755A91E7E76DD05478B0288E332D658 6961A8356DE1CA50599211C0B9277DBCCBD7EFA7C9D1FBE378A551B83FD5F79C D52D2E8F9746135CF15B5A7186C568B73D3B1102F272840FDCDA6BCF9F9C2F39 3A9384377ED3D1ECE1B9C24F8F9607FA08A6ECA7D705F1D00C0E45B2B10AAB48 458B1FF4758556B05159CFC3B854B480B0F20793F5F6983B0DDCEB69F15F6672 B8134DABA13EE161F2079F607DE8E04E6F2DB408323475F802C12BFC0B8E56EB 9840FFB6EE2B5C9F4DBC95A4D71B565A2BE0C051EC096B957E6D0E713BE1190C 431C5D38897AE56CB6C4141E7AB1489C7D8C83B35A6CBEC004EAB1ECDDB1556F 4EDFE82D3301D7DF21890FAB56E3A96B59B7F65740D9C22953B72C8333EDF0D3 B9A6CC20241A6A5D481BBC200F90081F8CB7C67712B5FFC8FBB47FF3F97E813D D002F3FC682F955320BE0E120AE57855800A6942DA605E3D2F2E59FB8A5DC11A 45F43A44443E8D0B97E89193BBDC8498BE95C3608B7FCE4C43B6BE7336CA4719 C416372E8A829A27B24CA1AA7602AFDFC69CCADF712926B3966B0C79DC67F538 8A8C12EBCAE65F36FAB8B7684DB028E8FF0559202175E4BE01F266E68B977A59 D12963E47B1DB2B7E54D33E8C86323307230139674E79F512FA14A31F9259CD3 899032174316EE5E370E8699164D98DB4E726B3655E96F567F180E661F39B9CD 653884E90D29DE14F90471E9DC330F9EFA02A0E6A7296918BE5BB333EBADDEE2 A74DFF3145F5BE203128E803418EE9C1DF89DBA4A92EBDF94B401FDC423E2D7F 5ED88BEC636E17C5C115B0FD061FE69DE97E4A0B2D7FCF2F6EC5B3B9A6E9A928 F17BA6BA9AD2618560073726E2BEEA8031EBA4FE0156AEDF8A8F565C6FC259C4 3A118F81B0AD2B787DCE58F40DF5C5941A13D116E3F726CDAA715A16D885A9FE 4EE2E195CBDAAD731BC924D89CFB273A7385944642444FCFAB0A351CB0836E74 2BE2C36DA5488897B2C31771899143B8780DF5B351F2C4653287B00D4940517A FD2A5A7F4937AE7E07EB29C02B276C662BBC798D7EDD3BD67291A2C1153A0E02 50B1604DD6B2B0A4C9E09CF41F8176A32A448DB0573EB4F07B6CDE555FF98526 BBBE22E9ACB3C578584344C4D76277077C8DE319538FC9509C8D3E1FC175619A 0801B00C9997A8B315CF208960B18FB4268F69C9529F1C78F34E6D289BB8F0D0 B89CF8741096F678F00685FBF235A4C1A8B14DCBEB1D8B0533A26A75FA1B3013 2BF9C0110764CFF5E982272A39C7C1B44952E32DEE9393A3A1A50EFEF2B0BC20 5D6AC175288180E8D7B6D831B5A7F2A83E36DE464AB787DC8442C4A5ABD4D9BC 5BA190411AA224210FD9D7DCA4CFD73753037B9C655F15D4A06F3E1038D4BE6E 6FAE715E2AC6B28F522F5D4598F9D3A7D6CAC432F3D072119ED9D370BC05948D F005ED4A88909C8DCAE253481F522C7086BBDE86A067B0766222D104CBBC2556 A31BB6924DA174871FEFC136366AE6CB28E0D5716D1DAE992F48B163103608E7 4043D73405A66A7D954E14D62F5D7DC41FB28DCAF0985E6765E2B7A2CCEB8D7A 5D5E3BB1FD1D691485AE359C92156F0C7E41C48012EBC6B9EF57E6E38C0D49A9 757CB049B02F6B867A4970F23D536B819FC5CAF65F680F4D4745FFD5BB7D2F30 FAC8FAFD8F9F6DB3947AE87ABF8B778E2067A343A739FB5EBE9B08934FD5C840 86AF28BAE95F0204FC6FC17DE47B198C7418FFD471E694209462D13AE46246A0 2B66B1D26B69182B0664B1864B1BA7B2EED6BBB48CFA8AF8CD4A1085AC6A08A6 71ED00CA33EB36A409E77F080EABF6B3252CA96E02EBF9A3137150E063D03850 32FF9F4DED6E4CED171C17975BE74A78B241F0FBE744269C4B165240E351C07E 70FCE19CCFDC7049F8AE95B8432B4AE416A5BBEB720CAF6D3CEB850435BF587F 2F06B2573FF7F0E5DD8B664D00F055F4E92181E11346E833D38D1257779CD970 93A535477EB1F55267DE6FBEB5B2FED8CDD1CE5960629A2320B5904B9D0659B6 408F6E191D71CF5AB5343881DE325A9077AF0DCEB45533AB3A26DFFD7FB17BA2 BAC585ACD6E94E08F006A68603F6C51AFA8B82FA29F47031B24C8FDEB94DD651 DD2C40EBCC219B8932C6F1EE8245B05E1E4A497CDE68BAB0DFF42F1229F01100 3E638CB49BBA83E156956E8AD166BAF21130463A55ECC53FD595EA141FEA1D04 63049FB701420E082B033204F5F86477A9281F5AF942D453F674EAF62DF2F688 DD5AFF93931EB932BF595F53D6AA86040B0656DDA2B5A3B0F1753158DB144A5D 3E0595C3FE86AA9EBA298601A24D66E2194A1397BF30007B576D28C74BCDF14A DC34A579F489E2805DEC206BB30D0882B4C5679C3AAC7717009220F88CC9A258 DD2795317AF0BDF91B5D692313F481AED28508F14CB6E8862EBA4D153E8A52C2 339C03C90F1620582684D6B2EED6F4F35B7B064FE7E787ED3F6F5850D9496FE4 36617A6B4776015F88D1AB009B59426D35B176C5B951648A17FFDED3D3396E44 A508EDD36114A873BC0B3394BA30D5FC1758B715FE5CD3124EFFF8234425CF17 FF07201E435B790696504D5538D2EFEBF2EF5D112599A5E2DF1A95CA7CF6C6E5 584C86CCFAD811E309A84CE61E17C7FBC1B7C02ABA31CA4C50ABE7FC265697F2 1285EABF08120B46C24DE5FE276DE24C42878E89C6F0D9617E0C6C22781ABCF7 863FA23D3E06A06EBE615D78CA4E541BFEA4FB41199C7047970BF58A7ECBE5FA 8A21B84E6C6B62F4A820E157886A01111807980C67753096DB80D303C7532C8B FF1F35AA111124505A627A27051789A0A0DEA844FCB13199D892C47AB761C715 CA38BD881A301191FFC7E7BA12247BE72387C2EF42CF9A9A9BB89F532378AA0B EC62EE076C0F1A1A98B2CA61ABD267B016870E840C76CBEC7DCADB47DF89BCEB 68A3393BD0AFBBA0B16E338F1FDBAA1434A9CC4725345C481DC0B6B815D80F5E C7B8D32DB3007CF3DC178BA0357D8E442BAF52AA5192B60C5DB306C2787ED422 AFFA64198F666ECFE859A3F242CE5D0AA10BDA41CC2EFB4740EA9F00D9C9F272 97344D4BB70F224F847FFB2AD9355C2EC3D07BD6561384CEB222621D4ADDB178 04E1BC218CD51F3FF99EDA80CC950306B619306D6A5E7E28629004A014B1ACFC 94D768E01E66B4206127318481DF32C617239E329B744ADA6C3F9BECACEF77CE E2A110095122E094302A18564DBCAA8D6F5A77C7FF202EDDB16D6D5EC0FB5DA0 43EB8B8606B01F637FA31CE1790008BE35BCAD1DFAC3825703ED96F6D95D881A 570E7088C8083E0C0D8F3DB72CC798D0C00B687125A8D3362162C875568BA68C 6970EE35670EB6F75E15FA6B1DBFC0DB6D483F4DD104605A92C5E1FC444B7B5E 267041E2647ED3CEEE3A89F278DBF828817A9D38080E7A6F1E71AD5A167AA7CB 11C90EC8F0A23710B382844FD8C729991F3A28504830F00E0F50C4AACF0BE5B1 5231222A588FEEAC4592DCA8E3EA109695A96D79C0CFEEF0C9F407D145E77939 45ED59FD5FE0E6FB5ED308D9E2F9F5F66D687BC3A1DB27583CEDD60761086239 4F13DF2A02A7125CC76BE19A2275BA92B27700CCE6592CC85F59A204E858E233 7902F07B315778DF2A5B102D757A65504419CDB5F55563787A036E9495E6D986 BAB107790B4C3609135162D72DAA7B9EF0B0D8FD8CB02D96574A25AC501D4B7B 3072BF6D1E1D764C9C63218685DF32C21C1633131257AA133C2991417B15B221 431EFD512E7EECE819577E277618656D4DF8BF3B5F02EC30DB3066E4FF4B2DF2 75585E1D9962621BA056E8187C9C8EDAED876880C47C572624DD5AA7EEA85F13 E5C140956D03FCFAA9CBB7BF4FFF484CF208B3506C1761789E47F24D3EA4C39A EBEBEE6866F2A6A6F8C416F262428CEAEFFF43876B0A64B55071DB38621DFE2F 3EAD57D46FC268C59405DEE21B96C200438CFAB5FF14F0B7A4F0B8A978D50505 37E891C626D03E8D714574D43FF7B5A3249C4214CB1FB316FF132ABD1B4FFDEB 379898E6DD3AD9D96DE2D444774EE8F08813C09D9B059876EE09653FDDDD20AB F92277D6A35EC6940DE69BEABE7DC5F959E28E44AAB0DFBAEB318A6CB687BC99 C262A3A22310AF478E327F3638C30213523DC53FE6076125D4C049F778830DC3 517876664C91F075EC5D9B97FF352DEB6F3F4B7AC73452A5014D46F6611CEB65 9C9752F8441D05409D1DE4E2BA61708C54566B8B0F2B48F6688970C2E67D628E BA74B44A03966D1A1A04E284B1868C2E7257E88B7C0CAB1DF8F2F95959631D3C FE1274ABE1C292B7C8B2AA2E581BF403D5A1B9639920E2586BBEB467F13A7E04 9833BA6F888FEC435BAD817F3A0477CAF683FCAA4254121F8A69671BD14B974F 24F70DAD8EDA7A4A68120DB4168623925BD7F4717408391F1C0A07E2FF8317F7 DAF35784EFDAB0768AB112E3E39C7FAB712BE6FA37AA262245FBD49CF50D035C F5A08650BE7C59242E62F5A4334F5ED54DC1011B331CF4909AA718DF28D5F3C3 FB9E1BD9DC531521B2EE967270725CD8051A878F154CC8AE7D34DF895D456B49 B940265CB7CEDE88F53DE24A60FF50328A68171DBF6CCC783EDC992E8028DCFA FCDBF84D63FFA6FDF27A9D43B6B848F9FC0CEAF81B856050AFFBECA5D1ECB01C 1309E67F97E8FCE3CB92313A6A2FE4AAC856DDAFD3472D74A6B86418A6B0AB84 54048634EF8778DA982D3EB11879530AD25A7F3955F96F0BFA878704D36C37CE 87575101D3DEF68354C2FF715BA9BB0EA117C5DE5F67A313F9796E0E1C50B65F E09CCC594D8977A8487346D1FF1ABCB21B27D478B6FDB63FBEC12A43F5FB6207 4CFDFF9BF5F9B75020256E0CC80D34ED102AC8A8A61F361694620811D4105207 07E9931C5C9FF3954AA20ADC851CB3FCE226201B8CA328D0019AD445D450B112 F2B3E010E133A8B2D1C31ED2E4B9E98B952E22AA2995A78AAF065459C99347F3 630E1A2367ED89E2DDF4316983205A4C1B32CED830223E10580550922B1EDBC3 02651215A21737D783866B38967674DA602B9ADB38B65E1787D2CE79374F157E 1A8EC651F4C7C27040869EF3AE0E38C76EBC870D4463B30E0353E9B937AB2F80 82318B35D0D4E5BBEDC718B8B2ECC98FF66D29A38EE737943594851DE276E5B7 5EFBACA40BAC294E24795CAA051DEE748F3EB7DF4831A2281B652416C163B43A 83D90AE02326CA5295A6614261942C79C2E7C31F20B29F89B0C2236761B92DE5 5A6E4CD7A0F7830BFFAF49C262A3F5F49019D85B4E3971A25BC59679F2CCB6B4 9312FBEAA9A8377801474E5382E61B405286ADF1F0742DA8D83023BC94EA2973 E10F19DF390BF0211D3B816B1003186588965E16AC67BFA01D35A4C038E0939C BB4B76FC56696DAE3F7EE5AD2D05D8345C49DFE665C84D32A239C396B5544518 EDB9123AC52AD6490E4CD27DEF0C7F8F3E61DD5D5E59284CAE285589CDCACA66 18DB59BA7C269D3FF4866650615228B1A433782E6FE642F64FDD974B24FB4445 9AE0916C5442BD0F0CEB9BB6C701C8774C46464427E1F40D5A2DFA46E0D8A27A 46663A155B169A4B498B5B7CBE830AC14D51C7440352B68C9BB21AAE3F59A2DF E2A8354D4A4230B9DC05E3F6FCCF1E336A77EE63A1388A4CC213AF0BF25ECADE 013E57CBADE3E0496A4442672874FD92131DE2CCCE589D6302DDF71608487C74 387EC9D2651DA9D07D877F453CC88EA35CE52E92C9D0312F004A94058880AC41 8FC19FD92D706B7451D73CD77AD01E2B6FDF8F0B534E5EABD507E8480AAF10D6 2A37346E35D604252ED68B75EFE98386D7A6B5F1E22C57F846341D7AAFA6D14D 8F7AD7C5632AA71C17972E4842FBA118A13AE6BA5C098CC24465ECFE9CDFB77A BB3B346A211B8411864E8EE985760483FD5D37C6BE9A32F4968C8EBECBF4E352 4263E794EB0EF4721D316B76AE9EC42119C6B38D6292FC827816876A6ED1E0D8 8BB2D8EEAC0AA23C018D0A539DD088535737A6548CE1E6FCEDD67CCEFAC92CD1 11E1D94C07DE54243B2D626A6C7E6D78FB9A1876C9C1FFA0CB14E19A956AE561 89DB7E1FDE06B322FDC1649B86C15D49CE41A84AF29CBAD14E4F76A3874C2765 041E235E02F7E38B48E0F4EDA33C0143596F1726598D218B7A34EF622D72423C A92BC0C8543F51CFF50C95BC809447934973998C68D304FED2B0CF2471ADE7CD A12999C86FB92C128179144FBCE8B9EDD578C7DC98C663220707684C9BD41B10 AA7391C607282648AB6F295E0440245996F3ADAB5E36AD8C6CB1B05DF406ACF2 107C648C86DCAB0E20D91F97521161B18749022DB7785649D2F69755C5181DF2 66FCDD9229845F2DB61A1509EC1C35B74D38A492A726B5C74E764E9BE406C508 837C3A5E584FB7280572E8E13AD31C811B51A91B04FD8FBE07ABA4983DD8EE3E 1065041FD9A4959C3DFF9775FD3ADBC2612BCECD3190922997F8EFD70F7E7AB5 4B3BBA3C029414FF088688CDC39A6DC708E599395621F1F6495B2EF706D8E965 E13F33127B31F692A3212E726F95DCFF7699528CE4D8DF3F510757DE12D6F397 21834FA45FD674422DDAF5E2619408313CD0E4192D05445D3AAFEC1F54EC1F4D 38A8BBC2F95EC7E76BFE574C161823E03607242BD610A97E451535AE85FFAAEF 2FCF376FA2983FE8FBF60E7DAAEE6DAEAF08C6AB9988652E1FACBC0296D2755E 6A10F1321F4DBF917C360D6D21EC4769E1B009EC10E44CE24D149C09FCFD2565 0B60437EABB7B6C0745E2195E724E6552932B8D5F056192DFD31C72F50F9643E EB9D639938F379038B2ABFD12BDF752B44C2AA00CE1B134288506A5D32E65D42 92A9E998B6E39F49D49913F712E8F01638F632B485FF03DA199D711296476870 67CCD1CD4D54CD143328592C4A21094449981989FDB4AFE4DB5639A1172741A7 96FFFD10D4E2F3EA74E32E9D2B02D227C6D2C350C701EC682ED12ECBC29D0B3B 520715C9F076A2DF14C2630E60C48C6E1F5EECD90BFD8B0E2C5784B2C526ACAB 34074E78ED27E5397A9DEADEFC62072CCAF85B9E10DEB3A219BDAF48DF7F4E21 CE258BB45A3154CB784BDC956E549557D4118AA4396A85B421BB184420078259 BCCB1C6F5567BA5B57E21701525529949ED8D52FBA855F3D283EB225F2A0E55C A20D3AEC3389857665D85FD46FF9242B746DF7E73F0BAA88103FFA90DD40EC30 01F579A567BAC08C31988DBA15E87CE483D5EB14AEB03A6B54D190A42FD0DE2F 857B518DA5A16915B09A281D7990E45A339AC4A17C63E374B17995AE63959599 83E4484D16A53044F1F17C73194E8A487F7CF0DBB29D600AA4ACBAA06D20A9B3 5FF498C20A0513846BF43AFB3C2502B249E90B338DF8D27DCFC5264D9BC2BA28 ED631ABD00E6663A0BD73DD6F89910092589AE70DA128C8B7045CD3582D5FA4A 047AE34DFCF173E8AC093AEE9A80977EACD87FD7A2CFF876889F9218123F8C38 93299EF7DC94A1DAA32428A05C018261DA79B3024E7BA5990516AF291D5A5767 4D5C21719C590038592C6F41BCABFB510EFCF0423CC0011ED2D561091C8796EF E086C6C5F288F0A809952F44886670C1A4F5DF081C51B10C52079BBF9D8D7B8A 683D550CE689D0D98CD434A5D5F8D5D484E03AEB52A208852FCD72566A6225EF 00D1052DB98667B0962C3AE183E78AD5BCE415FE6E590EB70B58CDBA2784D1DA 57D677BEB995896446B579BA39B9E270EC403AE66199F02493F6022D97E893B0 C671E5302D4B6D4B41286394B32D893F9CDEF1E07080DA818A7E32624F5F0734 733E502612FB3012C5D74179829BF62DDA3BA24083DEEBBD25B6BF29A4690498 59928A521C57C45B19441AF4F50A5FB07C4C0FD11E96938E8347B4C6CB7C9F2E 43C4C0F24DFAAA439C2027E955746485168793E430D7BE7E7EE15656B08376A1 7EF5032F4CA4ACCA0BF5E47753EACF1D8B970D84BAC48B9F57BED8779AD7AD2C 0A276D557B7CECE335688D98F212B2E28D5D35E8626C5A1909E399FED5F54CF4 FFDD2351D32EA79002F5E365FC8C55637FF9B4FCF29F5D164FA17FF700254D7E 623D77FEB665C4939E28179AA6241D63D8A67ACCDB009C5C7F2266D4604BE05C 2D3B7FB782FB23AD60193AFFA19B9CEF458BF353B7E4F4A8693BB12C41B8D2C6 B2DCC839FDF27CB33665D1EF0854E1CA9030250679E143CADF28E5EA6F1F9D52 5C16E89370BFE38C23F4F7965AA1CD17602F3D0B774F23300A2B58A5663B4F86 896B905DC30BF9B01FBC7112E1ACCCFCDA85842C46C04AE313A8607207801951 645BDD81FB362BCE0EBD0107340207614A5A961DB798E7F43FF6605971C2EA17 366BF84FF8B4FBF2666E443F71124B03220510A6DDE96629E7CC99764871C98F 96AF79291E84F36237A38F604241F0D6C3E47B0882FDFF31A34AB534FC58C325 BA4F8386047FDC7C810E997AA0024DFEE9092164F9E391E8A66EB12969FC8F1D 3F391527E1B0ACFB6D6FD8B787B059068A8A6E12BCBDB6ACB967E993DD03420F 09D09915995F296E4B31CBD2029D964C2F31FCF70303E5E3320826A277F24DDA CA22B27EFEEBE51B969A18A7C023F4FD60AB1A6472654EEC2425DDAA17B47002 B4A3FDACEE018174DEB3784FAD91184E5E59485CECCCB086A0BA259A7BF25F31 3FFEC540781A0BA794C0A886C093E8D05A7E917657F86FDA5BA0B27FD3C8F96C B781027EA18E6EB564D94FA9ECADFC27E03E3B79C06F9894312388E043ED6815 3EEC388F2A56A56190B9DD287140F95EC56ECA32A336BC0B5D0437B28C08B30F FE0FAB24CC6ACCCB809D2C8DE003760B661819A92BE4D6014834288618759AB0 A1944C5665C3632A853106EA0F99EC2CB5F8BCA9884E15ACAC8A7E4E37455A85 549F7D9C28BACAA3DF617F352F74EFA6B1C2562C93FBCE819AFA1A85198E6F9F C8F22038FFC723D1618135E210B8149B41B1F4D0C5 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMR9 %!PS-AdobeFont-1.1: CMR9 1.0 %%CreationDate: 1991 Aug 20 16:39:59 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMR9) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMR9 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 11 /ff put dup 12 /fi put dup 13 /fl put dup 14 /ffi put dup 40 /parenleft put dup 41 /parenright put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put dup 58 /colon put dup 59 /semicolon put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 79 /O put dup 80 /P put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 87 /W put dup 91 /bracketleft put dup 93 /bracketright put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 106 /j put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 113 /q put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 121 /y put readonly def /FontBBox{-39 -250 1036 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF7158F1163BC1F3352E22A1452E73FECA8A4 87100FB1FFC4C8AF409B2067537220E605DA0852CA49839E1386AF9D7A1A455F D1F017CE45884D76EF2CB9BC5821FD25365DDEA6E45F332B5F68A44AD8A530F0 92A36FADB679CF58BAFDD3E51DFDD314B91A605515D729EE20C42505FD4E0835 3C9D365B14C003BC6DD352F0228A8C161F172D2551CD1C67CD0B1B21DED53203 046FAFF9B1129167921DD82C5964F9DDDFE0D2686875BD075FC81831A941F20E C5CD90040A092E559F6D1D3B0E9BB71733595AE0EA6093F986377A96060BF12A A1B525CD9FA741FE051DD54A32BECD55A868DD63119A4370F8322CCBEC889BC2 A723CB4015FC4AA90AE873EA14DE13382CA9CF0D8DFB65F0ABEDFD9A64BB3F4D 731E2E1C9A1789228FF44116230A70C339C9819676022AB31B5C9C589AE9094B 09882051AD4637C1710D93E8DD117B4E7B478493B91EA6306FDB3FA6D738AAB1 49FBB21A00AC2A999C21445DE3177F21D8B6AAB33869C882613EA6B5EC56476B 5634181ECBF03BFEDB57F079EACE3B334F6F384BDF9D70AEBD592C8ECF21378B 54A8B5DBF7CB9282E16AA517E14843909339B5E7C55B038BF3BB493F3B884A1C C25F9E8FB912CBE23199AD9D2C3E573727701BA301526C66C3617B9514D6F11F 11930B1D97C17816C85B1BFD9B973A191B33CC3B391815AC46268691C741B2D4 48A840F1128D9B2F9CF07D0709FE796B23A836417BF7B5B12D67F74453C82F5F 25F7B30701D6F6D4F4DC623C0C27D6A6FBECC7312A3CD10932FC7C10851C3C52 24B75DEA8A648B7F34F5711DB0E843C914E25663C510185BC37BDB7593C1C259 21D8DDAD33982C336BF272BAB2F48E68217403FE9F54877B243614A87E64784D 2796EE4179FBF96123D1BEE3EF89D682B427BA4F12A1318A57F18BE5DD903815 2617B231347185E09C836AAAC02D7084899EE31E55E9D98B2EF9075677C6720B 2B49EB18CE9249521558B183DA135E7E2AD901630A9A87F4D602ED878C6A4A50 05D952816B0C0507CDCB79721B1304AEE2116404EBA735C41FE0FAAC2ECD7419 1CD6119BDDAE9C5226482230ABBB7E206B31AC812D82A60309E77DD103782E5C 1DE1A0CAB4DB4B3FBCAD1628286369C94134BC603AEABC4127B169E2B3AF1634 0B354D88908260FB3D1C72F2C069B8970C121B2CF0877151D1933BDE8639E569 9B7806AB0FCD5F28ECE25877AF315545BDDCA523EF2B8E6C7E5D2E64BF8FCDEB E317ADDDD219D3EDFAA01B7782C51C46CE1206D6C1881F07D9C2FB231CF35471 E4EFCA0EEB33E9F440ADBEC93620032ABC2E1D04366D7F618022D7F23BC1AA80 5437647FF9A1C4DC414C9FDEA261AACE8448E27233A4BA9C2A527D4032B0F13B 314EF5FA5ADCB5BC1B0A46D9339F66CA940DD52F192816E7ACDB30B0A5EFF46B AF0E559998841C5D64881B43F47AD42BCE59953A48AF8775BFD3F1E9DF1EC263 F4625F6C86FE0FEDD9E809C2BF7E883D5CFD0FC99C9682D5016E5AE069568EF3 63D5381A94CF9AE216E81C362645ADBEF13C35E069AFB185D9A5A4237DF9808C A466FFA2867EB7B3CCA5BC2FD7EB2A5CDBC1ADA4BC4C887058752D75DA38E54F D30FA318F2B5681419ACDBBDDF526AA57410998C94F5FA582F6D61E547CE3470 0B89918A6AEC4D2C7D750896118B38AF38FD0766CD3BF5484D89040DA02F1387 DCA60C9B5134D949A17A4B4EB234FA3665E9B2CA006F0154E6197CB3FA8FB414 C98CE24768F416F6D07C171F7EF052D578487F2EBFE86B7315C9B51C1F6B2EBE 3086A1C08D478C350B2F30C32AD14183DE9D54DA9BEF52AFA98691ED8794C3AD F7264AF88864AB739B8E3B3EDCB4F898C4C9D944FC432B5D956AD7CC1CBDC5E2 DE5B70422F66120B19280B51D369D9B43F5AAD67B470322F2C8D21A32DA3E8D1 BCD5E216AB849F7D17F7AE998F3A79B9838C7C10DEB6CC746F201DE9F888925A C07CCC3BCBC2BA64FB84EA8C847806712ACB3B7A9D370F838A80AA5AB8CF74E2 B29666C3ED08BEA798D11789C3680199787BE01C07E4321567535E3FAA0FB32C 0BBDFC4B5BB007C290C39308E9CFEC2598B02532B5EFF4E4A617083472652311 151FD04CBA9A4F32E2CC9C9ADAF0AE0941FFFB71EE93BAD2C3F7AB0ECB40041C 065F02167B80ABD126825292F3F71D5917CD83222761AF068682BCF09DFC6055 2E0F18BA4CF813E38BEA87DF4FF8076B09826758C5BFE38B27AC0C6C39EEC46C 6D037BB34ACAC50C738F340478BF6637A74640D6339C8436B5A8142680BEDA24 54207829D525C8BC387DF434FC4E905EF70A092BAC03265D6512B675AD89F879 A163E65E195B7CBB7FA06AE6C5D718D2DC4BDB0571E13C0D466EDCDE2FEC32E3 C1E33B653B658E1740641977759ABF149B19F4803A56885798F89D9AD30D883B 3A72714917289169DF4C171440FA6875D6B00040A2836C99149CB9E9354CBF13 24C0A0B3758A8E2455F46E36C8D4D89691780BE3457B07EC1953487753AF04FD 92C26D19E22ED9B87D26EEA46F69D5BE1AD81AC648320C1EBA5F14EAA93C6BC2 064D412B1B55C174766DD217B872A6D627A957F6380FCD5D041218086460B7DE A9675B3DEE059C8112952E294AD0998ADAD7FCE00305F9D3E72D2CA19A3F50B6 825E257B6A61B472409688963F2C5197F94F59B8C92A5C6171CE419DF4527521 D269C73D6212343785D14A64A9242D02B5BB019A817A5CD5056188614A5BFE5E 68919B50D34C2A5AA24B24B73EC03D3B06228BF2AB8C800747059B42D13BBC2B E4C5F73FA271254996A4BF60CB159E5848DCE66C2A7A3AB8C8487196A2770B03 CAFCB19A9CAA6B132306A77939587D3F2D07F85E4567391E12D72687198FD7DB 705187EE59A4ED3C7F99E8DBC2A42C1955921F730B3320D0FFBAC9620BCBE19A D8B688183876C33E0625AC4EEDAA67A67D8C27187F072F982EC3BB7EDA88F99B 1C1C1D4BB1D69B96BBAE57ADCDFD4FEF9A6372300D95957158DFE257A805D5B0 17904F53F9D33FAF95F37E313396DBD3D3D6790772C10EC5C2B6EBB512642E3E 3FBF4B78F85B6F2C2517802237A05A46FB651A34FFBCAD75AEF3B4AE766C98E6 31C4D2FEC0DCCA1A958CECA8D314F1612A09C956221FD07A0734E7F226FF8E9B 990445D0BDF6304B4D08954E28E8B5F1BDF5AE506A5FE6AFE93470FC20B2B375 94E8CBCA8ED2ADEBC48B35A507682508D429A322698F98DEA1429230EC625903 CA73AA163C739A275F97C180C24DB3495478010B49895E89A833E84AD0923748 7EEBB480B828A40AFB698A7A3BB056833109CE417FC347820A389E7D6B6C8942 0A38983B7A64675249F9BDDDAE45B1F5247E316E2529270934DDA549ACBF69DA D7ACBF9A7ED386D4482E3FB8A67EB35FCA48DDFC3E9E21954976799C62789059 41E9B7CD275DEAB64C65078743D73BF8AA302B3261D5DFF4E1990A594D666A5E 0CF53FA6CF887D5DCD340740872B089FD99B1898C613060B2C626FE7FD63AC2E 82C43721A03C6B60910F18C0544B135BD4AAB90D22E1B3113689788BCBCF1C36 5AC37D0E41C0D0F08F93623463AEBF4826A3D074EA6A6C241B2F70511BEDC3C5 0DA05C96FEAAC3F7FEACC96EE5099D6E7765E506C1CDF1A2E202C7B6D40E07F8 186916A068A22824F881C1CAC64BBD25B91C1E4E2C4887E51FD19F7A37603033 6DB8865B779D65A26C1DA77457D026AA9B8102E8958B60041DEE0E5D64248C4E EDFAEB6279AB6BCB14E4DACA3822E92AD99D706FC987B6FE558982E075DE02EA B0DD2B60405A26BFEC5DC14FF7319D229BE988BC255D7013DCD91A7B4C68058C 3B4C2B058B19B3A82C02F2AB6419F309CE6000422CBEC23DFE4694932ACA3C58 64B2CB5E6D6735E789E2CAC92999EF09408070220AE73B753E18994E1CDD2755 28310899F5AB6BC59C725730B98D2C9C8C0885C92ECBBA6BD258131F34DA80BB 176BEEAAF85F4D8853194611140B4652A38AB78491282DC1ECD106044039E31A 08A502B380EC1D56B88AFA74D5FAF62A31256F7AF2B9825F587B63AFE225C3B8 0A819759057BEF22C8666D6B933546B9B5235243C9961F2D1DB91A376B4052D4 F8E08206AAC6A0836AB9279550D79F4C9351E090887F2CE788B82BC17FB321D2 573ABE2296FF6AEE4AA688208B074337A746C40E1B8BE5F4E6039C09E7EECE8E 665AA85458B0D18888EDC58361719A181A5611C87BED681A860F60BAF7969B5E 6717A257EE5BE70B869F8DA9E4ACCAFFD866A8D12BBD09D7CD67D8ED65B1E767 A09AF9872DB309F5173EB4CEE072F4E883AE2EDAE70513B6F3CFCD5E70375616 C82B3A8E75A989F72F87382D2B077E527852FBCA9132A1CE2B277F9568F18E1B 779F8F99C311B6024F5C560AB0960CFACE46A14188FF248D751F59F0EE2431D5 77EC171B8FE625F6603B0719AA2D273879DDBAF2691AF89525C4357294D2EB1D 2DBB946C3E0E3410F9F81E170C879C411B4C64150A196F1130811B3109BD3618 85CB102B22B3D1B06BC53AD8C094A633B10AAC847CEBE3F65F47D6F32FC1B6A0 CC3FA2A0C1EFCB7A40764417F5D522D224BDD1AA68E5A3BA98960D69E7C31996 6FE5583BE1CB1BD2DC5277FB49F35B3BAD301E5EF0ECAAF358115F197F92B5DE 2875AFA66B66AAEC383EACB79BA769F5FBA313928D1A697BC389E39EA725FC77 59F02ACC3165B635F99F2A3BD2594A2B6AFF1DFF16748F60379375C4313A4100 44F1D2C56DFED87418AE88D9C7C3EE54A946B8F4A4CFDB99857B7B0204C8A71B 69CC0DB7DFBD57B6CDAB5D670A30394A041706BE9EF37488AD284813B2F7D30D 81E1F955B6C58B48E8FAA8E6EF9A092CD65968F59716EB70EC237B45335FED92 18192A44FB0AAC924A60F47C1AD139F2A4228F88F932C72BB54E8AADD71AECF3 A3E53CF1450D8497C7054A0E79C58C9C1AD4CEEB87E17FFCEA7B3CD52056564A 94C09E87E0751CD49C4A3F1A0BB905309D97B3F5EDDE5122720AB086EAED99FA 00780881D89902644F27E335FB312BE5028A041377E43F980F78DF6BF4F199F2 C2165FB5D9D46D6682800BF6A03DF6CDBEC995897A6826B186C093837FB16B8D 41DAD29CAFFFE5C1F544AF3F76314FE9158753B6AB9B5C22D3E41168369EAC53 69FEF08A7DC1E1ED9A2DE11EBA7756EDDC697E5A3F211DDF968F642022C20F8B 539F90CB8119B3EF1B6326A2AA0D4694BAB34ACBAE5D03B2818978B16A6A8EA9 36DC01008D37087E7361F117B0B419EDEBAC55B92383D873955965C341DB0670 873F035954043377403CCAA16844762DE4511B9A5668B91C4075D8CF83C3743F 93738D5986220A99E6DD3AD9C865608D6A48BC94B80B9E1849B69A08BC003AE9 DAC5B789E36C8C52D10B30D1A676B81050FD773B5DD9DE16D8DF1C0FEF9E3782 2EE472D07CFE8133E779A8362CD84482726B51284EDA06BE222FF0C03FC7CD7B 0BE9143F244051FFDC69B0C31B92A14531EC45BD1EA0C22708C0668AEEED0630 C5FCC8AF097D32CD82192B6AF474606A3880173A9EC5635647932F97DB3931BD 2570723779E9311D38653561355D164331F5CCFD7BE51810B44751D978C2A8DB 868670CA59D81F1C266875235F7F127481C33DB1CADC7ECB031B131184162F8F C1C426AD8EE29B90E92E03009C528A438035F0502EE6B78B7C985AF58EF2AAE0 C7683D16211097F5061BB10AF9C38C6873309999E2A5948F3998AE7A262CDEB9 256E8208A55EC2AAE276B6FE4DBE422FD6CCE59EB8D35003B0AD3A66AAE9B892 90DA5B6BAD48BAE57BAC2705049468A80F9E91805B996ADEC599B81968130906 7BB8BB1A0CA1532D627912AF7640585EA93E86C66C528BD4A41E0C079D85F49B 896316745A4D4D488C6AA6D7C8169E6DEA740D1132439811AAF7F6FA6C5811E9 A4199D7406C786E3A390015FF83BA965EDE5B44E7249BF34AE9A8742E8C811A7 2F181F4B178032F25400524930E80E600CBB1D642FB8B97107968405A462B331 BF05E1DC91D9B38C9114AB59B3E9261BCCEC0567090358EDBCF6219E34DF0D99 DA36DA7724EBB460901CEA1647795DC3E42D2988055B36C39CDF48F3356E4227 B2F4FBAD76CD1A85F77D87E218E5C7FE42C26D4239AC4DD9E7CD38BAE732DF8A 3A7276B339C0F12FFD0F3EB05194C245B6325081B00E802B2939796E44246D46 D32FAE883BD453AAE560B90AD136C37FD8CD2417729872D1B2629253E2707D8B 30084E416042CE1488923F538A45D38A89C649F82D46AC54D0C8836511E8D598 95B139973435E42B6BE29EC223932330B558D92968AC81C4565E16652A3D5D73 8DC1F739DF70AA3A2D946B25217103E9D9BA8E1FEF478E5344EF87723DA79039 227FF756D9A7CA10262BBB1DD75BD2BA309C5D65286FAB77F0811A877944181B 767AB5188BED7F9A5F6E55CFAFF99BA2785C01E6381A14D76BD333E25B69581D 506043C433930918B7EABFCB149446D72A091F5BCEBE6E42B3F17777885F3BAE 5BEC0557FFC9269775E1E18404A24DF64D156D62A2AA1D7B10FA3EDB4913DE15 EB18EFF166F1C72D09743CC778A7CD79BFAB02CBAB8755DFF80AAB0A281A29FC D785A7D1FB0A8C6060788A89BE60926FA7C63E596789ED6A58B8A5699F5814F8 749E9D416A0DA2B3A3DD79C5723119B03960D21A79BDA2C47D9D34941BAC5F58 AD70F3BF3DA3051EB0125CF8B85AA69A8B08A1DE8FEE15D6F06682A6EEB82286 C461C765FA25174607B11F3B444FCA80DD1CA034F5DEDB5D3CF71112E4D6DC1E 8B5F465A78EAE8AE40C9CA184A28727E615A3E63B5538D31376A4A33DB534EFE 15FC4E5DE07081F02C25413B85FD27A55F005B41200D11ADCA2BE9C2F8EC7571 3B6A062490D0AEA9405A00666038375FD2564B045AA9BAD8DE90670017181DA9 7F17F999731FA5D111F8270838CC40FAE071F4F3E232D6423E62329FBCE2F8C8 48DAEFB17147B81E7E8A74A3B9E1614D4F1FDD63684CE7A3E1CC2FAA67D49017 7877EC3D393E232CC8DDCD7C61FDBAA5E549175CCB853F21BF7479EDC67B5A79 F4221D918400143969BDF022588B58688F23CE6EE72B7C30884D54BBEA9611F2 9FFE291DD1F8123174AEF92B6FEA73081D6F625649C0BC6656050153F57D99C2 D0C87C4348C47EF57BC2887165F0B0A738A9B89946E9F30BBC5F1C45B22245BC 6D4281A62FD1322B0D9260539D9AFEA88355D02C0E4A7EF99B6041638DB16C56 80EF6F3D3D03FE5B45B314D198BAEB2FE9DBF4680E840FA98757D1EE10F44962 32F75E694BC0CDE119F378F85C5740012817FC73E660C2C6741585038B2BDED1 43E4286EFC690CC9F1C86F130415D269F7C0588E51D3B9699F380F76DA949EDC 4030DBA93ACE8F554A448AED35F5732CF319AEFE0DA9407A4DB52FD983D9DF23 1E38EE562EE7458BBBFA5FA1598C2F8C9A2D2978F6634D13CD1FCD153BA0CF8A CCEBB8C8676650588BA84A0D408E74EE400C54FC5D33512992886B606CC1B2E1 6B8964C67714DD39DA2E095F8846302B2FC63A732AF800420EE2F69721AC40C8 198DE6EE12ACB0115B56ACBFF635B622D6FBE07AE2221A8943CB778F6332CA9B 142D0BCB53DA32A5BD6C4A6010A3F8D5EA79E51BBBCA1B9F0102E91FC5E7C0EF 11E8AFCE9CE37501B793CF31A30BF6A71F101815AC6E049116EC474245EA4C4C B96CE29CD66235128CF1820EA6C644C0E64753EBAA137AFBD48F74E7550E57DF 23499DCA8B81AD892DE34C097B651F722C3A36D996038DE84C8C1245D4DBE7EB E6B64F0FE0B5DC18708C5BEFACADCF6132F45DF5905F013320970D4F74C33CBB 1A1CE868BE636E03EBD3BE160D9AB140557226707518569AD9388B4B308E9AB4 003DBA8D16D0A32C8AC8BC69C2B0515929D47E560F97084A6C622AE2306C26FD C68CB9ADB84E360C57A1E3C143C2967F77E82AA9D42F09308EBF05DE827242EE F39CF29D8856B937320826DA054F11F353FD3396F77C1B5EBAE08FB7E9FCDBC7 D36B27F38AD1E6E1436738DEC6385E92494C20C990EBAD484FB9A3527331A191 21EC6610DBCB56E3B0EA773ADA5428A0DE812691A8086C2B03C3D83C7A5751D3 F91012BCB326D608EE446A68399870678E9338F2283554EB9F606FF972D94937 1B5931AAAB6C1A97E5E857C052397A4132BA5ACF93A1226B2D7F9C374F098D8D 4C45B4CAE174C72647385FD43F215E003C4436773397944D8FE72C9C3BFA28B8 395C93AF828395A016311EB1CEB2EB85E0120417EA53E056EF1A0E5AB4D2A661 4AA428655D2952BF131EBBB09E1D905ADF83C0A350D6F912F5C351CB6035BE59 179624E4916D8B11C654628D93D534700DA8715AABF7F24D3B532667E291265E A88215E4F2DCAE3FDFDCE703A43F11BDACBFCD861AA5974D46E553D4213E7354 3BF4E45E5997FC859B9F4CD2D1627E824954009877D025750A81C8A07E6163D7 945A01D5FD186623CCB8D38A7263D153FA190A4272BD194DCEE2AFC9FBA9B745 F0150F6675221954AFE06CD9544384EAFEC5495844323A27ABC060E70DA6168C E835CF739CA9D45AD8A26154DDF40A002350CDD6621F6935B099F51E7EEC1FB5 2BD4C4791E31A6161A7463333A044E8AAE86EB255C074DE18DF9BFC534CEB01B C74C0020DCA79FD98D518A084AF9E91462C910FDF3DA9250A2EF996D6686F6FB 63876A513F0E0E1272BADCF81CC042E42D3B2624C510184BD0E2140D3881AECB 4AD9F9B5504C36C4847DD2A272F6643390C1621DD332E2215A3B8265786061BE BD04941D58CD93D88E06F166033D361DB421AD8BB541AE428ACB2F9AEF170E08 211450165DFE7366BEA509D76F5E866D5673F1C62BAD69896C94564F1165DB54 AABA04EBF2A31FCC79D7B370B5A5827D791CE120BB075E0B4FB6722FCE30F5E7 6CAB671102C7873C5764E62D01DE760B93AAAD9D064DF7D21E37BBAD2D8D4AA9 21A04EFF19EDCB60A19CD41AD9E27FB3BC8E7868E78C292513E59B5CEBE4B921 96072F57B416952A4BA9F7E42CE4C663388893694E80B76ED31A2D87A427DC00 58D0E949B69CC7BDA3AA8D176A10990D57329D6CC9E8059B1DA799FBC30231F1 9E1A4BD8A7FAA8E0E893CCE6610A8E221091E1D17E0DF655E43153971CB1C37B 96CEF43699D76CD7F5AD3068EDE02A38468EA51683A92472B201398EBCEBF09F 38B722A98491E8E13707E51E4EB9F41749B1159A770CED917DB7C990A179A82E 21390D9729A600D29121259D12B057EB65E8BE513D6BB38CF6F2C0F6E1C0C15A EC40022683C03B54F7278BB68A6C50A8A9A475A7C4EE6916F1CD35DE220F1B3D F0E079202B676810EA73BA2ADE118103829FC4AAF2421E5673B1700C16132B55 7383E2B1F5981B98F9AC595E7F998E628676EAB540674B981C336C24F7DA41BE B2719B2CDCAB0B1AF82E2D72E9CE810F159B53DC551ED399BEE723C4611D4DA4 53FE2B238EE66F4599213C43031C36DD2F2706A706AD75275B5A3D24EF1D0C7F 6E9243A551DD5433D7DA57E2F0BC37E8632CAC3FA4F7BA5E49F831D1062D2D75 41F7B47B6EECB46A0C20C1A7EAF650ACED8BC16DCC1E65360C1CC64E79BA255D 0DE901091F1467644207A2709C764984DD876417F231AF0D22185354646A285A 8245A1E1EDF87704B2987AE02A670E30F23895EE61AB57C423C21E3A6CD3BBD3 94F3FAA0918C0A06376917F77D973A9B91F30EF462C7F067E157B37117573197 8C55451A1EAD8C02533078528D3EEA00E1992F244F1D623A9A49CCBA3CED350F 6CC1908691E4094232C010305B4FF177A8465B0481C0326C2250B8CAF81751F7 EFC67E569B6A743B579FD83BC9BFBCBB7F1C2E8CD26E5BF11DB5894481DC639F 6607D2DA693AACB7AC0358F2FA7C1DA84F4EADE4400F0DC93FBBAD89D08EB000 F3E7E88AEEF2558433270B70B610D70A6A9364BA8AB54BC0BCC44D2D2C71233E 1D6433107391FA1CF93E498D307C86B129FAEC6666CB8F4560E98D9F268F8FE4 82A532F55A39569EE27F83270F169D568DC9C824EA3AD2EEADF2F155A3AA539C 8FDEC67825265EB13FAFEDA703ACA0D240F9FC9D44BDAC5954B675E9277DE6BE C429817BBA1AE1A4ED09D2E300670380E650CA0542429CF38A794D1D6296D875 E91EBD2B264C185E7548DF2B4E5E37C479349CF17CB2CE9D79ECAE2B4EA83398 9F67EE849F1C974BACE15E46FCE8542317B35629FBB60221C7C77EC21C505F6F AE74D69A603D6378D24A91783BC7DC88C78DF57D9B82FEDBCB46D3A9773C8852 6725236DC5CBD9E63BE1B3112B24D60F1A7C3F9BAF163CD7BE52EF7A734AA568 0FA1AAA0427A5337621D76A8C839DAD7305570922A9A1D5FF36A8D0B93D0AE03 C9E05215304C4F1D7930331FC72C3DDF098C0006CBF404AC87F85D5153653A63 8454725BB53A6E6DE47947DF7C11192338B1EC0600DC18DFDDE71498078FB25C 9DEB3F29C9F9DB8C9AA5F663EBA095C5107E7A1E348A270DB13858483233B94C 7FC8C5AF90CF1602EF11B8F7848E5624351B27D86151598EDD1678FB737CB33B 9D5DF34DE02BDE9F6A7DDF0F44B7EEAEA21BEC5F5C9939F8C7262A6086188396 2C96B4FF4860DA9E7E29380EEC700BA22A94D7CE11FC48CB65B05742300EAEC1 AA5D18B07B1AFEE6CAFD96B8BB6876F88F64FA9BAEF0E0EED7F971622C383181 41E37108694EB7F2DB55F5F5D1107D6489B6B9BFBE8D38428B6B6946AED16339 84A7A67774B689F2D6977C4CA85603FCF9D475C8EED6BF9778C68181E5DD167E 8BF307E8EC418B7D634E07ABD74896E84406E81603267F41D1F2F8036EE2843C 474DE277145FD729FF5B169A878E8C4B8990AFF70F59EF19DF8B908C1D31E1E5 BBFB0982453A8FEC0BC45CEB8724BF63A9E3E28BA6476C02D0D000C3A195EF34 B1E94763CFB076D346B3C1F2642C3A0D469C1993D94CE66EE1B2163BE11DB896 BD35AC26087DA2C3B972B884A8B3BFE968046DD1DB330C6D791AE53B657CD3E0 B58E21B5916B10B475F4B68A9DEB90AEFB5F4D201C218CAD20D629DA5937BA6A 8BCBB4E72DBEEBBD530974D49949C3820B2178119000585FF745D6177919090F 40E8B0E1D7DA6956C1026781474FCDE9FAEDB58D20199A3DACF7A0166744C932 0F45E8EFC9EACA77851476AD1171B6C72090513AC7F975FFEB0917AE25782ABB B6E08885346EDF7F7222622D2A59A3E179056A4A736171848C8F882135A2077A 629765395B1180CDD6DED2FE4E2857514686C7AF2C2EDC6369D4BDEF806E68A1 58D04171FE8420D1C3BDB3F223A1B5EE2B33AFBA149B075FD2EBA8DBEF318347 FC3134BA0B26DB555B75CE299114BAB353F159DA25994BA23A7652BCEBFD4537 14372CFF3D0E0618C8191944278FC25627624CB476BD00F70E03103D2F176467 85FA95456A9411F2853ED1AE7B91BC6291858DDA415937E69D1A22CF3E87ED4B B99E73B363AD8D753860D22DC8F5555DBDE708481E857E2BA9F50DCF5683A4DF 2274ED00AD14DE3A00347D3285AB6615B765DE5E2EA4A4EE864A73F6C0C57ED3 382FBDFCBC2930F5A32C228540DFC2D69D770E00517E97B4C48F16E7BF52DBE9 1E81C90507F4B4FE643F5ABB340B308FF4BC2D123446CC2A6692197EE3263475 DCD802D2718C8C0F261D491CAB0D9BD5A1963759726A7C26FDDBB2474CBB71DD CD31BD728A6165DA76AA6F5C7B1D01F143DD9B18159137508268EF544CCF067B 3C196BFC30B31BC772F3D753C744099E725BE89EEBE39549F4D9C93645E94803 04399E790B5748606D99BDD3C0A973E4E561F05E902E95F99E6F47F68F835EF6 C14CA88EA7E552D7E5EBE77F675778739556A608442098EC2D01C9ABD0B0EA96 0010EFBF1B8001E9BC1FDEE37A4C73C9C748D4235E40F628B6D1289C1C67859E 046FA93245271BEFD52C13A98CDFD73B3593AB98970AB36059E76B4446366830 9496A537767613B7B2DC8DA4B82F9D6B607DB1CF360F8235F69621A5EAFE25BB E6FFE337461FE365F261DA40AEC0F3B4CB5DAFEA7E3B47C219FE67BD3E0D23B1 EF173713249923719B7E7F8F8FB8F2662482327E5FD2E8FE838F3423E9EA8CDB 7E8DE5B82589EC77FBB38F3C83FCE56025A69A1D1E2195459F741D59DA25A3A2 7B2145FBFB5AEA914E923D851F82D1C8738708DFBE4D6AF31380BF79FDE74A57 9BD95BA25E99F58440C931195EA6A57593753A217370352C85CBFB94E7706D40 08A2744E2B119EE0D75E276964F6CB776C99C2B198FA289ACCC2A61305F28C82 9AFE8154C8FEB2A1D84EDF23B32A72FCF8B3B9D9753563601C0FA62BEDBDC081 C4F9C6A843FFA22DC14773584B92296BC094B09FEE5DCE6B8CCFE61E75288352 766D16125FA544576DD1C03ED3462538582F6370B74027E92237ACD86F768A92 502890AE63526FB4B95626B1146DB4786E9FF6F565395CBBEC82D7C7ADA9635D EC78141707CA6B94781B3A41FD227FDED6AD3BBA1681E9A80F1CC7E1EB1B78E8 6FE122D9FB24A029D168FA99D7A046AB2C91219EB1596E6395B8EA9736CE4C9C FF4DC1E4E735E3B68DE59765CE11DE38B71AFD13927AA645AFEA1CBE098407EE E00569AB2D86F7709B1471AA16B6AD8381EB4EC6EFD908F084D7F90813564369 828F4C3933AD2B581D06AFA5A3250328D825DB89B80002AAB371BFA0628CA722 4977A1C5ABD0A7F84A2870608A3A85A3BEE058C92BC5DBD710134A64AA9BFBC9 FC61330B37B454E945174F9C6C1AA24035EC01B9A34B0AD0B818F19EC7D78F63 04F793760114A5CDCB25BF16BD4934D0500B3F3A2CBEB489E0F9A7C5289D1ED4 EC596D363F51C0345536AB32C6419E1B4B11C69849BFD2BC0F6EC4BB5AC9AAC5 4DC822100AD7C024E1F302623291072579014899D5955327106A1BBB2E03DED8 7E46AC4AB17052375FA7F291D86367C1E1C3DA43DDB82C03746F80B8AB988859 BA0B911106546CBBC890420E508F2CB81AC9B397C1FE8CE48B809EE6D00F3770 0A77967AFA463E2FDE687D8F63EBEB62D1AFD851A57102D50DB09769F7BB6C8A BA222354E81B4B61EEBAE82BCAD0AC2AA88E317DA0EEF56A282D3DE538065464 B1FCF965C40BC740F7D42D68DFA9CC72CDDB77851AB8627A10980BAA82D74C52 EE3F30CFC69103FBDBF57AF5AD177948A6274C032F3D99E1584D3E32FE2C2A43 0AD2C7FD5C2FD15FF95FCBB1BDEFE09B5741AA2BD2D68940BF9280A1C3F22905 7E70D5876E90DD8AE6B7CA0339562E9DC15258ECFDDFF940C96201330003CA5E 14234C29BEA2D25A4FC3FA4E8945C774FB9622FA6B8B8FBB0871CFB9773D22AE 40A3B664DFBFF9929C65406CBEC1A4BE32547506F0EF9A78945A2496028F4C89 042EBD803A5DE2426270A88AD1FA6F5878BCD2658B2666145244BD52A9718B9A 669D7FF8766B566ACD50DC6E7902D3721755BAE3C339C1C56EBE122188F50BEA C6709AA9E238BC44FCD3EF695F8C928D511360BE5FA47389B0ADB2DF3CFF0FB1 AC8E330902EE94720EFAEE44F4A86C05F7532134A7969B76914A0D0C93519EDF AB78ADAD4801025C24F5207368BD51F625D29499A5C0C35F4C27D4409C2B4BA2 03562D24B7CC0673A418CF7154223DC0A33C14BA727F933A3EB79D733989B8D9 1650C80D000AD687C621815F97BF3936F345299E091780E2FC41A5A9C7B3BACD FB387668552FBFB4349969DF50C872E6B68A9225D93C9E99CD45651A4F7F14DD 39325CCC810E9BD130304EC955B77487D6E8421FA595A435E5FF94EB5E554091 0C6FB1BD43595956B60EC0C2BADF5B6E6F6894F9FA424F27E360DF3734432A26 4D5F0313AB2083260FBD3089BEEE2954958E26ED66E6CD33851547387DD4EAAC F67A799276E7553582F026888A211905D8819A4DCB94E790ECEEEB9DBC59D7F3 26E2EE2D807141EC6985F9A06A4502C17DDF5C30BF6C041AE06A65DEB90A10D0 8DB0E7F103F6CB2AE59DC15D90E07A5B8AFAF0D0616F0356384F98DF14277E2B E5346527AAEC53241A8A75DE57462B07AB66B65A5E32C50802A63787DEDC11EB 7B4DB77C1D7D2B8A8E243673ADC49015FEF371165F7B80C3ECFA27197C348461 B557238658C982CB79316444473BD0D6A6377A857927E95C5ED1BBF15F10E01D BDC81168FBBC0CB0B80FC52F96A0749DE0032A98CFFD07FC87F7FF502ABBF42E D8E17B15E8176E1C1A412A09A090AB1978EEF87AD1D642664366BF66F2E27549 9C57A4FAD1706288C15D5064FBC41571CD40A917DAAE9D6E702E06CD571E3F35 DCBFAA38766144B6AEA738E1957364B377B279E3EC3D86B45192F269A9808053 5A1D9030881CAB3E49BFC8D0B2B9E32B6E34E8575F99AD1CEFC95CBBA1F84765 70D1FF704C5538C695137C33D8D8D12958E1EDF33C06BA29AEC48103A58A40DE 3282DA394CF50564DEBEF65950C8F48EE15E6CF09E67BCE1F3E4B6802DFD6A49 6E7C232522A1282EBD18DD435828DBF49CB47C3CBA4C22EFC4C0C6C32571FFC4 5FE7C8350E3F662A9E9B9722B0B12C95D744B8AF9003B4E0AE2985E3FB5A1EC8 C6FFA30AB743F3014B00A2B118324D4DA7550142415CA907900C7BE231B25820 CB77BA35FFDDE59F5C471AEDE6DA6C593281717B13C16E2BD13F9F1DA578542D 3FDF611C8EAB1CBC05F4FF1F9960782E20400EC6663A3CE1E3AC0A662377E1B0 2B8C9C1B700ADD1DBD47FA92E352A2E05007BC3A63799C9999BE7E1329A04589 B987AEF12A7F2E492790C562B525DFD71677A9EA4A1073C0645ECA99666FEAF6 A616B09B5851724023EB1E2C57151BD76583584A9409CF908D79EA0715484DC8 4C36760E6DF51498895FF7722F7A4BD5F421D392FD718F7F022C57D1887CF71B F3B4DB96B39F36EA45B4E9B975C564AEADBD4E176F1C09A9E30400BFF45D8B54 CD17CB0AD6C964DDD88426214A5E9FFFC9D4370EABE92782F965BDDD29543C1C 5FC76D0F1FC58CE31A8986D9969B7D1956399363BA0EA21D49D96B95E9139755 5A191B888AC0691664336BE30AD1B5B36F9FAD51D87A58C66FDD1D4ABAB456F7 501B473F2513FAE32A6EB3470FB376465DB51E07EA3CFD5F5D3B626543F77250 0055C749BBEA77F01C25109468FFC084674727E1AD1EEE6CC954711604CE18F3 A767AE2D7C7D3B22D3923546EB534403EC5503FD776574152C8A9C94A6963B4C 33642551840F72127F0F95A35231F503B82DF052B6842A1B323C836E4044751B 054ADE6FB2B12D97361F8EF61658557B918961B1F280CF219EADA7209C8D53DE 08CDF5F9346B94213E2C252BB8FB6E41C0EBC6A8CC2A655F6664FD726DD1AEF1 F35F945B6D0E6D12F5BFAB56401C3A3EF5B517DF4EDFE9809F3A7653608CE9E3 86E64A6CA2E3F3EF443566D9E3ECF57788263974C4D30C293D30FEF892D04BC2 7BA499429EE65BF324A27FD7296EF15672722DE881E3F28C162769E1933B26D0 03C0C68B0822C4A0716524ED716C3F472B4FE1D68872692F213E12E5A814A86B 0786E84D7E67D8656177BFB9DE038BEB0144C06893D1E9073799C3ABC971B114 A6AACEFB49B803B8404BCDBEA4C311F33791FE7A9A68BCF89042F03CDA3D9308 53AF26137B38FB577270BE2F4A28AC519C543F0D411F759E3B77F4BE00FA8942 F63B58BD1B772517CC35D9254BF91F961F34B3EDBB4AF399A28EB73B79FF6855 9D1315D665A4288DADD3CBB1BBBAB67297AE13DD7B33A038EBA4A02821E850EF E0445C7E9B1B5B7FDE3638AEEE84B2D78DB60F024BE8A1549B4C498D5352AA29 F18C5E5F534FBE75AB4D8A52A8C480D5806D195F59F8EE71638EE18EE43EDECD A01799B385A25324751A862E0B8F88349DD81A62F36C54705535D2175373A1A5 C36303A3EDBD9634FBE0F904E483CD829FF81624A81D83AADD93D0D74AFC5938 07FA18BAA73C8365BD5903AF760DCFDD230B86A0F5E54E1DA5C8F675BB8E08EE C4AC868DFC68C6D869CF64AC576989E764CAC35D8D4DA97B2B939783BFC90B77 0BDD64D37BB40B8F0038E79AD70884D723FC305217DC2F9F1A3122CD8F8365D1 A71B693F7638B0A18DA5F1F4973C6E721B3DFDB35A3B98126E51FA1B857B36C7 437F1870A82698E16A756B17092D34F9F765AF66F033FC2F310DDEF2AC20F63E B263C169420ED4021D8FD60E4D7F1E4D3856C73475977B8392F34D7AFF85A29C 071F972E3769880C8D837ED9801E7A03274CF3299DCDBD5D5C41BA6517DFD887 607B9FD77A8C7BF5804C8B20EE80A5168D5F6EF14C119AFDA9560DDA451C83A1 0CCE9CEA04875B4455AFC759E4FED5F09DA14BB26DF9C8390EE02D32EBD460C5 0C19267575149D475E45DA7A7D08AD7544FC526515DDE9E1BE34170FDDFB8FFA EBC922AA6360B0395BC79CA28E4F70E5C1236CBC51228A5EA30B7E70C8A262D2 50304BD72F4834B7739E41215CDCF3112910C5D32B2E54A0CC1C314D02FF0A91 7C41 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMR6 %!PS-AdobeFont-1.1: CMR6 1.0 %%CreationDate: 1991 Aug 20 16:39:02 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMR6) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMR6 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put readonly def /FontBBox{-20 -250 1193 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF4E9D2405B169CD5365D6ECED5D768D66D6C 68618B8C482B341F8CA38E9BB9BAFCFAAD9C2F3FD033B62690986ED43D9C9361 3645B82392D5CAE11A7CB49D7E2E82DCD485CBA17D1AFFF95F4224CF7ECEE45C BFB7C8C77C22A01C345078D28D3ECBF804CDC2FE5025FA0D05CCC5EFC0C4F87E CBED13DDDF8F34E404F471C6DD2E43331D73E89BBC71E7BF889F6293793FEF5A C9DD3792F032E37A364C70914843F7AA314413D022AE3238730B420A7E9D0CF5 D0E24F501451F9CDECE10AF7E14FF15C4F12F3FCA47DD9CD3C7AEA8D1551017D 23131C09ED104C052054520268A4FA3C6338BA6CF14C3DE3BAF2EA35296EE3D8 D6496277E11DFF6076FE64C8A8C3419FA774473D63223FFA41CBAE609C3D976B 93DFB4079ADC7C4EF07303F93808DDA9F651F61BCCF79555059A44CBAF84A711 6D98083CEF58230D54AD486C74C4A257FC703ACF918219D0A597A5F680B606E4 EF94ADF8BF91A5096A806DB64EC96636A98397D22A74932EB7346A9C4B5EE953 CB3C80AA634BFC28AA938C704BDA8DC4D13551CCFE2B2784BE8BF54502EBA9AF D49B79237B9C56310550BC30E9108BB06EAC755D6AA4E688EFE2A0AAB17F20FE 00CD0BFF1B9CB6BDA0FA3A29A3117388B6686657A150CE6421FD5D420F4F7FB5 B0DAA1BA19D638676E9CF159AC7325EF17B9F74E082BEF75E10A31C7011C0FFA 99B797CE549B5C45238DD0FADD6B99D233AC69282DF0D91EA2DBD08CE0083904 A6D968D5AE3BD159D01BDFF42D16111BC0A517C66B43972080D9DD4F3B9AE7FB 11B035CE715C1218B2D779761D8D7E9DEBE277531BD58F313EBD27E33BEF9DC5 50C7821A8BBC3B9FDF899D7EAA0B94493B97AFEAC503EB5ED7A7AB65763C9AE4 247C7C9B60444EFCA66AB8108DF1085D4F3D758CA672BC89FD250646AAFC9585 F7F8583BEF48D4A97DCEAABEB778FDFC2AC2789D6F4E1F0D9237205E6210893F 77BC26458D7CB35B844DE20BC63A4057AEDA379413FB28B0C7C9A6AAC97CEB9D 5F712AB7B25A697092C13D2042A04EA2FB36836E425CB0C97A9F30C6758FD6F5 2E4A4AFF054C14C7507A4D90B76F44A240D2709F26814A24C405E59D5109948F 87A93BFABAFE124C8D58EDF774E060F599847C6DA5E7349BBD7791505BA1AA41 C047310EA5BE5555F3908ABC457CF1E806AE9D9ED134577DA0DCAA9120B78425 571445724A1D1BE4A49A88C16F64C3E73CE74A68CE9F413A0186BF50527E3378 0BDC374D6775CD1A60D04A2936D6613D91023157B2277815D3EACBBD0F5EC49B E10FB601A820B3FBFDE967AE881960711947F2C00DF744E3E29D6783D3F10D95 0140A5B62877AA3A89794368D3866910416393C21EB2C8D48B36A3175ED20B45 D3389ECE0C0FD0B8219CC0FE131BCADF70835C1ECF5B3B45D25467E7035313D4 C9D0DA39FEA7F03DD763174C09C7F59D83D484E2C8151DB2A52428C5EA3A0649 0AE2EA79DE5DC679CF978D3CE71BA42811E1DE2CADA4FD5618AAB6CA11976402 57E5F5448863B4B0123088E8B1C22853F731BA9797197646BD327D873769B322 12479C1813D4DEB5E9C7CFE623DC65CC67B381FD18537BD381FE8B0EB0E65407 D5C970329E61082E5BC69152FE91373CB3773CA987742586E6ED8C3016011724 E3F5266ACC8161A3E60B779F99C62483289B8F5FAB2A65A2B3EF026F50521673 94318A800C2BADBE0F256E2B0B1BB9DB90996B17E39E5732E0932EE71D94AFA8 6B5B0E5DAE5C3DE55A167544D9F315D7DA1DC1DE8B00E14164B941F136801A81 FDBA98CA9A96D881EE331F4B45FF396B54F7BDF32408863D62F39AABCB5B45DB 5E1DFB417F9E79FA062244A32D847C0CE041A90A2375EA44ED95DFD6A7BE97A4 69A896ACA2A8F3D1838AE43D8315F69DEA5DDFDB068CC94C5B0890474075E320 A5EDFB9E13A5060BD2C45E855F51AEF305CA576A9785324F98652B3340F6858C 0F936ACC0280DEBC3084E0C3BEA789D8E5CF803B7D72F2E96AD34D947DAFC787 806C936F0F9D8C7C8C7AF64EBB2EBAD3A97DC56A775279BFE6FE5A17209A5DD7 BA86A542B70E8AEEE4B53A6A7E389709156BD02E37648648A8C4E6366A255D4F B6D33EB9F9A7E96AEFA0E9D68268C3A5A1A8CEC390A244C6CB8F210C608F51B7 769E52259B2EFBF44DE98ADA262AF6132C829EDE71A0883DC517E14DC0312DB3 10899A3662CF88797DDE4BE8507121E7EE58D83F0761A5F9DFE463637560BE31 EA5E7931C115F68D02EE832D60E974D5BB0F6025D28E24058E8E3CEE6B45156A 70B4C4DB0C784FC5D4A95959C6C71C70233E3EBB696E813A2AE3CB830D6D07E7 4F790752D9E8E6BD41C3EA514EFF1ACA2DD8D1111BE62A17120A17A6BF806CF0 9E8FB18AFCBDBF67B15E4D45A3688E4A7108380FD6DE1491CDB932A7A8CBA1C1 5D6770AEE80289FA6D6CF1460C151A78147DCFE4CF3A0A91BD7A14091CC7BA0B 87F2E10493A80BC1E88A382088CA09806D7324018B3C832C984EAF9E801F08CB AB1EABE8C8A6E14AF293B5776C6F75CCDEA7F25431F902A1247CF5A101BAA8AF 846E3366916058BAD88CD99BE5369D7CC3B143CDC0E48DA50BD130997962726E D50E227D7171951F42C60857453E6574084AFFC583423E902277E7F86BCC93F1 C7A8CE812F89B20FD8FBED0005CFE83303F61369405336DF24CC15745443E9AD 8B49D9F29798735EF6892E925C254400DA606C657DE585088692AE468276EEC8 4544F24C068ADBAE403BB7D643AB02A289097A3E87E464D1E2DB0E7885AE8B96 44C08FC42CE55281BC901F194CF9FC24755DB18845D8FFE03F70A9A2D70C7097 46B7CA5F10AAE098D99DFEC165BA71A67381F49D1F09BEEC2C0348A290C1A1EF 298DC0B5931CEE66FB6836A3C7A09CA9D8B37043478539426F6269EAC21F5ED5 59AB35645218CDE8946FD4468608D539E0AFC73812F41C6AB6508CB796B50483 C268C21A06BEA059D0EE331836A4E242E243439655BF0EEB18E45E6DF5BD75D3 1B5E0112FF2080818146214D1A6F1CF66F2FAB4A4A975B99FE8282FB68D29032 EA0AA51B2810F66E0B72A8CEC2BAA30E88AEA22B3A30EE6C66A1DF6D4273EF6A FA09EC1CA0BE4E156FBCD5029381E0368F42E466B2078639AE281C31F5B282C6 6F621D7371113AB536EFC242766FAC1696D1902CC514E8444D8C977FEB555E82 32C08EEEAF5A8820E4341EDCC6E98AB6F0EF6EA5F5CD18D1922FDD273C61ABA3 1AE8EF4874E27B005CE5D1308EC915999E62E44638BCB9116A69A202392A8463 CD263C86FDB8E0CF5E25CAFFD32F824F9475232A27862EF2723B84B35DBB0627 37579F19E21253433D9BF96F4C8841331190F924CBAE983DFB9728B0B896879B DE3A0D087C1E16BC93D734757D327C31866CE6B15DD65EC169CC2B9C5FEEE538 E5B8FFBFD3BE85FDFDFCA09EA4491C539D79D4F7 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMR8 %!PS-AdobeFont-1.1: CMR8 1.0 %%CreationDate: 1991 Aug 20 16:39:40 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMR8) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMR8 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put readonly def /FontBBox{-36 -250 1070 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF4E9D2405B169CD5365D6ECED5D768D66D6C 68618B8C482B341F8CA38E9BB9BAFCFAAD9C2F3FD033B62690986ED43D9C9361 3645B82392D5CAE11A7CB49D7E2E82DCD485CBA1772CE422BB1D7283AD675B65 48A7EA0069A883EC1DAA3E1F9ECE7586D6CF0A128CD557C7E5D7AA3EA97EBAD3 9619D1BFCF4A6D64768741EDEA0A5B0EFBBF347CDCBE2E03D756967A16B613DB 0FC45FA2A3312E0C46A5FD0466AB097C58FFEEC40601B8395E52775D0AFCD7DB 8AB317333110531E5C44A4CB4B5ACD571A1A60960B15E450948A5EEA14DD330F EA209265DB8E1A1FC80DCD3860323FD26C113B041A88C88A21655878680A4466 FA10403D24BB97152A49B842C180E4D258C9D48F21D057782D90623116830BA3 9902B3C5F2F2DD01433B0D7099C07DBDE268D0FFED5169BCD03D48B2F058AD62 D8678C626DC7A3F352152C99BA963EF95F8AD11DB8B0D351210A17E4C2C55AD8 9EB64172935D3C20A398F3EEEEC31551966A7438EF3FEE422C6D4E05337620D5 ACC7B52BED984BFAAD36EF9D20748B05D07BE4414A63975125D272FAD83F76E6 10FFF8363014BE526D580873C5A42B70FA911EC7B86905F13AFE55EB0273F582 83158793B8CC296B8DE1DCCF1250FD57CB0E035C7EDA3B0092ED940D37A05493 2EC54E09B984FCA4AB7D2EA182BCF1263AA244B07EC0EA901C077A059F709F30 4384CB5FA748F2054FAD9A7A43D4EA427918BD414F766531136B60C3477C6632 BEFE3897B58C19276A301926C2AEF2756B367319772C9B201C49B4D935A8267B 041D6F1783B6AEA4DAC4F5B3507D7032AA640AAB12E343A4E9BDCF419C04A721 3888B25AF4E293AACED9A6BDC78E61DA1C424C6503CC1885F762BADD50DF87A0 8B6D1BA96CC415579DE9A26786F37C6F37C57E20559A06FBDDAFB7A0421BE3AF ED947D999B9B6FCFD973D2A9CFFB2C38CD1F4E83DE081288F2654E747F7E7BE5 32FF02074DF581DB456CDD381D834EF577A7DC94FFABD35019230545DF24B281 3C57DEDE6580DFF703681287F31B62A0DF73E0E08FE3E8D4F3BC58079DF85A2F 9D66DE46A9D3F8F289BD78DD2641DF574F334704A570AC88AD21D0EDA5E7660A 5ADE393595DB983117680F836913355EAA0C6DBBCED2E7568D988462897B5EB7 F32692135C9C6ED6E12978DDA350CB2BA64D433A908E31F61761C23FBF18DD4F 987FA6D14562DDEC9623FE5480255072140CE23ADDC4B0E867FA871B5D1CD62A 0E07EB4CD8067A363DB0FA8E9A81109A1028A076A716CCEA3D569950E2683B60 D0E5EB6611DA3DD784FE69E0425E19731170E3FD55E533C4D2D9932B5CC8CE59 D641D0116334D2360F7A8681ADBB014895484FBDFD0B903E8E15DCB4C5207445 1AD1EAFF6D9EF7EFD9883B95A6F179D2B108CCA419643B31658A4755658130FE E210089026EBFE95DF621F381F7236E4A407F8298A30E5F704C28A1C5F64FBC3 9991B88BD657CEAD53805B2F61F2264DC7159572C18E5537886145C22574845A 9A616E5D4C2613192073481EA165A94547E0337ECCEE93ADE0281A913C9B4045 C3165427682AE888369D9BE2A3333232979FFEE701652CCD553F4909D0A99FC9 C8C851B968ADBB92C8550384651FC30FD010251D792EBDF4BCE1E241AFEC6B67 EDC73EECA0BA12DD9A22937CACC49F0AFD3CD7E032C248F70D2D338CBC6E9B34 205C184BD86663C972E37D05E01137415E2F27F6AA774530FC05BB3DF616356D 4FD973B3B74C43EBD83CAB8E9F72D1B4278350321E4894587B458BA313029168 AC14916C7FD5788B4E4A675081CE6046FEA28ABAEFD1ED7C577BB74F66F5D8B2 2F355DECC234EAB0DF594B9C9F11A3415DC4734AD1E43CB4311C8DB693F2ED84 C70A36215C4D146DD4DF54FAF65B8C7A2B93498B4A74BE58B40778E4A881DC38 D180B995251CC4A6AC8C8979536212CF023C0EDA692DDCE760D3A12C108DD40B C86B0A2300B94E7AD95EDEEA99AE3A034F21E98FBFA2E31621DB4BEF674CBFD3 1FD6102118844C29CC3CEDB9A9BB2EA82C5E43934A7BBE72980855283C3BC562 11292ECCF4805B390FC29C3A5C526D4B365ABAECCD0B5D4C71CEB495B455D836 548AFE038A03F581390C5AD5814F6E395DAADB1B3E93BE91AEC7BF5CF9220A6A 91DFED453CB4013B89FBE39B32CEBFD4BF498FDB985407ABBBA84784FBA1EB9A 41B2B9BEE5865DDC8658920D96B6C26BAD0A67EC5940E5C34A8069B1D31695B2 C7095A0A5C74A733436960E3F1FFEF9DAB3CFB3842F2BAC3D6498652D3CCD618 8336FDED8A3FBC5A0B01013E06B10208B94A5BACD939B91A9251F8EFCD0A8970 A7D19A1CED133D1BD4A1FCC33C0F11E3490B80DDF8471C1BD826ADC77894C7D4 E69C826EAB793FA6C1C942E9FBFC416213A66B19710A34239758CF055D6C4E31 39DF5B49A21E433E299C6E03E290CB2BAD44EED9D20C0B39FF6B0AC8D1D8D1CC 28196F69A3FE2FD78A65DE47387363CB633080D42048FA14A7656346EE34F2B7 2472024F755C094803EF3AF917C4D7FC183DFCF1D72D73B3BE40BBDF63E621D5 A07A6FF8C5C5D1946FB877AF00585D260CEC0051128927F0E68FF4743BC2C5C4 7AE2F82CD28D717E155CD2B8E0880948EAE84A3CCE30B01E8445834FB3D3062E CD7E6E84C549D73F735141DCB74390924EF902F7E053BB7C65403B96B8AAA827 EA8FFF133A6310BEF8EB5EC3F9FF7881EE687E31DCB5749B41933BBE086AADF0 877FAA01AA390061EFD315FCC44BC099FB6EBAA35070B8A12BD44D474201D42F C0C0577AC0AE21FD682D9A50C71CBB8FD5DEFE37AA50B2D69EAB483FE64A7BE1 920137441DB5EBCBE4D0A1CB7E06B31282DB73B910E33C628EC5267DCE7723FA 456259380153BA68A817869C62F315C4BC750E47060A0B467455E72C872003E0 F6E0C3B4023424950373514C099D10AB46348B0CFA3488269451B10CD072DD61 1FABCA5E1A488917BA120F9207EF7F3E07B473497A77A0043925E52BA328C105 6D8B680EB5C7648602BE3CAD4C37CD9617C17EA141AAD191CA7FCD23A6C473DE 9F4B38D38E0909B3C5969F6B592E35ECC11618FE0B8A7DBC2D8A5585DCF4FD20 47D07DBF38A9439CBD3D17FDE6AACE7C5D2F9F2881D6E3E92A4B51A42E4415AF E3784D7921DEEAF969C8BBDD1EB0499A88CEC7C4B098C32FB32C8B95E9C3FF6D 97269D5D5A1098AC4AB4EAB8839C3AB34521A16203C42550373AEC6A18AFA4F7 E15946EFB23467C26C09ECFC832B1084414F20D39123FA77522CCEAA63F11857 D69CEEB988FE3B57C2A5781FC235F72248C628631D9C9BE8AE25E34FE8E3629D 82EFEAF0BEAF6F237052E4B398AD1AA82CA20EBDC707C3DCBA9450AD2D8BF9BA 220B4EC5CCA2E03F82555CA52232656942752E80C50A2A339AEAE834010819AD E40B332499E2E32DB9EDB7C65964CB10532FCFC0BDF028B0 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMR10 %!PS-AdobeFont-1.1: CMR10 1.00B %%CreationDate: 1992 Feb 19 19:54:52 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.00B) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMR10) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMR10 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 6 /Sigma put dup 11 /ff put dup 12 /fi put dup 13 /fl put dup 34 /quotedblright put dup 35 /numbersign put dup 39 /quoteright put dup 40 /parenleft put dup 41 /parenright put dup 42 /asterisk put dup 43 /plus put dup 44 /comma put dup 45 /hyphen put dup 46 /period put dup 47 /slash put dup 48 /zero put dup 49 /one put dup 50 /two put dup 51 /three put dup 52 /four put dup 53 /five put dup 54 /six put dup 55 /seven put dup 56 /eight put dup 57 /nine put dup 58 /colon put dup 59 /semicolon put dup 61 /equal put dup 65 /A put dup 66 /B put dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put dup 71 /G put dup 72 /H put dup 73 /I put dup 74 /J put dup 75 /K put dup 76 /L put dup 77 /M put dup 78 /N put dup 79 /O put dup 80 /P put dup 81 /Q put dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put dup 86 /V put dup 87 /W put dup 88 /X put dup 89 /Y put dup 90 /Z put dup 91 /bracketleft put dup 92 /quotedblleft put dup 93 /bracketright put dup 96 /quoteleft put dup 97 /a put dup 98 /b put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 104 /h put dup 105 /i put dup 106 /j put dup 107 /k put dup 108 /l put dup 109 /m put dup 110 /n put dup 111 /o put dup 112 /p put dup 113 /q put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put dup 118 /v put dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put dup 123 /endash put readonly def /FontBBox{-251 -250 1009 969}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF7158F1163BC1F3352E22A1452E73FECA8A4 87100FB1FFC4C8AF409B2067537220E605DA0852CA49839E1386AF9D7A1A455F D1F017CE45884D76EF2CB9BC5821FD25365DDEA6E45F332B5F68A44AD8A530F0 92A36FAC8D27F9087AFEEA2096F839A2BC4B937F24E080EF7C0F9374A18D565C 295A05210DB96A23175AC59A9BD0147A310EF49C551A417E0A22703F94FF7B75 409A5D417DA6730A69E310FA6A4229FC7E4F620B0FC4C63C50E99E179EB51E4C 4BC45217722F1E8E40F1E1428E792EAFE05C5A50D38C52114DFCD24D54027CBF 2512DD116F0463DE4052A7AD53B641A27E81E481947884CE35661B49153FA19E 0A2A860C7B61558671303DE6AE06A80E4E450E17067676E6BBB42A9A24ACBC3E B0CA7B7A3BFEA84FED39CCFB6D545BB2BCC49E5E16976407AB9D94556CD4F008 24EF579B6800B6DC3AAF840B3FC6822872368E3B4274DD06CA36AF8F6346C11B 43C772CC242F3B212C4BD7018D71A1A74C9A94ED0093A5FB6557F4E0751047AF D72098ECA301B8AE68110F983796E581F106144951DF5B750432A230FDA3B575 5A38B5E7972AABC12306A01A99FCF8189D71B8DBF49550BAEA9CF1B97CBFC7CC 96498ECC938B1A1710B670657DE923A659DB8757147B140A48067328E7E3F9C3 7D1888B284904301450CE0BC15EEEA00E48CCD6388F3FC3BEFD8D9C400015B65 0F2F536D035626B1FF0A69D732C7A1836D635C30C06BED4327737029E5BA5830 B9E88A4024C3326AD2F34F47B54739B48825AD6699F7D117EA4C4AEC4440BF6D AA0099DEFD326235965C63647921828BF269ECC87A2B1C8CAD6C78B6E561B007 97BE2BC7CA32B4534075F6491BE959D1F635463E71679E527F4F456F774B2AF8 FEF3D8C63B2F8B99FE0F73BA44B3CF15A613471EA3C7A1CD783D3EB41F4ACEE5 20759B6A4C4466E2D80EF7C7866BAD06E5DF0434D2C607FC82C9EBD4D8902EE4 0A7617C3AEACCB7CCE00319D0677AA6DB7E0250B51908F90A32C4175B6BFB279 024EAE1B04D721A6C54FD62484F8949B2CE9B87D9CEE04DFF82BF14931B3CA03 AA5F701B6F80BBCDF2C427C40A51597F0B1BFA25EDD7CE0EAF2EC676BF0059B7 15DD5462BA30DE78A08DD533DC0E8D85F9DAFC5FD842F417265427E0F1B71834 D2BF6EFAC3CCC40D3EF3B2E2080F148441BA45E5D0C0F7D8416730AF4BE4FC93 1E965594E0364F0D4F1EC48004CEBDDAFB1F0EE0A8222358EAC0F62E6BFA3C9F 46875EB4C999219B91E6147A49A668505667030CDF3495682B79C0F614AAEE68 D976EFCDCB04127C0D7325A2211E49CD316935A0B472D1F9FFC68F7FBEBC7582 036CB393016193A9783DD08D647E89C5BA4EFC8701BCCB6A5C027C4FA8644C06 251B9E33FFEBB1B84AC4D4B148205C8CA7AF5EF97F2788EFB0B71473AD0F5EB4 FC43F46602C53E53F5A6D1E445439F65967C51EAB636178FA377DB2DDE5FEF41 9E91F3BEBCFBD3B4EBB510A0DF7F4D19C6552BC98F10E25612B1396020D7836F A3D3C865661DB276E428F09F048A916B4F07D8FD01AC1942A3CA342B0E531BF2 3E9A7AF265ACE1585E331A8F8B5AE06FF085C1F349215581FC7D68D16395D934 79B3BD866A4BF33913DEB54B4D00ED5EFF3313FBD1C5E6AC430567872BD935F2 AF1A3F892266CEAC15DEF43BCB83DB075A69EAD7D2EA53303F65E04C5673411D EDFB32156120099AC210E623BADDF2991F96813AFBC1126DF53A0A776AF7D61A 6B42225A5AAC1B0AFDDB59C5922143A156EED76E3E1ED01ECC6ED9F61B5711DC C5709EF5A3CED45628AC1728DFE98F07389777E04A7E407E3B007017C96F6EE7 D0FCC0426C4D734A0B108FD2BEB48FFB6D107C5AB8EBC3584F13A40E5508AD9D 5A081286BFCE1D5DAF5F3D86515244B4D0D77AF9820A93B7C90420FF4B8D634C 2C5697E3E04802E2CD7B9AB8368F1A9DC214CF893AFC7BA3A8DC24B0E549E106 F04530708DD03367D8501655B1501645F17E547C1C948DEA7A4814F22E2A1D55 5AF16C55D6BB4EE9929C9CEE82C4D6A16D70D17AFBBC6551BB2C9D8B69F5DAE2 5DF6C2187ACEB119EE6087BAF5E79340D115C53E710CC7E4C771F43ADE91D2E1 1AEF7BB2E6C9ABED010A835514EEB6C9A2C93396FA12E8CD0B2F9045587E51D2 7E059224A889B3E7620E57AB6CACFD769898B3955661824DD4AFCB9A7B218DF9 F84B369F52E246723C7D0B26C145DA0AF81C83DAA091143FBBBD2A414114E34C 7C994D8401119D5A46856367EE3FBBBB32155A00E5314641288A6E1FCE47F385 71C2F7A7602F777C11884DEFEFA560333458DD227E63F3FE68A762CAB0A7308A FC59C9B293233DFA257E5D10BD5F12B7652C45C4C50F14F17E59A758BD7355EE A89B1EC954B86642B87F814F936577429C467BC8E97A481ED15E2633AD73830B DDE3DE50E1EC439204C7FE350C69494823725920719D613907163AD63FE83C6D 1CB6DA81A6C816C747542CA09493306B44A7DCE934B877DD356F38D07D3E0355 CB149BE6ED646E84DB9DB61859F6FC45BCE13EE42458D1991AC34AAE6EA72320 3EC6346C38CA87107618ED3C46C331623D8FAACC6CF9292B8F1B407806A0D480 8F51A5BC97F6B592F50E6FB89017FEA216E7D909A6ED818080A5F897162ADB21 457E5604917EA038B54B01A9A8BD87EAC0EE1359CF393C09BD9266EA6F30059F 566A33A1109559393953DF8DE857F8F4A83C2C8211628B531B9F365178D26D2A 75E9E134DDEA354BD94133154F81AF8A474814039BD531A56B914AA3A4CBF23E 9879A8DCB614E04303B838476C233AAC81D0F6920BCA07749A0CD30CCE9CF81A C7F3F62D5D49E2EADF96D35390489EE719952AF58BA5AE78DA996469677D5A56 4531F0A159BFF37338BE0E64CB3B10C4A088E63694EF84F55C7537CB32CE0BE5 ABE9B751EC06B5AC8F7227F2615D9DFCA22395312C1EDCFF460485E54995E011 6F38F33C1986BB7BA8BD7C48937548974C7E964A7E0921DF781BCA556E811C7A 7B56BF6085CEBC73931E073F182F723717F2FBF8708AAA1E4B69152011CA7DEC A52535FEC2AE2B5AF921D0BD28983FDA9533444930217766190C415F7C2C2AAF 77868472818BEFEDC5B30D2852811DC0FC50BDFED4A09777E7586556F0A67E64 E6B2A18B1184E62A5E05167AF397C0B584104F83C7B4E9C409471110702D5FCD C1B988038FB3E7CF8EFF153608C3DBE81A286B9434F67ABA7D4690C5C41ED1DE B7E71F32E454961C88B7E1757CD736DF756F9155175CCB580A6634086775A3CE 61EFC38F37580B71C1C83E17F65A5F8974A0CF9ED9F76F818CA31E56891BD78D 5EC25A9F50C7725EFC1D5ACEAB6CD909943874ECC4DFD117A7C6F5B07FF1838D 94A543F432F843DD5B6C77EE053BE73F2E3334BA6B615BD478C892CAC1B80B7C 61695E2087167D41E834D808C12AF04D2750FB63EA896968C36372E729DC2D13 C62F0D44272AACD7BFA3C6D55B53118C799C40EDB77C49F0E0A2582206825D6C F556B8AD57DFEB441E93A756A4E4468A82F79A0A4FEB4A4F13108BC14CDAF4D3 DF5ADFF5877C7003EA8177962C3B4AE4A1B0AFBEA66D58AB823D13E90CBF8678 AB14DB59C7865BD555B4556C6F35C1E30A6F017DAC032A33E8E24C62BAA9444C 1FCCCD5C74AAAF1175C3158FA587E337CFA68B64C543A381DE9057E12B88F70E 2E1540CDA8FADAEBCDFB0EADDFB0B5CCA3BC23F755D15A1DAB24294E4D12270C 103C4C81EBAF282E55CBF75C63C552AFE07E2626F93C373BD4CC30B4A9D87726 271B24C15ED67355AAED8CC4B31479F17F8C596FADB61C6FC2A207C182C75E24 F9BAF5020743D564A99EF034F26AF3C259DCE54E5486B8304A168D3F9141A659 A5007BAE3F23B5BC4CD33E34252E8E97A0CE1D44B6DEDF2910232A81C6616006 BE3FEEF19F51F6506C6085025863BB06C27E59EA233445CF86CAF222DA8E6676 21FCC932E74827CC46556DF30B6E8D81C0183094BFCE91E2AA4888A57365485F 85AC3784B90738CAEE60A56C8F437BEB21F92F3C6DCB2BDD2B376E618D79C3AA F72240DFB9D090D5BDF5FC43ECDBD222BAA6510FD744834358E30F3D08AE7060 3D96C5595851E4A3AAA29F80B2CEA228DC2CF87F3A5403F783601FB2A26CA34E C2B6DBC569A723D332F0B6D2065884472C29372A50006DF98ED6D0C4D5BB5341 6CB8C3C00C9ED760EF48D210AB36C452337FB3ACF6E2178EB05127B6C7F77E7A AB4795302DD4A6E07717000F056F1500CBF5157CF3FADAB24A5BC70B94294097 EFB71CBC3F942AF4AFBE6D6C06D0915B1E6D19A4FA3970BC922AAE8FCCC05169 5D8CF56964579CCB70B7C66F5DBD3D39AD4649CC2AE4B7F7FC32C42C0B078DB3 D85CD57E39F0DA41A932C1E5D5CE703CAF9C9E4BFFE00230CB8E7E0C29481950 15B1A14E1A6F1713B9362A87B40ADA4BAA80E962FBD14CDD464D1C02E9679F90 4132E8AB70002996C2E31A25345F1A700244ECAEB6CF9DB48D35785E693AB659 991C810CD2535D173EEF08D7EE6A6BC4DEDBD1E9C7B470AC4F293E45769645AF 6963115F1A6B24D5E38163FEC115991DC9CC8669C80129323ED5A3BD87552216 A3741FE0833C47F0E2B98C8749BB314751328F1DF29FF13227D392564F25BA89 CCF7CD0E29125D0F9BE0384AE39CA3004D2AC9152854E78F820503959F48340B 183FE4B499407973B5063B1B75DFCCCCDE384189B358C7CA2951C8E896490F23 11748E537315DC2F8842FA57604BB83A5749BE2415F172D25A50B489A6AD5730 1CC69AAC36791099B01E0478D1A28A081FC2AA4A943A8C77341A7630D9525695 D5EAEC7AE44C29A16796C5185D2CC48A89AD049F890E1C09B2E21AC8CF7E10B0 42F43CFA13416F2FE631D071C979B2DAEA5B3A79EB0B1F2D7A3F6F924E4B22E1 013BF6C9BD1585D63B6C457672934E2A2C47E977F5F3F9DBEF0DF13C7E866DC0 A501190D177443B5152669322A539D3C308A748E5FE1561384941532A7AC8A83 58624BC20F5FD722B400A7FF7FD0D33789FE76F34FF712C20BA407FAE695C3F7 817683099A0DD642A948906F5F11ABB08690A3A370349AF5AA945A149D7CF66F A08024EA5E9308F0CAF9B2CA4C5E6ABDF0561EF958B719F38631CFC14E79396A 2C3D33F6D4502CD774734F2657A277C02954C0F621D0C960A835412310E65A19 9DDC695BD5AC7C66F2D2663B9318E84DDC035EE72C3591A34EDA7398D2A0D455 355673529F351EFF96D6E794045EEAC1245F7760131FE13A69CD46AAA99CAFF1 CF4EB12E9DD9994E2D14E203303344B2BCD9E77B5F8801BD6378A3495A1BEC0A 9E1FBD8CB8C583C6AF487AFF1702FAB9B69B3B19016F89463AC295AF39735095 C7AB3D9F43B0FB521F7DCD59F2560D0538C5FC18172FF00025DB3C8373FE129B EB10478DE414876FBCCD665CB694318159CFEA804628E33176071AB2D14F45E4 926BAEA7503ABF426D510A62BC17DA6CD35AB50682F29B1E6457C90A844315AE 3C982C3F0D7FF19FC690B9E18B62A5F490143A2FD3C23D766C0C01AE62EE5A5B 9CA4F92BE26805C50EFAE0FCADCA36A1B586A0D5AF2D7AEB88A6FB0AF199B720 13B6B32B79AA3EDDC9435B6CC87B3A657D89468BDABEBD2DF6CABB2413CA87C0 8F4E8029465238F71B833F11237B36BC9730488AD8830EBCEC6B57C955EF9E48 D44727CC37D4E55B9A7418AF9139E18E3D28B709D6B7AAE83208F2CBB3B116D2 75FED758C67A5B02FE755F08CB1795BAC9DA6960F25651E447BE66044C539410 96969361AA3A3E98FBBD0D063C73323D8FDFF23955A29A375F8599174BA5F4B3 F3A1ADB4F6F07376C7BA3371EC9E29309EC6471ABFD3A3059C0FA47CDF89E4CA 60E5BBE8A1C4A9B5DA536CF62D0381679E8B827732A4132ABC4280B310803C53 9C31DB4C4544798B0CE46D8F54DBFBDC28CFE03CB7FE7180FE3056285FC87596 C50EDDD28A73288E550E9CB4230703270A81CB466CBB6DCF4D96AA0270812B1A B364A29687C6644EB753E92112CCD81484E1A96A53BACFB90B6089DF9EECC6D3 858AB1F8475122E8DB3762A46E316FD0346E11ADDABD12AF720EA5341373DA38 A72D0C91BF1572E099D9AFBE14D7A3BDE14C4D5DAF1A8CA502B011262FDC34C3 66AAE048A19F9583010DC8FE2604F7EF8992204DF5A45FF6C9C4E366DB952D17 B5142DBB1E39D958D9DAD6A8481431A329CC7DDCC62E014AC52A9324D0923BE7 8E0BBE3655ABB59B7092B3D967E3557202DA9822B07DF89F1684AEB27F8958C4 5223EC5D41F05CF8AA16FFB3842F01C559FAD0A42D441E52AF72AFDCBF90BC62 B3B204E0E3B56A4EBE97DE4A2DDA2A9ABCC33F2A4BD31BB806B1B50F112D5FE0 D25C79F2DDBDA3EC77FD3ECF7C5C114FAF4144FF9231D93219CA42A932C17788 3A2D31418184BFE2F565DB3B519BA743DAFB120D0F20718F5C38B89D159150DD 600099BECCE701DD31DCD3D9530027DF3357426F3CC3037F8841027EB6EE277A 93BAC1F216F3896D04431E38D3D23AFD63F38BFD49E6593AA349D4251F9AC9C5 C55329406914853F3871995221A8F6D4BD8F9D113A18A2B044457A32AA4278FA 2D1252D377CD2EF9AECA7553E4317A019F4E8BBEB3CC71E0173FFA551B26AB95 F4D7C579E6D87CD9AB43AA617DACA5D3B5ABECDED57B6B3A997D29C2051F6CF1 ADFB3D5913E68ED0E5B87D1042D03C6059FF97A14D1FF8D24B5A573927B469ED FD92F1CEA154D2DDC553C835B68DE02AD512603731A49441DF0561F9EAD21B5C EF08DD5C28458C87EA5FA0371244043D4FAD96BF687681E2FA3814D3AE727A6E 6018E9FB26CC68FB3FF4E3204D3F3C87439D4FFDF17F9F3AE2A5A061A82D906B C02548DA48842F51A0CD12D58F915D1294F4D46FC5BBB4874E96D86D6DD0179A A3FEBDC085EE36CCCBA9575281A929EB6E2941A1232037096B84E0B585849C45 0961EA9C617FDFBF6B9876E62B2E9A881941866285CC520CE50A5B22EE1B7E92 541952AFB5AD93FD2ACF3EA5695D19C6594E404B8746D3948EF1A40F5C9DAE07 83D6E5A3DB0510E79BAAFD6D5CDA3ED6DB7433CCEF361422DAEAF36F5FEB9CA0 075655FE5B53FD93B905C6907043082DAD53E0743EE75CEEE7A6862E3B24DB44 795BF9F64ECCA8075E21C820F3E793B43554C1368C3E4D80C207F4D3AAE01D03 D734C8A062A0A85A88AE3B70E9050B8210E42751BEBFA270E35F2172C515E9C5 C3DA9E42F4B9EEE8C0BBF715EA8DA45F7E533201961E8E0965E1C213574FB573 8A25D9EE832E9D6A0DD0D005C833735951F8156B2FF742D8455247731BCD012D FB7043FC60DA4FA8FCDDFB9F840273992E013AC2D24C0DD4800102CFDE8A3708 7423EB4249ED49474D1A9D38CE3B6EB21E7F415DEFDF403BCD5EFB2A2B6098EA F167DBDF601FFD50AC5EEEC672B29392567052590727C6CB0D3561F6770244B8 115783C59F4852CBBC76DBA9AF87F8935D59A63C49CFE440BA71EAB4B3DEB68F 5459A25710DE083F1E0D375D4EA423D2FC7DBBCC8D155FC8AC98E8945A5249BF D8717A83765E4944FDFA27146745C9730FC73ED9B4D28C9700C76312CF347E3C 87D7800C3EE5D39D2B6D06A78AA2A2F958B19008A25CEF6E3BDF2D962C69300A 0CBE0DBD0FAB04AF0098CA2F8D07773E391C2E6D5D75EB755AE79FF0543270CE B2288BECECA47C5D6E48957303A3C29ACB16A957120B5B7807E3DF78150FB2F7 5043588EE6695458DAE6EB7D02125F3A31D822788F20BA69926EB72C7D4F5563 CF94E193FD164B0AE0CD1F7894F003A3505CEEC338CB731741855A68B435FE04 32E1D5686FA4D89F32AB00B66F4AE1A6FBFD63B40F8C2928895706FD973C79DC A55229B28915AF4BCAEE35A54F535B35398B8983E5FBD073A3F8F38528E46D6D 8E9859E010F762709F87BB9DB337BE48F0D1FB8EC74531201A84BB7EBA1A454F F3334AC09D65715E90D8438693C650721919108398F824B1C943564D5596946E 595644F0A2BBE3EEED86D2A3D27CEDC8094ACFE9EAC4F952C7C8DBEA4B6DBD43 656A682A6BD9D4BA07F0E2A4F07C989F092B8B101A4E51ED8D2E6D563ED9362E 547165895C710AEE883BFFC20C9BBAEC06476CA367902552C88FA6EBFE1B1B96 F0C79263F8403DE8A8CF4F5E58264A83A45460D91C6591321A25455BEF59E327 440E91DC524D671963B2AE7DC6FE4BD79F2DB779683544A8E1576C0EE957D00C 6F11F1E21F08DDA9ECA708DB8F3E2DD63A24B79887337AAFEBDEDBE9693CBD53 B70853B7B3F4471C3D20F1DA3CC2BB0EA483499783ACBC2F997D9FE46846C0D4 084FF94B9C53160F01E7C1BF68B6DCC948AF4E0AEA323FB5F1E96668C669FA42 2D7D7DFE22DDAA60A2F8F3D19A89797104F8DF40304162CF2D36A93B8C891495 BF437FFEE4C4F5DEBD413F03F556CB017DD970F03DF2E5980793E0D7162C4AED F7C26BEFCA1E4539C3AF3DDF2A3CE4DCE2AAA8B8CD4A844CC253B2905DA7B2F6 FBD5937AE2D11DE014C62E052E30ACC15347CB13FCF878265672992EB9D2D82E 20751455F350372B6578767EDE8536DD44DE9CC3E3471F747231F0BC760CCD73 44002A982B7479649A658DE286B041A7A93392D3A86914F0E0477F9400C9D711 9C663F377DF8E4C1711798C5AB38F739F592403C605197F93CFA672619982CEB 07AEAB8DE454685211A7FE0FE62976A46023A5A66F1B03BBCB84937F3D63E807 9C50390E1ECDB8F8673F7A4561AE209E9B9B39D8EE59ACEC682BFDCE68BD9CD0 B7FA74EEEBC3D88145DCD8C814F20EE0BB86E4EC791BB9D9B33F9715FC27B15B DE1803AFB3FC1A4089518F6535E3B7B584541D205C7CBA843F535F67BC38E25E 755FB27DEF61812EB559FF246EC4427BBF7568F862C92558FD42728E603D5C2E 6058CA9FCC33832B0466D6A4F8488B5A95DFE4D9370A10684C4A5826E2DF86B7 BA574358C03C1119F16C8561129EA0E7B573E9736AE32DBC4D92A9B91F35B368 B475616EAEFD1E2058CC51EA3B0AB791C2F58246AE9BC2D63D4192ED979F2300 C46620C3B52A6BDE847E5969CB5BA8A97244B758EF14EE1075FEBC0F5DC618FD EF250A8E402BDE1572AC49DE3FC807C8C30BACE1F1F89FFDCD64B2F3C8908ED2 31DCEA2E8169804F987675F2CC22CEEFD36EF688444FB49EA88E5345E819670E 17586202F75FCDEF95F52EA86FA199E1A557EA63D0C71C34D1C424CC71535D63 90A07ADE5D754F2146091637C82A2FF56572C5E5F0C5BE8E0E1123CA4942CB51 F3710090E4D2BC7FC3E67665BE2AB5AFBC5C8F2B03481EA371484B640A6F1736 02FBEF6B1362CD097D8F212E662229052B318384E63F3465364DE37A2C926482 C8055EEDFF0DD3DDCE562995480D30CFC26FD864EF6B530784C6B44C7CFDA274 297FB676D46F5DA8BE9DBD36C6A7153B0C1DF27086C2EE811BFECE34D499CFCC 006A9610C445508814A9E6489FFFCF63E9D6D337F7FE514C5D7719BA1A2CEB11 28F3153672A71A43F2EE11D7A869048077CEF97FDD366829AB39BA18294E7536 D3445043E3DC9894049856E3D33C7F388E52D43D54EACE6F0A0BCF16F4265488 069E83E1D777B5CDE28B9037F16AACE7B0B9E13741DE971ACC1873FD8056818B 73F61AA057F40AA22727B5374FAA94F2A13BBF6657FF1AA004FC10BDC80AFECC 827E6ED3EB47FFE7BAB6303472E2900CCD7E15F139715A38B94732A25F0B5065 E1FF507F5DFB673A38862C27B31E74690BA47EDB0D0529D3E7028D50A61C1702 E3B15EAE6D8DC21F692297741BB8972B191C6BF57D930B6DB8BAAE0306FA348A 3B13AE54D56C8A16EBB03F9F8697EC6D4E31E42C129EB10AE4E388804F56E412 C8999D339DA2094291DFAB7579FF4526D590EC49B5DA95353A114A92C5DECB9F CE15418901018EC604074B99140972B4021AC3CFAA009DD0AD2A2DAF2838F111 3B64A0E630BBC4AFE5CA7D59B93984ACB4F62E421ED93AC6958DF20AA126F118 14C5C8ECE3BCBF962432D7E796859F9A3A0B1CF749998B7B7C058F4F7F7CC9D8 576BF0BA3A5CE2ECF4134341335E914F926A6E386B684EE24F143D0FC00FAD16 72C67D9B9B3146E4000DABB9E17B197335730082FF7368C360C325D7BCB6D3C0 D30039C2B79CD03F0A9A8C55086A3194B5A2134C3018D347BCD3D32CF6595FC6 BDD39486BFA58DE31962DBC835D31F6D1129037727CF77BA577FB64D89BE85AE A2B55E8237C5818105FD516E6740A37F1E68BA96CD1F50A4290A5AD1A98DA118 4A5C54A9A7AC5B8C7AF679F04C6D18405ED4D77A697AEC7A54F36C561E928A99 9D1D923B5C268F9E51D3AB6C80501046909278CFAFDA131ED02C7EEF3EA411D7 C0CEC9C92C8AB5303BDE183855494E91251471233CBF31231A3FB59A2D629F20 CD6A19416FB6AD3DBC74A1685C7E63F9CEA6E4693E383CBE29F72CBE4A462ED7 64259607F9FA4CF73EE28DBA5B157EF0092AC1B8C586065212C9995528597A7C 012BA0245172E318D8FFEB8448E4BF5F1473A74A5636C2C64439955A7994207B 3BA38B3CC9A8FFE33BA3E8500368F46F5E500DF386F9EBEF60BA445BB31437F5 AD54D2B27E1ED055C47FE4D066486E3CCC971364BD8A4D3CBE42AB8B3C4F97AF 45AA19F4C3B53C02CABABAFCAD8B4FDA4BD1897631326A8F4712458E37E6456F 247B5BB31F428785E93AF98C34F992DCC2FEFF63EFACFEE15D06C68BA1460F76 AE287DF6AA9F661E8D358279D3E2D7F2460D8BE185383CE8911E9583E83F2E7B CD3A291DA2C7E98FC89E52437ED5A2DEBEBE28B7E7D71895FF3083F6454ED582 35B8C330C67193DDFD4DE07C6C0512E9C1722B51B648C37C567E0C3EF121E3AC 70294DB3035A2A2D4110A97794BA791621E7A2A54DAA02AEE816CFE450EE055A 5A578134946CD59612A4CB5424F9353E0DA13E788BB71C67E800F4326F283FAE 1751D049C2106A77108DEEBB8E42BA91438F896C0E7360057FB1B0F2EBB4AAD1 6CD6DECD33C7B1A240F24ECB84D86528CA112D44035AA430243FE05051607AAA 48D182CCCA65CAC42776DC74448CF2F792C24BC099FB676118CB2A34F8B0DA67 13ED020AAE5CB40710B1D71DEA58746D3C3BBD523C6843652C4C635E72838664 D2DC01E669B57E1002DE945A3FDDC6FF739CF0E7B506450EC482BD62A02F298A 1C5AC5CEE94E2632EE07C07B19C3A00D666369E6B95D1D9BB2DF47C844EFF0B7 77A1497B9F42B5B9C39A8897E4B477ECBB9185062B1A54FDA1B6F23BC530F7C3 AA70394F410EEF21EA41EB58515F1D43CE299C560231F26B37078A771C9071F8 1F2302C187B15F7FB7F27A945347FEF2C49FAC274C443EF0685B68266DCFC81C B0FC66BF04D1F44D5B29461A3E43443569541A8AACD12C91D1F7E77184D7FA71 853291D68FB32975FE2438D2E15D710AB133F3E708BCCE41281567F9873FC4B2 9D4537D5A019D777A9212DCE011E682FFD8B65C6B0D2C5377BEC918F643AD087 C4EF29CA7D35A0A669758B60CEB08430DF27896BF6676CF1DCB2751A894D455B 8DD624BA42585B6B628A6740B8C40058A4BC5116B22C98E94CDB8E175C562ECE E28D3FAD4FBCBA98F2F66B0178B55495037F62CF560C8E77050BC7DA73B52EB8 F626A197F55FB665A19B591541D6411FEEF0C2B9449FDD528C1AA0CDD0E90C0A 126C1CE25F78FF172C28386F23A5F13C056B9A2F2EC9C4AE2891BBC2370978F4 7DB5FD17DFDC5A9BB6EB21FA0425BB26F7F0559489A4E01DF09E957CD2C591A6 01E9075803F4D4B9CB536AE8B80BACB5A2708ECC13554DAD32CF12CBBE2B8F7A AFA3F51595AA8B25878517FC54630C85C8681F2F987E1FE4FF69D9B08CEB6CAE 31A732DC431030D3B66E1211A8E528AD86CD6AEF60724F4C35CB808644F1E8A4 FBAE87DC82E11C6BBBDAD2086FC49DEB5F98D17BC4405EBA7CD94A15DCF93ADF 4513E83A83265317F10E67806426D81990AC0B75A5314A254CC1BFE835622C22 3C284445D5DA1A33AE978AFF4B45A59AE4A7B95E18830C650F0C5C0C601F0854 834563AC82B3612FD10C5F2250C67AAD7A3152EAAFD1D618CFD517A42E088D12 930904D75EDD3C99D2E11B48871F38477F5438996717AA58E4857A7951AAD389 AF0E30AAC0D6F2570951B0D3C97529A4F62A81106CDD4C2F9944BB3516808DEE D2B00EF1C2FD3947ACC68B98136AE2C000202BE99D1548C9230A6C5302D46C85 B6AE3304B7026983FB94130794C78148B2BAB7F2194DAABD24BB938A9944ECF2 77DFB8F99B1F524710922B62059DCC0935F32DE2FA6184EF3475081FE6DC5C2B 80FF98EFC6AB126EF030B0F490EE00EED09B711837B6D8636B270B8994CF4849 8EB06E16A1AC952BD188DDDE196DD3186894197422A54CBA2745FFC7C4BBE7F4 41B5856090667816FB2F12D534728A74C7D5DC9D321C0BBFD0DFA25158E00D57 3B22A3E57A195BDFE045C963701B5489D14C2211E589DAA2D77F38C81B397EA7 916ECA1603E487AE3B9BC5F84166EE911AC4351B0CD89231FD3C1D4F4D0BA77D 13A2242DD919E3DB22BCD7D181CC25C75B0D759A18A0566C2D628E52A5695A54 C76D6E616F262EBA587A9CA8F9C9B5D620FDA2B9074A499241A202D4261154FC 5B6DCD5A624B109AE9E79F087BB5501B8A1D1271F0156A108F3BB91821A49523 3923DED030C9E8AD88AE30198D0A949C81FA4E649BE5FDEE54E1EA013E4B1D1F 575D4A861A3D2CB2597AF2C4753CB420555AABB757A015411EF8C5C0CAB06D20 8BEF12178ECBE799A401D941BAA52B78F35BA4CBB50420624ED15BE8FE8A955A F805C2D8551BEF1463E6CA8B8BBD7DC6F74F42AE0977EA0BBC7E6DA1EED53213 79E99939E1FEA729E8BD7D9F750A8D57BD88889356C830E8D069F9AB4FD5DAF0 2428F7278DB863547B05C1977AA7196F9C93BA46F2C310A22ABDF29CFBD229C0 D074CBA318FC7941EE51F459FA2D5C475CF1915A7F382A318CE4153F187F8DD6 AA293AE3974EADAD0B603ED36A8CC0E52260A96532C1E35A2DD99659808537B1 E0167ED71545BF702A21CA165AAE80D563A40E1FBEF6E961A287A868F988B8EC AFCC46C9BF7D2A056499CE06D09578AB26DFE19A40B015425A10AE9F6CF64408 BF5A1F4722607F8A0D1A6D2195836C3C605C9D08A6A0ADE0E20AD544C4C7A840 160A32D0EC83F6FAE61A50828A0FE4EE7A57D223ED6A71F3C53E6B29A7ADFCF1 2D66E9AEB9AC20242ABFC5288E231D064166E5F212F2C881746172492090E317 91B6AC3224A687749875AFDDFABEAE9D81607B801ACEE1797951AA5372AF8256 CE9E8953297E5341929B88CC42AC77EB3B4F9403C6D2B2D65E12A12C4EEFE299 FCC327A0A4C7B4DA593D6A7230654E3C7E400A8C7CA712114F0A7DD3AC92D005 AF569066DFD6DAC9D3064C84EE4D0B7B4876B843CFCF453E7DEE56EC80390D72 6787F121B7205A8DBC8C3769C802ABF5BC82334B725E5B9C40F4E660A42AAFCE 81AFF32F0645E0C5D04FF78184D0F1DC5C02D9E727629C61319B523647063FC0 96C0DB61B1B51DB210D49BD822EEA1F39A5B2D669A83A001034EE8C908C106E7 53F2BC6585437540B420B3031A67AB15348F9146F25130672E203F82FB9205AE 9D2B357CA9BEDE59A0A3D8003CBF2138BBEC76ACFAC6FFE3C3880B74ADF332D2 A47F02C7E0F2AEC11BE8B16AF1189B5B224A28CAE7AEEF2B6245FF5CC297D3B9 6AA69957DFCD9DD0D4E056466AA496B751D5A36E5E2AE957ED50F033A53811B5 6DA206BB726BE95E2DC752DE21F313CBA1DB28528DF290FFC6C7CE51BBDE4971 EFFCB3440BB70AD9F69FF63583900F5DA21A29A95ACFC84D42A543B0E6F5BB0C CBB25E5F82BD6FB07E506337E27973709173B379A1A3604F414C343C4326F0EA 2F1969E85C4110715190DF3818AC91063986514122106D9B89D06B34E990BDAC 67AF62588158763BBC01457747704F6BD46CEF2EE63B8A0BF3B0A5560C815D6F 6812149B6BF5B4897CE0D48B1E3B43D54EC41C286AE8B64E19E794542D6CCBD7 D1B45819D6967BB11AB7809DBE5816A264838C64FE087AD7610FA5188203F196 9DC583341DF384C1A4A1B172D3DBBF9767308D3693A419A75AB7CBEAC17EF6B4 0CFC35D00AF789A5982418054EC05BD7508BDCF71DC7328B84FB5CFAB78C8D29 244DB5BA787439D33E42A7581EB8E589533D02CDFF8835ACB27862B0D2547E20 E0BF2CAAC14C18E50AA97FF77E063C604028EC35B835201E4FDE6BDF15112A82 F33E96A12FC4010484855DAFD9ED39FB01C314EF11D086D0D17A006E9FAC8A85 D2CD3351DC1F25A83022D9EE97BE1DC98CFCD8E473991C6A40726F02B1B56997 FCE9FB2CEB6A38B0A6CA718D55EE3C193A3CD05AB1CEDD6E374B5806F693CCC3 2C7AB4BE8CCDF85CB6BD4B72E56DAA34C0AB0D2B6CF62A397325D7E4F74DC416 AE726A775B821A269F2BD82834E86774BAB0E74ECCFCFC0BA75AD0AB44017D2B 02DC0702F687387063FE179E1873356681589DD8C23F58CF68AFD103DA54F49B B7125E964AEDE8138F8C2EE36286EA8A786B3127E93352F85B72BAAEE5DEA381 ADDD2F5849EE7DEACF754B3A11E35D7D6CC1353205D66238CA5394FC72984472 66228FB082866170E6AA61E63BA41AFAB8FDC69252D973C86D0CC8FAB8F2D7C3 1434E6CC274DB24BE00E3E19AF5A221B75F71F870CA62E2F44FD15FD1A71D96F 546E44BCDC2FF0D5F6DFF07D71DFAA6856EB09F556B73E47EFE34CB9346DBAB8 852F0C293B82461BC1282C9B98F50CB67545D683F042EB951F58008F91861687 EB4749E586F2753CBBBFE3DCE401BB9F96D066E3D56BC578C3DD7D0BA3797FC2 A505185C3F2432C670C22CFB0D389F56DDF2078C900D4D11A0C96846FAB5EB49 2EA158B493E5E7EEEADE8BA3A5AA98FE623FC0E71E8009E25A24E480339AED62 97EC30F94CE630B402F6E1424D1B73E68A75E2FD6A9F290ED7E2960DD6987458 C924D7B3D9C6EACFF87C827B2AD202EFD86AEDD53DB66840E0730C884475C333 4C8B58E7F9BD355C8BF4D499DCDA09CE3F992291CECDD8A76F5D5C42DE9A15C2 0B9D3EDEA6B92B4EE29C9458585E057DAE00389D1358D10AD90534A8DF8D2A0D E1653DF3050180E80153BB8FD245FC6303359CA16245DAAAA18E577B250FFE96 557A76E61AA88D03C04AF451B6ED4810471C4618E458D18704F0B7116C2B82B2 73789D00940F5F072E724251FEA4F51753DDBE126A35E199F3570DE2637F8DD3 386C838342E0FB36EEC7B100A858F115ACF4F4C2B0351FCCB9962004D7C8BAE3 96DFDE769FBB128238CFCD13B06B2DEE6C20D5F0B53AC6BCE65E80C5F4F53E34 3ADC9A504D431A2E0FD72B44CB9FDD862CFDC0D7A9B9CC1E504CE61CBDB44C35 C0F64042A7F41615AAE4980EC045517B7FBA86BD60C0D56CDEDC19100C8AB4DF DF99808094F5EB4009AB0D43EC879CAACC10F2E56C5DB7C8227BC5D5155263E4 DF03D92474FA070E37816959322C21DA374F556F47A278719A903D0C957498C0 7721402EDE78AE3367E9FA0811BAA327B64099EA0057AB33C33962951F1F2AE1 19C50C1482FC65A0DAFF591E6B3B0912060CEE49EB9C69C7DF25A3F2E27295D3 926ED1F38A63C3A13C1607539DA1CC5342A2E23D5DE46F219765FB8398EEE51C D0D0386AB2D10F6D57A9108FC2004E78347E911CD069FAC03B0FEAAEF819407B B226D825380625148A6BE3ECE27F8D0710DC864AE7AC4677F7A3FFF9807A2C69 F6A09945E6802EEB2A2B8B51F79A443C38AA01B363EE4926490A448ACD5EB9BD 93753134B7B2D198D29FF0AC969A4DA84D44518623E086DF2C4A94A6204D76D5 EDB990D26B3AB6B5AF7F7608A86E56809C6BF4D29625BE224B87F6DA40C5FE43 EC4CEBA50AFD0A4AF29BFC336D929083557FC7958365282EB5D53313600E1799 B8851CCF94A926C8FCE07131E2DBCA6B989DC2EE57A85EA724155419E080A5D5 F98A79868FCDC48EFB98E3C971204367CC56873EAB4A21875C9DD0C1DE05566A 5F8F01161D465170D8CD173B47315A180F562BCB68FD3A3B06EDEEF7FAD56867 AD7759222ABF7041C14C1D60EE0DE4204BDD9886794E18DD1130E1914F615DBC 7E09EA9AA89760CABC7C4FC24600F825053F50BDB364D7C19D2FC318910D13B3 D88B4B41ABDA2C3FB158EF1723B63D9AFE5E6D1F5A5BC593829A849EAD0294BF D80DF3230AD5B9D3CEABDAECDA15E74C755F36228ADD8A74C8B4845567656A3B E6460A20AE9B149FC4C79FE0B5A9CBAFDC2580EC042B8DE568B6CA3BE8B136F2 1143DFC683396E51BB0BBF2F6B89B2A6BBC7B6286BB872D8AEA256F3FC8A6D35 F80A505641F3F04B1A8E7A36CCC04F077BFA512ADAA3C059B0DE2706A9D651A7 76FA8E32EEF5F5E73D34AB6CDEA0011BE91D20A9AE8696F3F789FB414BDA3100 BB3AF1B85A58744BF5D0A8D2CA9BC55E9A5EC8C161907A179ED947F1482B94F2 AEC6118959C5341945E322EB535B1544DE3C733863CE2D422B667AC78A4C536A 8FF2B0AD389D5FA0F26460438B491125BBEE57322E2EF452862298F197732D36 E5A0F14944065353DB037FD71D5F8BD29A73250266F47C1998CBFA4BBE211359 5EB3418BCE97900DA2B230A07C65E86E14B2675AD9C3C250251CC6E4C5B4CA76 23BA9F45431BCA10FC7DADDBE62DA2CD8A89686C321487ED9CA141C3A9802C82 B32510F4EDAE2A150AFC07722EB6C7B596DBD647C9E58FAE58888073D24D3603 749684C98118B30740AE6E880757F3F9813C1CBB560A07290CE485ECA3692756 C2BCDA8053DE796D4D9A1372489C58893BE974248901AC4C1BA79E058EB939B0 625F52165651C0FE3A05D01D82377BB35A7F93F5CD2B88E333A857A7C5CFD0BF 0F269C44E7AF63E5EC25A1B694CD51EC9659165A7F7F112AD849BF4102E2CD95 92757FCCE3196A8090481B92033AC8C1A2AD2A86BF395F102C2E5F4AFAB8CF54 5B8817DB4A0CD86F66ABC8F0CC92A6E3B5719760D7BC6172DDA5DA3003B23F54 EFCF61A4ED23AF0FCF832FBC0D9E734F433CC760023D5C1F69A66EBD1E5A9651 639BD4F45DC4384D2367C2EF3F46B88B88FECD008118E7D7E5259F6C79F47FB5 069254FE6BD3946642457ADBDF0042DE2A72A0A5BE0A5FE0C2E1C7D50817A71E 90230C546F1967F16D8722A943FE845AAD9C6F8FCCF969C569B2896E91EEEAB4 56215706A0737FA4903A5A5849A48911356E8D98321F67648ED8D120026F1AF9 54A1B53519EAAA8E4E4A387407BDECAFBB4F2A84D3499403CF3094C7D4A897F0 2199C744D012CF84BEDAB484528B6B62E9D04A045D156A762C6919479858B1EE 7E0C80A5A2A0A17E365C1469BE48B32B7A43E4E8339A51CF99505E295D3D2053 6F592E37FD1D31223CD3076E42D803B8EA60E29B5A5F0CD01133633A43A654F2 2D8A4F8A1500BFE3E3AE88D268B27DDEB71BA158EDDC9C7A874A8B267F330638 EE35AA443666F12530E3B6844EA082A0691E4A6BB8BCA4AF35EF1AA11AA2DB78 997CFDEBCA2E529D0A2C136955EB6AF7A03DD8EA39C858D3BB9D5159469EB29F 7974B764F0C95B9C9C01547414F036F2C73B99A01D5301C4C59859C485330490 2241D856F16BB9CAEF2B33940E406F30895A909382335FD4DF1F2E5AAE3A0100 55D915A264D33F8FDD90B473D66EA643DD2F50E93BE7584C07FCDE5233330FC8 3064E49323FF0160031A77C2C3E22554BF62D6FEC589DEC0E7F79D36AA1849A9 8841D630848F35AD2CB0BD468F37CBDDBD001F9E6F49B5C4C1C7EC358078FB55 C463C9C532272F46E5A9FEBB2CD8C4CAB1F5BE04D6A14CFD0F1210C1E10B808D 681C58FDAE908EACE0984DAF52D29E91D39254AA8C92E1183F6A3F0706C762A0 3D85AF69D7E79A006F992419EDE523505F435229FDA3101829CE52A36DB79D97 48F2377225151FF656A7D74896B799C41F2FDB30EF3BD92C874FD396AEA34146 4F71399462CBFF35EB7B5E614B77D06222B0988F640B2CD1065154F56C12C7F6 49A938BD18392BF64A0611164680DC59FC4E166A860DB8A10BA86420A89A5D93 306BEFF9648CD149A2F77E1EE47E9EB2E0BF3500F19538AD862A322A73ABFFF6 782CDE41DF98A31B9DFF2EC05702D1D4638C25C4615808B825378C39BE8D2917 D18580FAF9CDEAB92FE09195E72D394556756283704F9A82EA3608CD23DCFBFE F8EB10B5780A4EDFE391A65CD3C3126CC54335D9D6B7A9F3D20A6D442715E6D5 5BE93060D8AA4920FB514BD0A6DA01E433499BC843AEA30752DD49072B7ECD8A C463324541070236063644EDFFC5DD7818FB025AB6B1508D49F0A333983DC4B8 9AE16243893A2378066FA4D55B3C4A855FF8B76708E9B50077A36F282ED2C5E8 5518866E659A1BF21C132D69AC938C1440D76F0501411CFF09084E36AA9BBDB9 1F24B8E77A3855D08FC9FB745F1F0D8D54345F105D565DB9DA61D9312CFFA5EC 57E49EE05F087782CA2AE2FEBC4C04259A5E615FCC0E7A161D2F8B9E768AA7DE 1C5C17D03064ADA5CFA8D0AFD19D163B0B98C0A82E90DE6CB92861DBF7BC39E3 41FF17A26A04CEFF813DCDBECA8DCA5402A3C8055AB3F8B4AF50E0FBE3B572A0 2B8C7FDED8685B4A01E4258075B95EC941D17433E69D39048E3F738018BDFD47 8614E92019A02855E3A62F64EF36BCF72706986BD7D784DECE6F4828F3BA79E2 0481E31E87654563071377860BE2725BE48E7EC384CC9C552A994BA81CC93BF6 B528BE49FFEEBFDC8C613C69E99FF8E286656DA777B1B8B71A8A124FE8569A05 77E8E77CA6D9ED8C8CCB8ED6C108D28EFD0979AF32F6B7CB7FEC291542411339 04878B835B171A4C0FF979E088C69E1A89D3EACFDAD479F88451FF35D8AEF6F4 ECF179439167382A5BE60556BE0F950DFDB0FC0B5B23B46B59F0A8FE3408B862 C2F63D251CBAF40EA9DE1A6BC5E06EF61881E437F6CBE25E0A8A95998D86A520 A5017B0B1AE68D4DFBDEF4C464A549DAC450ED4E55838631ADD25BF78F70D9AE 3279B0D97342A09D3B518EA0CDBD0F46B7DEAF8E7410D17DCDD23C1317960C79 72D693E53FDED2AB03E7E430B5314501D535298E35A6EABD00CEAED90694694B F8D96C7B9C5BACBEEF4CE9A583594625AC4E8A2569521B50C16AD7396A21D7CD 5181FC6012406EA3D51B9A55187EDAD507F851FD056F454FFB507ACEAD0EC654 A3CC3B888975189C4061079D5B69F524355183C8BED26C213315F9C01F1FB2CB D84BF3E0CCC91E8B5D066F245FC8EC709E760B8879B14C1391C7E6A0623AE26A CAC09252144897EADF7E6A96CD4F07CD581DBD7274D491B21CBF1E59E7662CD1 69C8E41989E49CC21B34DDA246DE41DECC77C75F1CE06D14531C60F2C6FDBB50 0224304703B3B94FB1E1E32DE4F4D6C1C386FF3AFD34B8CC5D27CFD06BA48DAD 4868737C533A2A39A64D1D9FF9D6CD5ED02B42CDB257A4A8F58BD9181657D445 F45D90CC66D84398B54215A7764149C075DE271A1F44447672C401ACE61F4E8D 673A89A5F58E699F7459EA29FC02C424CEBE8E12AE73511A12373F4FEE785F5D 328B95C4A74312BECD9500835055EDF8EA492481863294AAE12CEEA8ED3CBD3F 5242DE1F0BCB6849D3065E3AD24085D09DA56833A3CEE2D605E7ADE264240E21 0E075D0C44B3DCC0DF1212DC98405E95E973AAEDB92AE2B33456708C1E1E3489 52668CFA5593EA24B64766098492D09B3C01D357C27A115ABF9198565A321F1E F409EEA3FD3E4F45C64B19CADB8B8C086D4788278DD6454132E0AE0DE8A87F2C 43C367E9C6DC1F9BBE7063FB696832B017A5F55177653CB9B74DF4FC7A62A7C9 4F04545FE07AA8DDBCD0F93590BA2C3E542BA91D0F67F2B4FDFD215C9C956507 5F178384C8CABFC6E351522E40ED55F8F2024F40D1FF5A7E5DACC87571D4B62D 7B8085EACD29A7F589CCD7946736B369821E029710FCB35E4D06F8EE8C2CAE6D 89721398FDBED731472249A7946E6C07B81B3004C6C4E5BA434B1A799B625285 1E863B28B920AE927DC7BD3A291FE863868241172AC2FD2CD50C69FA79D3FF16 77AA98E60B0EFDA2DCB3A1B51851590658905BE5813EF0501F21C9CDE8009EEB B21A87F5CCEA49B8EB9FD49CCE8D40BFE6455EA62E61160350C0C61C92E60D72 D17B6C14A74A2595E458B24879BC8A3A0B9E56832DA8673DA467C33DE10A1010 8605FECA9CEB05D04FDD82D256F2D480AEB20F990A32A322AC4058D11821F918 7204E68D10124FD4090A57C6FB92B7E7F2D257068C34E9AF09C79FE96870716D C86A4A77C627D8E9523469788B91B979623EE4A9F981995CB2309C83C925B106 F7DF2CFBBE8A1C5E3C1F60FE03C6BFF1E64071C8D491B4D49F3FAF670DAD3507 8D799A672E45C1DCC390DA810697E7ED2875C9D495EF88C9525CD0E15C498016 7254F8C6774618DCBBB073D29F5424ABF46A3B68A75D3D91FEB2B05E0F0EA754 6C777B7B09A348E08184A1218BD1A0E4E655AAE5EBDDB7AACBD6B2BA1E8B040B 5E78AEA444C33F013600ED7BADCF27AA89AC51C72477B7A048C3B5BC95AC5736 40E6A912BC2420C9DEC23CD220C5428C250577EF63399D9E1D17A66A22888F69 147AADCB3EF973830A0C10E6217A94A0AFC3985CD9240BB0CD9CAE5229615007 C8272688856D47E7C90D805C1F012664CFF2FE55BE79D57D903E6E9D0C3E1264 99D785B4F9F41C25598B54D37A5F9D5F7D1F66F063AA452BB8E05BE593B9E961 AE3199BAACA14FE3D76AD623DAB0109A2D80B46CA6869B12401052BB2217C38C D973E4A9FAE77DC28021E214E9696023E679EF05F1CF552D02F30B9D3425F079 C347FF9C168A081C88E9D9090C48F44F8977E82A15A7196EF753513311539C15 9121D2323FB228587C3B4041D86924998D61F04A1F0F0AF4528A41B574C753C9 010975B687863010CD603261E83C76B76C79552DF5EAA4DC506770BD83BAF57C AF9354FCD71B8D025C55171C86336248F45F08FB9D79D81EA21ED5AB4F1C8F98 CC3049FC2066BB468E85CF79611CA0D8BBB052DB5AFDA5534F49F259CAF9F8B3 909085C17CC89DCD9FC498747E76FB196946D3378F9592A23DFA891A498E6866 EB43B72700E959581DDC058FB1156B8787FF3C8B2B6DFA9B3928FA0D2B6E8434 109CBBBC184EDE4A9333B6CB7BAA7DEDACE10E9EA5B461847EBDDB8BF6D9AB0C D5213DF776F5289A53DD0C349D09A185CDDF82D355BB1F76975B8A3B6A3516A5 1537967DC0301B0FF2230290500886FF9BAC713BF819C0B39A406F098B4909C3 52AACB5EC7D95C0C50AD67FB970F67317E7E74981D22CCA14F866B2363484E59 D40198DD464574C6548D47E8D72176F3C8F8000A9EDE561549676882B48B2607 1652E501CB5C566BD0261DA6FDD14C016D07144441A903DF2639E20A2274ECA8 001AC13E99BE7FFAE58DBD4E66FDC2927B7A1432B1FA6FDB63F5CA7658A92802 6E289D1F99CCD3BA9442DE57B050C57EC9C7FDD9D33EAC6A7276D0A6172CD696 30901764330589046D4D7C10238B0346A9921E0464AE6AC69ACD8D3116D2451D A25CF4A39C2437FEFBEC68B95D4952A17CD8E1E3EA5833C2B2E12A76B51D7CC7 4BB6D66CF133F4F4F94C0F66FF83C44BD0C2E9EB06F1812EB2F0E1503571D483 770396A3FBDA6C35301B8D34C9F009ACB0E3B5FC2BD4012F84A57B 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont %%BeginFont: CMR12 %!PS-AdobeFont-1.1: CMR12 1.0 %%CreationDate: 1991 Aug 20 16:38:05 % Copyright (C) 1997 American Mathematical Society. All Rights Reserved. 11 dict begin /FontInfo 7 dict dup begin /version (1.0) readonly def /Notice (Copyright (C) 1997 American Mathematical Society. All Rights Reserved) readonly def /FullName (CMR12) readonly def /FamilyName (Computer Modern) readonly def /Weight (Medium) readonly def /ItalicAngle 0 def /isFixedPitch false def end readonly def /FontName /CMR12 def /PaintType 0 def /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0] readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 51 /three put dup 57 /nine put dup 65 /A put dup 67 /C put dup 71 /G put dup 73 /I put dup 75 /K put dup 76 /L put dup 78 /N put dup 80 /P put dup 83 /S put dup 87 /W put dup 97 /a put dup 99 /c put dup 100 /d put dup 101 /e put dup 102 /f put dup 103 /g put dup 105 /i put dup 107 /k put dup 108 /l put dup 110 /n put dup 111 /o put dup 114 /r put dup 115 /s put dup 116 /t put dup 117 /u put readonly def /FontBBox{-34 -251 988 750}readonly def currentdict end currentfile eexec D9D66F633B846A97B686A97E45A3D0AA052A014267B7904EB3C0D3BD0B83D891 016CA6CA4B712ADEB258FAAB9A130EE605E61F77FC1B738ABC7C51CD46EF8171 9098D5FEE67660E69A7AB91B58F29A4D79E57022F783EB0FBBB6D4F4EC35014F D2DECBA99459A4C59DF0C6EBA150284454E707DC2100C15B76B4C19B84363758 469A6C558785B226332152109871A9883487DD7710949204DDCF837E6A8708B8 2BDBF16FBC7512FAA308A093FE5CF4E9D2405B169CD5365D6ECED5D768D66D6C 68618B8C482B341F8CA38E9BB9BAFCFAAD9C2F3FD033B62690986ED43D9C9361 3645B82392D5CAE11A7CB49D7E2E82DCD485CBA04C77322EB2E6A79D73DC194E 59C120A2DABB9BF72E2CF256DD6EB54EECBA588101ABD933B57CE8A3A0D16B28 51D7494F73096DF53BDC66BBF896B587DF9643317D5F610CD9088F9849126F23 DDE030F7B277DD99055C8B119CAE9C99158AC4E150CDFC2C66ED92EBB4CC092A AA078CE16247A1335AD332DAA950D20395A7384C33FF72EAA31A5B89766E635F 45C4C068AD7EE867398F0381B07CB94D29FF097D59FF9961D195A948E3D87C31 821E9295A56D21875B41988F7A16A1587050C3C71B4E4355BB37F255D6B237CE 96F25467F70FA19E0F85785FF49068949CCC79F2F8AE57D5F79BB9C5CF5EED5D 9857B9967D9B96CDCF73D5D65FF75AFABB66734018BAE264597220C89FD17379 26764A9302D078B4EB0E29178C878FD61007EEA2DDB119AE88C57ECFEF4B71E4 140A34951DDC3568A84CC92371A789021A103A1A347050FDA6ECF7903F67D213 1D0C7C474A9053866E9C88E65E6932BA87A73686EAB0019389F84D159809C498 1E7A30ED942EB211B00DBFF5BCC720F4E276C3339B31B6EABBB078430E6A09BB 377D3061A20B1EB98796B8607EECBC699445EAA866C38E02DF59F5EDD378303A 0733B90E7835C0AAF32BA04F1566D8161EA89CD4D14DDB953F8B910BFC8A7F03 5020F55EF8FC2640ADADA156F6CF8F2EB6610F7EE8874A26CBE7CD154469B9F4 ED76886B3FB679FFDEB59BB6C55AF7087BA48B75EE2FB374B19BCC421A963E15 FE05ECAAF9EECDF4B2715010A320102E6F8CCAA342FA11532671CD83D34CCC3F DA992BB3D9CB34AA01D8906A7D23ED35750B129B3F77A91A0CB3E17FEABDCD13 3FAB5219AE262FCB3E8F1088BC3D0A1B39E7DF3FC54F7D738FB23B9D504B0FD7 3C66F690D80CF2D96ADA6F731BCEE3A5D9DD2868DE969ADB455F9DAD98FD23E4 13145AC4C6F3ED5786EE9E9134C3A325E3115EC2F1761A2B10B7D5A9FE9137AB 0E2BA316A1BF681C234301057B97A62D21AA4C5E4B00A4FA69D0502CF90C6FBC 1083572DF59B07E876EB8AE60CF6952C11FC5A83B28C54C0CD5627AA54103680 0F3EF5A0654F673490CAC1C0E44E097BC90C47236F4C6F5654C82D0CBD428811 C7B756DDD6D8AEE0D20428B0385D872CD14B2C7E86BB77905703B8F7A03899EE 6FEDD0BCF153DD3173B6A769212731AAC873D852636D80671C6A8FB34E42EC23 6AC9DA1CC1D20540F23E04D1516F4F5C1001F88476D2DB53278FFB554BE35CBC AA49142063417AD3A446590AA665AFE84FD111E564B281644D186225D82C03E0 F3D6E5E4B824CED16A8B02FF6A969AE77E0A5B7C8349E4C155A9BF4549BF9909 B5DAA97E2C0B6C5302B16BE8AB22CCC269D90D42423693FE69A00AC44D0A8D8F 893B6314F33B50DCC0C10526FF28A02CF13F64A881B16B23933642B7D515D654 8704FAF46D52F3BEABE5C0896B1DCA846D94653102940AE4D387167EA8EAFE3F B42D714401673FCED5D4EC5351849896FB0879D7799DAD6315CFEED539D757EF E8802C140B896F17F20E29C98EA306E7BBAE5075B0680A7A321F54364F02BBEE FC909409BF508B3F97761528171E19E14B410DD0B83381FFFC04BC6523CA516D 6D848112045336E8CD2AEB5288778655005AFAF900EB1EA51E12171C75EE8E99 182219E051D57E8DE14005CC4E25E1341B7ABE9888B6FE8B4829C0CF7D22E67E 62944D2FA963F18BDEDF5E4D432E5EB3F658C2D94B1A604A389D1889F0F599C0 C5C6298E90418582FE409C651EF87D5A475029368A5CCA34E5880DCEB1B215F5 8A7BD6189B70874D3634F162A55E9968D2C20C921685DC62C60A8133F34AC9FE 2FEEBFA761BDA29821B5675F4062B2C2DDE1F4CD7480EB9C75C42B1C6A1DCF8E 5E3D8EE013A404DDD05E26BE752501CB7BC6CFC345B31C260AE95364AC1AD3ED FAFB16982A7E0CBA2A079033AE6F3BEC9F949D46B31E1756524BD9416739B521 EA2C7FC9FDCF0853662EF6E67FB973952F767FBAB395F80743AE8138B19E4B90 466B333B37558632A42694B0439F05BD6F58BE5E1E7987B71B74295D59346A4F 4DDCA7BD0C22330AAB64A13B973D37E8FA42635BB6F9525A43DFC6047B7E0854 3C673C7ED3C7510FFA4523466978645BE955AABDB0D69AE4AF2C15D5196C27C5 9889194C2568171F6C5B241D9B8101DCF7FECB9E5E6CDAC42D8E204FD4E4A937 0160B7F22B90FABF5C3300098B474A66D6ACBDB5E6A3428CD8720544C30DFF20 F335FBE36E5BBC321C9B0C83B3929F070848FB9448E66D57973B730612F12461 D09B28B302044D563F14B0902DE56352E0AFCA99AA73C45A6281F6B081503F1D D55A91173B648C63FEA96982C929B1744E8E8CBFE2E405C7C61D5E9766D06B00 8D2C1BA33FA58FD2460A634C7BF9D78B79113A0F03288C6BA3B0A9653E1BFBCD 6D94F7C2A60ACC2C7F4F3CECE39A03CD006C79D32D90AEB978FE1A33D2922E90 5B9E1CD5926117C90EB44CC2D6B7E6E5B4BAB338D78B792AEE6C5181DEFAEB77 260D762E33953442CAD13FB4FC98EB34971F4A82B5995CA2F1522BDAA11F4636 6FFD3C57A621BD55B4D1DFDF01BE9BC1C9BEC714E724DD0F691449077628ADF7 5A344B7672DB4FDBBC3047169296B5471B9337D782407484CBC4A169BD126D39 27A352D2A70A0CA346C7B052088C7D5556139D07C27921112A76B3F73D737FC9 FD874936F2B7A85471900BAA72BF5A15EEE2E8FBD6BC03A14CC95FC1671308D9 A40FEB441587B22174D24057D6B74DEB8315A96FE9A1B56362052995ADDB0511 F774ECC99DE0D5907366ADD9490D0D829B891A66AB0DF43EB4C7BADBA6FE14B4 8F5CADC791ECC3AD9FD36CE3C2E18D91821479442E9A25B0F49F26EA93B48472 69817C68DEB1CF673684B3A7F19E4F4278F8DC6C41762752501651849A1A4C79 3F654A074A8A7B2ECFB6D2645FA82E83F5684FE9CDCC73C23E459BB4918FEA86 377BEC25E87BA33DDBDE26E4B52744C092319D06EFCE73CA71F4999A5F27736D BBDF7A0933E23CD89BD803150A2AAFE4C2D1EB37BE903CD212D05AA0BBE60B96 71E2AC78DE84AA2F2CB818F31AD754A9B3926511C714151714B5C6D726147383 232486A275C56477EF4C824EFEF161700D7B19E997D3E6F3DD58D7B7A654F678 24BECCC9AFD45BDB51D25FB973C9C7655FC50C4B585AA98AE1C1F0CA22EE9E42 D01C738DE4C81EA8FC01DBA83E4C3FCDA3A3CB0F06DA77D734A45A1E31B879C5 578EF7E979C2101C39B07B825917DCEB492D66FEF14EDB2F43DFF646AC80781E 8F0A5F771BEF26B53AF15A04B37537CDD649C5FB1166C0774741254FD7AD17EC EA740ECCE36447B22E15263F989E5A562BB56C7EE7AA274B9C794BCAF52A96B7 56A169A0AE49240F161606179493C81A5A086443B882150082BE44D98085123F 8F51575E9781C1DB9936A7AC869456052F099F2D637F22C61E323836F7E97C15 46DFE20097A7E8E91AE7F6293AD09DB1808CEF9CA64F81F4EA9FFADAC04CCD2A DBD15D2383B3272EC35A867DE5B463685FA39BD2FAD564828E73755A8D4D939C B08F951B355A00847EDFEB3B66F40F69EB74574F2CEF725853B9328D075CD4A3 17CB8D7DA6DF2D5B7EFC4699B88C0032C961017F1308D5E8A9D435A6666D1E2F 4FD69E44DCA09B6396218A9405ACB232DC7CF00AC93C16D37467EFA53BA6D297 93C7BA4FE6A35079BBB7E3EA7DD6CC51916B2EFB3E8399CA8D35C9994C7666B7 042CE345FE8FE45057F6852BFFE743D906BF2CFED715F76E90EE815D86F0126A C0889A7BCEEF046F2704327A6E115954BA0801D6CE4628A8B3FB9B1DC49DBE40 462C29AA5484BC427DAD3E51E5C4308CB283EDC452D8ED4EC54BDF8B5ACF5473 4C332E8E676E2DA576DFD6F1728382BB13D9B0E1EEDE70A182454FB336FBBC5B ED7F43C47269430E1CA32BD99D664E65AA8913A3864C0BCAB8721EEEDBD634AC 616C9F24B59481024A7C65AC464087AB50E3F8C91B896F354F81DDEF71448852 39E80C8B95C5E20AAFA9AF27C292A586E40FBA59C6CBECD0242749AE7D457553 31AADFCFECF5C4959FEB3F0374427BDB66731043170840609F49FE02DA4999E5 A014E30A25BA3CCC37F36E82E0009A17A2F5208DA4275EC632CB0269EC30E861 CB2F6EC52B16B2C70C777AB9975672252DF1F0BEF82EF20CF74FECC5D1A3E860 3BB7A694F0ED72EF02592B7100BCAB82D7C9979160E9A8E422DB511D36096C31 242F4207AF526155257F6F42CFB378734DA3E4919049C9732ECF976D5B7458ED 032F6F3F1D94E2B9795668ECE783B6A15EC8D72DCF7A2E062094503EFCD842B3 A035CD77022F94CD3500188825EC51CB717C4A832C99F8821695F391C27CA192 90C1BB49A276F77EB91EBA78A19C81AFDBEE0CA81E363520E14C06FEEA641BAF D50798F2BD99DDEB697FC2BCC662A65E8021125F7F453796046FAD3A2AAEC66E E773B09CD32EED074A2D74128D019C74FA626EBC85A2E78EB8CBBDD7CDFAFFD9 64660C95B730D94EE2FC88DCCB2B91015EEC832ABBC917E4972F3264A42EA9A6 36B8EC763CBE0991F4ADEF336A892AEA734D7EE0EC490AD8CCFBE8DE5CCEFE7F 0A2DEFE25CC46FDF1B2A1F4B0880DD20660347DA8E297A3EECD3DD9B348639DF 3745EDB87C438D57671B23E3DBDEF70E116F887AF4D55A5907BD205DE839EA5B 624E47A1E3450F2AB03BE3E7E122CE7C8C9523DE87128A38C3FDF81F847E6C5B 92C9866F67CE7F745C08C960F5C0F184DAD318F8885032BC6C5D851A6373D788 2677FA37FBFDC32EB2E42025AFA5CFF502A7BD0CDFD2D2B35165FB19B4F94CA5 D0CF322F1C5D97AE38846ED366965EF380EA6DB4A20D8CEF5E9C7881C12E3C11 2114D22BB990D8C69CE60C4EE1DEB26B8F4D0AE4969D2164BEC8104204DC4BD3 FB7C6039CA91F3D45B51E12B91FCD9A61A53BD11578D6F885726D966D4814B04 B32E26ECB5E2EFF245012C667B48910B0B1E1D1BDA0E7BBA0DF1577CF687640E 7BBB3EE607CF8AD864E984494384C070E9E749BD148C2ED78117427819D6727D F475F949A8DA67F2CC67D822A7D90A0A5A59732B5BF75CA759CD030A905995E7 D06405814AD236828B9FA4D7E72F62C3520545618E2C9B429670AAFA9F1509AE 2BD089441521812FCC1579055F1F7435E9BE7CC7043BAE80B95AEF41D16237B8 B5D6BD2CC640898AAA4C7E22A463E1852E08DC0FAA1F2567185AF8A0B9280027 16FD44829ED232157BC951DB6BFA047DEDF96E81B4B25B4AA373ADCF89CA5363 3307C7DEBE91E43F7C8E86ACC7FDFFA68A3573EC1E2B51CEBAC5E1CCA1A49C39 96C5443C7277B01F53C7A31AAAFC08A6A5753BE057F05AEBE932F4F272EAB982 1D12E36E084A514744B906A2F8E6E3C84828F5268B04A5CA7FB0CE7F016B4990 ABBF3B0E24267ABAF25B5006224D01E5E54454491365E70E40BDF1381BFD6DB2 955698BC2DE2A02982C65D477B9ABD0AB2BB34CCC4378C3A28C1FA2A5D945AB6 32D8F315858EA23A5B63595C4FAD923D08A2E9EA04E7A05DA86E029FA9D1D1D0 9253C84B1E3DC797671C94314ABEA1158C1E84E7D7E00FAFC24E6FEB4EDDC82D B4CACDC2690E45508525BE57695F40FAD2E5A9A88046607EFA95C307CCA81FA5 94AD34430CBA2D0F5B09A98131ABD9931710359EFE849338D8B1D7C1E14F5F91 99270AEC71AEBCCA8C111639D4704B7B234953171889379C6B8E55138D4E33F5 CF23EC28ED633AA85E4B2DBD98AA3EA9CE63DA8392144B9E8868AC9537E15501 C26BA0C7B11B48ECA889A390F9D9E16F015D194DFAFD1EDA1470D2F3C7C972A2 2FDEB9976D55F4762F3E796E8046B127AB0B6A843A6AC97F8F9CFBF9E4C74634 4B6D060B8C9C15026FE8936A3EC067B8B1CAFBCD6677AA1BC446143391C443EF E034EFBA4E31DC4D5A0297A7977691836DA0CD68AF9145B2AF24D7F57078E539 0EB23A4A7234495BBD4D807C017DD8F3FCC90D5FC34E6D5F8823BA19808C9A43 1E9513CB9C47FC68983AA24C698834F46DB0D59571A500EAB98F3BAAFBA25716 D0905416E9F22DE5A796210332A4FAE38398FA7CE50A402C624E41EFC13E7E4B 67ED7459DE929E5501A9B6B5E1595765EA29097EC43585F3B01E13917F609607 253BF9E6B795DE2876806145CA1D0FA24A547EB0F23AB41898938A78713CDE18 AA89038C215219727C098A82BC293AAE9D182AF5B331EA5F38D523C0EF5F9654 B728FEBC626D5F0FD13E5698190F49E15C04A14E6DFC86C33C3F860F8AFEF5C8 5705AB236C277D69354198238F12790E4F6953ABE5BBF52F6132683B6CC52754 C381E98184758E8D3E06F9C2FEF8BB67E4865BEAC8E00C330E1C3DECBA71B155 B015F1693210C94DB1F574406EA9D44FC3812E49C9A9D99BB01E5BE7792B6EB3 F6302DA68EF532EED466019F8A2997D0FF3F2A6FCA53FD7013DAAF4170810E33 0A4EE2C3FE3430137C0CE24B11D9E4FD6CB1DE3E42CB45465C4CE359A9B6FAC4 62AB8D4888723F1D624906DCBC7BE99BE8BE0160AB06B355C379365E427D32EB 2ACC5E2FE4FBD082CE9E056440F61E80F2EA618B14E6754F109B9249E86B73B6 ECBF8CBB32BC9B81941FE4B60B29BE8C3F9541E9D6A6714D2BE4B2EE7A4BDBEE C0565AD48592B46E085630D024B1D808311798C4FB8AC858ABABFDCE997516B1 48E1A657E8501163CCE38AFBFEA39E81FB0E0B2AE88D82806FDD5FC356E729FC 79A40A98ADF296C7A97DFEEA905EBECD4A7742A56E692C00B521FE52A2314951 946CB39CD3753CE5D5F58E9D06793D478FBECBDB1C1B8A4A7D4CC2BF7D66CA89 3100D755C217EA9E7976F950A1B887A7D0DCD18180C9148F782CEADDFECB8555 97207ECEADEC5E4CBA59E6D8BDF929793CFE2987E0BCAB4C2B1872EAAB62194C 5CC8F8C1FBA41ABC986D07E6A6BB60507104869B339923890F230C8996C44D87 89CD1A9C7B417C4BDFBC923AC2D4B3A9B4ED74F9802D10CF085BFFC6033A598E FBC879548587E150DF37B099960714CE621D16F2AF08BE35502DC20195724337 0E06725FF1B9D032089E05006461EAFB14B85584BA0618D13CF485DCAAF3DC9C C80DE3BA662F7F07FE860AF5061130375A39C0606DD976ED084642AA7B5DAD24 FA107F84D5DB1C1B5331816303E8C3329E8B023790D70A75480AEFDE595AF8AA 1C41270058F581C7D38E6C8C02F0D9D3F1250D149682FF74AF9BF35FCC15A1FD 68F6D1492DE7E00EFCACA2A4EE134344 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 cleartomark %%EndFont TeXDict begin 39139632 55387786 1000 600 600 (lawn93.dvi) @start /Fa 133[46 56 1[76 56 56 54 42 55 2[58 56 68 47 58 1[27 56 58 49 51 57 54 1[56 9[101 74 1[71 56 73 1[67 77 1[90 62 77 52 37 74 77 65 67 75 71 70 74 18[29 34 29 4[29 39[{}45 90.9091 /CMCSC10 rf /Fb 133[37 37 121[{}2 83.022 /CMSY10 rf /Fc 135[102 7[108 1[108 4[54 3[88 108 33[138 146 65[{}8 172.188 /CMBX12 rf /Fd 215[67 26[46 13[{}2 83.022 /CMEX10 rf /Fe 157[33 41 17[58 15[38 63[{}4 74.7198 /CMMI9 rf /Ff 133[34 34 8[64 96[38 11[38 3[{}5 74.7198 /CMSY9 rf /Fg 252[35 2[55{}2 66.4176 /CMSY8 rf /Fh 138[41 4[36 3[21 1[29 24 20[41 11[58 72[{}7 66.4176 /CMMI8 rf /Fi 171[66 51 67 8[33 68 1[59 62 69 68[{}8 90.9091 /CMSL10 rf /Fj 141[48 9[48 4[48 1[48 97[{}4 90.9091 /CMITT10 rf /Fk 135[52 2[52 33 43 41 1[46 44 55 80 27 1[37 31 52 43 1[42 47 1[39 48 6[62 1[75 1[53 62 53 56 69 72 58 69 73 88 62 2[40 76 2[67 75 65 69 68 1[45 71 45 71 25 25 23[42 18[37 15[{}46 90.9091 /CMMI10 rf /Fl 133[51 5[51 51 2[51 1[51 51 51 51 1[51 1[51 51 51 1[51 1[51 13[51 2[51 2[51 51 51 7[51 1[51 17[51 51 46[{}23 99.6264 /CMTT12 rf /Fm 133[50 59 59 1[59 62 44 44 46 59 62 56 62 93 31 2[31 62 56 34 51 62 50 62 54 6[68 3[85 86 78 62 84 84 77 1[88 1[67 88 1[42 88 88 1[74 86 81 80 85 7[56 56 56 56 56 56 56 56 56 56 1[31 37 31 2[44 44 27[62 12[{}58 99.6264 /CMBX12 rf /Fn 133[37 44 42 60 42 49 30 37 38 1[46 46 51 74 23 42 1[28 46 42 28 42 46 42 42 46 4[47 5[68 68 65 51 66 70 62 70 68 82 57 70 1[35 68 70 59 2[65 64 68 6[28 6[46 1[46 2[28 33 28 4[28 4[47 21[51 12[{}53 90.9091 /CMTI10 rf /Fo 133[40 40 13[45 25 84[71 5[45 11[45 71 1[71{}9 90.9091 /CMSY10 rf /Fp 138[75 52 53 55 2[67 75 1[37 2[37 75 2[61 75 60 75 65 12[94 75 2[92 2[128 81 105 1[50 5[97 1[102 13[67 67 67 49[{}26 119.552 /CMBX12 rf /Fq 133[46 55 55 76 55 58 41 41 43 1[58 52 58 87 29 55 1[29 58 52 32 48 58 46 58 51 6[64 3[79 80 73 58 78 79 71 1[82 99 63 82 1[40 82 82 66 69 80 76 74 79 7[52 52 52 52 52 52 52 52 52 52 1[29 35 29 44[{}58 90.9091 /CMBX10 rf /Fr 129[48 1[48 1[48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 1[48 48 48 48 48 48 48 48 48 6[48 1[48 48 48 48 48 48 48 48 48 48 48 48 48 48 1[48 48 48 48 48 48 48 48 48 48 48 1[48 2[48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 1[48 48 48 48 48 2[48 48 34[{}76 90.9091 /CMTT10 rf /Fs 134[123 123 2[129 90 92 95 1[129 116 129 1[65 2[65 129 116 1[106 1[103 129 113 13[129 173 1[159 3[140 182 7[168 165 175 65[{}25 206.559 /CMBX12 rf /Ft 134[41 1[55 41 43 30 30 30 41 43 38 43 64 21 41 23 21 43 38 23 34 43 34 43 38 3[21 1[21 3[79 58 58 55 43 57 1[52 60 58 70 48 60 1[28 58 60 50 52 59 55 54 58 5[21 21 38 38 38 38 38 38 38 38 38 38 1[21 26 21 2[30 30 25[64 43 43 45 11[{}68 74.7198 /CMR9 rf /Fu 198[30 30 30 30 30 30 30 30 30 30 48[{}10 49.8132 /CMR6 rf /Fv 198[35 35 35 35 35 35 35 35 35 35 48[{}10 66.4176 /CMR8 rf /Fw 132[45 40 48 48 66 48 51 35 36 36 48 51 45 51 76 25 48 28 25 51 45 28 40 51 40 51 45 25 2[25 45 25 56 68 68 93 68 68 66 51 67 71 62 71 68 83 57 71 47 33 68 71 59 62 69 66 64 68 3[71 1[25 25 45 45 45 45 45 45 45 45 45 45 45 25 30 25 71 45 35 35 25 3[76 45 20[51 51 53 4[66 6[{}85 90.9091 /CMR10 rf /Fx 206[42 49[{}1 83.022 /CMR10 rf /Fy 138[65 46 46 46 2[59 65 1[33 62 1[33 1[59 36 52 65 52 1[59 9[120 3[65 2[80 1[88 1[73 91 1[42 1[92 3[85 1[88 7[59 5[59 51[{}27 119.552 /CMR12 rf end %%EndProlog %%BeginSetup %%Feature: *Resolution 600dpi TeXDict begin %%PaperSize: A4 end %%EndSetup %%Page: 1 1 TeXDict begin 1 0 bop 889 -269 a Fy(LAP)-10 b(A)m(CK)38 b(W)-10 b(orking)38 b(Note)g(93)661 -140 y(Installation)e(Guide)i(for)g (ScaLAP)-10 b(A)m(CK)2475 -184 y Fx(1)827 72 y Fw(L.)30 b(S.)g(Blac)m(kford)1424 39 y Fv(2)1465 72 y Fw(,)g(A.)h(Cleary)1904 39 y Fv(3)1944 72 y Fw(,)f(J.)h(Choi)2289 39 y Fv(4)2328 72 y Fw(,)421 185 y(J.)f(J.)g(Dongarra,)i(J.)e(Langou,)h(A.)g(P)m (etitet)1899 152 y Fv(5)1941 185 y Fw(,)f(and)g(R.)g(C.)h(Whaley)2719 152 y Fv(6)926 298 y Fw(Departmen)m(t)h(of)e(Computer)g(Science)1123 411 y(Univ)m(ersit)m(y)i(of)e(T)-8 b(ennessee)943 524 y(Kno)m(xville,)32 b(T)-8 b(ennessee)31 b(37996-3450)1517 736 y(and)531 949 y(J.)f(Demmel,)h(I.)g(Dhillon)1395 916 y Fv(7)1435 949 y Fw(,)g(O.)f(Marques)1956 916 y Fv(8)1995 949 y Fw(,)h(and)f(K.)g(Stanley)1054 1062 y(Computer)f (Science)i(Division)926 1175 y(Univ)m(ersit)m(y)g(of)g(California,)g (Berk)m(eley)1200 1288 y(Berk)m(eley)-8 b(,)33 b(CA)d(94720)1517 1500 y(and)1369 1713 y(D.)h(W)-8 b(alk)m(er)1770 1680 y Fv(9)1015 1925 y Fw(VERSION)30 b(1.8:)42 b(April)30 b(5,)h(2007)1417 2138 y(Abstract)-210 2350 y(This)g(w)m(orking)h(note)h (describ)s(es)e(ho)m(w)g(to)i(install)g(and)e(test)i(v)m(ersion)f(1.8)h (of)f(ScaLAP)-8 b(A)m(CK.)32 b(The)g(most)-210 2463 y(signi\014can)m(t) 43 b(c)m(hange)f(in)g(this)f(release)i(of)f(ScaLAP)-8 b(A)m(CK)42 b(is)g(the)f(externalisation)j(of)e(the)g(LAP)-8 b(A)m(CK)-210 2576 y(routines.)38 b(No)m(w)25 b(ScaLAP)-8 b(A)m(CK)24 b(requires)f(to)h(ha)m(v)m(e)h(the)f(LAP)-8 b(A)m(CK)23 b(library)h(installed)g(b)s(esides)f(BLA)m(CS,)-210 2689 y(BLAS)33 b(and)f(MPI)h(or)g(PVM)g(.)g(This)f(will)i(allo)m(w)g (the)f(user)g(to)g(use)g(the)g(latest)i(LAP)-8 b(A)m(CK)33 b(algorithms,)-210 2802 y(mo)s(di\014cations)f(without)g(the)h(need)e (of)i(reinstalling)g(the)f(ScaLAP)-8 b(A)m(CK)33 b(library)-8 b(.)45 b(Tw)m(o)33 b(new)e(routines)-210 2915 y(to)f(allo)m(w)g(read)f (and)f(write)h(from)f(\014les)h(ha)m(v)m(e)h(b)s(een)e(added.)40 b(Also)29 b(a)g(complete)i(ScaLAP)-8 b(A)m(CK)29 b(example)-210 3028 y(has)34 b(b)s(een)f(added)g(in)h(the)g(main)g(directory)-8 b(.)53 b(The)34 b(design)g(of)g(the)g(testing/timing)i(programs)e(for)g (the)-210 3141 y(ScaLAP)-8 b(A)m(CK)31 b(co)s(des)f(is)h(also)g (discussed.)p -210 3559 1440 4 v -106 3612 a Fu(1)-72 3644 y Ft(This)36 b(w)n(ork)f(w)n(as)h(supp)r(orted)e(in)h(part)g(b)n (y)e(the)i(National)h(Science)f(F)-6 b(oundation)34 b(Gran)n(t)h(No.)62 b(ASC-9005933;)42 b(b)n(y)-210 3735 y(the)29 b(Defense)h(Adv)l(anced)e (Researc)n(h)i(Pro)t(jects)h(Agency)e(under)g(con)n(tract)g(D)n (AAH04-95-1-0077,)j(administered)e(b)n(y)e(the)-210 3827 y(Arm)n(y)g(Researc)n(h)h(O\016ce;)i(b)n(y)e(the)f(O\016ce)h(of)h (Scien)n(ti\014c)f(Computing,)i(U.S.)e(Departmen)n(t)f(of)i(Energy)-6 b(,)30 b(under)e(Con)n(tract)-210 3918 y(DE-A)n(C05-84OR21400;)e(and)c (b)n(y)f(the)h(National)h(Science)g(F)-6 b(oundation)22 b(Science)g(and)g(T)-6 b(ec)n(hnology)23 b(Cen)n(ter)f(Co)r(op)r (erativ)n(e)-210 4009 y(Agreemen)n(t)j(No.)35 b(CCR-8809615.)-106 4069 y Fu(2)-72 4101 y Ft(Curren)n(t)26 b(address:)35 b(Myricom)-106 4160 y Fu(3)-72 4192 y Ft(Curren)n(t)26 b(address:)35 b(LLNL)-106 4252 y Fu(4)-72 4283 y Ft(Curren)n(t)26 b(address:)35 b(So)r(ongsil)27 b(Univ)n(ersit)n(y)-6 b(,)24 b(Seoul,)i(Korea)-106 4343 y Fu(5)-72 4375 y Ft(Curren)n(t)g (address:)35 b(Sun)24 b(F)-6 b(rance,)26 b(P)n(aris,)h(F)-6 b(rance)-106 4434 y Fu(6)-72 4466 y Ft(Curren)n(t)26 b(address:)35 b(UTSA)-106 4526 y Fu(7)-72 4557 y Ft(Curren)n(t)26 b(address:)35 b(IBM)26 b(Austin)-106 4617 y Fu(8)-72 4649 y Ft(Curren)n(t)g(address:)35 b(LBL)-106 4708 y Fu(9)-72 4740 y Ft(Curren)n(t)26 b(address:)35 b(Cardi\013)26 b(Univ)n(ersit)n(y)-6 b(,)24 b(W)-6 b(ales)1567 4989 y Fw(1)p eop end %%Page: 2 2 TeXDict begin 2 1 bop -210 395 a Fs(Con)-6 b(ten)g(ts)-74 840 y Fw(1)164 b(In)m(tro)s(duction)28 b(.)46 b(.)g(.)f(.)h(.)g(.)f(.)h (.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.) g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)184 b(4)-74 953 y(2)164 b(Installation)32 b(Pro)s(cedure)63 b(.)46 b(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h (.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)184 b(4)135 1066 y(2.1)176 b(Gunzip)30 b(and)g(tar)h(the)f(\014le)h Fr(scalapack.tgz)79 b Fw(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h (.)g(.)g(.)f(.)h(.)184 b(5)135 1179 y(2.2)176 b(Edit)31 b(the)f Fr(SLmake.inc)e Fw(include)i(\014le)69 b(.)46 b(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f (.)h(.)184 b(6)426 1292 y(2.2.1)k(F)-8 b(urther)47 b(Details)i(to)e (obtain)h(BLA)m(CS,)f(BLAS,)g(LAP)-8 b(A)m(CK)47 b(and)799 1405 y(PVM)31 b(or)f(MPI)36 b(.)46 b(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.) f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)184 b(6)135 1518 y(2.3)176 b(Edit)31 b(the)f(top-lev)m(el)j Fr(SCALAPACK/Makefile)25 b Fw(and)30 b(t)m(yp)s(e)h Fr(make)23 b Fw(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)184 b(7)135 1631 y(2.4)176 b(Run)30 b(the)g(PBLAS)g(T)-8 b(est)31 b(Suite)65 b(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.) f(.)h(.)g(.)g(.)f(.)h(.)184 b(8)135 1744 y(2.5)176 b(Run)30 b(the)g(PBLAS)g(Timing)g(Suite)h(\(optional\))90 b(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(10)135 1857 y(2.6)176 b(Run)30 b(the)g(REDIST)g(T)-8 b(est)31 b(Suite)90 b(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.) g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(10)135 1970 y(2.7)176 b(Run)30 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(T)-8 b(est)31 b(Suite)61 b(.)46 b(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g (.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(10)135 2083 y(2.8)176 b(Run)30 b(the)g(examples)85 b(.)46 b(.)f(.)h(.)g(.)f(.)h(.)g (.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.) g(.)f(.)h(.)138 b(11)135 2195 y(2.9)176 b(T)-8 b(roublesho)s(oting)88 b(.)46 b(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.) h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(11)-74 2308 y(3)164 b(More)32 b(Ab)s(out)d(the)i(ScaLAP)-8 b(A)m(CK)31 b(T)-8 b(est)31 b(Suite)h(.)46 b(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.) f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(12)135 2421 y(3.1)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(LU)f(routines)25 b(.)45 b(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h (.)g(.)g(.)f(.)h(.)138 b(13)426 2534 y(3.1.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(LU)f(Routines)24 b(.)46 b(.)f(.)h(.)138 b(13)135 2647 y(3.2)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(Band)f(and)g(T)-8 b(ridiagonal)31 b(LU)g(routines)43 b(.)j(.)f(.)h(.)138 b(14)426 2760 y(3.2.1)188 b(Input)27 b(File)j(for)e(T)-8 b(esting)29 b(the)g(ScaLAP)-8 b(A)m(CK)29 b(Band)f(and)g(T)-8 b(ridiago-)799 2873 y(nal)31 b(LU)f(Routines)54 b(.)45 b(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.) g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(14)135 2986 y(3.3)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(LL)-8 b(T)30 b(routines)48 b(.)e(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f (.)h(.)g(.)g(.)f(.)h(.)138 b(15)426 3099 y(3.3.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(LL)-8 b(T)30 b(Routines)48 b(.)d(.)h(.)138 b(15)135 3212 y(3.4)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(Band)f(and)g(T)-8 b(ridiagonal)31 b(LL)-8 b(T)30 b(routines)68 b(.)45 b(.)h(.)138 b(16)426 3325 y(3.4.1)188 b(Input)24 b(File)i(for)f(T)-8 b(esting)26 b(the)g(ScaLAP)-8 b(A)m(CK)26 b(Band)f(or)g(T)-8 b(ridiagonal)799 3437 y(LL)g(T)30 b(Routines)88 b(.)46 b(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f (.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(16)135 3550 y(3.5)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(QR,)f(R)m(Q,)h(LQ,)f(QL,)g(QP)-8 b(,)30 b(and)g(TZ)g(routines)52 b(.)138 b(17)426 3663 y(3.5.1)188 b(Input)34 b(File)i(for)f(T)-8 b(esting)37 b(the)e(ScaLAP)-8 b(A)m(CK)36 b(QR,)f(R)m(Q,)g(LQ,)g(QL,)799 3776 y(QP)-8 b(,)31 b(and)e(TZ)h(Routines)64 b(.)45 b(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.) h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(17)135 3889 y(3.6)176 b(T)-8 b(ests)31 b(for)f(the)h(Linear)f(Least)h(Squares) f(\(LLS\))g(routines)93 b(.)45 b(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(17)426 4002 y(3.6.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(LLS)e(Routines)56 b(.)45 b(.)h(.)138 b(18)135 4115 y(3.7)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(INV)f(routines)51 b(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(18)426 4228 y(3.7.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(INV)f(Routines)51 b(.)45 b(.)h(.)138 b(19)135 4341 y(3.8)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(HRD)g(routines)86 b(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(19)426 4454 y(3.8.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(HRD)f(Routines)86 b(.)46 b(.)138 b(20)135 4567 y(3.9)176 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(TRD)f(routines)89 b(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(20)426 4679 y(3.9.1)188 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(SCALAP)-8 b(A)m(CK)30 b(TRD)g(Routines)40 b(.)46 b(.)138 b(20)1567 4989 y(2)p eop end %%Page: 3 3 TeXDict begin 3 2 bop 135 -269 a Fw(3.10)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(BRD)g(routines)90 b(.)46 b(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(21)426 -156 y(3.10.1)143 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(BRD)g(Routines)89 b(.)46 b(.)138 b(21)135 -43 y(3.11)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(SEP)e(routines)46 b(.)g(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(21)426 70 y(3.11.1)143 b(T)-8 b(est)31 b(Matrices)h(for)e(the)h (Symmetric)f(Eigen)m(v)-5 b(alue)32 b(Routines)65 b(.)45 b(.)h(.)138 b(22)426 183 y(3.11.2)143 b(Input)36 b(File)i(for)f(T)-8 b(esting)39 b(the)e(Symmetric)g(Eigen)m(v)-5 b(alue)39 b(Routines)799 296 y(and)30 b(Driv)m(ers)35 b(.)45 b(.)h(.)g(.)f(.)h(.) g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g (.)g(.)f(.)h(.)138 b(23)135 409 y(3.12)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(GSEP)f(routines)45 b(.)h(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(24)426 522 y(3.12.1)143 b(Input)20 b(File)h(for)g(T)-8 b(esting)22 b(the)f(Generalized)h(Symmetric)f(Eigen)m(v)-5 b(alue)799 635 y(Routines)31 b(and)e(Driv)m(ers)79 b(.)45 b(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g (.)f(.)h(.)138 b(24)135 748 y(3.13)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(NEP)f(routines)e(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(24)426 860 y(3.13.1)143 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(NEP)f(Routines)e(.)45 b(.)h(.)138 b(25)135 973 y(3.14)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(EV)m(C)f(routines)d(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(25)426 1086 y(3.14.1)143 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(EV)m(C)f(Routines)d(.)45 b(.)h(.)138 b(26)135 1199 y(3.15)131 b(T)-8 b(ests)31 b(for)f(the)h(ScaLAP)-8 b(A)m(CK)31 b(SVD)f(routines)i(.)46 b(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)138 b(26)426 1312 y(3.15.1)143 b(T)-8 b(est)31 b(Matrices)h(for)e(the)h (Singular)f(V)-8 b(alue)31 b(Decomp)s(osition)h(Routines)53 b(27)426 1425 y(3.15.2)143 b(Input)29 b(File)j(for)e(T)-8 b(esting)31 b(the)g(ScaLAP)-8 b(A)m(CK)31 b(SVD)f(Routines)i(.)45 b(.)h(.)138 b(27)-210 1629 y Fq(A)57 b(ScaLAP)-9 b(A)m(CK)36 b(Routines)2325 b(28)-210 1833 y(B)62 b(ScaLAP)-9 b(A)m(CK)36 b(Auxiliary)f(Routines)1862 b(32)-74 1946 y Fw(Bibliograph)m(y)86 b(.)45 b(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.) g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g(.)g(.)f(.)h(.)g(.)f(.)h(.)g (.)g(.)f(.)h(.)138 b(35)1567 4989 y(3)p eop end %%Page: 4 4 TeXDict begin 4 3 bop -210 -269 a Fp(1)135 b(In)l(tro)t(duction)72 84 y Fw(This)31 b(w)m(orking)i(note)f(describ)s(es)g(ho)m(w)g(to)g (install)h(and)f(test)h(v)m(ersion)f(1.8)h(of)f(ScaLAP)-8 b(A)m(CK)33 b([1].)-210 197 y(This)d(release)h(of)g(ScaLAP)-8 b(A)m(CK)31 b(includes:)-74 385 y Fo(\017)46 b Fw(Externalisation)h(of) e(the)g(LAP)-8 b(A)m(CK)46 b(routines.)84 b(No)m(w)46 b(y)m(ou)f(NEED)h(the)f(LAP)-8 b(A)m(CK)45 b(library)17 498 y(installed)31 b(on)g(y)m(our)f(mac)m(hine)h(in)f(order)g(to)h (link/run)e(a)i(ScaLAP)-8 b(A)m(CK)31 b(program.)-74 685 y Fo(\017)46 b Fw(2)41 b(new)f(routines:)61 b(p[sdcz]la)m(write)42 b(and)e([psdcz]laread)h(declined)g(in)f(the)h(4)g(precisions\()g(they) 17 798 y(haev)31 b(b)s(een)f(adapated)h(from)e(ScaEx)i(example)g(from)f (An)m(toine)h(P)m(ettitet.)-74 986 y Fo(\017)46 b Fw(a)27 b(new)e(directory)i(EXAMPLE)f(that)h(con)m(tains)g(a)g(ScaLAP)-8 b(A)m(CK)26 b(example)h(in)f(the)g(4)h(precisions.)-74 1174 y Fo(\017)46 b Fw(Sev)m(eral)32 b(bug)d(\014xes.)-210 1361 y(F)-8 b(or)38 b(a)g(detailed)g(explanation)g(of)g(the)f(design)g (and)g(con)m(ten)m(ts)i(of)f(the)f(ScaLAP)-8 b(A)m(CK)38 b(library)-8 b(,)39 b(please)-210 1474 y(refer)30 b(to)h(the)g(ScaLAP) -8 b(A)m(CK)31 b(Users')f(Guide[1)q(].)-69 1587 y(ScaLAP)-8 b(A)m(CK)30 b(is)g(freely)g(a)m(v)-5 b(ailable)32 b(on)d Fn(netlib)h Fw(and)f(can)h(b)s(e)f(obtained)h(via)g(the)g(W)-8 b(orld)30 b(Wide)g(W)-8 b(eb)-210 1700 y(or)30 b(anon)m(ymous)h(ftp.)17 1887 y Fr(http://www.netlib.org/sca)o(lapa)o(ck/s)o(cal)o(apac)o(k.tg)o (z)-210 2075 y Fw(Prebuilt)f(ScaLAP)-8 b(A)m(CK)31 b(libraries)g(are)f (a)m(v)-5 b(ailable)33 b(on)d Fn(netlib)h Fw(for)f(a)h(v)-5 b(ariet)m(y)31 b(of)g(arc)m(hitectures.)17 2263 y Fr (http://www.netlib.org/sca)o(lapa)o(ck/a)o(rch)o(ives)o(/)-210 2450 y Fw(Ho)m(w)m(ev)m(er,)d(if)c(a)h(prebuilt)e(library)h(do)s(es)g (not)g(exist)h(for)f(y)m(our)g(arc)m(hitecture,)k(y)m(ou)c(will)h(need) f(to)h(do)m(wnload)-210 2563 y(the)31 b(distribution)e(tar)i(\014le)f (and)g(build)g(the)g(library)g(as)h(instructed)f(in)g(this)g(guide.)-69 2676 y(T)-8 b(o)21 b(install)g(and)f(test)h(ScaLAP)-8 b(A)m(CK,)21 b(the)f(user)g(m)m(ust)g(ha)m(v)m(e)i(the)e(BLA)m(CS,)h (BLAS[9,)g(6,)g(5],)i(LAP)-8 b(A)m(CK[11)q(])-210 2789 y(and)30 b(MPI)g([7)q(])g(or)h(PVM)f([8)q(])h(a)m(v)-5 b(ailable)32 b(on)f(his)f(mac)m(hine.)-69 2902 y(ScaLAP)-8 b(A)m(CK)41 b(has)f(b)s(een)f(tested)i(on)f(MPPs)g(lik)m(e)h(the)f(IBM) h(SP)e(series,)k(Cra)m(y)e(T3E,)f(and)f(SGI)-210 3015 y(Origin)33 b(2000/3000,)38 b(and)32 b(tested)i(on)f(clusters)g(of)g (PCs)g(and)f(net)m(w)m(orks)i(of)f(w)m(orkstations)h(supp)s(orting)-210 3128 y(MPI)c(or)h(PVM.)348 3095 y Fv(10)-69 3241 y Fw(Section)36 b(2)f(con)m(tains)h(step-b)m(y-step)g(installation)h(and)d (testing/timing)k(instructions.)54 b(F)-8 b(or)36 b(users)-210 3354 y(desiring)c(additional)g(information,)h(Section)f(3)g(giv)m(es)i (details)e(on)g(the)g(testing/timing)i(programs)d(for)-210 3467 y(the)24 b(ScaLAP)-8 b(A)m(CK)24 b(co)s(des)f(and)g(their)g(input) g(\014les.)38 b(App)s(endices)23 b(A)g(and)g(B)h(describ)s(e)f(the)g (ScaLAP)-8 b(A)m(CK)-210 3579 y(driv)m(er,)30 b(computational,)j(and)c (auxiliary)i(routines)g(curren)m(tly)f(a)m(v)-5 b(ailable.)-210 3866 y Fp(2)135 b(Installation)47 b(Pro)t(cedure)-69 4069 y Fw(Installing,)31 b(testing,)h(and)e(timing)h(ScaLAP)-8 b(A)m(CK)31 b(in)m(v)m(olv)m(es)h(the)f(follo)m(wing)h(steps:)-99 4256 y(1.)46 b(Gunzip)30 b(and)g(tar)h(the)f(\014le)h Fr(scalapack.tgz)p Fw(.)p -210 4338 1440 4 v -137 4392 a Fu(10)-72 4424 y Ft(It)f(is)g(v)n(ery)g(imp)r(ortan)n(t)g(to)g(note)g (that)g(only)g(PVM)g(v)n(ersion)g(3.3)h(or)g(later)f(is)h(supp)r(orted) f(with)g(the)g(BLA)n(CS[4,)h(10].)-210 4515 y(Due)c(to)i(ma)t(jor)g(c)n (hanges)g(in)e(PVM)i(and)e(the)h(resulting)g(c)n(hanges)h(required)e (in)h(the)g(BLA)n(CS,)g(earlier)i(v)n(ersions)e(of)h(PVM)-210 4606 y(are)d(NOT)g(supp)r(orted.)1567 4989 y Fw(4)p eop end %%Page: 5 5 TeXDict begin 5 4 bop -99 -269 a Fw(2.)46 b(Cop)m(y)37 b(the)h(SLmak)m(e.inc.example)g(to)g(SLmak)m(e.inc)g(and)f(edit)g(the)g Fr(SLmake.inc)e Fw(include)i(\014le,)17 -156 y(sp)s(ecifying)i(the)f (lo)s(cation)j(of)d(the)h(MPI)g(or)f(PVM)h(library)-8 b(,)41 b(the)e(BLA)m(CS)f(library)-8 b(,)41 b(the)e(BLAS)17 -43 y(library)30 b(and)g(the)h(LAP)-8 b(A)m(CK)30 b(library)-8 b(.)-99 132 y(3.)46 b(Edit)31 b(the)f(top-lev)m(el)j Fr(Makefile)p Fw(,)28 b(and)i(t)m(yp)s(e)g Fr(make)g Fw(to)h(generate)h(the)e(ScaLAP)-8 b(A)m(CK)31 b(library)-99 307 y(4.)46 b(T)m(yp)s(e)30 b Fr(make)47 b(exe)29 b Fw(to)i(generate)h (the)f(ScaLAP)-8 b(A)m(CK)31 b(T)-8 b(est)31 b(Suite\(s\).)-99 482 y(5.)46 b(Run)30 b(the)g(T)-8 b(est)31 b(Suite\(s\).)-69 638 y(If)26 b(failures)h(are)g(encoun)m(tered)h(during)d(an)m(y)i (phase)g(of)g(the)g(installation)h(or)f(testing)h(pro)s(cess,)f(please) -210 751 y(\014rst)j(refer)g(to)h(the)f(F)-10 b(A)m(Q)31 b(and)f(Errata)h(\014les)f(for)g(information)17 908 y Fr(http://www.netlib.org/sca)o(lapa)o(ck/f)o(aq.)o(html)17 1083 y(http://www.netlib.org/sca)o(lapa)o(ck/e)o(rra)o(ta.h)o(tml)-210 1239 y Fw(and)g(if)g(that)h(do)s(es)f(not)h(resolv)m(e)g(the)g (problem,)f(please)h(con)m(tact)i(the)e(dev)m(elop)s(ers)f(at)17 1395 y Fr(scalapack@cs.utk.edu)-210 1633 y Fm(2.1)112 b(Gunzip)38 b(and)h(tar)e(the)g(\014le)h Fl(scalapack.tgz)72 1804 y Fw(The)21 b(soft)m(w)m(are)i(is)f(distributed)e(in)h(the)h(form) f(of)h(a)f(gzipp)s(ed)g(tar)h(\014le)g(whic)m(h)f(con)m(tains)i(the)e (ScaLA-)-210 1917 y(P)-8 b(A)m(CK)37 b(source)h(co)s(de)f(and)f(test)i (suite,)i(as)d(w)m(ell)h(as)g(the)f(PBLAS)g(source)g(co)s(de)g(and)g (testing/timing)-210 2030 y(programs.)i(The)25 b(PBLAS)g(are)h (parallel)h(v)m(ersions)f(of)f(the)h(Lev)m(el)h(1,)g(2,)g(and)e(3)h (BLAS.)g(F)-8 b(or)26 b(more)g(details)-210 2143 y(on)k(the)h(PBLAS,)f (refer)g(to)h([2)q(,)g(3].)17 2299 y Fr(http://www.netlib.org/sca)o (lapa)o(ck/s)o(cal)o(apac)o(k.tg)o(z)-69 2455 y Fw(T)-8 b(o)31 b(unpac)m(k)f(the)h Fr(scalapack.tgz)26 b Fw(\014le,)31 b(t)m(yp)s(e)g(the)f(follo)m(wing)i(command:)17 2630 y Fr(gunzip)46 b(-c)i(scalapack.tgz)c(|)j(tar)g(xvf)g(-)-210 2805 y Fw(This)33 b(will)i(create)h(a)e(top-lev)m(el)j(directory)e (called)g Fr(SCALAPACK)d Fw(as)i(sho)m(wn)g(in)g(Figure)g(1.)53 b(Please)35 b(note)-210 2918 y(that)j(this)f(\014gure)g(do)s(es)g(not)g (re\015ect)h(ev)m(erything)g(that)g(is)f(con)m(tained)i(in)e(the)g Fr(SCALAPACK)e Fw(directory)-8 b(.)-210 3031 y(Input)31 b(and)g(instructional)i(\014les)f(are)g(also)h(lo)s(cated)g(at)g(v)-5 b(arious)32 b(lev)m(els.)47 b(Libraries)32 b(are)g(created)h(in)f(the) 1069 3141 y(SCALAP)-8 b(A)m(CK)p 1388 3241 4 113 v 269 3242 2840 4 v 267 3353 4 113 v 715 3353 V 1164 3353 V 1612 3353 V 2060 3353 V 2583 3353 V 3106 3353 V 136 3440 a(PBLAS)207 b(SR)m(C)150 b(TESTING)82 b(TOOLS)116 b(REDIST)i(EXAMPLE)59 b(INST)-8 b(ALL)p 267 3577 V 44 3579 449 4 v 43 3689 4 113 v 491 3689 V 1164 3577 V 941 3579 449 4 v 939 3689 4 113 v 1388 3689 V 2060 3577 V 1838 3579 449 4 v 1836 3689 4 113 v 2284 3689 V -27 3776 a(SR)m(C)149 b(TESTING)161 b(LIN)286 b(EIG)275 b(SR)m(C)150 b(TESTING)808 3997 y(Figure)30 b(1:)42 b(Organization)31 b(of)g(ScaLAP)-8 b(A)m(CK)-210 4288 y Fr(SCALAPACK)31 b Fw(directory)i(and)g(executable)h(\014les)g (are)f(created)h(in)f(the)g Fr(TESTING)f Fw(directory\(ies\).)50 b(Input)-210 4401 y(\014les)30 b(are)g(copied)g(in)m(to)h(the)f Fr(TESTING)e Fw(directory)i(at)g(the)g(time)h(eac)m(h)g(executable)g (is)f(created.)41 b(Y)-8 b(ou)31 b(will)-210 4514 y(need)g(appro)m (ximately)h(28)g(Mb)m(ytes)g(of)f(space)h(for)f(the)g(tar)g(\014le.)43 b(Y)-8 b(our)31 b(total)i(space)f(requiremen)m(ts)f(will)-210 4627 y(v)-5 b(ary)29 b(dep)s(ending)e(up)s(on)h(if)h(all)h(platforms)e (of)i(the)f(BLA)m(CS)g(are)g(installed)h(and)e(the)h(size)h(of)f (executable)-210 4740 y(\014les)h(that)h(y)m(our)g(con\014guration)f (can)h(handle.)1567 4989 y(5)p eop end %%Page: 6 6 TeXDict begin 6 5 bop -210 -269 a Fm(2.2)112 b(Edit)37 b(the)h Fl(SLmake.inc)i Fm(include)e(\014le)72 -97 y Fw(Example)28 b(mac)m(hine-sp)s(eci\014c)h Fr(SCALAPACK/SLmake.inc)22 b Fw(\014les)27 b(are)h(pro)m(vided)g(in)f(the)h Fr(INSTALL)-210 16 y Fw(sub)s(directory)33 b(for)g(the)h(In)m(tel)g(i860,)i(IBM)e(SP)-8 b(,)34 b(Cra)m(y)g(T3E,)g(SGI)f(Origin,)h(and)f(v)-5 b(arious)34 b(w)m(orkstations)-210 129 y(using)20 b(MPI)h(or)g(PVM.)h (When)e(y)m(ou)i(ha)m(v)m(e)g(selected)g(the)f(mac)m(hine)h(to)g(whic)m (h)e(y)m(ou)i(wish)e(to)i(install)f(ScaLA-)-210 242 y(P)-8 b(A)m(CK,)21 b(cop)m(y)g(the)f(appropriate)h(sample)f(include)g(\014le) g(\(if)h(one)g(is)f(presen)m(t\))h(in)m(to)g Fr(SCALAPACK/SLmake.inc)p Fw(.)-210 355 y(F)-8 b(or)31 b(example,)g(if)g(y)m(ou)f(wish)g(to)h (run)e(ScaLAP)-8 b(A)m(CK)31 b(on)f(a)h(DEC)f(ALPHA,)17 565 y Fr(cp)47 b(INSTALL/SLmake.ALPHA)c(SLmake.inc)-69 775 y Fw(Edit)30 b(the)h Fr(SLmake.inc)d Fw(mak)m(e)j(include)f(\014le) h(to)g(con)m(tain)g(the)g(follo)m(wing:)-99 960 y(1.)46 b(Sp)s(ecify)30 b(the)g(complete)i(path)e(to)i(the)e(top)h(lev)m(el)h Fr(SCALAPACK)c Fw(directory)j(called)g Fr(home)p Fw(.)-99 1147 y(2.)46 b(Iden)m(tify)36 b(the)g(platform)f(to)h(whic)m(h)g(y)m (ou)g(will)f(b)s(e)g(installing)i(the)f(libraries.)56 b(If)35 b(y)m(our)g(directory)17 1260 y(structure)24 b(for)f(ScaLAP)-8 b(A)m(CK)25 b(is)f(di\013eren)m(t)g(than)g(the)g (aforemen)m(tioned)h(structure,)g(y)m(ou)f(will)g(also)17 1373 y(need)30 b(to)i(sp)s(ecify)e(lo)s(cations)h(of)g Fr(SCALAPACK)d Fw(sub)s(directories.)-99 1560 y(3.)46 b(De\014ne)41 b Fr(F77)p Fw(,)h Fr(NOOPT)p Fw(,)d Fr(F77FLAGS)p Fw(,)f Fr(CC)p Fw(,)i Fr(CCFLAGS)p Fw(,)f Fr(LOADER)p Fw(,)g Fr(LOADFLAGS)p Fw(,)f Fr(ARCH)p Fw(,)h Fr(ARCHFLAGS)p Fw(,)17 1672 y(and)28 b Fr(RANLIB)p Fw(,)e(to)j(refer)f(to)h(the)f (compiler)g(and)g(compiler)h(options,)g(loader)f(and)g(loader)g (options,)17 1785 y(library)33 b(arc)m(hiv)m(er)h(and)f(options,)h(and) e(ranlib)h(for)g(y)m(our)g(mac)m(hine.)49 b(If)33 b(y)m(our)g(mac)m (hine)h(do)s(es)f(not)17 1898 y(require)d(ranlib)g(set)h Fr(RANLIB)46 b(=)i(echo)p Fw(.)-99 2085 y(4.)e(Sp)s(ecify)30 b(the)g(C)g(prepro)s(cessor)f(de\014nitions)h(for)g(compilation,)i Fr(BLACSDBGLVL)27 b Fw(and)i Fr(CDEFS)p Fw(.)g(The)17 2198 y(p)s(ossible)42 b(v)-5 b(alues)42 b(for)g Fr(BLACSDBGLVL)d Fw(are)j(0)g(and)f(1.)76 b(The)42 b(p)s(ossible)f(options)h(for)g Fr(CDEFS)f Fw(are)17 2311 y Fr(-DAdd)p 263 2311 29 4 v 33 w Fw(,)28 b Fr(-DNoChange)p Fw(,)d(and)h Fr(-DUPCASE)p Fw(.)e(If)i(y)m(ou)h(are)g(on)g(a)g(DEC)f(ALPHA,)h(y)m(ou)g(m)m(ust)g (also)h(add)17 2424 y Fr(-DNO)p 215 2424 V 34 w(IEEE)h Fw(to)i(the)f(de\014nition)g(of)h Fr(CDEFS)p Fw(.)-99 2611 y(5.)46 b(Sp)s(ecify)30 b(the)g(lo)s(cations)i(of)f(the)f(needed)g (libraries:)41 b Fr(BLACS)p Fw(,)29 b Fr(PVM)h Fw(or)g Fr(MPI)p Fw(,)g Fr(BLAS)f Fw(and)h Fr(LAPACK)p Fw(.)-210 2796 y(This)23 b(mak)m(e)i(include)f(\014le)g(is)g(referenced)g(inside) g(eac)m(h)h(of)f(the)g(mak)m(e\014les)h(in)f(the)g(v)-5 b(arious)24 b(sub)s(directories.)-210 2909 y(As)37 b(a)h(result,)h (there)f(is)f(no)h(need)f(to)h(edit)g(the)f(mak)m(e\014les)i(in)e(the)g (sub)s(directories.)62 b(All)38 b(information)-210 3022 y(that)31 b(is)f(mac)m(hine)h(sp)s(eci\014c)g(has)f(b)s(een)f (de\014ned)h(in)g(this)g(include)g(\014le.)-210 3262 y Fq(2.2.1)105 b(F)-9 b(urther)35 b(Details)g(to)g(obtain)g(BLA)m(CS,)f (BLAS,)i(LAP)-9 b(A)m(CK)35 b(and)g(PVM)g(or)g(MPI)-69 3433 y Fw(Prebuilt)20 b(BLA)m(CS)g(libraries)h(are)f(a)m(v)-5 b(ailable)23 b(on)d Fn(netlib)g Fw(for)h(a)f(v)-5 b(ariet)m(y)22 b(of)e(arc)m(hitectures)i(and)e(message)-210 3546 y(passing)30 b(library)g(com)m(binations;)17 3732 y Fr(http://www.netlib.org/bla)o (cs/a)o(rchi)o(ves)-210 3917 y Fw(otherwise,)h(the)g(BLA)m(CS)f (distribution)g(tar)g(\014les)h(are)f(a)m(v)-5 b(ailable.)17 4103 y Fr(http://www.netlib.org/bla)o(cs/m)o(pibl)o(acs)o(.tgz)17 4216 y(http://www.netlib.org/bla)o(cs/p)o(vmbl)o(acs)o(.tgz)-210 4401 y Fw(After)44 b(obtaining)g(the)g(source,)k(follo)m(w)d(the)f (instructions)f(in)g(\\A)i(User's)f(Guide)f(to)i(the)f(BLA)m(CS")-210 4514 y(or)d(in)h(the)f("Installing)i(the)f(BLA)m(CS")g(section)g(of)g (the)g(BLA)m(CS)f(w)m(ebpage)h(to)h(install)f(the)g(library)-8 b(.)-210 4627 y(Instructions)38 b(for)g(running)f(the)i(BLA)m(CS)g(T)-8 b(est)39 b(Suite)f(can)h(b)s(e)f(found)g(in)g(\\A)h(User's)g(Guide)f (to)i(the)-210 4740 y(BLA)m(CS)30 b(T)-8 b(ester".)42 b(Both)31 b(of)g(these)g(do)s(cumen)m(ts)f(are)g(a)m(v)-5 b(ailable)33 b(via)e(the)g Fn(blacs)f(index)h Fw(on)f Fn(netlib)p Fw(.)1567 4989 y(6)p eop end %%Page: 7 7 TeXDict begin 7 6 bop -69 -269 a Fw(If)31 b(an)f(v)m(endor)h(optimized) h(BLAS)f(library)f(is)h(not)g(a)m(v)-5 b(ailable,)34 b(then)d(the)g(user)f(can)h(install)h(A)-8 b(TLAS)-210 -156 y(whic)m(h)39 b(will)h(generate)h(an)e(optimized)i(BLAS)e(library) g(for)g(the)h(giv)m(en)g(arc)m(hitecture,)k(or)39 b(install)i(the)-210 -43 y(F)-8 b(ortran77)32 b(reference)f(implemen)m(tation)h(of)e(the)h (BLAS.)17 130 y Fr(http://www.netlib.org/bla)o(s/fa)o(q.ht)o(ml#)o(1.6) 17 243 y(http://www.netlib.org/atl)o(as/)17 356 y (http://www.netlib.org/bla)o(s/bl)o(as.t)o(gz)-210 529 y Fw(An)41 b(optimized)h(BLAS)g(library)f(is)g(essen)m(tial)j(for)d(b)s (est)g(p)s(erformance,)j(and)d(use)g(of)h(the)f(F)-8 b(ortran77)-210 642 y(reference)31 b(implemen)m(tation)h(BLAS)e(is)g (strongly)h(discouraged.)-69 755 y(If)45 b(an)g(v)m(endor)h(optimized)g (LAP)-8 b(A)m(CK)45 b(library)g(is)h(not)f(a)m(v)-5 b(ailable,)52 b(then)45 b(the)h(user)e(can)i(install)-210 868 y(LAP)-8 b(A)m(CK)31 b(from)f(netlib.)17 1041 y Fr(http://www.netlib.org/lap)o (ack/)o(faq.)o(htm)o(l#1.)o(1)17 1154 y(http://www.netlib.org/lap)o (ack/)17 1266 y(http://www.netlib.org/lap)o(ack/)o(lapa)o(ck.)o(tgz)-69 1439 y Fw(If)g(a)h(v)m(endor-supplied)f(MPI)h(or)f(PVM)h(library)g(is)g (not)f(a)m(v)-5 b(ailable,)34 b(p)s(ortable)d(implemen)m(tations)h(of) -210 1552 y(PVM)f(and)f(MPI)g(\(MPICH)h(and)f(LAM/MPI\))h(are)g(a)m(v) -5 b(ailable:)43 b(If)30 b(a)h(v)m(endor-supplied)f(MPI)g(or)h(PVM)-210 1665 y(library)38 b(is)g(not)g(a)m(v)-5 b(ailable,)43 b(p)s(ortable)38 b(implemen)m(tations)i(of)f(PVM)f(and)g(MPI)g (\(MPICH,)g(MPICH2,)-210 1778 y(Op)s(en)29 b(MPI)h(and)g(LAM/MPI\))i (are)e(a)m(v)-5 b(ailable:)17 1951 y Fr(http://www.netlib.org/pvm)o(3/) 17 2064 y(http://www-unix.mcs.anl.g)o(ov/m)o(pi/m)o(pic)o(h1/)17 2177 y(http://www-unix.mcs.anl.g)o(ov/m)o(pi/m)o(pic)o(h//)17 2290 y(http://www.lam-mpi.org/)24 b(http://www.open-mpi.org/)-210 2463 y Fw(Installation)36 b(instructions)e(for)h(PVM)g(are)f(con)m (tained)i(in)e(the)h(PVM)g(Users')g(Guide)f([8)q(].)53 b(An)34 b(Instal-)-210 2576 y(lation)h(Guide)f(for)g(MPICH/MPICH2)g(is) h(a)m(v)-5 b(ailable)36 b(on)e(the)g(aforemen)m(tioned)h(w)m(ebpage.)53 b(Lik)m(ewise,)-210 2689 y(installation)41 b(instructions)e(for)g(Op)s (en)f(MPI)h(and)g(LAM/MPI)h(are)g(con)m(tained)g(on)f(their)h(resp)s (ectiv)m(e)-210 2802 y(w)m(ebpage.)-210 3042 y Fm(2.3)112 b(Edit)37 b(the)h(top-lev)m(el)g Fl(SCALAPACK/Makefile)k Fm(and)c(t)m(yp)s(e)g Fl(make)-69 3214 y Fw(A)e(top-lev)m(el)i Fr(SCALAPACK/Makefile)31 b Fw(has)k(b)s(een)g(included)g(to)h(build)f (all)i(libraries,)g(testing)g(exe-)-210 3327 y(cutables)29 b(and)f(examples.)41 b(This)28 b(mak)m(e\014le)i(is)f(v)m(ery)g(useful) f(if)g(y)m(ou)h(are)g(familiar)h(with)e(the)h(installation)-210 3440 y(pro)s(cess)36 b(and)h(wish)f(to)i(do)f(a)g(quic)m(k)g (installation.)63 b(Y)-8 b(our)37 b(instructions)g(to)g(build)f(the)h (ScaLAP)-8 b(A)m(CK)-210 3553 y(library)30 b(are:)17 3748 y Fr(cd)47 b(SCALAPACK)17 3929 y(make)-69 4124 y Fw(If)29 b(y)m(ou)h(wish)f(to)h(build)f(the)h(testing)g(executables)h (\(assuming)f(that)g(all)h(libraries)e(ha)m(v)m(e)i(previously)-210 4237 y(b)s(een)f(built\),)g(y)m(ou)h(can)g(sp)s(ecify)17 4432 y Fr(make)47 b(exe)p Fw(.)-69 4627 y(If)22 b(y)m(ou)h(wish)e(to)i (build)e(the)i(examples)g(\(assuming)f(that)h(all)g(libraries)f(ha)m(v) m(e)i(previously)e(b)s(een)g(built\),)-210 4740 y(y)m(ou)31 b(can)f(sp)s(ecify)1567 4989 y(7)p eop end %%Page: 8 8 TeXDict begin 8 7 bop 17 -269 a Fr(make)47 b(example)p Fw(.)-69 -56 y(If)35 b(y)m(ou)i(wish)e(to)h(build)f(only)h(selected)h (libraries)f(or)g(executables,)j(y)m(ou)d(can)g(mo)s(dify)f(the)h Fr(lib)f Fw(or)-210 57 y Fr(exe)29 b Fw(de\014nition)i(accordingly)-8 b(.)-69 170 y(T)g(o)21 b(sp)s(ecify)f(the)g(data)h(t)m(yp)s(es)g(to)g (b)s(e)e(built,)k(y)m(ou)d(will)h(need)f(to)h(mo)s(dify)e(the)i (de\014nition)f(of)g Fr(PRECISIONS)p Fw(.)-210 283 y(By)31 b(default,)f Fr(PRECISIONS)e Fw(is)i(set)h(to)-210 470 y Fr(PRECISIONS)45 b(=)i(single)g(double)f(complex)f(complex16)-210 658 y Fw(to)c(build)e(all)j(precisions)e(of)h(the)f(libraries)h(and)e (executables.)73 b(If)40 b(y)m(ou)g(only)h(wish)e(to)i(compile)h(the) -210 771 y(single)f(precision)g(real)f(v)m(ersion)h(of)g(a)g(target)g (sp)s(ecify)f Fr(single)p Fw(,)i(for)e(double)g(precision)g(real)h(sp)s (ecify)-210 884 y Fr(double)p Fw(,)i(for)e(single)h(precision)f (complex)h(sp)s(ecify)f Fr(complex)p Fw(,)h(and)f(for)g(double)g (precision)g(complex)-210 997 y(sp)s(ecify)30 b Fr(complex16)p Fw(.)-69 1109 y(By)e(default,)g(the)g(presence)f(of)h(no)f(argumen)m (ts)h(follo)m(wing)h(the)e Fr(make)f Fw(command)i(will)f(result)h(in)f (the)-210 1222 y(building)j(of)g(all)h(data)g(t)m(yp)s(es.)41 b(The)30 b(mak)m(e)i(command)e(can)g(b)s(e)g(run)f(more)i(than)f(once)h (to)g(add)f(another)-210 1335 y(data)h(t)m(yp)s(e)g(to)g(the)f(library) g(if)h(necessary)-8 b(.)-69 1448 y(Y)g(ou)31 b(ma)m(y)g(then)g(pro)s (ceed)f(to)h(running)e(eac)m(h)j(of)f(the)g(individual)f(test)h (suites.)42 b(See)31 b(section)g(2.4)h(for)-210 1561 y(details)38 b(on)f(the)h(PBLAS)f(T)-8 b(est)38 b(Suite,)h(section)f (2.6)h(to)f(run)e(the)h(REDIST)f(test)j(suite,)g(and)e(section)-210 1674 y(2.7)g(for)e(details)i(on)f(the)g(ScaLAP)-8 b(A)m(CK)36 b(T)-8 b(est)36 b(Suite.)57 b(After)36 b(all)h(testing)g(has)e(b)s(een) g(completed,)k(y)m(ou)-210 1787 y(can)j(remo)m(v)m(e)h(all)g(ob)5 b(ject)43 b(\014les)f(from)f(the)h(v)-5 b(arious)42 b(sub)s (directories)f(and)g(all)i(executables)g(from)f(the)-210 1900 y Fr(SCALAPACK/TESTING)26 b Fw(directory)31 b(b)m(y)f(t)m(yping)17 2112 y Fr(make)47 b(clean)p Fw(.)-69 2325 y(Or,)22 b(y)m(ou)f(can)h (selectiv)m(ely)h(remo)m(v)m(e)g(only)d(the)h(ob)5 b(ject)22 b(\014les)f(with)f Fr(make)47 b(cleanlib)p Fw(,)21 b Fr(make)46 b(cleanexe)-210 2438 y Fw(to)31 b(remo)m(v)m(e)h(only)e(the) h(testing)g(routine)f(ob)5 b(ject)31 b(\014les)f(and)g(executable)i (\014les,)e(or)h Fr(make)46 b(cleanexample)-210 2551 y Fw(to)31 b(remo)m(v)m(e)h(only)e(the)h(ob)5 b(ject)31 b(\014les)g(created)g(for)f(the)h(examples.)-210 2794 y Fm(2.4)112 b(Run)38 b(the)f(PBLAS)g(T)-9 b(est)37 b(Suite)-69 2966 y Fw(The)29 b(PBLAS)f(testing)i(executables)h(are)e(created)h(in)f (the)g Fr(PBLASTSTdir)d Fw(directory)j(as)h(de\014ned)d(in)-210 3079 y Fr(SLmake.inc)p Fw(.)37 b(By)29 b(default,)g(these)g(testing)h (executables)g(are)f(copied)g(in)m(to)g(the)g Fr(SCALAPACK/TESTING)-210 3192 y Fw(directory)-8 b(.)40 b(F)-8 b(or)26 b(the)g(Lev)m(el)h(1)f (PBLAS)g(routines,)g(the)g(testing)h(executables)g(are)f(called)h Fr(xspblas1tst)p Fw(,)-210 3305 y Fr(xdpblas1tst)p Fw(,)36 b Fr(xcpblas1tst)p Fw(,)g(and)g Fr(xzpblas1tst)p Fw(.)57 b(Lik)m(ewise,)40 b(the)d(testing)i(executables)f(for)f(the)-210 3417 y(Lev)m(el)43 b(2)g(PBLAS)e(are)i Fr(xspblas2tst)p Fw(,)f Fr(xdpblas2tst)p Fw(,)g Fr(xcpblas2tst)p Fw(,)g(and)f Fr(xzpblas2tst)p Fw(.)72 b(The)-210 3530 y(testing)33 b(executables)g(for)e(the)h(Lev)m(el)h(3)f(PBLAS)f(are)h Fr(xspblas3tst)p Fw(,)e Fr(xdpblas3tst)p Fw(,)f Fr(xcpblas3tst)p Fw(,)-210 3643 y(and)40 b Fr(xzpblas3tst)p Fw(.)69 b(There)40 b(is)g(one)h(input)f(\014le)h(asso)s(ciated)h(with)e(eac)m(h)i(testing) g(executable.)73 b(F)-8 b(or)-210 3756 y(example,)50 b(the)45 b(input)f(\014le)i(for)f Fr(xspblas1tst)d Fw(is)j(called)h Fr(PSBLA1TST.dat)p Fw(.)82 b(The)44 b(input)g(\014les)i(are)-210 3869 y(copied)31 b(to)g(the)g Fr(PBLASTSTdir)c Fw(directory)k(at)g(the) f(time)h(the)g(executables)h(are)f(built.)-69 3982 y(F)-8 b(or)29 b(brevit)m(y)-8 b(,)29 b(w)m(e)g(shall)f(only)g(list)g (instructions)g(for)g(testing)h(PBLAS)e(executables)j(using)d(MPICH) -210 4095 y(on)32 b(a)g(net)m(w)m(ork)g(of)g(w)m(orkstations,)i(and)d (PVM)h(on)f(a)h(net)m(w)m(ork)h(of)f(w)m(orkstations.)46 b(Execution)32 b(instruc-)-210 4208 y(tions)f(for)f(the)g(v)-5 b(arious)31 b(distributed-memory)e(computers)i(are)f(mac)m(hine-dep)s (enden)m(t.)-210 4448 y Fq(T)-9 b(esting)35 b(instructions)h(with)e (MPICH)g(on)i(a)e(net)m(w)m(ork)h(of)g(w)m(orkstations)-69 4620 y Fw(F)-8 b(or)36 b(the)g(sak)m(e)h(of)e(an)h(example,)i(w)m(e)d (shall)h(assume)g(that)g(y)m(ou)f(ha)m(v)m(e)i(installed)f(the)g(p)s (ortable)g(im-)-210 4732 y(plemen)m(tation)30 b(of)e(MPI,)h(called)g Fr(MPICH)p Fw(,)e(and)h(built)g(the)h(PBLAS)f(tester)h(executables)h (for)e(eac)m(h)h(of)g(the)1567 4989 y(8)p eop end %%Page: 9 9 TeXDict begin 9 8 bop -210 -269 a Fw(mac)m(hines)33 b(used)f(in)g(y)m (our)g(application.)49 b(The)32 b(executable)i(\014les)e(are)h(not)g (required)f(to)h(b)s(e)f(stored)g(in)h(a)-210 -156 y(particular)g (directory)-8 b(.)49 b(Then,)32 b(to)h(run)f(the)g(executable,)k(y)m (ou)d(will)g(use)f(the)h(command)f Fr(mpirun)p Fw(.)46 b(F)-8 b(or)-210 -43 y(example,)17 159 y Fr(mpirun)46 b(-np)h Fk(<)p Fr(number)f(of)h(processes)p Fk(>)e(<)p Fr(executable)p Fk(>)-69 361 y Fw(where)31 b Fk(<)p Fr(executable)p Fk(>)e Fw(is)i(replaced)i(b)m(y)e Fr(xspblas1tst)p Fw(,)e(and)i(so)h (on.)45 b(If)31 b(the)h(net)m(w)m(ork)h(of)f(w)m(ork-)-210 474 y(stations)i(is)e(heterogeneous,)j(y)m(ou)d(will)h(need)f(to)i(sp)s (ecify)e(the)h Fr(-p4pg)e Fw(option)i(and)e(supply)g(a)i(text)h(\014le) -210 587 y(con)m(taining)27 b(the)f(names)g(of)f(the)h(mac)m(hines)g (and)f(the)h(lo)s(cations)h(of)f(the)g(executables)h(to)f(whic)m(h)g(y) m(ou)g(will)-210 700 y(spa)m(wn)k(tasks.)41 b(Refer)30 b(to)h(the)g Fr(mpirun)e Fw(manpage)i(for)f(complete)h(details.)-210 938 y Fq(T)-9 b(esting)35 b(instructions)h(with)e(PVM)i(on)f(a)f(net)m (w)m(ork)h(of)g(w)m(orkstations)-69 1110 y Fw(First,)j(insure)d(that)i (the)f(PVM)g(library)g(and)f(tester)i(executable)g(\014les)f(ha)m(v)m (e)i(b)s(een)d(compiled)h(for)-210 1223 y(eac)m(h)27 b(of)g(the)f(mac)m(hines)h(used)e(in)h(y)m(our)g(PVM)h(implemen)m (tation.)41 b(PVM)26 b(3.3)i(requires)e(that)g(executable)-210 1336 y(\014les)37 b(b)s(e)g(stored)h(in)f(a)h(particular)g(directory)g (so)g(that)g(the)g(PVM)f(daemon)h(can)g(\014nd)e(them.)62 b(In)37 b(the)-210 1448 y(general)32 b(case,)g(PVM)f(lo)s(oks)h(for)e (executable)j(\014les)e(in)f Fr(~/pvm3/bin/)p Fj(arch)p Fw(,)e(where)i Fn(ar)-5 b(ch)32 b Fw(sp)s(eci\014es)f(the)-210 1561 y(arc)m(hitecture)f(for)e(whic)m(h)f(the)i(executable)g(has)f(b)s (een)g(built.)39 b(F)-8 b(or)29 b(example,)h(if)e(one)g(wished)f(to)i (run)e(the)-210 1674 y(test)32 b(program)f(on)g(a)g(SUN)g(SP)-8 b(AR)m(Cstation)32 b(and)f(on)g(an)g(IBM)h(RS6000)g(w)m(orkstation,)h (appropriately)-210 1787 y(compiled)25 b(executable)h(\014les)f(need)g (to)g(b)s(e)f(placed)h(in)g Fr(~/pvm3/bin/SUN4)20 b Fw(and)k Fr(~/pvm3/bin/RS6K)d Fw(\(for)-210 1900 y(more)28 b(directory)h (information,)g(consult)g(the)f(PVM)h(do)s(cumen)m(tation\).)41 b(If)28 b(y)m(ou)g(wish)g(to)h(run)e(the)h(tests)-210 2013 y(on)36 b(mac)m(hines)g(that)g(are)g(not)g(connected)h(to)f(the)g (same)g(\014le)g(system,)i(y)m(ou)e(need)f(to)i(mak)m(e)f(sure)f(that) -210 2126 y(the)c(executable)h(is)e(a)m(v)-5 b(ailable)33 b(on)d(eac)m(h)h(\014le)g(system.)41 b(Next,)31 b(start)g(p)m(vm)f(b)m (y)h(t)m(yping)17 2328 y Fr(pvm)-69 2530 y Fw(A)m(t)f(this)f(p)s(oin)m (t,)h(y)m(ou)g(sp)s(ecify)f(the)g(mac)m(hines)h(that)g(are)f(to)h(tak)m (e)h(part)e(in)g(the)h(testing)g(pro)s(cess)f(\(see)-210 2643 y(the)35 b(PVM)g(do)s(cumen)m(tation)g(for)g(more)f (information\).)54 b(Finally)-8 b(,)38 b(to)d(test)h(the)e(REAL)h(PVM)g (Lev)m(el)h(1)-210 2755 y(PBLAS,)30 b(start)h(the)g(test)g(program)f(b) m(y)g(t)m(yping:)17 2957 y Fr(xspblas1tst)-69 3159 y Fw(on)c(one)h(of)f(the)g(mac)m(hines)h(that)g(is)f(a)g(mem)m(b)s(er)g (of)g(y)m(our)g(PVM)h(mac)m(hine.)40 b(This)25 b(program)h(will)g(then) -210 3272 y(instruct)h(the)g(PVM)h(daemon)f(to)h(start)g(pro)s(cesses)f (on)g(the)h(other)f(computers)g(in)g(y)m(our)h(PVM)f(mac)m(hine)-210 3385 y(and)j(y)m(ou)i(will)f(b)s(e)f(prompted)g(b)m(y)h(the)g(program)g (for)f(the)h(name)g(of)h(the)f(executable.)44 b(Mak)m(e)32 b(sure)e(that)-210 3498 y Fr(PSBLA1TST.dat)24 b Fw(is)j(lo)s(cated)i (in)e(the)h(same)g(directory)g(as)g Fr(xspblas1tst)p Fw(.)37 b(It)27 b(is)h(read)f(on)h(the)f(mac)m(hine)-210 3611 y(from)33 b(whic)m(h)h(y)m(ou)g(t)m(yp)s(e)g Fr(xspblas1tst)d Fw(and)i(its)h(con)m(ten)m(ts)h(distributed)e(to)i(the)f(other)g (computers)f(in)-210 3724 y(y)m(our)d(PVM)h(mac)m(hine.)-69 3837 y Fn(A)n(lternatively,)j(you)g(c)-5 b(an)34 b(use)f Fr(blacs)p 1246 3837 29 4 v 33 w(setup.dat)e Fn(to)j(p)-5 b(erform)36 b(much)d(of)h(this)g(pr)-5 b(o)g(c)g(ess.)47 b(This)34 b(\014le)-210 3950 y(sp)-5 b(e)g(ci\014es)31 b(the)f(name)g(of)g(the)g(exe)-5 b(cutable)30 b(and)h(the)f(machines)h (to)f(sp)-5 b(awn)32 b(in)d(your)i(pvm)f(cluster,)g(as)h(wel)5 b(l)-210 4063 y(as)33 b(a)g(few)f(other)h(fe)-5 b(atur)g(es.)43 b(Se)-5 b(e)32 b(the)h(\\A)f(User's)g(Guide)g(to)h(the)g(BLA)n(CS")e (for)i(details.)43 b(However,)33 b(the)-210 4175 y(use)f(of)h(this)g (\014le)g(is)g(not)g(r)-5 b(e)g(c)g(ommende)g(d)36 b(for)d(the)g(naive) g(user.)-69 4288 y Fw(Similar)j(commands)f(should)g(b)s(e)g(used)g(for) g(the)h(other)g(test)h(programs,)g(with)e(the)h(second)g(letter)-210 4401 y(`s')30 b(in)g(the)g(executable)i(and)d(data)i(\014le)f(replaced) g(b)m(y)g(`d',)g(`c',)h(or)f(`z'.)42 b(The)29 b(name)h(of)g(the)g (output)g(\014le)g(is)-210 4514 y(indicated)25 b(on)f(the)g(\014rst)g (line)g(of)h(the)f(input)g(\014le)g(and)g(is)g(curren)m(tly)g (de\014ned)f(to)i(b)s(e)f Fr(PSBLA1TST.SUMM)c Fw(for)-210 4627 y(the)31 b(REAL)g(v)m(ersion,)g(with)g(similar)g(names)g(for)f (the)h(other)h(data)f(t)m(yp)s(es.)42 b(The)31 b(user)f(ma)m(y)h(also)h (c)m(ho)s(ose)-210 4740 y(to)f(send)f(all)h(output)f(to)h(standard)f (error.)1567 4989 y(9)p eop end %%Page: 10 10 TeXDict begin 10 9 bop -210 -269 a Fm(2.5)112 b(Run)38 b(the)f(PBLAS)g(Timing)h(Suite)g(\(optional\))-109 -97 y Fw(a\))46 b(Go)31 b(to)g(the)g(directory)g Fr(SCALAPACK/PBLAS/TIMING) p Fw(.)-114 91 y(b\))45 b(T)m(yp)s(e)d Fr(make)f Fw(follo)m(w)m(ed)k(b) m(y)d(the)g(data)i(t)m(yp)s(es)e(desired.)76 b(F)-8 b(or)44 b(the)e(Lev)m(el)i(1)f(PBLAS)f(routines,)17 203 y(the)48 b(timing)g(executables)g(are)g(called)g Fr(xspblas1tim)p Fw(,)h Fr(xdpblas1tim)p Fw(,)g Fr(xcpblas1tim)p Fw(,)g(and)17 316 y Fr(xzpblas1tim)p Fw(,)21 b(and)g(are)h(created)h(in)f(the)g Fr(PBLASTSTdir)c Fw(directory)k(as)g(de\014ned)f(in)g Fr(SLmake.inc)p Fw(.)17 429 y(Lik)m(ewise,)j(the)c(timing)h (executables)h(for)e(the)g(Lev)m(el)i(2)f(PBLAS)e(are)i Fr(xspblas2tim)p Fw(,)f Fr(xdpblas2tim)p Fw(,)17 542 y Fr(xcpblas2tim)p Fw(,)28 b(and)i Fr(xzpblas2tim)p Fw(.)39 b(The)30 b(timing)h(executables)h(for)e(the)h(Lev)m(el)h(3)f(PBLAS)g (are)17 655 y Fr(xspblas3tim)p Fw(,)26 b Fr(xdpblas3tim)p Fw(,)g Fr(xcpblas3tim)p Fw(,)g(and)i Fr(xzpblas3tim)p Fw(.)37 b(There)27 b(is)i(one)f(input)g(\014le)17 768 y(asso)s(ciated)k(with)f(eac)m(h)h(timing)f(executable.)43 b(F)-8 b(or)31 b(example,)h(the)f(input)f(\014le)h(for)f Fr(xspblas1tim)17 881 y Fw(is)h(called)g Fr(PSBLA1TIM.dat)p Fw(.)38 b(The)30 b(input)f(\014les)h(are)h(copied)g(to)g(the)g Fr(PBLASTSTdir)c Fw(directory)k(at)17 994 y(the)g(time)g(the)f (executables)i(are)f(built.)-104 1181 y(c\))46 b(Run)30 b(the)i(timing)f(executables)i(on)e(the)g(desired)g(platform)g(as)h (analogously)g(describ)s(ed)f(in)f(Sec-)17 1294 y(tion)h(2.4.)-210 1538 y Fm(2.6)112 b(Run)38 b(the)f(REDIST)g(T)-9 b(est)36 b(Suite)-69 1709 y Fw(The)46 b(redistribution/cop)m(y)h(routines)f (allo)m(w)h(the)g(redistribution)e(of)i(a)f(2-D)i(blo)s(c)m(k)e(cyclic) i(dis-)-210 1822 y(tributed)24 b(general)i(or)e(trap)s(ezoidal)i (matrix)f(from)f(an)h(arbitrary)f Fk(P)e Fo(\002)9 b Fk(Q)23 b Fw(grid)i(with)f(arbitrary)g(blo)s(c)m(ksize)-210 1935 y(to)31 b(another)g(grid)f(with)g(arbitrary)g(blo)s(c)m(ksize.) -109 2148 y(a\))46 b(Go)31 b(to)g(the)g(directory)g Fr (SCALAPACK/REDIST/TESTING)o Fw(.)-114 2335 y(b\))45 b(T)m(yp)s(e)40 b Fr(make)g Fw(follo)m(w)m(ed)i(b)m(y)f(the)g(data)g(t)m(yp)s(es)g (desired.)71 b(The)40 b(testing)i(executables)g(are)f(called)17 2448 y Fr(xigemr)p Fw(,)34 b Fr(xsgemr)p Fw(,)g Fr(xdgemr)p Fw(,)g Fr(xcgemr)p Fw(,)g Fr(xzgemr)f Fw(for)h(the)h(redistribution)f (of)g(general)i(matrices.)17 2561 y(They)31 b(are)g(called)h Fr(xitrmr)p Fw(,)e Fr(xstrmr)p Fw(,)g Fr(xdtrmr)p Fw(,)g Fr(xctrmr)p Fw(,)g(and)g Fr(xztrmr)f Fw(for)i(trap)s(ezoidal)h(matri-) 17 2674 y(ces,)j(and)d(are)h(created)h(in)f(the)g Fr(REDISTdir/TESTING) 28 b Fw(directory)34 b(as)f(de\014ned)f(in)g Fr(SLmake.inc)p Fw(.)17 2787 y(There)23 b(is)f(one)i(input)e(\014le)g Fr(GEMR2D.dat)e Fw(for)j(general)h(matrices,)i(and)c(one)h(input)f (\014le)h Fr(TRMR2D.dat)17 2900 y Fw(for)30 b(trap)s(ezoidal)i (matrices.)42 b(Eac)m(h)31 b(line)f(of)h(the)f(input)g(\014le)g(is)h(a) g(separate)g(test.)-210 3143 y Fm(2.7)112 b(Run)38 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(T)-9 b(est)37 b(Suite)72 3315 y Fw(There)23 b(are)i(eigh)m(teen)g(distinct)f(test)h(programs)e(for)g(testing)i(the) f(ScaLAP)-8 b(A)m(CK)24 b(routines)g(of)g(the)-210 3428 y(follo)m(wing)32 b(t)m(yp)s(e:)42 b(LU,)31 b(Cholesky)-8 b(,)32 b(Band)e(LU,)h(Band)g(Cholesky)-8 b(,)31 b(General)h(T)-8 b(ridiagonal,)32 b(Band)f(T)-8 b(ridi-)-210 3541 y(agonal,)28 b(QR)d(\(R)m(Q,)g(LQ,)g(QL,)g(QP)-8 b(,)25 b(and)g(TZ\),)g(Linear)g (Least)h(Squares,)f(upp)s(er)f(Hessen)m(b)s(erg)h(reduction,)-210 3654 y(tridiagonal)30 b(reduction,)g(bidiagonal)f(reduction,)h(matrix)f (in)m(v)m(ersion,)h(the)f(symmetric)g(eigenproblem,)-210 3767 y(the)f(generalized)h(symmetric)f(eigenproblem,)h(the)e (nonsymmetric)h(eigenproblem,)g(and)f(the)h(singular)-210 3880 y(v)-5 b(alue)31 b(decomp)s(osition.)-69 3993 y(Eac)m(h)47 b(of)f(the)g(test)h(programs)e(is)h(automatically)j(timed)d(and)f(rep)s (orts)h(a)g(table)h(of)f(execution)-210 4105 y(times)40 b(and)f(mega\015op)i(rates.)69 b(There)39 b(is)h(one)g(input)f(\014le)h (for)f(eac)m(h)i(test)g(program.)68 b(As)40 b(previously)-210 4218 y(stated,)e(the)e(input)f(\014les)h(reside)f(in)h(the)g Fr(SCALAPACK/TESTING)31 b Fw(sub)s(directory)j(and)i(are)g(copied)g(in) m(to)-210 4331 y(the)31 b Fr(TESTINGdir)c Fw(directory)k(\(as)g(sp)s (eci\014ed)e(in)h(the)h Fr(SLmake.inc)d Fw(\014le\))i(at)h(the)g(time)g (the)f(executables)-210 4444 y(are)i(built.)45 b(All)32 b(testing)h(programs)e(o)s(ccur)h(in)f(four)h(precisions,)g(with)f(the) h(exception)h(of)f(the)g(singular)-210 4557 y(v)-5 b(alue)28 b(decomp)s(osition)g(whic)m(h)e(only)i(o)s(ccurs)f(in)g(SINGLE)f(and)h (DOUBLE)g(PRECISION)f(REAL.)h(F)-8 b(or)-210 4670 y(more)31 b(information)f(on)g(the)h(test)g(programs)f(and)g(ho)m(w)g(to)i(mo)s (dify)d(the)i(input)e(\014les)h(see)h(Section)h(3.)1545 4989 y(10)p eop end %%Page: 11 11 TeXDict begin 11 10 bop -69 -269 a Fw(Run)36 b(the)i(testing)h (executables)g(on)e(the)h(desired)f(platform)g(as)h(analogously)h (describ)s(ed)d(in)h(Sec-)-210 -156 y(tion)30 b(2.4.)42 b(F)-8 b(or)30 b(example,)g(in)g(double)f(precision,)h(the)g(testing)g (executables)h(are)f(named)f Fr(xdlu)p Fw(,)g Fr(xdllt)p Fw(,)-210 -43 y Fr(xddblu)p Fw(,)44 b Fr(xdgblu)p Fw(,)h Fr(xddtlu)p Fw(,)f Fr(xdpbllt)p Fw(,)h Fr(xdptllt)p Fw(,)f Fr(xdls)p Fw(,)h Fr(xdqr)p Fw(,)g Fr(xdhrd)p Fw(,)g Fr(xdtrd)p Fw(,)f Fr(xdbrd)p Fw(,)h Fr(xdinv)p Fw(,)-210 70 y Fr(xdsep)p Fw(,)21 b Fr(xdgsep)p Fw(,)g Fr(xdnep)p Fw(,)g(and)f Fr(xdsvd)p Fw(.)36 b(The)19 b(input)h(\014les)g(are)h Fr(LU.dat)p Fw(,)f Fr(LLT.dat)p Fw(,)h Fr(BLU.dat)p Fw(,)g Fr(BLLT.dat)p Fw(,)-210 183 y Fr(LS.dat)p Fw(,)29 b Fr(QR.dat)p Fw(,)g Fr(HRD.dat)p Fw(,)f Fr(TRD.dat)p Fw(,)h Fr(BRD.dat)p Fw(,)g Fr(INV.dat)p Fw(,)f Fr(SEP.dat)p Fw(,)h Fr(NEP.dat)p Fw(,)g(and)g Fr(SVD.dat)p Fw(.)-69 296 y(Similar)24 b(commands)g(can)g (b)s(e)f(used)h(for)f(alternate)j(precisions)e(of)g(the)h(same)f(test)h (program)f(or)g(other)-210 409 y(test)37 b(programs.)57 b(The)36 b(name)g(of)g(the)g(output)g(\014le)g(is)g(indicated)h(on)f (the)g(\014rst)f(line)h(of)h(the)f(input)f(\014le)-210 522 y(and)f(is)g(curren)m(tly)g(de\014ned)f(to)i(b)s(e)e Fr(lu.out)g Fw(for)g(the)i(LU)f(tester,)i(with)e(similar)g(names)g(for) g(the)h(other)-210 635 y(data)c(t)m(yp)s(es.)41 b(The)30 b(user)f(ma)m(y)i(also)h(c)m(ho)s(ose)f(to)g(send)f(all)h(output)f(to)h (standard)f(error.)-210 876 y Fm(2.8)112 b(Run)38 b(the)f(examples)72 1048 y Fw(In)d(the)h(EXAMPLE)f(directory)-8 b(,)37 b(y)m(ou)e(ha)m(v)m (e)h(a)f(program)f(declined)h(in)f(the)h(4)g(precisions)f(that)-210 1161 y(solv)m(es)29 b(a)g(linear)f(system)g(b)m(y)g(calling)h(the)g (ScaLAP)-8 b(A)m(CK)28 b(routine)g(PDGESV.)h(The)e(input)g(matrix)i (and)-210 1273 y(righ)m(t-and-sides)i(are)g(read)f(from)g(a)h(\014le.) 40 b(The)30 b(solution)h(is)g(written)f(to)h(a)g(\014le.)-69 1386 y(T)-8 b(o)30 b(compile)g(and)e(create)j(the)e(example)g (executables)i(\(assuming)e(that)g(all)h(librairies)g(ha)m(v)m(e)g (previ-)-210 1499 y(ously)g(b)s(een)g(built\),)h(t)m(yp)s(e)f Fr(make)47 b(example)28 b Fw(or)j Fr(make)e Fw(if)h(y)m(ou)h(are)g(in)f (the)g(EXAMPLE)h(directory)-8 b(.)-69 1612 y(This)30 b(will)g(create)i(the)f(four)f(executables)h(in)f(the)h(TESTING)e (directory:)-74 1789 y Fo(\017)46 b Fw(xsscaex:)c(for)30 b(the)h(example)g(using)f(single)h(precision)-74 1973 y Fo(\017)46 b Fw(xdscaex:)c(for)30 b(the)g(example)h(using)f(double)g (precision)-74 2156 y Fo(\017)46 b Fw(xcscaex:)c(for)31 b(the)f(example)h(using)f(complex)h(precision)-74 2340 y Fo(\017)46 b Fw(xzscaex:)c(for)31 b(the)f(example)h(using)f(double)g (complex)h(precision.)-210 2517 y(and)36 b(cop)m(y)i(the)f(input)f (\014les)h(in)f(the)h(TESTING)f(directory)-8 b(.)61 b(The)36 b(input)g(\014les)h(are)g Fr(CSCAEXMAT.dat)p Fw(,)-210 2629 y Fr(CSCAEXRHS.dat)p Fw(,)19 b Fr(DSCAEXMAT.dat)p Fw(,)g Fr(DSCAEXRHS.dat)p Fw(,)g Fr(SCAEX.dat)p Fw(,)h Fr(SSCAEXMAT.dat)p Fw(,)f Fr(SSCAEXRHS.dat)p Fw(,)-210 2742 y Fr(ZSCAEXMAT.dat)27 b Fw(and)j Fr(ZSCAEXRHS.dat)p Fw(.)-69 2855 y(T)-8 b(o)31 b(run)e(the)h(example)i(programs)e(using)f (MPI,)i(t)m(yp)s(e)g(\(for)f(single)h(precision)g(example\))17 3032 y Fr(mpirun)46 b(-np)h Fk(<)p Fr(number)f(of)h(processes)p Fk(>)e Fr(xsscaex)-69 3209 y Fw(The)27 b(results)g(will)h(b)s(e)f (written)g(in)g Fr(CSCAEXSOL.dat)d Fw(for)j(xcscaex)i(,)f Fr(DSCAEXSOL.dat)c Fw(for)j(xdscaex,)-210 3322 y Fr(SSCAEXSOL.dat)g Fw(for)j(xsscaex)h(and)p Fr(ZSCAEXSOL.dat)26 b Fw(for)k(xzscaex.)-210 3564 y Fm(2.9)112 b(T)-9 b(roublesho)s(oting)-69 3735 y Fw(If)26 b(failures)h(are)g(encoun)m(tered)h(during)d(an)m(y)i(phase) g(of)g(the)g(installation)h(or)f(testing)h(pro)s(cess,)f(please)-210 3848 y(\014rst)j(refer)g(to)h(the)f(F)-10 b(A)m(Q)31 b(and)f(Errata)h(\014les)f(for)g(information)17 4025 y Fr(http://www.netlib.org/sca)o(lapa)o(ck/f)o(aq.)o(html)17 4209 y(http://www.netlib.org/sca)o(lapa)o(ck/e)o(rra)o(ta.h)o(tml)-210 4386 y Fw(and)g(if)g(that)h(do)s(es)f(not)h(resolv)m(e)g(the)g (problem,)f(please)h(con)m(tact)i(the)e(dev)m(elop)s(ers)f(at)17 4563 y Fr(scalapack@cs.utk.edu)-69 4740 y Fw(This)g(release)h(of)g (ScaLAP)-8 b(A)m(CK)31 b(is)f(compatible)i(with)e(the)h(previous)e (release)j(\(v)m(ersion)f(1.7\).)1545 4989 y(11)p eop end %%Page: 12 12 TeXDict begin 12 11 bop -210 -269 a Fp(3)135 b(More)45 b(Ab)t(out)f(the)h(ScaLAP)-11 b(A)l(CK)44 b(T)-11 b(est)45 b(Suite)72 -66 y Fw(The)20 b(main)g(test)h(programs)f(for)g(the)h (ScaLAP)-8 b(A)m(CK)21 b(routines)f(are)g(lo)s(cated)i(in)e(the)g Fr(SCALAPACK/TESTING/LIN)-210 47 y Fw(and)h Fr(SCALAPACK/TESTING/EIG)15 b Fw(sub)s(directories)21 b(and)g(are)h(called)g Fr(pd)p 2213 47 29 4 v 2247 47 V 68 w(driver.f)e Fw(\()p Fr(ps)p 2816 47 V 2849 47 V 68 w(driver.f)f Fw(for)-210 160 y(REAL,)28 b Fr(pc)p 199 160 V 233 160 V 68 w(driver.f)e Fw(for)i(COMPLEX,)g(and)g Fr(pz)p 1607 160 V 1640 160 V 68 w(driver.f)e Fw(for)i(COMPLEX*16\),)i (where)d(the)p 3330 160 28 4 v 3363 160 V -210 273 a(is)41 b(replaced)g(b)m(y)g Fr(lu)p Fw(,)i Fr(qr)p Fw(,)g Fr(llt)p Fw(,)g(and)d(so)h(on.)72 b(Eac)m(h)42 b(of)f(the)g(test)g(programs)g (for)f(the)h(ScaLAP)-8 b(A)m(CK)-210 386 y(routines)30 b(has)g(a)h(similar)g(st)m(yle)g(of)g(input.)-69 499 y(The)24 b(follo)m(wing)h(sections)g(describ)s(e)f(the)g(di\013eren)m (t)g(input)g(formats)g(and)f(testing)i(v)m(eri\014cations.)40 b(The)-210 612 y(data)35 b(inside)f(the)g(input)f(\014les)h(is)h(only)f (test)h(data)g(designed)f(to)h(exercise)g(the)g(co)s(de.)52 b(It)34 b(should)f(NOT)-210 725 y(b)s(e)i(in)m(terpreted)h(in)f(an)m(y) i(w)m(a)m(y)f(as)g(OPTIMAL)f(p)s(erformance)g(v)-5 b(alues)36 b(for)g(an)m(y)g(of)g(the)g(routines.)56 b(F)-8 b(or)-210 838 y(b)s(est)32 b(p)s(erformance,)h(the)g(v)-5 b(alue)33 b(of)g(the)g(blo)s(c)m(ksize)h(NB)f(should)f(b)s(e)g(set)h(to)h(the)f (v)-5 b(alue)33 b(determined)f(b)m(y)-210 950 y(A)-8 b(TLAS)30 b(as)h(optimal.)41 b(A)31 b(go)s(o)s(d)f(starting)h(p)s(oin)m (t)g(is)f(a)h(m)m(ultiple)g(of)f(16)i({)e(e.g.,)i(16,)g(32,)f(48,)h (64.)-69 1063 y(The)e(test)h(programs)f(for)g(the)h(routines)f(are)h (driv)m(en)f(b)m(y)g(separate)i(data)f(\014les.)-69 1176 y(The)40 b(n)m(um)m(b)s(er)g(and)g(size)i(of)f(the)g(input)e(v)-5 b(alues)42 b(are)f(limited)g(b)m(y)g(certain)h(program)e(maxim)m(ums) -210 1289 y(whic)m(h)29 b(are)g(de\014ned)f(in)g(P)-8 b(ARAMETER)29 b(statemen)m(ts)i(in)e(the)g(main)g(test)h(programs.)39 b(These)29 b(program)-210 1402 y(maxim)m(ums)h(are:)-19 1581 y(P)m(arameter)118 b(Description)2075 b(V)-8 b(alue)p -69 1618 3380 4 v -19 1697 a(TOTMEM)99 b(T)-8 b(otal)32 b(Memory)f(a)m(v)-5 b(ailable)33 b(for)d(testing)h(data)846 b(2000000)-19 1810 y(INTGSZ)185 b(Length)30 b(in)h(b)m(ytes)f(to)i (store)f(a)f(INTEGER)g(elemen)m(t)879 b(4)-19 1923 y(REALSZ)169 b(Length)30 b(in)h(b)m(ytes)f(to)i(store)f(a)f(REAL)g(elemen)m(t)1054 b(4)-19 2036 y(DBLESZ)171 b(Length)30 b(in)h(b)m(ytes)f(to)i(store)f(a) f(DOUBLE)h(PRECISION)e(elemen)m(t)374 b(8)-19 2149 y(CPLXSZ)170 b(Length)30 b(in)h(b)m(ytes)f(to)i(store)f(a)f(COMPLEX)g(elemen)m(t)839 b(8)-19 2262 y(ZPLXSZ)180 b(Length)30 b(in)h(b)m(ytes)f(to)i(store)f(a) f(COMPLEX*16)h(elemen)m(t)657 b(16)-19 2374 y(NTESTS)166 b(Maxim)m(um)31 b(n)m(um)m(b)s(er)e(of)i(tests)g(to)g(b)s(e)f(p)s (erformed)938 b(20)-69 2571 y(The)41 b(user)f(should)g(mo)s(dify)g (TOTMEM)h(to)g(indicate)i(the)e(maxim)m(um)g(amoun)m(t)g(of)g(memory)g (in)-210 2684 y(b)m(ytes)34 b(his)g(system)g(has)g(a)m(v)-5 b(ailable.)53 b(Y)-8 b(ou)35 b(m)m(ust)f(remem)m(b)s(er)f(to)i(lea)m(v) m(e)h(ro)s(om)e(in)f(memory)h(for)g(the)g(op-)-210 2797 y(erating)e(system,)g(the)g(BLA)m(CS)f(bu\013er,)g(etc.)44 b(F)-8 b(or)32 b(example,)h(for)e(PVM,)h(the)f(parameters)h(w)m(e)g (use)f(are)-210 2910 y(TOTMEM=2,000,000,)41 b(and)c(the)f(length)i(of)e (a)i(DOUBLE)f(is)f(8.)61 b(Some)36 b(exp)s(erimen)m(ting)h(with)g(the) -210 3023 y(maxim)m(um)g(allo)m(w)m(able)i(v)-5 b(alue)37 b(of)g(TOTMEM)g(ma)m(y)g(b)s(e)f(required.)59 b(All)38 b(arra)m(ys)f(used)f(b)m(y)h(the)g(factor-)-210 3136 y(izations,)31 b(reductions,)f(solv)m(es,)h(and)e(condition)i(and)e (error)g(estimation)i(are)f(allo)s(cated)h(out)f(of)g(the)g(big)-210 3249 y(arra)m(y)h(called)g(MEM.)-69 3361 y(Please)42 b(note)f(that)g(these)f(parameter)h(maxim)m(ums)f(in)g(the)h(test)g (programs)f(assume)g(at)h(least)h(2)-210 3474 y(Megab)m(ytes)35 b(of)e(memory)g(p)s(er)f(pro)s(cess.)48 b(Th)m(us,)33 b(if)g(y)m(ou)g(do)g(not)g(ha)m(v)m(e)h(that)g(m)m(uc)m(h)f(space)g(p)s (er)f(pro)s(cess)-210 3587 y(then)e(y)m(ou)h(will)g(need)f(to)h(reduce) f(the)g(size)i(of)e(the)h(parameters.)-69 3700 y(F)-8 b(or)37 b(eac)m(h)g(of)e(the)h(test)h(programs,)g(the)f(test)h(program) e(generates)i(test)g(matrices)g(\(nonsymmet-)-210 3813 y(ric,)i(symmetric,)g(symmetric)e(p)s(ositiv)m(e-de\014nite,)j(or)d (upp)s(er)e(Hessen)m(b)s(erg\),)k(calls)f(the)f(ScaLAP)-8 b(A)m(CK)-210 3926 y(routines)29 b(in)f(that)i(path,)f(and)f(computes)i (a)f(solv)m(e)h(and/or)f(factorization)i(and/or)e(reduction)g(residual) -210 4039 y(error)24 b(c)m(hec)m(k)i(to)g(v)m(erify)f(that)g(eac)m(h)h (op)s(eration)f(has)f(p)s(erformed)f(correctly)-8 b(.)41 b(The)24 b(factorization)j(residual)-210 4152 y(is)35 b(only)g(calculated)i(if)e(the)h(residual)e(for)h(the)h(solv)m(e)g (step)f(exceeds)h(the)f(threshold)g(v)-5 b(alue)35 b(THRESH.)-210 4265 y(Th)m(us,)30 b(if)g(a)h(user)e(w)m(an)m(ts)i(b)s(oth)f(c)m(hec)m (ks)i(automatically)h(done)d(then)g(he)h(should)e(set)i(THRESH)e(=)h (0.0.)-69 4378 y(When)35 b(the)h(tests)g(are)g(run,)g(eac)m(h)h(test)f (ratio)g(that)h(is)e(greater)i(than)e(or)h(equal)g(to)g(the)g (threshold)-210 4491 y(v)-5 b(alue)31 b(causes)g(a)f(line)h(of)g (information)f(to)h(b)s(e)f(prin)m(ted)g(to)h(the)g(output)f(\014le.) -69 4603 y(A)e(table)g(of)g(timing)g(information)g(is)g(prin)m(ted)f (in)h(the)f(output)h(\014le)f(con)m(taining)j(execution)e(times)h(as) -210 4716 y(w)m(ell)i(as)g(mega\015op)g(rates.)1545 4989 y(12)p eop end %%Page: 13 13 TeXDict begin 13 12 bop -69 -269 a Fw(After)31 b(all)g(of)g(the)f (tests)h(ha)m(v)m(e)h(b)s(een)d(completed,)j(summary)d(lines)i(are)g (prin)m(ted)f(of)g(the)h(form)-210 -83 y Fr(Finished)93 b(180)47 b(tests,)f(with)h(the)g(following)e(results:)-115 30 y(180)i(tests)g(completed)e(and)i(passed)f(residual)g(checks.)-19 143 y(0)h(tests)g(completed)e(and)i(failed)f(residual)g(checks.)-19 256 y(0)h(tests)g(skipped)f(because)f(of)j(illegal)d(input)i(values.) -210 482 y(END)g(OF)g(TESTS.)-210 725 y Fm(3.1)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(LU)g(routines)-69 897 y Fw(The)30 b(LU)g(test)i(program)e(generates)i(random)e (nonsymmetric)g(test)h(matrices)h(with)e(v)-5 b(alues)31 b(in)f(the)-210 1010 y(in)m(terv)-5 b(al)23 b([-1,1],)i(calls)e(the)f (ScaLAP)-8 b(A)m(CK)22 b(routines)g(to)g(factor)h(and)e(solv)m(e)i(the) f(system,)i(and)d(computes)h(a)-210 1122 y(solv)m(e)32 b(and/or)e(factorization)j(residual)d(error)g(c)m(hec)m(k)i(to)f(v)m (erify)g(that)g(eac)m(h)h(op)s(eration)e(has)g(p)s(erformed)-210 1235 y(correctly)-8 b(.)88 b(Condition)45 b(estimation)i(and)e (iterativ)m(e)j(re\014nemen)m(t)d(routines)g(are)h(included)f(and)g (are)-210 1348 y(optionally)32 b(tested.)-69 1461 y(Sp)s(eci\014cally) -8 b(,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 1672 y Fo(\017)46 b Fw(F)-8 b(actor)33 b(the)d(matrix)h Fk(A)25 b Fw(=)g Fk(LU)40 b Fw(using)30 b(PxGETRF)-74 1859 y Fo(\017)46 b Fw(Solv)m(e)32 b(the)e(system)h Fk(AX)i Fw(=)25 b Fk(B)34 b Fw(using)c(PxGETRS,)g(and)g(compute)g(the)h(ratio)217 2046 y Fi(SRESID)f Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-74 2233 y Fo(\017)46 b Fw(If)30 b Fi(SRESID)g Fk(>)g Fi(THRESH)p Fw(,)f(then)i(compute)f(the)h(ratio)217 2420 y Fi(FRESID)f Fw(=)25 b Fo(jj)p Fk(LU)31 b Fo(\000)20 b Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-69 2630 y(The)k(exp)s(ert)g(driv)m(er)g(\()p Fr(PxGESVX)p Fw(\))e(p)s(erforms)h(condition)i(estimation)g(and)f(iterativ)m(e)j (re\014nemen)m(t)d(and)-210 2743 y(th)m(us)30 b(incorp)s(orates)h(the)f (follo)m(wing)i(additional)f(test:)-74 2954 y Fo(\017)46 b Fw(Compute)30 b(the)h(recipro)s(cal)g(condition)g(n)m(um)m(b)s(er)e (R)m(COND)i(using)f(PxGECON.)-74 3141 y Fo(\017)46 b Fw(Use)24 b(iterativ)m(e)i(re\014nemen)m(t)d(\(PxGERFS\))g(to)h(impro)m (v)m(e)g(the)g(solution,)h(and)e(recompute)g(the)h(ratio)217 3328 y Fi(SRESID)30 b Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-210 3567 y Fq(3.1.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(LU)f(Routines)72 3739 y Fw(An)30 b(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g (the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 3950 y Fr('ScaLAPACK)45 b(LU)i(factorization)d(input)j(file')-210 4063 y('MPI)g(machine.')-210 4175 y('lu.out')761 b(output)47 b(file)f(name)h(\(if)g(any\))-210 4288 y(6)1097 b(device)47 b(out)-210 4401 y(2)1097 b(number)47 b(of)g(problems)e(sizes)-210 4514 y(250)i(553)810 b(values)47 b(of)g(N)-210 4627 y(3)1097 b(number)47 b(of)g(NB's)-210 4740 y(2)g(3)h(5)906 b(values)47 b(of)g(NB)1545 4989 y Fw(13)p eop end %%Page: 14 14 TeXDict begin 14 13 bop -210 -269 a Fr(2)1097 b(number)47 b(of)g(NRHS's)-210 -156 y(1)g(5)1002 b(values)47 b(of)g(NRHS)-210 -43 y(3)1097 b(Number)47 b(of)g(NBRHS's)-210 70 y(1)g(3)h(5)906 b(values)47 b(of)g(NBRHS)-210 183 y(5)1097 b(Number)47 b(of)g(processor)e(grids)h(\(ordered)g(pairs)g(of)h(P)h(&)f(Q\))-210 296 y(1)g(4)h(2)f(1)h(8)715 b(values)47 b(of)g(P)-210 409 y(1)g(2)h(4)f(8)h(1)715 b(values)47 b(of)g(Q)-210 522 y(1.0)1001 b(threshold)-210 635 y(T)1097 b(\(T)48 b(or)f(F\))g(Test)g(Cond.)f(Est.)h(and)g(Iter.)f(Ref.)h(Routines)-210 878 y Fm(3.2)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(Band)h(and)g(T)-9 b(ridiagonal)39 b(LU)f(routines)-69 1049 y Fw(The)33 b(LU)h(test)g(program)g(generates)h(random)e (nonsymmetric)g(band)g(test)i(matrices)f(with)g(v)-5 b(alues)-210 1162 y(in)37 b(the)g(in)m(terv)-5 b(al)39 b([-1,1],)i(calls)d(the)g(ScaLAP)-8 b(A)m(CK)37 b(routines)g(to)h (factor)h(and)d(solv)m(e)j(the)e(system,)j(and)-210 1275 y(computes)27 b(a)g(solv)m(e)i(and/or)d(factorization)k(residual)d (error)f(c)m(hec)m(k)i(to)g(v)m(erify)f(that)h(eac)m(h)g(op)s(eration)f (has)-210 1388 y(p)s(erformed)i(correctly)-8 b(.)-69 1501 y(Sp)s(eci\014cally)g(,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(test:)-74 1712 y Fo(\017)46 b Fw(Compute)32 b(the)g(Band)g(or)g(T)-8 b(ridiagonal)33 b(LU)f(factorization)i(using)e(PxDBTRF)g(\(PxGBTRF)h (or)17 1825 y(PxDTTRF\))-74 2011 y Fo(\017)46 b Fw(Solv)m(e)28 b(the)g(system)f Fk(AX)33 b Fw(=)25 b Fk(B)31 b Fw(using)c(PxDBTRS)f (\(PxGBTRS)h(or)h(PxDTTRS\),)e(and)h(compute)17 2124 y(the)k(ratio)217 2311 y Fi(SRESID)f Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-210 2551 y Fq(3.2.1)105 b(Input)36 b(File)g(for)h(T)-9 b(esting)36 b(the)g(ScaLAP)-9 b(A)m(CK)37 b(Band)f(and)g(T)-9 b(ridiagonal)37 b(LU)f(Rou-)109 2664 y(tines)72 2836 y Fw(An)30 b(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g (the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 3046 y Fr('ScaLAPACK,)45 b(Version)h(1.5,)g(banded)g(linear)g(systems)g (input)h(file')-210 3159 y('PVM.')-210 3272 y('')1431 b(output)46 b(file)h(name)g(\(if)g(any\))-210 3385 y(6)1479 b(device)46 b(out)-210 3498 y('T')1383 b(define)46 b(transpose)g(or)h (not)-162 3611 y(7)g(3)h(4)f(8)1527 b(number)46 b(of)h(problem)f(sizes) -162 3724 y(2)h(5)h(17)f(28)g(37)g(121)g(200)g(1023)g(2048)f(3073)190 b(values)46 b(of)h(N)-162 3837 y(6)1479 b(number)46 b(of)h(bandwidths) -162 3950 y(1)g(2)h(3)f(15)g(6)95 b(8)621 b(values)46 b(of)h(BWL)-162 4063 y(2)g(1)h(1)f(4)95 b(15)47 b(6)668 b(values)46 b(of)i(BWU)-210 4175 y(1)1479 b(number)46 b(of)i(NB's)-210 4288 y(-1)f(3)h(4)f(5)1145 b(values)46 b(of)i(NB)f(\(-1)g(for)g(automatic)e(determination\))-210 4401 y(1)1479 b(number)46 b(of)i(NRHS's)e(\(must)g(be)h(1\))-210 4514 y(8)1479 b(values)46 b(of)i(NRHS)-210 4627 y(1)1479 b(number)46 b(of)i(NBRHS's)d(\(ignored\))-210 4740 y(1)1479 b(values)46 b(of)i(NBRHS)e(\(ignored\))1545 4989 y Fw(14)p eop end %%Page: 15 15 TeXDict begin 15 14 bop -210 -269 a Fr(4)1432 b(number)46 b(of)h(process)f(grids)-210 -156 y(1)h(2)h(3)f(4)h(5)f(7)h(8)f(15)g(26) h(47)f(64)286 b(values)46 b(of)h("Number)f(of)h(Process)f(Columns")-210 -43 y(3.0)1383 b(threshold)-210 199 y Fm(3.3)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(LL)-9 b(T)37 b(routines)-69 371 y Fw(The)f(Cholesky)h(test)h(program)e (generates)i(random)e(symmetric)h(test)h(matrices)g(with)e(v)-5 b(alues)37 b(in)-210 484 y(the)f(in)m(terv)-5 b(al)38 b([-1,1])g(and)d(then)h(mo)s(di\014es)f(these)i(matrices)g(to)g(b)s(e)e (diagonally)j(dominan)m(t)e(with)g(p)s(osi-)-210 597 y(tiv)m(e)c(diagonal)g(elemen)m(ts)g(th)m(us)e(creating)i(symmetric)f (p)s(ositiv)m(e-de\014nite)h(matrices.)42 b(It)31 b(then)f(calls)i(the) -210 710 y(ScaLAP)-8 b(A)m(CK)31 b(routines)g(to)g(factor)h(and)e(solv) m(e)i(the)f(system,)h(and)e(computes)h(a)g(solv)m(e)h(and/or)f(factor-) -210 823 y(ization)26 b(residual)d(error)h(c)m(hec)m(k)h(to)g(v)m (erify)f(that)h(eac)m(h)g(op)s(eration)g(has)e(p)s(erformed)g (correctly)-8 b(.)40 b(Condition)-210 935 y(estimation)32 b(and)e(iterativ)m(e)j(re\014nemen)m(t)d(routines)g(are)h(included)e (and)h(optionally)i(tested.)-69 1048 y(Sp)s(eci\014cally)-8 b(,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 1253 y Fo(\017)46 b Fw(Compute)30 b(the)h(LL)-8 b(T)30 b(factorization)j(using)d(PxPOTRF) -74 1437 y Fo(\017)46 b Fw(Solv)m(e)32 b(the)e(system)h Fk(AX)i Fw(=)25 b Fk(B)34 b Fw(using)c(PxPOTRS,)f(and)h(compute)h(the)f (ratio)217 1622 y Fi(SRESID)g Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-74 1807 y Fo(\017)46 b Fw(IF)31 b Fi(SRESID)e Fk(>)h Fi(THRESH)p Fw(,)g(then)g(compute)h(the)f(ratio)217 1992 y Fi(FRESID)g Fw(=)25 b Fo(jj)p Fk(LL)858 1959 y Fh(T)934 1992 y Fo(\000)20 b Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-69 2196 y(The)k(exp)s(ert)g(driv)m(er)g(\()p Fr(PxPOSVX)p Fw(\))e(p)s(erforms)h(condition)i(estimation)g(and)f (iterativ)m(e)j(re\014nemen)m(t)d(and)-210 2309 y(th)m(us)30 b(incorp)s(orates)h(the)f(follo)m(wing)i(additional)f(tests:)-74 2514 y Fo(\017)46 b Fw(Compute)30 b(the)h(recipro)s(cal)g(condition)g (n)m(um)m(b)s(er)e(R)m(COND)i(using)f(PxPOCON.)-74 2698 y Fo(\017)46 b Fw(Use)24 b(iterativ)m(e)i(re\014nemen)m(t)d (\(PxPORFS\))g(to)h(impro)m(v)m(e)g(the)g(solution,)h(and)e(recompute)g (the)h(ratio)217 2883 y Fi(SRESID)30 b Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-210 3122 y Fq(3.3.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(LL)-9 b(T)34 b(Routines)72 3294 y Fw(An)c(annotated)i(example)f(of)f(an)g(input)g (\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.) -210 3498 y Fr('ScaLAPACK)45 b(LLT)i(factorization)d(input)i(file')-210 3611 y('MPI)h(machine.')-210 3724 y('lltest.out')999 b(output)46 b(file)h(name)f(\(if)h(any\))-210 3837 y(6)1479 b(device)46 b(out)-210 3950 y(2)1479 b(number)46 b(of)i(problems)d (sizes)-210 4063 y(250)i(553)1192 b(values)46 b(of)i(N)-210 4175 y(3)1479 b(number)46 b(of)i(NB's)-210 4288 y(2)f(3)h(5)1288 b(values)46 b(of)i(NB)-210 4401 y(2)1479 b(number)46 b(of)i(NRHS's)-210 4514 y(1)f(5)1384 b(values)46 b(of)i(NRHS)-210 4627 y(3)1479 b(Number)46 b(of)i(NBRHS's)-210 4740 y(1)f(3)h(5)1288 b(values)46 b(of)i(NBRHS)1545 4989 y Fw(15)p eop end %%Page: 16 16 TeXDict begin 16 15 bop -210 -269 a Fr(5)1479 b(Number)46 b(of)i(processor)d(grids)h(\(ordered)g(pairs)g(of)h(P)h(&)f(Q\))-210 -156 y(1)g(4)h(2)f(8)h(1)1097 b(values)46 b(of)i(P)-210 -43 y(1)f(2)h(4)f(1)h(8)1097 b(values)46 b(of)i(Q)-210 70 y(1.0)1383 b(threshold)-210 183 y(T)1479 b(\(T)47 b(or)h(F\))f(Test)f(Cond.)h(Est.)f(and)h(Iter.)g(Ref.)f(Routines)-210 426 y Fm(3.4)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(Band)h(and)g(T)-9 b(ridiagonal)39 b(LL)-9 b(T)37 b(routines)-69 598 y Fw(The)27 b(Cholesky)g(test)h(program)f (generates)h(random)f(symmetric)g(p)s(ositiv)m(e)h(de\014nite)g(band)e (or)h(tridi-)-210 711 y(agonal)e(test)f(matrices)h(with)e(v)-5 b(alues)24 b(in)f(the)g(in)m(terv)-5 b(al)25 b([-1,1].)40 b(It)24 b(then)f(calls)h(the)g(ScaLAP)-8 b(A)m(CK)24 b(routines)-210 824 y(to)i(factor)h(and)e(solv)m(e)i(the)e(system,)i (and)e(computes)h(a)g(solv)m(e)h(residual)e(error)g(c)m(hec)m(k)i(to)g (v)m(erify)f(that)g(eac)m(h)-210 937 y(op)s(eration)31 b(has)f(p)s(erformed)f(correctly)-8 b(.)-69 1050 y(Sp)s(eci\014cally)g (,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 1262 y Fo(\017)46 b Fw(Compute)25 b(the)h(Band)f(or)g(T)-8 b(ridiagonal)27 b(LL)-8 b(T)25 b(factorization)j(using)c(PxPBTRF)i(\(or)f(PxPTTRF\))-74 1450 y Fo(\017)46 b Fw(Solv)m(e)32 b(the)e(system)h Fk(AX)i Fw(=)25 b Fk(B)34 b Fw(using)c(PxPBTRS)g(\(or)g(PxPTTRS\),)g(and)g (compute)g(the)h(ratio)217 1637 y Fi(SRESID)f Fw(=)25 b Fo(jj)p Fk(AX)j Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)15 b(jj)p Fk(X)7 b Fo(jj)p Fk(")p Fw(\))-210 1878 y Fq(3.4.1)105 b(Input)39 b(File)f(for)h(T)-9 b(esting)39 b(the)f(ScaLAP)-9 b(A)m(CK)40 b(Band)f(or)g(T)-9 b(ridiagonal)39 b(LL)-9 b(T)38 b(Rou-)109 1990 y(tines)72 2162 y Fw(An)30 b(annotated)i(example)f(of)f(an)g (input)g(\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo) m(w.)-210 2375 y Fr('ScaLAPACK,)45 b(banded)h(linear)g(systems)g(input) g(file')-210 2488 y('PVM.')-210 2600 y('')1431 b(output)46 b(file)h(name)g(\(if)g(any\))-210 2713 y(6)1479 b(device)46 b(out)-210 2826 y('L')1383 b(define)46 b(Lower)h(or)g(Upper)-210 2939 y(7)1479 b(number)46 b(of)i(problem)d(sizes)-210 3052 y(1)i(5)h(17)f(28)g(37)g(121)g(200)572 b(values)46 b(of)i(N)-210 3165 y(6)1479 b(number)46 b(of)i(bandwidths)-210 3278 y(1)f(2)h(4)f(10)h(31)f(64)858 b(values)46 b(of)i(BW)-210 3391 y(1)1479 b(number)46 b(of)i(NB's)-210 3504 y(-1)f(3)h(4)f(5)1145 b(values)46 b(of)i(NB)f(\(-1)g(for)g(automatic)e(determination\))-210 3617 y(1)1479 b(number)46 b(of)i(NRHS's)e(\(must)g(be)h(1\))-210 3730 y(8)1479 b(values)46 b(of)i(NRHS)-210 3842 y(1)1479 b(number)46 b(of)i(NBRHS's)d(\(ignored\))-210 3955 y(1)1479 b(values)46 b(of)i(NBRHS)e(\(ignored\))-210 4068 y(4)1479 b(number)46 b(of)i(process)d(grids)-210 4181 y(1)i(2)h(3)f(4)h(5)f(7) 1002 b(values)46 b(of)i("Number)d(of)j(Process)d(Columns")-210 4294 y(3.0)1383 b(threshold)1545 4989 y Fw(16)p eop end %%Page: 17 17 TeXDict begin 17 16 bop -210 -269 a Fm(3.5)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(QR,)g(R)m(Q,)g(LQ,)h (QL,)g(QP)-9 b(,)37 b(and)h(TZ)g(routines)-69 -97 y Fw(The)43 b(QR)g(test)i(program)e(generates)i(random)e(nonsymmetric)g(test)i (matrices)f(with)f(v)-5 b(alues)44 b(in)-210 16 y(the)c(in)m(terv)-5 b(al)41 b([-1,1],)j(calls)c(the)g(ScaLAP)-8 b(A)m(CK)40 b(routines)g(to)g(factor)h(the)e(system,)k(and)c(computes)h(a)-210 129 y(factorization)33 b(residual)d(error)g(c)m(hec)m(k)i(to)f(v)m (erify)g(that)g(eac)m(h)h(op)s(eration)e(has)g(p)s(erformed)f (correctly)-8 b(.)-69 242 y(Sp)s(eci\014cally)g(,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m (wing)i(tests:)-74 413 y Fo(\017)46 b Fw(Compute)28 b(the)g(QR)f (factorization)k(using)c(PxGEQRF,)h(and)g(generate)h(the)f(orthogonal)i (matrix)17 526 y Fk(Q)g Fw(from)g(the)h(Householder)f(v)m(ectors)-74 700 y Fo(\017)46 b Fw(Compute)30 b(the)h(ratio)217 874 y Fi(FRESID)f Fw(=)25 b Fo(jj)p Fk(QR)d Fo(\000)e Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-69 1045 y(The)30 b(testing)h(of)f(the)g(R)m(Q,)g(LQ,)g (QL,)f(and)h(QP)f(routines)h(pro)s(ceeds)f(in)h(a)g(similar)g(fashion.) 41 b(Simply)-210 1158 y(replace)34 b(all)g(o)s(ccurrences)f(of)g(QR)g (in)g(the)g(previous)g(discussion)f(with)h(R)m(Q,)h(LQ,)e(QL,)h(or)g (QP)g(resp)s(ec-)-210 1271 y(tiv)m(ely)-8 b(.)43 b(F)-8 b(or)31 b(TZ,)f(the)g(factorization)j(routine)e(is)f(called)i(PxTZRZF.) -210 1504 y Fq(3.5.1)105 b(Input)39 b(File)g(for)g(T)-9 b(esting)40 b(the)f(ScaLAP)-9 b(A)m(CK)39 b(QR,)g(R)m(Q,)h(LQ,)e(QL,)h (QP)-9 b(,)39 b(and)g(TZ)109 1617 y(Routines)72 1789 y Fw(An)30 b(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g (the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 1960 y Fr('ScaLAPACK,)45 b(Orthogonal)g(factorizations)f(input)i(file') -210 2073 y('MPI)h(machine')-210 2186 y('QR.out')1143 b(output)46 b(file)h(name)g(\(if)g(any\))-210 2299 y(6)1479 b(device)46 b(out)-210 2412 y(6)1479 b(number)46 b(of)i(factorizations) -210 2525 y('QR')f('QL')f('LQ')h('RQ')g('QP')f('TZ')142 b(factorizations:)44 b(QR,)j(QL,)g(LQ,)g(RQ,)g(QP,)f(TZ)-210 2638 y(4)1479 b(number)46 b(of)i(problems)d(sizes)-210 2751 y(2)i(5)h(13)f(15)g(13)g(26)h(30)f(15)524 b(values)46 b(of)i(M)-210 2864 y(2)f(7)h(8)95 b(10)47 b(17)g(20)h(30)f(35)524 b(values)46 b(of)i(N)-210 2976 y(4)1479 b(number)46 b(of)i(blocking)d (sizes)-210 3089 y(4)i(3)h(5)f(5)h(4)f(6)1002 b(values)46 b(of)i(MB)-210 3202 y(4)f(7)h(3)f(5)h(8)f(2)1002 b(values)46 b(of)i(NB)-210 3315 y(4)1479 b(number)46 b(of)i(process)d(grids)i (\(ordered)e(pairs)i(P)g(&)h(Q\))-210 3428 y(1)f(2)h(1)f(4)h(2)f(3)h(8) 906 b(values)46 b(of)i(P)-210 3541 y(1)f(2)h(4)f(1)h(3)f(2)h(1)906 b(values)46 b(of)i(Q)-210 3654 y(3.0)1383 b(threshold)-210 3891 y Fm(3.6)112 b(T)-9 b(ests)37 b(for)h(the)f(Linear)i(Least)f (Squares)g(\(LLS\))g(routines)-69 4063 y Fw(The)25 b(LLS)f(test)j (program)e(tests)h(the)g(PxGELS)e(driv)m(er)h(routine)h(for)f (computing)g(solutions)h(to)g(o)m(v)m(er-)-210 4175 y(and)36 b(underdetermined,)g(full-rank)g(systems)g(of)h(linear)g(equations)g Fk(AX)43 b Fw(=)35 b Fk(B)41 b Fw(\()p Fk(A)c Fw(is)f Fk(m)p Fw(-b)m(y-)p Fk(n)p Fw(\).)59 b(F)-8 b(or)-210 4288 y(eac)m(h)35 b(test)h(matrix)e(t)m(yp)s(e,)i(w)m(e)f(generate)h (three)e(matrices:)50 b(One)34 b(whic)m(h)g(is)g(scaled)h(near)f (under\015o)m(w,)h(a)-210 4401 y(matrix)c(with)f(mo)s(derate)h(norm,)e (and)h(one)h(whic)m(h)f(is)g(scaled)i(near)e(o)m(v)m(er\015o)m(w.)-69 4514 y(The)20 b(PxGELS)f(driv)m(er)i(computes)f(the)g(least-squares)i (solutions)f(\(when)e Fk(m)25 b Fo(\025)g Fk(n)p Fw(\))20 b(and)g(the)h(minim)m(um-)-210 4627 y(norm)35 b(solution)i(\(when)e Fk(m)g(<)f(n)p Fw(\))i(for)g(an)g Fk(m)p Fw(-b)m(y-)p Fk(n)g Fw(matrix)g Fk(A)g Fw(of)g(full)g(rank.)57 b(T)-8 b(o)37 b(test)g(PxGELS,)e(w)m(e)-210 4740 y(generate)d(a)f(diagonally)h (dominan)m(t)e(matrix)h Fk(A)p Fw(,)g(and)e(for)h Fk(C)i Fw(=)25 b Fk(A)31 b Fw(and)e Fk(C)j Fw(=)25 b Fk(A)2537 4707 y Fh(H)2604 4740 y Fw(,)31 b(w)m(e)1545 4989 y(17)p eop end %%Page: 18 18 TeXDict begin 18 17 bop -74 -269 a Fo(\017)46 b Fw(generate)23 b(a)e(consisten)m(t)h(righ)m(t-hand)f(side)f Fk(B)26 b Fw(suc)m(h)20 b(that)h Fk(X)29 b Fw(is)20 b(in)h(the)g(range)g(space) g(of)g Fk(C)7 b Fw(,)23 b(compute)17 -156 y(a)31 b(matrix)g Fk(X)37 b Fw(using)30 b(PxGELS,)g(and)g(compute)h(the)f(ratio)1025 48 y Fo(jj)p Fk(AX)f Fo(\000)20 b Fk(B)5 b Fo(jj)p Fk(=)p Fw(\(max)q(\()p Fk(m;)15 b(n)p Fw(\))p Fo(jj)p Fk(A)p Fo(jjjj)p Fk(X)7 b Fo(jj)p Fk(\017)p Fw(\))-74 290 y Fo(\017)46 b Fw(If)31 b Fk(C)39 b Fw(has)31 b(more)h(ro)m(ws)f(than)h (columns)f(\(i.e.)46 b(w)m(e)32 b(are)g(solving)g(a)h(least-squares)f (problem\),)g(form)17 403 y Fk(R)49 b Fw(=)f Fk(AX)37 b Fo(\000)29 b Fk(B)5 b Fw(,)48 b(and)43 b(c)m(hec)m(k)j(whether)e Fk(R)h Fw(is)f(orthogonal)h(to)g(the)g(column)f(space)g(of)h Fk(A)f Fw(b)m(y)17 516 y(computing)976 629 y Fo(jj)p Fk(R)1096 591 y Fh(H)1164 629 y Fk(C)7 b Fo(jj)p Fk(=)p Fw(\(max)q(\()p Fk(m;)15 b(n;)g(nr)s(hs)p Fw(\))p Fo(jj)p Fk(A)p Fo(jjjj)p Fk(B)5 b Fo(jj)p Fk(\017)p Fw(\))-74 833 y Fo(\017)46 b Fw(If)22 b Fk(C)28 b Fw(has)21 b(more)h(columns)g (than)g(ro)m(ws)f(\(i.e.)39 b(w)m(e)23 b(are)f(solving)g(an)g(o)m(v)m (erdetermined)h(system\),)h(c)m(hec)m(k)17 946 y(whether)36 b(the)h(solution)g Fk(X)44 b Fw(is)37 b(in)f(the)h(ro)m(w)g(space)g(of) g Fk(C)43 b Fw(b)m(y)37 b(scaling)g(b)s(oth)f Fk(X)44 b Fw(and)36 b Fk(C)43 b Fw(to)38 b(ha)m(v)m(e)17 1059 y(norm)c(one,)h(and)f(forming)g(the)g(QR)g(factorization)i(of)f Fk(D)f Fw(=)d([)p Fk(A;)15 b(X)7 b Fw(])36 b(if)e Fk(C)k Fw(=)31 b Fk(A)2794 1026 y Fh(H)2861 1059 y Fw(,)36 b(and)d(the)h(LQ)17 1172 y(factorization)29 b(of)d Fk(D)i Fw(=)d([)p Fk(A)932 1139 y Fh(H)1000 1172 y Fk(;)15 b(X)7 b Fw(])1147 1139 y Fh(H)1241 1172 y Fw(if)26 b Fk(C)32 b Fw(=)25 b Fk(A)p Fw(.)39 b(Letting)27 b Fk(E)k Fw(=)25 b Fk(D)s Fw(\()p Fk(n)g Fw(:)g Fk(n)12 b Fw(+)g Fk(nr)s(hs;)j(n)d Fw(+)g(1)p Fk(;)j(n)d Fw(+)g Fk(nr)s(hs)p Fw(\))17 1285 y(in)34 b(the)g(\014rst)f(case,)j(and)d Fk(E)j Fw(=)30 b Fk(D)s Fw(\()p Fk(m)23 b Fw(+)f(1)31 b(:)h Fk(m)22 b Fw(+)g Fk(nr)s(hs;)15 b(m)22 b Fw(+)g(1)31 b(:)g Fk(m)23 b Fw(+)f Fk(nr)s(hs)p Fw(\))33 b(in)g(the)h(latter,)i(w)m(e)17 1398 y(compute)1132 1510 y(max)15 b Fo(j)p Fk(d)1388 1524 y Fh(ij)1449 1510 y Fo(j)p Fk(=)p Fw(\(max)q(\()p Fk(m;)g(n;)g(nr)s(hs)p Fw(\))p Fk(\017)p Fw(\))-210 1751 y Fq(3.6.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(LLS)e(Routines)72 1922 y Fw(An)c(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g(the)h (test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 2135 y Fr('ScaLAPACK)45 b(LLS)i(input)f(file')-210 2248 y('MPI)h(machine') -210 2361 y('LS.out')1143 b(output)46 b(file)h(name)g(\(if)g(any\))-210 2473 y(6)1479 b(device)46 b(out)-210 2586 y(3)1479 b(number)46 b(of)i(problems)d(sizes)-210 2699 y(55)i(17)g(31)1145 b(values)46 b(of)i(M)-210 2812 y(5)f(71)h(31)1192 b(values)46 b(of)i(N)-210 2925 y(3)1479 b(number)46 b(of)i(NB's)-210 3038 y(2)f(3)h(5)1288 b(values)46 b(of)i(NB)-210 3151 y(3)1479 b(number)46 b(of)i(NRHS's)-210 3264 y(2)f(3)h(5)1288 b(values)46 b(of)i(NRHS)-210 3377 y(2)1479 b(number)46 b(of)i(NBRHS's)-210 3490 y(1)f(2)1384 b(values)46 b(of)i(NBRHS)-210 3603 y(4)1479 b(number)46 b(of)i(process)d(grids)i(\(ordered)e(pairs)i (P)g(&)h(Q\))-210 3715 y(1)f(2)h(1)f(4)h(2)f(3)h(8)906 b(values)46 b(of)i(P)-210 3828 y(1)f(2)h(4)f(1)h(3)f(2)h(1)906 b(values)46 b(of)i(Q)-210 3941 y(4.0)1383 b(threshold)-210 4185 y Fm(3.7)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(INV)f(routines)-69 4356 y Fw(The)22 b(in)m(v)m(ersion)g (test)h(driv)m(er)f(tests)h(\014v)m(e)f(di\013eren)m(t)g(matrix)h(t)m (yp)s(es)f({)g(general)h(nonsymmetric)f(\(GEN\),)-210 4469 y(general)31 b(upp)s(er)c(or)j(lo)m(w)m(er)h(triangular)f(\(UTR)g (and)f(L)-8 b(TR\),)30 b(and)f(symmetric)h(p)s(ositiv)m(e)h(de\014nite) f(\(upp)s(er)-210 4582 y(or)g(lo)m(w)m(er)i(triangular\))f(\(UPD)g(or)g (LPD\).)1545 4989 y(18)p eop end %%Page: 19 19 TeXDict begin 19 18 bop -74 -269 a Fo(\017)46 b Fw(If)22 b(GEN,)i(compute)f(the)g(LU)f(factorization)k(using)c(PxGETRF,)h(and)f (then)g(compute)h(the)g(in)m(v)m(erse)17 -156 y(b)m(y)31 b(in)m(v)m(oking)g(PxGETRI)-74 32 y Fo(\017)46 b Fw(If)32 b(UTR)f(or)h(L)-8 b(TR,)32 b(set)h(UPLO='U')f(or)f(UPLO='L')h(resp)s (ectiv)m(ely)-8 b(,)34 b(and)e(compute)g(the)g(in)m(v)m(erse)17 145 y(b)m(y)f(in)m(v)m(oking)g(PxTR)-8 b(TRI)-74 332 y Fo(\017)46 b Fw(If)39 b(UPD)h(or)g(LPD,)g(set)g(UPLO='U')f(or)h (UPLO='L')f(resp)s(ectiv)m(ely)-8 b(,)43 b(compute)d(the)g(Cholesky)17 445 y(factorization)33 b(using)d(PxPOTRF,)g(and)g(then)g(compute)h(the) f(in)m(v)m(erse)h(b)m(y)g(in)m(v)m(oking)g(PxPOTRI)-74 633 y Fo(\017)46 b Fw(Compute)30 b(the)h(ratio)217 821 y Fi(FRESID)f Fw(=)25 b Fo(jj)p Fk(AA)870 788 y Fg(\000)p Fv(1)986 821 y Fo(\000)20 b Fk(I)7 b Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-210 1061 y Fq(3.7.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(INV)e(Routines)72 1232 y Fw(An)c(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g(the)h (test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 1445 y Fr('ScaLAPACK,)45 b(Matrix)h(Inversion)f(Testing)h(input)g(file')-210 1558 y('MPI)h(machine.')-210 1671 y('INV.out')1095 b(output)46 b(file)h(name)g(\(if)g(any\))-210 1784 y(6)1479 b(device)46 b(out)-210 1896 y(5)1479 b(number)46 b(of)i(matrix)e(types)g(\(next)g (line\))-210 2009 y('GEN')g('UTR')h('LTR')f('UPD')g('LPD')142 b(GEN,)47 b(UTR,)g(LTR,)f(UPD,)h(LPD)-210 2122 y(4)1479 b(number)46 b(of)i(problems)d(sizes)-210 2235 y(2)i(5)h(10)f(15)g(13)g (20)h(30)f(50)524 b(values)46 b(of)i(N)-210 2348 y(4)1479 b(number)46 b(of)i(NB's)-210 2461 y(2)f(3)h(4)f(5)h(6)f(20)954 b(values)46 b(of)i(NB)-210 2574 y(4)1479 b(number)46 b(of)i(process)d(grids)i(\(ordered)e(P)j(&)f(Q\))-210 2687 y(1)g(2)h(1)f(4)h(2)f(3)h(8)906 b(values)46 b(of)i(P)-210 2800 y(1)f(1)h(4)f(1)h(3)f(2)h(1)906 b(values)46 b(of)i(Q)-210 2913 y(1.0)1383 b(threshold)-210 3156 y Fm(3.8)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(HRD)f(routines)-69 3328 y Fw(The)23 b(HRD)i(test)f(program)g(generates)h(random)e (nonsymmetric)h(test)h(matrices)g(with)e(v)-5 b(alues)24 b(in)g(the)-210 3441 y(in)m(terv)-5 b(al)25 b([-1,1],)j(calls)d(the)f (ScaLAP)-8 b(A)m(CK)25 b(routines)f(to)h(reduce)e(the)i(test)g(matrix)f (to)h(upp)s(er)d(Hessen)m(b)s(erg)-210 3554 y(form,)48 b(and)c(computes)g(a)h(reduction)g(residual)f(error)g(c)m(hec)m(k)i(to) g(v)m(erify)e(that)i(eac)m(h)f(op)s(eration)g(has)-210 3666 y(p)s(erformed)29 b(correctly)-8 b(.)-69 3779 y(Sp)s(eci\014cally) g(,)32 b(eac)m(h)f(test)g(matrix)g(is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 3992 y Fo(\017)46 b Fw(Reduce)31 b(the)f(matrix)h Fk(A)f Fw(to)i(upp)s(er)c(Hessen)m(b)s (erg)i(form)g Fk(H)38 b Fw(using)29 b(PxGEHRD)217 4180 y Fk(Q)289 4147 y Fh(T)364 4180 y Fo(\003)21 b Fk(A)f Fo(\003)h Fk(Q)k Fw(=)g Fk(H)7 b Fw(.)-74 4367 y Fo(\017)46 b Fw(and)30 b(compute)h(the)f(ratio)217 4555 y Fi(FRESID)g Fw(=)25 b Fo(jj)p Fk(Q)c Fo(\003)g Fk(H)27 b Fo(\003)20 b Fk(Q)1133 4522 y Fh(T)1209 4555 y Fo(\000)g Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))1545 4989 y(19)p eop end %%Page: 20 20 TeXDict begin 20 19 bop -210 -269 a Fq(3.8.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(HRD)f(Routines)72 -97 y Fw(An)30 b(annotated)i(example)f(of)f(an)g (input)g(\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo) m(w.)-210 112 y Fr('ScaLAPACK)45 b(HRD)i(input)f(file')-210 225 y('MPI)h(machine.')-210 338 y('HRD.out')713 b(output)47 b(file)f(name)h(\(if)g(any\))-210 451 y(6)1097 b(device)47 b(out)-210 564 y(1)1097 b(number)47 b(of)g(problems)e(sizes)-210 677 y(100)i(101)810 b(values)47 b(of)g(N)-210 790 y(1)g(1)1002 b(values)47 b(of)g(ILO)-210 903 y(100)g(101)810 b(values)47 b(of)g(IHI)-210 1016 y(1)1097 b(number)47 b(of)g(NB's)-210 1129 y(2)g(1)h(2)f(3)h(4)f(5)620 b(values)47 b(of)g(NB)-210 1242 y(1)1097 b(number)47 b(of)g(processor)e(grids)h(\(ordered)g(pairs) g(of)h(P)h(&)f(Q\))-210 1354 y(2)g(1)h(4)906 b(values)47 b(of)g(P)-210 1467 y(2)g(4)h(1)906 b(values)47 b(of)g(Q)-210 1580 y(1.0)1001 b(threshold)-210 1823 y Fm(3.9)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(TRD)f(routines)-69 1995 y Fw(The)g(TRD)g(test)h(program)f(generates)i(random)d(symmetric)i (test)g(matrices)g(with)f(v)-5 b(alues)37 b(in)f(the)-210 2108 y(in)m(terv)-5 b(al)30 b([-1,1],)h(calls)f(the)f(ScaLAP)-8 b(A)m(CK)29 b(routines)f(to)i(reduce)e(the)h(test)g(matrix)g(to)h (symmetric)f(tridi-)-210 2221 y(agonal)37 b(form,)e(and)g(computes)g(a) h(reduction)e(residual)h(error)g(c)m(hec)m(k)h(to)g(v)m(erify)g(that)f (eac)m(h)h(op)s(eration)-210 2334 y(has)30 b(p)s(erformed)f(correctly) -8 b(.)-69 2447 y(Sp)s(eci\014cally)g(,)32 b(eac)m(h)f(test)g(matrix)g (is)f(sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 2656 y Fo(\017)46 b Fw(Reduce)31 b(the)f(symmetric)h(matrix)g Fk(A)f Fw(to)h(symmetric)g(tridiagonal)h(form)e Fk(T)43 b Fw(using)30 b(PxSYTRD)217 2843 y Fk(Q)289 2810 y Fh(T)364 2843 y Fo(\003)21 b Fk(A)f Fo(\003)h Fk(Q)k Fw(=)g Fk(T)13 b Fw(.)-74 3029 y Fo(\017)46 b Fw(and)30 b(compute)h(the)f(ratio)217 3216 y Fi(FRESID)g Fw(=)25 b Fo(jj)p Fk(Q)c Fo(\003)g Fk(T)33 b Fo(\003)20 b Fk(Q)1116 3183 y Fh(T)1191 3216 y Fo(\000)g Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-210 3456 y Fq(3.9.1)105 b(Input)35 b(File)f(for)i(T)-9 b(esting)35 b(the)f(SCALAP)-9 b(A)m(CK)35 b(TRD)g(Routines)72 3627 y Fw(An)30 b(annotated)i(example)f (of)f(an)g(input)g(\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn) f(b)s(elo)m(w.)-210 3837 y Fr('ScaLAPACK)45 b(TRD)i(computation)e (input)h(file')-210 3950 y('MPI)h(machine.')-210 4063 y('TRD.out')332 b(output)46 b(file)g(name)-210 4175 y(6)716 b(device)46 b(out)-210 4288 y('L')620 b(define)46 b(Lower)g(or)h(Upper) -210 4401 y(2)716 b(number)46 b(of)h(problems)e(sizes)-210 4514 y(16)i(17)g(100)g(101)143 b(values)46 b(of)h(N)-210 4627 y(3)716 b(number)46 b(of)h(NB's)-210 4740 y(3)g(4)h(5)525 b(values)46 b(of)h(NB)1545 4989 y Fw(20)p eop end %%Page: 21 21 TeXDict begin 21 20 bop -210 -269 a Fr(3)716 b(Number)46 b(of)h(processor)e(grids)i(\(ordered)e(pairs)h(of)i(P)f(&)h(Q\))-210 -156 y(2)f(4)h(1)525 b(values)46 b(of)h(P)-210 -43 y(2)g(1)h(4)525 b(values)46 b(of)h(Q)-210 70 y(1.0)620 b(threshold)-210 308 y Fm(3.10)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(BRD)f(routines)-69 480 y Fw(The)h(BRD)i(test)g(program)e (generates)i(random)e(nonsymmetric)h(test)h(matrices)g(with)e(v)-5 b(alues)38 b(in)-210 593 y(the)h(in)m(terv)-5 b(al)41 b([-1,1],)i(calls)d(the)g(ScaLAP)-8 b(A)m(CK)39 b(routines)g(to)h (reduce)f(the)g(test)h(matrix)g(to)g(upp)s(er)d(or)-210 706 y(lo)m(w)m(er)h(bidiagonal)f(form,)h(and)d(computes)i(a)g (reduction)f(residual)g(error)g(c)m(hec)m(k)i(to)f(v)m(erify)g(that)g (eac)m(h)-210 819 y(op)s(eration)31 b(has)f(p)s(erformed)f(correctly)-8 b(.)-69 932 y(Sp)s(eci\014cally)g(,)32 b(eac)m(h)f(test)g(matrix)g(is)f (sub)5 b(jected)31 b(to)g(the)f(follo)m(wing)i(tests:)-74 1111 y Fo(\017)46 b Fw(Reduce)31 b(the)f(matrix)h Fk(A)f Fw(to)i(upp)s(er)c(or)i(lo)m(w)m(er)i(bidiagonal)f(form)f Fk(B)35 b Fw(using)30 b(PxGEBRD)217 1288 y Fk(Q)289 1255 y Fh(T)364 1288 y Fo(\003)21 b Fk(A)f Fo(\003)h Fk(P)38 b Fw(=)25 b Fk(B)5 b Fw(.)-74 1464 y Fo(\017)46 b Fw(and)30 b(compute)h(the)f(ratio)217 1641 y Fi(FRESID)g Fw(=)25 b Fo(jj)p Fk(Q)c Fo(\003)g Fk(B)j Fo(\003)d Fk(P)1123 1608 y Fh(T)1198 1641 y Fo(\000)f Fk(A)p Fo(jj)p Fk(=)p Fw(\()p Fk(n)p Fo(jj)p Fk(A)p Fo(jj)p Fk(")p Fw(\))-210 1876 y Fq(3.10.1)106 b(Input)34 b(File)h(for)g(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(BRD)g(Routines)72 2047 y Fw(An)30 b(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g (the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 2227 y Fr('ScaLAPACK)45 b(BRD)i(input)f(file')-210 2340 y('MPI)h(machine.')-210 2452 y('BRD.out')332 b(output)46 b(file)g(name)h(\(if)g(any\))-210 2565 y(6)716 b(device)46 b(out)-210 2678 y(3)716 b(number)46 b(of)h(problems)e(sizes)-210 2791 y(16)i(14)g(25)h(15)f(16)95 b(values)46 b(of)h(M)-210 2904 y(9)95 b(13)47 b(20)h(15)f(16)95 b(values)46 b(of)h(N)-210 3017 y(2)716 b(number)46 b(of)h(NB's)-210 3130 y(3)g(4)h(5)525 b(values)46 b(of)h(NB)-210 3243 y(3)716 b(Number)46 b(of)h(processor)e (grids)i(\(ordered)e(pairs)h(of)i(P)f(&)h(Q\))-210 3356 y(2)f(4)h(1)525 b(values)46 b(of)h(P)-210 3469 y(2)g(1)h(4)525 b(values)46 b(of)h(Q)-210 3582 y(1.0)620 b(threshold)-210 3820 y Fm(3.11)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(SEP)f(routines)72 3991 y Fw(The)22 b(follo)m(wing)i (tests)g(will)f(b)s(e)e(p)s(erformed)g(on)i(PxSYEV/PxHEEV,)g (PxSYEVX/PxHEEVX)-210 4104 y(and)30 b(PxSYEVD/PxHEEVD:)1090 4413 y Fk(r)1131 4427 y Fv(1)1254 4413 y Fw(=)1514 4352 y Fo(k)p Fk(AZ)d Fo(\000)20 b Fk(Z)7 b(L)p Fo(k)p 1417 4392 663 4 v 1417 4476 a Fk(abstol)22 b Fw(+)e Fk(ul)r(p)30 b Fo(k)p Fk(A)p Fo(k)1090 4701 y Fk(r)1131 4715 y Fv(2)1254 4701 y Fw(=)1417 4639 y Fo(k)q Fk(Z)1532 4606 y Fg(\003)1571 4639 y Fk(Z)c Fo(\000)20 b Fk(I)7 b Fo(k)p 1417 4680 426 4 v 1472 4763 a Fk(ul)r(p)30 b Fo(k)p Fk(A)p Fo(k)1545 4989 y Fw(21)p eop end %%Page: 22 22 TeXDict begin 22 21 bop -210 -269 a Fw(where)34 b Fk(Z)40 b Fw(is)34 b(the)h(matrix)f(of)g(eigen)m(v)m(ectors)j(returned)c(when)g (the)i(eigen)m(v)m(ector)i(option)e(is)f(giv)m(en,)i Fk(L)e Fw(is)-210 -156 y(the)e(matrix)g(of)g(eigen)m(v)-5 b(alues,)35 b Fk(ul)r(p)c Fw(represen)m(ts)h(PxLAMCH\()g(ICTXT,)f('P')h (\),)g(and)g Fk(abstol)h Fw(represen)m(ts)-210 -43 y Fk(ul)r(p)20 b Fo(\003)h(k)p Fk(A)p Fo(k)q Fw(.)-69 70 y(The)33 b(tester)h(allo)m(ws)g(m)m(ultiple)g(test)f(requests)g(to)h(b) s(e)e(con)m(trolled)j(from)e(a)g(single)h(input)e(\014le.)48 b(Eac)m(h)-210 183 y(test)31 b(request)g(is)f(con)m(trolled)i(b)m(y)e (the)h(follo)m(wing)h(inputs:)-165 395 y(V)-8 b(alues)32 b(of)e(N)17 508 y(N)h(=)f(The)g(matrix)g(size)-165 696 y(V)-8 b(alues)32 b(of)e(P)-8 b(,)31 b(Q,)f(NB)17 809 y(P)g(=)g(NPR)m(O)m(W,)i(the)f(n)m(um)m(b)s(er)e(of)h(pro)s(cessor)g (ro)m(ws)17 922 y(Q)g(=)g(NPCOL,)g(the)h(n)m(um)m(b)s(er)e(of)h(pro)s (cessor)g(columns)17 1035 y(NB)h(=)f(the)h(blo)s(c)m(k)g(size)-165 1222 y(V)-8 b(alues)32 b(of)e(the)h(matrix)g(t)m(yp)s(es)17 1335 y(See)g(Section)g(3.11.1.)-165 1523 y(Num)m(b)s(er)f(of)g(eigen)i (requests)17 1636 y(1)f(=)f(T)-8 b(est)31 b(full)f(eigendecomp)s (osition)i(only)17 1749 y(8)f(=)f(T)-8 b(est)31 b(the)g(follo)m(wing)g (eigen)h(requests:)17 1862 y(F)-8 b(ull)31 b(eigendecomp)s(osition)17 1975 y(All)g(eigen)m(v)-5 b(alues,)33 b(no)d(eigen)m(v)m(ectors)17 2087 y(Eigen)m(v)-5 b(alues)32 b(requested)e(b)m(y)h(v)-5 b(alue)30 b(\(i.e.)42 b(VL,VU\))17 2200 y(Eigen)m(v)-5 b(alues)32 b(and)e(v)m(ectors)i(requested)e(b)m(y)g(v)-5 b(alue)17 2313 y(Eigen)m(v)g(alues)32 b(requested)e(b)m(y)h(index)e (\(i.e.)42 b(IL,)31 b(IU\))17 2426 y(Eigen)m(v)-5 b(alues)32 b(and)e(v)m(ectors)i(requested)e(b)m(y)g(index)17 2539 y(F)-8 b(ull)31 b(eigendecomp)s(osition)h(with)e(minimal)h(w)m (orkspace)g(pro)m(vided)17 2652 y(F)-8 b(ull)31 b(eigendecomp)s (osition)h(with)e(random)g(w)m(orkspace)h(pro)m(vided)-165 2840 y(Threshold)17 2953 y(The)f(highest)h(v)-5 b(alue)31 b(of)f Fk(r)897 2967 y Fv(1)937 2953 y Fk(;)15 b(r)1018 2967 y Fv(2)1088 2953 y Fw(and)30 b Fk(r)1306 2967 y Fv(3)1375 2953 y Fw(that)h(will)g(b)s(e)f(accepted.)-165 3140 y(Absolute)h(tolerance)17 3253 y(Must)g(b)s(e)e(-1.0)j(to)f (ensure)f(orthogonal)i(eigen)m(v)m(ectors)-165 3441 y(Prin)m(t)f (Request)17 3554 y(1)g(=)f(Prin)m(t)g(ev)m(ery)i(test)17 3667 y(2)f(=)f(Prin)m(t)g(only)h(failing)g(tests)g(and)f(a)h(summary)e (of)i(the)f(request)-210 3907 y Fq(3.11.1)106 b(T)-9 b(est)34 b(Matrices)i(for)f(the)f(Symmetric)i(Eigen)m(v)-6 b(alue)35 b(Routines)72 4078 y Fw(Tw)m(en)m(t)m(y-t)m(w)m(o)45 b(di\013eren)m(t)e(t)m(yp)s(es)f(of)g(test)h(matrices)h(ma)m(y)e(b)s(e) g(generated)h(for)f(the)h(symmetric)-210 4191 y(eigen)m(v)-5 b(alue)34 b(routines.)47 b(T)-8 b(able)32 b(1)h(sho)m(ws)f(the)h(t)m (yp)s(es,)g(along)g(with)f(the)g(n)m(um)m(b)s(ers)f(used)h(to)h(refer)f (to)h(the)-210 4304 y(matrix)24 b(t)m(yp)s(es.)38 b(Except)24 b(as)g(noted,)h(all)g(matrices)f(ha)m(v)m(e)h(norm)e Fk(O)s Fw(\(1\).)39 b(The)23 b(expression)g Fk(U)10 b(D)s(U)3024 4271 y Fg(\000)p Fv(1)3142 4304 y Fw(means)-210 4417 y(a)33 b(real)g(diagonal)h(matrix)f Fk(D)i Fw(with)d(en)m(tries)h(of)g (magnitude)f Fk(O)s Fw(\(1\))i(conjugated)f(b)m(y)g(a)f(unitary)g(\(or) h(real)-210 4530 y(orthogonal\))f(matrix)f Fk(U)10 b Fw(.)1545 4989 y(22)p eop end %%Page: 23 23 TeXDict begin 23 22 bop -126 -357 3433 4 v -128 -244 4 113 v 1166 -244 V 1767 -278 a Fw(Eigen)m(v)-5 b(alue)32 b(Distribution)p 3305 -244 V 1167 -240 2140 4 v -128 -131 4 113 v -77 -165 a(T)m(yp)s(e)p 1166 -131 V 1123 w(Arithmetic)p 1764 -131 V 134 w(Geometric)p 2274 -131 V 101 w(Clustered)p 2753 -131 V 209 w(Other)p 3305 -131 V -126 -128 3433 4 v -128 -15 4 113 v -77 -49 a(Zero)p 1166 -15 V 2753 -15 V 2908 w(1)p 3305 -15 V -126 -11 3433 4 v -128 102 4 113 v -77 68 a(Iden)m(tit)m(y)p 1166 102 V 2753 102 V 2773 w(2)p 3305 102 V -126 105 3433 4 v -128 223 4 118 v -77 189 a(Diagonal)p 1166 223 V 1171 w(3)p 1764 223 V 374 w(4,)f(6)2009 157 y Ff(y)2044 189 y Fw(,)f(7)2144 157 y Ff(z)p 2274 223 V 2492 189 a Fw(5)p 2753 223 V 3305 223 V -126 226 3433 4 v -128 344 4 118 v -77 310 a Fk(U)10 b(D)s(U)145 277 y Fg(\000)p Fv(1)p 1166 344 V 1251 310 a Fw(8,)31 b(11)1442 278 y Ff(y)1477 310 y Fw(,)f(12)1622 278 y Ff(z)1657 310 y Fw(,)p 1764 344 V 224 w(9,)h(17)2097 278 y Ff(\003)p 2274 344 V 2377 310 a Fw(10,)h(18)2614 278 y Ff(\003)p 2753 344 V 3305 344 V -128 457 4 113 v 1166 457 V 1217 423 a Fw(16)1307 391 y Ff(\003)1346 423 y Fw(,)f(19)1492 391 y Fe(?)1531 423 y Fw(,)g(20)1677 391 y Ff(\017)p 1764 457 V 2274 457 V 2753 457 V 3305 457 V -126 460 3433 4 v -128 578 4 118 v -77 544 a Fw(Symmetric)g(w/Random)f(en)m (tries)p 1166 578 V 2753 578 V 1688 w(13,)h(14)3041 512 y Ff(y)3076 544 y Fw(,)g(15)3222 512 y Ff(z)p 3305 578 V -126 582 3433 4 v -128 695 4 113 v -77 661 a Fw(T)-8 b(ridiagonal)p 1166 695 V 1764 695 V 2274 695 V 2753 695 V 2591 w(21)3055 628 y Fe(a)p 3305 695 V -126 698 3433 4 v -128 816 4 118 v -77 782 a Fw(Multiple)31 b(Clusters)p 1166 816 V 1764 816 V 2274 816 V 2753 816 V 2355 w(22)3059 750 y Fe(b)p 3305 816 V -126 819 3433 4 v -77 898 a Fo(y)p Fw({)g(matrix)g(en)m(tries)g(are)g Fe(O)r Ft(\()868 834 y Ff(p)p 933 834 274 4 v 933 898 a Ft(o)n(v)n(er\015o)n(w\))-77 1011 y Fo(z)p Fw({)g(matrix)g(en)m(tries)g(are)g Fe(O)r Ft(\()868 947 y Ff(p)p 933 947 327 4 v 933 1011 a Ft(under\015o)n(w)o (\))-77 1124 y Fo(\003)g Fw({)g(diagonal)g(en)m(tries)h(are)e(p)s (ositiv)m(e)-77 1237 y Fk(?)h Fw({)g(matrix)g(en)m(tries)g(are)f Fe(O)r Ft(\()903 1173 y Ff(p)p 968 1173 274 4 v 968 1237 a Ft(o)n(v)n(er\015o)n(w\))g Fw(and)g(diagonal)i(en)m(tries)f(are)g(p)s (ositiv)m(e)-77 1350 y Fo(\017)g Fw({)g(matrix)g(en)m(tries)g(are)f Fe(O)r Ft(\()903 1286 y Ff(p)p 968 1286 327 4 v 968 1350 a Ft(under\015o)n(w)o(\))h Fw(and)e(diagonal)j(en)m(tries)f(are)g(p)s (ositiv)m(e)-77 1463 y Fk(a)g Fw({)f(Some)h(of)f(the)h(immediately)h (o\013-diagonal)g(elemen)m(ts)g(are)f(zero)g(-)f(guaran)m(teeing)i (splitting)-77 1576 y Fk(b)f Fw({)f(Clusters)g(are)h(sized:)41 b(1,)31 b(2,)g(4,)g(.)15 b(.)h(.)f(,)31 b(2)1358 1543 y Fh(i)1386 1576 y Fw(.)379 1764 y(T)-8 b(able)31 b(1:)41 b(T)-8 b(est)31 b(matrices)h(for)e(the)g(symmetric)h(eigen)m(v)-5 b(alue)32 b(problem)-210 2039 y Fq(3.11.2)106 b(Input)33 b(File)g(for)h(T)-9 b(esting)34 b(the)f(Symmetric)h(Eigen)m(v)-6 b(alue)34 b(Routines)h(and)e(Driv)m(ers)72 2211 y Fw(An)28 b(annotated)h(example)g(of)g(an)f(input)f(\014le)h(for)g(testing)i(the) e(symmetric)h(eigen)m(v)-5 b(alue)30 b(routines)-210 2323 y(and)g(driv)m(ers)g(is)g(sho)m(wn)g(b)s(elo)m(w.)-210 2536 y Fr('ScaLAPACK)45 b(Symmetric)g(Eigensolver)g(Test)i(File')-210 2649 y(')g(')-210 2762 y('sep.out')1095 b(output)46 b(file)h(name)g (\(if)g(any\))-210 2875 y(6)1479 b(device)46 b(out)h(\(13)g(&)h(14)f (reserved)e(for)i(internal)f(testing\))-210 2988 y(4)95 b(maximum)46 b(number)g(of)h(processes)-210 3101 y('N'disable)e(pxsyev) h(tests,)g(recommended)f(for)i(heterogeneous)d(systems.)-210 3213 y(')j(')-210 3326 y('TEST)f(1)i(-)f(test)g(tiny)g(matrices)e(-)j (different)d(process)h(configurations')-210 3439 y(3)1479 b(number)46 b(of)i(matrices)-210 3552 y(0)f(1)h(2)f(matrix)f(size)-210 3665 y(1)h(number)g(of)g(uplo)f(choices)-210 3778 y('L'uplo)g(choices) -210 3891 y(2)h(number)g(of)g(processor)e(configurations)f(\(P,)j(Q,)g (NB\))-210 4004 y(1)g(1)525 b(values)46 b(of)h(P)h(\(NPROW\))-210 4117 y(2)f(1)143 b(values)46 b(of)i(Q)f(\(NPCOL\))-210 4230 y(1)g(1)143 b(values)46 b(of)i(NB)-210 4343 y(1)f(number)g(of)g (matrix)f(types)-210 4455 y(8)191 b(matrix)46 b(types)g(\(see)h (pdseptst.f\))-210 4568 y('N'perform)e(subset)h(tests?)-210 4681 y(80.0)h(Threshold)e(\(*)i(5)h(for)e(generalized)f(tests\))1545 4989 y Fw(23)p eop end %%Page: 24 24 TeXDict begin 24 23 bop -210 -269 a Fr(-1)95 b(Absolute)45 b(Tolerance)-210 -156 y(')i(')-210 -43 y('End)g(of)g(tests')-210 70 y(-1)-210 310 y Fm(3.12)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(GSEP)f(routines)-69 482 y Fw(Finding)23 b(the)h(eigen)m(v)-5 b(alues)25 b(and)e(eigen)m(v)m(ectors)k(of)c (symmetric)h(matrices)g(A)g(and)f(B,)h(where)f(B)g(is)h(also)-210 595 y(p)s(ositiv)m(e)32 b(de\014nite,)g(follo)m(ws)h(the)e(same)h (stages)h(as)e(the)h(symmetric)g(eigen)m(v)-5 b(alue)33 b(problem)e(except)h(that)-210 708 y(the)24 b(problem)g(is)g(\014rst)f (reduced)g(from)g(generalized)j(to)f(standard)e(form)g(using)h (PxSYGST/PxHEGST.)-69 821 y(T)-8 b(o)31 b(c)m(hec)m(k)h(these)f (calculations,)h(the)f(follo)m(wing)h(test)f(ratios)g(are)g(computed:) 191 1049 y Fk(r)232 1063 y Fv(1)355 1049 y Fw(=)519 987 y Fo(k)p Fk(A)15 b(Z)27 b Fo(\000)20 b Fk(B)g(Z)h(D)s Fo(k)p 519 1027 605 4 v 533 1111 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 1235 y Fw(calling)32 b(PxSYGVX/PxHEGVX)f(with)f(ITYPE=1)g(and)g(UPLO='U')191 1421 y Fk(r)232 1435 y Fv(2)355 1421 y Fw(=)519 1359 y Fo(k)p Fk(A)15 b(Z)27 b Fo(\000)20 b Fk(B)g(Z)h(D)s Fo(k)p 519 1400 V 533 1483 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 1607 y Fw(calling)32 b(PxSYGVX/PxHEGVX)f(with)f(ITYPE=1)g(and)g(UPLO='L')191 1793 y Fk(r)232 1807 y Fv(5)355 1793 y Fw(=)519 1731 y Fo(k)p Fk(A)15 b(Z)27 b Fo(\000)20 b Fk(B)g(Z)h(D)s Fo(k)p 519 1772 V 533 1855 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 1979 y Fw(calling)32 b(PxSYGVX/PxHEEVX)f(with)f(ITYPE=2)g(and)f(UPLO='U')191 2165 y Fk(r)232 2179 y Fv(8)355 2165 y Fw(=)519 2103 y Fo(k)p Fk(A)15 b(B)20 b(Z)27 b Fo(\000)20 b Fk(Z)h(D)s Fo(k)p 519 2144 V 533 2227 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 2351 y Fw(calling)32 b(PxSYGVX/PxHEEVX)f(with)f(ITYPE=2)g(and)f(UPLO='L')156 2537 y Fk(r)197 2551 y Fv(10)355 2537 y Fw(=)519 2475 y Fo(k)p Fk(A)15 b(B)20 b(Z)27 b Fo(\000)20 b Fk(Z)h(D)s Fo(k)p 519 2516 V 533 2599 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 2723 y Fw(calling)32 b(PxSYGVX/PxHEEVX)f(with)f(ITYPE=3)g(and)f(UPLO='U')156 2909 y Fk(r)197 2923 y Fv(12)355 2909 y Fw(=)519 2848 y Fo(k)p Fk(B)20 b(A)15 b(Z)27 b Fo(\000)20 b Fk(Z)h(D)s Fo(k)p 519 2888 V 533 2971 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)509 3096 y Fw(calling)32 b(PxSYGVX/PxHEEVX)f(with)f(ITYPE=3)g(and)f(UPLO='L')156 3281 y Fk(r)197 3295 y Fv(14)355 3281 y Fw(=)519 3220 y Fo(k)p Fk(B)20 b(A)15 b(Z)27 b Fo(\000)20 b Fk(Z)h(D)s Fo(k)p 519 3260 V 533 3343 a(k)p Fk(A)p Fo(k)31 b(k)q Fk(Z)7 b Fo(k)30 b Fk(n)15 b(ul)r(p)3274 3468 y Fw(\(1\))-210 3705 y Fq(3.12.1)106 b(Input)32 b(File)h(for)h(T)-9 b(esting)33 b(the)g(Generalized)h(Symmetric)g(Eigen)m(v)-6 b(alue)33 b(Routines)162 3818 y(and)i(Driv)m(ers)72 3989 y Fw(The)c(input)f (\014le)h(for)f(testing)i(the)g(generalized)g(symmetric)f(eigen)m(v)-5 b(alue)33 b(routines)e(and)f(driv)m(ers)-210 4102 y(is)j(the)g(same)h (as)f(that)g(for)g(testing)h(the)f(symmetric)h(eigenproblem)f (routines.)48 b(Refer)33 b(to)h(the)f(Section)-210 4215 y(3.11.2)g(for)d(further)f(details.)-210 4455 y Fm(3.13)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(NEP)f(routines)-69 4627 y Fw(The)31 b(PxLAHQR)h(test)h(program)e (generates)i(random)e(upp)s(er)f(Hessen)m(b)s(erg)i(matrices,)i (completes)-210 4740 y(a)26 b(Sc)m(h)m(ur)g(decomp)s(osition)g(on)g (them,)h(and)e(then)h(tests)g(the)g(resulting)g(Sc)m(h)m(ur)g(decomp)s (osition)g(for)g(main-)1545 4989 y(24)p eop end %%Page: 25 25 TeXDict begin 25 24 bop -210 -269 a Fw(taining)31 b(similarit)m(y)-8 b(.)43 b(The)30 b(follo)m(wing)h(tests)g(will)g(b)s(e)f(p)s(erformed)f (on)h(P)p 2238 -269 28 4 v 33 w(LAHQR:)1148 108 y Fk(r)1189 122 y Fv(1)1312 108 y Fw(=)1476 -76 y Fd(\015)1476 -26 y(\015)1476 23 y(\015)1522 21 y Fk(H)d Fo(\000)20 b Fk(QS)5 b(Q)1921 -12 y Fh(T)1976 -76 y Fd(\015)1976 -26 y(\015)1976 23 y(\015)p 1476 87 547 4 v 1548 170 a Fk(n)15 b(ul)r(p)30 b Fo(k)p Fk(H)7 b Fo(k)1148 449 y Fk(r)1189 463 y Fv(2)1312 449 y Fw(=)1476 265 y Fd(\015)1476 315 y(\015)1476 365 y(\015)1522 363 y Fk(I)27 b Fo(\000)20 b Fk(Q)1752 330 y Fh(T)1807 363 y Fk(Q)1879 265 y Fd(\015)1879 315 y(\015)1879 365 y(\015)p 1476 428 450 4 v 1602 511 a Fk(n)15 b(ul)r(p)3274 669 y Fw(\(2\))-210 854 y(where)39 b Fk(Q)g Fw(is)g(the)h(Sc)m(h)m(ur)e (v)m(ectors)j(of)e(the)h(upp)s(er)d(Hessen)m(b)s(erg)i(matrix)h Fk(H)46 b Fw(when)38 b(the)i(Sc)m(h)m(ur)e(v)m(ector)-210 967 y(and)j(Sc)m(h)m(ur)h(decomp)s(osition)g(option)h(is)f(giv)m(en.)77 b Fk(N)52 b Fw(is)42 b(the)g(order)f(of)i(the)f(matrix,)j Fk(ul)r(p)d Fw(represen)m(ts)-210 1079 y(PxLAMCH\()31 b(ICTXT,)e('P')i(\),)g(and)f(the)g(one-norm)g(is)h(used)e(for)i(the)f (norm)g(computations.)-210 1316 y Fq(3.13.1)106 b(Input)34 b(File)h(for)g(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(NEP)f(Routines)72 1488 y Fw(An)30 b(annotated)i(example)f(of)f(an)g (input)g(\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo) m(w.)-210 1681 y Fr('SCALAPACK)45 b(NEP)i(\(Nonsymmetric)d(Eigenvalue)h (Problem\))h(input)g(file')-210 1794 y('MPI)h(Machine')-210 1906 y('NEP.out')713 b(output)47 b(file)f(name)h(\(if)g(any\))-210 2019 y(6)1097 b(device)47 b(out)-210 2132 y(8)1097 b(number)47 b(of)g(problems)e(sizes)-210 2245 y(1)i(2)h(3)f(4)h(6)f(10)g(100)g(200) 190 b(values)47 b(of)g(N)-210 2358 y(3)1097 b(number)47 b(of)g(NB's)-210 2471 y(6)g(20)h(40)810 b(values)47 b(of)g(NB)-210 2584 y(4)1097 b(number)47 b(of)g(process)f(grids)g(\(ordered)f(pairs)i (of)g(P)g(&)h(Q\))-210 2697 y(1)f(2)h(1)f(4)811 b(values)47 b(of)g(P)-210 2810 y(1)g(2)h(4)f(1)811 b(values)47 b(of)g(Q)-210 2923 y(20.0)953 b(threshold)-210 3163 y Fm(3.14)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(EV)m(C)f(routines)-69 3335 y Fw(The)27 b(PCTREV)m(C/PZTREV)m(C)g(test)h(program)f(p)s (erforms)f(a)i(righ)m(t)g(and)f(left)h(eigen)m(v)m(ector)j(calcula-) -210 3447 y(tion)g(of)f(a)h(triangular)g(matrix)g(follo)m(w)m(ed)h(b)m (y)e(a)h(residual)f(c)m(hec)m(ks)i(of)e(the)h(calculated)h(eigen)m(v)m (ectors.)-69 3560 y(The)e(follo)m(wing)i(tests)f(will)g(b)s(e)e(p)s (erformed)g(on)i(P)p 1639 3560 28 4 v 32 w(TREV)m(C.)f(The)g(basic)h (test)g(is:)1171 3883 y Fk(r)1212 3897 y Fv(1)1335 3883 y Fw(=)1499 3821 y Fo(k)p Fk(H)7 b(Z)27 b Fo(\000)20 b Fk(Z)7 b(D)s Fo(k)p 1499 3862 500 4 v 1557 3945 a Fk(n)15 b(ul)r(p)30 b Fo(k)p Fk(T)13 b Fo(k)3274 4108 y Fw(\(3\))-210 4292 y(using)30 b(the)g(1-norm.)41 b(It)31 b(also)g(tests)g(the)g (normalization)h(of)e Fk(Z)7 b Fw(.)832 4519 y Fk(r)873 4533 y Fv(2)995 4519 y Fw(=)1159 4458 y Fk(max)1339 4472 y Fh(j)1390 4458 y Fo(k)q Fk(m)20 b Fo(\000)g Fk(nor)s(m)p Fw(\()p Fk(Z)7 b Fw(\()p Fk(j)e Fw(\)\))20 b Fo(\000)g Fw(1)p Fo(k)q Fw(\))p 1159 4498 1180 4 v 1650 4582 a Fk(n)15 b(ul)r(p)3274 4740 y Fw(\(4\))1545 4989 y(25)p eop end %%Page: 26 26 TeXDict begin 26 25 bop -210 -269 a Fw(where)39 b Fk(H)47 b Fw(is)40 b(the)g(upp)s(er)e(Hessen)m(b)s(erg)i(matrix,)i Fk(n)e Fw(is)f(the)i(order)e(of)h(the)g(matrix,)j Fk(Z)7 b Fw(\()p Fk(j)e Fw(\))40 b(is)g(the)g(j-th)-210 -156 y(eigen)m(v)m(ector,)51 b(and)43 b(m-norm)g(is)h(the)g(max-norm)g(of)g (a)h(v)m(ector,)k(and)43 b Fk(ul)r(p)h Fw(represen)m(ts)g(PxLAMCH\() -210 -43 y(ICTXT,)e('P')h(\).)g(The)f(max-norm)g(of)h(a)g(complex)h (n-v)m(ector)g Fk(x)e Fw(in)h(this)f(case)i(is)e(the)h(maxim)m(um)g(of) -210 70 y Fo(k)p Fk(r)s(e)p Fw(\()p Fk(x)p Fw(\()p Fk(i)p Fw(\)\))p Fo(k)33 b Fw(+)d Fo(k)p Fk(im)p Fw(\()p Fk(x)p Fw(\()p Fk(i)p Fw(\)\))p Fo(k)j Fw(o)m(v)m(er)f Fk(i)25 b Fw(=)g(1)p Fk(;)15 b(:)g(:)g(:)i(;)e(n)p Fw(.)-210 306 y Fq(3.14.1)106 b(Input)34 b(File)h(for)g(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(EV)m(C)e(Routines)72 478 y Fw(An)c(annotated)i(example)f(of)f(an)g(input)g(\014le)g(for)g (the)h(test)g(program)f(is)h(sho)m(wn)f(b)s(elo)m(w.)-210 664 y Fr('SCALAPACK)45 b(NEP)i(\(Nonsymmetric)d(Eigenvalue)h(Problem\)) h(input)g(file')-210 777 y('MPI)h(Machine')-210 890 y('EVC.out')713 b(output)47 b(file)f(name)h(\(if)g(any\))-210 1003 y(6)1097 b(device)47 b(out)-210 1116 y(1)1097 b(number)47 b(of)g(problems)e (sizes)-210 1229 y(100)i(1000)g(1500)f(2000)h(2500)f(3000)190 b(Probs)-210 1342 y(1)1097 b(number)47 b(of)g(NB's)-210 1455 y(8)1050 b(values)46 b(of)h(NB)-210 1568 y(4)1097 b(number)47 b(of)g(process)f(grids)g(\(ordered)f(pairs)i(of)g(P)g(&)h (Q\))-210 1681 y(1)f(1)h(4)f(2)h(3)f(2)h(2)f(1)525 b(values)46 b(of)h(P)-210 1793 y(1)g(4)h(1)f(2)h(3)f(1)h(4)f(8)525 b(values)46 b(of)h(Q)-210 1906 y(20.0)953 b(threshold)-210 2146 y Fm(3.15)112 b(T)-9 b(ests)37 b(for)h(the)f(ScaLAP)-9 b(A)m(CK)37 b(SVD)g(routines)-69 2317 y Fw(The)c(follo)m(wing)i(tests)f (will)g(b)s(e)e(p)s(erformed)g(on)h(PSGESVD/PDGESVD.)i(A)e(n)m(um)m(b)s (er)f(of)i(matrix)-210 2430 y(\\t)m(yp)s(es")e(are)f(sp)s(eci\014ed,)g (as)g(denoted)g(in)g(T)-8 b(able)31 b(2.)43 b(F)-8 b(or)32 b(eac)m(h)g(t)m(yp)s(e)f(of)g(matrix,)h(and)e(for)h(the)g(minimal)-210 2543 y(w)m(orkspace)c(as)g(w)m(ell)g(as)g(for)f(larger)i(than)e (minimal)h(w)m(orkspace)g(an)f Fk(M)10 b Fw(-b)m(y)p Fk(N)37 b Fw(matrix)27 b(\\A")g(with)f(kno)m(wn)-210 2656 y(singular)k(v)-5 b(alues)30 b(is)g(generated)h(and)f(used)f(to)i (test)g(the)f(SVD)g(routines.)40 b(F)-8 b(or)31 b(eac)m(h)g(matrix,)g (A)f(will)h(b)s(e)-210 2769 y(factored)g(as)g Fk(A)56 b Fw(=)f Fk(U)40 b(diag)s Fw(\()p Fk(S)5 b Fw(\))32 b Fk(V)21 b(T)43 b Fw(and)30 b(the)g(follo)m(wing)i(9)f(tests)g (computed:)62 3085 y Fk(r)103 3099 y Fv(1)226 3085 y Fw(=)389 3024 y Fo(k)q Fk(A)20 b Fo(\000)g Fk(U)10 b Fw(1diag)q(\()p Fk(S)5 b Fw(1\))p Fk(V)21 b(T)13 b Fw(1)p Fo(k)p 389 3064 916 4 v 461 3148 a(k)p Fk(A)p Fo(k)j Fw(max\()p Fk(M)5 b(;)15 b(N)10 b Fw(\))15 b Fk(ul)r(p)62 3387 y(r)103 3401 y Fv(2)226 3387 y Fw(=)389 3204 y Fd(\015)389 3253 y(\015)389 3303 y(\015)435 3301 y Fk(I)28 b Fo(\000)20 b Fw(\()p Fk(U)10 b Fw(1\))781 3268 y Fh(T)837 3301 y Fk(U)g Fw(1)954 3204 y Fd(\015)954 3253 y(\015)954 3303 y(\015)p 389 3366 612 4 v 575 3450 a Fk(M)25 b(ul)r(p)62 3684 y(r)103 3698 y Fv(3)226 3684 y Fw(=)389 3501 y Fd(\015)389 3550 y(\015)389 3600 y(\015)435 3598 y Fk(I)j Fo(\000)20 b Fk(V)g(T)13 b Fw(1\()p Fk(V)21 b(T)13 b Fw(1\))1033 3565 y Fh(T)1088 3501 y Fd(\015)1088 3550 y(\015)1088 3600 y(\015)p 389 3663 746 4 v 649 3747 a Fk(N)25 b(ul)r(p)62 3945 y(r)103 3959 y Fv(4)226 3945 y Fw(=)379 3801 y Fd(\()488 3883 y Fw(0)156 b(if)30 b Fk(S)5 b Fw(1)31 b(con)m(tains)g(SIZE)f (nonnegativ)m(e)i(v)-5 b(alues)30 b(in)g(decreasing)h(order.)529 3960 y Fv(1)p 498 3975 99 4 v 498 4027 a Fh(ul)q(p)689 3996 y Fw(otherwise)62 4210 y Fk(r)103 4224 y Fv(5)226 4210 y Fw(=)446 4148 y Fo(k)q Fk(S)5 b Fw(1)21 b Fo(\000)e Fk(S)5 b Fw(2)p Fo(k)p 389 4189 530 4 v 389 4272 a Fk(S)g(I)i(Z)g(E)20 b(M)26 b Fo(k)p Fk(S)5 b Fo(k)62 4458 y Fk(r)103 4472 y Fv(6)226 4458 y Fw(=)389 4396 y Fo(k)q Fk(U)10 b Fw(1)20 b Fo(\000)g Fk(U)10 b Fw(2)p Fo(k)p 389 4437 437 4 v 488 4520 a Fk(M)25 b(ul)r(p)62 4701 y(r)103 4715 y Fv(7)226 4701 y Fw(=)461 4639 y Fo(k)p Fk(S)5 b Fw(1)21 b Fo(\000)f Fk(S)5 b Fw(3)p Fo(k)p 389 4680 558 4 v 389 4763 a Fk(S)g(I)i(Z)g(E)20 b(ul)r(p)15 b Fo(k)q Fk(S)5 b Fo(k)1545 4989 y Fw(26)p eop end %%Page: 27 27 TeXDict begin 27 26 bop 62 -230 a Fk(r)103 -216 y Fv(8)226 -230 y Fw(=)389 -292 y Fo(k)q Fk(V)20 b(T)13 b Fw(1)20 b Fo(\000)g Fk(V)g(T)13 b Fw(3)p Fo(k)p 389 -251 571 4 v 562 -168 a Fk(N)25 b(ul)r(p)62 13 y(r)103 27 y Fv(9)226 13 y Fw(=)468 -49 y Fo(k)q Fk(S)5 b Fw(1)20 b Fo(\000)g Fk(S)5 b Fw(4)p Fo(k)p 389 -8 574 4 v 389 75 a Fk(S)g(I)i(Z)g(E)20 b(ul)r(p)30 b Fo(k)q Fk(S)5 b Fo(k)-210 239 y Fw(where)30 b Fk(ul)r(p)g Fw(represen)m(ts)g(PxLAMCH\(ICTXT,)g('P'\).)-210 475 y Fq(3.15.1)106 b(T)-9 b(est)34 b(Matrices)i(for)f(the)f(Singular)i (V)-9 b(alue)35 b(Decomp)s(osition)h(Routines)72 646 y Fw(Six)c(di\013eren)m(t)h(t)m(yp)s(es)g(of)f(test)i(matrices)f(ma)m (y)g(b)s(e)f(generated)h(for)f(the)h(singular)f(v)-5 b(alue)33 b(decom-)-210 759 y(p)s(osition)e(routines.)44 b(T)-8 b(able)32 b(2)f(sho)m(ws)g(the)h(t)m(yp)s(es)f(a)m(v)-5 b(ailable,)34 b(along)e(with)f(the)h(n)m(um)m(b)s(ers)e(used)g(to)i (refer)-210 872 y(to)i(the)g(matrix)g(t)m(yp)s(es.)51 b(Except)34 b(as)g(noted,)h(all)f(matrix)g(t)m(yp)s(es)g(other)g(than)f (the)h(random)f(bidiagonal)-210 985 y(matrices)40 b(ha)m(v)m(e)g Fk(O)s Fw(\(1\))g(en)m(tries.)67 b(The)38 b(expression)h Fk(U)10 b(D)s(V)59 b Fw(means)38 b(a)i(real)f(diagonal)h(matrix)g Fk(D)h Fw(with)-210 1098 y Fk(O)s Fw(\(1\))31 b(en)m(tries)g(m)m (ultiplied)g(b)m(y)g(unitary)f(\(or)g(real)h(orthogonal\))h(matrices)g (on)e(the)h(left)g(and)f(righ)m(t.)p 766 1215 1648 4 v 764 1328 4 113 v 1217 1328 V 1268 1294 a(Singular)g(V)-8 b(alue)31 b(Distribution)p 2412 1328 V 1218 1331 1196 4 v 764 1441 4 113 v 816 1407 a(T)m(yp)s(e)p 1217 1441 V 247 w(Arithmetic)p 1748 1441 V 267 w(Other)p 2412 1441 V 766 1444 1648 4 v 764 1557 4 113 v 816 1523 a(Zero)p 1217 1557 V 1748 1557 V 1066 w(1)p 2412 1557 V 766 1560 1648 4 v 764 1673 4 113 v 816 1639 a(Iden)m(tit)m(y)p 1217 1673 V 1748 1673 V 931 w(2)p 2412 1673 V 766 1676 1648 4 v 764 1789 4 113 v 816 1755 a(Diagonal)p 1217 1789 V 295 w(3)p 1748 1789 V 2412 1789 V 766 1793 1648 4 v 764 1911 4 118 v 816 1877 a Fk(U)10 b(D)s(V)p 1217 1911 V 307 w Fw(4,)31 b(5)1472 1845 y Ff(y)1507 1877 y Fw(,)f(6)1607 1845 y Ff(z)p 1748 1911 V 2412 1911 V 766 1914 1648 4 v 816 1993 a Fo(y)p Fw({)h(matrix)g(en)m(tries)g(are)g Fe(O)r Ft(\()1761 1929 y Ff(p)p 1825 1929 274 4 v 64 x Ft(o)n(v)n(er\015o)n(w)q(\))816 2106 y Fo(z)p Fw({)g(matrix)g(en)m (tries)g(are)g Fe(O)r Ft(\()1761 2042 y Ff(p)p 1825 2042 327 4 v 64 x Ft(under\015o)n(w\))406 2294 y Fw(T)-8 b(able)31 b(2:)41 b(T)-8 b(est)31 b(matrices)g(for)f(the)h(singular)f(v)-5 b(alue)31 b(decomp)s(osition)-210 2616 y Fq(3.15.2)106 b(Input)34 b(File)h(for)g(T)-9 b(esting)35 b(the)f(ScaLAP)-9 b(A)m(CK)36 b(SVD)f(Routines)72 2787 y Fw(An)30 b(annotated)i(example)f (of)f(an)g(input)g(\014le)g(for)g(the)h(test)g(program)f(is)h(sho)m(wn) f(b)s(elo)m(w.)-210 2973 y Fr('ScaLAPACK)45 b(Singular)h(Value)g (Decomposition)92 b(input)46 b(file')-210 3086 y(6)1479 b(device)46 b(out)-210 3199 y(4)h(maxnodes)-210 3312 y(')g(')-210 3425 y('TEST)f(1)i(-)f(test)g(medium)f(matrices)g(-)h(all) g(types)f(and)h(requests')-210 3538 y(20.0)1335 b(Threshold)-210 3651 y(1)1479 b(number)46 b(of)i(matrices)-210 3764 y(100)1383 b(number)46 b(of)i(rows)-210 3877 y(25)1431 b(number)46 b(of)i(columns)-210 3990 y(1)1479 b(number)46 b(of)i(processor)d (configurations)f(\(P,)j(Q,)g(NB\))-210 4102 y(2)1479 b(values)46 b(of)i(P)f(\(NPROW\))-210 4215 y(2)1479 b(values)46 b(of)i(Q)f(\(NPCOL\))-210 4328 y(8)1479 b(values)46 b(of)i(NB)-210 4441 y(')f(')-210 4554 y('End)g(of)g(tests')-210 4667 y(-1)1545 4989 y Fw(27)p eop end %%Page: 28 28 TeXDict begin 28 27 bop -210 354 a Fc(App)5 b(endix)64 b(A)-210 769 y Fs(ScaLAP)-19 b(A)-6 b(CK)77 b(Routines)-69 1214 y Fw(In)22 b(this)h(app)s(endix,)g(w)m(e)g(review)h(the)f (subroutine)e(naming)i(sc)m(heme)h(for)e(ScaLAP)-8 b(A)m(CK)24 b(and)e(indicate)-210 1327 y(b)m(y)32 b(means)g(of)h(a)g(table)g(whic)m (h)f(subroutines)f(are)h(included)g(in)g(this)g(release.)48 b(W)-8 b(e)33 b(also)h(list)f(the)f(driv)m(er)-210 1440 y(routines.)-69 1553 y(Eac)m(h)39 b(subroutine)f(name)g(in)g(ScaLAP)-8 b(A)m(CK,)40 b(whic)m(h)e(has)g(an)h(LAP)-8 b(A)m(CK)39 b(equiv)-5 b(alen)m(t,)42 b(is)c(simply)-210 1666 y(the)f(LAP)-8 b(A)m(CK)36 b(name)g(prep)s(ended)f(b)m(y)h(a)g Fr(P)p Fw(.)h(All)g(names)f(consist)h(of)f(sev)m(en)h(c)m(haracters)h(in)e (the)g(form)-210 1779 y(PTXXYYY.)31 b(The)f(second)g(letter,)i(T,)e (indicates)i(the)e(matrix)h(data)g(t)m(yp)s(e)g(as)f(follo)m(ws:)-210 1966 y(S)257 b(REAL)-210 2079 y(D)239 b(DOUBLE)31 b(PRECISION)-210 2192 y(C)242 b(COMPLEX)-210 2305 y(Z)252 b(COMPLEX*16)31 b(\(if)g(a)m(v)-5 b(ailable\))-69 2493 y(The)34 b(next)h(t)m(w)m(o)g (letters,)i(XX,)e(indicate)g(the)g(t)m(yp)s(e)f(of)h(matrix.)53 b(Most)35 b(of)g(these)f(t)m(w)m(o-letter)k(co)s(des)-210 2606 y(apply)e(to)h(b)s(oth)f(real)h(and)e(complex)i(routines;)j(a)d (few)f(apply)g(sp)s(eci\014cally)h(to)g(one)f(or)h(the)f(other,)j(as) -210 2718 y(indicated)31 b(b)s(elo)m(w:)-210 2931 y(DB)175 b(general)31 b(band)f(\(diagonally-dominan)m(t)j(lik)m(e\))-210 3044 y(DT)173 b(general)31 b(tridiagonal)h(\(diagonally-dominan)m(t)h (lik)m(e\))-210 3157 y(GB)173 b(general)31 b(band)-210 3270 y(GE)175 b(general)31 b(\(i.e.)42 b(unsymmetric,)30 b(in)g(some)h(cases)g(rectangular\))-210 3383 y(GG)166 b(general)31 b(matrices,)h(generalized)g(problem)e(\(i.e.)42 b(a)31 b(pair)f(of)g(general)i(matrices\))-210 3496 y(HE)178 b(\(complex\))32 b(Hermitian)-210 3608 y(OR)170 b(\(real\))32 b(orthogonal)-210 3721 y(PB)182 b(symmetric)31 b(or)f(Hermitian)h(p)s (ositiv)m(e)h(de\014nite)e(band)-210 3834 y(PO)175 b(symmetric)31 b(or)f(Hermitian)h(p)s(ositiv)m(e)h(de\014nite)-210 3947 y(PT)180 b(symmetric)31 b(or)f(Hermitian)h(p)s(ositiv)m(e)h(de\014nite) e(tridiagonal)-210 4060 y(ST)191 b(symmetric)31 b(tridiagonal)-210 4173 y(SY)189 b(symmetric)-210 4286 y(TR)175 b(triangular)31 b(\(or)g(in)f(some)g(cases)i(quasi-triangular\))-210 4399 y(TZ)186 b(trap)s(ezoidal)-210 4512 y(UN)172 b(\(complex\))32 b(unitary)1545 4989 y(28)p eop end %%Page: 29 29 TeXDict begin 29 28 bop -69 -269 a Fw(The)28 b(last)i(three)f(c)m (haracters,)i(YYY,)e(indicate)h(the)f(computation)h(done)e(b)m(y)h(a)g (particular)g(subrou-)-210 -156 y(tine.)41 b(Included)29 b(in)h(this)h(release)g(are)g(subroutines)e(to)i(p)s(erform)e(the)i (follo)m(wing)h(computations:)-210 32 y(BRD)108 b(reduce)30 b(to)h(bidiagonal)h(form)e(b)m(y)g(orthogonal)i(transformations)-210 145 y(CON)103 b(estimate)32 b(condition)f(n)m(um)m(b)s(er)-210 258 y(EBZ)126 b(compute)31 b(selected)h(eigen)m(v)-5 b(alues)32 b(b)m(y)e(bisection)-210 371 y(EDC)111 b(compute)31 b(eigen)m(v)m(ectors)i(using)d(divide)g(and)g(conquer)-210 484 y(EIN)145 b(compute)31 b(selected)h(eigen)m(v)m(ectors)h(b)m(y)d (in)m(v)m(erse)i(iteration)-210 596 y(EQU)107 b(equilibrate)31 b(a)g(matrix)g(to)g(reduce)f(its)h(condition)g(n)m(um)m(b)s(er)-210 709 y(EV)m(C)115 b(compute)31 b(the)f(eigen)m(v)m(ectors)k(from)c(the)g (Sc)m(h)m(ur)g(factorization)-210 822 y(GBR)106 b(generate)32 b(the)f(orthogonal/unitary)g(matrix)g(from)f(PxGEBRD)-210 935 y(GHR)102 b(generate)32 b(the)f(orthogonal/unitary)g(matrix)g(from) f(PxGEHRD)-210 1048 y(GLQ)109 b(generate)32 b(the)f(orthogonal/unitary) g(matrix)g(from)f(PxGELQF)-210 1161 y(GQL)109 b(generate)32 b(the)f(orthogonal/unitary)g(matrix)g(from)f(PxGEQLF)-210 1274 y(GQR)99 b(generate)32 b(the)f(orthogonal/unitary)g(matrix)g(from) f(PxGEQRF)-210 1387 y(GR)m(Q)102 b(generate)32 b(the)f (orthogonal/unitary)g(matrix)g(from)f(PxGER)m(QF)-210 1500 y(GST)120 b(reduce)30 b(a)h(symmetric-de\014nite)g(generalized)h (eigen)m(v)-5 b(alue)32 b(problem)e(to)h(standard)f(form)-210 1613 y(HRD)104 b(reduce)30 b(to)h(upp)s(er)e(Hessen)m(b)s(erg)h(form)g (b)m(y)g(orthogonal)i(transformations)-210 1726 y(LQF)121 b(compute)31 b(an)f(LQ)g(factorization)j(without)d(piv)m(oting)-210 1838 y(MBR)94 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g (matrix)g(from)f(PxGEBRD)-210 1951 y(MHR)90 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g(matrix)g(from)f(PxGEHRD)-210 2064 y(MLQ)97 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g (matrix)g(from)f(PxGELQF)-210 2177 y(MQL)97 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g(matrix)g(from)f(PxGEQLF)-210 2290 y(MQR)87 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g (matrix)g(from)f(PxGEQRF)-210 2403 y(MR)m(Q)90 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g(matrix)g(from)f(PxGER)m(QF)-210 2516 y(MRZ)102 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g (matrix)g(from)f(PxTZRZF)-210 2629 y(MTR)92 b(m)m(ultiply)31 b(b)m(y)f(the)h(orthogonal/unitary)g(matrix)g(from)f(PxxxTRD)-210 2742 y(QLF)121 b(compute)31 b(a)g(QL)e(factorization)k(without)e(piv)m (oting)-210 2855 y(QPF)116 b(compute)31 b(a)g(QR)e(factorization)k (with)e(column)f(piv)m(oting)-210 2968 y(QRF)111 b(compute)31 b(a)g(QR)e(factorization)k(without)e(piv)m(oting)-210 3080 y(RFS)131 b(re\014ne)30 b(initial)h(solution)g(returned)f(b)m(y)g (TRS)f(routines)-210 3193 y(R)m(QF)114 b(compute)31 b(an)f(R)m(Q)g (factorization)j(without)e(piv)m(oting)-210 3306 y(RZF)126 b(compute)31 b(an)f(RZ)g(factorization)j(without)d(piv)m(oting)-210 3419 y(TRD)106 b(reduce)30 b(a)h(symmetric)g(matrix)f(to)h(real)g (symmetric)g(tridiagonal)h(form)-210 3532 y(TRF)116 b(compute)31 b(a)g(triangular)f(factorization)j(\(LU,)e(Cholesky)-8 b(,)31 b(etc.\))-210 3645 y(TRI)142 b(compute)31 b(in)m(v)m(erse)g (\(based)f(on)h(triangular)f(factorization\))-210 3758 y(TRS)124 b(solv)m(e)32 b(systems)e(of)h(linear)f(equations)i(\(based)e (on)g(triangular)h(factorization\))-69 3987 y(Giv)m(en)36 b(these)g(de\014nitions,)h(the)e(follo)m(wing)i(table)f(indicates)h (the)e(ScaLAP)-8 b(A)m(CK)36 b(subroutines)e(for)-210 4100 y(the)d(solution)g(of)f(systems)g(of)h(linear)g(equations:)1545 4989 y(29)p eop end %%Page: 30 30 TeXDict begin 30 29 bop 2439 -281 a Fw(HE)553 b(UN)338 -168 y(GE)100 b(GG)h(DB)f(GB)g(DT)g(GT)g(PO)f(PB)h(PT)105 b(SY)g(TR)99 b(TZ)g(OR)-19 -55 y(TRF)197 b Fo(\002)404 b(\002)164 b(\002)g(\002)399 b(\002)158 b(\002)d(\002)-19 58 y Fw(TRS)205 b Fo(\002)404 b(\002)164 b(\002)g(\002)399 b(\002)158 b(\002)d(\002)389 b(\002)-19 171 y Fw(RFS)212 b Fo(\002)1344 b(\002)844 b(\002)-19 284 y Fw(TRI)223 b Fo(\002)1344 b(\002)844 b(\002)-19 396 y Fw(CON)184 b Fo(\002)1344 b(\002)844 b(\002)-19 509 y Fw(EQU)188 b Fo(\002)1344 b(\002)-19 622 y Fw(QPF)197 b Fo(\002)-19 750 y Fw(QRF)178 714 y Fb(y)370 750 y Fo(\002)166 b(\002)-19 862 y Fw(RZF)2763 b Fo(\002)-19 990 y Fw(GQR)190 954 y Fb(y)3155 990 y Fo(\002)-19 1117 y Fw(MQR)202 1081 y Fb(z)3155 1117 y Fo(\002)-19 1230 y(y)p Fw({)31 b(also)g(R)m(Q,)g (QL,)f(and)g(LQ)-19 1343 y Fo(z)p Fw({)h(also)g(R)m(Q,)g(RZ,)f(QL,)g (and)g(LQ)-69 1539 y(The)j(follo)m(wing)i(table)g(indicates)f(the)g (ScaLAP)-8 b(A)m(CK)34 b(subroutines)f(for)g(\014nding)f(eigen)m(v)-5 b(alues)36 b(and)-210 1652 y(eigen)m(v)m(ectors)d(or)e(singular)f(v)-5 b(alues)31 b(and)e(singular)i(v)m(ectors:)1687 1799 y(HE)285 1912 y(GE)100 b(GG)g(HS)g(HG)g(TR)f(TG)105 b(SY)g(ST)99 b(PT)g(BD)-19 2025 y(HRD)131 b Fo(\002)-19 2138 y Fw(TRD)1533 b Fo(\002)-19 2251 y Fw(BRD)135 b Fo(\002)-19 2363 y Fw(EQZ)-19 2476 y(EIN)1795 b Fo(\002)-19 2589 y Fw(EBZ)1776 b Fo(\002)-19 2702 y Fw(EDC)1761 b Fo(\002)-19 2815 y Fw(EV)m(C)1075 b Fo(\002)619 b(\002)-19 2928 y Fw(GST)1547 b Fo(\002)-69 3038 y Fw(Orthogonal/unitary)30 b(transformation)f (routines)g(ha)m(v)m(e)h(also)g(b)s(een)f(pro)m(vided)f(for)h(the)g (reductions)-210 3151 y(that)i(use)f(elemen)m(tary)i(transformations.) 300 3256 y(UN)299 3369 y(OR)-19 3482 y(GHR)145 b Fo(\002)-19 3595 y Fw(GTR)i Fo(\002)-19 3708 y Fw(GBR)i Fo(\002)-19 3821 y Fw(MHR)133 b Fo(\002)-19 3934 y Fw(MTR)i Fo(\002)-19 4047 y Fw(MBR)i Fo(\002)-69 4193 y Fw(In)38 b(addition,)i(a)f(n)m(um)m (b)s(er)e(of)h(driv)m(er)g(routines)g(are)h(pro)m(vided)f(with)g(this)g (release.)65 b(The)38 b(naming)-210 4306 y(con)m(v)m(en)m(tion)h(for)e (the)h(driv)m(er)e(routines)h(is)h(the)f(same)g(as)h(for)f(the)g(LAP)-8 b(A)m(CK)37 b(routines,)i(but)e(the)g(last)-210 4419 y(3)d(c)m(haracters)h(YYY)f(ha)m(v)m(e)h(the)f(follo)m(wing)h(meanings) f(\(note)h(an)e(`X')i(in)e(the)h(last)h(c)m(haracter)g(p)s(osition)-210 4532 y(indicates)c(a)g(more)g(exp)s(ert)f(driv)m(er\):)-210 4715 y(SV)189 b(factor)31 b(the)g(matrix)g(and)e(solv)m(e)j(a)f(system) f(of)h(equations)1545 4989 y(30)p eop end %%Page: 31 31 TeXDict begin 31 30 bop -210 -269 a Fw(SVX)121 b(equilibrate,)32 b(factor,)f(solv)m(e,)h(compute)f(error)f(b)s(ounds)e(and)i(do)g (iterativ)m(e)j(re\014nemen)m(t,)d(and)98 -156 y(estimate)i(the)f (condition)g(n)m(um)m(b)s(er)-210 -43 y(LS)200 b(solv)m(e)32 b(o)m(v)m(er-)g(or)e(underdetermined)f(linear)h(system)h(using)f (orthogonal)i(factorizations)-210 70 y(EV)178 b(compute)31 b(all)g(eigen)m(v)-5 b(alues)32 b(and/or)f(eigen)m(v)m(ectors)-210 183 y(EVD)109 b(compute)31 b(all)g(eigen)m(v)-5 b(alues)32 b(and,)e(optionally)-8 b(,)33 b(eigen)m(v)m(ectors)g(\(using)d(divide)g (and)g(conquer)g(algorithm\))-210 296 y(EVX)110 b(compute)31 b(selected)h(eigen)m(v)-5 b(alues)32 b(and)e(eigen)m(v)m(ectors)-210 409 y(GVX)101 b(compute)31 b(selected)h(generalized)g(eigen)m(v)-5 b(alues)32 b(and/or)e(generalized)i(eigen)m(v)m(ectors)-210 522 y(SVD)120 b(compute)31 b(the)f(SVD)h(and/or)f(singular)g(v)m (ectors)-69 734 y(The)g(driv)m(er)g(routines)g(pro)m(vided)g(in)g (ScaLAP)-8 b(A)m(CK)31 b(are)g(indicated)g(b)m(y)f(the)h(follo)m(wing)h (table:)2389 881 y(HE)100 b(HB)288 994 y(GE)g(GG)g(DB)h(GB)f(DT)g(GT)g (PO)f(PB)h(PT)105 b(SY)113 b(SB)c(ST)-19 1107 y(SV)220 b Fo(\002)404 b(\002)163 b(\002)h(\002)400 b(\002)158 b(\002)d(\002)-19 1219 y Fw(SVX)d Fo(\002)1344 b(\002)-19 1332 y Fw(LS)231 b Fo(\002)-19 1445 y Fw(EV)2308 b Fo(\002)-19 1558 y Fw(EVD)2239 b Fo(\002)-19 1671 y Fw(EVX)h Fo(\002)-19 1784 y Fw(GVX)2231 b Fo(\002)-19 1897 y Fw(SVD)151 b Fo(\002)1545 4989 y Fw(31)p eop end %%Page: 32 32 TeXDict begin 32 31 bop -210 354 a Fc(App)5 b(endix)64 b(B)-210 769 y Fs(ScaLAP)-19 b(A)-6 b(CK)77 b(Auxiliary)f(Routines)-69 1214 y Fw(This)35 b(app)s(endix)g(lists)i(all)g(of)f(the)g(auxiliary)h (routines)f(\(except)i(for)e(the)g(BLAS)g(and)f(LAP)-8 b(A)m(CK\))-210 1327 y(that)34 b(are)h(called)g(from)e(the)h(ScaLAP)-8 b(A)m(CK)34 b(routines.)51 b(These)34 b(routines)f(are)i(found)d(in)h (the)h(directory)-210 1440 y Fr(SCALAPACK/SRC)p Fw(.)f(Routines)k(sp)s (eci\014ed)f(with)h(a)g(\014rst)f(c)m(haracter)i(P)f(follo)m(w)m(ed)h (b)m(y)f(an)g(underscore)f(as)-210 1553 y(the)k(second)g(c)m(haracter)i (are)e(a)m(v)-5 b(ailable)43 b(in)c(all)i(four)f(data)g(t)m(yp)s(es)g (\(S,)g(D,)h(C,)f(and)f(Z\),)h(except)h(those)-210 1666 y(mark)m(ed)28 b(\(real\),)i(for)e(whic)m(h)g(the)g(\014rst)f(c)m (haracter)j(ma)m(y)e(b)s(e)f(`S')h(or)g(`D',)h(and)f(those)g(mark)m(ed) g(\(complex\),)-210 1779 y(for)i(whic)m(h)g(the)h(\014rst)e(c)m (haracter)j(ma)m(y)f(b)s(e)f(`C')h(or)f(`Z'.)-210 1892 y(F)-8 b(unctions)31 b(for)f(computing)h(norms:)-210 2079 y(P)p -143 2079 28 4 v 33 w(LANGE)59 b(General)31 b(matrix)-210 2192 y(P)p -143 2192 V 33 w(LANHE)62 b(\(complex\))32 b(Hermitian)f(matrix)-210 2305 y(P)p -143 2305 V 33 w(LANHS)73 b(Upp)s(er)29 b(Hessen)m(b)s(erg)h(matrix)-210 2418 y(P)p -143 2418 V 33 w(LANSY)73 b(Symmetric)30 b(matrix)-210 2531 y(P)p -143 2531 V 33 w(LANTR)59 b(T)-8 b(rap)s(ezoidal)31 b(matrix)-210 2718 y(Lev)m(el)h(2)e(BLAS)h(v)m(ersions)f(of)h(the)f (blo)s(c)m(k)h(routines:)-210 2906 y(P)p -143 2906 V 33 w(GEBD2)74 b(reduce)30 b(a)h(general)g(matrix)g(to)g(bidiagonal)g (form)-210 3019 y(P)p -143 3019 V 33 w(GEHD2)70 b(reduce)30 b(a)h(square)f(matrix)h(to)g(upp)s(er)d(Hessen)m(b)s(erg)i(form)-210 3132 y(P)p -143 3132 V 33 w(GELQ2)79 b(compute)30 b(an)h(LQ)f (factorization)j(without)d(piv)m(oting)-210 3245 y(P)p -143 3245 V 33 w(GEQL2)79 b(compute)30 b(a)h(QL)f(factorization)j (without)d(piv)m(oting)-210 3358 y(P)p -143 3358 V 33 w(GEQR2)69 b(compute)30 b(a)h(QR)f(factorization)j(without)e(piv)m (oting)-210 3471 y(P)p -143 3471 V 33 w(GER)m(Q2)72 b(compute)30 b(an)h(R)m(Q)f(factorization)j(without)e(piv)m(oting)-210 3584 y(P)p -143 3584 V 33 w(GETF2)82 b(compute)30 b(the)h(LU)f (factorization)j(of)e(a)g(general)g(matrix)-210 3696 y(P)p -143 3696 V 33 w(HETD2)75 b(\(complex\))32 b(reduce)e(a)g (Hermitian)i(matrix)e(to)h(real)g(tridiagonal)h(form)-210 3809 y(P)p -143 3809 V 33 w(OR)m(G2L)77 b(\(real\))32 b(generate)f(the)g(orthogonal)h(matrix)e(from)g(PxGEQLF)-210 3922 y(P)p -143 3922 V 33 w(OR)m(G2R)67 b(\(real\))32 b(generate)f(the)g(orthogonal)h(matrix)e(from)g(PxGEQRF)-210 4035 y(P)p -143 4035 V 33 w(OR)m(GL2)77 b(\(real\))32 b(generate)f(the)g(orthogonal)h(matrix)e(from)g(PxGEQLF)-210 4148 y(P)p -143 4148 V 33 w(OR)m(GR2)67 b(\(real\))32 b(generate)f(the)g(orthogonal)h(matrix)e(from)g(PxGER)m(QF)-210 4261 y(P)p -143 4261 V 33 w(ORM2L)62 b(\(real\))32 b(m)m(ultiply)e(b)m (y)g(the)h(orthogonal)h(matrix)e(from)g(PxGEQLF)-210 4374 y(P)p -143 4374 V 33 w(ORM2R)52 b(\(real\))32 b(m)m(ultiply)e(b)m (y)g(the)h(orthogonal)h(matrix)e(from)g(PxGEQRF)-210 4487 y(P)p -143 4487 V 33 w(ORML2)62 b(\(real\))32 b(m)m(ultiply)e(b)m (y)g(the)h(orthogonal)h(matrix)e(from)g(PxGELQF)-210 4600 y(P)p -143 4600 V 33 w(ORMR2)52 b(\(real\))32 b(m)m(ultiply)e(b)m (y)g(the)h(orthogonal)h(matrix)e(from)g(PxGER)m(QF)-210 4713 y(P)p -143 4713 V 33 w(ORMR3)52 b(\(real\))32 b(m)m(ultiply)e(b)m (y)g(the)h(orthogonal)h(matrix)e(from)g(PxTZRZF)1545 4989 y(32)p eop end %%Page: 33 33 TeXDict begin 33 32 bop -210 -269 a Fw(P)p -143 -269 28 4 v 33 w(POTF2)82 b(compute)30 b(the)h(Cholesky)f(factorization)j (of)e(a)g(p)s(ositiv)m(e)g(de\014nite)f(matrix)-210 -156 y(P)p -143 -156 V 33 w(SYGS2)99 b(\(real\))32 b(reduce)e(a)g (symmetric-de\014nite)h(generalized)h(eigen)m(v)-5 b(alue)33 b(problem)c(to)-210 -43 y(P)p -143 -43 V 33 w(SYTD2)86 b(\(real\))32 b(reduce)e(a)g(symmetric)h(matrix)g(to)g(tridiagonal)h (form)-210 70 y(P)p -143 70 V 33 w(TR)-8 b(TI2)116 b(compute)30 b(the)h(in)m(v)m(erse)g(of)g(a)g(triangular)f(matrix)-210 183 y(P)p -143 183 V 33 w(UNG2L)76 b(\(complex\))32 b(generate)f(the)g (unitary)f(matrix)h(from)f(PxGEQLF)-210 296 y(P)p -143 296 V 33 w(UNG2R)66 b(\(complex\))32 b(generate)f(the)g(unitary)f (matrix)h(from)f(PxGEQRF)-210 409 y(P)p -143 409 V 33 w(UNGL2)76 b(\(complex\))32 b(generate)f(the)g(unitary)f(matrix)h(from) f(PxGEQLF)-210 522 y(P)p -143 522 V 33 w(UNGR2)66 b(\(complex\))32 b(generate)f(the)g(unitary)f(matrix)h(from)f(PxGER)m(QF)-210 635 y(P)p -143 635 V 33 w(UNM2L)64 b(\(complex\))32 b(m)m(ultiply)e(b)m (y)g(the)h(unitary)f(matrix)h(from)f(PxGEQLF)-210 748 y(P)p -143 748 V 33 w(UNM2R)54 b(\(complex\))32 b(m)m(ultiply)e(b)m(y)g (the)h(unitary)f(matrix)h(from)f(PxGEQRF)-210 860 y(P)p -143 860 V 33 w(UNML2)64 b(\(complex\))32 b(m)m(ultiply)e(b)m(y)g(the)h (unitary)f(matrix)h(from)f(PxGELQF)-210 973 y(P)p -143 973 V 33 w(UNMR2)54 b(\(complex\))32 b(m)m(ultiply)e(b)m(y)g(the)h (unitary)f(matrix)h(from)f(PxGER)m(QF)-210 1086 y(P)p -143 1086 V 33 w(UNMR3)54 b(\(complex\))32 b(m)m(ultiply)e(b)m(y)g(the) h(unitary)f(matrix)h(from)f(PxTZRZF)-210 1274 y(Other)g(ScaLAP)-8 b(A)m(CK)31 b(auxiliary)g(routines:)-210 1461 y(P)p -143 1461 V 33 w(LABAD)438 b(\(real\))31 b(returns)e(square)i(ro)s(ot)f(of)h (under\015o)m(w)e(and)g(o)m(v)m(er\015o)m(w)j(if)f(exp)s(onen)m(t)f (range)h(is)f(large)-210 1574 y(P)p -143 1574 V 33 w(LABRD)439 b(reduce)30 b(NB)h(ro)m(ws)f(or)g(columns)g(of)h(a)g(matrix)f(to)h(upp) s(er)e(or)h(lo)m(w)m(er)i(bidiagonal)f(form)-210 1687 y(P)p -143 1687 V 33 w(LA)m(CGV)437 b(\(complex\))31 b(conjugates)h(a)f(complex)g(v)m(ector)h(of)e(length)h(n)-210 1800 y(P)p -143 1800 V 33 w(LA)m(CHKIEEE)218 b(\(real\))31 b(p)s(erforms)e(a)i(simple)f(c)m(hec)m(k)i(for)e(the)h(features)g(of)f (the)h(IEEE)e(standard)-210 1913 y(P)p -143 1913 V 33 w(LA)m(CON)437 b(estimate)32 b(the)e(norm)g(of)g(a)h(matrix)g(for)f (use)g(in)g(condition)h(estimation)-210 2026 y(P)p -143 2026 V 33 w(LA)m(CONSB)322 b(\(real\))31 b(lo)s(oks)g(for)f(t)m(w)m(o)i (consecutiv)m(e)g(small)f(sub)s(diagonal)f(elemen)m(ts)-210 2139 y(P)p -143 2139 V 33 w(LA)m(CP2)469 b(copies)31 b(all)g(or)f(part)h(of)f(a)h(distributed)e(matrix)i(to)g(another)g (distributed)e(matrix)-210 2252 y(P)p -143 2252 V 33 w(LA)m(CP3)469 b(\(real\))31 b(copies)g(from)f(a)h(global)h(parallel)f (arra)m(y)g(in)m(to)h(a)e(lo)s(cal)649 2365 y(replicated)h(arra)m(y)g (or)f(vice)i(v)m(ersa.)-210 2478 y(P)p -143 2478 V 33 w(LA)m(CPY)446 b(cop)m(y)31 b(all)g(or)f(part)h(of)f(a)h(distributed)e (matrix)i(to)g(another)g(distributed)e(matrix)-210 2591 y(P)p -143 2591 V 33 w(LAED0)463 b(Used)30 b(b)m(y)g(PxSTEDC.)-210 2704 y(P)p -143 2704 V 33 w(LAED1)463 b(\(real\))31 b(Used)g(b)m(y)f (PxSTEDC.)-210 2816 y(P)p -143 2816 V 33 w(LAED2)463 b(\(real\))31 b(Used)g(b)m(y)f(PxSTEDC.)-210 2929 y(P)p -143 2929 V 33 w(LAED3)463 b(\(real\))31 b(Used)g(b)m(y)f(PxSTEDC.)-210 3042 y(P)p -143 3042 V 33 w(LAEDZ)452 b(\(real\))31 b(Used)g(b)m(y)f (PxSTEDC.)-210 3155 y(P)p -143 3155 V 33 w(LAEVSWP)303 b(mo)m(v)m(es)31 b(the)g(eigen)m(v)m(ectors)i(from)d(where)g(they)h (are)f(computed)g(to)i(a)649 3268 y(standard)d(blo)s(c)m(k)i(cyclic)h (arra)m(y)-210 3381 y(P)p -143 3381 V 33 w(LAHEF)450 b(\(complex\))31 b(compute)g(part)f(of)h(the)f(diagonal)i(piv)m(oting)g (factorization)h(of)d(a)h(Hermitian)649 3494 y(matrix)-210 3607 y(P)p -143 3607 V 33 w(LAHQR)433 b(Find)29 b(the)i(Sc)m(h)m(ur)f (factorization)j(of)d(a)h(Hessen)m(b)s(erg)g(matrix)f(\(mo)s(di\014ed)g (v)m(ersion)h(of)649 3720 y(HQR)f(from)g(EISP)-8 b(A)m(CK\))-210 3833 y(P)p -143 3833 V 33 w(LAHRD)435 b(reduce)30 b(NB)h(columns)f(of)g (a)h(general)g(matrix)g(to)g(Hessen)m(b)s(erg)g(form)-210 3946 y(P)p -143 3946 V 33 w(LAIECTB)348 b(\(real\))31 b(computes)g(the)f(n)m(um)m(b)s(er)f(of)i(negativ)m(e)i(eigen)m(v)-5 b(alues)32 b(in)e(\()p Fk(A)21 b Fo(\000)f Fw(\006)p Fk(I)7 b Fw(\))649 4058 y(where)29 b(the)i(sign)f(bit)h(is)f(assumed)g (to)h(b)s(e)f(bit)g(32.)-210 4171 y(P)p -143 4171 V 33 w(LAIECTL)355 b(\(real\))31 b(computes)g(the)f(n)m(um)m(b)s(er)f(of)i (negativ)m(e)i(eigen)m(v)-5 b(alues)32 b(in)e(\()p Fk(A)21 b Fo(\000)f Fw(\006)p Fk(I)7 b Fw(\))649 4284 y(where)29 b(the)i(sign)f(bit)h(is)f(assumed)g(to)h(b)s(e)f(bit)g(64.)p -205 4397 V -177 4397 a(LANV2)520 b(\(complex\))31 b(computes)g(the)f (Sc)m(h)m(ur)g(factorization)j(of)e(a)g(real)g(2-b)m(y-2)g (nonsymmetric)g(matrix)-210 4510 y(P)p -143 4510 V 33 w(LAPIV)476 b(applies)30 b(p)s(erm)m(utation)h(matrix)f(to)h(a)g (general)h(distributed)d(matrix)-210 4623 y(P)p -143 4623 V 33 w(LAPV2)464 b(piv)m(oting)-210 4736 y(P)p -143 4736 V 33 w(LA)m(QGE)438 b(equilibrate)31 b(a)g(general)g(matrix)1545 4989 y(33)p eop end %%Page: 34 34 TeXDict begin 34 33 bop -210 -269 a Fw(P)p -143 -269 28 4 v 33 w(LA)m(QSY)452 b(equilibrate)31 b(a)g(symmetric)f(matrix)-210 -156 y(P)p -143 -156 V 33 w(LARED1D)327 b(\(real\))31 b(Redistributes)f(an)h(arra)m(y)g(assuming)e(that)i(the)g(input)649 -43 y(arra)m(y)-8 b(,)31 b(BYCOL,)f(is)h(distributed)e(across)i(ro)m (ws)g(and)e(that)i(all)649 70 y(pro)s(cess)f(columns)g(con)m(tain)h (the)g(same)g(cop)m(y)g(of)f(BYCOL.)-210 183 y(P)p -143 183 V 33 w(LARED2D)327 b(Redistributes)30 b(an)g(arra)m(y)h(assuming)f (that)h(the)f(input)g(arra)m(y)-8 b(,)649 296 y(BYR)m(O)m(W,)32 b(is)e(distributed)g(across)g(columns)h(and)e(that)i(all)h(pro)s(cess) 649 409 y(ro)m(ws)e(con)m(tain)i(the)e(same)h(cop)m(y)g(of)g(BYR)m(O)m (W.)h(The)e(output)g(arra)m(y)-8 b(,)649 522 y(BY)g(ALL,)31 b(will)g(b)s(e)e(iden)m(tical)k(on)d(all)h(pro)s(cesses.)-210 635 y(P)p -143 635 V 33 w(LARF)513 b(apply)30 b(\(m)m(ultiply)h(b)m (y\))f(an)g(elemen)m(tary)i(re\015ector)f(to)h(a)e(general)649 748 y(rectangular)h(matrix.)-210 860 y(P)p -143 860 V 33 w(LARFB)449 b(apply)30 b(\(m)m(ultiply)h(b)m(y\))f(a)h(blo)s(c)m(k)g (re\015ector)g(or)f(its)h(transp)s(ose/)649 973 y(conjugate-transp)s (ose)g(to)g(a)g(general)g(rectangular)h(matrix.)-210 1086 y(P)p -143 1086 V 33 w(LARF)m(C)450 b(\(complex\))31 b(apply)f(\(m)m(ultiply)h(b)m(y\))g(the)g(conjugate-transp)s(ose)649 1199 y(of)f(an)g(elemen)m(tary)i(re\015ector)f(to)g(a)g(general)h (matrix.)-210 1312 y(P)p -143 1312 V 33 w(LARF)m(G)445 b(generate)31 b(an)g(elemen)m(tary)h(re\015ector)f(\(Householder)f (matrix\).)-210 1425 y(P)p -143 1425 V 33 w(LARFT)447 b(form)29 b(the)i(triangular)g(factor)g(of)g(a)f(blo)s(c)m(k)h (re\015ector)-210 1538 y(P)p -143 1538 V 33 w(LARZ)516 b(apply)30 b(\(m)m(ultiply)h(b)m(y\))f(an)g(elemen)m(tary)i (re\015ector)f(as)g(returned)e(b)m(y)649 1651 y(P)p 716 1651 V 32 w(TZRZF)h(to)h(a)f(general)i(matrix.)-210 1764 y(P)p -143 1764 V 33 w(LARZB)452 b(apply)30 b(\(m)m(ultiply)h(b)m(y\))f (a)h(blo)s(c)m(k)g(re\015ector)g(or)f(its)h(transp)s(ose/)649 1877 y(conjugate)g(transp)s(ose)f(as)h(returned)e(b)m(y)h(P)p 2142 1877 V 33 w(TZRZF)f(to)i(a)g(general)g(matrix.)-210 1990 y(P)p -143 1990 V 33 w(LARZC)450 b(\(complex\))31 b(apply)f(\(m)m(ultiply)h(b)m(y\))g(the)g(conjugate)g(transp)s(ose)f (of)649 2102 y(an)g(elemen)m(tary)i(re\015ector)f(as)f(returned)g(b)m (y)g(P)p 2268 2102 V 32 w(TZRZF)g(to)h(a)649 2215 y(general)g(matrix.) -210 2328 y(P)p -143 2328 V 33 w(LARZT)450 b(form)29 b(the)i(triangular)g(factor)g(of)g(a)f(blo)s(c)m(k)h(re\015ector)g(as)g (returned)649 2441 y(b)m(y)f(P)p 842 2441 V 32 w(TZRZF.)-210 2554 y(P)p -143 2554 V 33 w(LASCL)465 b(m)m(ultiplies)31 b(a)g(general)g(rectangular)g(matrix)g(b)m(y)f(a)h(real)g(scalar)g (CTO/CFR)m(OM)-210 2667 y(P)p -143 2667 V 33 w(LASE2)-210 2780 y(P)p -143 2780 V 33 w(LASET)460 b(initializes)32 b(a)f(matrix)g(to)g(BET)-8 b(A)31 b(on)f(the)g(diagonal)i(and)e(ALPHA)g (on)649 2893 y(the)g(o\013-diagonals)-210 3006 y(P)p -143 3006 V 33 w(LASMSUB)322 b(\(real\))31 b(lo)s(oks)g(for)f(a)h (small)g(sub)s(diagonal)f(elemen)m(t)i(from)e(the)g(b)s(ottom)649 3119 y(of)g(the)h(matrix)f(that)h(it)g(can)g(safely)g(set)g(to)g(zero.) -210 3232 y(P)p -143 3232 V 33 w(LASNBT)390 b(computes)30 b(the)h(p)s(osition)f(of)h(the)f(sign)h(bit)f(of)h(a)f(double)g (precision)649 3344 y(\015oating)h(p)s(oin)m(t)f(n)m(um)m(b)s(er)-210 3457 y(P)p -143 3457 V 33 w(LASR)-8 b(T)-210 3570 y(P)p -143 3570 V 33 w(LASSQ)466 b(Compute)30 b(a)g(scaled)h(sum)f(of)g (squares)g(of)h(the)f(elemen)m(ts)i(of)f(a)g(v)m(ector)-210 3683 y(P)p -143 3683 V 33 w(LASWP)433 b(P)m(erform)30 b(a)h(series)f(of)h(ro)m(w)f(in)m(terc)m(hanges)-210 3796 y(P)p -143 3796 V 33 w(LA)-8 b(TRA)446 b(computes)30 b(the)h(trace)g(of)g(a)g(distributed)e(matrix)-210 3909 y(P)p -143 3909 V 33 w(LA)-8 b(TRD)445 b(reduce)30 b(NB)h(ro)m(ws)f (and)g(columns)g(of)g(a)h(real)g(symmetric)g(or)f(complex)h(Hermitian) 649 4022 y(matrix)f(to)h(tridiagonal)h(form)-210 4135 y(P)p -143 4135 V 33 w(LA)-8 b(TRS)463 b(solv)m(e)31 b(a)g(triangular)g(system)f(with)g(scaling)i(to)f(prev)m(en)m(t)g(o)m (v)m(er\015o)m(w)-210 4248 y(P)p -143 4248 V 33 w(LA)-8 b(TRZ)458 b(reduces)30 b(an)g(upp)s(er)e(trap)s(ezoidal)k(matrix)e(to)h (upp)s(er)e(triangular)i(form)-210 4361 y(P)p -143 4361 V 33 w(LA)m(UU2)461 b(Un)m(blo)s(c)m(k)m(ed)31 b(v)m(ersion)g(of)f(P)p 1572 4361 V 33 w(LA)m(UUM)-210 4474 y(P)p -143 4474 V 33 w(LA)m(UUM)423 b(Compute)30 b(the)g(pro)s(duct)f(U*U')j(or)e(L'*L)h (\(blo)s(c)m(k)m(ed)g(v)m(ersion\))-210 4586 y(P)p -143 4586 V 33 w(LA)-10 b(WIL)466 b(forms)29 b(the)i(Wilkinson)g(transform) 1545 4989 y(34)p eop end %%Page: 35 35 TeXDict begin 35 34 bop -210 395 a Fs(Bibliograph)-6 b(y)-165 840 y Fw([1])47 b Fa(L.)31 b(S.)g(Bla)n(ckf)n(ord,)f(J.)g (Choi,)j(A.)e(Clear)-6 b(y,)31 b(E.)g(D'Azevedo,)g(J.)f(Demmel,)h(I.)g (Dhillon,)-23 953 y(J.)77 b(Dongarra,)88 b(S.)78 b(Hammarling,)88 b(G.)78 b(Henr)-6 b(y,)88 b(A.)78 b(Petitet,)88 b(K.)78 b(St)-6 b(anley,)-23 1066 y(D.)35 b(W)-11 b(alker,)32 b(and)i(R.)g(C.)h(Whaley)p Fw(,)28 b Fn(Sc)-5 b(aLAP)e(A)n(CK)32 b(Users')g(Guide)p Fw(,)f(So)s(ciet)m(y)h(for)e(Industrial)-23 1179 y(and)g(Applied)g(Mathematics,)i(Philadelphia,)f(P)-8 b(A,)31 b(1997.)-165 1362 y([2])47 b Fa(J.)40 b(Choi,)i(J.)e(Dongarra,) h(S.)f(Ostr)n(oucho)n(v,)f(A.)h(Petitet,)i(D.)e(W)-11 b(alker,)41 b(and)e(R.)h(C.)-23 1475 y(Whaley)p Fw(,)31 b Fn(A)j(pr)-5 b(op)g(osal)38 b(for)d(a)g(set)f(of)h(p)-5 b(ar)g(al)5 b(lel)37 b(b)-5 b(asic)34 b(line)-5 b(ar)36 b(algebr)-5 b(a)36 b(subpr)-5 b(o)g(gr)g(ams)p Fw(,)36 b(Computer)-23 1587 y(Science)k(Dept.)g(Tec)m(hnical)g(Rep)s(ort)f (CS-95-292,)44 b(Univ)m(ersit)m(y)c(of)f(T)-8 b(ennessee,)42 b(Kno)m(xville,)h(TN,)-23 1700 y(Ma)m(y)31 b(1995.)43 b(\(Also)31 b(LAP)-8 b(A)m(CK)31 b(W)-8 b(orking)31 b(Note)h(#100\).) -165 1883 y([3])p -23 1870 191 4 v 238 w(,)e Fn(The)i(design)f(and)i (implementation)h(of)e(the)g(Sc)-5 b(aLAP)e(A)n(CK)31 b(LU,)g(QR,)h(and)h(Cholesky)f(fac-)-23 1996 y(torization)42 b(r)-5 b(outines)p Fw(,)42 b(Scien)m(ti\014c)e(Programming,)h(5)e (\(1996\),)k(pp.)38 b(173{184.)68 b(\(Also)40 b(LAP)-8 b(A)m(CK)-23 2109 y(W)g(orking)31 b(Note)h(#80\).)-165 2291 y([4])47 b Fa(J.)41 b(Dongarra)f(and)g(R.)h(C.)h(Whaley)p Fw(,)37 b Fn(A)h(user's)g(guide)h(to)g(the)g(BLA)n(CS)e(v1.1)p Fw(,)j(Computer)-23 2404 y(Science)g(Dept.)g(Tec)m(hnical)g(Rep)s(ort)f (CS-95-281,)44 b(Univ)m(ersit)m(y)c(of)f(T)-8 b(ennessee,)42 b(Kno)m(xville,)h(TN,)-23 2517 y(1995.)f(\(Also)32 b(LAP)-8 b(A)m(CK)30 b(W)-8 b(orking)32 b(Note)f(#94\).)-165 2699 y([5])47 b Fa(J.)33 b(J.)h(Dongarra,)f(J.)h(Du)g(Cr)n(oz,)g(I.)f(S.)h (Duff,)g(and)f(S.)g(Hammarling)p Fw(,)d Fn(A)i(set)g(of)h(Level)f(3)-23 2812 y(Basic)h(Line)-5 b(ar)33 b(Algebr)-5 b(a)33 b(Subpr)-5 b(o)g(gr)g(ams)p Fw(,)34 b(A)m(CM)d(T)-8 b(rans.)30 b(Math.)h(Soft.,)g (16)g(\(1990\),)i(pp.)d(1{17.)-165 2994 y([6])47 b Fa(J.)34 b(J.)f(Dongarra,)h(J.)f(Du)i(Cr)n(oz,)f(S.)f(Hammarling,)h(and)f(R.)h (J.)g(Hanson)p Fw(,)29 b Fn(A)n(n)j(extende)-5 b(d)-23 3107 y(set)29 b(of)g(F)n(OR)-7 b(TRAN)29 b(b)-5 b(asic)29 b(line)-5 b(ar)30 b(algebr)-5 b(a)30 b(subr)-5 b(outines)p Fw(,)29 b(A)m(CM)e(T)-8 b(rans.)26 b(Math.)h(Soft.,)h(14)f(\(1988\),) -23 3220 y(pp.)i(1{17.)-165 3403 y([7])47 b Fa(M.)h(P.)g(I.)g(F)m(or)n (um)p Fw(,)e Fn(MPI:)e(A)g(message)h(p)-5 b(assing)46 b(interfac)-5 b(e)45 b(standar)-5 b(d)p Fw(,)50 b(In)m(ternational)44 b(Jour-)-23 3515 y(nal)54 b(of)g(Sup)s(ercomputer)e(Applications)j(and) e(High)i(P)m(erformance)f(Computing,)60 b(8)54 b(\(1994\),)-23 3628 y(pp.)73 b(3{4.)172 b(Sp)s(ecial)74 b(issue)g(on)g(MPI.)g(Also)g (a)m(v)-5 b(ailable)77 b(electronically)-8 b(,)88 b(the)74 b(URL)g(is)-23 3741 y Fr(ftp://www.netlib.org/mpi)o(/mp)o(i-re)o(port)o (.ps)41 b Fw(.)-165 3924 y([8])47 b Fa(A.)39 b(Geist,)h(A.)f(Beguelin,) h(J.)f(Dongarra,)g(W.)g(Jiang,)h(R.)e(Manchek,)h(and)g(V.)g(Sun-)-23 4037 y(deram)p Fw(,)31 b Fn(PVM:)i(Par)-5 b(al)5 b(lel)35 b(Virtual)g(Machine.)f(A)f(Users')h(Guide)h(and)g(T)-7 b(utorial)35 b(for)g(Networke)-5 b(d)-23 4149 y(Par)g(al)5 b(lel)34 b(Computing)p Fw(,)e(MIT)e(Press,)g(Cam)m(bridge,)g(MA,)h (1994.)-165 4332 y([9])47 b Fa(C.)30 b(L.)f(La)-8 b(wson,)29 b(R.)g(J.)g(Hanson,)g(D.)g(Kincaid,)h(and)f(F.)g(T.)g(Kr)n(ogh)p Fw(,)d Fn(Basic)i(line)-5 b(ar)30 b(algebr)-5 b(a)-23 4445 y(subpr)g(o)g(gr)g(ams)36 b(for)d(Fortr)-5 b(an)35 b(usage)p Fw(,)c(A)m(CM)g(T)-8 b(rans.)30 b(Math.)h(Soft.,)g(5)f (\(1979\),)k(pp.)29 b(308{323.)-210 4627 y([10])47 b Fa(R.)38 b(C.)i(Whaley)p Fw(,)34 b Fn(Basic)j(line)-5 b(ar)37 b(algebr)-5 b(a)38 b(c)-5 b(ommunic)g(ation)39 b(subpr)-5 b(o)g(gr)g(ams:)53 b(A)n(nalysis)37 b(and)g(im-)-23 4740 y(plementation)e(acr)-5 b(oss)35 b(multiple)e(p)-5 b(ar)g(al)5 b(lel)36 b(ar)-5 b(chite)g(ctur)g(es)p Fw(,)33 b(Computer)d(Science)i(Dept.)f(T)-8 b(ec)m(hnical)1545 4989 y(35)p eop end %%Page: 36 36 TeXDict begin 36 35 bop -23 -269 a Fw(Rep)s(ort)26 b(CS-94-234,)k(Univ) m(ersit)m(y)e(of)e(T)-8 b(ennessee,)28 b(Kno)m(xville,)h(TN,)e(Ma)m(y)h (1994.)36 b(\(Also)28 b(LAP)-8 b(A)m(CK)-23 -156 y(W)g(orking)31 b(Note)h(73\).)-210 32 y([11])47 b Fa(S.)37 b(Bla)n(ckf)n(ord)e(and)h (J.)h(Dongarra)p Fw(,)c Fn(Quick)g(Instal)5 b(lation)38 b(Guide)d(for)g(LAP)-7 b(A)n(CK)34 b(on)h(Unix)-23 145 y(Systems)23 b Fw(Computer)f(Science)h(Dept.)h(Tec)m(hnical)f(Rep)s (ort)g(CS-94-249,)j(Univ)m(ersit)m(y)e(of)e(T)-8 b(ennessee,)-23 258 y(Kno)m(xville,)32 b(TN,)e(Septem)m(b)s(er)g(1994.)42 b(\(Also)32 b(LAP)-8 b(A)m(CK)30 b(W)-8 b(orking)32 b(Note)f(81\).)1545 4989 y(36)p eop end %%Trailer userdict /end-hook known{end-hook}if %%EOF scalapack-1.8.0/INSTALL/SLmake.SUN4SOL20000640000175000017500000000575710604316307016731 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = SUN4SOL2 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = /usr/local/lib # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/local/mpich/lib/libmpich.a -lnsl -lsocket BLACSFINIT = $(BLACSdir)/libmpiblacsF77init-p4.a BLACSCINIT = $(BLACSdir)/libmpiblacsCinit-p4.a BLACSLIB = $(BLACSdir)/libmpiblacs-p4.a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a -lnsl -lsocket #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = mpif77 CC = mpicc NOOPT = -f #NOOPT = -f -mt F77FLAGS = $(NOOPT) -dalign -native -xO5 -xarch=v8plusa DRVOPTS = $(F77FLAGS) CCFLAGS = -xO4 SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a #BLASLIB = /usr/local/lib/libf77blas.a /usr/local/lib/libatlas.a BLASLIB = -xlic_lib=sunperf #BLASLIB = -xlic_lib=sunperf_mt LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.HPPA0000640000175000017500000000561010604316307016254 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = HPPA # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/lib/mpi/build/hpux/ch_p4/lib/libmpich.a -lV3 -Wl,-B,immediate BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = F77FLAGS = -O $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DNoChange $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lblas LAPACKLIB = -llapack # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.T3D0000640000175000017500000000610610604316307016117 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = T3D # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; uncomment and tailor to your system if using MPIBLACS # Will need to comment out the default native BLACS setup below below # #USEMPI = -DUsingMpiBlacs #SMPLIB = /mpp/lib/libmpi.a #BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a #BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(home)/TESTING # # PVMBLACS setup, comment out next 6 lines if using MPI # USEMPI = SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a BLACSFINIT = BLACSCINIT = BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) TESTINGdir = $(home)/TESTING # # system primitive BLACS setup, comment out if using MPI # SMPLIB = USEMPI = BLACSFINIT = BLACSCINIT = BLACSLIB = CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = /mpp/bin/cft77 CC = /mpp/bin/cc NOOPT = F77FLAGS = -dp DRVOPTS = $(F77FLAGS) CCFLAGS = -Tcray-t3d SRCFLAG = F77LOADER = /mpp/bin/mppldr CCLOADER = $(CC) F77LOADFLAGS = -O MSHARED CCLOADFLAGS = -Tcray-t3d # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DUpCase -DT3D $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.POWER20000640000175000017500000000557010604316307016507 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = POWER2 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, comment out next 6 lines if using MPI # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = xlf CC = xlc NOOPT = F77FLAGS = -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = -qnoansialias F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DNoChange $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lesslp2 LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.LINUX0000640000175000017500000000565410604316307016433 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = LINUX # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/lib/mpi/build/LINUX/ch_p4/lib/libmpich.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = mpif77 CC = mpicc NOOPT = F77FLAGS = -funroll-all-loops -O3 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = -O4 SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -Df77IsF2C -DNO_IEEE $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = /usr/lib/libblas.a LAPACKLIB = /usr/lib/liblapack.a # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.O2K0000640000175000017500000000563610604316307016127 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = O2K # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = -lmpi BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = -64 -mips4 -r12000 #NOOPT = -n32 -mips4 -r12000 F77FLAGS = -O2 $(NOOPT) DRVOPTS = $(F77FLAGS) -static CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = $(F77FLAGS) CCLOADFLAGS = $(CCFLAGS) # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lblas LAPACKLIB = -llapack # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.ALPHA0000640000175000017500000000556310604316307016360 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: Feburary 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = ALPHA # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/lib/mpi/build/alpha/ch_p4/lib/libmpich.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = F77FLAGS = -O $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ -DNO_IEEE $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lcxml LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(CBLACSLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.RS6K0000640000175000017500000000560510604316307016255 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = RS6K # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = /usr/local/lib # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/local/mpich/lib/libmpich.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, comment out next 6 lines if using MPI # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = xlf CC = xlc NOOPT = F77FLAGS = -O3 $(NOOPT) -qarch=pwr3 -qmaxmem=-1 DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = -qnoansialias F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DNoChange $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lessl LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.PCA0000640000175000017500000000564010604316307016132 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = PCA # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = -lmpi BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment the next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = -64 -mips4 -r8000 #NOOPT = -n32 -mips4 -r8000 F77FLAGS = -O2 $(NOOPT) DRVOPTS = $(F77FLAGS) -static CCFLAGS = $(F77FLAGS) SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = $(F77FLAGS) CCLOADFLAGS = $(CCFLAGS) # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lblas LAPACKLIB = =llapack # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.SX40000640000175000017500000000574210604316307016150 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = SX4 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/lib0/libmpi.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = F77FLAGS = -eb -float0 -P stack -Cvopt -Wf"-O nomsg -i -pvctl nomsg noassume vwork=stack" DRVOPTS = $(F77FLAGS) CCFLAGS = -h ansi, float0 -D_REENTRANT SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = $(F77FLAGS) CCLOADFLAGS = $(CCFLAGS) # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ -DNO_IEEE $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = /usr/lib0/libblas.a LAPACKLIB = /usr/lib0/liblapack.a # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.SUNMP0000640000175000017500000000600610604316307016426 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: March 20, 1995 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = SUNMP # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /usr/lib/mpi/build/solaris/ch_shmem/lib/libmpich.a -lnsl -lsocket -lthread BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, comment out next 6 lines if using MPI # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a -lnsl -lsocket -lthread #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f77 CC = cc NOOPT = -f #NOOPT = -f -mt F77FLAGS = $(NOOPT) -dalign -native -xO5 -xarch=v8plusa DRVOPTS = $(F77FLAGS) CCFLAGS = -xO4 SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DAdd_ $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/scalapack_$(PLAT).a BLASLIB = -xlic_lib=sunperf #BLASLIB = -xlic_lib=sunperf_mt LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.T3E0000640000175000017500000000576210604316307016127 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = T3E # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = /mpp/lib/libmpi.a BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, uncomment next 6 lines if using PVM # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) TESTINGdir = $(home)/TESTING # # system primitive BLACS setup, comment out if using MPI # SMPLIB = USEMPI = BLACSFINIT = BLACSCINIT = BLACSLIB = CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = f90 CC = cc NOOPT = F77FLAGS = -dp -O3 DRVOPTS = $(F77FLAGS) CCFLAGS = -g -O3 SRCFLAG = F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DUpCase -DT3E $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = echo # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/INSTALL/SLmake.POWER30000640000175000017500000000556610604316307016515 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: SLmake.inc # # Purpose: Top-level Definitions # # Creation date: February 15, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ # SHELL = /bin/sh # # The complete path to the top level of ScaLAPACK directory, usually # $(HOME)/SCALAPACK # home = $(HOME)/SCALAPACK # # The platform identifier to suffix to the end of library names # PLAT = POWER3 # # BLACS setup. All version need the debug level (0 or 1), # and the directory where the BLACS libraries are # BLACSDBGLVL = 0 BLACSdir = $(HOME)/BLACS/LIB # # MPI setup; tailor to your system if using MPIBLACS # Will need to comment out these 6 lines if using PVM # USEMPI = -DUsingMpiBlacs SMPLIB = BLACSFINIT = $(BLACSdir)/blacsF77init_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSCINIT = $(BLACSdir)/blacsCinit_MPI-$(PLAT)-$(BLACSDBGLVL).a BLACSLIB = $(BLACSdir)/blacs_MPI-$(PLAT)-$(BLACSDBGLVL).a TESTINGdir = $(home)/TESTING # # PVMBLACS setup, comment out next 6 lines if using MPI # #USEMPI = #SMPLIB = $(PVM_ROOT)/lib/$(PLAT)/libpvm3.a #BLACSFINIT = #BLACSCINIT = #BLACSLIB = $(BLACSdir)/blacs_PVM-$(PLAT)-$(BLACSDBGLVL).a #TESTINGdir = $(HOME)/pvm3/bin/$(PLAT) CBLACSLIB = $(BLACSCINIT) $(BLACSLIB) $(BLACSCINIT) FBLACSLIB = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) # # The directories to find the various pieces of ScaLapack # PBLASdir = $(home)/PBLAS SRCdir = $(home)/SRC TESTdir = $(home)/TESTING PBLASTSTdir = $(TESTINGdir) TOOLSdir = $(home)/TOOLS REDISTdir = $(home)/REDIST REDISTTSTdir = $(TESTINGdir) # # The fortran and C compilers, loaders, and their flags # F77 = xlf CC = xlc NOOPT = F77FLAGS = -O3 -qarch=pwr3 -qtune=pwr3 -qmaxmem=-1 $(NOOPT) DRVOPTS = $(F77FLAGS) CCFLAGS = $(F77FLAGS) SRCFLAG = -qnoansialias F77LOADER = $(F77) CCLOADER = $(CC) F77LOADFLAGS = CCLOADFLAGS = # # C preprocessor defs for compilation # (-DNoChange, -DAdd_, -DUpCase, or -Df77IsF2C) # CDEFS = -DNoChange $(USEMPI) # # The archiver and the flag(s) to use when building archive (library) # Also the ranlib routine. If your system has no ranlib, set RANLIB = echo # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib # # The name of the libraries to be created/linked to # SCALAPACKLIB = $(home)/libscalapack.a BLASLIB = -lessl LAPACKLIB = # PBLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) PRLIBS = $(SCALAPACKLIB) $(CBLACSLIB) $(SMPLIB) RLIBS = $(SCALAPACKLIB) $(FBLACSLIB) $(CBLACSLIB) $(LAPACKLIB) $(BLASLIB) $(SMPLIB) LIBS = $(PBLIBS) scalapack-1.8.0/TESTING/0000750000175000017500000000000010605234756014442 5ustar muammarmuammarscalapack-1.8.0/TESTING/INV.dat0000640000175000017500000000071010363532303015555 0ustar muammarmuammar'ScaLAPACK, Version 1.0, Matrix Inversion Testing input file' 'MPI machine.' 'INV.out' output file name (if any) 6 device out 5 number of matrix types (next line) 'GEN' 'UTR' 'LTR' 'UPD' 'LPD' GEN, UTR, LTR, UPD, LPD 4 number of problems sizes 2 5 10 15 13 20 30 50 values of N 4 number of NB's 2 3 4 5 6 20 values of NB 4 number of process grids (ordered P & Q) 1 2 1 4 2 3 8 values of P 1 1 4 1 3 2 1 values of Q 1.0 threshold scalapack-1.8.0/TESTING/NEP.dat0000640000175000017500000000075610363532303015555 0ustar muammarmuammar'SCALAPACK NEP (Nonsymmetric Eigenvalue Problem) input file' 'MPI machine' 'NEP.out' output file name (if any) 6 device out 7 number of problems sizes 1 2 3 4 6 10 50 Probs 3 number of NB's 6 8 17 values of NB 2 number of process grids (ordered pairs of P & Q) 1 2 1 1 4 2 1 values of P 1 2 3 4 1 4 8 values of Q 20.0 threshold scalapack-1.8.0/TESTING/SEP.dat0000640000175000017500000001050510363532303015553 0ustar muammarmuammar 'ScaLAPACK Symmetric Eigensolver Test File' ' ' 'sep.out' output file name (if any) 6 device out (13 & 14 reserved for internal testing) 4 maximum number of processes 'N' disable pxsyev tests, recommended for heterogeneous systems. ' ' 'TEST 1 - test tiny matrices - different process configurations' 3 number of matrices 0 1 2 matrix size 1 number of uplo choices 'L' uplo choices 2 number of processor configurations (P, Q, NB) 1 1 values of P (NPROW) 2 1 values of Q (NPCOL) 1 1 values of NB 1 number of matrix types 8 matrix types (see pdseptst.f) 'N' perform subset tests? 80.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 2 - test tiny matrices - all requests' 2 number of matrices 0 1 1 number of uplo choices 'L' uplo choices 1 number of processor configurations (P, Q, NB) 1 values of P (NPROW) 2 values of Q (NPCOL) 1 values of NB 1 number of matrix types 8 matrix types (see pdseptst.f) 'Y' perform subset tests? 80.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 3 - test a small matrix - all types' 1 number of matrices 5 1 number of uplo choices 'L' uplo choices 2 number of processor configurations (P, Q, NB) 1 1 values of P (NPROW) 1 2 values of Q (NPCOL) 1 2 values of NB 22 number of matrix types 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 'N' perform subset tests? 250.0 Threshold -1 Absolute Tolerance ' ' 'TEST 4 - test a small matrix - all requests' 1 number of matrices 4 1 number of uplo choices 'L' uplo choices 2 number of processor configurations (P, Q, NB) 1 1 values of P (NPROW) 1 2 values of Q (NPCOL) 1 2 values of NB 2 number of matrix types 10 22 matrix types 'Y' perform subset tests? 250.0 Threshold -1 Absolute Tolerance ' ' 'TEST 5 - test a small matrix - all processor configurations' 1 number of matrices 6 matrix size 2 number of uplo choices 'L' 'U' uplo choices 13 number of processor configurations (P, Q, NB) 1 1 2 1 2 1 3 1 3 1 2 2 2 values of P (NPROW) 1 1 1 2 1 2 1 3 1 3 2 2 2 values of Q (NPCOL) 1 3 1 1 2 2 1 1 2 2 1 2 3 values of NB 1 number of matrix types 8 matrix types (see pdseptst.f) 'N' perform subset tests? 50.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 6 - test a medium matrix - hard matrix types' 1 number of matrices 21 1 number of uplo choices 'U' uplo choices 1 number of processor configurations (P, Q, NB) 2 values of P (NPROW) 2 values of Q (NPCOL) 8 values of NB 4 number of matrix types 9 10 21 22 'N' perform subset tests? 50.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 7 - test a medium matrix - all processor configurations' 1 number of matrices 27 1 number of uplo choices 'U' uplo choices 13 number of processor configurations (P, Q, NB) 1 1 2 1 2 1 3 1 3 1 2 2 2 values of P (NPROW) 1 1 1 2 1 2 1 3 1 3 2 2 2 values of Q (NPCOL) 1 3 1 1 2 2 1 1 2 2 1 2 3 values of NB 1 number of matrix types 10 matrix types (see pdseptst.f) 'N' perform subset tests? 50.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 8 - test a medium matrix - L and U' 1 number of matrices 24 2 number of uplo choices 'L' 'U' uplo choices 4 number of processor configurations (P, Q, NB) 1 1 3 1 values of P (NPROW) 1 2 1 4 values of Q (NPCOL) 1 3 1 1 values of NB 1 number of matrix types 22 matrix types (see pdseptst.f) 'N' perform subset tests? 20.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'TEST 9 - test one large matrix' 1 number of matrices 100 1 number of uplo choices 'U' uplo choices 1 number of processor configurations (P, Q, NB) 2 values of P (NPROW) 2 values of Q (NPCOL) 8 values of NB 1 number of matrix types 8 matrix types (see pdseptst.f) 'N' perform subset tests? 20.0 Threshold (* 5 for generalized tests) -1 Absolute Tolerance ' ' 'End of tests' -1 scalapack-1.8.0/TESTING/LU.dat0000640000175000017500000000100010363532303015432 0ustar muammarmuammar'SCALAPACK, LU factorization input file' 'MPI Machine' 'LU.out' output file name (if any) 6 device out 4 number of problems sizes 4 10 17 13 23 31 57 values of M 4 12 13 13 23 31 50 values of N 3 number of NB's 2 3 4 5 values of NB 3 number of NRHS's 1 3 9 28 values of NRHS 3 Number of NBRHS's 1 3 5 7 values of NBRHS 4 number of process grids (ordered pairs of P & Q) 1 2 1 4 2 3 8 values of P 1 2 4 1 3 2 1 values of Q 1.0 threshold T (T or F) Test Cond. Est. and Iter. Ref. Routines scalapack-1.8.0/TESTING/BRD.dat0000640000175000017500000000055210363532303015534 0ustar muammarmuammar'ScaLAPACK BRD input file' 'MPI machine' 'BRD.out' output file name (if any) 6 device out 4 number of problems sizes 4 10 17 13 23 31 57 values of M 4 12 13 13 23 31 50 values of N 4 number of NB's 2 3 4 5 values of NB 4 number of processor grids (ordered pairs of P & Q) 1 2 1 4 2 3 8 values of P 1 2 4 1 3 2 1 values of Q 10.0 threshold scalapack-1.8.0/TESTING/LIN/0000750000175000017500000000000010605234753015061 5ustar muammarmuammarscalapack-1.8.0/TESTING/LIN/pdinvchk.f0000640000175000017500000003404510363532303017036 0ustar muammarmuammar SUBROUTINE PDINVCHK( MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, $ FRESID, RCOND, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IASEED, JA, N DOUBLE PRECISION ANORM, FRESID, RCOND * .. * .. Array Arguments .. CHARACTER*3 MATTYP INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDINVCHK computes the scaled residual * * || sub( A ) * inv( sub( A ) ) - I || / ( || sub( A ) || * N * eps ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). to check the result * returned by the matrix inversion routines. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MATTYP (global input) CHARACTER*3 * The type of the distributed matrix to be generated: * if MATTYP = 'GEN' then GENeral matrix, * if MATTYP = 'UTR' then Upper TRiangular matrix, * if MATTYP = 'LTR' then Lower TRiangular matrix, * if MATTYP = 'UPD' then (Upper) symmetric Positive Definite, * if MATTYP = 'LPD' then (Lower) symmetric Positive Definite, * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_A, LOCc(JA+N-1)). On * entry, sub( A ) contains the distributed matrix inverse * computed by PDGETRI, PDPOTRI or PDTRTRI. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * Seed for the random generation of sub( A ). * * ANORM (global input) DOUBLE PRECISION * The 1-norm of the original matrix sub( A ). * * FRESID (global output) DOUBLE PRECISION * The inversion residual. * * RCOND (global output) DOUBLE PRECISION * The condition number of the original distributed matrix. * RCOND = || sub( A ) ||.|| sub( A )^{-1} || where ||A|| * denotes the 1-norm of A. * * WORK (local workspace) DOUBLE PRECISION array, dimension * MAX(2*LOCr(N_A+MOD(IA-1,MB_A))*MB_A, LDW) * where LDW is the workspace requirement for the norm computa- * tions, see PDLANGE, PDLANSY and PDLANTR. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER AFORM, DIAG, UPLO INTEGER ICTXT, ICURCOL, ICURROW, II, IIA, IPW, IROFF, $ IW, J, JB, JJA, JN, KK, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION AUXNORM, EPS, NRMINVAXA, TEMP * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDGEMM, $ PDLASET, PDMATGEN, PDSYMM, PDTRMM * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE, PDLANSY, PDLANTR EXTERNAL ICEIL, LSAMEN, NUMROC, PDLAMCH, PDLANGE, $ PDLANSY, PDLANTR * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * EPS = PDLAMCH( DESCA( CTXT_ ), 'eps' ) * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Compute the condition number * IF( LSAMEN( 1, MATTYP( 1:1 ), 'U' ) ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PDLANGE( '1', N, N, A, IA, JA, DESCA, WORK ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PDLANTR( '1', UPLO, 'Non unit', N, N, A, IA, JA, $ DESCA, WORK ) ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * AFORM = 'S' DIAG = 'D' AUXNORM = PDLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * END IF RCOND = ANORM*AUXNORM * * Compute inv(A)*A * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ ICURROW, ICURCOL ) * * Define array descriptor for working array WORK * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, ICURROW, NPROW ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), ICURROW, ICURCOL, DESCA( CTXT_ ), $ MAX( 1, NP ) ) IPW = DESCW( LLD_ ) * DESCW( NB_ ) + 1 * IF( MYROW.EQ.ICURROW ) THEN II = IROFF + 1 NP = NP - IROFF ELSE II = 1 END IF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Handle first block separately, regenerate a block of columns of A * IW = IROFF + 1 IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK, $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PDLASET( 'Lower', N-1, JB, ZERO, ZERO, WORK, IW+1, $ 1, DESCW ) ELSE CALL PDLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, WORK, IW, $ 2, DESCW ) END IF ELSE CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK( IPW ), $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PDGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, A, $ IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, ZERO, $ WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * CALL PDTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PDSYMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK( IPW ), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 KK = 0, JB-1 WORK( II+KK*(DESCW(LLD_)+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) )-ONE 10 CONTINUE END IF * NRMINVAXA = PDLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) * JB = MIN( N-J+JA, DESCA( NB_ ) ) * * regenerate a block of columns of A * IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK, DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, IIA-1, NP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PDLASET( 'Lower', JA+N-J-1, JB, ZERO, ZERO, $ WORK, IW+J-JA+1, 1, DESCW ) ELSE CALL PDLASET( 'All', J-JA, JB, ZERO, ZERO, WORK, IW, $ 1, DESCW ) CALL PDLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, $ WORK, IW+J-JA, 2, DESCW ) END IF ELSE CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK( IPW ), DESCW( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PDGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, $ A, IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, $ ZERO, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP(2:3), 'TR' ) ) THEN * CALL PDTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PDSYMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these * cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 KK = 0, JB-1 WORK( II+KK*(DESCW( LLD_ )+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) ) - ONE 20 CONTINUE END IF * * Compute the 1-norm of these JB cols * TEMP = PDLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) NRMINVAXA = MAX( TEMP, NRMINVAXA ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * 30 CONTINUE * * Compute the scaled residual * FRESID = NRMINVAXA / ( N * EPS * ANORM ) * RETURN * * End of PDINVCHK * END scalapack-1.8.0/TESTING/LIN/pcqrt16.f0000640000175000017500000002534410363532303016532 0ustar muammarmuammar SUBROUTINE PCQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, $ JX, DESCX, B, IB, JB, DESCB, RWORK, RESID ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IX, JA, JB, JX, M, N, NRHS REAL RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) REAL RWORK( * ) COMPLEX A( * ), B( * ), X( * ) * .. * * Purpose * ======= * * PCQRT16 computes the residual for a solution of a system of linear * equations sub( A )*sub( X ) = B or sub( A' )*sub( X ) = B: * RESID = norm(B - sub( A )*sub( X ) ) / * ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ), * where EPS is the machine epsilon, sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes * X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations: * = 'N': sub( A )*sub( X ) = sub( B ) * = 'T': sub( A' )*sub( X )= sub( B ), where A' is the * transpose of sub( A ). * = 'C': sub( A' )*sub( X )= B, where A' is the conjugate * transpose of sub( A ). * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local * memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * The original M x N matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This * array contains the local pieces of the computed solution * distributed vectors for the system of linear equations. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input/local output) COMPLEX pointer into * the local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributes right hand side vectors for * the system of linear equations. On exit, sub( B ) is over- * written with the difference sub( B ) - sub( A )*sub( X ) or * sub( B ) - sub( A )'*sub( X ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * RWORK (local workspace) REAL array, dimension (LRWORK) * LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * RESID (global output) REAL * The maximum over the number of right hand sides of * norm( sub( B )- sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL, $ NPROW REAL ANORM, BNORM, EPS, XNORM * .. * .. Local Arrays .. REAL TEMP( 2 ) * .. * .. External Functions .. LOGICAL LSAME REAL PCLANGE, PSLAMCH EXTERNAL LSAME, PCLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCGEMM, PSCASUM, $ SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick exit if M = 0 or N = 0 or NRHS = 0 * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN RESID = ZERO RETURN END IF * IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN ANORM = PCLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK ) N1 = N N2 = M ELSE ANORM = PCLANGE( '1', M, N, A, IA, JA, DESCA, RWORK ) N1 = M N2 = N END IF * EPS = PSLAMCH( ICTXT, 'Epsilon' ) * * Compute B - sub( A )*sub( X ) (or B - sub( A' )*sub( X ) ) and * store in B. * CALL PCGEMM( TRANS, 'No transpose', N1, NRHS, N2, -CONE, A, IA, $ JA, DESCA, X, IX, JX, DESCX, CONE, B, IB, JB, DESCB ) * * Compute the maximum over the number of right hand sides of * norm( sub( B ) - sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * RESID = ZERO DO 10 J = 1, NRHS * CALL PSCASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 ) CALL PSCASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 ) * * Only the process columns owning the vector operands will have * the correct result, the other will have zero. * TEMP( 1 ) = BNORM TEMP( 2 ) = XNORM CALL SGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM, $ -1, -1, IDUMM ) BNORM = TEMP( 1 ) XNORM = TEMP( 2 ) * * Every processes have ANORM, BNORM and XNORM now. * IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN RESID = ZERO ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN RESID = ONE / EPS ELSE RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) / $ ( MAX( M, N )*EPS ) ) END IF * 10 CONTINUE * RETURN * * End of PCQRT16 * END scalapack-1.8.0/TESTING/LIN/psdblaschk.f0000640000175000017500000002605310363532303017346 0ustar muammarmuammar SUBROUTINE PSDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PSDBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'T', A 'Transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) REAL pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) REAL array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTRAN, PSMATGEN, $ SGAMX2D, SGEBR2D, SGEBS2D, SGEMM, $ SGERV2D, SGESD2D, SGSUM2D, SLASET * .. * .. External Functions .. INTEGER ISAMAX, NUMROC REAL PSLAMCH EXTERNAL ISAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PSBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PSBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PSBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PSGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO ) * * * Regenerate column of B * CALL PSMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PSAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PSBLASCHK * END scalapack-1.8.0/TESTING/LIN/pzqrt17.f0000640000175000017500000003305110363532303016554 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZQRT17( TRANS, IRESID, M, N, NRHS, A, $ IA, JA, DESCA, X, IX, JX, $ DESCX, B, IB, JB, DESCB, WORK, $ RWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IRESID, IX, JA, JB, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), X( * ) DOUBLE PRECISION RWORK( * ) * .. * * Purpose * ======= * * PZQRT17 computes the ratio * * || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps) * * where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and * * alpha = ||B|| if IRESID = 1 (zero-residual problem) * alpha = ||R|| if IRESID = 2 (otherwise). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies whether or not the transpose of sub( A ) is used. * = 'N': No transpose, op( sub( A ) ) = sub( A ). * = 'C': Conjugate transpose, op( sub( A ) ) = sub( A' ). * * IRESID (global input) INTEGER * IRESID = 1 indicates zero-residual problem. * IRESID = 2 indicates non-zero residual. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( B ). * If TRANS = 'C', the number of rows of the distributed * submatrix sub( X ). * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( X ). Otherwise N is the number of rows of * the distributed submatrix sub( B ). * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices sub( X ) and sub( B ). * NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed M-by-N * submatrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX*16 pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * If TRANS = 'N', this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ). Otherwise, this * array contains the local pieces of the M-by-NRHS distributed * submatrix sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B,LOCc(JB+NRHS-1)). * If TRANS='N', this array contains the local pieces of the * distributed M-by-NRHS submatrix operand sub( B ). Otherwise, * this array contains the local pieces of the distributed * N-by-NRHS submatrix operand sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0 * otherwise LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0 * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ), * Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ), * NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER IACOL, IBCOL, IBROW, ICOFFB, ICTXT, INFO, $ IOFFA, IROFFB, ISCL, IW, IW2, JW, JW2, MYCOL, $ NRHSQ, NRHSP, MYROW, NCOLS, NPCOL, NPROW, $ NROWS, NROWSP DOUBLE PRECISION ERR, NORMA, NORMB, NORMRS, NORMX, SMLNUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), DESCW2( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL INDXG2P, LSAME, NUMROC, PDLAMCH, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PZGEMM, PZLACPY, $ PZLASCL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX * .. * .. Executable Statements .. * PZQRT17 = ZERO * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN NROWS = M NCOLS = N IOFFA = MOD( JA-1, DESCA( NB_ ) ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN NROWS = N NCOLS = M IOFFA = MOD( IA-1, DESCA( MB_ ) ) ELSE CALL PXERBLA( ICTXT, 'PZQRT17', -1 ) RETURN END IF * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.LE.0 ) $ RETURN * IROFFB = MOD( IA-1, DESCA( MB_ ) ) ICOFFB = MOD( JA-1, DESCA( NB_ ) ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) NRHSP = NUMROC( NRHS+IROFFB, DESCB( NB_ ), MYROW, IBROW, NPROW ) NROWSP = NUMROC( NROWS+IROFFB, DESCA( MB_ ), MYROW, IBROW, NPROW ) * * Assign array descriptor DESCW for workspace WORK, where DESCW * holds a copy of the distributed submatrix sub( B ) * CALL DESCSET( DESCW, NROWS+IROFFB, NRHS+ICOFFB, DESCB( MB_ ), $ DESCB( NB_ ), IBROW, IBCOL, ICTXT, MAX( 1, $ NROWSP ) ) * * Assign array descriptor DESCW2 for workspace WORK, where DESCW2 * holds a copy of the distributed submatrix sub( X**T ) * CALL DESCSET( DESCW2, NRHS+ICOFFB, NCOLS+IOFFA, DESCX( NB_ ), $ DESCX( MB_ ), IBROW, IACOL, ICTXT, MAX( 1, $ NRHSP ) ) * NORMA = PZLANGE( 'One-norm', M, N, A, IA, JA, DESCA, RWORK ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'Precision' ) ISCL = 0 * * compute residual and scale it * IW = 1 + IROFFB JW = 1 + ICOFFB CALL PZLACPY( 'All', NROWS, NRHS, B, IB, JB, DESCB, WORK, IW, JW, $ DESCW ) CALL PZGEMM( TRANS, 'No transpose', NROWS, NRHS, NCOLS, $ DCMPLX( -ONE ), A, IA, JA, DESCA, X, IX, JX, DESCX, $ DCMPLX( ONE ), WORK, IW, JW, DESCW ) NORMRS = PZLANGE( 'Max', NROWS, NRHS, WORK, IW, JW, DESCW, $ RWORK ) IF( NORMRS.GT.SMLNUM ) THEN ISCL = 1 CALL PZLASCL( 'General', NORMRS, ONE, NROWS, NRHS, WORK, $ IW, JW, DESCW, INFO ) END IF * * compute R'*sub( A ) * IW2 = 1 + ICOFFB JW2 = 1 + IOFFA CALL PZGEMM( 'Conjugate transpose', TRANS, NRHS, NCOLS, NROWS, $ DCMPLX( ONE ), WORK, IW, JW, DESCW, A, IA, JA, DESCA, $ DCMPLX( ZERO ), WORK( NROWSP*NRHSQ+1 ), IW2, JW2, $ DESCW2 ) * * compute and properly scale error * ERR = PZLANGE( 'One-norm', NRHS, NCOLS, WORK( NROWSP*NRHSQ+1 ), $ IW2, JW2, DESCW2, RWORK ) IF( NORMA.NE.ZERO ) $ ERR = ERR / NORMA * IF( ISCL.EQ.1 ) $ ERR = ERR*NORMRS * IF( IRESID.EQ.1 ) THEN NORMB = PZLANGE( 'One-norm', NROWS, NRHS, B, IB, JB, DESCB, $ RWORK ) IF( NORMB.NE.ZERO ) $ ERR = ERR / NORMB ELSE NORMX = PZLANGE( 'One-norm', NCOLS, NRHS, X, IX, JX, DESCX, $ RWORK ) IF( NORMX.NE.ZERO ) $ ERR = ERR / NORMX END IF * PZQRT17 = ERR / ( PDLAMCH( ICTXT, 'Epsilon' ) * $ DBLE( MAX( M, N, NRHS ) ) ) * RETURN * * End of PZQRT17 * END scalapack-1.8.0/TESTING/LIN/psqrt14.f0000640000175000017500000003355410363532303016552 0ustar muammarmuammar REAL FUNCTION PSQRT14( TRANS, M, N, NRHS, A, IA, JA, $ DESCA, X, IX, JX, DESCX, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IX, JA, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PSQRT14 checks whether sub( X ) is in the row space of sub( A ) or * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both * sub( X ) and sub( A ) such that their norms are in the range * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing * triangle, scaled by MAX(M,N,NRHS)*eps. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * = 'N': No transpose, check for sub( X ) in the row space of * sub( A ), * = 'T': Transpose, check for sub( X ) in row space of * sub( A )'. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( X ). NRHS >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array * contains the local pieces of the M-by-N distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) REAL pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * On entry, this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N', * and the M-by-NRHS distributed submatrix sub( X ) otherwise. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * WORK (local workspace) REAL array dimension (LWORK) * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where * * IF TRANS='N', (LQ fact) * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW, * NPROW ) * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW, * RSRC_A, NPROW ) * LWF = MB_A * ( MB_A + MNRHSP + NQ0 ) * ELSE (QR fact) * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL, * NPCOL ) * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL, * CSRC_A, NPCOL ) * LWF = NB_A * ( NB_A + MP0 + NNRHSQ ) * END IF * * and, * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL TPSD INTEGER IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO, $ IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA, $ JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL, $ MYROW, NPCOL, NPROW, NPW, NQWA, NQW REAL AMAX, ANRM, ERR, XNRM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) REAL RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLANGE, PSLAMCH EXTERNAL LSAME, NUMROC, PSLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSAMAX, $ PSCOPY, PSGELQF, PSGEQRF, PSLACPY, $ PSLASCL, PXERBLA, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PSQRT14 = ZERO * IPWA = 1 IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IWA = IROFFA + 1 JWA = ICOFFA + 1 CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN IF( N.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .FALSE. MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) NQW = NQWA * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA + M JWX = JWA LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) * ELSE IF( LSAME( TRANS, 'T' ) ) THEN IF( M.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .TRUE. MPW = MPWA NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA JWX = JWA + N LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) ELSE CALL PXERBLA( ICTXT, 'PSQRT14', -1 ) RETURN END IF * * Copy and scale sub( A ) * IPTAU = IPWA + MPW*NQW CALL PSLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA, $ JWA, DESCW ) RWORK( 1 ) = ZERO ANRM = PSLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK ) IF( ANRM.NE.ZERO ) $ CALL PSLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA, $ JWA, DESCW, INFO ) * * Copy sub( X ) or sub( X )' into the right place and scale it * IF( TPSD ) THEN * * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work * DO 10 J = 1, NRHS CALL PSCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX, $ JWX+J-1, DESCW, 1 ) 10 CONTINUE XNRM = PSLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PSLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1) * MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPW = IPTAU + MIN( MQW, NQW ) LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) ) CALL PSGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in upper triangle of * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1) * ERR = ZERO IF( N.LT.M ) THEN DO 20 J = JWX, JWA+N+NRHS-1 CALL PSAMAX( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ), $ IWA+N, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 20 CONTINUE END IF CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * ELSE * * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work * DO 30 J = 1, NRHS CALL PSCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), $ IWX+J-1, JWX, DESCW, DESCW( M_ ) ) 30 CONTINUE * XNRM = PSLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PSLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1) * NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPW = IPTAU + MIN( MPW, NPW ) LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) ) CALL PSGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in lower triangle in * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1) * ERR = ZERO DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 ) CALL PSAMAX( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ), $ IWX+J-JWA-M, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 40 CONTINUE CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * END IF * PSQRT14 = ERR / ( REAL( MAX( M, N, NRHS ) ) * $ PSLAMCH( ICTXT, 'Epsilon' ) ) * RETURN * * End of PSQRT14 * END scalapack-1.8.0/TESTING/LIN/pclaschk.f0000640000175000017500000003035510363532303017020 0ustar muammarmuammar SUBROUTINE PCLASCHK( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, SYMM INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX WORK( * ), X( * ) * .. * * Purpose * ======= * * PCLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed matrix, * otherwise sub( A ) is a general distributed matrix. * * DIAG (global input) CHARACTER * If DIAG = 'D', sub( A ) is diagonally dominant. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 0. * * X (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,LOCc(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X), * NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM, $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF, $ IXCOL, IXROW, J, JBRHS, JJ, JJA, JJX, LDX, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL DIVISOR, EPS, RESID1 COMPLEX BETA * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D, $ CLASET, PBCTRAN, PCMATGEN, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICAMAX, NUMROC REAL PSLAMCH EXTERNAL ICAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * LDX = MAX( 1, NP ) IPB = 1 IPX = IPB + NP * DESCX( NB_ ) IPA = IPX + NQ * DESCX( NB_ ) * IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF * ICURCOL = IXCOL * * Loop over the rhs * DO 40 J = 1, NRHS, DESCX( NB_ ) JBRHS = MIN( DESCX( NB_ ), NRHS-J+1 ) * * Transpose x from ICURCOL to all rows * IOFFX = IIX + ( JJX - 1 ) * DESCX( LLD_ ) CALL PBCTRAN( ICTXT, 'Column', 'Transpose', N, JBRHS, $ DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO, $ WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL, $ WORK( IPA ) ) * * Regenerate B in IXCOL * IF( MYCOL.EQ.ICURCOL ) THEN CALL PCMATGEN( ICTXT, 'N', 'N', DESCX( M_ ), DESCX( N_ ), $ DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX, $ IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1, $ JBRHS, MYROW, MYCOL, NPROW, NPCOL ) BETA = ONE ELSE BETA = ZERO END IF * IF( NQ.GT.0 ) THEN DO 10 II = IIA, IIA+NP-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IIA+NP-II ) * * Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1). * CALL PCMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK( IPA ), IB, DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, II-1, IB, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Compute B <= B - A * X. * CALL CGEMM( 'No transpose', 'Transpose', IB, JBRHS, NQ, $ -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS, $ BETA, WORK( IPB+II-IIA ), LDX ) * 10 CONTINUE * ELSE IF( MYCOL.NE.ICURCOL ) THEN * CALL CLASET( 'All', NP, JBRHS, ZERO, ZERO, WORK( IPB ), $ LDX ) * END IF * * Add B rowwise to ICURCOL * CALL CGSUM2D( ICTXT, 'Row', ' ', NP, JBRHS, WORK( IPB ), LDX, $ MYROW, ICURCOL ) * IF( MYCOL.EQ.ICURCOL ) THEN * * Figure || A * X - B || & || X || * IPW = IPA + JBRHS DO 20 JJ = 0, JBRHS - 1 IF( NP.GT.0 ) THEN II = ICAMAX( NP, WORK( IPB+JJ*LDX ), 1 ) WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) ) WORK( IPW+JJ ) = ABS( X( IOFFX + ICAMAX( NP, $ X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ* $ DESCX( LLD_ ) ) ) ELSE WORK( IPA+JJ ) = ZERO WORK( IPW+JJ ) = ZERO END IF 20 CONTINUE * * After CGAMX2D computation, * WORK(IPB) has the maximum of || Ax - b ||, and * WORK(IPX) has the maximum of || X ||. * CALL CGAMX2D( ICTXT, 'Column', ' ', 1, 2*JBRHS, $ WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL ) * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * IF( MYROW.EQ.0 ) THEN DO 30 JJ = 0, JBRHS - 1 RESID1 = REAL( WORK( IPA+JJ ) ) / $ ( REAL( WORK( IPW+JJ ) )*DIVISOR ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 30 CONTINUE IF( MYCOL.NE.0 ) $ CALL SGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 ) END IF * ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * CALL SGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 * END IF * IF( MYCOL.EQ.ICURCOL ) $ JJX = JJX + JBRHS ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, RESID, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, RESID, 1, 0, 0 ) END IF * RETURN * * End of PCLASCHK * END scalapack-1.8.0/TESTING/LIN/psqrt17.f0000640000175000017500000003265610363532303016557 0ustar muammarmuammar REAL FUNCTION PSQRT17( TRANS, IRESID, M, N, NRHS, A, $ IA, JA, DESCA, X, IX, JX, $ DESCX, B, IB, JB, DESCB, WORK, $ RWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IRESID, IX, JA, JB, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) REAL A( * ), B( * ), WORK( * ), X( * ) REAL RWORK( * ) * .. * * Purpose * ======= * * PSQRT17 computes the ratio * * || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps) * * where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and * * alpha = ||B|| if IRESID = 1 (zero-residual problem) * alpha = ||R|| if IRESID = 2 (otherwise). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies whether or not the transpose of sub( A ) is used. * = 'N': No transpose, op( sub( A ) ) = sub( A ). * = 'T': Transpose, op( sub( A ) ) = sub( A' ). * * IRESID (global input) INTEGER * IRESID = 1 indicates zero-residual problem. * IRESID = 2 indicates non-zero residual. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( B ). * If TRANS = 'T', the number of rows of the distributed * submatrix sub( X ). * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( X ). Otherwise N is the number of rows of * the distributed submatrix sub( B ). * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices sub( X ) and sub( B ). * NRHS >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed M-by-N * submatrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) REAL pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * If TRANS = 'N', this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ). Otherwise, this * array contains the local pieces of the M-by-NRHS distributed * submatrix sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input) REAL pointer into the local memory * to an array of dimension (LLD_B,LOCc(JB+NRHS-1)). * If TRANS='N', this array contains the local pieces of the * distributed M-by-NRHS submatrix operand sub( B ). Otherwise, * this array contains the local pieces of the distributed * N-by-NRHS submatrix operand sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) REAL array, dimension (LWORK) * If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0 * otherwise LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0 * * RWORK (local workspace) REAL array, dimension (LRWORK) * LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ), * Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ), * NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER IACOL, IBCOL, IBROW, ICOFFB, ICTXT, INFO, $ IOFFA, IROFFB, ISCL, IW, IW2, JW, JW2, MYCOL, $ NRHSQ, NRHSP, MYROW, NCOLS, NPCOL, NPROW, $ NROWS, NROWSP REAL ERR, NORMA, NORMB, NORMRS, NORMX, SMLNUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), DESCW2( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PSLANGE, PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PSLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PSGEMM, PSLACPY, $ PSLASCL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, REAL * .. * .. Executable Statements .. * PSQRT17 = ZERO * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN NROWS = M NCOLS = N IOFFA = MOD( JA-1, DESCA( NB_ ) ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN NROWS = N NCOLS = M IOFFA = MOD( IA-1, DESCA( MB_ ) ) ELSE CALL PXERBLA( ICTXT, 'PSQRT17', -1 ) RETURN END IF * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.LE.0 ) $ RETURN * IROFFB = MOD( IA-1, DESCA( MB_ ) ) ICOFFB = MOD( JA-1, DESCA( NB_ ) ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) NRHSP = NUMROC( NRHS+IROFFB, DESCB( NB_ ), MYROW, IBROW, NPROW ) NROWSP = NUMROC( NROWS+IROFFB, DESCA( MB_ ), MYROW, IBROW, NPROW ) * * Assign array descriptor DESCW for workspace WORK, where DESCW * holds a copy of the distributed submatrix sub( B ) * CALL DESCSET( DESCW, NROWS+IROFFB, NRHS+ICOFFB, DESCB( MB_ ), $ DESCB( NB_ ), IBROW, IBCOL, ICTXT, MAX( 1, $ NROWSP ) ) * * Assign array descriptor DESCW2 for workspace WORK, where DESCW2 * holds a copy of the distributed submatrix sub( X**T ) * CALL DESCSET( DESCW2, NRHS+ICOFFB, NCOLS+IOFFA, DESCX( NB_ ), $ DESCX( MB_ ), IBROW, IACOL, ICTXT, MAX( 1, $ NRHSP ) ) * NORMA = PSLANGE( 'One-norm', M, N, A, IA, JA, DESCA, RWORK ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'Precision' ) ISCL = 0 * * compute residual and scale it * IW = 1 + IROFFB JW = 1 + ICOFFB CALL PSLACPY( 'All', NROWS, NRHS, B, IB, JB, DESCB, WORK, IW, JW, $ DESCW ) CALL PSGEMM( TRANS, 'No transpose', NROWS, NRHS, NCOLS, -ONE, A, $ IA, JA, DESCA, X, IX, JX, DESCX, ONE, WORK, IW, JW, $ DESCW ) NORMRS = PSLANGE( 'Max', NROWS, NRHS, WORK, IW, JW, DESCW, $ RWORK ) IF( NORMRS.GT.SMLNUM ) THEN ISCL = 1 CALL PSLASCL( 'General', NORMRS, ONE, NROWS, NRHS, WORK, $ IW, JW, DESCW, INFO ) END IF * * compute R'*sub( A ) * IW2 = 1 + ICOFFB JW2 = 1 + IOFFA CALL PSGEMM( 'Transpose', TRANS, NRHS, NCOLS, NROWS, ONE, WORK, $ IW, JW, DESCW, A, IA, JA, DESCA, ZERO, $ WORK( NROWSP*NRHSQ+1 ), IW2, JW2, DESCW2 ) * * compute and properly scale error * ERR = PSLANGE( 'One-norm', NRHS, NCOLS, WORK( NROWSP*NRHSQ+1 ), $ IW2, JW2, DESCW2, RWORK ) IF( NORMA.NE.ZERO ) $ ERR = ERR / NORMA * IF( ISCL.EQ.1 ) $ ERR = ERR*NORMRS * IF( IRESID.EQ.1 ) THEN NORMB = PSLANGE( 'One-norm', NROWS, NRHS, B, IB, JB, DESCB, $ RWORK ) IF( NORMB.NE.ZERO ) $ ERR = ERR / NORMB ELSE NORMX = PSLANGE( 'One-norm', NCOLS, NRHS, X, IX, JX, DESCX, $ RWORK ) IF( NORMX.NE.ZERO ) $ ERR = ERR / NORMX END IF * PSQRT17 = ERR / ( PSLAMCH( ICTXT, 'Epsilon' ) * $ REAL( MAX( M, N, NRHS ) ) ) * RETURN * * End of PSQRT17 * END scalapack-1.8.0/TESTING/LIN/pzpbinfo.f0000640000175000017500000004472710363532303017067 0ustar muammarmuammar SUBROUTINE PZPBINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZPBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZPBINFO * END scalapack-1.8.0/TESTING/LIN/pdgetrrv.f0000640000175000017500000003011610363532303017060 0ustar muammarmuammar SUBROUTINE PDGETRRV( M, N, A, IA, JA, DESCA, IPIV, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDGETRRV reforms sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from the * triangular matrices L and U returned by PDGETRF. It multiplies * an upper triangular matrix stored in the upper triangle of sub( A ) * times the unit lower triangular matrix stored in the lower triangle. * To accomplish this, the routine basically performs the PDGETRF * routine in reverse. * * It computes L*U first, and then apply P: P*L*U => sub( A ). In the * J-th loop, the block column (or column panel), which has the lower * triangular unit matrix L is multiplied with the block row (or row * panel), which contains the upper triangular matrix U. * * ( L1 ) ( 0 0 ) ( L1*U1 L1*U2 ) * A` = L * U + A` = ( ) * (U1 U2) + ( ) = ( ) * ( L2 ) ( 0 A`) ( L2*U1 L2*U2+A` ) * * where L1 is a lower unit triangular matrix and U1 is an upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the distributed matrix sub( A ) * contains the the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. On exit, the original distributed matrix sub( A ) * is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array of dimension (LWORK) * LWORK >= MpA0 * NB_A + NqA0 * MB_A, where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of columns of L, and a block of * rows of U. INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J, $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ), $ DESCU( DLEN_ ), IDUM( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PDGEMM, PDLACPY, $ PDLAPIV, PDLASET, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPL = 1 IPU = IPL + MP * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Define array descriptors for L and U * MN = MIN( M, N ) IL = MAX( ( ( IA+MN-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) JL = MAX( ( ( JA+MN-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * CALL DESCSET( DESCL, IA+M-IL, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * CALL DESCSET( DESCU, DESCA( MB_ ), JA+N-JL, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) + DESCA( MB_ ) ) * * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+MN-J, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PDLACPY( 'Lower', M-IL+IA, JB, A, IL, J, DESCA, $ WORK( IPL ), 1, 1, DESCL ) CALL PDLASET( 'Upper', M-IL+IA, JB, ZERO, ONE, WORK( IPL ), $ 1, 1, DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PDLACPY( 'Upper', JB, JA+N-J, A, IL, J, DESCA, $ WORK( IPU ), 1, 1, DESCU ) CALL PDLASET( 'Lower', JB-1, JA+N-J, ZERO, ZERO, $ WORK( IPU ), 2, 1, DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PDLASET( 'Lower', IA+M-IL-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Zero the upper triangular piece of the current block. * CALL PDLASET( 'Upper', JB, JA+N-J, ZERO, ZERO, A, IL, J, $ DESCA ) * * Update the matrix sub( A ). * CALL PDGEMM( 'No transpose', 'No transpose', IA+M-IL, $ JA+N-J, JB, ONE, WORK( IPL ), 1, 1, DESCL, $ WORK( IPU ), 1, 1, DESCU, ONE, A, IL, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCL( M_ ) = DESCL( M_ ) + DESCL( MB_ ) DESCL( RSRC_ ) = MOD( DESCL( RSRC_ ) + NPROW - 1, NPROW ) DESCL( CSRC_ ) = MOD( DESCL( CSRC_ ) + NPCOL - 1, NPCOL ) DESCU( N_ ) = DESCU( N_ ) + DESCU( NB_ ) DESCU( RSRC_ ) = DESCL( RSRC_ ) DESCU( CSRC_ ) = DESCL( CSRC_ ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PDLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPL ), $ 1, 1, DESCL ) CALL PDLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPL ), 1, 1, $ DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PDLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPU ), 1, $ 1, DESCU ) CALL PDLASET( 'Lower', JB-1, N, ZERO, ZERO, WORK( IPU ), 2, 1, $ DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PDLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Zero the upper triangular piece of the current block. * CALL PDLASET( 'Upper', JB, N, ZERO, ZERO, A, IA, JA, DESCA ) * * Update the matrix sub( A ). * CALL PDGEMM( 'No transpose', 'No transpose', M, N, JB, ONE, $ WORK( IPL ), 1, 1, DESCL, WORK( IPU ), 1, 1, $ DESCU, ONE, A, IA, JA, DESCA ) * * Apply pivots so that sub( A ) = P*L*U * CALL PDLAPIV( 'Backward', 'Row', 'Col', MIN( M, N ), N, A, IA, JA, $ DESCA, IPIV, IA, 1, DESCIP, IDUM ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDGETRRV * END scalapack-1.8.0/TESTING/LIN/pdpblaschk.f0000640000175000017500000002550010363532303017337 0ustar muammarmuammar SUBROUTINE PDPBLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PDPBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, DGEBR2D, $ DGEBS2D, DGEMM, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, PBDTRAN, PDMATGEN * .. * .. External Functions .. INTEGER IDAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PDBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BW+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PDPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PDBLASCHK * END scalapack-1.8.0/TESTING/LIN/pzptinfo.f0000640000175000017500000004475010363532303017105 0ustar muammarmuammar SUBROUTINE PZPTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZPTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZPTINFO * END scalapack-1.8.0/TESTING/LIN/pzgbdriver.f0000640000175000017500000010013710363532303017402 0ustar muammarmuammar PROGRAM PZGBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PZGBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by ZGB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTMEM INTEGER, default = 2048. * INTMEM is the size of the integer workspace used in this * driver as input as the IPIV vector. It represents an * upper bound on NB, the blocksize of the data * distribution. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER INTMEM PARAMETER ( INTMEM = 2048 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER MEMSIZ, NTESTS, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER IPIV(INTMEM) INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, $ PZCHEKPAD, PZDBLASCHK, PZFILLPAD, PZGBINFO, $ PZGBTRF, PZGBTRS, PZMATGEN, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PZLANGE EXTERNAL LSAME, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PZGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) $ + (BWL+BWU) NB = MAX( NB, 2*(BWL+BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 * IF( NB.GT.INTMEM ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) THEN WRITE( NOUT,* )'You have chosen an ' $ ,'NB > INTMEM in the driver.' WRITE(NOUT, *)'Please edit the driver ' $ ,'and increase the value of INTMEM' ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2*BWL+2*BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((2*BWL+2*BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, $ (2*BWL+2*BWU+1), NB, 0, 0, $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 1 * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PZLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PZDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PZLANGE( '1', (2*BWL+2*BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PZGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PZGBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, 'PZGBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PZMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PZGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZGBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PZDBLASCHK( 'N', 'N', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl (bwl+bwu) + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE((BWL+BWU))) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST (bwl+bwu)*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE((BWL+BWU))*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST (bwl+bwu)*(bwl+bwu) * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+(bwl+bwu)) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE((BWL+BWU))) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * (BWL+BWU)*(BWL+BWU) ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZGBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzqrt14.f0000640000175000017500000003402110363532303016547 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZQRT14( TRANS, M, N, NRHS, A, IA, JA, $ DESCA, X, IX, JX, DESCX, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IX, JA, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 A( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PZQRT14 checks whether sub( X ) is in the row space of sub( A ) or * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both * sub( X ) and sub( A ) such that their norms are in the range * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing * triangle, scaled by MAX(M,N,NRHS)*eps. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * = 'N': No transpose, check for sub( X ) in the row space of * sub( A ), * = 'C': Conjugate transpose, check for sub( X ) in row space * of sub( A )'. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( X ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array * contains the local pieces of the M-by-N distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX*16 pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * On entry, this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N', * and the M-by-NRHS distributed submatrix sub( X ) otherwise. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * WORK (local workspace) COMPLEX*16 array dimension (LWORK) * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where * * IF TRANS='N', (LQ fact) * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW, * NPROW ) * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW, * RSRC_A, NPROW ) * LWF = MB_A * ( MB_A + MNRHSP + NQ0 ) * ELSE (QR fact) * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL, * NPCOL ) * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL, * CSRC_A, NPCOL ) * LWF = NB_A * ( NB_A + MP0 + NNRHSQ ) * END IF * * and, * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL TPSD INTEGER IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO, $ IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA, $ JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL, $ MYROW, NPCOL, NPROW, NPW, NQWA, NQW DOUBLE PRECISION ANRM, ERR, XNRM COMPLEX*16 AMAX * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) DOUBLE PRECISION RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL LSAME, NUMROC, PDLAMCH, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, DGAMX2D, INFOG2L, $ PXERBLA, PZMAX1, PZCOPY, PZGELQF, $ PZGEQRF, PZLACGV, PZLACPY, PZLASCL * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PZQRT14 = ZERO * IPWA = 1 IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IWA = IROFFA + 1 JWA = ICOFFA + 1 CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN IF( N.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .FALSE. MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) NQW = NQWA * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA + M JWX = JWA LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) * ELSE IF( LSAME( TRANS, 'C' ) ) THEN IF( M.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .TRUE. MPW = MPWA NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA JWX = JWA + N LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) ELSE CALL PXERBLA( ICTXT, 'PZQRT14', -1 ) RETURN END IF * * Copy and scale sub( A ) * IPTAU = IPWA + MPW*NQW CALL PZLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA, $ JWA, DESCW ) RWORK( 1 ) = ZERO ANRM = PZLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK ) IF( ANRM.NE.ZERO ) $ CALL PZLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA, $ JWA, DESCW, INFO ) * * Copy sub( X ) or sub( X )' into the right place and scale it * IF( TPSD ) THEN * * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work * DO 10 J = 1, NRHS CALL PZCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX, $ JWX+J-1, DESCW, 1 ) 10 CONTINUE XNRM = PZLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PZLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1) * MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPW = IPTAU + MIN( MQW, NQW ) LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) ) CALL PZGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in upper triangle of * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1) * ERR = ZERO IF( N.LT.M ) THEN DO 20 J = JWX, JWA+N+NRHS-1 CALL PZMAX1( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ), $ IWA+N, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 20 CONTINUE END IF CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * ELSE * * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work * DO 30 J = 1, NRHS CALL PZCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), $ IWX+J-1, JWX, DESCW, DESCW( M_ ) ) CALL PZLACGV( N, WORK( IPWA ), IWX+J-1, JWX, DESCW, $ DESCW( M_ ) ) 30 CONTINUE * XNRM = PZLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PZLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1) * NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPW = IPTAU + MIN( MPW, NPW ) LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) ) CALL PZGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in lower triangle in * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1) * ERR = ZERO DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 ) CALL PZMAX1( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ), $ IWX+J-JWA-M, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 40 CONTINUE CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * END IF * PZQRT14 = ERR / ( DBLE( MAX( M, N, NRHS ) ) * $ PDLAMCH( ICTXT, 'Epsilon' ) ) * RETURN * * End of PZQRT14 * END scalapack-1.8.0/TESTING/LIN/pcgelqrv.f0000640000175000017500000002310110363532303017042 0ustar muammarmuammar SUBROUTINE PCGELQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGELQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PCGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PCGELQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PCGELQF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX array, dimension * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IL, IN, $ IPT, IPV, IPW, J, JJA, JV, K, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLACPY, $ PCLARFB, PCLARFT, PCLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IL, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+K-I, DESCA( MB_ ) ) J = JA + I - IA JV = 1 + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PCLARFT( 'Forward', 'Rowwise', N-J+JA, IB, A, I, J, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'Upper', IB, N-J+JA, A, I, J, DESCA, WORK( IPV ), $ 1, JV, DESCV ) CALL PCLASET( 'Lower', IB, N-J+JA, ZERO, ONE, WORK( IPV ), 1, $ JV, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PCLASET( 'Upper', IB, N-J+JA-1, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-I+IA, N-J+JA, IB, WORK( IPV ), 1, $ JV, DESCV, WORK( IPT ), A, I, J, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) * 10 CONTINUE * * Handle first block separately * IB = IN - IA + 1 * * Compute upper triangular matrix T * CALL PCLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, TAU, $ WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'Upper', IB, N, A, IA, JA, DESCA, WORK( IPV ), 1, $ ICOFF+1, DESCV ) CALL PCLASET( 'Lower', IB, N, ZERO, ONE, WORK, 1, ICOFF+1, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PCLASET( 'Upper', IB, N-1, ZERO, ZERO, A, IA, JA+1, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N, IB, WORK( IPV ), 1, ICOFF+1, DESCV, $ WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCGELQRV * END scalapack-1.8.0/TESTING/LIN/pspbmv1.f0000640000175000017500000007267210363532303016630 0ustar muammarmuammar SUBROUTINE PSPBDCMV( LDBW, BW, UPLO, N, A, JA, DESCA, NRHS, B, IB, $ DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPBTRF and this is stored in AF. If a linear system * is to be solved using PSPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, $ FIRST_PROC, I, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, J, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, OFST, PART_OFFSET, $ PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Pre-calculate bw^2 * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PSPBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PSPBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (BW+2)*BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( UPLO, 'L' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL SLATCPY( 'U', BW, BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to right * CALL STRSD2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL SSBMV( 'L', NUMROC_SIZE, BW, ONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, ZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * CALL STRMV( 'U', 'N', 'N', BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), LLDA-1, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, BW*( BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL-1 ) * CALL STRMV( $ 'L', $ 'N', $ 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 30 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( UPLO, 'U' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL SLATCPY( 'L', BW, BW, A( OFST+1 ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to left * CALL STRSD2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL-1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL SSBMV( 'U', NUMROC_SIZE, BW, ONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, ZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL+1 ) * CALL STRMV( 'U', 'N', 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, BW*( BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * CALL STRMV( $ 'L', $ 'N', $ 'N', BW, $ A( 1 ), LLDA-1, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 60 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PSBsBMV1 * END scalapack-1.8.0/TESTING/LIN/pctzrzrv.f0000640000175000017500000002443310363532303017134 0ustar muammarmuammar SUBROUTINE PCTZRZRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCTZRZRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from T, Z * computed by PCTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= M >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors T and Z computed * by PCTZRZF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PCTZRZF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JM1, JV, L, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLACPY, $ PCLARZB, PCLARZT, PCLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LT.M ) $ RETURN * L = N - M JM1 = JA + MIN( M+1, N ) - 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA + 1 JV = ICOFF + JM1 - JA + 1 * * Compute upper triangular matrix T * CALL PCLARZT( 'Backward', 'Rowwise', L, IB, A, IA, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', IB, L, A, IA, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(IA:IA+IB-1,JA:JA+IB-1) * CALL PCLACPY( 'Lower', IB-1, IB-1, A, IA+1, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) * * Zeroes the row panel of sub( A ) to get T(IA:IN,JA:JA+N-1) * CALL PCLASET( 'All', IB, L, ZERO, ZERO, A, IA, JM1, DESCA ) CALL PCLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Apply block Householder transformation * CALL PCLARZB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', IN-IA+1, N, IB, L, WORK( IPV ), 1, JV, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * * Restore strict lower part of A( IA:IA+IB-1, JA:JA+N-1 ) * CALL PCLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, ICOFF+1, DESCV, $ A, IA+1, JA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute upper triangular matrix T * CALL PCLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', IB, L, A, I, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(I:I+IB-1,J:J+IB-1 ) * CALL PCLACPY( 'Lower', IB-1, IB-1, A, I+1, JA+I-IA, DESCA, $ WORK( IPV ), 1, ICOFF+1+I-IA, DESCV ) * * Zeoes the row panel of sub( A ) to get T(IA:I-1,JA+I-IA:JA+N-1) * CALL PCLASET( 'All', IB, L, ZERO, ZERO, A, I, JM1, DESCA ) CALL PCLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, I+1, JA+I-IA, $ DESCA ) * * Apply block Householder transformation * CALL PCLARZB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I+IB-IA, N-I+IA, IB, L, WORK( IPV ), $ 1, JV, DESCV, WORK( IPT ), A, IA, JA+I-IA, DESCA, $ WORK( IPW ) ) * CALL PCLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, $ ICOFF+1+I-IA, DESCV, A, I+1, JA+I-IA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCTZRZRV * END scalapack-1.8.0/TESTING/LIN/Makefile0000640000175000017500000003330710363532303016521 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Linear Equations Testing Makefile # # Creation date: March 20, 1995 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc sluexe = $(TESTINGdir)/xslu dluexe = $(TESTINGdir)/xdlu cluexe = $(TESTINGdir)/xclu zluexe = $(TESTINGdir)/xzlu sdbluexe = $(TESTINGdir)/xsdblu ddbluexe = $(TESTINGdir)/xddblu cdbluexe = $(TESTINGdir)/xcdblu zdbluexe = $(TESTINGdir)/xzdblu sdtluexe = $(TESTINGdir)/xsdtlu ddtluexe = $(TESTINGdir)/xddtlu cdtluexe = $(TESTINGdir)/xcdtlu zdtluexe = $(TESTINGdir)/xzdtlu sgbluexe = $(TESTINGdir)/xsgblu dgbluexe = $(TESTINGdir)/xdgblu cgbluexe = $(TESTINGdir)/xcgblu zgbluexe = $(TESTINGdir)/xzgblu slltexe = $(TESTINGdir)/xsllt dlltexe = $(TESTINGdir)/xdllt clltexe = $(TESTINGdir)/xcllt zlltexe = $(TESTINGdir)/xzllt spblltexe = $(TESTINGdir)/xspbllt dpblltexe = $(TESTINGdir)/xdpbllt cpblltexe = $(TESTINGdir)/xcpbllt zpblltexe = $(TESTINGdir)/xzpbllt sptlltexe = $(TESTINGdir)/xsptllt dptlltexe = $(TESTINGdir)/xdptllt cptlltexe = $(TESTINGdir)/xcptllt zptlltexe = $(TESTINGdir)/xzptllt sinvexe = $(TESTINGdir)/xsinv dinvexe = $(TESTINGdir)/xdinv cinvexe = $(TESTINGdir)/xcinv zinvexe = $(TESTINGdir)/xzinv sqrexe = $(TESTINGdir)/xsqr dqrexe = $(TESTINGdir)/xdqr cqrexe = $(TESTINGdir)/xcqr zqrexe = $(TESTINGdir)/xzqr slsexe = $(TESTINGdir)/xsls dlsexe = $(TESTINGdir)/xdls clsexe = $(TESTINGdir)/xcls zlsexe = $(TESTINGdir)/xzls smatgen = psmatgen.o pmatgeninc.o dmatgen = pdmatgen.o pmatgeninc.o cmatgen = pcmatgen.o pmatgeninc.o zmatgen = pzmatgen.o pmatgeninc.o slinchk = pslaschk.o pslafchk.o dlinchk = pdlaschk.o pdlafchk.o clinchk = pclaschk.o pclafchk.o zlinchk = pzlaschk.o pzlafchk.o slu = psludriver.o psluinfo.o psgetrrv.o $(smatgen) $(slinchk) dlu = pdludriver.o pdluinfo.o pdgetrrv.o $(dmatgen) $(dlinchk) clu = pcludriver.o pcluinfo.o pcgetrrv.o $(cmatgen) $(clinchk) zlu = pzludriver.o pzluinfo.o pzgetrrv.o $(zmatgen) $(zlinchk) sdblu = psdbdriver.o psdbinfo.o psdblaschk.o psdbmv1.o psbmatgen.o $(smatgen) ddblu = pddbdriver.o pddbinfo.o pddblaschk.o pddbmv1.o pdbmatgen.o $(dmatgen) cdblu = pcdbdriver.o pcdbinfo.o pcdblaschk.o pcdbmv1.o pcbmatgen.o $(cmatgen) zdblu = pzdbdriver.o pzdbinfo.o pzdblaschk.o pzdbmv1.o pzbmatgen.o $(zmatgen) sdtlu = psdtdriver.o psdtinfo.o psdtlaschk.o psdbmv1.o psbmatgen.o $(smatgen) ddtlu = pddtdriver.o pddtinfo.o pddtlaschk.o pddbmv1.o pdbmatgen.o $(dmatgen) cdtlu = pcdtdriver.o pcdtinfo.o pcdtlaschk.o pcdbmv1.o pcbmatgen.o $(cmatgen) zdtlu = pzdtdriver.o pzdtinfo.o pzdtlaschk.o pzdbmv1.o pzbmatgen.o $(zmatgen) sgblu = psgbdriver.o psgbinfo.o psdblaschk.o psgbmv1.o psbmatgen.o $(smatgen) dgblu = pdgbdriver.o pdgbinfo.o pddblaschk.o pdgbmv1.o pdbmatgen.o $(dmatgen) cgblu = pcgbdriver.o pcgbinfo.o pcdblaschk.o pcgbmv1.o pcbmatgen.o $(cmatgen) zgblu = pzgbdriver.o pzgbinfo.o pzdblaschk.o pzgbmv1.o pzbmatgen.o $(zmatgen) sllt = pslltdriver.o pslltinfo.o pspotrrv.o $(smatgen) $(slinchk) dllt = pdlltdriver.o pdlltinfo.o pdpotrrv.o $(dmatgen) $(dlinchk) cllt = pclltdriver.o pclltinfo.o pcpotrrv.o $(cmatgen) $(clinchk) zllt = pzlltdriver.o pzlltinfo.o pzpotrrv.o $(zmatgen) $(zlinchk) spbllt = pspbdriver.o pspbinfo.o pspblaschk.o pspbmv1.o psbmatgen.o $(smatgen) dpbllt = pdpbdriver.o pdpbinfo.o pdpblaschk.o pdpbmv1.o pdbmatgen.o $(dmatgen) cpbllt = pcpbdriver.o pcpbinfo.o pcpblaschk.o pcpbmv1.o pcbmatgen.o $(cmatgen) zpbllt = pzpbdriver.o pzpbinfo.o pzpblaschk.o pzpbmv1.o pzbmatgen.o $(zmatgen) sptllt = psptdriver.o psptinfo.o psptlaschk.o pspbmv1.o psbmatgen.o $(smatgen) dptllt = pdptdriver.o pdptinfo.o pdptlaschk.o pdpbmv1.o pdbmatgen.o $(dmatgen) cptllt = pcptdriver.o pcptinfo.o pcptlaschk.o pcpbmv1.o pcbmatgen.o $(cmatgen) zptllt = pzptdriver.o pzptinfo.o pzptlaschk.o pzpbmv1.o pzbmatgen.o $(zmatgen) sinv = psinvdriver.o psinvinfo.o psinvchk.o $(smatgen) dinv = pdinvdriver.o pdinvinfo.o pdinvchk.o $(dmatgen) cinv = pcinvdriver.o pcinvinfo.o pcinvchk.o $(cmatgen) zinv = pzinvdriver.o pzinvinfo.o pzinvchk.o $(zmatgen) sqr = psqrdriver.o psqrinfo.o psgeqrrv.o psgeqlrv.o psgelqrv.o \ psgerqrv.o pstzrzrv.o pslafchk.o $(smatgen) dqr = pdqrdriver.o pdqrinfo.o pdgeqrrv.o pdgeqlrv.o pdgelqrv.o \ pdgerqrv.o pdtzrzrv.o pdlafchk.o $(dmatgen) cqr = pcqrdriver.o pcqrinfo.o pcgeqrrv.o pcgeqlrv.o pcgelqrv.o \ pcgerqrv.o pctzrzrv.o pclafchk.o $(cmatgen) zqr = pzqrdriver.o pzqrinfo.o pzgeqrrv.o pzgeqlrv.o pzgelqrv.o \ pzgerqrv.o pztzrzrv.o pzlafchk.o $(zmatgen) sls = pslsdriver.o pslsinfo.o psqrt13.o psqrt14.o psqrt16.o \ psqrt17.o $(smatgen) dls = pdlsdriver.o pdlsinfo.o pdqrt13.o pdqrt14.o pdqrt16.o \ pdqrt17.o $(dmatgen) cls = pclsdriver.o pclsinfo.o pcqrt13.o pcqrt14.o pcqrt16.o \ pcqrt17.o $(cmatgen) zls = pzlsdriver.o pzlsinfo.o pzqrt13.o pzqrt14.o pzqrt16.o \ pzqrt17.o $(zmatgen) all : single double complex complex16 single: $(sluexe) $(sdbluexe) $(sdtluexe) $(sgbluexe) $(slltexe) \ $(spblltexe) $(sptlltexe) $(sinvexe) $(sqrexe) $(slsexe) double: $(dluexe) $(ddbluexe) $(ddtluexe) $(dgbluexe) $(dlltexe) \ $(dpblltexe) $(dptlltexe) $(dinvexe) $(dqrexe) $(dlsexe) complex: $(cluexe) $(cdbluexe) $(cdtluexe) $(cgbluexe) $(clltexe) \ $(cpblltexe) $(cptlltexe) $(cinvexe) $(cqrexe) $(clsexe) complex16: $(zluexe) $(zdbluexe) $(zdtluexe) $(zgbluexe) $(zlltexe) \ $(zpblltexe) $(zptlltexe) $(zinvexe) $(zqrexe) $(zlsexe) $(TESTINGdir)/LU.dat: ../LU.dat cp ../LU.dat $(TESTINGdir) $(sluexe): $(SCALAPACKLIB) $(slu) $(F77LOADER) $(F77LOADFLAGS) -o $(sluexe) $(slu) $(LIBS) $(MAKE) $(TESTINGdir)/LU.dat $(dluexe): $(SCALAPACKLIB) $(dlu) $(F77LOADER) $(F77LOADFLAGS) -o $(dluexe) $(dlu) $(LIBS) $(MAKE) $(TESTINGdir)/LU.dat $(cluexe): $(SCALAPACKLIB) $(clu) $(F77LOADER) $(F77LOADFLAGS) -o $(cluexe) $(clu) $(LIBS) $(MAKE) $(TESTINGdir)/LU.dat $(zluexe): $(SCALAPACKLIB) $(zlu) $(F77LOADER) $(F77LOADFLAGS) -o $(zluexe) $(zlu) $(LIBS) $(MAKE) $(TESTINGdir)/LU.dat $(TESTINGdir)/BLU.dat: ../BLU.dat cp ../BLU.dat $(TESTINGdir) $(sdbluexe): $(SCALAPACKLIB) $(sdblu) $(F77LOADER) $(F77LOADFLAGS) -o $(sdbluexe) $(sdblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(ddbluexe): $(SCALAPACKLIB) $(ddblu) $(F77LOADER) $(F77LOADFLAGS) -o $(ddbluexe) $(ddblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(cdbluexe): $(SCALAPACKLIB) $(cdblu) $(F77LOADER) $(F77LOADFLAGS) -o $(cdbluexe) $(cdblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(zdbluexe): $(SCALAPACKLIB) $(zdblu) $(F77LOADER) $(F77LOADFLAGS) -o $(zdbluexe) $(zdblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(sdtluexe): $(SCALAPACKLIB) $(sdtlu) $(F77LOADER) $(F77LOADFLAGS) -o $(sdtluexe) $(sdtlu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(ddtluexe): $(SCALAPACKLIB) $(ddtlu) $(F77LOADER) $(F77LOADFLAGS) -o $(ddtluexe) $(ddtlu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(cdtluexe): $(SCALAPACKLIB) $(cdtlu) $(F77LOADER) $(F77LOADFLAGS) -o $(cdtluexe) $(cdtlu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(zdtluexe): $(SCALAPACKLIB) $(zdtlu) $(F77LOADER) $(F77LOADFLAGS) -o $(zdtluexe) $(zdtlu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(sgbluexe): $(SCALAPACKLIB) $(sgblu) $(F77LOADER) $(F77LOADFLAGS) -o $(sgbluexe) $(sgblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(dgbluexe): $(SCALAPACKLIB) $(dgblu) $(F77LOADER) $(F77LOADFLAGS) -o $(dgbluexe) $(dgblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(cgbluexe): $(SCALAPACKLIB) $(cgblu) $(F77LOADER) $(F77LOADFLAGS) -o $(cgbluexe) $(cgblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(zgbluexe): $(SCALAPACKLIB) $(zgblu) $(F77LOADER) $(F77LOADFLAGS) -o $(zgbluexe) $(zgblu) $(LIBS) $(MAKE) $(TESTINGdir)/BLU.dat $(TESTINGdir)/LLT.dat: ../LLT.dat cp ../LLT.dat $(TESTINGdir) $(slltexe): $(SCALAPACKLIB) $(sllt) $(F77LOADER) $(F77LOADFLAGS) -o $(slltexe) $(sllt) $(LIBS) $(MAKE) $(TESTINGdir)/LLT.dat $(dlltexe): $(SCALAPACKLIB) $(dllt) $(F77LOADER) $(F77LOADFLAGS) -o $(dlltexe) $(dllt) $(LIBS) $(MAKE) $(TESTINGdir)/LLT.dat $(clltexe): $(SCALAPACKLIB) $(cllt) $(F77LOADER) $(F77LOADFLAGS) -o $(clltexe) $(cllt) $(LIBS) $(MAKE) $(TESTINGdir)/LLT.dat $(zlltexe): $(SCALAPACKLIB) $(zllt) $(F77LOADER) $(F77LOADFLAGS) -o $(zlltexe) $(zllt) $(LIBS) $(MAKE) $(TESTINGdir)/LLT.dat $(TESTINGdir)/BLLT.dat: ../BLLT.dat cp ../BLLT.dat $(TESTINGdir) $(spblltexe): $(SCALAPACKLIB) $(spbllt) $(F77LOADER) $(F77LOADFLAGS) -o $(spblltexe) $(spbllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(dpblltexe): $(SCALAPACKLIB) $(dpbllt) $(F77LOADER) $(F77LOADFLAGS) -o $(dpblltexe) $(dpbllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(cpblltexe): $(SCALAPACKLIB) $(cpbllt) $(F77LOADER) $(F77LOADFLAGS) -o $(cpblltexe) $(cpbllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(zpblltexe): $(SCALAPACKLIB) $(zpbllt) $(F77LOADER) $(F77LOADFLAGS) -o $(zpblltexe) $(zpbllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(sptlltexe): $(SCALAPACKLIB) $(sptllt) $(F77LOADER) $(F77LOADFLAGS) -o $(sptlltexe) $(sptllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(dptlltexe): $(SCALAPACKLIB) $(dptllt) $(F77LOADER) $(F77LOADFLAGS) -o $(dptlltexe) $(dptllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(cptlltexe): $(SCALAPACKLIB) $(cptllt) $(F77LOADER) $(F77LOADFLAGS) -o $(cptlltexe) $(cptllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(zptlltexe): $(SCALAPACKLIB) $(zptllt) $(F77LOADER) $(F77LOADFLAGS) -o $(zptlltexe) $(zptllt) $(LIBS) $(MAKE) $(TESTINGdir)/BLLT.dat $(TESTINGdir)/INV.dat: ../INV.dat cp ../INV.dat $(TESTINGdir) $(sinvexe): $(SCALAPACKLIB) $(sinv) $(F77LOADER) $(F77LOADFLAGS) -o $(sinvexe) $(sinv) $(LIBS) $(MAKE) $(TESTINGdir)/INV.dat $(dinvexe): $(SCALAPACKLIB) $(dinv) $(F77LOADER) $(F77LOADFLAGS) -o $(dinvexe) $(dinv) $(LIBS) $(MAKE) $(TESTINGdir)/INV.dat $(cinvexe): $(SCALAPACKLIB) $(cinv) $(F77LOADER) $(F77LOADFLAGS) -o $(cinvexe) $(cinv) $(LIBS) $(MAKE) $(TESTINGdir)/INV.dat $(zinvexe): $(SCALAPACKLIB) $(zinv) $(F77LOADER) $(F77LOADFLAGS) -o $(zinvexe) $(zinv) $(LIBS) $(MAKE) $(TESTINGdir)/INV.dat $(TESTINGdir)/QR.dat: ../QR.dat cp ../QR.dat $(TESTINGdir) $(sqrexe): $(SCALAPACKLIB) $(sqr) $(F77LOADER) $(F77LOADFLAGS) -o $(sqrexe) $(sqr) $(LIBS) $(MAKE) $(TESTINGdir)/QR.dat $(dqrexe): $(SCALAPACKLIB) $(dqr) $(F77LOADER) $(F77LOADFLAGS) -o $(dqrexe) $(dqr) $(LIBS) $(MAKE) $(TESTINGdir)/QR.dat $(cqrexe): $(SCALAPACKLIB) $(cqr) $(F77LOADER) $(F77LOADFLAGS) -o $(cqrexe) $(cqr) $(LIBS) $(MAKE) $(TESTINGdir)/QR.dat $(zqrexe): $(SCALAPACKLIB) $(zqr) $(F77LOADER) $(F77LOADFLAGS) -o $(zqrexe) $(zqr) $(LIBS) $(MAKE) $(TESTINGdir)/QR.dat $(TESTINGdir)/LS.dat: ../LS.dat cp ../LS.dat $(TESTINGdir) $(slsexe): $(SCALAPACKLIB) $(sls) $(F77LOADER) $(F77LOADFLAGS) -o $(slsexe) $(sls) $(LIBS) $(MAKE) $(TESTINGdir)/LS.dat $(dlsexe): $(SCALAPACKLIB) $(dls) $(F77LOADER) $(F77LOADFLAGS) -o $(dlsexe) $(dls) $(LIBS) $(MAKE) $(TESTINGdir)/LS.dat $(clsexe): $(SCALAPACKLIB) $(cls) $(F77LOADER) $(F77LOADFLAGS) -o $(clsexe) $(cls) $(LIBS) $(MAKE) $(TESTINGdir)/LS.dat $(zlsexe): $(SCALAPACKLIB) $(zls) $(F77LOADER) $(F77LOADFLAGS) -o $(zlsexe) $(zls) $(LIBS) $(MAKE) $(TESTINGdir)/LS.dat $(slu): $(FRC) $(dlu): $(FRC) $(clu): $(FRC) $(zlu): $(FRC) $(sdblu): $(FRC) $(ddblu): $(FRC) $(cdblu): $(FRC) $(zdblu): $(FRC) $(sdtlu): $(FRC) $(ddtlu): $(FRC) $(cdtlu): $(FRC) $(zdtlu): $(FRC) $(sllt): $(FRC) $(dllt): $(FRC) $(cllt): $(FRC) $(zllt): $(FRC) $(spbllt): $(FRC) $(dpbllt): $(FRC) $(cpbllt): $(FRC) $(zpbllt): $(FRC) $(sptllt): $(FRC) $(dptllt): $(FRC) $(cptllt): $(FRC) $(zptllt): $(FRC) $(sinv): $(FRC) $(dinv): $(FRC) $(cinv): $(FRC) $(zinv): $(FRC) $(sqr): $(FRC) $(dqr): $(FRC) $(cqr): $(FRC) $(zqr): $(FRC) $(sls): $(FRC) $(dls): $(FRC) $(cls): $(FRC) $(zls): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o psludriver.o: psludriver.f $(F77) $(DRVOPTS) -c $< pdludriver.o: pdludriver.f $(F77) $(DRVOPTS) -c $< pcludriver.o: pcludriver.f $(F77) $(DRVOPTS) -c $< pzludriver.o: pzludriver.f $(F77) $(DRVOPTS) -c $< psdbdriver.o: psdbdriver.f $(F77) $(DRVOPTS) -c $< pddbdriver.o: pddbdriver.f $(F77) $(DRVOPTS) -c $< pcdbdriver.o: pcdbdriver.f $(F77) $(DRVOPTS) -c $< pzdbdriver.o: pzdbdriver.f $(F77) $(DRVOPTS) -c $< psdtdriver.o: psdtdriver.f $(F77) $(DRVOPTS) -c $< pddtdriver.o: pddtdriver.f $(F77) $(DRVOPTS) -c $< pcdtdriver.o: pcdtdriver.f $(F77) $(DRVOPTS) -c $< pzdtdriver.o: pzdtdriver.f $(F77) $(DRVOPTS) -c $< psgbdriver.o: psgbdriver.f $(F77) $(DRVOPTS) -c $< pdgbdriver.o: pdgbdriver.f $(F77) $(DRVOPTS) -c $< pcgbdriver.o: pcgbdriver.f $(F77) $(DRVOPTS) -c $< pzgbdriver.o: pzgbdriver.f $(F77) $(DRVOPTS) -c $< pslltdriver.o: pslltdriver.f $(F77) $(DRVOPTS) -c $< pdlltdriver.o: pdlltdriver.f $(F77) $(DRVOPTS) -c $< pclltdriver.o: pclltdriver.f $(F77) $(DRVOPTS) -c $< pzlltdriver.o: pzlltdriver.f $(F77) $(DRVOPTS) -c $< pspbdriver.o: pspbdriver.f $(F77) $(DRVOPTS) -c $< pdpbdriver.o: pdpbdriver.f $(F77) $(DRVOPTS) -c $< pcpbdriver.o: pcpbdriver.f $(F77) $(DRVOPTS) -c $< pzpbdriver.o: pzpbdriver.f $(F77) $(DRVOPTS) -c $< psptdriver.o: psptdriver.f $(F77) $(DRVOPTS) -c $< pdptdriver.o: pdptdriver.f $(F77) $(DRVOPTS) -c $< pcptdriver.o: pcptdriver.f $(F77) $(DRVOPTS) -c $< pzptdriver.o: pzptdriver.f $(F77) $(DRVOPTS) -c $< psinvdriver.o: psinvdriver.f $(F77) $(DRVOPTS) -c $< pdinvdriver.o: pdinvdriver.f $(F77) $(DRVOPTS) -c $< pcinvdriver.o: pcinvdriver.f $(F77) $(DRVOPTS) -c $< pzinvdriver.o: pzinvdriver.f $(F77) $(DRVOPTS) -c $< psqrdriver.o: psqrdriver.f $(F77) $(DRVOPTS) -c $< pdqrdriver.o: pdqrdriver.f $(F77) $(DRVOPTS) -c $< pcqrdriver.o: pcqrdriver.f $(F77) $(DRVOPTS) -c $< pzqrdriver.o: pzqrdriver.f $(F77) $(DRVOPTS) -c $< pslsdriver.o: pslsdriver.f $(F77) $(DRVOPTS) -c $< pdlsdriver.o: pdlsdriver.f $(F77) $(DRVOPTS) -c $< pclsdriver.o: pclsdriver.f $(F77) $(DRVOPTS) -c $< pzlsdriver.o: pzlsdriver.f $(F77) $(DRVOPTS) -c $< .f.o : ; $(F77) -c $(F77FLAGS) $*.f scalapack-1.8.0/TESTING/LIN/psgbmv1.f0000640000175000017500000007574210363532303016620 0ustar muammarmuammar SUBROUTINE PSGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDBTRF and this is stored in AF. If a linear system * is to be solved using PSDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PSDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PSDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL SGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL STRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL STRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'T' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL SLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL STRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL SLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL STRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL SGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL STRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL STRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PSBsBMV1 * END scalapack-1.8.0/TESTING/LIN/pdludriver.f0000640000175000017500000012504210363532303017406 0ustar muammarmuammar PROGRAM PDLUDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * PDLUDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK LU routines. This test driver performs an LU factorization * and solve. If the input matrix is non-square, only the factorization * is performed. Condition estimation and iterative refinement are * optionally performed. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 2.0, LU factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LU.out' output file name (if any) * 6 device out * 1 number of problems sizes * 31 201 values of M * 31 201 values of N * 1 number of NB's * 2 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 1 4 2 3 8 values of P * 2 4 1 3 2 1 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPOSTPAD, IPPIV, IPREPAD, IPW, IPW2, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIPIV, LIWORK, LWORK, LW2, M, MAXMN, $ MINMN, MP, MYCOL, MYRHS, MYROW, N, NB, NBRHS, $ NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, NPCOL, $ NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, ANORM1, FRESID, NOPS, RCOND, $ SRESID, SRESID2, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ MVAL( NTESTS ), NBRVAL( NTESTS ), $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDGECON, PDGERFS, $ PDGETRF, PDGETRRV, PDGETRS, $ PDLAFCHK, PDLASCHK, PDLUINFO, $ PDMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PDLANGE EXTERNAL ICEIL, ILCM, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PDLUINFO( OUTFILE, NOUT, NMAT, MVAL, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, THRESH, $ EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST .AND. M.EQ.N ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPPIV = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPPIV = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF LIPIV = ICEIL( INTGSZ*( MP+NB ), DBLESZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * IF( CHECK ) THEN * * Calculate the amount of workspace required by the * checking routines PDLANGE, PDGETRRV, and * PDLAFCHK * WORKSIZ = MAX( 2, NQ ) * WORKSIZ = MAX( WORKSIZ, MP*DESCA( NB_ )+ $ NQ*DESCA( MB_ ) ) * WORKSIZ = MAX( WORKSIZ, MP * DESCA( NB_ ) ) * WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate matrix A of Ax = b * CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ MP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, LIPIV, 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) ANORM1 = PDLANGE( '1', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST .AND. M.EQ.N ) THEN CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform LU factorization * CALL PDGETRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDGETRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LU factorization * CALL PDCHEKPAD( ICTXT, 'PDGETRF', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * IF( M.NE.N ) THEN * * For non-square matrices, factorization only * NRHS = 0 NBRHS = 0 * IF( CHECK ) THEN * * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps) * CALL PDGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGETRRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRRV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( (FRESID-FRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) * * M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU * factorization when M >= N * NOPS = DBLE( MAXMN )*( DBLE( MINMN )**2 ) - $ (1.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) - $ (1.0D+0 / 2.0D+0)*( DBLE( MINMN )**2 ) * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * WTIME( 2 ) = 0.0D+0 IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ), $ WTIME( 2 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * CTIME( 2 ) = 0.0D+0 IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ), $ CTIME( 2 ), TMFLOPS, PASSED END IF * ELSE * * If M = N * IF( EST ) THEN * * Calculate workspace required for PDGECON * LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PDGECON( '1', N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LIWORK, INFO ) * IF( CHECK ) THEN CALL PDCHEKPAD( ICTXT, 'PDGECON', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGECON', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGECON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), $ MYCOL, DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + $ IPOSTPAD + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * * Set worksiz: routines requiring most workspace * is PDLASCHK * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PDMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) $ CALL PDFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PDMATGEN( ICTXT, 'No', 'No', $ DESCB( M_ ), DESCB( N_ ), $ DESCB( MB_ ), DESCB( NB_ ), $ MEM( IPB0 ), DESCB( LLD_ ), $ DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, $ 0, MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear sytem via LU factorization * CALL PDGETRS( 'No', N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), MEM( IPB ), $ 1, 1, DESCB, INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGETRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRS', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PDLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( SRESID.LE.THRESH .AND. $ ( SRESID-SRESID ).EQ.0.0D+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PDGERFS * LWORK = MAX( 1, 3*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PDGERFS( 'No', N, NRHS, MEM( IPA0 ), 1, $ 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), $ MEM( IPB0 ), 1, 1, DESCB, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LIWORK, INFO ) * IF( CHECK ) THEN CALL PDCHEKPAD( ICTXT, 'PDGERFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGERFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PDLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 2/3 N^3 - 1/2 N^2 flops for LU factorization * NOPS = (2.0D+0/3.0D+0)*( DBLE(N)**3 ) - $ (1.0D+0/2.0D+0)*( DBLE(N)**2 ) * * nrhs * 2 N^2 flops for LU solve. * NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization * and solve -- for WALL and CPU time, and print * output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * * Print WALL time if supported * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if supported * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED END IF 10 CONTINUE 20 CONTINUE * IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN * * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps) * CALL PDGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGETRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRRV', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF END IF 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ', $ 'Sol Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDLUDRIVER * END scalapack-1.8.0/TESTING/LIN/pcptinfo.f0000640000175000017500000004475010363532303017056 0ustar muammarmuammar SUBROUTINE PCPTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCPTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCPTINFO * END scalapack-1.8.0/TESTING/LIN/pdinvdriver.f0000640000175000017500000007760410430450246017574 0ustar muammarmuammar PROGRAM PDINVDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PDINVDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK matrix inversion routines. This test driver computes the * inverse of different kind of matrix and tests the results. * * The program must be driven by a short data file. An annotated example * of a data file can be obtained by deleting the first 3 characters * from the following 14 lines: * 'ScaLAPACK Matrix Inversion Testing input file' * 'PVM machine.' * 'INV.out' output file name (if any) * 6 device out * 5 number of matrix types (next line) * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD * 4 number of problems sizes * 1000 2000 3000 4000 values of N * 3 number of NB's * 4 30 35 values of NB * 2 number of process grids (ordered P & Q) * 4 2 values of P * 4 4 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER UPLO CHARACTER*3 MTYP CHARACTER*6 PASSED CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPPIV, IPREPAD, IPOSTPAD, IPIW, IPW, ITEMP, J, $ K, KTESTS, KPASS, KFAIL, KSKIP, L, LCM, LIPIV, $ LIWORK, LWORK, MYCOL, MYROW, N, NB, NGRIDS, $ NMAT, NMTYP, NNB, NOUT, NP, NPCOL, NPROCS, $ NPROW, NQ, WORKIINV, WORKINV, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, RCOND, TMFLOPS * .. * .. Local Arrays .. CHARACTER*3 MATTYP( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION MEM( MEMSIZ ), CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDGETRF, PDGETRI, $ PDINVCHK, PDINVINFO, PDLASET, $ PDMATGEN, PDPOTRF, PDPOTRI, $ PDTRTRI, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PDLANGE, PDLANSY, PDLANTR EXTERNAL ICEIL, ILCM, LSAMEN, NUMROC, PDLANGE, $ PDLANSY, PDLANTR * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDINVINFO( OUTFILE, NOUT, NMTYP, MATTYP, NTESTS, NMAT, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different matrix types * DO 40 I = 1, NMTYP * MTYP = MATTYP( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a general matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is an upper triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'LTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a lower triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a symmetric positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the upper triangular part will be '// $ 'referenced.' ELSE IF( LSAMEN( 3, MTYP, 'LPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a symmetric positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the lower triangular part will be '// $ 'referenced.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * N = NVAL( K ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * NB = NBVAL( L ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 * LCM = ILCM( NPROW, NPCOL ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Pivots are needed by LU factorization * IPPIV = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + $ IPREPAD LIPIV = ICEIL( INTGSZ * ( NP + NB ), DBLESZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * LWORK = MAX( 1, NP * DESCA( NB_ ) ) WORKINV = LWORK + IPOSTPAD * * Figure the amount of workspace required by the * general matrix inversion * IF( NPROW.EQ.NPCOL ) THEN LIWORK = NQ + DESCA( NB_ ) ELSE * * change the integer workspace needed for PDGETRI * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) * * $ ICEIL( ICEIL( DESCA( LLD_ ), * $ DESCA( MB_ ) ), LCM / NPROW ) ) * $ + NQ LIWORK = NUMROC( DESCA( M_ ) + $ DESCA( MB_ ) * NPROW $ + MOD ( 1 - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF WORKIINV = ICEIL( LIWORK*INTGSZ, DBLESZ ) + $ IPOSTPAD IPIW = IPW + WORKINV + IPREPAD WORKSIZ = WORKINV + IPREPAD + WORKIINV * ELSE * * No pivots or workspace needed for triangular or * symmetric positive definite matrices. * IPW = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD WORKSIZ = 1 + IPOSTPAD * END IF * IF( CHECK ) THEN * * Figure amount of work space for the norm * computations * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN ITEMP = NQ ELSE ITEMP = 2 * NQ + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), $ LCM / NPROW ) END IF END IF WORKSIZ = MAX( WORKSIZ-IPOSTPAD, ITEMP ) * * Figure the amount of workspace required by the * checking routine * WORKSIZ = MAX( WORKSIZ, 2 * NB * MAX( 1, NP ) ) + $ IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'inversion', $ ( IPW + WORKSIZ ) * DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Generate a general diagonally dominant matrix A * CALL PDMATGEN( ICTXT, 'N', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Generate a symmetric positive definite matrix * CALL PDMATGEN( ICTXT, 'S', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * END IF * * Zeros not-referenced part of A, if any. * IF( LSAMEN( 1, MTYP, 'U' ) ) THEN * UPLO = 'U' CALL PDLASET( 'Lower', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 2, 1, DESCA ) * ELSE IF( LSAMEN( 1, MTYP, 'L' ) ) THEN * UPLO = 'L' CALL PDLASET( 'Upper', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 1, 2, DESCA ) * ELSE * UPLO = 'G' * END IF * * Need 1-norm of A for checking * IF( CHECK ) THEN * CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * CALL PDFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANGE( '1', N, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * ANORM = PDLANTR( '1', UPLO, 'Non unit', N, N, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANTR', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANTR', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * ANORM = PDLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'SY' ) ) THEN * CALL PDFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD,IPOSTPAD, PADVAL ) * END IF * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Perform LU factorization * CALL SLTIMER( 1 ) CALL PDGETRF( N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGETRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PDGETRI( N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ), LWORK, $ MEM( IPIW ), LIWORK, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGETRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRI', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRI', $ WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRI', $ WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PDTRTRI( UPLO, 'Non unit', N, MEM( IPA ), 1, $ 1, DESCA, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDTRTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Perform Cholesky factorization * CALL SLTIMER( 1 ) CALL PDPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the symmetric positive definite matrix * inversion * CALL SLTIMER( 2 ) CALL PDPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPOTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * END IF * IF( CHECK ) THEN * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Compute fresid = || inv(A)*A-I || * CALL PDINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA, $ IASEED, ANORM, FRESID, RCOND, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDINVCHK', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDINVCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. INFO.EQ.0 .AND. $ ( (FRESID-FRESID) .EQ. 0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 IF( INFO.GT.0 ) THEN PASSED = 'SINGUL' ELSE PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only the timing * operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * 2/3 N^3 - 1/2 N^2 flops for LU factorization * NOPS = ( 2.0D+0 / 3.0D+0 )*( DBLE( N )**3 ) - $ ( 1.0D+0 / 2.0D+0 )*( DBLE( N )**2 ) * * 4/3 N^3 - N^2 flops for inversion * NOPS = NOPS + $ ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) - $ DBLE( N )**2 * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * 1/3 N^3 + 2/3 N flops for triangular inversion * CTIME(1) = 0.0D+0 WTIME(1) = 0.0D+0 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N ) ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * 1/3 N^3 + 1/2 N^2 flops for Cholesky * factorization * NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 ) * * 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion * NOPS = NOPS + $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 ) * END IF * * Figure total megaflops -- factorization and * inversion, for WALL and CPU time, and print * output. * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ) + CTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ', $ ' MFLOPS Cond Resid CHECK' ) 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ', $ '----------- ------- ------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2, $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PDINVDRIVER * END scalapack-1.8.0/TESTING/LIN/psgetrrv.f0000640000175000017500000003006610363532303017103 0ustar muammarmuammar SUBROUTINE PSGETRRV( M, N, A, IA, JA, DESCA, IPIV, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSGETRRV reforms sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from the * triangular matrices L and U returned by PSGETRF. It multiplies * an upper triangular matrix stored in the upper triangle of sub( A ) * times the unit lower triangular matrix stored in the lower triangle. * To accomplish this, the routine basically performs the PSGETRF * routine in reverse. * * It computes L*U first, and then apply P: P*L*U => sub( A ). In the * J-th loop, the block column (or column panel), which has the lower * triangular unit matrix L is multiplied with the block row (or row * panel), which contains the upper triangular matrix U. * * ( L1 ) ( 0 0 ) ( L1*U1 L1*U2 ) * A` = L * U + A` = ( ) * (U1 U2) + ( ) = ( ) * ( L2 ) ( 0 A`) ( L2*U1 L2*U2+A` ) * * where L1 is a lower unit triangular matrix and U1 is an upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the distributed matrix sub( A ) * contains the the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. On exit, the original distributed matrix sub( A ) * is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * WORK (local workspace) REAL array of dimension (LWORK) * LWORK >= MpA0 * NB_A + NqA0 * MB_A, where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of columns of L, and a block of * rows of U. INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J, $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ), $ DESCU( DLEN_ ), IDUM( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PSGEMM, PSLACPY, $ PSLAPIV, PSLASET, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPL = 1 IPU = IPL + MP * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Define array descriptors for L and U * MN = MIN( M, N ) IL = MAX( ( ( IA+MN-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) JL = MAX( ( ( JA+MN-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * CALL DESCSET( DESCL, IA+M-IL, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * CALL DESCSET( DESCU, DESCA( MB_ ), JA+N-JL, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) + DESCA( MB_ ) ) * * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+MN-J, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PSLACPY( 'Lower', M-IL+IA, JB, A, IL, J, DESCA, $ WORK( IPL ), 1, 1, DESCL ) CALL PSLASET( 'Upper', M-IL+IA, JB, ZERO, ONE, WORK( IPL ), $ 1, 1, DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PSLACPY( 'Upper', JB, JA+N-J, A, IL, J, DESCA, $ WORK( IPU ), 1, 1, DESCU ) CALL PSLASET( 'Lower', JB-1, JA+N-J, ZERO, ZERO, $ WORK( IPU ), 2, 1, DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PSLASET( 'Lower', IA+M-IL-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Zero the upper triangular piece of the current block. * CALL PSLASET( 'Upper', JB, JA+N-J, ZERO, ZERO, A, IL, J, $ DESCA ) * * Update the matrix sub( A ). * CALL PSGEMM( 'No transpose', 'No transpose', IA+M-IL, $ JA+N-J, JB, ONE, WORK( IPL ), 1, 1, DESCL, $ WORK( IPU ), 1, 1, DESCU, ONE, A, IL, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCL( M_ ) = DESCL( M_ ) + DESCL( MB_ ) DESCL( RSRC_ ) = MOD( DESCL( RSRC_ ) + NPROW - 1, NPROW ) DESCL( CSRC_ ) = MOD( DESCL( CSRC_ ) + NPCOL - 1, NPCOL ) DESCU( N_ ) = DESCU( N_ ) + DESCU( NB_ ) DESCU( RSRC_ ) = DESCL( RSRC_ ) DESCU( CSRC_ ) = DESCL( CSRC_ ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PSLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPL ), $ 1, 1, DESCL ) CALL PSLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPL ), 1, 1, $ DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PSLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPU ), 1, $ 1, DESCU ) CALL PSLASET( 'Lower', JB-1, N, ZERO, ZERO, WORK( IPU ), 2, 1, $ DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PSLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Zero the upper triangular piece of the current block. * CALL PSLASET( 'Upper', JB, N, ZERO, ZERO, A, IA, JA, DESCA ) * * Update the matrix sub( A ). * CALL PSGEMM( 'No transpose', 'No transpose', M, N, JB, ONE, $ WORK( IPL ), 1, 1, DESCL, WORK( IPU ), 1, 1, $ DESCU, ONE, A, IA, JA, DESCA ) * * Apply pivots so that sub( A ) = P*L*U * CALL PSLAPIV( 'Backward', 'Row', 'Col', MIN( M, N ), N, A, IA, JA, $ DESCA, IPIV, IA, 1, DESCIP, IDUM ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSGETRRV * END scalapack-1.8.0/TESTING/LIN/psqrt16.f0000640000175000017500000002510610363532303016546 0ustar muammarmuammar SUBROUTINE PSQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, $ JX, DESCX, B, IB, JB, DESCB, RWORK, RESID ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IX, JA, JB, JX, M, N, NRHS REAL RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) REAL A( * ), B( * ), RWORK( * ), X( * ) * .. * * Purpose * ======= * * PSQRT16 computes the residual for a solution of a system of linear * equations sub( A )*sub( X ) = B or sub( A' )*sub( X ) = B: * RESID = norm(B - sub( A )*sub( X ) ) / * ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ), * where EPS is the machine epsilon, sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes * X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations: * = 'N': sub( A )*sub( X ) = sub( B ) * = 'T': sub( A' )*sub( X )= sub( B ), where A' is the * transpose of sub( A ). * = 'C': sub( A' )*sub( X )= B, where A' is the transpose * of sub( A ). * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) REAL pointer into the local * memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * The original M x N matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) REAL pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This * array contains the local pieces of the computed solution * distributed vectors for the system of linear equations. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input/local output) REAL pointer into * the local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributes right hand side vectors for * the system of linear equations. On exit, sub( B ) is over- * written with the difference sub( B ) - sub( A )*sub( X ) or * sub( B ) - sub( A )'*sub( X ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * RWORK (local workspace) REAL array, dimension (LRWORK) * LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * RESID (global output) REAL * The maximum over the number of right hand sides of * norm( sub( B )- sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL, $ NPROW REAL ANORM, BNORM, EPS, XNORM * .. * .. Local Arrays .. REAL TEMP( 2 ) * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH, PSLANGE EXTERNAL LSAME, PSLAMCH, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSASUM, PSGEMM, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick exit if M = 0 or N = 0 or NRHS = 0 * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN RESID = ZERO RETURN END IF * IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN ANORM = PSLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK ) N1 = N N2 = M ELSE ANORM = PSLANGE( '1', M, N, A, IA, JA, DESCA, RWORK ) N1 = M N2 = N END IF * EPS = PSLAMCH( ICTXT, 'Epsilon' ) * * Compute B - sub( A )*sub( X ) (or B - sub( A' )*sub( X ) ) and * store in B. * CALL PSGEMM( TRANS, 'No transpose', N1, NRHS, N2, -ONE, A, IA, $ JA, DESCA, X, IX, JX, DESCX, ONE, B, IB, JB, DESCB ) * * Compute the maximum over the number of right hand sides of * norm( sub( B ) - sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * RESID = ZERO DO 10 J = 1, NRHS * CALL PSASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 ) CALL PSASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 ) * * Only the process columns owning the vector operands will have * the correct result, the other will have zero. * TEMP( 1 ) = BNORM TEMP( 2 ) = XNORM CALL SGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM, $ -1, -1, IDUMM ) BNORM = TEMP( 1 ) XNORM = TEMP( 2 ) * * Every processes have ANORM, BNORM and XNORM now. * IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN RESID = ZERO ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN RESID = ONE / EPS ELSE RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) / $ ( MAX( M, N )*EPS ) ) END IF * 10 CONTINUE * RETURN * * End of PSQRT16 * END scalapack-1.8.0/TESTING/LIN/pspblaschk.f0000640000175000017500000002542010363532303017357 0ustar muammarmuammar SUBROUTINE PSPBLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PSPBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) REAL pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) REAL array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTRAN, PSMATGEN, $ SGAMX2D, SGEBR2D, SGEBS2D, SGEMM, $ SGERV2D, SGESD2D, SGSUM2D, SLASET * .. * .. External Functions .. INTEGER ISAMAX, NUMROC REAL PSLAMCH EXTERNAL ISAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PSBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BW+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PSBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PSBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PSPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PSMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PSAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PSBLASCHK * END scalapack-1.8.0/TESTING/LIN/pclsinfo.f0000640000175000017500000004077510363532303017054 0ustar muammarmuammar SUBROUTINE PCLSINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBRVAL, LDNBVAL, LDNRVAL, $ LDNVAL, LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, $ NNBR, NNR, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCLSINFO gets needed startup information for LS solve and * transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (workspace) INTEGER array of dimension >= * MAX(5,LDMVAL+LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LS.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 5, 1, WORK, 5 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK min ||Ax-b|| by QR factorizations.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision least-square solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - QR|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used. If CPU and WALL time' WRITE( NOUT, FMT = 9999 ) $ ' are the same, only one line '// $ 'is printed, and the label is ''BOTH''.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'QR time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 5, 1, WORK, 5, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCLSINFO * END scalapack-1.8.0/TESTING/LIN/pclafchk.f0000640000175000017500000002675510363532303017014 0ustar muammarmuammar SUBROUTINE PCLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = (1.0E+0, 0.0E+0) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK, $ LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CMATADD, INFOG2L, PCMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH, PCLANGE EXTERNAL ICEIL, LSAME, NUMROC, PCLANGE, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDWP1 = LDW + 1 LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * IF( LSAME( AFORM, 'H' ) ) THEN * * Handle first block of columns separately * II = 1 ICURROW = IAROW ICURCOL = IACOL JB = JN - JA + 1 * IF( MYCOL.EQ.ICURCOL ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 10, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) ) 10 CONTINUE END IF CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-JJ, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ), $ IASEED, IIA-1, MP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 20, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) ) 20 CONTINUE END IF CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * ELSE * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 )*LDA CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) 40 CONTINUE * END IF * * Calculate factor residual * FRESID = PCLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PCLAFCHK * END scalapack-1.8.0/TESTING/LIN/psdtlaschk.f0000640000175000017500000003056310363532303017371 0ustar muammarmuammar SUBROUTINE PSDTLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PSDTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'T', A 'Transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) REAL pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) REAL array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTRAN, PSMATGEN, $ SGAMX2D, SGEBR2D, SGEBS2D, SGEMM, $ SGERV2D, SGESD2D, SGSUM2D, SLASET * .. * .. External Functions .. INTEGER ISAMAX, NUMROC REAL PSLAMCH EXTERNAL ISAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PSTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (3), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PSBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PSBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.GT.0 ) THEN CALL SGESD2D( ICTXT, 1, 1, A( START+2), LDA, $ MYROW, MYCOL-1 ) ENDIF * IF( MYCOL.LT.NPCOL-1 ) THEN CALL SGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 220 I=0,DESCA( NB_ )-1 A( START+2+(I)*LDA ) = A( START+2+(I+1)*LDA ) 220 CONTINUE * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, 1, 1, $ A( START+2+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * IF( MYCOL.GT.0 ) THEN CALL SGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PSGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (INT_ONE+2)*INT_ONE, INFO ) * * * Regenerate column of B * CALL PSMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PSAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PSTLASCHK * END scalapack-1.8.0/TESTING/LIN/psbmatgen.f0000640000175000017500000001665210363532303017214 0ustar muammarmuammar SUBROUTINE PSBMATGEN( ICTXT, AFORM, AFORM2, BWL, BWU, N, $ MB, NB, A, $ LDA, IAROW, IACOL, ISEED, $ MYROW, MYCOL, NPROW, NPCOL ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. * .. Scalar Arguments .. CHARACTER*1 AFORM, AFORM2 INTEGER IACOL, IAROW, ICTXT, $ ISEED, LDA, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW, BWL, BWU * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PSBMATGEN : Parallel Real Single precision Band MATrix GENerator. * (Re)Generate a distributed Band matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'L' : A is returned as a symmetric lower * triangular matrix, and is diagonally dominant. * if AFORM = 'U' : A is returned as a symmetric upper * triangular matrix, and is diagonally dominant. * if AFORM = 'G' : A is returned as a general matrix. * if AFORM = 'T' : A is returned as a general matrix in * tridiagonal-compatible form. * * AFORM2 (global input) CHARACTER*1 * if the matrix is general: * if AFORM2 = 'D' : A is returned diagonally dominant. * if AFORM2 != 'D' : A is not returned diagonally dominant. * if the matrix is symmetric or hermitian: * if AFORM2 = 'T' : A is returned in tridiagonally-compatible * form (a transpose form). * if AFORM2 != 'T' : A is returned in banded-compatible form. * * M (global input) INTEGER * The number of nonzero rows in the generated distributed * band matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) REAL, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) + BWL+BWU * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * This code is a simple wrapper around PSMATGEN, for band matrices. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER DIAG_INDEX, I, J, M_MATGEN, NQ, N_MATGEN, $ START_INDEX * .. * .. External Subroutines .. EXTERNAL PSMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC, LSAME * .. * .. Executable Statements .. * * IF( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) THEN M_MATGEN = BWL + 1 N_MATGEN = N START_INDEX = 1 IF( LSAME( AFORM, 'L' ) ) THEN DIAG_INDEX = 1 ELSE DIAG_INDEX = BWL + 1 ENDIF ELSE M_MATGEN = BWL + BWU + 1 N_MATGEN = N DIAG_INDEX = BWU + 1 START_INDEX = 1 ENDIF * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * * Generate a random matrix initially * IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN * CALL PSMATGEN( ICTXT, 'T', 'N', $ N_MATGEN, M_MATGEN, $ NB, M_MATGEN, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, NQ, 0, M_MATGEN, $ MYCOL, MYROW, NPCOL, NPROW ) * ELSE * CALL PSMATGEN( ICTXT, 'N', 'N', $ M_MATGEN, N_MATGEN, $ M_MATGEN, NB, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, M_MATGEN, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Zero out padding at tops of columns * DO 1000 J=1,NB * c write(*,*) 'LDA-M_MATGEN =',LDA-M_MATGEN DO 2000 I=1, LDA-M_MATGEN * * Indexing goes negative; BMATGEN assumes that space * has been preallocated above the first column as it * has to be if the matrix is to be input to * Scalapack's band solvers. * c write(*,*) 'I LDA M_MATGEN and J =',I,LDA,M_MATGEN, J A( I-LDA+M_MATGEN, J ) = ZERO * 2000 CONTINUE * 1000 CONTINUE * ENDIF * IF( LSAME( AFORM2, 'D' ).OR. $ ( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) ) THEN * * Loop over diagonal elements stored on this processor. * * DO 330 I=1, NQ IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN IF( NPROW .EQ. 1 ) THEN A( I, DIAG_INDEX ) = A( I, DIAG_INDEX ) $ + REAL( BWL+BWU+1 ) ENDIF ELSE IF( NPROW .EQ. 1 ) THEN A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) $ + REAL( BWL+BWU+1 ) ENDIF END IF 330 CONTINUE * * ELSE * * Must add elements to keep condition of matrix in check * DO 380 I=1, NQ * IF( NPROW .EQ. 1 ) THEN * IF( MOD(I+MYCOL*NB,2) .EQ. 1 ) THEN A( DIAG_INDEX+1, I ) = A( DIAG_INDEX+1, I ) $ + REAL( BWL+BWU+1 ) * ELSE * A( DIAG_INDEX-1, I ) = A( DIAG_INDEX-1, I ) $ + REAL( BWL+BWU+1 ) ENDIF * ENDIF * 380 CONTINUE * END IF * RETURN * * End of PSBMATGEN * END scalapack-1.8.0/TESTING/LIN/pzdtlaschk.f0000640000175000017500000003075710363532303017405 0ustar muammarmuammar SUBROUTINE PZDTLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PZDTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'C', A 'Conjugate transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, PBZTRAN, $ PZMATGEN, ZGAMX2D, ZGEMM, ZGSUM2D, $ ZLASET * .. * .. External Functions .. INTEGER IZAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PZTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (3), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PZBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PZBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.GT.0 ) THEN CALL ZGESD2D( ICTXT, 1, 1, A( START+2), LDA, $ MYROW, MYCOL-1 ) ENDIF * IF( MYCOL.LT.NPCOL-1 ) THEN CALL ZGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 220 I=0,DESCA( NB_ )-1 A( START+2+(I)*LDA ) = A( START+2+(I+1)*LDA ) 220 CONTINUE * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, 1, 1, $ A( START+2+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * IF( MYCOL.GT.0 ) THEN CALL ZGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PZGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (INT_ONE+2)*INT_ONE, INFO ) * * * Regenerate column of B * CALL PZMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PZAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDZNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDZNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PZTLASCHK * END scalapack-1.8.0/TESTING/LIN/pcqrt14.f0000640000175000017500000003401010363532303016516 0ustar muammarmuammar REAL FUNCTION PCQRT14( TRANS, M, N, NRHS, A, IA, JA, $ DESCA, X, IX, JX, DESCX, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IX, JA, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX A( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PCQRT14 checks whether sub( X ) is in the row space of sub( A ) or * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both * sub( X ) and sub( A ) such that their norms are in the range * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing * triangle, scaled by MAX(M,N,NRHS)*eps. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * = 'N': No transpose, check for sub( X ) in the row space of * sub( A ), * = 'C': Conjugate transpose, check for sub( X ) in row space * of sub( A )'. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( X ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array * contains the local pieces of the M-by-N distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * On entry, this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N', * and the M-by-NRHS distributed submatrix sub( X ) otherwise. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * WORK (local workspace) COMPLEX array dimension (LWORK) * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where * * IF TRANS='N', (LQ fact) * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW, * NPROW ) * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW, * RSRC_A, NPROW ) * LWF = MB_A * ( MB_A + MNRHSP + NQ0 ) * ELSE (QR fact) * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL, * NPCOL ) * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL, * CSRC_A, NPCOL ) * LWF = NB_A * ( NB_A + MP0 + NNRHSQ ) * END IF * * and, * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL TPSD INTEGER IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO, $ IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA, $ JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL, $ MYROW, NPCOL, NPROW, NPW, NQWA, NQW REAL ANRM, ERR, XNRM COMPLEX AMAX * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) REAL RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PCLANGE, PSLAMCH EXTERNAL LSAME, NUMROC, PCLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCMAX1, $ PCCOPY, PCGELQF, PCGEQRF, PCLACGV, $ PCLACPY, PCLASCL, PXERBLA, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PCQRT14 = ZERO * IPWA = 1 IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IWA = IROFFA + 1 JWA = ICOFFA + 1 CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN IF( N.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .FALSE. MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) NQW = NQWA * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA + M JWX = JWA LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) * ELSE IF( LSAME( TRANS, 'C' ) ) THEN IF( M.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .TRUE. MPW = MPWA NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA JWX = JWA + N LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) ELSE CALL PXERBLA( ICTXT, 'PCQRT14', -1 ) RETURN END IF * * Copy and scale sub( A ) * IPTAU = IPWA + MPW*NQW CALL PCLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA, $ JWA, DESCW ) RWORK( 1 ) = ZERO ANRM = PCLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK ) IF( ANRM.NE.ZERO ) $ CALL PCLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA, $ JWA, DESCW, INFO ) * * Copy sub( X ) or sub( X )' into the right place and scale it * IF( TPSD ) THEN * * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work * DO 10 J = 1, NRHS CALL PCCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX, $ JWX+J-1, DESCW, 1 ) 10 CONTINUE XNRM = PCLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PCLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1) * MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPW = IPTAU + MIN( MQW, NQW ) LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) ) CALL PCGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in upper triangle of * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1) * ERR = ZERO IF( N.LT.M ) THEN DO 20 J = JWX, JWA+N+NRHS-1 CALL PCMAX1( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ), $ IWA+N, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 20 CONTINUE END IF CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * ELSE * * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work * DO 30 J = 1, NRHS CALL PCCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), $ IWX+J-1, JWX, DESCW, DESCW( M_ ) ) CALL PCLACGV( N, WORK( IPWA ), IWX+J-1, JWX, DESCW, $ DESCW( M_ ) ) 30 CONTINUE * XNRM = PCLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PCLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1) * NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPW = IPTAU + MIN( MPW, NPW ) LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) ) CALL PCGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in lower triangle in * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1) * ERR = ZERO DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 ) CALL PCMAX1( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ), $ IWX+J-JWA-M, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 40 CONTINUE CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * END IF * PCQRT14 = ERR / ( REAL( MAX( M, N, NRHS ) ) * $ PSLAMCH( ICTXT, 'Epsilon' ) ) * RETURN * * End of PCQRT14 * END scalapack-1.8.0/TESTING/LIN/pcgerqrv.f0000640000175000017500000002375410363532303017066 0ustar muammarmuammar SUBROUTINE PCGERQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGERQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from R, Q * computed by PCGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors R and Q computed * by PCGERQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PCGERQF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JV, K, MYCOL, MYROW, NPCOL, $ NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLACPY, $ PCLARFB, PCLARFT, PCLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+M-K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA - M + K + 1 JV = 1 + N - K + ICOFF * * Compute upper triangular matrix T * CALL PCLARFT( 'Backward', 'Rowwise', N-M+IN-IA+1, IB, A, IA+M-K, $ JA, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', IB, N-M+IN-IA+1, A, IA+M-K, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PCLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict lower triangular part of sub( A ) to get block * column of R * CALL PCLASET( 'All', IB, N-K, ZERO, ZERO, A, IA+M-K, JA, $ DESCA ) CALL PCLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, IA+M-K+1, $ JA+N-K, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', IN-IA+1, N-M+IN-IA+1, IB, WORK( IPV ), 1, $ ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) JV = 1 + N - M + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PCLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', IB, N-M+I+IB-IA, A, I, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PCLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict Lower triangular part of sub( A ) to get * block column of R * CALL PCLASET( 'All', IB, N-M+I-IA, ZERO, ZERO, A, I, JA, $ DESCA ) CALL PCLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, I+1, $ JA+N-M+I-IA, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I+IB-IA, N-M+I+IB-IA, IB, WORK( IPV ), $ 1, ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCGERQRV * END scalapack-1.8.0/TESTING/LIN/pslltinfo.f0000640000175000017500000004214510363532303017242 0ustar muammarmuammar SUBROUTINE PSLLTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, THRESH, EST, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSLLTINFO get needed startup information for LLt factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 7, LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'LLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 2 END IF IF( EST ) THEN WORK( 7 ) = 1 ELSE WORK( 7 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 7, 1, WORK, 7 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Ax=b by LLt factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision LLt factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data is stored in ''Upper'// $ ''' or ''Lower'' portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LLt time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 7, 1, WORK, 7, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF IF( WORK( 7 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSLLTINFO * END scalapack-1.8.0/TESTING/LIN/pdgeqrrv.f0000640000175000017500000002344110363532303017060 0ustar muammarmuammar SUBROUTINE PDGEQRRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQRRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from Q, R * computed by PDGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors Q and R computed * by PDGEQRF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PDGEQRF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, IIA, IPT, IPV, IPW, $ IROFF, IV, J, JB, JJA, JL, JN, K, MP, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLACPY, $ PDLARFB, PDLARFT, PDLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * K = MIN( M, N ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, INDXG2P( JL, DESCA( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, MP ) ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+K-J, DESCA( NB_ ) ) I = IA + J - JA IV = 1 + J - JA + IROFF * * Compute upper triangular matrix T * CALL PDLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'Lower', M-I+IA, JB, A, I, J, DESCA, WORK( IPV ), $ IV, 1, DESCV ) CALL PDLASET( 'Upper', M-I+IA, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get * block column of R * CALL PDLASET( 'Lower', M-I+IA-1, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-I+IA, N-J+JA, JB, WORK( IPV ), IV, 1, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = JN - JA + 1 * * Compute upper triangular matrix T * CALL PDLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IROFF+1, 1, DESCV ) CALL PDLASET( 'Upper', M, JB, ZERO, ONE, WORK, IROFF+1, 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get block * column of R * CALL PDLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', M, $ N, JB, WORK( IPV ), IROFF+1, 1, DESCV, WORK( IPT ), $ A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDGEQRRV * END scalapack-1.8.0/TESTING/LIN/pzgbmv1.f0000640000175000017500000007613110363532303016620 0ustar muammarmuammar SUBROUTINE PZGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDBTRF and this is stored in AF. If a linear system * is to be solved using PZDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL ZGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL ZTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'C' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL ZTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL ZTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL ZGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL ZTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PZBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pcgeqrrv.f0000640000175000017500000002347210363532303017063 0ustar muammarmuammar SUBROUTINE PCGEQRRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQRRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from Q, R * computed by PCGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors Q and R computed * by PCGEQRF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PCGEQRF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, IIA, IPT, IPV, IPW, $ IROFF, IV, J, JB, JJA, JL, JN, K, MP, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLACPY, $ PCLARFB, PCLARFT, PCLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * K = MIN( M, N ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, INDXG2P( JL, DESCA( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, MP ) ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+K-J, DESCA( NB_ ) ) I = IA + J - JA IV = 1 + J - JA + IROFF * * Compute upper triangular matrix T * CALL PCLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'Lower', M-I+IA, JB, A, I, J, DESCA, WORK( IPV ), $ IV, 1, DESCV ) CALL PCLASET( 'Upper', M-I+IA, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get * block column of R * CALL PCLASET( 'Lower', M-I+IA-1, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-I+IA, N-J+JA, JB, WORK( IPV ), IV, 1, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = JN - JA + 1 * * Compute upper triangular matrix T * CALL PCLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IROFF+1, 1, DESCV ) CALL PCLASET( 'Upper', M, JB, ZERO, ONE, WORK, IROFF+1, 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get block * column of R * CALL PCLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', M, $ N, JB, WORK( IPV ), IROFF+1, 1, DESCV, WORK( IPT ), $ A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCGEQRRV * END scalapack-1.8.0/TESTING/LIN/psgbdriver.f0000640000175000017500000007744010363532303017405 0ustar muammarmuammar PROGRAM PSGBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PSGBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by SGB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTMEM INTEGER, default = 2048. * INTMEM is the size of the integer workspace used in this * driver as input as the IPIV vector. It represents an * upper bound on NB, the blocksize of the data * distribution. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER INTMEM PARAMETER ( INTMEM = 2048 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER MEMSIZ, NTESTS, REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER IPIV(INTMEM) INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, $ PSCHEKPAD, PSDBLASCHK, PSFILLPAD, PSGBINFO, $ PSGBTRF, PSGBTRS, PSMATGEN, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PSLANGE EXTERNAL LSAME, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PSGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) $ + (BWL+BWU) NB = MAX( NB, 2*(BWL+BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 * IF( NB.GT.INTMEM ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) THEN WRITE( NOUT,* )'You have chosen an ' $ ,'NB > INTMEM in the driver.' WRITE(NOUT, *)'Please edit the driver ' $ ,'and increase the value of INTMEM' ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2*BWL+2*BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((2*BWL+2*BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, $ (2*BWL+2*BWU+1), NB, 0, 0, $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 1 * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PSLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PSDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PSLANGE( '1', (2*BWL+2*BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PSGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PSGBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, 'PSGBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PSMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PSGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSGBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PSDBLASCHK( 'N', 'N', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl (bwl+bwu) + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE((BWL+BWU))) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST (bwl+bwu)*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE((BWL+BWU))*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST (bwl+bwu)*(bwl+bwu) * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+(bwl+bwu)) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE((BWL+BWU))) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * (BWL+BWU)*(BWL+BWU) ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSGBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/psqrt13.f0000640000175000017500000002045010363532303016540 0ustar muammarmuammar SUBROUTINE PSQRT13( SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, $ WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ISEED, JA, M, N, SCALE REAL NORMA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) REAL A( * ) * .. * * Purpose * ======= * * PSQRT13 generates a full-rank matrix that may be scaled to have * large or small norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCALE (global input) INTEGER * SCALE = 1: normally scaled matrix * SCALE = 2: matrix scaled up * SCALE = 3: matrix scaled down * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local output) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * NORMA (global output) REAL * The one-norm of A. * * ISEED (global input/global output) INTEGER * Seed for random number generator. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= Nq0, where * * ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), and * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IIA, INFO, $ IROFFA, J, JJA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL AJJ, ASUM, BIGNUM, SMLNUM * .. * .. External Functions .. INTEGER NUMROC REAL PSLAMCH, PSLANGE EXTERNAL NUMROC, PSLAMCH, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSLABAD, PSLASCL, $ PSMATGEN, PSASUM, PSELGET, PSELSET * .. * .. Intrinsic Functions .. INTRINSIC MOD, SIGN * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * generate the matrix * IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * CALL PSMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), A, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED, IIA-1, MP, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * DO 10 J = JA, JA+N-1 I = IA + J - JA IF( I.LE.IA+M-1 ) THEN CALL PSASUM( M, ASUM, A, IA, J, DESCA, 1 ) CALL PSELGET( 'Column', ' ', AJJ, A, I, J, DESCA ) AJJ = AJJ + SIGN( ASUM, AJJ ) CALL PSELSET( A, I, J, DESCA, AJJ ) END IF 10 CONTINUE * * scaled versions * IF( SCALE.NE.1 ) THEN * NORMA = PSLANGE( 'M', M, N, A, IA, JA, DESCA, WORK ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'Epsilon' ) BIGNUM = ONE / SMLNUM * IF( SCALE.EQ.2 ) THEN * * matrix scaled up * CALL PSLASCL( 'General', NORMA, BIGNUM, M, N, A, IA, $ JA, DESCA, INFO ) * ELSE IF( SCALE.EQ.3 ) THEN * * matrix scaled down * CALL PSLASCL( 'General', NORMA, SMLNUM, M, N, A, IA, $ JA, DESCA, INFO ) * END IF * END IF * NORMA = PSLANGE( 'One-norm', M, N, A, IA, JA, DESCA, WORK ) * RETURN * * End of PSQRT13 * END scalapack-1.8.0/TESTING/LIN/pcdtinfo.f0000640000175000017500000004436110363532303017040 0ustar muammarmuammar SUBROUTINE PCDTINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCDTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCDTINFO * END scalapack-1.8.0/TESTING/LIN/pdpbinfo.f0000640000175000017500000004472410363532303017036 0ustar muammarmuammar SUBROUTINE PDPBINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDPBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDPBINFO * END scalapack-1.8.0/TESTING/LIN/pcqrdriver.f0000640000175000017500000012377510363532303017422 0ustar muammarmuammar PROGRAM PCQRDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PCQRDRIVER is the main test program for the COMPLEX * SCALAPACK QR factorization routines. This test driver performs a QR * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ * (complete unitary factorization) factorization and checks the * results. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 16 lines: * 'ScaLAPACK QR factorizations input file' * 'PVM machine' * 'QR.out' output file name (if any) * 6 device out * 6 number of factorizations * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of MB's and NB's * 4 3 5 values of MB * 4 7 3 values of NB * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ, REALSZ and CPLXSZ indicate the length in bytes on * the given platform for an integer, a single precision real * and a single precision complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, INTGSZ = 4, REALSZ = 4, $ TOTMEM = 2000000, MEMSIZ = TOTMEM / CPLXSZ, $ NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER*2 FACT CHARACTER*6 PASSED CHARACTER*7 ROUT CHARACTER*8 ROUTCHK CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPPIV, IPREPAD, IPTAU, IPRW, IPW, J, $ K, KFAIL, KPASS, KSKIP, KTESTS, L, LIPIV, $ LRWORK, LTAU, LWORK, M, MAXMN, MB, MINMN, MNP, $ MNQ, MP, MYCOL, MYROW, N, NB, NFACT, NGRIDS, $ NMAT, NNB, NOUT, NPCOL, NPROCS, NPROW, NQ, $ WORKFCT, WORKRFCT, WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Arrays .. CHARACTER*2 FACTOR( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), MBVAL( NTESTS ), $ MVAL( NTESTS ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCGELQF, PCGELQRV, $ PCGEQLF, PCGEQLRV, PCGEQPF, $ PCQPPIV, PCGEQRF, PCGEQRRV, $ PCGERQF, PCGERQRV, PCTZRZRV, $ PCMATGEN, PCLAFCHK, PCQRINFO, $ PCTZRZF, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC REAL PCLANGE EXTERNAL ICEIL, LSAMEN, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCQRINFO( OUTFILE, NOUT, NFACT, FACTOR, NTESTS, NMAT, MVAL, $ NTESTS, NVAL, NTESTS, NNB, MBVAL, NTESTS, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different factorization types * DO 40 I = 1, NFACT * FACT = FACTOR( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 2, FACT, 'QR' ) ) THEN ROUT = 'PCGEQRF' ROUTCHK = 'PCGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN ROUT = 'PCGEQLF' ROUTCHK = 'PCGEQLRV' WRITE( NOUT, FMT = 9986 ) $ 'QL factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN ROUT = 'PCGELQF' ROUTCHK = 'PCGELQRV' WRITE( NOUT, FMT = 9986 ) $ 'LQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN ROUT = 'PCGERQF' ROUTCHK = 'PCGERQRV' WRITE( NOUT, FMT = 9986 ) $ 'RQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN ROUT = 'PCGEQPF' ROUTCHK = 'PCGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization with column pivoting tests.' ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN ROUT = 'PCTZRZF' ROUTCHK = 'PCTZRZRV' WRITE( NOUT, FMT = 9986 ) $ 'Complete unitary factorization tests.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * M = MVAL( K ) N = NVAL( K ) * * Make sure matrix information is correct * IERR(1) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * MB = MBVAL( L ) NB = NBVAL( L ) * * Make sure mb is legal * IERR( 1 ) = 0 IF( MB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MB', 'MB', MB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, MB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), MB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( MB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, MB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IPTAU = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * LTAU = MNQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCGEQRRV and * PCLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * LTAU = NQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QL * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCGEQLRV and * PCLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * LTAU = MNP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the LQ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCGELQRV and * PCLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCGERQRV and * PCLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * LTAU = MNQ IPPIV = IPTAU + LTAU + IPOSTPAD + IPREPAD LIPIV = ICEIL( INTGSZ*NQ, CPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the * factorization i.e from IPW on. * LWORK = MAX( 3, MP + MAX( 1, NQ ) ) WORKFCT = LWORK + IPOSTPAD LRWORK = MAX( 1, 2 * NQ ) WORKRFCT = ICEIL( LRWORK*REALSZ, CPLXSZ ) + $ IPOSTPAD IPRW = IPW + WORKFCT + IPREPAD WORKSIZ = WORKFCT + IPREPAD + WORKRFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCGEQRRV, * PCLANGE. * WORKSIZ = MAX( WORKSIZ - IPOSTPAD, $ DESCA( NB_ )*( 2*MP + NQ + DESCA( NB_ ) ) ) + $ IPOSTPAD END IF * ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the TZ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCLAFCHK, PCTZRZRV and * PCLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ FACT // ' factorization', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PCMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need the Infinity of A for checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PCFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PCFILLPAD( ICTXT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANGE( 'I', M, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PCFILLPAD( ICTXT, WORKRFCT-IPOSTPAD, 1, $ MEM( IPRW-IPREPAD ), $ WORKRFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PCFILLPAD( ICTXT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform QR factorizations * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN CALL SLTIMER( 1 ) CALL PCGEQRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN CALL SLTIMER( 1 ) CALL PCGEQLF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN CALL SLTIMER( 1 ) CALL PCGELQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN CALL SLTIMER( 1 ) CALL PCGERQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL SLTIMER( 1 ) CALL PCGEQPF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPTAU ), $ MEM( IPW ), LWORK, MEM( IPRW ), $ LRWORK, INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN CALL SLTIMER( 1 ) IF( N.GE.M ) $ CALL PCTZRZF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, ROUT, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, ROUT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PCCHEKPAD( ICTXT, ROUT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, ROUT, WORKRFCT-IPOSTPAD, $ 1, MEM( IPRW-IPREPAD ), $ WORKRFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PCCHEKPAD( ICTXT, ROUT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * * Compute residual = ||A-Q*R|| / (||A||*N*eps) * CALL PCGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * * Compute residual = ||A-Q*L|| / (||A||*N*eps) * CALL PCGEQLRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * * Compute residual = ||A-L*Q|| / (||A||*N*eps) * CALL PCGELQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * * Compute residual = ||A-R*Q|| / (||A||*N*eps) * CALL PCGERQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * * Compute residual = ||AP-Q*R|| / (||A||*N*eps) * CALL PCGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * * Compute residual = ||A-T*Z|| / (||A||*N*eps) * IF( N.GE.M ) THEN CALL PCTZRZRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) END IF CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) END IF * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, ROUTCHK, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, ROUTCHK, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, ROUTCHK, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * CALL PCQPPIV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCQPPIV', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQPPIV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCLAFCHK', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLAFCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Test residual and detect NaN result * IF( LSAMEN( 2, FACT, 'TZ' ) .AND. N.LT.M ) THEN KSKIP = KSKIP + 1 PASSED = 'BYPASS' ELSE IF( FRESID.LE.THRESH .AND. $ (FRESID-FRESID).EQ.0.0E+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MINMN = MIN( M, N ) MAXMN = MAX( M, N ) * IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN IF( M.GE.N ) THEN NOPS = 0.0D+0 ELSE * * 9 ( M^2 N - M^3 ) + 13 M N - M^2 for * complete unitary factorization (M <= N). * NOPS = 9.0D+0 * ( $ DBLE( N )*( DBLE( M )**2 ) - $ DBLE( M )**3 ) + $ 13.0D+0*DBLE( N )*DBLE( M ) - $ DBLE( M )**2 END IF * ELSE * * 8 M N^2 - 8/3 N^2 + 6 M N + 8 N^2 for QR type * factorization when M >= N. * NOPS = 8.0D+0 * ( DBLE( MINMN )**2 ) * $ ( DBLE( MAXMN )-DBLE( MINMN ) / 3.0D+0 ) + $ ( 6.0D+0 * DBLE( MAXMN ) + $ 8.0D+0 * DBLE( MINMN ) ) * $ DBLE( MINMN ) END IF * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, MB, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, $ PASSED, FRESID * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, MB, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, $ PASSED, FRESID * END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ', $ ' MFLOPS CHECK Residual' ) 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ', $ '----------- ------ --------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, A6, 2X, G8.1 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PCQRDRIVER * END * SUBROUTINE PCQPPIV( M, N, A, IA, JA, DESCA, IPIV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots * returned by PCGEQPF in reverse order for checking purposes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be permuted. On exit, the local pieces * of the distributed permuted submatrix sub( A ) * Inv( P ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL, $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, IGERV2D, $ IGESD2D, IGAMN2D, INFOG1L, PCSWAP * .. * .. External Functions .. INTEGER INDXL2G, NUMROC EXTERNAL INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG1L( JA, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), JJA, $ IACOL ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * DO 20 J = JA, JA+N-2 * IPVT = JA+N-1 ITMP = JA+N * * Find first the local minimum candidate for pivoting * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, IACOL ) DO 10 KK = JJ, JJA+NQ-1 IF( IPIV( KK ).LT.IPVT )THEN IITMP = KK IPVT = IPIV( KK ) END IF 10 CONTINUE * * Find the global minimum pivot * CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, IPVT, 1, IPROW, $ IPCOL, 1, -1, MYCOL ) * * Broadcast the corresponding index to the other process columns * IF( MYCOL.EQ.IPCOL ) THEN ITMP = INDXL2G( IITMP, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1 ) IF( IPCOL.NE.IACOL ) THEN CALL IGERV2D( ICTXT, 1, 1, IPIV( IITMP ), 1, MYROW, $ IACOL ) ELSE IF( MYCOL.EQ.IACOL ) $ IPIV( IITMP ) = IPIV( JJ ) END IF ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1, MYROW, $ IPCOL ) IF( MYCOL.EQ.IACOL .AND. IPCOL.NE.IACOL ) $ CALL IGESD2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, IPCOL ) END IF * * Swap the columns of A * CALL PCSWAP( M, A, IA, ITMP, DESCA, 1, A, IA, J, DESCA, 1 ) * 20 CONTINUE * * End of PCQPPIV * END scalapack-1.8.0/TESTING/LIN/pzlsinfo.f0000640000175000017500000004077510363532303017103 0ustar muammarmuammar SUBROUTINE PZLSINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBRVAL, LDNBVAL, LDNRVAL, $ LDNVAL, LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, $ NNBR, NNR, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZLSINFO gets needed startup information for LS solve and * transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (workspace) INTEGER array of dimension >= * MAX(5,LDMVAL+LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LS.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 5, 1, WORK, 5 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK min ||Ax-b|| by QR factorizations.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision least-square solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - QR|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used. If CPU and WALL time' WRITE( NOUT, FMT = 9999 ) $ ' are the same, only one line '// $ 'is printed, and the label is ''BOTH''.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'QR time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 5, 1, WORK, 5, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZLSINFO * END scalapack-1.8.0/TESTING/LIN/pcludriver.f0000640000175000017500000012530210363532303017404 0ustar muammarmuammar PROGRAM PCLUDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * PCLUDRIVER is the main test program for the COMPLEX * SCALAPACK LU routines. This test driver performs an LU factorization * and solve. If the input matrix is non-square, only the factorization * is performed. Condition estimation and iterative refinement are * optionally performed. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 2.0, LU factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LU.out' output file name (if any) * 6 device out * 1 number of problems sizes * 31 201 values of M * 31 201 values of N * 1 number of NB's * 2 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 1 4 2 3 8 values of P * 2 4 1 3 2 1 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL ZERO COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, INTGSZ = 4, REALSZ = 4, $ TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPOSTPAD, IPPIV, IPREPAD, IPW, IPW2, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIPIV, LRWORK, LWORK, LW2, M, MAXMN, $ MINMN, MP, MYCOL, MYRHS, MYROW, N, NB, NBRHS, $ NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, NPCOL, $ NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL ANORM, ANORM1, FRESID, RCOND, SRESID, SRESID2, $ THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ MVAL( NTESTS ), NBRVAL( NTESTS ), $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCGECON, PCGERFS, $ PCGETRF, PCGETRRV, PCGETRS, $ PCLAFCHK, PCLASCHK, PCLUINFO, $ PCMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, ILCM, NUMROC REAL PCLANGE EXTERNAL ICEIL, ILCM, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PCLUINFO( OUTFILE, NOUT, NMAT, MVAL, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, THRESH, $ EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST .AND. M.EQ.N ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPPIV = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPPIV = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF LIPIV = ICEIL( INTGSZ*( MP+NB ), CPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * IF( CHECK ) THEN * * Calculate the amount of workspace required by the * checking routines PCLANGE, PCGETRRV, and * PCLAFCHK * WORKSIZ = MAX( 2, NQ ) * WORKSIZ = MAX( WORKSIZ, MP*DESCA( NB_ )+ $ NQ*DESCA( MB_ ) ) * WORKSIZ = MAX( WORKSIZ, MP * DESCA( NB_ ) ) * WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate matrix A of Ax = b * CALL PCMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ MP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, LIPIV, 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) ANORM1 = PCLANGE( '1', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST .AND. M.EQ.N ) THEN CALL PCMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PCFILLPAD( ICTXT, MP, NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform LU factorization * CALL PCGETRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCGETRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LU factorization * CALL PCCHEKPAD( ICTXT, 'PCGETRF', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * IF( M.NE.N ) THEN * * For non-square matrices, factorization only * NRHS = 0 NBRHS = 0 * IF( CHECK ) THEN * * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps) * CALL PCGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGETRRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRRV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( (FRESID-FRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) * * 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU * factorization M >= N * NOPS = 4.0D+0*DBLE(MAXMN)*(DBLE(MINMN)**2) - $ (4.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) + $ (2.0D+0)*DBLE( MAXMN )*DBLE( MINMN ) - $ (3.0D+0)*( DBLE( MINMN )**2 ) * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * WTIME( 2 ) = 0.0D+0 IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ), $ WTIME( 2 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * CTIME( 2 ) = 0.0D+0 IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ), $ CTIME( 2 ), TMFLOPS, PASSED END IF * ELSE * * If M = N * IF( EST ) THEN * * Calculate workspace required for PCGECON * LWORK = MAX( 1, 2*NP ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, 2*NQ ) LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PCGECON( '1', N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LRWORK, INFO ) * IF( CHECK ) THEN CALL PCCHEKPAD( ICTXT, 'PCGECON', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGECON', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGECON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), $ MYCOL, DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + $ IPOSTPAD + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * * Set worksiz: routines requiring most workspace * is PCLASCHK * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PCMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) $ CALL PCFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PCMATGEN( ICTXT, 'No', 'No', $ DESCB( M_ ), DESCB( N_ ), $ DESCB( MB_ ), DESCB( NB_ ), $ MEM( IPB0 ), DESCB( LLD_ ), $ DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, $ 0, MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear sytem via LU factorization * CALL PCGETRS( 'No', N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), MEM( IPB ), $ 1, 1, DESCB, INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGETRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRS', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PCLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( SRESID.LE.THRESH .AND. $ ( SRESID-SRESID ).EQ.0.0E+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PCGERFS * LWORK = MAX( 1, 2*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, NP ) LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PCGERFS( 'No', N, NRHS, MEM( IPA0 ), 1, $ 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), $ MEM( IPB0 ), 1, 1, DESCB, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LRWORK, INFO ) * IF( CHECK ) THEN CALL PCCHEKPAD( ICTXT, 'PCGERFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGERFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PCLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 8/3 N^3 - N^2 flops for LU factorization * NOPS = (8.0D+0/3.0D+0)*( DBLE(N)**3 ) - $ DBLE(N)**2 * * nrhs * 8 N^2 flops for LU solve. * NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization * and solve -- for WALL and CPU time, and print * output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * * Print WALL time if supported * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if supported * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED END IF 10 CONTINUE 20 CONTINUE * IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN * * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps) * CALL PCGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGETRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRRV', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF END IF 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ', $ 'Sol Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCLUDRIVER * END scalapack-1.8.0/TESTING/LIN/pcinvinfo.f0000640000175000017500000003525710363532303017231 0ustar muammarmuammar SUBROUTINE PCINVINFO( SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, $ IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDMTYP, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NMTYP, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*3 MATTYP( LDMTYP ) CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCINVINFO gets needed startup information for matrix inversion * tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMTYP (global output) INTEGER * The number of different matrix types to be tested. * * MATTYP (global output) CHARACTER*3 array of dimension of LDMTYP, * The types of matrix to be generated: * if MATTYP(i) = 'GEN' then GENeral matrix, * if MATTYP(i) = 'UTR' then Upper TRiangular matrix, * if MATTYP(i) = 'LTR' then Lower TRiangular matrix, * if MATTYP(i) = 'UPD' then (Upper) hermitian Pos. Definite, * if MATTYP(i) = 'LPD' then (Lower) hermitian Pos. Definite. * * LDMTYP (global input) INTEGER * The maximum number of different matrix types to be tested. * LDMTYP >= NMTYP. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDMTYP+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN REAL PSLAMCH EXTERNAL LSAMEN, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='INV.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NMTYP IF( NMTYP.LT.1 .OR. NMTYP.GT.LDMTYP ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of matrix types', LDMTYP GO TO 40 END IF READ( NIN, FMT = * ) ( MATTYP( I ), I = 1, NMTYP ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NMTYP CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NMTYP IF( LSAMEN( 3, MATTYP( K ), 'GEN' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UTR' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LTR' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UPD' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LPD' ) ) THEN WORK( I ) = 5 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Matrix Inversion routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision Matrix Inversion '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Inverse residual = ||inv(A)*A - I|| '// $ '/ (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less '// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'Fct time : Time in seconds to factor the'// $ ' matrix, if needed.' WRITE( NOUT, FMT = 9999 ) $ 'Inv Time : Time in seconds to inverse the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and inverse.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NMTYP = WORK( 4 ) * I = NMTYP+NMAT+NNB+2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NMTYP IF( WORK( K ).EQ.1 ) THEN MATTYP( K ) = 'GEN' ELSE IF( WORK( K ).EQ.2 ) THEN MATTYP( K ) = 'UTR' ELSE IF( WORK( K ).EQ.3 ) THEN MATTYP( K ) = 'LTR' ELSE IF( WORK( K ).EQ.4 ) THEN MATTYP( K ) = 'UPD' ELSE IF( WORK( K ).EQ.5 ) THEN MATTYP( K ) = 'LPD' END IF 30 CONTINUE * I = NMTYP + 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCINVINFO * END scalapack-1.8.0/TESTING/LIN/pcpblaschk.f0000640000175000017500000002551310363532303017342 0ustar muammarmuammar SUBROUTINE PCPBLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PCPBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D, $ CLASET, PBCTRAN, PCMATGEN, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICAMAX, NUMROC REAL PSLAMCH EXTERNAL ICAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PCBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BW+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PCBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PCBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PCPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PCMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PCAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSCNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSCNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PCBLASCHK * END scalapack-1.8.0/TESTING/LIN/pdqrdriver.f0000640000175000017500000012165710363532303017420 0ustar muammarmuammar PROGRAM PDQRDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PDQRDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK QR factorization routines. This test driver performs a QR * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ * (complete orthogonal factorization) factorization and checks the * results. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 16 lines: * 'ScaLAPACK QR factorizations input file' * 'PVM machine' * 'QR.out' output file name (if any) * 6 device out * 6 number of factorizations * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of MB's and NB's * 4 3 5 values of MB * 4 7 3 values of NB * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0 ) * .. * .. Local Scalars .. CHARACTER*2 FACT CHARACTER*6 PASSED CHARACTER*7 ROUT CHARACTER*8 ROUTCHK CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPPIV, IPREPAD, IPTAU, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, L, LIPIV, LTAU, $ LWORK, M, MAXMN, MB, MINMN, MNP, MNQ, MP, $ MYCOL, MYROW, N, NB, NFACT, NGRIDS, NMAT, NNB, $ NOUT, NPCOL, NPROCS, NPROW, NQ, WORKFCT, $ WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Arrays .. CHARACTER*2 FACTOR( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), MBVAL( NTESTS ), $ MVAL( NTESTS ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDGELQF, PDGELQRV, $ PDGEQLF, PDGEQLRV, PDGEQPF, $ PDQPPIV, PDGEQRF, PDGEQRRV, $ PDGERQF, PDGERQRV, PDTZRZRV, $ PDMATGEN, PDLAFCHK, PDQRINFO, $ PDTZRZF, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLANGE EXTERNAL ICEIL, LSAMEN, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDQRINFO( OUTFILE, NOUT, NFACT, FACTOR, NTESTS, NMAT, MVAL, $ NTESTS, NVAL, NTESTS, NNB, MBVAL, NTESTS, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different factorization types * DO 40 I = 1, NFACT * FACT = FACTOR( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 2, FACT, 'QR' ) ) THEN ROUT = 'PDGEQRF' ROUTCHK = 'PDGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN ROUT = 'PDGEQLF' ROUTCHK = 'PDGEQLRV' WRITE( NOUT, FMT = 9986 ) $ 'QL factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN ROUT = 'PDGELQF' ROUTCHK = 'PDGELQRV' WRITE( NOUT, FMT = 9986 ) $ 'LQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN ROUT = 'PDGERQF' ROUTCHK = 'PDGERQRV' WRITE( NOUT, FMT = 9986 ) $ 'RQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN ROUT = 'PDGEQPF' ROUTCHK = 'PDGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization with column pivoting tests.' ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN ROUT = 'PDTZRZF' ROUTCHK = 'PDTZRZRV' WRITE( NOUT, FMT = 9986 ) $ 'Complete orthogonal factorization tests.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * M = MVAL( K ) N = NVAL( K ) * * Make sure matrix information is correct * IERR(1) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * MB = MBVAL( L ) NB = NBVAL( L ) * * Make sure mb is legal * IERR( 1 ) = 0 IF( MB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MB', 'MB', MB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, MB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), MB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( MB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, MB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IPTAU = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * LTAU = MNQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDGEQRRV and * PDLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * LTAU = NQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QL * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDGEQLRV and * PDLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * LTAU = MNP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the LQ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDGELQRV and * PDLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDGERQRV and * PDLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * LTAU = MNQ IPPIV = IPTAU + LTAU + IPOSTPAD + IPREPAD LIPIV = ICEIL( INTGSZ*NQ, DBLESZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the * factorization i.e from IPW on. * LWORK = MAX( 3, MP + MAX( 1, NQ ) ) + 2 * NQ WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDGEQRRV, * PDLANGE. * WORKSIZ = MAX( WORKSIZ - IPOSTPAD, $ DESCA( NB_ )*( 2*MP + NQ + DESCA( NB_ ) ) ) + $ IPOSTPAD END IF * ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the TZ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDLAFCHK, PDTZRZRV and * PDLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ FACT // ' factorization', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PDMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need the Infinity of A for checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PDFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PDFILLPAD( ICTXT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANGE( 'I', M, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform QR factorizations * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN CALL SLTIMER( 1 ) CALL PDGEQRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN CALL SLTIMER( 1 ) CALL PDGEQLF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN CALL SLTIMER( 1 ) CALL PDGELQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN CALL SLTIMER( 1 ) CALL PDGERQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL SLTIMER( 1 ) CALL PDGEQPF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPTAU ), $ MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN CALL SLTIMER( 1 ) IF( N.GE.M ) $ CALL PDTZRZF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, ROUT, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, ROUT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PDCHEKPAD( ICTXT, ROUT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PDCHEKPAD( ICTXT, ROUT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * * Compute residual = ||A-Q*R|| / (||A||*N*eps) * CALL PDGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * * Compute residual = ||A-Q*L|| / (||A||*N*eps) * CALL PDGEQLRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * * Compute residual = ||A-L*Q|| / (||A||*N*eps) * CALL PDGELQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * * Compute residual = ||A-R*Q|| / (||A||*N*eps) * CALL PDGERQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * * Compute residual = ||AP-Q*R|| / (||A||*N*eps) * CALL PDGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * * Compute residual = ||A-T*Z|| / (||A||*N*eps) * IF( N.GE.M ) THEN CALL PDTZRZRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) END IF CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) END IF * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, ROUTCHK, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, ROUTCHK, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, ROUTCHK, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * CALL PDQPPIV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDQPPIV', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQPPIV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDLAFCHK', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLAFCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Test residual and detect NaN result * IF( LSAMEN( 2, FACT, 'TZ' ) .AND. N.LT.M ) THEN KSKIP = KSKIP + 1 PASSED = 'BYPASS' ELSE IF( FRESID.LE.THRESH .AND. $ (FRESID-FRESID).EQ.0.0D+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MINMN = MIN( M, N ) MAXMN = MAX( M, N ) * IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN IF( M.GE.N ) THEN NOPS = 0.0D+0 ELSE * * 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for * complete orthogonal factorization (M <= N). * NOPS = ( 5.0D+0 * ( $ DBLE( N )*( DBLE( M )**2 ) - $ DBLE( M )**3 + $ DBLE( N )*DBLE( M ) ) + $ DBLE( M )**2 ) / 2.0D+0 END IF * ELSE * * 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type * factorization when M >= N. * NOPS = 2.0D+0 * ( DBLE( MINMN )**2 ) * $ ( DBLE( MAXMN )-DBLE( MINMN ) / 3.0D+0 ) + $ ( DBLE( MAXMN )+DBLE( MINMN ) )*DBLE( MINMN ) END IF * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, MB, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, $ PASSED, FRESID * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, MB, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, $ PASSED, FRESID * END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ', $ ' MFLOPS CHECK Residual' ) 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ', $ '----------- ------ --------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, A6, 2X, G8.1 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PDQRDRIVER * END * SUBROUTINE PDQPPIV( M, N, A, IA, JA, DESCA, IPIV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots * returned by PDGEQPF in reverse order for checking purposes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be permuted. On exit, the local pieces * of the distributed permuted submatrix sub( A ) * Inv( P ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL, $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, IGERV2D, $ IGESD2D, IGAMN2D, INFOG1L, PDSWAP * .. * .. External Functions .. INTEGER INDXL2G, NUMROC EXTERNAL INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG1L( JA, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), JJA, $ IACOL ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * DO 20 J = JA, JA+N-2 * IPVT = JA+N-1 ITMP = JA+N * * Find first the local minimum candidate for pivoting * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, IACOL ) DO 10 KK = JJ, JJA+NQ-1 IF( IPIV( KK ).LT.IPVT )THEN IITMP = KK IPVT = IPIV( KK ) END IF 10 CONTINUE * * Find the global minimum pivot * CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, IPVT, 1, IPROW, $ IPCOL, 1, -1, MYCOL ) * * Broadcast the corresponding index to the other process columns * IF( MYCOL.EQ.IPCOL ) THEN ITMP = INDXL2G( IITMP, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1 ) IF( IPCOL.NE.IACOL ) THEN CALL IGERV2D( ICTXT, 1, 1, IPIV( IITMP ), 1, MYROW, $ IACOL ) ELSE IF( MYCOL.EQ.IACOL ) $ IPIV( IITMP ) = IPIV( JJ ) END IF ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1, MYROW, $ IPCOL ) IF( MYCOL.EQ.IACOL .AND. IPCOL.NE.IACOL ) $ CALL IGESD2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, IPCOL ) END IF * * Swap the columns of A * CALL PDSWAP( M, A, IA, ITMP, DESCA, 1, A, IA, J, DESCA, 1 ) * 20 CONTINUE * * End of PDQPPIV * END scalapack-1.8.0/TESTING/LIN/pdqrt16.f0000640000175000017500000002520210363532303016524 0ustar muammarmuammar SUBROUTINE PDQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, $ JX, DESCX, B, IB, JB, DESCB, RWORK, RESID ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IX, JA, JB, JX, M, N, NRHS DOUBLE PRECISION RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) DOUBLE PRECISION A( * ), B( * ), RWORK( * ), X( * ) * .. * * Purpose * ======= * * PDQRT16 computes the residual for a solution of a system of linear * equations sub( A )*sub( X ) = B or sub( A' )*sub( X ) = B: * RESID = norm(B - sub( A )*sub( X ) ) / * ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ), * where EPS is the machine epsilon, sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes * X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations: * = 'N': sub( A )*sub( X ) = sub( B ) * = 'T': sub( A' )*sub( X )= sub( B ), where A' is the * transpose of sub( A ). * = 'C': sub( A' )*sub( X )= B, where A' is the transpose * of sub( A ). * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local * memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * The original M x N matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) DOUBLE PRECISION pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This * array contains the local pieces of the computed solution * distributed vectors for the system of linear equations. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributes right hand side vectors for * the system of linear equations. On exit, sub( B ) is over- * written with the difference sub( B ) - sub( A )*sub( X ) or * sub( B ) - sub( A )'*sub( X ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * RESID (global output) DOUBLE PRECISION * The maximum over the number of right hand sides of * norm( sub( B )- sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL, $ NPROW DOUBLE PRECISION ANORM, BNORM, EPS, XNORM * .. * .. Local Arrays .. DOUBLE PRECISION TEMP( 2 ) * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL LSAME, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDASUM, PDGEMM * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick exit if M = 0 or N = 0 or NRHS = 0 * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN RESID = ZERO RETURN END IF * IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN ANORM = PDLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK ) N1 = N N2 = M ELSE ANORM = PDLANGE( '1', M, N, A, IA, JA, DESCA, RWORK ) N1 = M N2 = N END IF * EPS = PDLAMCH( ICTXT, 'Epsilon' ) * * Compute B - sub( A )*sub( X ) (or B - sub( A' )*sub( X ) ) and * store in B. * CALL PDGEMM( TRANS, 'No transpose', N1, NRHS, N2, -ONE, A, IA, $ JA, DESCA, X, IX, JX, DESCX, ONE, B, IB, JB, DESCB ) * * Compute the maximum over the number of right hand sides of * norm( sub( B ) - sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * RESID = ZERO DO 10 J = 1, NRHS * CALL PDASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 ) CALL PDASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 ) * * Only the process columns owning the vector operands will have * the correct result, the other will have zero. * TEMP( 1 ) = BNORM TEMP( 2 ) = XNORM CALL DGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM, $ -1, -1, IDUMM ) BNORM = TEMP( 1 ) XNORM = TEMP( 2 ) * * Every processes have ANORM, BNORM and XNORM now. * IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN RESID = ZERO ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN RESID = ONE / EPS ELSE RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) / $ ( MAX( M, N )*EPS ) ) END IF * 10 CONTINUE * RETURN * * End of PDQRT16 * END scalapack-1.8.0/TESTING/LIN/pdlafchk.f0000640000175000017500000002226010363532303017000 0ustar muammarmuammar SUBROUTINE PDLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, IROFF, $ JB, JJ, JJA, JN, LDA, LDW, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DMATADD, INFOG2L, PDMATGEN * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL DMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 10 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 ) * LDA CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJ-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL DMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) 10 CONTINUE * * Calculate factor residual * FRESID = PDLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PDLAFCHK * END scalapack-1.8.0/TESTING/LIN/psdbmv1.f0000640000175000017500000007574210363532303016615 0ustar muammarmuammar SUBROUTINE PSGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDBTRF and this is stored in AF. If a linear system * is to be solved using PSDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PSDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PSDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL SGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL STRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL STRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'T' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL SLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL STRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL SLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL STRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL SGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL SCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL STRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL SGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL SCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL STRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL STRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL SGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL SGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL SAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL SGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL SAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PSBsBMV1 * END scalapack-1.8.0/TESTING/LIN/pdptinfo.f0000640000175000017500000004474510363532303017063 0ustar muammarmuammar SUBROUTINE PDPTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDPTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDPTINFO * END scalapack-1.8.0/TESTING/LIN/pspbinfo.f0000640000175000017500000004472410363532303017055 0ustar muammarmuammar SUBROUTINE PSPBINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSPBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSPBINFO * END scalapack-1.8.0/TESTING/LIN/pcdtlaschk.f0000640000175000017500000003067010363532303017350 0ustar muammarmuammar SUBROUTINE PCDTLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PCDTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'C', A 'Conjugate transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D, $ CLASET, PBCTRAN, PCMATGEN, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICAMAX, NUMROC REAL PSLAMCH EXTERNAL ICAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PCTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (3), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PCBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PCBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.GT.0 ) THEN CALL CGESD2D( ICTXT, 1, 1, A( START+2), LDA, $ MYROW, MYCOL-1 ) ENDIF * IF( MYCOL.LT.NPCOL-1 ) THEN CALL CGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 220 I=0,DESCA( NB_ )-1 A( START+2+(I)*LDA ) = A( START+2+(I+1)*LDA ) 220 CONTINUE * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL CGERV2D( ICTXT, 1, 1, $ A( START+2+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * IF( MYCOL.GT.0 ) THEN CALL CGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PCGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (INT_ONE+2)*INT_ONE, INFO ) * * * Regenerate column of B * CALL PCMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PCAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSCNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSCNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PCTLASCHK * END scalapack-1.8.0/TESTING/LIN/pzdbdriver.f0000640000175000017500000007634410363532303017413 0ustar muammarmuammar PROGRAM PZDBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PZDBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by ZDB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER MEMSIZ, NTESTS, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, $ PZCHEKPAD, PZDBINFO, PZDBLASCHK, PZDBTRF, $ PZDBTRS, PZFILLPAD, PZMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PZLANGE EXTERNAL LSAME, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PZDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 ) $ + MAX(BWL,BWU) NB = MAX( NB, 2*MAX(BWL,BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BWL+BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((BWL+BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BWL+BWU+1), N, $ (BWL+BWU+1), NB, 0, 0, $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BWL+BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BWL+BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PZLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PZDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N, $ (BWL+BWU+1), NB, MEM( IPA ), $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) * CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PZLANGE( '1', (BWL+BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PZDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PZDBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, 'PZDBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PZMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PZDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZDBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZDBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PZDBLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl bwu + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(BWU)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+bwu) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU)) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*bwu * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*bwu flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*BWU*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*BWU*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+bwu) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(BWU)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*BWU ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZDBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzdtdriver.f0000640000175000017500000007665010363532303017435 0ustar muammarmuammar PROGRAM PZDTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PZDTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by ZDT. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER MEMSIZ, NTESTS, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, $ PZCHEKPAD, PZDTINFO, PZDTLASCHK, PZDTTRF, $ PZDTTRS, PZFILLPAD, PZMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PZLANGE EXTERNAL LSAME, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PZDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = 1 IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = 1 IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (3), (3), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((3)+10) IMIDPAD = 10 IPOSTPAD = ((3)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (3), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((3)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(3) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL+3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((3)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PZLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PZDTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PZFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PZLANGE( 'I', N, $ (3), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PZDTTRF( N, MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1, $ DESCA, MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW ), IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PZDTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, 'PZDTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PZMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PZDTTRS( TRANS, N, NRHS, $ MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZDTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZDTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (3), N, $ (3), NB, 0, 0, $ ICTXT, (3), IERR( 1 ) ) CALL PZDTLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl INT_ONE + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(INT_ONE)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE)) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*INT_ONE * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*INT_ONE flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*INT_ONE*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*INT_ONE*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+INT_ONE) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(INT_ONE)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*INT_ONE ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZDTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pdlltdriver.f0000640000175000017500000011022610363532303017557 0ustar muammarmuammar PROGRAM PDLLTDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PDLLTDRIVER is the main test program for the DOUBLE PRECISION * ScaLAPACK Cholesky routines. This test driver performs an * A = L*L**T or A = U**T*U factorization and solve, and optionally * performs condition estimation and iterative refinement. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'ScaLAPACK LLt factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LLT.out' output file name (if any) * 6 device out * 'U' define Lower or Upper * 1 number of problems sizes * 31 100 200 values of N * 1 number of NB's * 2 10 24 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 Number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 values of P * 2 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPREPAD, IPOSTPAD, IPW, IPW2, ITEMP, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIWORK, LWORK, LW2, MYCOL, MYRHS, MYROW, N, NB, $ NBRHS, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, ANORM1, FRESID, NOPS, RCOND, $ SRESID, SRESID2, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGSUM2D, BLACS_PINFO, PDCHEKPAD, PDFILLPAD, $ PDLAFCHK, PDLASCHK, PDLLTINFO, $ PDMATGEN, PDPOCON, PDPORFS, $ PDPOTRF, PDPOTRRV, PDPOTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PDLANSY EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PDLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PDLLTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPW = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF * * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PDLAFCHK, PDPOTRRV, and * PDLANSY * WORKSIZ = NP * DESCA( NB_ ) * WORKSIZ = MAX( WORKSIZ, DESCA( MB_ ) * DESCA( NB_ ) ) * LCM = ILCM( NPROW, NPCOL ) ITEMP = MAX( 2, 2 * NQ ) + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), LCM / NPROW ) END IF WORKSIZ = MAX( WORKSIZ, ITEMP ) WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate a symmetric positive definite matrix A * CALL PDMATGEN( ICTXT, 'Symm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) ANORM1 = PDLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN CALL PDMATGEN( ICTXT, 'Symm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PDFILLPAD( ICTXT, NP, NQ, $ MEM( IPA0-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform LLt factorization * CALL SLTIMER( 1 ) * CALL PDPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDPOTRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 60 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LLt factorization * CALL PDCHEKPAD( ICTXT, 'PDPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN * * Calculate workspace required for PDPOCON * LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 60 END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PDPOCON( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LIWORK, INFO ) * IF( CHECK ) THEN CALL PDCHEKPAD( ICTXT, 'PDPOCON', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPOCON', $ LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPOCON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL, $ DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + IPOSTPAD $ + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PDLASCHK * LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PDMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, NPCOL ) * IF( CHECK ) $ CALL PDFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PDMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB0 ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear system via Cholesky factorization * CALL PDPOTRS( UPLO, N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB ), 1, 1, DESCB, $ INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPOTRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPOTRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PDLASCHK( 'Symm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PDPORFS * LWORK = MAX( 1, 3*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, $ 1, MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PDPORFS( UPLO, N, NRHS, MEM( IPA0 ), $ 1, 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB0 ), 1, 1, $ DESCB, MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LIWORK, INFO ) * * check for memory overwrite * IF( CHECK ) THEN CALL PDCHEKPAD( ICTXT, 'PDPORFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDPORFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PDLASCHK( 'Symm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 1/3 N^3 + 1/2 N^2 flops for LLt factorization * NOPS = (DBLE(N)**3)/3.0D+0 + $ (DBLE(N)**2)/2.0D+0 * * nrhs * 2 N^2 flops for LLt solve. * NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization and * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED * END IF 10 CONTINUE 20 CONTINUE * IF( CHECK .AND. SRESID.GT.THRESH ) THEN * * Compute FRESID = ||A - LL'|| / (||A|| * N * eps) * CALL PDPOTRRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDLAFCHK( 'Symm', 'Diag', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPOTRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( IAM.EQ.0 ) THEN IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9986 ) 'L*L''', FRESID ELSE WRITE( NOUT, FMT = 9986 ) 'U''*U', FRESID END IF END IF END IF * 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB NRHS NBRHS P Q LLt Time ', $ 'Slv Time MFLOPS CHECK' ) 9994 FORMAT( '---- ---- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 4X, A1, 1X, I5, 1X, I3, 1X, I4, 1X, I5, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDLLTDRIVER * END scalapack-1.8.0/TESTING/LIN/psgerqrv.f0000640000175000017500000002360710363532303017103 0ustar muammarmuammar SUBROUTINE PSGERQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGERQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from R, Q * computed by PSGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors R and Q computed * by PSGERQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PSGERQF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JV, K, MYCOL, MYROW, NPCOL, $ NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLACPY, $ PSLARFB, PSLARFT, PSLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+M-K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA - M + K + 1 JV = 1 + N - K + ICOFF * * Compute upper triangular matrix T * CALL PSLARFT( 'Backward', 'Rowwise', N-M+IN-IA+1, IB, A, IA+M-K, $ JA, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', IB, N-M+IN-IA+1, A, IA+M-K, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PSLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict lower triangular part of sub( A ) to get block * column of R * CALL PSLASET( 'All', IB, N-K, ZERO, ZERO, A, IA+M-K, JA, $ DESCA ) CALL PSLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, IA+M-K+1, $ JA+N-K, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ IN-IA+1, N-M+IN-IA+1, IB, WORK( IPV ), 1, ICOFF+1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) JV = 1 + N - M + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PSLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', IB, N-M+I+IB-IA, A, I, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PSLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict Lower triangular part of sub( A ) to get * block column of R * CALL PSLASET( 'All', IB, N-M+I-IA, ZERO, ZERO, A, I, JA, $ DESCA ) CALL PSLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, I+1, $ JA+N-M+I-IA, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I+IB-IA, N-M+I+IB-IA, IB, WORK( IPV ), 1, $ ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSGERQRV * END scalapack-1.8.0/TESTING/LIN/pslsinfo.f0000640000175000017500000004077210363532303017071 0ustar muammarmuammar SUBROUTINE PSLSINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBRVAL, LDNBVAL, LDNRVAL, $ LDNVAL, LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, $ NNBR, NNR, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSLSINFO gets needed startup information for LS solve and * transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (workspace) INTEGER array of dimension >= * MAX(5,LDMVAL+LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LS.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 5, 1, WORK, 5 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK min ||Ax-b|| by QR factorizations.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision least-square solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - QR|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used. If CPU and WALL time' WRITE( NOUT, FMT = 9999 ) $ ' are the same, only one line '// $ 'is printed, and the label is ''BOTH''.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'QR time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 5, 1, WORK, 5, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSLSINFO * END scalapack-1.8.0/TESTING/LIN/pzinvinfo.f0000640000175000017500000003525710363532303017260 0ustar muammarmuammar SUBROUTINE PZINVINFO( SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, $ IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDMTYP, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NMTYP, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*3 MATTYP( LDMTYP ) CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZINVINFO gets needed startup information for matrix inversion * tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMTYP (global output) INTEGER * The number of different matrix types to be tested. * * MATTYP (global output) CHARACTER*3 array of dimension of LDMTYP, * The types of matrix to be generated: * if MATTYP(i) = 'GEN' then GENeral matrix, * if MATTYP(i) = 'UTR' then Upper TRiangular matrix, * if MATTYP(i) = 'LTR' then Lower TRiangular matrix, * if MATTYP(i) = 'UPD' then (Upper) hermitian Pos. Definite, * if MATTYP(i) = 'LPD' then (Lower) hermitian Pos. Definite. * * LDMTYP (global input) INTEGER * The maximum number of different matrix types to be tested. * LDMTYP >= NMTYP. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDMTYP+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN DOUBLE PRECISION PDLAMCH EXTERNAL LSAMEN, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='INV.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NMTYP IF( NMTYP.LT.1 .OR. NMTYP.GT.LDMTYP ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of matrix types', LDMTYP GO TO 40 END IF READ( NIN, FMT = * ) ( MATTYP( I ), I = 1, NMTYP ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NMTYP CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NMTYP IF( LSAMEN( 3, MATTYP( K ), 'GEN' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UTR' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LTR' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UPD' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LPD' ) ) THEN WORK( I ) = 5 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Matrix Inversion routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision Matrix Inversion '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Inverse residual = ||inv(A)*A - I|| '// $ '/ (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less '// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'Fct time : Time in seconds to factor the'// $ ' matrix, if needed.' WRITE( NOUT, FMT = 9999 ) $ 'Inv Time : Time in seconds to inverse the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and inverse.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NMTYP = WORK( 4 ) * I = NMTYP+NMAT+NNB+2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NMTYP IF( WORK( K ).EQ.1 ) THEN MATTYP( K ) = 'GEN' ELSE IF( WORK( K ).EQ.2 ) THEN MATTYP( K ) = 'UTR' ELSE IF( WORK( K ).EQ.3 ) THEN MATTYP( K ) = 'LTR' ELSE IF( WORK( K ).EQ.4 ) THEN MATTYP( K ) = 'UPD' ELSE IF( WORK( K ).EQ.5 ) THEN MATTYP( K ) = 'LPD' END IF 30 CONTINUE * I = NMTYP + 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZINVINFO * END scalapack-1.8.0/TESTING/LIN/pdmatgen.f0000640000175000017500000004312410363532303017025 0ustar muammarmuammar SUBROUTINE PDMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDMATGEN : Parallel Real Double precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) DOUBLE PRECISION, pointer into the local * memory to an array of dimension ( LDA, * ) containing the * local pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDRAND EXTERNAL ICEIL, NUMROC, LSAME, PDRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = ONE - TWO*PDRAND(0) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = ONE - TWO*PDRAND(0) 30 CONTINUE A(IK,JK) = ONE - TWO*PDRAND(0) DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = ONE - TWO*PDRAND(0) A(IK,JK+J) = A(IK+J,JK) 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 A(IK,JK) = ONE - TWO*PDRAND(0) JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 A(IK,JK) = ONE - TWO*PDRAND(0) JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = ONE - TWO*PDRAND(0) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 A(IK,JK+J) = ABS(A(IK,JK+J)) + MAXMN IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PDMATGEN * END scalapack-1.8.0/TESTING/LIN/psluinfo.f0000640000175000017500000004107610363532303017071 0ustar muammarmuammar SUBROUTINE PSLUINFO( SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, EST, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDNVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ WORK( * ) * .. * * Purpose * ======= * * PSLUINFO gets needed startup information for LU factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M and N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * M and N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 6, 2*LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL ) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LU.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( EST ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 6, 1, WORK, 6 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Ax=b by LU factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision LU factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LU time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 6, 1, WORK, 6, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSLUINFO * END scalapack-1.8.0/TESTING/LIN/pzmatgen.f0000640000175000017500000004635310363532303017062 0ustar muammarmuammar SUBROUTINE PZMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZMATGEN : Parallel Complex Double precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX*16, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB DOUBLE PRECISION DUMMY * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDRAND EXTERNAL ICEIL, NUMROC, LSAME, PDRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = DCMPLX( PDRAND(0), PDRAND(0) ) 30 CONTINUE IF( SYMM ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), ZERO ) DUMMY = PDRAND(0) END IF DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IF( HERM ) THEN A(IK,JK+J) = DCONJG( A(IK+J,JK) ) ELSE A(IK,JK+J) = A(IK+J,JK) END IF 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 IF( SYMM ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ TWO*PDRAND(0) - ONE ) END IF JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 IF( TRAN ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ TWO*PDRAND(0) - ONE ) END IF JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 IF( HERM ) THEN A(IK,JK+J) = DCMPLX( $ ABS(DBLE(A(IK,JK+J)))+2*MAXMN, ZERO ) ELSE A(IK,JK+J)= DCMPLX( ABS(DBLE(A(IK,JK+J)))+MAXMN, $ ABS(DIMAG(A(IK,JK+J)))+ MAXMN ) END IF IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PZMATGEN * END scalapack-1.8.0/TESTING/LIN/pcgbinfo.f0000640000175000017500000004434010363532303017016 0ustar muammarmuammar SUBROUTINE PCGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCGBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCGBINFO * END scalapack-1.8.0/TESTING/LIN/pzqrdriver.f0000640000175000017500000012401010363532303017430 0ustar muammarmuammar PROGRAM PZQRDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PZQRDRIVER is the main test program for the COMPLEX*16 * SCALAPACK QR factorization routines. This test driver performs a QR * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ * (complete unitary factorization) factorization and checks the * results. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 16 lines: * 'ScaLAPACK QR factorizations input file' * 'PVM machine' * 'QR.out' output file name (if any) * 6 device out * 6 number of factorizations * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of MB's and NB's * 4 3 5 values of MB * 4 7 3 values of NB * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ, DBLESZ and ZPLXSZ indicate the length in bytes on * the given platform for an integer, a double precision real * and a double precision complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ ZPLXSZ = 16, MEMSIZ = TOTMEM / ZPLXSZ, $ NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER*2 FACT CHARACTER*6 PASSED CHARACTER*7 ROUT CHARACTER*8 ROUTCHK CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPPIV, IPREPAD, IPTAU, IPRW, IPW, J, $ K, KFAIL, KPASS, KSKIP, KTESTS, L, LIPIV, $ LRWORK, LTAU, LWORK, M, MAXMN, MB, MINMN, MNP, $ MNQ, MP, MYCOL, MYROW, N, NB, NFACT, NGRIDS, $ NMAT, NNB, NOUT, NPCOL, NPROCS, NPROW, NQ, $ WORKFCT, WORKRFCT, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Arrays .. CHARACTER*2 FACTOR( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), MBVAL( NTESTS ), $ MVAL( NTESTS ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZGELQF, PZGELQRV, $ PZGEQLF, PZGEQLRV, PZGEQPF, $ PZQPPIV, PZGEQRF, PZGEQRRV, $ PZGERQF, PZGERQRV, PZTZRZRV, $ PZMATGEN, PZLAFCHK, PZQRINFO, $ PZTZRZF, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC DOUBLE PRECISION PZLANGE EXTERNAL ICEIL, LSAMEN, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZQRINFO( OUTFILE, NOUT, NFACT, FACTOR, NTESTS, NMAT, MVAL, $ NTESTS, NVAL, NTESTS, NNB, MBVAL, NTESTS, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different factorization types * DO 40 I = 1, NFACT * FACT = FACTOR( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 2, FACT, 'QR' ) ) THEN ROUT = 'PZGEQRF' ROUTCHK = 'PZGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN ROUT = 'PZGEQLF' ROUTCHK = 'PZGEQLRV' WRITE( NOUT, FMT = 9986 ) $ 'QL factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN ROUT = 'PZGELQF' ROUTCHK = 'PZGELQRV' WRITE( NOUT, FMT = 9986 ) $ 'LQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN ROUT = 'PZGERQF' ROUTCHK = 'PZGERQRV' WRITE( NOUT, FMT = 9986 ) $ 'RQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN ROUT = 'PZGEQPF' ROUTCHK = 'PZGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization with column pivoting tests.' ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN ROUT = 'PZTZRZF' ROUTCHK = 'PZTZRZRV' WRITE( NOUT, FMT = 9986 ) $ 'Complete unitary factorization tests.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * M = MVAL( K ) N = NVAL( K ) * * Make sure matrix information is correct * IERR(1) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * MB = MBVAL( L ) NB = NBVAL( L ) * * Make sure mb is legal * IERR( 1 ) = 0 IF( MB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MB', 'MB', MB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, MB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), MB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( MB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, MB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IPTAU = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * LTAU = MNQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZGEQRRV and * PZLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * LTAU = NQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QL * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZGEQLRV and * PZLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * LTAU = MNP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the LQ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZGELQRV and * PZLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZGERQRV and * PZLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * LTAU = MNQ IPPIV = IPTAU + LTAU + IPOSTPAD + IPREPAD LIPIV = ICEIL( INTGSZ*NQ, ZPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the * factorization i.e from IPW on. * LWORK = MAX( 3, MP + MAX( 1, NQ ) ) WORKFCT = LWORK + IPOSTPAD LRWORK = MAX( 1, 2 * NQ ) WORKRFCT = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + $ IPOSTPAD IPRW = IPW + WORKFCT + IPREPAD WORKSIZ = WORKFCT + IPREPAD + WORKRFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZGEQRRV, * PZLANGE. * WORKSIZ = MAX( WORKSIZ - IPOSTPAD, $ DESCA( NB_ )*( 2*MP + NQ + DESCA( NB_ ) ) ) + $ IPOSTPAD END IF * ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the TZ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZLAFCHK, PZTZRZRV and * PZLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ FACT // ' factorization', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need the Infinity of A for checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PZFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PZFILLPAD( ICTXT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANGE( 'I', M, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PZFILLPAD( ICTXT, WORKRFCT-IPOSTPAD, 1, $ MEM( IPRW-IPREPAD ), $ WORKRFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PZFILLPAD( ICTXT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform QR factorizations * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN CALL SLTIMER( 1 ) CALL PZGEQRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN CALL SLTIMER( 1 ) CALL PZGEQLF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN CALL SLTIMER( 1 ) CALL PZGELQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN CALL SLTIMER( 1 ) CALL PZGERQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL SLTIMER( 1 ) CALL PZGEQPF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPTAU ), $ MEM( IPW ), LWORK, MEM( IPRW ), $ LRWORK, INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN CALL SLTIMER( 1 ) IF( N.GE.M ) $ CALL PZTZRZF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, ROUT, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, ROUT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PZCHEKPAD( ICTXT, ROUT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, ROUT, WORKRFCT-IPOSTPAD, $ 1, MEM( IPRW-IPREPAD ), $ WORKRFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PZCHEKPAD( ICTXT, ROUT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * * Compute residual = ||A-Q*R|| / (||A||*N*eps) * CALL PZGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * * Compute residual = ||A-Q*L|| / (||A||*N*eps) * CALL PZGEQLRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * * Compute residual = ||A-L*Q|| / (||A||*N*eps) * CALL PZGELQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * * Compute residual = ||A-R*Q|| / (||A||*N*eps) * CALL PZGERQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * * Compute residual = ||AP-Q*R|| / (||A||*N*eps) * CALL PZGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * * Compute residual = ||A-T*Z|| / (||A||*N*eps) * IF( N.GE.M ) THEN CALL PZTZRZRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) END IF CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) END IF * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, ROUTCHK, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, ROUTCHK, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, ROUTCHK, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * CALL PZQPPIV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZQPPIV', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQPPIV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZLAFCHK', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLAFCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Test residual and detect NaN result * IF( LSAMEN( 2, FACT, 'TZ' ) .AND. N.LT.M ) THEN KSKIP = KSKIP + 1 PASSED = 'BYPASS' ELSE IF( FRESID.LE.THRESH .AND. $ (FRESID-FRESID).EQ.0.0D+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MINMN = MIN( M, N ) MAXMN = MAX( M, N ) * IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN IF( M.GE.N ) THEN NOPS = 0.0D+0 ELSE * * 9 ( M^2 N - M^3 ) + 13 M N - M^2 for * complete unitary factorization (M <= N). * NOPS = 9.0D+0 * ( $ DBLE( N )*( DBLE( M )**2 ) - $ DBLE( M )**3 ) + $ 13.0D+0*DBLE( N )*DBLE( M ) - $ DBLE( M )**2 END IF * ELSE * * 8 M N^2 - 8/3 N^2 + 6 M N + 8 N^2 for QR type * factorization when M >= N. * NOPS = 8.0D+0 * ( DBLE( MINMN )**2 ) * $ ( DBLE( MAXMN )-DBLE( MINMN ) / 3.0D+0 ) + $ ( 6.0D+0 * DBLE( MAXMN ) + $ 8.0D+0 * DBLE( MINMN ) ) * $ DBLE( MINMN ) END IF * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, MB, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, $ PASSED, FRESID * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, MB, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, $ PASSED, FRESID * END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ', $ ' MFLOPS CHECK Residual' ) 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ', $ '----------- ------ --------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, A6, 2X, G8.1 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PZQRDRIVER * END * SUBROUTINE PZQPPIV( M, N, A, IA, JA, DESCA, IPIV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots * returned by PZGEQPF in reverse order for checking purposes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be permuted. On exit, the local pieces * of the distributed permuted submatrix sub( A ) * Inv( P ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL, $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, IGERV2D, $ IGESD2D, IGAMN2D, INFOG1L, PZSWAP * .. * .. External Functions .. INTEGER INDXL2G, NUMROC EXTERNAL INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG1L( JA, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), JJA, $ IACOL ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * DO 20 J = JA, JA+N-2 * IPVT = JA+N-1 ITMP = JA+N * * Find first the local minimum candidate for pivoting * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, IACOL ) DO 10 KK = JJ, JJA+NQ-1 IF( IPIV( KK ).LT.IPVT )THEN IITMP = KK IPVT = IPIV( KK ) END IF 10 CONTINUE * * Find the global minimum pivot * CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, IPVT, 1, IPROW, $ IPCOL, 1, -1, MYCOL ) * * Broadcast the corresponding index to the other process columns * IF( MYCOL.EQ.IPCOL ) THEN ITMP = INDXL2G( IITMP, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1 ) IF( IPCOL.NE.IACOL ) THEN CALL IGERV2D( ICTXT, 1, 1, IPIV( IITMP ), 1, MYROW, $ IACOL ) ELSE IF( MYCOL.EQ.IACOL ) $ IPIV( IITMP ) = IPIV( JJ ) END IF ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1, MYROW, $ IPCOL ) IF( MYCOL.EQ.IACOL .AND. IPCOL.NE.IACOL ) $ CALL IGESD2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, IPCOL ) END IF * * Swap the columns of A * CALL PZSWAP( M, A, IA, ITMP, DESCA, 1, A, IA, J, DESCA, 1 ) * 20 CONTINUE * * End of PZQPPIV * END scalapack-1.8.0/TESTING/LIN/pcpbmv1.f0000640000175000017500000007314310363532303016602 0ustar muammarmuammar SUBROUTINE PCPBDCMV( LDBW, BW, UPLO, N, A, JA, DESCA, NRHS, B, IB, $ DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPBTRF and this is stored in AF. If a linear system * is to be solved using PCPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, $ FIRST_PROC, I, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, J, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, OFST, PART_OFFSET, $ PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Pre-calculate bw^2 * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCPBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (BW+2)*BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( UPLO, 'L' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL CLATCPY( 'U', BW, BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to right * CALL CTRSD2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL CHBMV( 'L', NUMROC_SIZE, BW, CONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, CZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * CALL CTRMV( 'U', 'N', 'N', BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), LLDA-1, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, BW*( BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL-1 ) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 30 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( UPLO, 'U' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL CLATCPY( 'L', BW, BW, A( OFST+1 ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to left * CALL CTRSD2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL-1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL CHBMV( 'U', NUMROC_SIZE, BW, CONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, CZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL+1 ) * CALL CTRMV( 'U', 'N', 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, BW*( BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BW, $ A( 1 ), LLDA-1, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 60 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PCBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pcqrinfo.f0000640000175000017500000004254210363532303017052 0ustar muammarmuammar SUBROUTINE PCQRINFO( SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, $ MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, $ LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDFACT, LDMBVAL, LDMVAL, LDNBVAL, LDNVAL, $ LDPVAL, LDQVAL, NFACT, NGRIDS, NMAT, NNB, $ NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*2 FACTOR( LDFACT ) CHARACTER*(*) SUMMRY INTEGER MBVAL( LDMBVAL ), MVAL( LDMVAL ), $ NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCQRINFO gets needed startup information for the QR factoriza- * tion routines and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NFACT (global output) INTEGER * The number of different factorization types to be tested. * * FACTOR (global output) CHARACTER*2 array of dimension of LDFACT, * The factorization types to be tested: * if FACTOR(i) = 'QR' then QR factorization, * if FACTOR(i) = 'QL' then QL factorization, * if FACTOR(i) = 'LQ' then LQ factorization, * if FACTOR(i) = 'RQ' then RQ factorization, * if FACTOR(i) = 'QP' then QR factorization with column * pivoting. * if FACTOR(i) = 'TZ' then complete unitary factorization. * * LDFACT (global input) INTEGER * The maximum number of different factorization types to be * tested. LDFACT >= NFACT. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array of dimension (LDNVAL), the * values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array of dimension (LDNVAL), the * values of N (number of columns in matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for MB and * NB. * * MBVAL (global output) INTEGER array of dimension (LDMBVAL), the * values of MB (row blocksize) to run the code with. * * LDMBVAL (global input) INTEGER * The maximum number of different values that can be used for * MB, LDMBVAL >= NNB. * * NBVAL (global output) INTEGER array of dimension (LDNBVAL), the * values of NB (column blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array of dimension (LDPVAL), the * values of P (number of process rows) to run the code with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array of dimension (LDQVAL), the * values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDFACT+LDMVAL+LDNVAL+LDMBVAL+LDNBVAL+LDPVAL+LDQVAL ) * used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN REAL PSLAMCH EXTERNAL LSAMEN, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='QR.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NFACT IF( NFACT.LT.1 .OR. NFACT.GT.LDFACT ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of factorization', LDFACT GO TO 40 END IF READ( NIN, FMT = * ) ( FACTOR( I ), I = 1, NFACT ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDMBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'MB', LDMBVAL GO TO 40 ELSE IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MBVAL( I ), I = 1, NNB ) READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NFACT CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NFACT IF( LSAMEN( 2, FACTOR( K ), 'QR' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QL' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'LQ' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'RQ' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QP' ) ) THEN WORK( I ) = 5 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'TZ' ) ) THEN WORK( I ) = 6 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, MBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QR factorizations routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision QR factorizations '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' || A - QR || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QL || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - LQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - RQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QRP || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - TZ || / (|| A || * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'MB : The row blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The column blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'Fact Time: Time in seconds to factor the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Execution rate of the '// $ 'factorization.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'MB ', ( MBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NFACT = WORK( 4 ) * I = NFACT + 2*NMAT + 2*NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NFACT IF( WORK( K ).EQ.1 ) THEN FACTOR( K ) = 'QR' ELSE IF( WORK( K ).EQ.2 ) THEN FACTOR( K ) = 'QL' ELSE IF( WORK( K ).EQ.3 ) THEN FACTOR( K ) = 'LQ' ELSE IF( WORK( K ).EQ.4 ) THEN FACTOR( K ) = 'RQ' ELSE IF( WORK( K ).EQ.5 ) THEN FACTOR( K ) = 'QP' ELSE IF( WORK( K ).EQ.6 ) THEN FACTOR( K ) = 'TZ' END IF 30 CONTINUE * I = NFACT + 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, MBVAL, 1 ) I = I + NNB CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCQRINFO * END scalapack-1.8.0/TESTING/LIN/psqrinfo.f0000640000175000017500000004254210363532303017072 0ustar muammarmuammar SUBROUTINE PSQRINFO( SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, $ MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, $ LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDFACT, LDMBVAL, LDMVAL, LDNBVAL, LDNVAL, $ LDPVAL, LDQVAL, NFACT, NGRIDS, NMAT, NNB, $ NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*2 FACTOR( LDFACT ) CHARACTER*(*) SUMMRY INTEGER MBVAL( LDMBVAL ), MVAL( LDMVAL ), $ NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSQRINFO gets needed startup information for the QR factoriza- * tion routines and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NFACT (global output) INTEGER * The number of different factorization types to be tested. * * FACTOR (global output) CHARACTER*2 array of dimension of LDFACT, * The factorization types to be tested: * if FACTOR(i) = 'QR' then QR factorization, * if FACTOR(i) = 'QL' then QL factorization, * if FACTOR(i) = 'LQ' then LQ factorization, * if FACTOR(i) = 'RQ' then RQ factorization, * if FACTOR(i) = 'QP' then QR factorization with column * pivoting. * if FACTOR(i) = 'TZ' then complete orthogonal factorization. * * LDFACT (global input) INTEGER * The maximum number of different factorization types to be * tested. LDFACT >= NFACT. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array of dimension (LDNVAL), the * values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array of dimension (LDNVAL), the * values of N (number of columns in matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for MB and * NB. * * MBVAL (global output) INTEGER array of dimension (LDMBVAL), the * values of MB (row blocksize) to run the code with. * * LDMBVAL (global input) INTEGER * The maximum number of different values that can be used for * MB, LDMBVAL >= NNB. * * NBVAL (global output) INTEGER array of dimension (LDNBVAL), the * values of NB (column blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array of dimension (LDPVAL), the * values of P (number of process rows) to run the code with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array of dimension (LDQVAL), the * values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDFACT+LDMVAL+LDNVAL+LDMBVAL+LDNBVAL+LDPVAL+LDQVAL ) * used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN REAL PSLAMCH EXTERNAL LSAMEN, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='QR.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NFACT IF( NFACT.LT.1 .OR. NFACT.GT.LDFACT ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of factorization', LDFACT GO TO 40 END IF READ( NIN, FMT = * ) ( FACTOR( I ), I = 1, NFACT ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDMBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'MB', LDMBVAL GO TO 40 ELSE IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MBVAL( I ), I = 1, NNB ) READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NFACT CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NFACT IF( LSAMEN( 2, FACTOR( K ), 'QR' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QL' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'LQ' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'RQ' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QP' ) ) THEN WORK( I ) = 5 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'TZ' ) ) THEN WORK( I ) = 6 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, MBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QR factorizations routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision QR factorizations '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' || A - QR || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QL || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - LQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - RQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QRP || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - TZ || / (|| A || * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'MB : The row blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The column blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'Fact Time: Time in seconds to factor the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Execution rate of the '// $ 'factorization.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'MB ', ( MBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NFACT = WORK( 4 ) * I = NFACT + 2*NMAT + 2*NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NFACT IF( WORK( K ).EQ.1 ) THEN FACTOR( K ) = 'QR' ELSE IF( WORK( K ).EQ.2 ) THEN FACTOR( K ) = 'QL' ELSE IF( WORK( K ).EQ.3 ) THEN FACTOR( K ) = 'LQ' ELSE IF( WORK( K ).EQ.4 ) THEN FACTOR( K ) = 'RQ' ELSE IF( WORK( K ).EQ.5 ) THEN FACTOR( K ) = 'QP' ELSE IF( WORK( K ).EQ.6 ) THEN FACTOR( K ) = 'TZ' END IF 30 CONTINUE * I = NFACT + 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, MBVAL, 1 ) I = I + NNB CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSQRINFO * END scalapack-1.8.0/TESTING/LIN/psptlaschk.f0000640000175000017500000002735610363532303017413 0ustar muammarmuammar SUBROUTINE PSPTLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PSPTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) REAL pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) REAL array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTRAN, PSMATGEN, $ SGAMX2D, SGEBR2D, SGEBS2D, SGEMM, $ SGERV2D, SGESD2D, SGSUM2D, SLASET * .. * .. External Functions .. INTEGER ISAMAX, NUMROC REAL PSLAMCH EXTERNAL ISAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PSTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (2), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PSBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PSBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF IF( LSAME( UPLO, 'U' ) ) THEN * * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL SGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.GT.0 ) THEN CALL SGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PSPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PSMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PSAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PSTLASCHK * END scalapack-1.8.0/TESTING/LIN/pcqrt13.f0000640000175000017500000002057410363532303016527 0ustar muammarmuammar SUBROUTINE PCQRT13( SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, $ WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ISEED, JA, M, N, SCALE REAL NORMA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCQRT13 generates a full-rank matrix that may be scaled to have * large or small norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCALE (global input) INTEGER * SCALE = 1: normally scaled matrix * SCALE = 2: matrix scaled up * SCALE = 3: matrix scaled down * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * NORMA (global output) REAL * The one-norm of A. * * ISEED (global input/global output) INTEGER * Seed for random number generator. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= Nq0, where * * ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), and * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IIA, INFO, $ IROFFA, J, JJA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL ASUM, BIGNUM, SMLNUM COMPLEX AJJ * .. * .. External Functions .. INTEGER NUMROC REAL PCLANGE, PSLAMCH EXTERNAL NUMROC, PCLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PCLASCL, PCMATGEN, $ PCELGET, PCELSET, PSCASUM, $ PSLABAD * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MOD, REAL, SIGN * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * generate the matrix * IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * CALL PCMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), A, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED, IIA-1, MP, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * DO 10 J = JA, JA+N-1 I = IA + J - JA IF( I.LE.IA+M-1 ) THEN CALL PSCASUM( M, ASUM, A, IA, J, DESCA, 1 ) CALL PCELGET( 'Column', ' ', AJJ, A, I, J, DESCA ) AJJ = AJJ + CMPLX( SIGN( ASUM, REAL( AJJ ) ) ) CALL PCELSET( A, I, J, DESCA, AJJ ) END IF 10 CONTINUE * * scaled versions * IF( SCALE.NE.1 ) THEN * NORMA = PCLANGE( 'M', M, N, A, IA, JA, DESCA, WORK ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'Epsilon' ) BIGNUM = ONE / SMLNUM * IF( SCALE.EQ.2 ) THEN * * matrix scaled up * CALL PCLASCL( 'General', NORMA, BIGNUM, M, N, A, IA, $ JA, DESCA, INFO ) * ELSE IF( SCALE.EQ.3 ) THEN * * matrix scaled down * CALL PCLASCL( 'General', NORMA, SMLNUM, M, N, A, IA, $ JA, DESCA, INFO ) * END IF * END IF * NORMA = PCLANGE( 'One-norm', M, N, A, IA, JA, DESCA, WORK ) * RETURN * * End of PCQRT13 * END scalapack-1.8.0/TESTING/LIN/pzgeqlrv.f0000640000175000017500000002410210363532303017073 0ustar muammarmuammar SUBROUTINE PZGEQLRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQLRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PZGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PZGEQLF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(N_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PZGEQLF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IIA, IPT, IPV, IPW, IROFF, $ IV, J, JB, JJA, JN, K, MP, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PB_TOPGET, $ PB_TOPSET, PZLACPY, PZLARFB, PZLARFT, $ PZLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA+N-K, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * * Handle first block separately * IV = 1 + M - K + IROFF JB = JN - JA - N + K + 1 * * Compute upper triangular matrix T * CALL PZLARFT( 'Backward', 'Columnwise', M-N+JN-JA+1, JB, A, IA, $ JA+N-K, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', M-N+JN-JA+1, JB, A, IA, JA+N-K, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PZLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of A to get block * row of L * CALL PZLASET( 'All', M-K, JB, ZERO, ZERO, A, IA, JA+N-K, $ DESCA ) CALL PZLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-K, $ JA+N-K+1, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+JN-JA+1, JN-JA+1, JB, WORK( IPV ), IROFF+1, 1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * * Loop over the remaining column blocks * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IV = 1 + M - N + J - JA + IROFF * * Compute upper triangular matrix T * CALL PZLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', M-N+J+JB-JA, JB, A, IA, J, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PZLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of sub( A ) to get * block row of L * CALL PZLASET( 'All', M-N+J-JA, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PZLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-N+J-JA, $ J+1, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+J+JB-JA, J+JB-JA, JB, WORK( IPV ), IROFF+1, $ 1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZGEQLRV * END scalapack-1.8.0/TESTING/LIN/psdbinfo.f0000640000175000017500000004433510363532303017037 0ustar muammarmuammar SUBROUTINE PSDBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSDBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSDBINFO * END scalapack-1.8.0/TESTING/LIN/pdbmatgen.f0000640000175000017500000001647710363532303017202 0ustar muammarmuammar SUBROUTINE PDBMATGEN( ICTXT, AFORM, AFORM2, BWL, BWU, N, $ MB, NB, A, $ LDA, IAROW, IACOL, ISEED, $ MYROW, MYCOL, NPROW, NPCOL ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. * .. Scalar Arguments .. CHARACTER*1 AFORM, AFORM2 INTEGER IACOL, IAROW, ICTXT, $ ISEED, LDA, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW, BWL, BWU * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDBMATGEN : Parallel Real Double precision Band MATrix GENerator. * (Re)Generate a distributed Band matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'L' : A is returned as a symmetric lower * triangular matrix, and is diagonally dominant. * if AFORM = 'U' : A is returned as a symmetric upper * triangular matrix, and is diagonally dominant. * if AFORM = 'G' : A is returned as a general matrix. * if AFORM = 'T' : A is returned as a general matrix in * tridiagonal-compatible form. * * AFORM2 (global input) CHARACTER*1 * if the matrix is general: * if AFORM2 = 'D' : A is returned diagonally dominant. * if AFORM2 != 'D' : A is not returned diagonally dominant. * if the matrix is symmetric or hermitian: * if AFORM2 = 'T' : A is returned in tridiagonally-compatible * form (a transpose form). * if AFORM2 != 'T' : A is returned in banded-compatible form. * * M (global input) INTEGER * The number of nonzero rows in the generated distributed * band matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) DOUBLE PRECISION, pointer into the local * memory to an array of dimension ( LDA, * ) containing the * local pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) + BWL+BWU * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * This code is a simple wrapper around PDMATGEN, for band matrices. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER DIAG_INDEX, I, J, M_MATGEN, NQ, N_MATGEN, $ START_INDEX * .. * .. External Subroutines .. EXTERNAL PDMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC, LSAME * .. * .. Executable Statements .. * * IF( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) THEN M_MATGEN = BWL + 1 N_MATGEN = N START_INDEX = 1 IF( LSAME( AFORM, 'L' ) ) THEN DIAG_INDEX = 1 ELSE DIAG_INDEX = BWL + 1 ENDIF ELSE M_MATGEN = BWL + BWU + 1 N_MATGEN = N DIAG_INDEX = BWU + 1 START_INDEX = 1 ENDIF * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * * Generate a random matrix initially * IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN * CALL PDMATGEN( ICTXT, 'T', 'N', $ N_MATGEN, M_MATGEN, $ NB, M_MATGEN, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, NQ, 0, M_MATGEN, $ MYCOL, MYROW, NPCOL, NPROW ) * ELSE * CALL PDMATGEN( ICTXT, 'N', 'N', $ M_MATGEN, N_MATGEN, $ M_MATGEN, NB, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, M_MATGEN, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Zero out padding at tops of columns * DO 1000 J=1,NB * DO 2000 I=1, LDA-M_MATGEN * * Indexing goes negative; BMATGEN assumes that space * has been preallocated above the first column as it * has to be if the matrix is to be input to * Scalapack's band solvers. * A( I-LDA+M_MATGEN, J ) = ZERO * 2000 CONTINUE * 1000 CONTINUE * ENDIF * IF( LSAME( AFORM2, 'D' ).OR. $ ( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) ) THEN * * Loop over diagonal elements stored on this processor. * * DO 330 I=1, NQ IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN IF( NPROW .EQ. 1 ) THEN A( I, DIAG_INDEX ) = A( I, DIAG_INDEX ) $ + DBLE( BWL+BWU+1 ) ENDIF ELSE IF( NPROW .EQ. 1 ) THEN A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) $ + DBLE( BWL+BWU+1 ) ENDIF END IF 330 CONTINUE * * ELSE * * Must add elements to keep condition of matrix in check * DO 380 I=1, NQ * IF( NPROW .EQ. 1 ) THEN * IF( MOD(I+MYCOL*NB,2) .EQ. 1 ) THEN A( DIAG_INDEX+1, I ) = A( DIAG_INDEX+1, I ) $ + DBLE( BWL+BWU+1 ) * ELSE * A( DIAG_INDEX-1, I ) = A( DIAG_INDEX-1, I ) $ + DBLE( BWL+BWU+1 ) ENDIF * ENDIF * 380 CONTINUE * END IF * RETURN * * End of PDBMATGEN * END scalapack-1.8.0/TESTING/LIN/pcpbdriver.f0000640000175000017500000007417410363532303017377 0ustar muammarmuammar PROGRAM PCPBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PCPBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by CPB. This test driver performs an * A = L*L**H factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER CPLXSZ, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD, $ IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE, $ IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS, $ KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB, $ NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS, $ N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, $ PCCHEKPAD, PCFILLPAD, PCMATGEN, PCPBINFO, $ PCPBLASCHK, PCPBTRF, PCPBTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PCLANGE EXTERNAL LSAME, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PCPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = BWVAL( BW_NUM ) IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 ) $ + BW NB = MAX( NB, 2*BW ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*BW, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BW+1), (BW+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BW+1)+10) IMIDPAD = 10 IPOSTPAD = ((BW+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BW+1), N, $ (BW+1), NB, 0, 0, $ ICTXT,((BW+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BW+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+2*BW)*BW * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = BW*BW * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BW+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PCLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PCPBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(BW*(BW+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB, $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BW+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PCLANGE( '1', (BW+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PCPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PCPBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, 'PCPBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (BW*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PCMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PCPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCPBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PCPBLASCHK( 'H', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCPBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzdbinfo.f0000640000175000017500000004434010363532303017042 0ustar muammarmuammar SUBROUTINE PZDBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZDBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZDBINFO * END scalapack-1.8.0/TESTING/LIN/pcgeqlrv.f0000640000175000017500000002407110363532303017051 0ustar muammarmuammar SUBROUTINE PCGEQLRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQLRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PCGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PCGEQLF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(N_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PCGEQLF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IIA, IPT, IPV, IPW, IROFF, $ IV, J, JB, JJA, JN, K, MP, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLACPY, $ PCLARFB, PCLARFT, PCLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA+N-K, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * * Handle first block separately * IV = 1 + M - K + IROFF JB = JN - JA - N + K + 1 * * Compute upper triangular matrix T * CALL PCLARFT( 'Backward', 'Columnwise', M-N+JN-JA+1, JB, A, IA, $ JA+N-K, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', M-N+JN-JA+1, JB, A, IA, JA+N-K, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PCLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of A to get block * row of L * CALL PCLASET( 'All', M-K, JB, ZERO, ZERO, A, IA, JA+N-K, $ DESCA ) CALL PCLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-K, $ JA+N-K+1, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+JN-JA+1, JN-JA+1, JB, WORK( IPV ), IROFF+1, 1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * * Loop over the remaining column blocks * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IV = 1 + M - N + J - JA + IROFF * * Compute upper triangular matrix T * CALL PCLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PCLACPY( 'All', M-N+J+JB-JA, JB, A, IA, J, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PCLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of sub( A ) to get * block row of L * CALL PCLASET( 'All', M-N+J-JA, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PCLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-N+J-JA, $ J+1, DESCA ) * * Apply block Householder transformation * CALL PCLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+J+JB-JA, J+JB-JA, JB, WORK( IPV ), IROFF+1, $ 1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCGEQLRV * END scalapack-1.8.0/TESTING/LIN/psludriver.f0000640000175000017500000012504210363532303017425 0ustar muammarmuammar PROGRAM PSLUDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * PSLUDRIVER is the main test program for the REAL * SCALAPACK LU routines. This test driver performs an LU factorization * and solve. If the input matrix is non-square, only the factorization * is performed. Condition estimation and iterative refinement are * optionally performed. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 2.0, LU factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LU.out' output file name (if any) * 6 device out * 1 number of problems sizes * 31 201 values of M * 31 201 values of N * 1 number of NB's * 2 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 1 4 2 3 8 values of P * 2 4 1 3 2 1 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL, ZERO PARAMETER ( INTGSZ = 4, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPOSTPAD, IPPIV, IPREPAD, IPW, IPW2, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIPIV, LIWORK, LWORK, LW2, M, MAXMN, $ MINMN, MP, MYCOL, MYRHS, MYROW, N, NB, NBRHS, $ NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, NPCOL, $ NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL ANORM, ANORM1, FRESID, RCOND, SRESID, SRESID2, $ THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ MVAL( NTESTS ), NBRVAL( NTESTS ), $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSGECON, PSGERFS, $ PSGETRF, PSGETRRV, PSGETRS, $ PSLAFCHK, PSLASCHK, PSLUINFO, $ PSMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, ILCM, NUMROC REAL PSLANGE EXTERNAL ICEIL, ILCM, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PSLUINFO( OUTFILE, NOUT, NMAT, MVAL, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, THRESH, $ EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST .AND. M.EQ.N ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPPIV = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPPIV = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF LIPIV = ICEIL( INTGSZ*( MP+NB ), REALSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * IF( CHECK ) THEN * * Calculate the amount of workspace required by the * checking routines PSLANGE, PSGETRRV, and * PSLAFCHK * WORKSIZ = MAX( 2, NQ ) * WORKSIZ = MAX( WORKSIZ, MP*DESCA( NB_ )+ $ NQ*DESCA( MB_ ) ) * WORKSIZ = MAX( WORKSIZ, MP * DESCA( NB_ ) ) * WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate matrix A of Ax = b * CALL PSMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ MP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, LIPIV, 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) ANORM1 = PSLANGE( '1', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST .AND. M.EQ.N ) THEN CALL PSMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform LU factorization * CALL PSGETRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSGETRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LU factorization * CALL PSCHEKPAD( ICTXT, 'PSGETRF', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * IF( M.NE.N ) THEN * * For non-square matrices, factorization only * NRHS = 0 NBRHS = 0 * IF( CHECK ) THEN * * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps) * CALL PSGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGETRRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRRV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( (FRESID-FRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) * * M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU * factorization when M >= N * NOPS = DBLE( MAXMN )*( DBLE( MINMN )**2 ) - $ (1.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) - $ (1.0D+0 / 2.0D+0)*( DBLE( MINMN )**2 ) * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * WTIME( 2 ) = 0.0D+0 IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ), $ WTIME( 2 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * CTIME( 2 ) = 0.0D+0 IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ), $ CTIME( 2 ), TMFLOPS, PASSED END IF * ELSE * * If M = N * IF( EST ) THEN * * Calculate workspace required for PSGECON * LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PSGECON( '1', N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LIWORK, INFO ) * IF( CHECK ) THEN CALL PSCHEKPAD( ICTXT, 'PSGECON', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGECON', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGECON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), $ MYCOL, DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + $ IPOSTPAD + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * * Set worksiz: routines requiring most workspace * is PSLASCHK * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PSMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) $ CALL PSFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PSMATGEN( ICTXT, 'No', 'No', $ DESCB( M_ ), DESCB( N_ ), $ DESCB( MB_ ), DESCB( NB_ ), $ MEM( IPB0 ), DESCB( LLD_ ), $ DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, $ 0, MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear sytem via LU factorization * CALL PSGETRS( 'No', N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), MEM( IPB ), $ 1, 1, DESCB, INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGETRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRS', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PSLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( SRESID.LE.THRESH .AND. $ ( SRESID-SRESID ).EQ.0.0E+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PSGERFS * LWORK = MAX( 1, 3*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PSGERFS( 'No', N, NRHS, MEM( IPA0 ), 1, $ 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), $ MEM( IPB0 ), 1, 1, DESCB, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LIWORK, INFO ) * IF( CHECK ) THEN CALL PSCHEKPAD( ICTXT, 'PSGERFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGERFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PSLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 2/3 N^3 - 1/2 N^2 flops for LU factorization * NOPS = (2.0D+0/3.0D+0)*( DBLE(N)**3 ) - $ (1.0D+0/2.0D+0)*( DBLE(N)**2 ) * * nrhs * 2 N^2 flops for LU solve. * NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization * and solve -- for WALL and CPU time, and print * output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * * Print WALL time if supported * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if supported * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED END IF 10 CONTINUE 20 CONTINUE * IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN * * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps) * CALL PSGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGETRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRRV', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF END IF 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ', $ 'Sol Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSLUDRIVER * END scalapack-1.8.0/TESTING/LIN/pdgerqrv.f0000640000175000017500000002365310363532303017065 0ustar muammarmuammar SUBROUTINE PDGERQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGERQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from R, Q * computed by PDGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors R and Q computed * by PDGERQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PDGERQF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JV, K, MYCOL, MYROW, NPCOL, $ NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLACPY, $ PDLARFB, PDLARFT, PDLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+M-K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA - M + K + 1 JV = 1 + N - K + ICOFF * * Compute upper triangular matrix T * CALL PDLARFT( 'Backward', 'Rowwise', N-M+IN-IA+1, IB, A, IA+M-K, $ JA, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', IB, N-M+IN-IA+1, A, IA+M-K, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PDLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict lower triangular part of sub( A ) to get block * column of R * CALL PDLASET( 'All', IB, N-K, ZERO, ZERO, A, IA+M-K, JA, $ DESCA ) CALL PDLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, IA+M-K+1, $ JA+N-K, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ IN-IA+1, N-M+IN-IA+1, IB, WORK( IPV ), 1, ICOFF+1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) JV = 1 + N - M + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PDLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', IB, N-M+I+IB-IA, A, I, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PDLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict Lower triangular part of sub( A ) to get * block column of R * CALL PDLASET( 'All', IB, N-M+I-IA, ZERO, ZERO, A, I, JA, $ DESCA ) CALL PDLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, I+1, $ JA+N-M+I-IA, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I+IB-IA, N-M+I+IB-IA, IB, WORK( IPV ), 1, $ ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDGERQRV * END scalapack-1.8.0/TESTING/LIN/pddtinfo.f0000640000175000017500000004435610363532303017045 0ustar muammarmuammar SUBROUTINE PDDTINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDDTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDDTINFO * END scalapack-1.8.0/TESTING/LIN/pzpotrrv.f0000640000175000017500000002772410363532303017144 0ustar muammarmuammar SUBROUTINE PZPOTRRV( UPLO, N, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U * computed by PZPOTRF. The routine performs the Cholesky factorization * in reverse. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * hermitian distributed matrix sub( A ) is stored: * stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the factors L or U of the * distributed matrix sub( A ) from the Cholesky factorization. * On exit, the original distributed matrix sub( A ) is * restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= MB_A*NB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE, ZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PB_TOPGET, PB_TOPSET, $ PZLACPY, PZLASET, PZHERK, PZTRMM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * UPPER = LSAME( UPLO, 'U' ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) IL = MAX( ( ( IA+N-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * * Define array descriptor for working array WORK * CALL DESCSET( DESCW, DESCA( MB_ ), DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * IF ( UPPER ) THEN * * Compute A from the Cholesky factor U : A = U'*U. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PZHERK( 'Upper', 'Conjugate Transpose', JA+N-J-JB, JB, $ ONE, A, IL, J+JB, DESCA, ONE, A, IL+JB, J+JB, $ DESCA ) * * Copy current diagonal block of A into workspace * CALL PZLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PZLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PZTRMM( 'Left', 'Upper', 'Conjugate Transpose', $ 'Non-Unit', JB, N-J+JA, CONE, WORK, 1, 1, $ DESCW, A, IL, J, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PZLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, $ IL+1, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PZHERK( 'Upper', 'Conjugate Transpose', N-JB, JB, ONE, A, $ IA, JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PZLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PZLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PZTRMM( 'Left', 'Upper', 'Conjugate Transpose', 'Non-Unit', $ JB, N, CONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PZLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, IA+1, $ JA, DESCA ) * ELSE * * Compute A from the Cholesky factor L : A = L*L'. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * DO 20 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PZHERK( 'Lower', 'No Transpose', IA+N-IL-JB, JB, ONE, A, $ IL+JB, J, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PZLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PZLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IL, J+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PZTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', IA+N-IL, JB, CONE, WORK, 1, 1, $ DESCW, A, IL, J, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PZLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, $ IL, J+1, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 20 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PZHERK( 'Lower', 'No Transpose', N-JB, JB, ONE, A, $ IA+JB, JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PZLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PZLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PZTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', N, JB, CONE, WORK, 1, 1, DESCW, A, $ IA, JA, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PZLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, IA, $ JA+1, DESCA ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZPOTRRV * END scalapack-1.8.0/TESTING/LIN/pdlsinfo.f0000640000175000017500000004077210363532303017052 0ustar muammarmuammar SUBROUTINE PDLSINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBRVAL, LDNBVAL, LDNRVAL, $ LDNVAL, LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, $ NNBR, NNR, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDLSINFO gets needed startup information for LS solve and * transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (workspace) INTEGER array of dimension >= * MAX(5,LDMVAL+LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LS.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 5, 1, WORK, 5 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK min ||Ax-b|| by QR factorizations.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision least-square solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - QR|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used. If CPU and WALL time' WRITE( NOUT, FMT = 9999 ) $ ' are the same, only one line '// $ 'is printed, and the label is ''BOTH''.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'QR time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 5, 1, WORK, 5, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDLSINFO * END scalapack-1.8.0/TESTING/LIN/pdpotrrv.f0000640000175000017500000002736510363532303017117 0ustar muammarmuammar SUBROUTINE PDPOTRRV( UPLO, N, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U * computed by PDPOTRF. The routine performs the Cholesky factorization * in reverse. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is stored: * stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the factors L or U of the * distributed matrix sub( A ) from the Cholesky factorization. * On exit, the original distributed matrix sub( A ) is * restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= MB_A*NB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PDLACPY, PDLASET, $ PDSYRK, PDTRMM, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * UPPER = LSAME( UPLO, 'U' ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) IL = MAX( ( ( IA+N-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * * Define array descriptor for working array WORK * CALL DESCSET( DESCW, DESCA( MB_ ), DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * IF ( UPPER ) THEN * * Compute A from the Cholesky factor U : A = U'*U. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PDSYRK( 'Upper', 'Transpose', JA+N-J-JB, JB, ONE, A, IL, $ J+JB, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PDLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PDLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PDTRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', JB, $ N-J+JA, ONE, WORK, 1, 1, DESCW, A, IL, J, $ DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PDLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, $ IL+1, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PDSYRK( 'Upper', 'Transpose', N-JB, JB, ONE, A, IA, JA+JB, $ DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PDLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PDLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PDTRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', JB, $ N, ONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PDLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, IA+1, $ JA, DESCA ) * ELSE * * Compute A from the Cholesky factor L : A = L*L'. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * DO 20 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PDSYRK( 'Lower', 'No transpose', IA+N-IL-JB, JB, ONE, A, $ IL+JB, J, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PDLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PDLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IL, J+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PDTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', $ IA+N-IL, JB, ONE, WORK, 1, 1, DESCW, A, IL, $ J, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PDLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, $ IL, J+1, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 20 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PDSYRK( 'Lower', 'No transpose', N-JB, JB, ONE, A, $ IA+JB, JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PDLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PDLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PDTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', N, JB, $ ONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PDLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, IA, $ JA+1, DESCA ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDPOTRRV * END scalapack-1.8.0/TESTING/LIN/psinvinfo.f0000640000175000017500000003525410363532303017246 0ustar muammarmuammar SUBROUTINE PSINVINFO( SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, $ IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDMTYP, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NMTYP, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*3 MATTYP( LDMTYP ) CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSINVINFO gets needed startup information for matrix inversion * tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMTYP (global output) INTEGER * The number of different matrix types to be tested. * * MATTYP (global output) CHARACTER*3 array of dimension of LDMTYP, * The types of matrix to be generated: * if MATTYP(i) = 'GEN' then GENeral matrix, * if MATTYP(i) = 'UTR' then Upper TRiangular matrix, * if MATTYP(i) = 'LTR' then Lower TRiangular matrix, * if MATTYP(i) = 'UPD' then (Upper) symmetric Pos. Definite, * if MATTYP(i) = 'LPD' then (Lower) symmetric Pos. Definite, * * LDMTYP (global input) INTEGER * The maximum number of different matrix types to be tested. * LDMTYP >= NMTYP. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDMTYP+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN REAL PSLAMCH EXTERNAL LSAMEN, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='INV.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NMTYP IF( NMTYP.LT.1 .OR. NMTYP.GT.LDMTYP ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of matrix types', LDMTYP GO TO 40 END IF READ( NIN, FMT = * ) ( MATTYP( I ), I = 1, NMTYP ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NMTYP CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NMTYP IF( LSAMEN( 3, MATTYP( K ), 'GEN' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UTR' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LTR' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UPD' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LPD' ) ) THEN WORK( I ) = 5 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Matrix Inversion routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision Matrix Inversion '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Inverse residual = ||inv(A)*A - I|| '// $ '/ (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less '// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'Fct time : Time in seconds to factor the'// $ ' matrix, if needed.' WRITE( NOUT, FMT = 9999 ) $ 'Inv Time : Time in seconds to inverse the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and inverse.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NMTYP = WORK( 4 ) * I = NMTYP+NMAT+NNB+2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NMTYP IF( WORK( K ).EQ.1 ) THEN MATTYP( K ) = 'GEN' ELSE IF( WORK( K ).EQ.2 ) THEN MATTYP( K ) = 'UTR' ELSE IF( WORK( K ).EQ.3 ) THEN MATTYP( K ) = 'LTR' ELSE IF( WORK( K ).EQ.4 ) THEN MATTYP( K ) = 'UPD' ELSE IF( WORK( K ).EQ.5 ) THEN MATTYP( K ) = 'LPD' END IF 30 CONTINUE * I = NMTYP + 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSINVINFO * END scalapack-1.8.0/TESTING/LIN/pslltdriver.f0000640000175000017500000011022610363532303017576 0ustar muammarmuammar PROGRAM PSLLTDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PSLLTDRIVER is the main test program for the REAL * ScaLAPACK Cholesky routines. This test driver performs an * A = L*L**T or A = U**T*U factorization and solve, and optionally * performs condition estimation and iterative refinement. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'ScaLAPACK LLt factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LLT.out' output file name (if any) * 6 device out * 'U' define Lower or Upper * 1 number of problems sizes * 31 100 200 values of N * 1 number of NB's * 2 10 24 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 Number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 values of P * 2 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL, ZERO PARAMETER ( INTGSZ = 4, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPREPAD, IPOSTPAD, IPW, IPW2, ITEMP, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIWORK, LWORK, LW2, MYCOL, MYRHS, MYROW, N, NB, $ NBRHS, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL ANORM, ANORM1, FRESID, RCOND, SRESID, SRESID2, $ THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGSUM2D, BLACS_PINFO, PSCHEKPAD, PSFILLPAD, $ PSLAFCHK, PSLASCHK, PSLLTINFO, $ PSMATGEN, PSPOCON, PSPORFS, $ PSPOTRF, PSPOTRRV, PSPOTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PSLANSY EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PSLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PSLLTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPW = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF * * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PSLAFCHK, PSPOTRRV, and * PSLANSY * WORKSIZ = NP * DESCA( NB_ ) * WORKSIZ = MAX( WORKSIZ, DESCA( MB_ ) * DESCA( NB_ ) ) * LCM = ILCM( NPROW, NPCOL ) ITEMP = MAX( 2, 2 * NQ ) + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), LCM / NPROW ) END IF WORKSIZ = MAX( WORKSIZ, ITEMP ) WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate a symmetric positive definite matrix A * CALL PSMATGEN( ICTXT, 'Symm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) ANORM1 = PSLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN CALL PSMATGEN( ICTXT, 'Symm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PSFILLPAD( ICTXT, NP, NQ, $ MEM( IPA0-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform LLt factorization * CALL SLTIMER( 1 ) * CALL PSPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSPOTRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 60 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LLt factorization * CALL PSCHEKPAD( ICTXT, 'PSPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN * * Calculate workspace required for PSPOCON * LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 60 END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PSPOCON( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LIWORK, INFO ) * IF( CHECK ) THEN CALL PSCHEKPAD( ICTXT, 'PSPOCON', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPOCON', $ LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPOCON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL, $ DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + IPOSTPAD $ + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PSLASCHK * LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PSMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, NPCOL ) * IF( CHECK ) $ CALL PSFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PSMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB0 ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear system via Cholesky factorization * CALL PSPOTRS( UPLO, N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB ), 1, 1, DESCB, $ INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPOTRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPOTRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PSLASCHK( 'Symm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PSPORFS * LWORK = MAX( 1, 3*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LIWORK = MAX( 1, NP ) LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, $ 1, MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PSPORFS( UPLO, N, NRHS, MEM( IPA0 ), $ 1, 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB0 ), 1, 1, $ DESCB, MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LIWORK, INFO ) * * check for memory overwrite * IF( CHECK ) THEN CALL PSCHEKPAD( ICTXT, 'PSPORFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSPORFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PSLASCHK( 'Symm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 1/3 N^3 + 1/2 N^2 flops for LLt factorization * NOPS = (DBLE(N)**3)/3.0D+0 + $ (DBLE(N)**2)/2.0D+0 * * nrhs * 2 N^2 flops for LLt solve. * NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization and * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED * END IF 10 CONTINUE 20 CONTINUE * IF( CHECK .AND. SRESID.GT.THRESH ) THEN * * Compute FRESID = ||A - LL'|| / (||A|| * N * eps) * CALL PSPOTRRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSLAFCHK( 'Symm', 'Diag', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPOTRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( IAM.EQ.0 ) THEN IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9986 ) 'L*L''', FRESID ELSE WRITE( NOUT, FMT = 9986 ) 'U''*U', FRESID END IF END IF END IF * 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB NRHS NBRHS P Q LLt Time ', $ 'Slv Time MFLOPS CHECK' ) 9994 FORMAT( '---- ---- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 4X, A1, 1X, I5, 1X, I3, 1X, I4, 1X, I5, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSLLTDRIVER * END scalapack-1.8.0/TESTING/LIN/pzpbdriver.f0000640000175000017500000007423710363532303017426 0ustar muammarmuammar PROGRAM PZPBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PZPBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by ZPB. This test driver performs an * A = L*L**H factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER MEMSIZ, NTESTS, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD, $ IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE, $ IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS, $ KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB, $ NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS, $ N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, $ PZCHEKPAD, PZFILLPAD, PZMATGEN, PZPBINFO, $ PZPBLASCHK, PZPBTRF, PZPBTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PZLANGE EXTERNAL LSAME, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PZPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = BWVAL( BW_NUM ) IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 ) $ + BW NB = MAX( NB, 2*BW ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*BW, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BW+1), (BW+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BW+1)+10) IMIDPAD = 10 IPOSTPAD = ((BW+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BW+1), N, $ (BW+1), NB, 0, 0, $ ICTXT,((BW+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BW+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+2*BW)*BW * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = BW*BW * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BW+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PZLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PZPBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(BW*(BW+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB, $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BW+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PZLANGE( '1', (BW+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PZPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PZPBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, 'PZPBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (BW*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PZMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PZPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZPBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PZPBLASCHK( 'H', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZPBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/psptinfo.f0000640000175000017500000004474510363532303017102 0ustar muammarmuammar SUBROUTINE PSPTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSPTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSPTINFO * END scalapack-1.8.0/TESTING/LIN/pzlsdriver.f0000640000175000017500000013556110363532303017441 0ustar muammarmuammar PROGRAM PZLSDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * Purpose * ======= * * PZLSDRIVER is the main test program for the COMPLEX*16 * SCALAPACK (full rank) Least Squares routines. This test driver solves * full-rank least square problems. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 17 lines: * 'ScaLapack LS solve input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LS.out' output file name (if any) * 6 device out * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 3 number of NRHS's * 2 3 5 values of NRHS * 2 number of NBRHS's * 1 2 values of NBRHS * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, MEMSIZ, NTESTS, TOTMEM, ZPLXSZ DOUBLE PRECISION RZERO, RONE COMPLEX*16 ONE, PADVAL, ZERO PARAMETER ( DBLESZ = 8, ZPLXSZ = 16, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ) ) PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), RZERO = 0.0D+0, $ RONE = 1.0D+0, ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK, TPSD CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, II, IMIDPAD, $ INFO, IPA, IPB, IPOSTPAD, IPREPAD, IPW, IPW2, $ IPX, ISCALE, ITRAN, ITYPE, J, JJ, K, KFAIL, KK, $ KPASS, KSKIP, KTESTS, LCM, LCMP, LTAU, LWF, $ LWORK, LWS, M, MNP, MNRHSP, MP, MQ, MYCOL, $ MYROW, N, NB, NBRHS, NCOLS, NGRIDS, NMAT, NNB, $ NNBR, NNR, NNRHSQ, NOUT, NP, NPCOL, NPROCS, $ NPROW, NROWS, NQ, NRHS, NRHSP, NRHSQ, WORKSIZ REAL THRESH DOUBLE PRECISION ADDFAC, ADDS, ANORM, BNORM, MULFAC, MULTS, $ NOPS, SRESID, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCW( LLD_ ), $ DESCX( DLEN_ ), IERR( 2 ), MVAL( NTESTS ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), RESULT( 2 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZGELS, PZGEMM, PZLACPY, $ PZLSINFO, PZMATGEN, PDZNRM2, $ PZDSCAL, PZQRT13, PZQRT16, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PZLANGE, PZQRT14, PZQRT17 EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PZLANGE, $ PZQRT14, PZQRT17 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * IASEED = 100 IBSEED = 200 CALL PZLSINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, NTESTS, $ NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 90 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 90 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( ( MYROW.GE.NPROW ).OR.( MYCOL.GE.NPCOL ) ) $ GO TO 90 * DO 80 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 80 END IF * * Loop over different blocking sizes * DO 70 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 70 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) MQ = NUMROC( M, NB, MYCOL, 0, NPCOL ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) MNP = MAX( MP, NP ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 70 END IF * DO 60 ISCALE = 1, 3 * ITYPE = ISCALE * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPX * WORKSIZ = NQ + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPX+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 70 END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Generate the matrix A and calculate its 1-norm * CALL PZQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, MEM( IPW ) ) * IF( CHECK ) THEN CALL PZCHEKPAD( ICTXT, 'PZQRT13', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQRT13', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * DO 50 ITRAN = 1, 2 * IF( ITRAN.EQ.1 ) THEN NROWS = M NCOLS = N TRANS = 'N' TPSD = .FALSE. ELSE NROWS = N NCOLS = M TRANS = 'C' TPSD = .TRUE. END IF * * Loop over the different values for NRHS * DO 40 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 30 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0, $ NPROW ) NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0, $ NPCOL ) * * Define Array descriptor for rhs MAX(M,N)xNRHS * CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MNP ) + IMIDPAD, $ IERR( 1 ) ) IF( TPSD ) THEN CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, MP ) + $ IMIDPAD, IERR( 2 ) ) ELSE CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, NP ) + $ IMIDPAD, IERR( 2 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, $ 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Check for enough memory * IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + $ IPREPAD IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD + $ IPREPAD WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Generation', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate RHS * IF( TPSD ) THEN CALL PZMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ MP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) ELSE CALL PZMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ NP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PZFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PZFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * DO 10 JJ = 1, NRHS CALL PDZNRM2( NCOLS, BNORM, MEM( IPW ), $ 1, JJ, DESCW, 1 ) IF( BNORM.GT.RZERO ) $ CALL PZDSCAL( NCOLS, RONE / BNORM, $ MEM( IPW ), 1, JJ, DESCW, $ 1 ) 10 CONTINUE * CALL PZGEMM( TRANS, 'N', NROWS, NRHS, NCOLS, $ ONE, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ), 1, 1, DESCW, ZERO, $ MEM( IPX ), 1, 1, DESCX ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'Generation', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'Generation', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PZCHEKPAD( ICTXT, 'Generation', $ MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PZCHEKPAD( ICTXT, 'Generation', $ NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Allocate space for copy of rhs * IPB = IPW * IF( TPSD ) THEN CALL DESCINIT( DESCB, N, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, $ IERR( 1 ) ) ELSE CALL DESCINIT( DESCB, M, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, $ IERR( 1 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * IPW = IPB + DESCB( LLD_ )*NRHSQ + $ IPOSTPAD + IPREPAD * END IF * * Calculate the amount of workspace for PZGELS * IF( M.GE.N ) THEN LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0, $ NPCOL ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, $ ( MP + NRHSQ ) * NB ) + NB*NB ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LTAU = NUMROC( MIN(M,N), NB, MYROW, 0, $ NPROW ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP + $ MAX( NQ + NUMROC( NUMROC( N, NB, 0, $ 0, NPROW ), NB, 0, 0, LCMP ), $ NRHSQ ) ) * NB ) + NB*NB END IF * LWORK = LTAU + MAX( LWF, LWS ) WORKSIZ = LWORK + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'solve', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Make the copy of the right hand side * CALL PZLACPY( 'All', NROWS, NRHS, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB ) * IF( TPSD ) THEN CALL PZFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PZFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF CALL PZFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Solve the LS or overdetermined system * CALL PZGELS( TRANS, M, N, NRHS, MEM( IPA ), $ 1, 1, DESCA, MEM( IPX ), 1, 1, $ DESCX, MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGELS', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGELS', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGELS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Regenerate A in place for testing and next * iteration * CALL PZQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, $ MEM( IPW ) ) * * check the solution to rhs * IF( CHECK ) THEN * * Am I going to call PZQRT17 ? * IF( ( M.GE.N .AND. ( .NOT.TPSD ) ) .OR. $ ( M.LT.N .AND. TPSD ) ) THEN * * Call PZQRT17 first, A, X, and B remain * unchanged. Solving LS system * * Check amount of memory for PZQRT17 * IF( TPSD ) THEN WORKSIZ = NP*NRHSQ + NRHSP*MQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ ICEIL( DBLESZ*MAX( NQ, MAX( $ MQ, NRHSQ ) ), ZPLXSZ ) + $ IPOSTPAD ELSE WORKSIZ = MP*NRHSQ + NRHSP*NQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ ICEIL( DBLESZ*MAX( NQ, $ NRHSQ ), ZPLXSZ ) + $ IPOSTPAD END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * RESULT( 2 ) = PZQRT17( TRANS, 1, M, N, $ NRHS, $ MEM( IPA ), $ 1, 1, DESCA, $ MEM( IPX ), 1, $ 1, DESCX, $ MEM( IPB ), $ 1, 1, DESCB, $ MEM( IPW ), $ MEM( IPW2 ) ) SRESID = RESULT( 2 ) * CALL PZCHEKPAD( ICTXT, 'PZQRT17', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQRT17', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PZCHEKPAD( ICTXT, 'PZQRT17', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PZCHEKPAD( ICTXT, 'PZQRT17', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PZCHEKPAD( ICTXT, 'PZQRT17', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Call PZQRT16, B will be destroyed. * IF( TPSD ) THEN WORKSIZ = MP + IPOSTPAD ELSE WORKSIZ = NQ + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZQRT16( TRANS, M, N, NRHS, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ), RESULT( 1 ) ) * CALL PZCHEKPAD( ICTXT, 'PZQRT16', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQRT16', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PZCHEKPAD( ICTXT, 'PZQRT16', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PZCHEKPAD( ICTXT, 'PZQRT16', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PZCHEKPAD( ICTXT, 'PZQRT16', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Call PZQRT14 * IF( ( M.GE.N .AND. TPSD ) .OR. $ ( M.LT.N .AND. ( .NOT.TPSD ) ) ) THEN * IPW = IPB * IF( TPSD ) THEN * NNRHSQ = NUMROC( N+NRHS, NB, MYCOL, $ 0, NPCOL ) LTAU = NUMROC( MIN( M, N+NRHS ), NB, $ MYCOL, 0, NPCOL ) LWF = NB * ( NB + MP + NNRHSQ ) WORKSIZ = MP * NNRHSQ + LTAU + LWF + $ IPOSTPAD * ELSE * MNRHSP = NUMROC( M+NRHS, NB, MYROW, $ 0, NPROW ) LTAU = NUMROC( MIN( M+NRHS, N ), NB, $ MYROW, 0, NPROW ) LWF = NB * ( NB + MNRHSP + NQ ) WORKSIZ = MNRHSP * NQ + LTAU + LWF + $ IPOSTPAD * END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Solve underdetermined system * RESULT( 2 ) = PZQRT14( TRANS, M, N, $ NRHS, $ MEM( IPA ), 1, $ 1, DESCA, $ MEM( IPX ), $ 1, 1, DESCX, $ MEM( IPW ) ) SRESID = RESULT( 2 ) * CALL PZCHEKPAD( ICTXT, 'PZQRT14', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQRT14', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZQRT14', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Print information about the tests that * did not pass the threshold. * PASSED = 'PASSED' DO 20 II = 1, 2 IF( ( RESULT( II ).GE.THRESH ) .AND. $ ( RESULT( II )-RESULT( II ).EQ.0.0E+0 $ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )TRANS, $ M, N, NRHS, NB, ITYPE, II, $ RESULT( II ) KFAIL = KFAIL + 1 PASSED = 'FAILED' ELSE KPASS = KPASS + 1 END IF 20 CONTINUE * ELSE * * By-pass the solve check * KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock * timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN ADDFAC = 2 MULFAC = 6 IF( M.GE.N ) THEN * * NOPS = DOPLA( 'ZGEQRF', M, N, 0, 0, * NB ) + DOPLA( 'ZUNMQR', M, NRHS, N, * 0, NB ) * MULTS = N*( ( ( 23.D0 / 6.D0 )+M+N / $ 2.D0 )+ N*( M-N / 3.D0 ) ) + $ N*NRHS*( 2.D0*M+2.D0-N ) ADDS = N*( ( 5.D0 / 6.D0 )+N* $ ( 1.D0 / 2.D0+( M-N / 3.D0 ) ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) ELSE * * NOPS = DOPLA( 'ZGELQF', M, N, 0, 0, * NB ) + DOPLA( 'ZUNMLQ', M, * NRHS, N, 0, NB ) * MULTS = M*( ( ( 29.D0 / 6.D0 )+2.D0*N-M $ / 2.D0 )+M*( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+2.D0-N ) ADDS = M*( ( 5.D0 / 6.D0 )+M / 2.D0+M* $ ( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) END IF NOPS = ADDFAC*ADDS + MULFAC*MULTS * * Calculate total megaflops, for WALL and * CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'WALL', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, WTIME( 1 ), $ TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'CPU ', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, CTIME( 1 ), $ TMFLOPS, PASSED END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE 60 CONTINUE 70 CONTINUE 80 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 90 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ', $ 'LS Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ', $ '--------- -------- ------' ) 9993 FORMAT( A4, 3X, A1, 3X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ I5, 1X, I5, 1X, F9.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( ' TRANS=''', A1, ''', M=', I5, ', N=', I5, ', NRHS=', I4, $ ', NB=', I4, ', type', I2, ', test(', I2, ')=', G12.5 ) * STOP * * End of PZLSDRIVER * END scalapack-1.8.0/TESTING/LIN/pzpblaschk.f0000640000175000017500000002560210363532303017370 0ustar muammarmuammar SUBROUTINE PZPBLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PZPBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(bw*(bw+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, PBZTRAN, $ PZMATGEN, ZGAMX2D, ZGEMM, ZGSUM2D, $ ZLASET * .. * .. External Functions .. INTEGER IZAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(BW*(BW+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PZBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BW+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PZBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PZBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PZPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PZMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PZAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDZNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDZNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PZBLASCHK * END scalapack-1.8.0/TESTING/LIN/psmatgen.f0000640000175000017500000004311010363532303017037 0ustar muammarmuammar SUBROUTINE PSMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PSMATGEN : Parallel Real Single precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) REAL, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSRAND EXTERNAL ICEIL, NUMROC, LSAME, PSRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = ONE - TWO*PSRAND(0) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = ONE - TWO*PSRAND(0) 30 CONTINUE A(IK,JK) = ONE - TWO*PSRAND(0) DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = ONE - TWO*PSRAND(0) A(IK,JK+J) = A(IK+J,JK) 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 A(IK,JK) = ONE - TWO*PSRAND(0) JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 A(IK,JK) = ONE - TWO*PSRAND(0) JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = ONE - TWO*PSRAND(0) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 A(IK,JK+J) = ABS(A(IK,JK+J)) + MAXMN IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PSMATGEN * END scalapack-1.8.0/TESTING/LIN/pcpotrrv.f0000640000175000017500000002771610363532303017116 0ustar muammarmuammar SUBROUTINE PCPOTRRV( UPLO, N, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U * computed by PCPOTRF. The routine performs the Cholesky factorization * in reverse. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * hermitian distributed matrix sub( A ) is stored: * stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the factors L or U of the * distributed matrix sub( A ) from the Cholesky factorization. * On exit, the original distributed matrix sub( A ) is * restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= MB_A*NB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE, ZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PCLACPY, PCLASET, $ PCHERK, PCTRMM, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * UPPER = LSAME( UPLO, 'U' ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) IL = MAX( ( ( IA+N-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * * Define array descriptor for working array WORK * CALL DESCSET( DESCW, DESCA( MB_ ), DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * IF ( UPPER ) THEN * * Compute A from the Cholesky factor U : A = U'*U. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PCHERK( 'Upper', 'Conjugate Transpose', JA+N-J-JB, JB, $ ONE, A, IL, J+JB, DESCA, ONE, A, IL+JB, J+JB, $ DESCA ) * * Copy current diagonal block of A into workspace * CALL PCLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PCLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PCTRMM( 'Left', 'Upper', 'Conjugate Transpose', $ 'Non-Unit', JB, N-J+JA, CONE, WORK, 1, 1, $ DESCW, A, IL, J, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PCLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, $ IL+1, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PCHERK( 'Upper', 'Conjugate Transpose', N-JB, JB, ONE, A, $ IA, JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PCLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PCLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PCTRMM( 'Left', 'Upper', 'Conjugate Transpose', 'Non-Unit', $ JB, N, CONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PCLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, IA+1, $ JA, DESCA ) * ELSE * * Compute A from the Cholesky factor L : A = L*L'. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * DO 20 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PCHERK( 'Lower', 'No Transpose', IA+N-IL-JB, JB, ONE, A, $ IL+JB, J, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PCLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PCLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IL, J+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PCTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', IA+N-IL, JB, CONE, WORK, 1, 1, $ DESCW, A, IL, J, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PCLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, $ IL, J+1, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 20 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PCHERK( 'Lower', 'No Transpose', N-JB, JB, ONE, A, $ IA+JB, JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PCLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PCLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PCTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', N, JB, CONE, WORK, 1, 1, DESCW, A, $ IA, JA, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PCLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, IA, $ JA+1, DESCA ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCPOTRRV * END scalapack-1.8.0/TESTING/LIN/pdpbmv1.f0000640000175000017500000007273610363532303016612 0ustar muammarmuammar SUBROUTINE PDPBDCMV( LDBW, BW, UPLO, N, A, JA, DESCA, NRHS, B, IB, $ DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPBTRF and this is stored in AF. If a linear system * is to be solved using PDPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, $ FIRST_PROC, I, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, J, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, OFST, PART_OFFSET, $ PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Pre-calculate bw^2 * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PDPBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PDPBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (BW+2)*BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( UPLO, 'L' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL DLATCPY( 'U', BW, BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to right * CALL DTRSD2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL DSBMV( 'L', NUMROC_SIZE, BW, ONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, ZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * CALL DTRMV( 'U', 'N', 'N', BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), LLDA-1, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, BW*( BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL-1 ) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 30 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( UPLO, 'U' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL DLATCPY( 'L', BW, BW, A( OFST+1 ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to left * CALL DTRSD2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL-1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL DSBMV( 'U', NUMROC_SIZE, BW, ONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, ZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL+1 ) * CALL DTRMV( 'U', 'N', 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, BW*( BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BW, $ A( 1 ), LLDA-1, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 60 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BW, ONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PDBsBMV1 * END scalapack-1.8.0/TESTING/LIN/psptdriver.f0000640000175000017500000007471410363532303017441 0ustar muammarmuammar PROGRAM PSPTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PSPTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by SPT. This test driver performs an * A = L*L**T factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER MEMSIZ, NTESTS, REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, INT_TEMP, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, $ PSCHEKPAD, PSFILLPAD, PSMATGEN, PSPTINFO, $ PSPTLASCHK, PSPTTRF, PSPTTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PSLANGE EXTERNAL LSAME, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PSPTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = 1 IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2), (2), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2)+10) IMIDPAD = 10 IPOSTPAD = ((2)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (2), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(2) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL + 3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PSLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PSPTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSBMATGEN( ICTXT, UPLO, 'T', BW, BW, N, (2), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PSFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PSLANGE( 'I', N, $ (2), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( LSAME( UPLO, 'L' ) ) THEN INT_TEMP = 0 ELSE INT_TEMP = DESCA2D( LLD_ ) ENDIF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PSPTTRF( N, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PSPTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, 'PSPTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PSMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PSPTTRS( N, NRHS, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSPTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (2), N, $ (2), NB, 0, 0, $ ICTXT, (2), IERR( 1 ) ) CALL PSPTLASCHK( 'S', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSPTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pcluinfo.f0000640000175000017500000004110110363532303017036 0ustar muammarmuammar SUBROUTINE PCLUINFO( SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, EST, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDNVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ WORK( * ) * .. * * Purpose * ======= * * PCLUINFO gets needed startup information for LU factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M and N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * M and N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 6, 2*LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL ) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LU.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( EST ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 6, 1, WORK, 6 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Ax=b by LU factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision LU factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LU time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 6, 1, WORK, 6, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCLUINFO * END scalapack-1.8.0/TESTING/LIN/pdgelqrv.f0000640000175000017500000002277510363532303017063 0ustar muammarmuammar SUBROUTINE PDGELQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGELQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PDGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PDGELQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PDGELQF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IL, IN, $ IPT, IPV, IPW, J, JJA, JV, K, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLACPY, $ PDLARFB, PDLARFT, PDLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IL, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+K-I, DESCA( MB_ ) ) J = JA + I - IA JV = 1 + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PDLARFT( 'Forward', 'Rowwise', N-J+JA, IB, A, I, J, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'Upper', IB, N-J+JA, A, I, J, DESCA, WORK( IPV ), $ 1, JV, DESCV ) CALL PDLASET( 'Lower', IB, N-J+JA, ZERO, ONE, WORK( IPV ), 1, $ JV, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PDLASET( 'Upper', IB, N-J+JA-1, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-I+IA, N-J+JA, IB, WORK( IPV ), 1, JV, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) * 10 CONTINUE * * Handle first block separately * IB = IN - IA + 1 * * Compute upper triangular matrix T * CALL PDLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, TAU, $ WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'Upper', IB, N, A, IA, JA, DESCA, WORK( IPV ), 1, $ ICOFF+1, DESCV ) CALL PDLASET( 'Lower', IB, N, ZERO, ONE, WORK, 1, ICOFF+1, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PDLASET( 'Upper', IB, N-1, ZERO, ZERO, A, IA, JA+1, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, N, $ IB, WORK( IPV ), 1, ICOFF+1, DESCV, WORK( IPT ), A, $ IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDGELQRV * END scalapack-1.8.0/TESTING/LIN/pcdtdriver.f0000640000175000017500000007660510363532303017406 0ustar muammarmuammar PROGRAM PCDTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PCDTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by CDT. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER CPLXSZ, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, $ PCCHEKPAD, PCDTINFO, PCDTLASCHK, PCDTTRF, $ PCDTTRS, PCFILLPAD, PCMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PCLANGE EXTERNAL LSAME, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PCDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = 1 IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = 1 IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (3), (3), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((3)+10) IMIDPAD = 10 IPOSTPAD = ((3)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (3), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((3)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(3) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL+3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((3)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PCLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PCDTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PCFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PCLANGE( 'I', N, $ (3), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PCDTTRF( N, MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1, $ DESCA, MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW ), IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PCDTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, 'PCDTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PCMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PCDTTRS( TRANS, N, NRHS, $ MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCDTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCDTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (3), N, $ (3), NB, 0, 0, $ ICTXT, (3), IERR( 1 ) ) CALL PCDTLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl INT_ONE + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(INT_ONE)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE)) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*INT_ONE * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*INT_ONE flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*INT_ONE*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*INT_ONE*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+INT_ONE) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(INT_ONE)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*INT_ONE ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCDTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/psqrdriver.f0000640000175000017500000012164310363532303017432 0ustar muammarmuammar PROGRAM PSQRDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PSQRDRIVER is the main test program for the REAL * SCALAPACK QR factorization routines. This test driver performs a QR * QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ * (complete orthogonal factorization) factorization and checks the * results. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 16 lines: * 'ScaLAPACK QR factorizations input file' * 'PVM machine' * 'QR.out' output file name (if any) * 6 device out * 6 number of factorizations * 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of MB's and NB's * 4 3 5 values of MB * 4 7 3 values of NB * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL PARAMETER ( INTGSZ = 4, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0 ) * .. * .. Local Scalars .. CHARACTER*2 FACT CHARACTER*6 PASSED CHARACTER*7 ROUT CHARACTER*8 ROUTCHK CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPPIV, IPREPAD, IPTAU, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, L, LIPIV, LTAU, $ LWORK, M, MAXMN, MB, MINMN, MNP, MNQ, MP, $ MYCOL, MYROW, N, NB, NFACT, NGRIDS, NMAT, NNB, $ NOUT, NPCOL, NPROCS, NPROW, NQ, WORKFCT, $ WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Arrays .. CHARACTER*2 FACTOR( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), MBVAL( NTESTS ), $ MVAL( NTESTS ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSGELQF, PSGELQRV, $ PSGEQLF, PSGEQLRV, PSGEQPF, $ PSQPPIV, PSGEQRF, PSGEQRRV, $ PSGERQF, PSGERQRV, PSTZRZRV, $ PSMATGEN, PSLAFCHK, PSQRINFO, $ PSTZRZF, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC REAL PSLANGE EXTERNAL ICEIL, LSAMEN, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSQRINFO( OUTFILE, NOUT, NFACT, FACTOR, NTESTS, NMAT, MVAL, $ NTESTS, NVAL, NTESTS, NNB, MBVAL, NTESTS, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different factorization types * DO 40 I = 1, NFACT * FACT = FACTOR( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 2, FACT, 'QR' ) ) THEN ROUT = 'PSGEQRF' ROUTCHK = 'PSGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN ROUT = 'PSGEQLF' ROUTCHK = 'PSGEQLRV' WRITE( NOUT, FMT = 9986 ) $ 'QL factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN ROUT = 'PSGELQF' ROUTCHK = 'PSGELQRV' WRITE( NOUT, FMT = 9986 ) $ 'LQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN ROUT = 'PSGERQF' ROUTCHK = 'PSGERQRV' WRITE( NOUT, FMT = 9986 ) $ 'RQ factorization tests.' ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN ROUT = 'PSGEQPF' ROUTCHK = 'PSGEQRRV' WRITE( NOUT, FMT = 9986 ) $ 'QR factorization with column pivoting tests.' ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN ROUT = 'PSTZRZF' ROUTCHK = 'PSTZRZRV' WRITE( NOUT, FMT = 9986 ) $ 'Complete orthogonal factorization tests.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * M = MVAL( K ) N = NVAL( K ) * * Make sure matrix information is correct * IERR(1) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * MB = MBVAL( L ) NB = NBVAL( L ) * * Make sure mb is legal * IERR( 1 ) = 0 IF( MB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MB', 'MB', MB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, MB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), MB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( MB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, MB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IPTAU = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * LTAU = MNQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSGEQRRV and * PSLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * LTAU = NQ IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QL * factorization * LWORK = DESCA( NB_ ) * ( MP + NQ + DESCA( NB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSGEQLRV and * PSLANGE * WORKSIZ = LWORK + MP*DESCA( NB_ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * LTAU = MNP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the LQ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSGELQRV and * PSLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the QR * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSGERQRV and * PSLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * LTAU = MNQ IPPIV = IPTAU + LTAU + IPOSTPAD + IPREPAD LIPIV = ICEIL( INTGSZ*NQ, REALSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the * factorization i.e from IPW on. * LWORK = MAX( 3, MP + MAX( 1, NQ ) ) + 2 * NQ WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSGEQRRV, * PSLANGE. * WORKSIZ = MAX( WORKSIZ - IPOSTPAD, $ DESCA( NB_ )*( 2*MP + NQ + DESCA( NB_ ) ) ) + $ IPOSTPAD END IF * ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * LTAU = MP IPW = IPTAU + LTAU + IPOSTPAD + IPREPAD * * Figure the amount of workspace required by the TZ * factorization * LWORK = DESCA( MB_ ) * ( MP + NQ + DESCA( MB_ ) ) WORKFCT = LWORK + IPOSTPAD WORKSIZ = WORKFCT * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSLAFCHK, PSTZRZRV and * PSLANGE * WORKSIZ = LWORK + $ MAX( MP*DESCA( NB_ ), NQ*DESCA( MB_ ) $ ) + IPOSTPAD * END IF * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ FACT // ' factorization', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PSMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need the Infinity of A for checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PSFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PSFILLPAD( ICTXT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANGE( 'I', M, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform QR factorizations * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN CALL SLTIMER( 1 ) CALL PSGEQRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN CALL SLTIMER( 1 ) CALL PSGEQLF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN CALL SLTIMER( 1 ) CALL PSGELQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN CALL SLTIMER( 1 ) CALL PSGERQF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL SLTIMER( 1 ) CALL PSGEQPF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPTAU ), $ MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN CALL SLTIMER( 1 ) IF( N.GE.M ) $ CALL PSTZRZF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ), LWORK, $ INFO ) CALL SLTIMER( 1 ) END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, ROUT, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, ROUT, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) IF( LSAMEN( 2, FACT, 'QP' ) ) THEN CALL PSCHEKPAD( ICTXT, ROUT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF CALL PSCHEKPAD( ICTXT, ROUT, WORKFCT-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKFCT-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QR' ) ) THEN * * Compute residual = ||A-Q*R|| / (||A||*N*eps) * CALL PSGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QL' ) ) THEN * * Compute residual = ||A-Q*L|| / (||A||*N*eps) * CALL PSGEQLRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'LQ' ) ) THEN * * Compute residual = ||A-L*Q|| / (||A||*N*eps) * CALL PSGELQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'RQ' ) ) THEN * * Compute residual = ||A-R*Q|| / (||A||*N*eps) * CALL PSGERQRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * * Compute residual = ||AP-Q*R|| / (||A||*N*eps) * CALL PSGEQRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) ELSE IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN * * Compute residual = ||A-T*Z|| / (||A||*N*eps) * IF( N.GE.M ) THEN CALL PSTZRZRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPTAU ), MEM( IPW ) ) END IF CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) END IF * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, ROUTCHK, MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, ROUTCHK, LTAU, 1, $ MEM( IPTAU-IPREPAD ), LTAU, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, ROUTCHK, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 2, FACT, 'QP' ) ) THEN * CALL PSQPPIV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSQPPIV', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQPPIV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSLAFCHK', MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLAFCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Test residual and detect NaN result * IF( LSAMEN( 2, FACT, 'TZ' ) .AND. N.LT.M ) THEN KSKIP = KSKIP + 1 PASSED = 'BYPASS' ELSE IF( FRESID.LE.THRESH .AND. $ (FRESID-FRESID).EQ.0.0E+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MINMN = MIN( M, N ) MAXMN = MAX( M, N ) * IF( LSAMEN( 2, FACT, 'TZ' ) ) THEN IF( M.GE.N ) THEN NOPS = 0.0D+0 ELSE * * 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for * complete orthogonal factorization (M <= N). * NOPS = ( 5.0D+0 * ( $ DBLE( N )*( DBLE( M )**2 ) - $ DBLE( M )**3 + $ DBLE( N )*DBLE( M ) ) + $ DBLE( M )**2 ) / 2.0D+0 END IF * ELSE * * 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type * factorization when M >= N. * NOPS = 2.0D+0 * ( DBLE( MINMN )**2 ) * $ ( DBLE( MAXMN )-DBLE( MINMN ) / 3.0D+0 ) + $ ( DBLE( MAXMN )+DBLE( MINMN ) )*DBLE( MINMN ) END IF * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, MB, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, $ PASSED, FRESID * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, MB, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, $ PASSED, FRESID * END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ', $ ' MFLOPS CHECK Residual' ) 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ', $ '----------- ------ --------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, A6, 2X, G8.1 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PSQRDRIVER * END * SUBROUTINE PSQPPIV( M, N, A, IA, JA, DESCA, IPIV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ) * .. * * Purpose * ======= * * PSQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots * returned by PSGEQPF in reverse order for checking purposes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be permuted. On exit, the local pieces * of the distributed permuted submatrix sub( A ) * Inv( P ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL, $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, IGERV2D, $ IGESD2D, IGAMN2D, INFOG1L, PSSWAP * .. * .. External Functions .. INTEGER INDXL2G, NUMROC EXTERNAL INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG1L( JA, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), JJA, $ IACOL ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * DO 20 J = JA, JA+N-2 * IPVT = JA+N-1 ITMP = JA+N * * Find first the local minimum candidate for pivoting * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, IACOL ) DO 10 KK = JJ, JJA+NQ-1 IF( IPIV( KK ).LT.IPVT )THEN IITMP = KK IPVT = IPIV( KK ) END IF 10 CONTINUE * * Find the global minimum pivot * CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, IPVT, 1, IPROW, $ IPCOL, 1, -1, MYCOL ) * * Broadcast the corresponding index to the other process columns * IF( MYCOL.EQ.IPCOL ) THEN ITMP = INDXL2G( IITMP, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1 ) IF( IPCOL.NE.IACOL ) THEN CALL IGERV2D( ICTXT, 1, 1, IPIV( IITMP ), 1, MYROW, $ IACOL ) ELSE IF( MYCOL.EQ.IACOL ) $ IPIV( IITMP ) = IPIV( JJ ) END IF ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ITMP, 1, MYROW, $ IPCOL ) IF( MYCOL.EQ.IACOL .AND. IPCOL.NE.IACOL ) $ CALL IGESD2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, IPCOL ) END IF * * Swap the columns of A * CALL PSSWAP( M, A, IA, ITMP, DESCA, 1, A, IA, J, DESCA, 1 ) * 20 CONTINUE * * End of PSQPPIV * END scalapack-1.8.0/TESTING/LIN/psdbdriver.f0000640000175000017500000007564510363532303017407 0ustar muammarmuammar PROGRAM PSDBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PSDBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by SDB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER MEMSIZ, NTESTS, REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, $ PSCHEKPAD, PSDBINFO, PSDBLASCHK, PSDBTRF, $ PSDBTRS, PSFILLPAD, PSMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PSLANGE EXTERNAL LSAME, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PSDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 ) $ + MAX(BWL,BWU) NB = MAX( NB, 2*MAX(BWL,BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BWL+BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((BWL+BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BWL+BWU+1), N, $ (BWL+BWU+1), NB, 0, 0, $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BWL+BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BWL+BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PSLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PSDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N, $ (BWL+BWU+1), NB, MEM( IPA ), $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) * CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PSLANGE( '1', (BWL+BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PSDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PSDBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, 'PSDBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PSMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PSDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSDBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSDBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PSDBLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl bwu + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(BWU)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+bwu) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU)) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*bwu * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*bwu flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*BWU*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*BWU*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+bwu) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(BWU)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*BWU ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSDBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzinvchk.f0000640000175000017500000003405110363532303017061 0ustar muammarmuammar SUBROUTINE PZINVCHK( MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, $ FRESID, RCOND, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IASEED, JA, N DOUBLE PRECISION ANORM, FRESID, RCOND * .. * .. Array Arguments .. CHARACTER*3 MATTYP INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZINVCHK computes the scaled residual * * || sub( A ) * inv( sub( A ) ) - I || / ( || sub( A ) || * N * eps ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). to check the result * returned by the matrix inversion routines. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MATTYP (global input) CHARACTER*3 * The type of the distributed matrix to be generated: * if MATTYP = 'GEN' then GENeral matrix, * if MATTYP = 'UTR' then Upper TRiangular matrix, * if MATTYP = 'LTR' then Lower TRiangular matrix, * if MATTYP = 'UPD' then (Upper) Hermitian Positive Definite, * if MATTYP = 'LPD' then (Lower) Hermitian Positive Definite. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_A, LOCc(JA+N-1)). On * entry, sub( A ) contains the distributed matrix inverse * computed by PZGETRI, PZPOTRI or PZTRTRI. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * Seed for the random generation of sub( A ). * * ANORM (global input) DOUBLE PRECISION * The 1-norm of the original matrix sub( A ). * * FRESID (global output) DOUBLE PRECISION * The inversion residual. * * RCOND (global output) DOUBLE PRECISION * The condition number of the original distributed matrix. * RCOND = || sub( A ) ||.|| sub( A )^{-1} || where ||A|| * denotes the 1-norm of A. * * WORK (local workspace) COMPLEX*16 array, dimension * MAX(2*LOCr(N_A+MOD(IA-1,MB_A))*MB_A, LDW) * where LDW is the workspace requirement for the norm computa- * tions, see PZLANGE, PZLANHE, PZLANSY and PZLANTR. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER AFORM, DIAG, UPLO INTEGER ICTXT, ICURCOL, ICURROW, II, IIA, IPW, IROFF, $ IW, J, JB, JJA, JN, KK, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION AUXNORM, EPS, NRMINVAXA, TEMP * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PZGEMM, $ PZHEMM, PZLASET, PZMATGEN, PZTRMM * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE, PZLANHE, PZLANTR EXTERNAL ICEIL, LSAMEN, NUMROC, PDLAMCH, PZLANGE, $ PZLANHE, PZLANSY, PZLANTR * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * EPS = PDLAMCH( DESCA( CTXT_ ), 'eps' ) * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Compute the condition number * IF( LSAMEN( 1, MATTYP( 1:1 ), 'U' ) ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PZLANGE( '1', N, N, A, IA, JA, DESCA, WORK ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PZLANTR( '1', UPLO, 'Non unit', N, N, A, IA, JA, $ DESCA, WORK ) ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * AFORM = 'H' DIAG = 'D' AUXNORM = PZLANHE( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * END IF RCOND = ANORM*AUXNORM * * Compute inv(A)*A * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ ICURROW, ICURCOL ) * * Define array descriptor for working array WORK * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, ICURROW, NPROW ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), ICURROW, ICURCOL, DESCA( CTXT_ ), $ MAX( 1, NP ) ) IPW = DESCW( LLD_ ) * DESCW( NB_ ) + 1 * IF( MYROW.EQ.ICURROW ) THEN II = IROFF + 1 NP = NP - IROFF ELSE II = 1 END IF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Handle first block separately, regenerate a block of columns of A * IW = IROFF + 1 IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK, $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PZLASET( 'Lower', N-1, JB, ZERO, ZERO, WORK, IW+1, $ 1, DESCW ) ELSE CALL PZLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, WORK, IW, $ 2, DESCW ) END IF ELSE CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK( IPW ), $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PZGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, A, $ IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, ZERO, $ WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * CALL PZTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PZHEMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 KK = 0, JB-1 WORK( II+KK*(DESCW(LLD_)+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) )-ONE 10 CONTINUE END IF * NRMINVAXA = PZLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) * JB = MIN( N-J+JA, DESCA( NB_ ) ) * * regenerate a block of columns of A * IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK, DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, IIA-1, NP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PZLASET( 'Lower', JA+N-J-1, JB, ZERO, ZERO, $ WORK, IW+J-JA+1, 1, DESCW ) ELSE CALL PZLASET( 'All', J-JA, JB, ZERO, ZERO, WORK, IW, $ 1, DESCW ) CALL PZLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, $ WORK, IW+J-JA, 2, DESCW ) END IF ELSE CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK( IPW ), DESCW( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PZGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, $ A, IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, $ ZERO, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP(2:3), 'TR' ) ) THEN * CALL PZTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PZHEMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these * cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 KK = 0, JB-1 WORK( II+KK*(DESCW( LLD_ )+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) ) - ONE 20 CONTINUE END IF * * Compute the 1-norm of these JB cols * TEMP = PZLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) NRMINVAXA = MAX( TEMP, NRMINVAXA ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * 30 CONTINUE * * Compute the scaled residual * FRESID = NRMINVAXA / ( N * EPS * ANORM ) * RETURN * * End of PZINVCHK * END scalapack-1.8.0/TESTING/LIN/pzbmatgen.f0000640000175000017500000001714510363532303017221 0ustar muammarmuammar SUBROUTINE PZBMATGEN( ICTXT, AFORM, AFORM2, BWL, BWU, N, $ MB, NB, A, $ LDA, IAROW, IACOL, ISEED, $ MYROW, MYCOL, NPROW, NPCOL ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. * .. Scalar Arguments .. CHARACTER*1 AFORM, AFORM2 INTEGER IACOL, IAROW, ICTXT, $ ISEED, LDA, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW, BWL, BWU * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZBMATGEN : Parallel Complex Double precision Band MATrix GENerator. * (Re)Generate a distributed Band matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'L' : A is returned as a hermitian lower * triangular matrix, and is diagonally dominant. * if AFORM = 'U' : A is returned as a hermitian upper * triangular matrix, and is diagonally dominant. * if AFORM = 'G' : A is returned as a general matrix. * if AFORM = 'T' : A is returned as a general matrix in * tridiagonal-compatible form. * * AFORM2 (global input) CHARACTER*1 * if the matrix is general: * if AFORM2 = 'D' : A is returned diagonally dominant. * if AFORM2 != 'D' : A is not returned diagonally dominant. * if the matrix is symmetric or hermitian: * if AFORM2 = 'T' : A is returned in tridiagonally-compatible * form (a transpose form). * if AFORM2 != 'T' : A is returned in banded-compatible form. * * M (global input) INTEGER * The number of nonzero rows in the generated distributed * band matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX*16, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) + BWL+BWU * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * This code is a simple wrapper around PZMATGEN, for band matrices. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER DIAG_INDEX, I, J, M_MATGEN, NQ, N_MATGEN, $ START_INDEX * .. * .. External Subroutines .. EXTERNAL PZMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC, LSAME * .. * .. Executable Statements .. * * IF( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) THEN M_MATGEN = BWL + 1 N_MATGEN = N START_INDEX = 1 IF( LSAME( AFORM, 'L' ) ) THEN DIAG_INDEX = 1 ELSE DIAG_INDEX = BWL + 1 ENDIF ELSE M_MATGEN = BWL + BWU + 1 N_MATGEN = N DIAG_INDEX = BWU + 1 START_INDEX = 1 ENDIF * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * * Generate a random matrix initially * IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN * CALL PZMATGEN( ICTXT, 'T', 'N', $ N_MATGEN, M_MATGEN, $ NB, M_MATGEN, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, NQ, 0, M_MATGEN, $ MYCOL, MYROW, NPCOL, NPROW ) * ELSE * CALL PZMATGEN( ICTXT, 'N', 'N', $ M_MATGEN, N_MATGEN, $ M_MATGEN, NB, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, M_MATGEN, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Zero out padding at tops of columns * DO 1000 J=1,NB * DO 2000 I=1, LDA-M_MATGEN * * Indexing goes negative; BMATGEN assumes that space * has been preallocated above the first column as it * has to be if the matrix is to be input to * Scalapack's band solvers. * A( I-LDA+M_MATGEN, J ) = CZERO * 2000 CONTINUE * 1000 CONTINUE * ENDIF * IF( LSAME( AFORM2, 'D' ).OR. $ ( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) ) THEN * * Loop over diagonal elements stored on this processor. * * DO 330 I=1, NQ IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN IF( NPROW .EQ. 1 ) THEN A( I, DIAG_INDEX ) = DCMPLX( DBLE( A( I, DIAG_INDEX ) ) $ + DBLE( 2*( BWL+BWU+1 ) ) ) ENDIF ELSE IF( NPROW .EQ. 1 ) THEN A( DIAG_INDEX, I ) = DCMPLX( DBLE( A( DIAG_INDEX, I ) ) $ + DBLE( 2*( BWL+BWU+1 ) ) ) ENDIF END IF 330 CONTINUE * * ELSE * * Must add elements to keep condition of matrix in check * DO 380 I=1, NQ * IF( NPROW .EQ. 1 ) THEN * IF( MOD(I+MYCOL*NB,2) .EQ. 1 ) THEN A( DIAG_INDEX+1, I ) = $ DCMPLX( DBLE( A( DIAG_INDEX+1, I ) ) $ + DBLE( 2*( BWL+BWU+1 ) ) ) * ELSE * A( DIAG_INDEX-1, I ) = $ DCMPLX( DBLE( A( DIAG_INDEX-1, I ) ) $ + DBLE( 2*( BWL+BWU+1 ) ) ) ENDIF * ENDIF * 380 CONTINUE * END IF * RETURN * * End of PZBMATGEN * END scalapack-1.8.0/TESTING/LIN/pzlltinfo.f0000640000175000017500000004215010363532303017245 0ustar muammarmuammar SUBROUTINE PZLLTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, THRESH, EST, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZLLTINFO get needed startup information for LLt factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 7, LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'LLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 2 END IF IF( EST ) THEN WORK( 7 ) = 1 ELSE WORK( 7 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 7, 1, WORK, 7 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Ax=b by LLt factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision LLt factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data is stored in ''Upper'// $ ''' or ''Lower'' portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LLt time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 7, 1, WORK, 7, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF IF( WORK( 7 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZLLTINFO * END scalapack-1.8.0/TESTING/LIN/pspotrrv.f0000640000175000017500000002733510363532303017133 0ustar muammarmuammar SUBROUTINE PSPOTRRV( UPLO, N, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U * computed by PSPOTRF. The routine performs the Cholesky factorization * in reverse. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is stored: * stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the factors L or U of the * distributed matrix sub( A ) from the Cholesky factorization. * On exit, the original distributed matrix sub( A ) is * restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= MB_A*NB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PSLACPY, PSLASET, $ PSSYRK, PSTRMM, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * UPPER = LSAME( UPLO, 'U' ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) IL = MAX( ( ( IA+N-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * * Define array descriptor for working array WORK * CALL DESCSET( DESCW, DESCA( MB_ ), DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * IF ( UPPER ) THEN * * Compute A from the Cholesky factor U : A = U'*U. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PSSYRK( 'Upper', 'Transpose', JA+N-J-JB, JB, ONE, A, IL, $ J+JB, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PSLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PSLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PSTRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', JB, $ N-J+JA, ONE, WORK, 1, 1, DESCW, A, IL, J, $ DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PSLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, $ IL+1, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + U'*U * CALL PSSYRK( 'Upper', 'Transpose', N-JB, JB, ONE, A, IA, JA+JB, $ DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PSLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict lower triangular part of diagonal block, to make * it U1. * CALL PSLASET( 'Lower', JB-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Update the row panel U with the triangular matrix * CALL PSTRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', JB, $ N, ONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict lower triangular part of diagonal block. * CALL PSLACPY( 'Lower', JB-1, JB, WORK, 2, 1, DESCW, A, IA+1, $ JA, DESCA ) * ELSE * * Compute A from the Cholesky factor L : A = L*L'. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * DO 20 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PSSYRK( 'Lower', 'No transpose', IA+N-IL-JB, JB, ONE, A, $ IL+JB, J, DESCA, ONE, A, IL+JB, J+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PSLACPY( 'All', JB, JB, A, IL, J, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PSLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IL, J+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PSTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', $ IA+N-IL, JB, ONE, WORK, 1, 1, DESCW, A, IL, $ J, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PSLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, $ IL, J+1, DESCA ) * IL = IL - DESCA( MB_ ) DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 20 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Update the trailing matrix, A = A + L*L' * CALL PSSYRK( 'Lower', 'No transpose', N-JB, JB, ONE, A, $ IA+JB, JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * * Copy current diagonal block of A into workspace * CALL PSLACPY( 'All', JB, JB, A, IA, JA, DESCA, WORK, 1, 1, $ DESCW ) * * Zero strict upper triangular part of diagonal block, to make * it L1. * CALL PSLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Update the column panel L with the triangular matrix * CALL PSTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', N, JB, $ ONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA ) * * Restore the strict upper triangular part of diagonal block. * CALL PSLACPY( 'Upper', JB, JB-1, WORK, 1, 2, DESCW, A, IA, $ JA+1, DESCA ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSPOTRRV * END scalapack-1.8.0/TESTING/LIN/pzqrinfo.f0000640000175000017500000004254210363532303017101 0ustar muammarmuammar SUBROUTINE PZQRINFO( SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, $ MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, $ LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDFACT, LDMBVAL, LDMVAL, LDNBVAL, LDNVAL, $ LDPVAL, LDQVAL, NFACT, NGRIDS, NMAT, NNB, $ NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*2 FACTOR( LDFACT ) CHARACTER*(*) SUMMRY INTEGER MBVAL( LDMBVAL ), MVAL( LDMVAL ), $ NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZQRINFO gets needed startup information for the QR factoriza- * tion routines and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NFACT (global output) INTEGER * The number of different factorization types to be tested. * * FACTOR (global output) CHARACTER*2 array of dimension of LDFACT, * The factorization types to be tested: * if FACTOR(i) = 'QR' then QR factorization, * if FACTOR(i) = 'QL' then QL factorization, * if FACTOR(i) = 'LQ' then LQ factorization, * if FACTOR(i) = 'RQ' then RQ factorization, * if FACTOR(i) = 'QP' then QR factorization with column * pivoting. * if FACTOR(i) = 'TZ' then complete unitary factorization. * * LDFACT (global input) INTEGER * The maximum number of different factorization types to be * tested. LDFACT >= NFACT. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array of dimension (LDNVAL), the * values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array of dimension (LDNVAL), the * values of N (number of columns in matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for MB and * NB. * * MBVAL (global output) INTEGER array of dimension (LDMBVAL), the * values of MB (row blocksize) to run the code with. * * LDMBVAL (global input) INTEGER * The maximum number of different values that can be used for * MB, LDMBVAL >= NNB. * * NBVAL (global output) INTEGER array of dimension (LDNBVAL), the * values of NB (column blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array of dimension (LDPVAL), the * values of P (number of process rows) to run the code with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array of dimension (LDQVAL), the * values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDFACT+LDMVAL+LDNVAL+LDMBVAL+LDNBVAL+LDPVAL+LDQVAL ) * used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN DOUBLE PRECISION PDLAMCH EXTERNAL LSAMEN, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='QR.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NFACT IF( NFACT.LT.1 .OR. NFACT.GT.LDFACT ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of factorization', LDFACT GO TO 40 END IF READ( NIN, FMT = * ) ( FACTOR( I ), I = 1, NFACT ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDMBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'MB', LDMBVAL GO TO 40 ELSE IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MBVAL( I ), I = 1, NNB ) READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NFACT CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NFACT IF( LSAMEN( 2, FACTOR( K ), 'QR' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QL' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'LQ' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'RQ' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QP' ) ) THEN WORK( I ) = 5 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'TZ' ) ) THEN WORK( I ) = 6 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, MBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QR factorizations routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision QR factorizations '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' || A - QR || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QL || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - LQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - RQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QRP || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - TZ || / (|| A || * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'MB : The row blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The column blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'Fact Time: Time in seconds to factor the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Execution rate of the '// $ 'factorization.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'MB ', ( MBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NFACT = WORK( 4 ) * I = NFACT + 2*NMAT + 2*NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NFACT IF( WORK( K ).EQ.1 ) THEN FACTOR( K ) = 'QR' ELSE IF( WORK( K ).EQ.2 ) THEN FACTOR( K ) = 'QL' ELSE IF( WORK( K ).EQ.3 ) THEN FACTOR( K ) = 'LQ' ELSE IF( WORK( K ).EQ.4 ) THEN FACTOR( K ) = 'RQ' ELSE IF( WORK( K ).EQ.5 ) THEN FACTOR( K ) = 'QP' ELSE IF( WORK( K ).EQ.6 ) THEN FACTOR( K ) = 'TZ' END IF 30 CONTINUE * I = NFACT + 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, MBVAL, 1 ) I = I + NNB CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZQRINFO * END scalapack-1.8.0/TESTING/LIN/pdinvinfo.f0000640000175000017500000003525410363532303017227 0ustar muammarmuammar SUBROUTINE PDINVINFO( SUMMRY, NOUT, NMTYP, MATTYP, LDMTYP, NMAT, $ NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, $ IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDMTYP, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NMTYP, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*3 MATTYP( LDMTYP ) CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDINVINFO gets needed startup information for matrix inversion * tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMTYP (global output) INTEGER * The number of different matrix types to be tested. * * MATTYP (global output) CHARACTER*3 array of dimension of LDMTYP, * The types of matrix to be generated: * if MATTYP(i) = 'GEN' then GENeral matrix, * if MATTYP(i) = 'UTR' then Upper TRiangular matrix, * if MATTYP(i) = 'LTR' then Lower TRiangular matrix, * if MATTYP(i) = 'UPD' then (Upper) symmetric Pos. Definite, * if MATTYP(i) = 'LPD' then (Lower) symmetric Pos. Definite, * * LDMTYP (global input) INTEGER * The maximum number of different matrix types to be tested. * LDMTYP >= NMTYP. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDMTYP+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN DOUBLE PRECISION PDLAMCH EXTERNAL LSAMEN, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='INV.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NMTYP IF( NMTYP.LT.1 .OR. NMTYP.GT.LDMTYP ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of matrix types', LDMTYP GO TO 40 END IF READ( NIN, FMT = * ) ( MATTYP( I ), I = 1, NMTYP ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NMTYP CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NMTYP IF( LSAMEN( 3, MATTYP( K ), 'GEN' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UTR' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LTR' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'UPD' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 3, MATTYP( K ), 'LPD' ) ) THEN WORK( I ) = 5 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Matrix Inversion routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision Matrix Inversion '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Inverse residual = ||inv(A)*A - I|| '// $ '/ (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less '// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'Fct time : Time in seconds to factor the'// $ ' matrix, if needed.' WRITE( NOUT, FMT = 9999 ) $ 'Inv Time : Time in seconds to inverse the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and inverse.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NMTYP = WORK( 4 ) * I = NMTYP+NMAT+NNB+2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NMTYP IF( WORK( K ).EQ.1 ) THEN MATTYP( K ) = 'GEN' ELSE IF( WORK( K ).EQ.2 ) THEN MATTYP( K ) = 'UTR' ELSE IF( WORK( K ).EQ.3 ) THEN MATTYP( K ) = 'LTR' ELSE IF( WORK( K ).EQ.4 ) THEN MATTYP( K ) = 'UPD' ELSE IF( WORK( K ).EQ.5 ) THEN MATTYP( K ) = 'LPD' END IF 30 CONTINUE * I = NMTYP + 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDINVINFO * END scalapack-1.8.0/TESTING/LIN/psinvchk.f0000640000175000017500000003375110363532303017060 0ustar muammarmuammar SUBROUTINE PSINVCHK( MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, $ FRESID, RCOND, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IASEED, JA, N REAL ANORM, FRESID, RCOND * .. * .. Array Arguments .. CHARACTER*3 MATTYP INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSINVCHK computes the scaled residual * * || sub( A ) * inv( sub( A ) ) - I || / ( || sub( A ) || * N * eps ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). to check the result * returned by the matrix inversion routines. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MATTYP (global input) CHARACTER*3 * The type of the distributed matrix to be generated: * if MATTYP = 'GEN' then GENeral matrix, * if MATTYP = 'UTR' then Upper TRiangular matrix, * if MATTYP = 'LTR' then Lower TRiangular matrix, * if MATTYP = 'UPD' then (Upper) symmetric Positive Definite, * if MATTYP = 'LPD' then (Lower) symmetric Positive Definite, * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of local dimension (LLD_A, LOCc(JA+N-1)). On * entry, sub( A ) contains the distributed matrix inverse * computed by PSGETRI, PSPOTRI or PSTRTRI. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * Seed for the random generation of sub( A ). * * ANORM (global input) REAL * The 1-norm of the original matrix sub( A ). * * FRESID (global output) REAL * The inversion residual. * * RCOND (global output) REAL * The condition number of the original distributed matrix. * RCOND = || sub( A ) ||.|| sub( A )^{-1} || where ||A|| * denotes the 1-norm of A. * * WORK (local workspace) REAL array, dimension * MAX(2*LOCr(N_A+MOD(IA-1,MB_A))*MB_A, LDW) * where LDW is the workspace requirement for the norm computa- * tions, see PSLANGE, PSLANSY and PSLANTR. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER AFORM, DIAG, UPLO INTEGER ICTXT, ICURCOL, ICURROW, II, IIA, IPW, IROFF, $ IW, J, JB, JJA, JN, KK, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL AUXNORM, EPS, NRMINVAXA, TEMP * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSGEMM, $ PSLASET, PSMATGEN, PSSYMM, PSTRMM * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC REAL PSLAMCH, PSLANGE, PSLANSY, PSLANTR EXTERNAL ICEIL, LSAMEN, NUMROC, PSLAMCH, PSLANGE, $ PSLANSY, PSLANTR * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * EPS = PSLAMCH( DESCA( CTXT_ ), 'eps' ) * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Compute the condition number * IF( LSAMEN( 1, MATTYP( 1:1 ), 'U' ) ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PSLANGE( '1', N, N, A, IA, JA, DESCA, WORK ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PSLANTR( '1', UPLO, 'Non unit', N, N, A, IA, JA, $ DESCA, WORK ) ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * AFORM = 'S' DIAG = 'D' AUXNORM = PSLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * END IF RCOND = ANORM*AUXNORM * * Compute inv(A)*A * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ ICURROW, ICURCOL ) * * Define array descriptor for working array WORK * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, ICURROW, NPROW ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), ICURROW, ICURCOL, DESCA( CTXT_ ), $ MAX( 1, NP ) ) IPW = DESCW( LLD_ ) * DESCW( NB_ ) + 1 * IF( MYROW.EQ.ICURROW ) THEN II = IROFF + 1 NP = NP - IROFF ELSE II = 1 END IF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Handle first block separately, regenerate a block of columns of A * IW = IROFF + 1 IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK, $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PSLASET( 'Lower', N-1, JB, ZERO, ZERO, WORK, IW+1, $ 1, DESCW ) ELSE CALL PSLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, WORK, IW, $ 2, DESCW ) END IF ELSE CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK( IPW ), $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PSGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, A, $ IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, ZERO, $ WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * CALL PSTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PSSYMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK( IPW ), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 KK = 0, JB-1 WORK( II+KK*(DESCW(LLD_)+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) )-ONE 10 CONTINUE END IF * NRMINVAXA = PSLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) * JB = MIN( N-J+JA, DESCA( NB_ ) ) * * regenerate a block of columns of A * IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK, DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, IIA-1, NP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PSLASET( 'Lower', JA+N-J-1, JB, ZERO, ZERO, $ WORK, IW+J-JA+1, 1, DESCW ) ELSE CALL PSLASET( 'All', J-JA, JB, ZERO, ZERO, WORK, IW, $ 1, DESCW ) CALL PSLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, $ WORK, IW+J-JA, 2, DESCW ) END IF ELSE CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK( IPW ), DESCW( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PSGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, $ A, IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, $ ZERO, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP(2:3), 'TR' ) ) THEN * CALL PSTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PSSYMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these * cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 KK = 0, JB-1 WORK( II+KK*(DESCW( LLD_ )+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) ) - ONE 20 CONTINUE END IF * * Compute the 1-norm of these JB cols * TEMP = PSLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) NRMINVAXA = MAX( TEMP, NRMINVAXA ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * 30 CONTINUE * * Compute the scaled residual * FRESID = NRMINVAXA / ( N * EPS * ANORM ) * RETURN * * End of PSINVCHK * END scalapack-1.8.0/TESTING/LIN/psgeqlrv.f0000640000175000017500000002377410363532303017102 0ustar muammarmuammar SUBROUTINE PSGEQLRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQLRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PSGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PSGEQLF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(N_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PSGEQLF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IIA, IPT, IPV, IPW, IROFF, $ IV, J, JB, JJA, JN, K, MP, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLACPY, $ PSLARFB, PSLARFT, PSLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA+N-K, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * * Handle first block separately * IV = 1 + M - K + IROFF JB = JN - JA - N + K + 1 * * Compute upper triangular matrix T * CALL PSLARFT( 'Backward', 'Columnwise', M-N+JN-JA+1, JB, A, IA, $ JA+N-K, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', M-N+JN-JA+1, JB, A, IA, JA+N-K, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PSLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of A to get block * row of L * CALL PSLASET( 'All', M-K, JB, ZERO, ZERO, A, IA, JA+N-K, $ DESCA ) CALL PSLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-K, $ JA+N-K+1, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+JN-JA+1, JN-JA+1, JB, WORK( IPV ), IROFF+1, 1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * * Loop over the remaining column blocks * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IV = 1 + M - N + J - JA + IROFF * * Compute upper triangular matrix T * CALL PSLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', M-N+J+JB-JA, JB, A, IA, J, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PSLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of sub( A ) to get * block row of L * CALL PSLASET( 'All', M-N+J-JA, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PSLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-N+J-JA, $ J+1, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+J+JB-JA, J+JB-JA, JB, WORK( IPV ), IROFF+1, $ 1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSGEQLRV * END scalapack-1.8.0/TESTING/LIN/psgelqrv.f0000640000175000017500000002273110363532303017072 0ustar muammarmuammar SUBROUTINE PSGELQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGELQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PSGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PSGELQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PSGELQF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) REAL array, dimension * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IL, IN, $ IPT, IPV, IPW, J, JJA, JV, K, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLACPY, $ PSLARFB, PSLARFT, PSLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IL, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+K-I, DESCA( MB_ ) ) J = JA + I - IA JV = 1 + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PSLARFT( 'Forward', 'Rowwise', N-J+JA, IB, A, I, J, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'Upper', IB, N-J+JA, A, I, J, DESCA, WORK( IPV ), $ 1, JV, DESCV ) CALL PSLASET( 'Lower', IB, N-J+JA, ZERO, ONE, WORK( IPV ), 1, $ JV, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PSLASET( 'Upper', IB, N-J+JA-1, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-I+IA, N-J+JA, IB, WORK( IPV ), 1, JV, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) * 10 CONTINUE * * Handle first block separately * IB = IN - IA + 1 * * Compute upper triangular matrix T * CALL PSLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, TAU, $ WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'Upper', IB, N, A, IA, JA, DESCA, WORK( IPV ), 1, $ ICOFF+1, DESCV ) CALL PSLASET( 'Lower', IB, N, ZERO, ONE, WORK, 1, ICOFF+1, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PSLASET( 'Upper', IB, N-1, ZERO, ZERO, A, IA, JA+1, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, N, $ IB, WORK( IPV ), 1, ICOFF+1, DESCV, WORK( IPT ), A, $ IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSGELQRV * END scalapack-1.8.0/TESTING/LIN/pdpbdriver.f0000640000175000017500000007375010363532303017377 0ustar muammarmuammar PROGRAM PDPBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PDPBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by DPB. This test driver performs an * A = L*L**T factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER DBLESZ, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD, $ IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE, $ IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS, $ KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB, $ NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS, $ N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, $ PDCHEKPAD, PDFILLPAD, PDMATGEN, PDPBINFO, $ PDPBLASCHK, PDPBTRF, PDPBTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PDLANGE EXTERNAL LSAME, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PDPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = BWVAL( BW_NUM ) IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 ) $ + BW NB = MAX( NB, 2*BW ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*BW, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BW+1), (BW+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BW+1)+10) IMIDPAD = 10 IPOSTPAD = ((BW+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BW+1), N, $ (BW+1), NB, 0, 0, $ ICTXT,((BW+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BW+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+2*BW)*BW * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = BW*BW * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BW+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PDLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PDPBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(BW*(BW+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB, $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BW+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PDLANGE( '1', (BW+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PDPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PDPBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, 'PDPBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (BW*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PDMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PDPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDPBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PDPBLASCHK( 'S', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDPBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pddbinfo.f0000640000175000017500000004433510363532303017020 0ustar muammarmuammar SUBROUTINE PDDBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDDBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDDBINFO * END scalapack-1.8.0/TESTING/LIN/pcbmatgen.f0000640000175000017500000001713610363532303017172 0ustar muammarmuammar SUBROUTINE PCBMATGEN( ICTXT, AFORM, AFORM2, BWL, BWU, N, $ MB, NB, A, $ LDA, IAROW, IACOL, ISEED, $ MYROW, MYCOL, NPROW, NPCOL ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. * .. Scalar Arguments .. CHARACTER*1 AFORM, AFORM2 INTEGER IACOL, IAROW, ICTXT, $ ISEED, LDA, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW, BWL, BWU * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCBMATGEN : Parallel Complex Single precision Band MATrix GENerator. * (Re)Generate a distributed Band matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'L' : A is returned as a hermitian lower * triangular matrix, and is diagonally dominant. * if AFORM = 'U' : A is returned as a hermitian upper * triangular matrix, and is diagonally dominant. * if AFORM = 'G' : A is returned as a general matrix. * if AFORM = 'T' : A is returned as a general matrix in * tridiagonal-compatible form. * * AFORM2 (global input) CHARACTER*1 * if the matrix is general: * if AFORM2 = 'D' : A is returned diagonally dominant. * if AFORM2 != 'D' : A is not returned diagonally dominant. * if the matrix is symmetric or hermitian: * if AFORM2 = 'T' : A is returned in tridiagonally-compatible * form (a transpose form). * if AFORM2 != 'T' : A is returned in banded-compatible form. * * M (global input) INTEGER * The number of nonzero rows in the generated distributed * band matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX, pointer into the local memory to * an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * A( DIAG_INDEX, I ) = A( DIAG_INDEX, I ) + BWL+BWU * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * This code is a simple wrapper around PCMATGEN, for band matrices. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER DIAG_INDEX, I, J, M_MATGEN, NQ, N_MATGEN, $ START_INDEX * .. * .. External Subroutines .. EXTERNAL PCMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC, LSAME * .. * .. Executable Statements .. * * IF( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) THEN M_MATGEN = BWL + 1 N_MATGEN = N START_INDEX = 1 IF( LSAME( AFORM, 'L' ) ) THEN DIAG_INDEX = 1 ELSE DIAG_INDEX = BWL + 1 ENDIF ELSE M_MATGEN = BWL + BWU + 1 N_MATGEN = N DIAG_INDEX = BWU + 1 START_INDEX = 1 ENDIF * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * * Generate a random matrix initially * IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN * CALL PCMATGEN( ICTXT, 'T', 'N', $ N_MATGEN, M_MATGEN, $ NB, M_MATGEN, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, NQ, 0, M_MATGEN, $ MYCOL, MYROW, NPCOL, NPROW ) * ELSE * CALL PCMATGEN( ICTXT, 'N', 'N', $ M_MATGEN, N_MATGEN, $ M_MATGEN, NB, A( START_INDEX, 1 ), $ LDA, IAROW, IACOL, $ ISEED, 0, M_MATGEN, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Zero out padding at tops of columns * DO 1000 J=1,NB * DO 2000 I=1, LDA-M_MATGEN * * Indexing goes negative; BMATGEN assumes that space * has been preallocated above the first column as it * has to be if the matrix is to be input to * Scalapack's band solvers. * A( I-LDA+M_MATGEN, J ) = CZERO * 2000 CONTINUE * 1000 CONTINUE * ENDIF * IF( LSAME( AFORM2, 'D' ).OR. $ ( LSAME( AFORM, 'L' ).OR.LSAME( AFORM, 'U' ) ) ) THEN * * Loop over diagonal elements stored on this processor. * * DO 330 I=1, NQ IF( LSAME( AFORM, 'T' ) .OR. $ ( LSAME( AFORM2, 'T' ) ) ) THEN IF( NPROW .EQ. 1 ) THEN A( I, DIAG_INDEX ) = CMPLX( REAL( A( I, DIAG_INDEX ) ) $ + REAL( 2*( BWL+BWU+1 ) ) ) ENDIF ELSE IF( NPROW .EQ. 1 ) THEN A( DIAG_INDEX, I ) = CMPLX( REAL( A( DIAG_INDEX, I ) ) $ + REAL( 2*( BWL+BWU+1 ) ) ) ENDIF END IF 330 CONTINUE * * ELSE * * Must add elements to keep condition of matrix in check * DO 380 I=1, NQ * IF( NPROW .EQ. 1 ) THEN * IF( MOD(I+MYCOL*NB,2) .EQ. 1 ) THEN A( DIAG_INDEX+1, I ) = $ CMPLX( REAL( A( DIAG_INDEX+1, I ) ) $ + REAL( 2*( BWL+BWU+1 ) ) ) * ELSE * A( DIAG_INDEX-1, I ) = $ CMPLX( REAL( A( DIAG_INDEX-1, I ) ) $ + REAL( 2*( BWL+BWU+1 ) ) ) ENDIF * ENDIF * 380 CONTINUE * END IF * RETURN * * End of PCBMATGEN * END scalapack-1.8.0/TESTING/LIN/pcdbinfo.f0000640000175000017500000004434010363532303017013 0ustar muammarmuammar SUBROUTINE PCDBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCDBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCDBINFO * END scalapack-1.8.0/TESTING/LIN/pcdbdriver.f0000640000175000017500000007630110363532303017355 0ustar muammarmuammar PROGRAM PCDBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PCDBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by CDB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER CPLXSZ, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, $ PCCHEKPAD, PCDBINFO, PCDBLASCHK, PCDBTRF, $ PCDBTRS, PCFILLPAD, PCMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PCLANGE EXTERNAL LSAME, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PCDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 ) $ + MAX(BWL,BWU) NB = MAX( NB, 2*MAX(BWL,BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BWL+BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((BWL+BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BWL+BWU+1), N, $ (BWL+BWU+1), NB, 0, 0, $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BWL+BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BWL+BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PCLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PCDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N, $ (BWL+BWU+1), NB, MEM( IPA ), $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) * CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PCLANGE( '1', (BWL+BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PCDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PCDBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, 'PCDBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PCMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PCDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCDBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCDBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PCDBLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl bwu + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(BWU)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+bwu) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU)) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*bwu * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*bwu flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*BWU*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*BWU*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+bwu) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(BWU)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*BWU ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCDBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/psdtdriver.f0000640000175000017500000007615110363532303017422 0ustar muammarmuammar PROGRAM PSDTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PSDTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by SDT. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER MEMSIZ, NTESTS, REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, $ PSCHEKPAD, PSDTINFO, PSDTLASCHK, PSDTTRF, $ PSDTTRS, PSFILLPAD, PSMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PSLANGE EXTERNAL LSAME, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PSDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = 1 IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = 1 IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (3), (3), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((3)+10) IMIDPAD = 10 IPOSTPAD = ((3)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (3), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((3)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(3) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL+3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((3)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PSLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PSDTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PSFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PSLANGE( 'I', N, $ (3), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PSDTTRF( N, MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1, $ DESCA, MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW ), IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PSDTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, 'PSDTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PSMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PSDTTRS( TRANS, N, NRHS, $ MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSDTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSDTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (3), N, $ (3), NB, 0, 0, $ ICTXT, (3), IERR( 1 ) ) CALL PSDTLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl INT_ONE + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(INT_ONE)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE)) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*INT_ONE * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*INT_ONE flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*INT_ONE*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*INT_ONE*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+INT_ONE) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(INT_ONE)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*INT_ONE ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSDTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pcptdriver.f0000640000175000017500000007632510363532303017421 0ustar muammarmuammar PROGRAM PCPTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PCPTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by CPT. This test driver performs an * A = L*L**H factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER CPLXSZ, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, INT_TEMP, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, $ PCCHEKPAD, PCFILLPAD, PCMATGEN, PCPTINFO, $ PCPTLASCHK, PCPTTRF, PCPTTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PCLANGE EXTERNAL LSAME, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PCPTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = 1 IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2), (2), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2)+10) IMIDPAD = 10 IPOSTPAD = ((2)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (2), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(2) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL + 3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PCLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PCPTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCBMATGEN( ICTXT, UPLO, 'T', BW, BW, N, (2), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PCFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PCLANGE( 'I', N, $ (2), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( LSAME( UPLO, 'L' ) ) THEN INT_TEMP = 0 ELSE INT_TEMP = DESCA2D( LLD_ ) ENDIF * * For SPD Tridiagonal complex matrices, diagonal is stored * as a real. Thus, compact D into half the space * DO 10 H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) $ +MEM( IPA+INT_TEMP+2*H-1 )*( 0.0E+0, 1.0E+0 ) 10 CONTINUE IF( 2*(NUMROC(N,NB,MYCOL,0,NPCOL)/2).NE. $ NUMROC(N,NB,MYCOL,0,NPCOL) ) THEN H=NUMROC(N,NB,MYCOL,0,NPCOL)/2+1 MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) ENDIF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PCPTTRF( N, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PCPTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, 'PCPTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PCMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PCPTTRS( UPLO, N, NRHS, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCPTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (2), N, $ (2), NB, 0, 0, $ ICTXT, (2), IERR( 1 ) ) CALL PCPTLASCHK( 'H', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCPTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pdqrt17.f0000640000175000017500000003275210363532303016535 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDQRT17( TRANS, IRESID, M, N, NRHS, A, $ IA, JA, DESCA, X, IX, JX, $ DESCX, B, IB, JB, DESCB, WORK, $ RWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IRESID, IX, JA, JB, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ), X( * ) DOUBLE PRECISION RWORK( * ) * .. * * Purpose * ======= * * PDQRT17 computes the ratio * * || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps) * * where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and * * alpha = ||B|| if IRESID = 1 (zero-residual problem) * alpha = ||R|| if IRESID = 2 (otherwise). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies whether or not the transpose of sub( A ) is used. * = 'N': No transpose, op( sub( A ) ) = sub( A ). * = 'T': Transpose, op( sub( A ) ) = sub( A' ). * * IRESID (global input) INTEGER * IRESID = 1 indicates zero-residual problem. * IRESID = 2 indicates non-zero residual. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( B ). * If TRANS = 'T', the number of rows of the distributed * submatrix sub( X ). * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( X ). Otherwise N is the number of rows of * the distributed submatrix sub( B ). * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices sub( X ) and sub( B ). * NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed M-by-N * submatrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) DOUBLE PRECISION pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * If TRANS = 'N', this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ). Otherwise, this * array contains the local pieces of the M-by-NRHS distributed * submatrix sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B,LOCc(JB+NRHS-1)). * If TRANS='N', this array contains the local pieces of the * distributed M-by-NRHS submatrix operand sub( B ). Otherwise, * this array contains the local pieces of the distributed * N-by-NRHS submatrix operand sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0 * otherwise LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0 * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ), * Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ), * NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER IACOL, IBCOL, IBROW, ICOFFB, ICTXT, INFO, $ IOFFA, IROFFB, ISCL, IW, IW2, JW, JW2, MYCOL, $ NRHSQ, NRHSP, MYROW, NCOLS, NPCOL, NPROW, $ NROWS, NROWSP DOUBLE PRECISION ERR, NORMA, NORMB, NORMRS, NORMX, SMLNUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), DESCW2( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL INDXG2P, LSAME, NUMROC, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PDGEMM, PDLACPY, $ PDLASCL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Executable Statements .. * PDQRT17 = ZERO * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN NROWS = M NCOLS = N IOFFA = MOD( JA-1, DESCA( NB_ ) ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN NROWS = N NCOLS = M IOFFA = MOD( IA-1, DESCA( MB_ ) ) ELSE CALL PXERBLA( ICTXT, 'PDQRT17', -1 ) RETURN END IF * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.LE.0 ) $ RETURN * IROFFB = MOD( IA-1, DESCA( MB_ ) ) ICOFFB = MOD( JA-1, DESCA( NB_ ) ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) NRHSP = NUMROC( NRHS+IROFFB, DESCB( NB_ ), MYROW, IBROW, NPROW ) NROWSP = NUMROC( NROWS+IROFFB, DESCA( MB_ ), MYROW, IBROW, NPROW ) * * Assign array descriptor DESCW for workspace WORK, where DESCW * holds a copy of the distributed submatrix sub( B ) * CALL DESCSET( DESCW, NROWS+IROFFB, NRHS+ICOFFB, DESCB( MB_ ), $ DESCB( NB_ ), IBROW, IBCOL, ICTXT, MAX( 1, $ NROWSP ) ) * * Assign array descriptor DESCW2 for workspace WORK, where DESCW2 * holds a copy of the distributed submatrix sub( X**T ) * CALL DESCSET( DESCW2, NRHS+ICOFFB, NCOLS+IOFFA, DESCX( NB_ ), $ DESCX( MB_ ), IBROW, IACOL, ICTXT, MAX( 1, $ NRHSP ) ) * NORMA = PDLANGE( 'One-norm', M, N, A, IA, JA, DESCA, RWORK ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'Precision' ) ISCL = 0 * * compute residual and scale it * IW = 1 + IROFFB JW = 1 + ICOFFB CALL PDLACPY( 'All', NROWS, NRHS, B, IB, JB, DESCB, WORK, IW, JW, $ DESCW ) CALL PDGEMM( TRANS, 'No transpose', NROWS, NRHS, NCOLS, -ONE, A, $ IA, JA, DESCA, X, IX, JX, DESCX, ONE, WORK, IW, JW, $ DESCW ) NORMRS = PDLANGE( 'Max', NROWS, NRHS, WORK, IW, JW, DESCW, $ RWORK ) IF( NORMRS.GT.SMLNUM ) THEN ISCL = 1 CALL PDLASCL( 'General', NORMRS, ONE, NROWS, NRHS, WORK, $ IW, JW, DESCW, INFO ) END IF * * compute R'*sub( A ) * IW2 = 1 + ICOFFB JW2 = 1 + IOFFA CALL PDGEMM( 'Transpose', TRANS, NRHS, NCOLS, NROWS, ONE, WORK, $ IW, JW, DESCW, A, IA, JA, DESCA, ZERO, $ WORK( NROWSP*NRHSQ+1 ), IW2, JW2, DESCW2 ) * * compute and properly scale error * ERR = PDLANGE( 'One-norm', NRHS, NCOLS, WORK( NROWSP*NRHSQ+1 ), $ IW2, JW2, DESCW2, RWORK ) IF( NORMA.NE.ZERO ) $ ERR = ERR / NORMA * IF( ISCL.EQ.1 ) $ ERR = ERR*NORMRS * IF( IRESID.EQ.1 ) THEN NORMB = PDLANGE( 'One-norm', NROWS, NRHS, B, IB, JB, DESCB, $ RWORK ) IF( NORMB.NE.ZERO ) $ ERR = ERR / NORMB ELSE NORMX = PDLANGE( 'One-norm', NCOLS, NRHS, X, IX, JX, DESCX, $ RWORK ) IF( NORMX.NE.ZERO ) $ ERR = ERR / NORMX END IF * PDQRT17 = ERR / ( PDLAMCH( ICTXT, 'Epsilon' ) * $ DBLE( MAX( M, N, NRHS ) ) ) * RETURN * * End of PDQRT17 * END scalapack-1.8.0/TESTING/LIN/pdgbdriver.f0000640000175000017500000007747310363532303017374 0ustar muammarmuammar PROGRAM PDGBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PDGBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by DGB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTMEM INTEGER, default = 2048. * INTMEM is the size of the integer workspace used in this * driver as input as the IPIV vector. It represents an * upper bound on NB, the blocksize of the data * distribution. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER INTMEM PARAMETER ( INTMEM = 2048 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER DBLESZ, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER IPIV(INTMEM) INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, $ PDCHEKPAD, PDDBLASCHK, PDFILLPAD, PDGBINFO, $ PDGBTRF, PDGBTRS, PDMATGEN, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PDLANGE EXTERNAL LSAME, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PDGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) $ + (BWL+BWU) NB = MAX( NB, 2*(BWL+BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 * IF( NB.GT.INTMEM ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) THEN WRITE( NOUT,* )'You have chosen an ' $ ,'NB > INTMEM in the driver.' WRITE(NOUT, *)'Please edit the driver ' $ ,'and increase the value of INTMEM' ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2*BWL+2*BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((2*BWL+2*BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, $ (2*BWL+2*BWU+1), NB, 0, 0, $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 1 * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PDLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PDDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PDLANGE( '1', (2*BWL+2*BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PDGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PDGBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, 'PDGBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PDMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PDGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDGBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PDDBLASCHK( 'N', 'N', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl (bwl+bwu) + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE((BWL+BWU))) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST (bwl+bwu)*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE((BWL+BWU))*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST (bwl+bwu)*(bwl+bwu) * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+(bwl+bwu)) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE((BWL+BWU))) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * (BWL+BWU)*(BWL+BWU) ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDGBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pdqrinfo.f0000640000175000017500000004254210363532303017053 0ustar muammarmuammar SUBROUTINE PDQRINFO( SUMMRY, NOUT, NFACT, FACTOR, LDFACT, NMAT, $ MVAL, LDMVAL, NVAL, LDNVAL, NNB, MBVAL, $ LDMBVAL, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDFACT, LDMBVAL, LDMVAL, LDNBVAL, LDNVAL, $ LDPVAL, LDQVAL, NFACT, NGRIDS, NMAT, NNB, $ NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*2 FACTOR( LDFACT ) CHARACTER*(*) SUMMRY INTEGER MBVAL( LDMBVAL ), MVAL( LDMVAL ), $ NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDQRINFO gets needed startup information for the QR factoriza- * tion routines and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NFACT (global output) INTEGER * The number of different factorization types to be tested. * * FACTOR (global output) CHARACTER*2 array of dimension of LDFACT, * The factorization types to be tested: * if FACTOR(i) = 'QR' then QR factorization, * if FACTOR(i) = 'QL' then QL factorization, * if FACTOR(i) = 'LQ' then LQ factorization, * if FACTOR(i) = 'RQ' then RQ factorization, * if FACTOR(i) = 'QP' then QR factorization with column * pivoting. * if FACTOR(i) = 'TZ' then complete orthogonal factorization. * * LDFACT (global input) INTEGER * The maximum number of different factorization types to be * tested. LDFACT >= NFACT. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * MVAL (global output) INTEGER array of dimension (LDNVAL), the * values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M, LDNVAL > = NMAT. * * NVAL (global output) INTEGER array of dimension (LDNVAL), the * values of N (number of columns in matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for MB and * NB. * * MBVAL (global output) INTEGER array of dimension (LDMBVAL), the * values of MB (row blocksize) to run the code with. * * LDMBVAL (global input) INTEGER * The maximum number of different values that can be used for * MB, LDMBVAL >= NNB. * * NBVAL (global output) INTEGER array of dimension (LDNBVAL), the * values of NB (column blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array of dimension (LDPVAL), the * values of P (number of process rows) to run the code with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array of dimension (LDQVAL), the * values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 4, LDFACT+LDMVAL+LDNVAL+LDMBVAL+LDNBVAL+LDPVAL+LDQVAL ) * used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT, K DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAMEN DOUBLE PRECISION PDLAMCH EXTERNAL LSAMEN, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='QR.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get the matrix types to be tested * READ( NIN, FMT = * ) NFACT IF( NFACT.LT.1 .OR. NFACT.GT.LDFACT ) THEN WRITE( NOUT, FMT = 9994 ) 'nb of factorization', LDFACT GO TO 40 END IF READ( NIN, FMT = * ) ( FACTOR( I ), I = 1, NFACT ) * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 40 ELSE IF( NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'M', LDMVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDMBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'MB', LDMBVAL GO TO 40 ELSE IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 40 END IF READ( NIN, FMT = * ) ( MBVAL( I ), I = 1, NNB ) READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 40 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 40 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I ) * QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS WORK( 4 ) = NFACT CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 DO 20 K = 1, NFACT IF( LSAMEN( 2, FACTOR( K ), 'QR' ) ) THEN WORK( I ) = 1 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QL' ) ) THEN WORK( I ) = 2 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'LQ' ) ) THEN WORK( I ) = 3 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'RQ' ) ) THEN WORK( I ) = 4 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'QP' ) ) THEN WORK( I ) = 5 I = I + 1 ELSE IF( LSAMEN( 2, FACTOR( K ), 'TZ' ) ) THEN WORK( I ) = 6 I = I + 1 END IF 20 CONTINUE * CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, MBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QR factorizations routines.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision QR factorizations '// $ 'routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' || A - QR || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QL || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - LQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - RQ || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - QRP || / (|| A || * eps * N) and/or' WRITE( NOUT, FMT = 9999 ) $ ' || A - TZ || / (|| A || * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'MB : The row blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The column blocksize of the blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 ) $ 'Fact Time: Time in seconds to factor the'// $ ' matrix.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Execution rate of the '// $ 'factorization.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'MB ', ( MBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) NFACT = WORK( 4 ) * I = NFACT + 2*NMAT + 2*NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * DO 30 K = 1, NFACT IF( WORK( K ).EQ.1 ) THEN FACTOR( K ) = 'QR' ELSE IF( WORK( K ).EQ.2 ) THEN FACTOR( K ) = 'QL' ELSE IF( WORK( K ).EQ.3 ) THEN FACTOR( K ) = 'LQ' ELSE IF( WORK( K ).EQ.4 ) THEN FACTOR( K ) = 'RQ' ELSE IF( WORK( K ).EQ.5 ) THEN FACTOR( K ) = 'QP' ELSE IF( WORK( K ).EQ.6 ) THEN FACTOR( K ) = 'TZ' END IF 30 CONTINUE * I = NFACT + 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, MBVAL, 1 ) I = I + NNB CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 40 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDQRINFO * END scalapack-1.8.0/TESTING/LIN/pzluinfo.f0000640000175000017500000004110110363532303017065 0ustar muammarmuammar SUBROUTINE PZLUINFO( SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, EST, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDNVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ WORK( * ) * .. * * Purpose * ======= * * PZLUINFO gets needed startup information for LU factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M and N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * M and N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 6, 2*LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL ) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LU.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( EST ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 6, 1, WORK, 6 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Ax=b by LU factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex double precision LU factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LU time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 6, 1, WORK, 6, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZLUINFO * END scalapack-1.8.0/TESTING/LIN/pzptdriver.f0000640000175000017500000007637010363532303017450 0ustar muammarmuammar PROGRAM PZPTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PZPTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by ZPT. This test driver performs an * A = L*L**H factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM/ZPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER MEMSIZ, NTESTS, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, INT_TEMP, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZBMATGEN, $ PZCHEKPAD, PZFILLPAD, PZMATGEN, PZPTINFO, $ PZPTLASCHK, PZPTTRF, PZPTTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PZLANGE EXTERNAL LSAME, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PZPTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = 1 IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2), (2), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2)+10) IMIDPAD = 10 IPOSTPAD = ((2)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (2), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(2) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL + 3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PZLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PZPTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PZBMATGEN( ICTXT, UPLO, 'T', BW, BW, N, (2), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PZFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PZLANGE( 'I', N, $ (2), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( LSAME( UPLO, 'L' ) ) THEN INT_TEMP = 0 ELSE INT_TEMP = DESCA2D( LLD_ ) ENDIF * * For SPD Tridiagonal complex matrices, diagonal is stored * as a real. Thus, compact D into half the space * DO 10 H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) $ +MEM( IPA+INT_TEMP+2*H-1 )*( 0.0D+0, 1.0D+0 ) 10 CONTINUE IF( 2*(NUMROC(N,NB,MYCOL,0,NPCOL)/2).NE. $ NUMROC(N,NB,MYCOL,0,NPCOL) ) THEN H=NUMROC(N,NB,MYCOL,0,NPCOL)/2+1 MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) ENDIF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PZPTTRF( N, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PZPTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PZCHEKPAD( ICTXT, 'PZPTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PZMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PZPTTRS( UPLO, N, NRHS, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZPTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (2), N, $ (2), NB, 0, 0, $ ICTXT, (2), IERR( 1 ) ) CALL PZPTLASCHK( 'H', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZPTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzinvdriver.f0000640000175000017500000010171310430450246017607 0ustar muammarmuammar PROGRAM PZINVDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PZINVDRIVER is the main test program for the COMPLEX*16 * SCALAPACK matrix inversion routines. This test driver computes the * inverse of different kind of matrix and tests the results. * * The program must be driven by a short data file. An annotated example * of a data file can be obtained by deleting the first 3 characters * from the following 14 lines: * 'ScaLAPACK Matrix Inversion Testing input file' * 'PVM machine.' * 'INV.out' output file name (if any) * 6 device out * 5 number of matrix types (next line) * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD * 4 number of problems sizes * 1000 2000 3000 4000 values of N * 3 number of NB's * 4 30 35 values of NB * 2 number of process grids (ordered P & Q) * 4 2 values of P * 4 4 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ, DBLESZ and ZPLXSZ indicate the length in bytes on * the given platform for an integer, a double precision real * and a double precision complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM, ZPLXSZ COMPLEX*16 PADVAL, ZERO PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ ZPLXSZ = 16, MEMSIZ = TOTMEM / ZPLXSZ, $ NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER UPLO CHARACTER*3 MTYP CHARACTER*6 PASSED CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPPIV, IPREPAD, IPOSTPAD, IPIW, IPW, ITEMP, J, $ K, KTESTS, KPASS, KFAIL, KSKIP, L, LCM, LIPIV, $ LIWORK, LWORK, MYCOL, MYROW, N, NB, NGRIDS, $ NMAT, NMTYP, NNB, NOUT, NP, NPCOL, NPROCS, $ NPROW, NQ, WORKIINV, WORKINV, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, RCOND, TMFLOPS * .. * .. Local Arrays .. CHARACTER*3 MATTYP( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZGETRF, PZGETRI, $ PZINVCHK, PZINVINFO, PZLASET, $ PZMATGEN, PZPOTRF, PZPOTRI, $ PZTRTRI, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PZLANGE, PZLANHE, PZLANSY, PZLANTR EXTERNAL ICEIL, ILCM, LSAMEN, NUMROC, PZLANGE, $ PZLANHE, PZLANSY, PZLANTR * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZINVINFO( OUTFILE, NOUT, NMTYP, MATTYP, NTESTS, NMAT, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different matrix types * DO 40 I = 1, NMTYP * MTYP = MATTYP( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a general matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is an upper triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'LTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a lower triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a Hermitian positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the upper triangular part will be '// $ 'referenced.' ELSE IF( LSAMEN( 3, MTYP, 'LPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a Hermitian positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the lower triangular part will be '// $ 'referenced.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * N = NVAL( K ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * NB = NBVAL( L ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 * LCM = ILCM( NPROW, NPCOL ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Pivots are needed by LU factorization * IPPIV = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + $ IPREPAD LIPIV = ICEIL( INTGSZ * ( NP + NB ), ZPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * LWORK = MAX( 1, NP * DESCA( NB_ ) ) WORKINV = LWORK + IPOSTPAD * * Figure the amount of workspace required by the * general matrix inversion * IF( NPROW.EQ.NPCOL ) THEN LIWORK = NQ + DESCA( NB_ ) ELSE * * change the integer workspace needed for PDGETRI * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) * * $ ICEIL( ICEIL( DESCA( LLD_ ), * $ DESCA( MB_ ) ), LCM / NPROW ) ) * $ + NQ LIWORK = NUMROC( DESCA( M_ ) + $ DESCA( MB_ ) * NPROW $ + MOD ( 1 - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF WORKIINV = ICEIL( LIWORK*INTGSZ, ZPLXSZ ) + $ IPOSTPAD IPIW = IPW + WORKINV + IPREPAD WORKSIZ = WORKINV + IPREPAD + WORKIINV * ELSE * * No pivots or workspace needed for triangular or * Hermitian positive definite matrices. * IPW = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD WORKSIZ = 1 + IPOSTPAD * END IF * IF( CHECK ) THEN * * Figure amount of work space for the norm * computations * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN ITEMP = NQ ELSE ITEMP = 2 * NQ + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), $ LCM / NPROW ) END IF END IF WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ ICEIL( DBLESZ * ITEMP, ZPLXSZ ) ) * * Figure the amount of workspace required by the * checking routine * WORKSIZ = MAX( WORKSIZ, 2 * NB * MAX( 1, NP ) ) + $ IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'inversion', $ ( IPW + WORKSIZ ) * ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Generate a general diagonally dominant matrix A * CALL PZMATGEN( ICTXT, 'N', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Generate a Hermitian positive definite matrix A * CALL PZMATGEN( ICTXT, 'H', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * END IF * * Zeros not-referenced part of A, if any. * IF( LSAMEN( 1, MTYP, 'U' ) ) THEN * UPLO = 'U' CALL PZLASET( 'Lower', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 2, 1, DESCA ) * ELSE IF( LSAMEN( 1, MTYP, 'L' ) ) THEN * UPLO = 'L' CALL PZLASET( 'Upper', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 1, 2, DESCA ) * ELSE * UPLO = 'G' * END IF * * Need 1-norm of A for checking * IF( CHECK ) THEN * CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * CALL PZFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANGE( '1', N, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * ANORM = PZLANTR( '1', UPLO, 'Non unit', N, N, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANTR', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANTR', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * ANORM = PZLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'SY' ) ) THEN * CALL PZFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD,IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'HE' ) ) THEN CALL PZFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * END IF * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Perform LU factorization * CALL SLTIMER( 1 ) CALL PZGETRF( N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGETRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PZGETRI( N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ), LWORK, $ MEM( IPIW ), LIWORK, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGETRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRI', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRI', $ WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRI', $ WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PZTRTRI( UPLO, 'Non unit', N, MEM( IPA ), 1, $ 1, DESCA, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZTRTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Perform Cholesky factorization * CALL SLTIMER( 1 ) CALL PZPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the Hermitian positive definite matrix * inversion * CALL SLTIMER( 2 ) CALL PZPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPOTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * END IF * IF( CHECK ) THEN * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Compute fresid = || inv(A)*A-I || * CALL PZINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA, $ IASEED, ANORM, FRESID, RCOND, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZINVCHK', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZINVCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. INFO.EQ.0 .AND. $ ( (FRESID-FRESID) .EQ. 0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 IF( INFO.GT.0 ) THEN PASSED = 'SINGUL' ELSE PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only the timing * operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * 8/3 N^3 - N^2 flops for LU factorization * NOPS = ( 8.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) - $ DBLE( N )**2 * * 16/3 N^3 for matrix inversion * NOPS = NOPS + $ ( 16.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * 4/3 N^3 + 2 N^2 for triangular matrix inversion * CTIME(1) = 0.0D+0 WTIME(1) = 0.0D+0 NOPS = ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 2.0D+0 * ( DBLE( N )**2 ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * 4/3 N^3 + 3 N^2 flops for Cholesky factorization * NOPS = ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 2.0D+0 * ( DBLE( N )**2 ) * * 8/3 N^3 + 5 N^2 flops for Cholesky inversion * NOPS = NOPS + $ ( 8.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 5.0D+0 * ( DBLE( N )**2 ) * END IF * * Figure total megaflops -- factorization and * inversion, for WALL and CPU time, and print * output. * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ) + CTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ', $ ' MFLOPS Cond Resid CHECK' ) 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ', $ '----------- ------- ------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2, $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PZINVDRIVER * END scalapack-1.8.0/TESTING/LIN/pdgbmv1.f0000640000175000017500000007600610363532303016573 0ustar muammarmuammar SUBROUTINE PDGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDBTRF and this is stored in AF. If a linear system * is to be solved using PDDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PDDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PDDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL DGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL DTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'T' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL DLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL DTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL DLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL DTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL DGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL DTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PDBsBMV1 * END scalapack-1.8.0/TESTING/LIN/pdlaschk.f0000640000175000017500000003024110363532303017013 0ustar muammarmuammar SUBROUTINE PDLASCHK( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, SYMM INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION WORK( * ), X( * ) * .. * * Purpose * ======= * * PDLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed matrix, * otherwise sub( A ) is a general distributed matrix. * * DIAG (global input) CHARACTER * If DIAG = 'D', sub( A ) is diagonally dominant. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 0. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,LOCc(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X), * NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM, $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF, $ IXCOL, IXROW, J, JBRHS, JJ, JJA, JJX, LDX, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION BETA, DIVISOR, EPS, RESID1 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, DGEBR2D, $ DGEBS2D, DGEMM, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, PBDTRAN, PDMATGEN * .. * .. External Functions .. INTEGER IDAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * LDX = MAX( 1, NP ) IPB = 1 IPX = IPB + NP * DESCX( NB_ ) IPA = IPX + NQ * DESCX( NB_ ) * IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF * ICURCOL = IXCOL * * Loop over the rhs * DO 40 J = 1, NRHS, DESCX( NB_ ) JBRHS = MIN( DESCX( NB_ ), NRHS-J+1 ) * * Transpose x from ICURCOL to all rows * IOFFX = IIX + ( JJX - 1 ) * DESCX( LLD_ ) CALL PBDTRAN( ICTXT, 'Column', 'Transpose', N, JBRHS, $ DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO, $ WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL, $ WORK( IPA ) ) * * Regenerate B in IXCOL * IF( MYCOL.EQ.ICURCOL ) THEN CALL PDMATGEN( ICTXT, 'N', 'N', DESCX( M_ ), DESCX( N_ ), $ DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX, $ IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1, $ JBRHS, MYROW, MYCOL, NPROW, NPCOL ) BETA = ONE ELSE BETA = ZERO END IF * IF( NQ.GT.0 ) THEN DO 10 II = IIA, IIA+NP-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IIA+NP-II ) * * Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1). * CALL PDMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK( IPA ), IB, DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, II-1, IB, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Compute B <= B - A * X. * CALL DGEMM( 'No transpose', 'Transpose', IB, JBRHS, NQ, $ -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS, $ BETA, WORK( IPB+II-IIA ), LDX ) * 10 CONTINUE * ELSE IF( MYCOL.NE.ICURCOL ) THEN * CALL DLASET( 'All', NP, JBRHS, ZERO, ZERO, WORK( IPB ), $ LDX ) * END IF * * Add B rowwise to ICURCOL * CALL DGSUM2D( ICTXT, 'Row', ' ', NP, JBRHS, WORK( IPB ), LDX, $ MYROW, ICURCOL ) * IF( MYCOL.EQ.ICURCOL ) THEN * * Figure || A * X - B || & || X || * IPW = IPA + JBRHS DO 20 JJ = 0, JBRHS - 1 IF( NP.GT.0 ) THEN II = IDAMAX( NP, WORK( IPB+JJ*LDX ), 1 ) WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) ) WORK( IPW+JJ ) = ABS( X( IOFFX + IDAMAX( NP, $ X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ* $ DESCX( LLD_ ) ) ) ELSE WORK( IPA+JJ ) = ZERO WORK( IPW+JJ ) = ZERO END IF 20 CONTINUE * * After DGAMX2D computation, * WORK(IPB) has the maximum of || Ax - b ||, and * WORK(IPX) has the maximum of || X ||. * CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 2*JBRHS, $ WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL ) * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * IF( MYROW.EQ.0 ) THEN DO 30 JJ = 0, JBRHS - 1 RESID1 = WORK( IPA+JJ ) / ( WORK( IPW+JJ )*DIVISOR ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 30 CONTINUE IF( MYCOL.NE.0 ) $ CALL DGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 ) END IF * ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * CALL DGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 * END IF * IF( MYCOL.EQ.ICURCOL ) $ JJX = JJX + JBRHS ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, RESID, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, RESID, 1, 0, 0 ) END IF * RETURN * * End of PDLASCHK * END scalapack-1.8.0/TESTING/LIN/pzlaschk.f0000640000175000017500000003044410363532303017046 0ustar muammarmuammar SUBROUTINE PZLASCHK( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, SYMM INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 WORK( * ), X( * ) * .. * * Purpose * ======= * * PZLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed matrix, * otherwise sub( A ) is a general distributed matrix. * * DIAG (global input) CHARACTER * If DIAG = 'D', sub( A ) is diagonally dominant. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 0. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,LOCc(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X), * NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM, $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF, $ IXCOL, IXROW, J, JBRHS, JJ, JJA, JJX, LDX, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION DIVISOR, EPS, RESID1 COMPLEX*16 BETA * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, PBZTRAN, $ PZMATGEN, ZGAMX2D, ZGEMM, ZGSUM2D, $ ZLASET * .. * .. External Functions .. INTEGER IZAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * LDX = MAX( 1, NP ) IPB = 1 IPX = IPB + NP * DESCX( NB_ ) IPA = IPX + NQ * DESCX( NB_ ) * IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF * ICURCOL = IXCOL * * Loop over the rhs * DO 40 J = 1, NRHS, DESCX( NB_ ) JBRHS = MIN( DESCX( NB_ ), NRHS-J+1 ) * * Transpose x from ICURCOL to all rows * IOFFX = IIX + ( JJX - 1 ) * DESCX( LLD_ ) CALL PBZTRAN( ICTXT, 'Column', 'Transpose', N, JBRHS, $ DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO, $ WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL, $ WORK( IPA ) ) * * Regenerate B in IXCOL * IF( MYCOL.EQ.ICURCOL ) THEN CALL PZMATGEN( ICTXT, 'N', 'N', DESCX( M_ ), DESCX( N_ ), $ DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX, $ IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1, $ JBRHS, MYROW, MYCOL, NPROW, NPCOL ) BETA = ONE ELSE BETA = ZERO END IF * IF( NQ.GT.0 ) THEN DO 10 II = IIA, IIA+NP-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IIA+NP-II ) * * Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1). * CALL PZMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK( IPA ), IB, DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, II-1, IB, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Compute B <= B - A * X. * CALL ZGEMM( 'No transpose', 'Transpose', IB, JBRHS, NQ, $ -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS, $ BETA, WORK( IPB+II-IIA ), LDX ) * 10 CONTINUE * ELSE IF( MYCOL.NE.ICURCOL ) THEN * CALL ZLASET( 'All', NP, JBRHS, ZERO, ZERO, WORK( IPB ), $ LDX ) * END IF * * Add B rowwise to ICURCOL * CALL ZGSUM2D( ICTXT, 'Row', ' ', NP, JBRHS, WORK( IPB ), LDX, $ MYROW, ICURCOL ) * IF( MYCOL.EQ.ICURCOL ) THEN * * Figure || A * X - B || & || X || * IPW = IPA + JBRHS DO 20 JJ = 0, JBRHS - 1 IF( NP.GT.0 ) THEN II = IZAMAX( NP, WORK( IPB+JJ*LDX ), 1 ) WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) ) WORK( IPW+JJ ) = ABS( X( IOFFX + IZAMAX( NP, $ X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ* $ DESCX( LLD_ ) ) ) ELSE WORK( IPA+JJ ) = ZERO WORK( IPW+JJ ) = ZERO END IF 20 CONTINUE * * After ZGAMX2D computation, * WORK(IPB) has the maximum of || Ax - b ||, and * WORK(IPX) has the maximum of || X ||. * CALL ZGAMX2D( ICTXT, 'Column', ' ', 1, 2*JBRHS, $ WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL ) * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * IF( MYROW.EQ.0 ) THEN DO 30 JJ = 0, JBRHS - 1 RESID1 = DBLE( WORK( IPA+JJ ) ) / $ ( DBLE( WORK( IPW+JJ ) )*DIVISOR ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 30 CONTINUE IF( MYCOL.NE.0 ) $ CALL DGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 ) END IF * ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * CALL DGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 * END IF * IF( MYCOL.EQ.ICURCOL ) $ JJX = JJX + JBRHS ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, RESID, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, RESID, 1, 0, 0 ) END IF * RETURN * * End of PZLASCHK * END scalapack-1.8.0/TESTING/LIN/pddbmv1.f0000640000175000017500000007600610363532303016570 0ustar muammarmuammar SUBROUTINE PDGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDBTRF and this is stored in AF. If a linear system * is to be solved using PDDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PDDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PDDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = ZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = ZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL DGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL DTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'T' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL DLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL DTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL DLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL DTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL DGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, ONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, ZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL DCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL DTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL DGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = ZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL DCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL DTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL DTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL DGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL DGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL DAXPY( BWU, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL DGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL DAXPY( BWL, ONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PDBsBMV1 * END scalapack-1.8.0/TESTING/LIN/pcdbmv1.f0000640000175000017500000007612010363532303016564 0ustar muammarmuammar SUBROUTINE PCGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDBTRF and this is stored in AF. If a linear system * is to be solved using PCDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL CGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL CTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'C' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL CLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL CTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL CLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL CTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL CGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL CTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PCBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pdtzrzrv.f0000640000175000017500000002442610363532303017137 0ustar muammarmuammar SUBROUTINE PDTZRZRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDTZRZRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from T, Z * computed by PDTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= M >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors T and Z computed * by PDTZRZF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PDTZRZF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JM1, JV, L, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLACPY, $ PDLARZB, PDLARZT, PDLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LT.M ) $ RETURN * L = N - M JM1 = JA + MIN( M+1, N ) - 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA + 1 JV = ICOFF + JM1 - JA + 1 * * Compute upper triangular matrix T * CALL PDLARZT( 'Backward', 'Rowwise', L, IB, A, IA, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', IB, L, A, IA, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(IA:IA+IB-1,JA:JA+IB-1) * CALL PDLACPY( 'Lower', IB-1, IB-1, A, IA+1, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) * * Zeroes the row panel of sub( A ) to get T(IA:IN,JA:JA+N-1) * CALL PDLASET( 'All', IB, L, ZERO, ZERO, A, IA, JM1, DESCA ) CALL PDLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Apply block Householder transformation * CALL PDLARZB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ IN-IA+1, N, IB, L, WORK( IPV ), 1, JV, DESCV, $ WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * * Restore strict lower part of A( IA:IA+IB-1, JA:JA+N-1 ) * CALL PDLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, ICOFF+1, DESCV, $ A, IA+1, JA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute upper triangular matrix T * CALL PDLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', IB, L, A, I, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(I:I+IB-1,J:J+IB-1 ) * CALL PDLACPY( 'Lower', IB-1, IB-1, A, I+1, JA+I-IA, DESCA, $ WORK( IPV ), 1, ICOFF+1+I-IA, DESCV ) * * Zeoes the row panel of sub( A ) to get T(IA:I-1,JA+I-IA:JA+N-1) * CALL PDLASET( 'All', IB, L, ZERO, ZERO, A, I, JM1, DESCA ) CALL PDLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, I+1, JA+I-IA, $ DESCA ) * * Apply block Householder transformation * CALL PDLARZB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I+IB-IA, N-I+IA, IB, L, WORK( IPV ), 1, JV, $ DESCV, WORK( IPT ), A, IA, JA+I-IA, DESCA, $ WORK( IPW ) ) * CALL PDLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, $ ICOFF+1+I-IA, DESCV, A, I+1, JA+I-IA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDTZRZRV * END scalapack-1.8.0/TESTING/LIN/pdptdriver.f0000640000175000017500000007474710363532303017430 0ustar muammarmuammar PROGRAM PDPTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PDPTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by DPT. This test driver performs an * A = L*L**T factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER DBLESZ, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, INT_TEMP, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, $ PDCHEKPAD, PDFILLPAD, PDMATGEN, PDPTINFO, $ PDPTLASCHK, PDPTTRF, PDPTTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PDLANGE EXTERNAL LSAME, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PDPTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = 1 IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2), (2), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2)+10) IMIDPAD = 10 IPOSTPAD = ((2)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (2), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(2) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL + 3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PDLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PDPTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDBMATGEN( ICTXT, UPLO, 'T', BW, BW, N, (2), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PDFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PDLANGE( 'I', N, $ (2), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( LSAME( UPLO, 'L' ) ) THEN INT_TEMP = 0 ELSE INT_TEMP = DESCA2D( LLD_ ) ENDIF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PDPTTRF( N, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PDPTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, 'PDPTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PDMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PDPTTRS( N, NRHS, MEM( IPA+INT_TEMP ), $ MEM( IPA+1*( NB+10-INT_TEMP ) ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDPTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDPTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (2), N, $ (2), NB, 0, 0, $ ICTXT, (2), IERR( 1 ) ) CALL PDPTLASCHK( 'S', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDPTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pstzrzrv.f0000640000175000017500000002436210363532303017155 0ustar muammarmuammar SUBROUTINE PSTZRZRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSTZRZRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from T, Z * computed by PSTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= M >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors T and Z computed * by PSTZRZF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PSTZRZF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JM1, JV, L, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLACPY, $ PSLARZB, PSLARZT, PSLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LT.M ) $ RETURN * L = N - M JM1 = JA + MIN( M+1, N ) - 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA + 1 JV = ICOFF + JM1 - JA + 1 * * Compute upper triangular matrix T * CALL PSLARZT( 'Backward', 'Rowwise', L, IB, A, IA, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', IB, L, A, IA, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(IA:IA+IB-1,JA:JA+IB-1) * CALL PSLACPY( 'Lower', IB-1, IB-1, A, IA+1, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) * * Zeroes the row panel of sub( A ) to get T(IA:IN,JA:JA+N-1) * CALL PSLASET( 'All', IB, L, ZERO, ZERO, A, IA, JM1, DESCA ) CALL PSLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Apply block Householder transformation * CALL PSLARZB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ IN-IA+1, N, IB, L, WORK( IPV ), 1, JV, DESCV, $ WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * * Restore strict lower part of A( IA:IA+IB-1, JA:JA+N-1 ) * CALL PSLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, ICOFF+1, DESCV, $ A, IA+1, JA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute upper triangular matrix T * CALL PSLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'All', IB, L, A, I, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(I:I+IB-1,J:J+IB-1 ) * CALL PSLACPY( 'Lower', IB-1, IB-1, A, I+1, JA+I-IA, DESCA, $ WORK( IPV ), 1, ICOFF+1+I-IA, DESCV ) * * Zeoes the row panel of sub( A ) to get T(IA:I-1,JA+I-IA:JA+N-1) * CALL PSLASET( 'All', IB, L, ZERO, ZERO, A, I, JM1, DESCA ) CALL PSLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, I+1, JA+I-IA, $ DESCA ) * * Apply block Householder transformation * CALL PSLARZB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I+IB-IA, N-I+IA, IB, L, WORK( IPV ), 1, JV, $ DESCV, WORK( IPT ), A, IA, JA+I-IA, DESCA, $ WORK( IPW ) ) * CALL PSLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, $ ICOFF+1+I-IA, DESCV, A, I+1, JA+I-IA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSTZRZRV * END scalapack-1.8.0/TESTING/LIN/pzdblaschk.f0000640000175000017500000002624710363532303017362 0ustar muammarmuammar SUBROUTINE PZDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PZDBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'C', A 'Conjugate transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, PBZTRAN, $ PZMATGEN, ZGAMX2D, ZGEMM, ZGSUM2D, $ ZLASET * .. * .. External Functions .. INTEGER IZAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PZBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PZBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PZBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PZGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO ) * * * Regenerate column of B * CALL PZMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PZAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDZNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDZNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PZBLASCHK * END scalapack-1.8.0/TESTING/LIN/pdqrt14.f0000640000175000017500000003362010363532303016525 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDQRT14( TRANS, M, N, NRHS, A, IA, JA, $ DESCA, X, IX, JX, DESCX, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IX, JA, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PDQRT14 checks whether sub( X ) is in the row space of sub( A ) or * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both * sub( X ) and sub( A ) such that their norms are in the range * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing * triangle, scaled by MAX(M,N,NRHS)*eps. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * = 'N': No transpose, check for sub( X ) in the row space of * sub( A ), * = 'T': Transpose, check for sub( X ) in row space of * sub( A )'. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( X ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array * contains the local pieces of the M-by-N distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) DOUBLE PRECISION pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * On entry, this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N', * and the M-by-NRHS distributed submatrix sub( X ) otherwise. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where * * IF TRANS='N', (LQ fact) * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW, * NPROW ) * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW, * RSRC_A, NPROW ) * LWF = MB_A * ( MB_A + MNRHSP + NQ0 ) * ELSE (QR fact) * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL, * NPCOL ) * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL, * CSRC_A, NPCOL ) * LWF = NB_A * ( NB_A + MP0 + NNRHSQ ) * END IF * * and, * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL TPSD INTEGER IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO, $ IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA, $ JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL, $ MYROW, NPCOL, NPROW, NPW, NQWA, NQW DOUBLE PRECISION AMAX, ANRM, ERR, XNRM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) DOUBLE PRECISION RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLANGE, PDLAMCH EXTERNAL LSAME, NUMROC, PDLANGE, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, DGAMX2D, INFOG2L, $ PDAMAX, PDCOPY, PDGELQF, PDGEQRF, $ PDLACPY, PDLASCL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PDQRT14 = ZERO * IPWA = 1 IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IWA = IROFFA + 1 JWA = ICOFFA + 1 CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN IF( N.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .FALSE. MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) NQW = NQWA * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA + M JWX = JWA LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) * ELSE IF( LSAME( TRANS, 'T' ) ) THEN IF( M.LE.0 .OR. NRHS.LE.0 ) $ RETURN TPSD = .TRUE. MPW = MPWA NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) * * Assign descriptor DESCW for workspace WORK and pointers to * matrices sub( A ) and sub( X ) in workspace * IWX = IWA JWX = JWA + N LDW = MAX( 1, MPW ) CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW ) ELSE CALL PXERBLA( ICTXT, 'PDQRT14', -1 ) RETURN END IF * * Copy and scale sub( A ) * IPTAU = IPWA + MPW*NQW CALL PDLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA, $ JWA, DESCW ) RWORK( 1 ) = ZERO ANRM = PDLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK ) IF( ANRM.NE.ZERO ) $ CALL PDLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA, $ JWA, DESCW, INFO ) * * Copy sub( X ) or sub( X )' into the right place and scale it * IF( TPSD ) THEN * * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work * DO 10 J = 1, NRHS CALL PDCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX, $ JWX+J-1, DESCW, 1 ) 10 CONTINUE XNRM = PDLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PDLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1) * MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPW = IPTAU + MIN( MQW, NQW ) LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) ) CALL PDGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in upper triangle of * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1) * ERR = ZERO IF( N.LT.M ) THEN DO 20 J = JWX, JWA+N+NRHS-1 CALL PDAMAX( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ), $ IWA+N, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 20 CONTINUE END IF CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * ELSE * * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work * DO 30 J = 1, NRHS CALL PDCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), $ IWX+J-1, JWX, DESCW, DESCW( M_ ) ) 30 CONTINUE * XNRM = PDLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW, $ RWORK ) IF( XNRM.NE.ZERO ) $ CALL PDLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX, $ JWX, DESCW, INFO ) * * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1) * NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPW = IPTAU + MIN( MPW, NPW ) LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) ) CALL PDGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW, $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO ) * * Compute largest entry in lower triangle in * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1) * ERR = ZERO DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 ) CALL PDAMAX( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ), $ IWX+J-JWA-M, J, DESCW, 1 ) ERR = MAX( ERR, ABS( AMAX ) ) 40 CONTINUE CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2, $ -1, -1, 0 ) * END IF * PDQRT14 = ERR / ( DBLE( MAX( M, N, NRHS ) ) * $ PDLAMCH( ICTXT, 'Epsilon' ) ) * RETURN * * End of PDQRT14 * END scalapack-1.8.0/TESTING/LIN/pslafchk.f0000640000175000017500000002220010363532303017011 0ustar muammarmuammar SUBROUTINE PSLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) REAL array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, IROFF, $ JB, JJ, JJA, JN, LDA, LDW, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSMATGEN, SMATADD * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL SMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 10 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 ) * LDA CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJ-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL SMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) 10 CONTINUE * * Calculate factor residual * FRESID = PSLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PSLAFCHK * END scalapack-1.8.0/TESTING/LIN/pddblaschk.f0000640000175000017500000002613310363532303017326 0ustar muammarmuammar SUBROUTINE PDDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PDDBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'T', A 'Transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, DGEBR2D, $ DGEBS2D, DGEMM, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, PBDTRAN, PDMATGEN * .. * .. External Functions .. INTEGER IDAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PDBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PDGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO ) * * * Regenerate column of B * CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PDBLASCHK * END scalapack-1.8.0/TESTING/LIN/pmatgeninc.f0000640000175000017500000002003110363532303017343 0ustar muammarmuammar* ===================================================================== * SUBROUTINE LADD * ===================================================================== * SUBROUTINE LADD( J, K, I ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER I(2), J(2), K(2) * .. * * ===================================================================== * * .. Parameters .. INTEGER IPOW16, IPOW15 PARAMETER ( IPOW16=2**16, IPOW15=2**15 ) * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * I(1) = MOD( K(1)+J(1), IPOW16 ) I(2) = MOD( (K(1)+J(1)) / IPOW16+K(2)+J(2), IPOW15 ) * RETURN * * End of LADD * END * * ===================================================================== * SUBROUTINE LMUL * ===================================================================== * SUBROUTINE LMUL( K, J, I ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER I(2), J(2), K(2) * .. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16, IPOW30 PARAMETER ( IPOW15=2**15, IPOW16=2**16, IPOW30=2**30 ) * .. * .. Local Scalars .. INTEGER KT, LT * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * KT = K(1)*J(1) IF( KT.LT.0 ) KT = (KT+IPOW30) + IPOW30 I(1) = MOD(KT,IPOW16) LT = K(1)*J(2) + K(2)*J(1) IF( LT.LT.0 ) LT = (LT+IPOW30) + IPOW30 KT = KT/IPOW16 + LT IF( KT.LT.0 ) KT = (KT+IPOW30) + IPOW30 I(2) = MOD( KT, IPOW15 ) * RETURN * * End of LMUL * END * * ===================================================================== * SUBROUTINE XJUMPM * ===================================================================== * SUBROUTINE XJUMPM( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER JUMPM * .. * .. Array Arguments .. INTEGER IADD(2), IAM(2), ICM(2), IRANM(2), IRANN(2) INTEGER MULT(2) * .. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Executable Statements .. * IF( JUMPM.GT.0 ) THEN DO 10 I = 1, 2 IAM(I) = MULT(I) ICM(I) = IADD(I) 10 CONTINUE DO 20 I = 1, JUMPM-1 CALL LMUL( IAM, MULT, J ) IAM(1) = J(1) IAM(2) = J(2) CALL LMUL( ICM, MULT, J ) CALL LADD( IADD, J, ICM ) 20 CONTINUE CALL LMUL( IRANN, IAM, J ) CALL LADD( J, ICM, IRANM ) ELSE IRANM(1) = IRANN(1) IRANM(2) = IRANN(2) END IF * RETURN * * End of XJUMPM * END * * ===================================================================== * SUBROUTINE SETRAN * ===================================================================== * SUBROUTINE SETRAN( IRAN, IA, IC ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER IA(2), IC(2), IRAN(2) * .. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER IAS(2), ICS(2), IRAND(2) * .. * .. Common Blocks .. COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * DO 10 I = 1, 2 IRAND(I) = IRAN(I) IAS(I) = IA(I) ICS(I) = IC(I) 10 CONTINUE * RETURN * * End of SETRAN * END * * ===================================================================== * SUBROUTINE JUMPIT * ===================================================================== * SUBROUTINE JUMPIT( MULT, IADD, IRANN, IRANM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER IADD(2), IRANM(2), IRANN(2), MULT(2) * .. * * ===================================================================== * * .. Local Arrays .. INTEGER IAS(2), ICS(2), IRAND(2), J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Common Blocks .. COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * CALL LMUL( IRANN, MULT, J ) CALL LADD( J, IADD, IRANM ) * IRAND(1) = IRANM(1) IRAND(2) = IRANM(2) * RETURN * * End of JUMPIT * END * * ===================================================================== * REAL FUNCTION PSRAND * ===================================================================== * REAL FUNCTION PSRAND( IDUMM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC=2.147483648E+9, POW16=6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IAS(2), ICS(2), IRAND(2) COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * PSRAND = ( REAL(IRAND(1)) + POW16 * REAL(IRAND(2)) ) / DIVFAC * CALL LMUL( IRAND, IAS, J ) CALL LADD( J, ICS, IRAND ) * RETURN * * End of PSRAND * END * * ===================================================================== * DOUBLE PRECISION FUNCTION PDRAND * ===================================================================== * DOUBLE PRECISION FUNCTION PDRAND( IDUMM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC=2.147483648D+9, POW16=6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IAS(2), ICS(2), IRAND(2) COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * PDRAND = ( DBLE(IRAND(1)) + POW16 * DBLE(IRAND(2)) ) / DIVFAC * CALL LMUL( IRAND, IAS, J ) CALL LADD( J, ICS, IRAND ) * RETURN * * End of PDRAND * END scalapack-1.8.0/TESTING/LIN/pztzrzrv.f0000640000175000017500000002444410363532303017165 0ustar muammarmuammar SUBROUTINE PZTZRZRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZTZRZRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from T, Z * computed by PZTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= M >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors T and Z computed * by PZTZRZF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PZTZRZF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JM1, JV, L, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PB_TOPGET, $ PB_TOPSET, PZLACPY, PZLARZB, PZLARZT, $ PZLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LT.M ) $ RETURN * L = N - M JM1 = JA + MIN( M+1, N ) - 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA + 1 JV = ICOFF + JM1 - JA + 1 * * Compute upper triangular matrix T * CALL PZLARZT( 'Backward', 'Rowwise', L, IB, A, IA, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', IB, L, A, IA, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(IA:IA+IB-1,JA:JA+IB-1) * CALL PZLACPY( 'Lower', IB-1, IB-1, A, IA+1, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) * * Zeroes the row panel of sub( A ) to get T(IA:IN,JA:JA+N-1) * CALL PZLASET( 'All', IB, L, ZERO, ZERO, A, IA, JM1, DESCA ) CALL PZLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Apply block Householder transformation * CALL PZLARZB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', IN-IA+1, N, IB, L, WORK( IPV ), 1, JV, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * * Restore strict lower part of A( IA:IA+IB-1, JA:JA+N-1 ) * CALL PZLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, ICOFF+1, DESCV, $ A, IA+1, JA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute upper triangular matrix T * CALL PZLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', IB, L, A, I, JM1, DESCA, WORK( IPV ), 1, $ JV, DESCV ) * * Save temporarily strict lower part of A(I:I+IB-1,J:J+IB-1 ) * CALL PZLACPY( 'Lower', IB-1, IB-1, A, I+1, JA+I-IA, DESCA, $ WORK( IPV ), 1, ICOFF+1+I-IA, DESCV ) * * Zeoes the row panel of sub( A ) to get T(IA:I-1,JA+I-IA:JA+N-1) * CALL PZLASET( 'All', IB, L, ZERO, ZERO, A, I, JM1, DESCA ) CALL PZLASET( 'Lower', IB-1, IB-1, ZERO, ZERO, A, I+1, JA+I-IA, $ DESCA ) * * Apply block Householder transformation * CALL PZLARZB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I+IB-IA, N-I+IA, IB, L, WORK( IPV ), $ 1, JV, DESCV, WORK( IPT ), A, IA, JA+I-IA, DESCA, $ WORK( IPW ) ) * CALL PZLACPY( 'Lower', IB-1, IB-1, WORK( IPV ), 1, $ ICOFF+1+I-IA, DESCV, A, I+1, JA+I-IA, DESCA ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZTZRZRV * END scalapack-1.8.0/TESTING/LIN/pslaschk.f0000640000175000017500000003016110363532303017033 0ustar muammarmuammar SUBROUTINE PSLASCHK( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, SYMM INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL WORK( * ), X( * ) * .. * * Purpose * ======= * * PSLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed matrix, * otherwise sub( A ) is a general distributed matrix. * * DIAG (global input) CHARACTER * If DIAG = 'D', sub( A ) is diagonally dominant. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 0. * * X (local input) REAL pointer into the local memory * to an array of dimension (LLD_X,LOCc(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X), * NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM, $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF, $ IXCOL, IXROW, J, JBRHS, JJ, JJA, JJX, LDX, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL BETA, DIVISOR, EPS, RESID1 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTRAN, PSMATGEN, $ SGAMX2D, SGEBR2D, SGEBS2D, SGEMM, $ SGERV2D, SGESD2D, SGSUM2D, SLASET * .. * .. External Functions .. INTEGER ISAMAX, NUMROC REAL PSLAMCH EXTERNAL ISAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * LDX = MAX( 1, NP ) IPB = 1 IPX = IPB + NP * DESCX( NB_ ) IPA = IPX + NQ * DESCX( NB_ ) * IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF * ICURCOL = IXCOL * * Loop over the rhs * DO 40 J = 1, NRHS, DESCX( NB_ ) JBRHS = MIN( DESCX( NB_ ), NRHS-J+1 ) * * Transpose x from ICURCOL to all rows * IOFFX = IIX + ( JJX - 1 ) * DESCX( LLD_ ) CALL PBSTRAN( ICTXT, 'Column', 'Transpose', N, JBRHS, $ DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO, $ WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL, $ WORK( IPA ) ) * * Regenerate B in IXCOL * IF( MYCOL.EQ.ICURCOL ) THEN CALL PSMATGEN( ICTXT, 'N', 'N', DESCX( M_ ), DESCX( N_ ), $ DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX, $ IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1, $ JBRHS, MYROW, MYCOL, NPROW, NPCOL ) BETA = ONE ELSE BETA = ZERO END IF * IF( NQ.GT.0 ) THEN DO 10 II = IIA, IIA+NP-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IIA+NP-II ) * * Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1). * CALL PSMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK( IPA ), IB, DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, II-1, IB, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Compute B <= B - A * X. * CALL SGEMM( 'No transpose', 'Transpose', IB, JBRHS, NQ, $ -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS, $ BETA, WORK( IPB+II-IIA ), LDX ) * 10 CONTINUE * ELSE IF( MYCOL.NE.ICURCOL ) THEN * CALL SLASET( 'All', NP, JBRHS, ZERO, ZERO, WORK( IPB ), $ LDX ) * END IF * * Add B rowwise to ICURCOL * CALL SGSUM2D( ICTXT, 'Row', ' ', NP, JBRHS, WORK( IPB ), LDX, $ MYROW, ICURCOL ) * IF( MYCOL.EQ.ICURCOL ) THEN * * Figure || A * X - B || & || X || * IPW = IPA + JBRHS DO 20 JJ = 0, JBRHS - 1 IF( NP.GT.0 ) THEN II = ISAMAX( NP, WORK( IPB+JJ*LDX ), 1 ) WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) ) WORK( IPW+JJ ) = ABS( X( IOFFX + ISAMAX( NP, $ X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ* $ DESCX( LLD_ ) ) ) ELSE WORK( IPA+JJ ) = ZERO WORK( IPW+JJ ) = ZERO END IF 20 CONTINUE * * After SGAMX2D computation, * WORK(IPB) has the maximum of || Ax - b ||, and * WORK(IPX) has the maximum of || X ||. * CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 2*JBRHS, $ WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL ) * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * IF( MYROW.EQ.0 ) THEN DO 30 JJ = 0, JBRHS - 1 RESID1 = WORK( IPA+JJ ) / ( WORK( IPW+JJ )*DIVISOR ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 30 CONTINUE IF( MYCOL.NE.0 ) $ CALL SGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 ) END IF * ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * CALL SGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL ) IF( RESID.LT.RESID1 ) $ RESID = RESID1 * END IF * IF( MYCOL.EQ.ICURCOL ) $ JJX = JJX + JBRHS ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, RESID, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, RESID, 1, 0, 0 ) END IF * RETURN * * End of PSLASCHK * END scalapack-1.8.0/TESTING/LIN/pddtdriver.f0000640000175000017500000007620410363532303017402 0ustar muammarmuammar PROGRAM PDDTDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PDDTDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by DDT. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER DBLESZ, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, $ PDCHEKPAD, PDDTINFO, PDDTLASCHK, PDDTTRF, $ PDDTTRS, PDFILLPAD, PDMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PDLANGE EXTERNAL LSAME, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PDDTINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = 1 IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = 1 IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*INT_ONE-1)/NPCOL + 1 ) $ + INT_ONE NB = MAX( NB, 2*INT_ONE ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*INT_ONE, N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (3), (3), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((3)+10) IMIDPAD = 10 IPOSTPAD = ((3)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, N, (3), $ NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((3)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + (NB+10)*(3) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (12*NPCOL+3*NB) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 8*NPCOL * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((3)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PDLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PDDTLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,NB)+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDBMATGEN( ICTXT, 'T', 'D', BWL, BWU, N, (3), NB, $ MEM( IPA ), NB+10, 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) CALL PDFILLPAD( ICTXT, NQ, NP, MEM( IPA-IPREPAD ), $ NB+10, IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PDLANGE( 'I', N, $ (3), MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NQ, NP, $ MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PDDTTRF( N, MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), 1, $ DESCA, MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW ), IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PDDTTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, 'PDDTTRF', NQ, $ NP, MEM( IPA-IPREPAD ), NB+10, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = 10*NPCOL+4*NRHS * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PDMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PDDTTRS( TRANS, N, NRHS, $ MEM( IPA+2*( NB+10 ) ), $ MEM( IPA+1*( NB+10 ) ), MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDDTTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDDTTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * * Reset descriptor describing A to 1-by-P grid for * use in banded utility routines * CALL DESCINIT( DESCA2D, (3), N, $ (3), NB, 0, 0, $ ICTXT, (3), IERR( 1 ) ) CALL PDDTLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl INT_ONE + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(INT_ONE)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+INT_ONE) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(INT_ONE)) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*INT_ONE * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*INT_ONE flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(INT_ONE)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*INT_ONE*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*INT_ONE*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+INT_ONE) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(INT_ONE)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+INT_ONE) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(INT_ONE)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*INT_ONE ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*INT_ONE ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDDTTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzgeqrrv.f0000640000175000017500000002350310363532303017105 0ustar muammarmuammar SUBROUTINE PZGEQRRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQRRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from Q, R * computed by PZGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors Q and R computed * by PZGEQRF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PZGEQRF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, IIA, IPT, IPV, IPW, $ IROFF, IV, J, JB, JJA, JL, JN, K, MP, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PB_TOPGET, $ PB_TOPSET, PZLACPY, PZLARFB, PZLARFT, $ PZLASET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * K = MIN( M, N ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, INDXG2P( JL, DESCA( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, MP ) ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+K-J, DESCA( NB_ ) ) I = IA + J - JA IV = 1 + J - JA + IROFF * * Compute upper triangular matrix T * CALL PZLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'Lower', M-I+IA, JB, A, I, J, DESCA, WORK( IPV ), $ IV, 1, DESCV ) CALL PZLASET( 'Upper', M-I+IA, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get * block column of R * CALL PZLASET( 'Lower', M-I+IA-1, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-I+IA, N-J+JA, JB, WORK( IPV ), IV, 1, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = JN - JA + 1 * * Compute upper triangular matrix T * CALL PZLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IROFF+1, 1, DESCV ) CALL PZLASET( 'Upper', M, JB, ZERO, ONE, WORK, IROFF+1, 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get block * column of R * CALL PZLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', M, $ N, JB, WORK( IPV ), IROFF+1, 1, DESCV, WORK( IPT ), $ A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZGEQRRV * END scalapack-1.8.0/TESTING/LIN/pcptlaschk.f0000640000175000017500000002745110363532303017367 0ustar muammarmuammar SUBROUTINE PCPTLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PCPTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D, $ CLASET, PBCTRAN, PCMATGEN, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICAMAX, NUMROC REAL PSLAMCH EXTERNAL ICAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PCTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (2), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PCBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PCBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF IF( LSAME( UPLO, 'U' ) ) THEN * * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL CGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.GT.0 ) THEN CALL CGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PCPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PCMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PCAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSCNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSCNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PCTLASCHK * END scalapack-1.8.0/TESTING/LIN/pcinvdriver.f0000640000175000017500000010170310430450246017557 0ustar muammarmuammar PROGRAM PCINVDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PCINVDRIVER is the main test program for the COMPLEX * SCALAPACK matrix inversion routines. This test driver computes the * inverse of different kind of matrix and tests the results. * * The program must be driven by a short data file. An annotated example * of a data file can be obtained by deleting the first 3 characters * from the following 14 lines: * 'ScaLAPACK Matrix Inversion Testing input file' * 'PVM machine.' * 'INV.out' output file name (if any) * 6 device out * 5 number of matrix types (next line) * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD * 4 number of problems sizes * 1000 2000 3000 4000 values of N * 3 number of NB's * 4 30 35 values of NB * 2 number of process grids (ordered P & Q) * 4 2 values of P * 4 4 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ, REALSZ and CPLXSZ indicate the length in bytes on * the given platform for an integer, a single precision real * and a single precision complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM COMPLEX PADVAL, ZERO PARAMETER ( CPLXSZ = 8, INTGSZ = 4, REALSZ = 4, $ TOTMEM = 2000000, MEMSIZ = TOTMEM / CPLXSZ, $ NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER UPLO CHARACTER*3 MTYP CHARACTER*6 PASSED CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPPIV, IPREPAD, IPOSTPAD, IPIW, IPW, ITEMP, J, $ K, KTESTS, KPASS, KFAIL, KSKIP, L, LCM, LIPIV, $ LIWORK, LWORK, MYCOL, MYROW, N, NB, NGRIDS, $ NMAT, NMTYP, NNB, NOUT, NP, NPCOL, NPROCS, $ NPROW, NQ, WORKIINV, WORKINV, WORKSIZ REAL ANORM, FRESID, RCOND, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. CHARACTER*3 MATTYP( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCGETRF, PCGETRI, $ PCINVCHK, PCINVINFO, PCLASET, $ PCMATGEN, PCPOTRF, PCPOTRI, $ PCTRTRI, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, ILCM, NUMROC REAL PCLANGE, PCLANHE, PCLANSY, PCLANTR EXTERNAL ICEIL, ILCM, LSAMEN, NUMROC, PCLANGE, $ PCLANHE, PCLANSY, PCLANTR * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCINVINFO( OUTFILE, NOUT, NMTYP, MATTYP, NTESTS, NMAT, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different matrix types * DO 40 I = 1, NMTYP * MTYP = MATTYP( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a general matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is an upper triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'LTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a lower triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a Hermitian positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the upper triangular part will be '// $ 'referenced.' ELSE IF( LSAMEN( 3, MTYP, 'LPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a Hermitian positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the lower triangular part will be '// $ 'referenced.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * N = NVAL( K ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * NB = NBVAL( L ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 * LCM = ILCM( NPROW, NPCOL ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Pivots are needed by LU factorization * IPPIV = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + $ IPREPAD LIPIV = ICEIL( INTGSZ * ( NP + NB ), CPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * LWORK = MAX( 1, NP * DESCA( NB_ ) ) WORKINV = LWORK + IPOSTPAD * * Figure the amount of workspace required by the * general matrix inversion * IF( NPROW.EQ.NPCOL ) THEN LIWORK = NQ + DESCA( NB_ ) ELSE * * change the integer workspace needed for PDGETRI * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) * * $ ICEIL( ICEIL( DESCA( LLD_ ), * $ DESCA( MB_ ) ), LCM / NPROW ) ) * $ + NQ LIWORK = NUMROC( DESCA( M_ ) + $ DESCA( MB_ ) * NPROW $ + MOD ( 1 - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF WORKIINV = ICEIL( LIWORK*INTGSZ, CPLXSZ ) + $ IPOSTPAD IPIW = IPW + WORKINV + IPREPAD WORKSIZ = WORKINV + IPREPAD + WORKIINV * ELSE * * No pivots or workspace needed for triangular or * Hermitian positive definite matrices. * IPW = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD WORKSIZ = 1 + IPOSTPAD * END IF * IF( CHECK ) THEN * * Figure amount of work space for the norm * computations * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN ITEMP = NQ ELSE ITEMP = 2 * NQ + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), $ LCM / NPROW ) END IF END IF WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ ICEIL( REALSZ * ITEMP, CPLXSZ ) ) * * Figure the amount of workspace required by the * checking routine * WORKSIZ = MAX( WORKSIZ, 2 * NB * MAX( 1, NP ) ) + $ IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'inversion', $ ( IPW + WORKSIZ ) * CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Generate a general diagonally dominant matrix A * CALL PCMATGEN( ICTXT, 'N', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Generate a Hermitian positive definite matrix A * CALL PCMATGEN( ICTXT, 'H', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * END IF * * Zeros not-referenced part of A, if any. * IF( LSAMEN( 1, MTYP, 'U' ) ) THEN * UPLO = 'U' CALL PCLASET( 'Lower', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 2, 1, DESCA ) * ELSE IF( LSAMEN( 1, MTYP, 'L' ) ) THEN * UPLO = 'L' CALL PCLASET( 'Upper', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 1, 2, DESCA ) * ELSE * UPLO = 'G' * END IF * * Need 1-norm of A for checking * IF( CHECK ) THEN * CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * CALL PCFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANGE( '1', N, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * ANORM = PCLANTR( '1', UPLO, 'Non unit', N, N, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANTR', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANTR', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * ANORM = PCLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'SY' ) ) THEN * CALL PCFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD,IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'HE' ) ) THEN CALL PCFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * END IF * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Perform LU factorization * CALL SLTIMER( 1 ) CALL PCGETRF( N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGETRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PCGETRI( N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ), LWORK, $ MEM( IPIW ), LIWORK, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGETRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRI', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRI', $ WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRI', $ WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PCTRTRI( UPLO, 'Non unit', N, MEM( IPA ), 1, $ 1, DESCA, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCTRTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Perform Cholesky factorization * CALL SLTIMER( 1 ) CALL PCPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the Hermitian positive definite matrix * inversion * CALL SLTIMER( 2 ) CALL PCPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPOTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * END IF * IF( CHECK ) THEN * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Compute fresid = || inv(A)*A-I || * CALL PCINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA, $ IASEED, ANORM, FRESID, RCOND, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCINVCHK', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCINVCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. INFO.EQ.0 .AND. $ ( (FRESID-FRESID) .EQ. 0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 IF( INFO.GT.0 ) THEN PASSED = 'SINGUL' ELSE PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only the timing * operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * 8/3 N^3 - N^2 flops for LU factorization * NOPS = ( 8.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) - $ DBLE( N )**2 * * 16/3 N^3 for matrix inversion * NOPS = NOPS + $ ( 16.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * 4/3 N^3 + 2 N^2 for triangular matrix inversion * CTIME(1) = 0.0D+0 WTIME(1) = 0.0D+0 NOPS = ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 2.0D+0 * ( DBLE( N )**2 ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * 4/3 N^3 + 3 N^2 flops for Cholesky factorization * NOPS = ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 2.0D+0 * ( DBLE( N )**2 ) * * 8/3 N^3 + 5 N^2 flops for Cholesky inversion * NOPS = NOPS + $ ( 8.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ 5.0D+0 * ( DBLE( N )**2 ) * END IF * * Figure total megaflops -- factorization and * inversion, for WALL and CPU time, and print * output. * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ) + CTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ', $ ' MFLOPS Cond Resid CHECK' ) 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ', $ '----------- ------- ------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2, $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PCINVDRIVER * END scalapack-1.8.0/TESTING/LIN/pcgetrrv.f0000640000175000017500000003007410363532303017062 0ustar muammarmuammar SUBROUTINE PCGETRRV( M, N, A, IA, JA, DESCA, IPIV, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCGETRRV reforms sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from the * triangular matrices L and U returned by PCGETRF. It multiplies * an upper triangular matrix stored in the upper triangle of sub( A ) * times the unit lower triangular matrix stored in the lower triangle. * To accomplish this, the routine basically performs the PCGETRF * routine in reverse. * * It computes L*U first, and then apply P: P*L*U => sub( A ). In the * J-th loop, the block column (or column panel), which has the lower * triangular unit matrix L is multiplied with the block row (or row * panel), which contains the upper triangular matrix U. * * ( L1 ) ( 0 0 ) ( L1*U1 L1*U2 ) * A` = L * U + A` = ( ) * (U1 U2) + ( ) = ( ) * ( L2 ) ( 0 A`) ( L2*U1 L2*U2+A` ) * * where L1 is a lower unit triangular matrix and U1 is an upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the distributed matrix sub( A ) * contains the the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. On exit, the original distributed matrix sub( A ) * is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX array of dimension (LWORK) * LWORK >= MpA0 * NB_A + NqA0 * MB_A, where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of columns of L, and a block of * rows of U. INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J, $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ), $ DESCU( DLEN_ ), IDUM( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PCGEMM, PCLACPY, $ PCLAPIV, PCLASET, PB_TOPGET, PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPL = 1 IPU = IPL + MP * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Define array descriptors for L and U * MN = MIN( M, N ) IL = MAX( ( ( IA+MN-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) JL = MAX( ( ( JA+MN-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * CALL DESCSET( DESCL, IA+M-IL, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * CALL DESCSET( DESCU, DESCA( MB_ ), JA+N-JL, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) + DESCA( MB_ ) ) * * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+MN-J, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PCLACPY( 'Lower', M-IL+IA, JB, A, IL, J, DESCA, $ WORK( IPL ), 1, 1, DESCL ) CALL PCLASET( 'Upper', M-IL+IA, JB, ZERO, ONE, WORK( IPL ), $ 1, 1, DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PCLACPY( 'Upper', JB, JA+N-J, A, IL, J, DESCA, $ WORK( IPU ), 1, 1, DESCU ) CALL PCLASET( 'Lower', JB-1, JA+N-J, ZERO, ZERO, $ WORK( IPU ), 2, 1, DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PCLASET( 'Lower', IA+M-IL-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Zero the upper triangular piece of the current block. * CALL PCLASET( 'Upper', JB, JA+N-J, ZERO, ZERO, A, IL, J, $ DESCA ) * * Update the matrix sub( A ). * CALL PCGEMM( 'No transpose', 'No transpose', IA+M-IL, $ JA+N-J, JB, ONE, WORK( IPL ), 1, 1, DESCL, $ WORK( IPU ), 1, 1, DESCU, ONE, A, IL, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCL( M_ ) = DESCL( M_ ) + DESCL( MB_ ) DESCL( RSRC_ ) = MOD( DESCL( RSRC_ ) + NPROW - 1, NPROW ) DESCL( CSRC_ ) = MOD( DESCL( CSRC_ ) + NPCOL - 1, NPCOL ) DESCU( N_ ) = DESCU( N_ ) + DESCU( NB_ ) DESCU( RSRC_ ) = DESCL( RSRC_ ) DESCU( CSRC_ ) = DESCL( CSRC_ ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PCLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPL ), $ 1, 1, DESCL ) CALL PCLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPL ), 1, 1, $ DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PCLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPU ), 1, $ 1, DESCU ) CALL PCLASET( 'Lower', JB-1, N, ZERO, ZERO, WORK( IPU ), 2, 1, $ DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PCLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Zero the upper triangular piece of the current block. * CALL PCLASET( 'Upper', JB, N, ZERO, ZERO, A, IA, JA, DESCA ) * * Update the matrix sub( A ). * CALL PCGEMM( 'No transpose', 'No transpose', M, N, JB, ONE, $ WORK( IPL ), 1, 1, DESCL, WORK( IPU ), 1, 1, $ DESCU, ONE, A, IA, JA, DESCA ) * * Apply pivots so that sub( A ) = P*L*U * CALL PCLAPIV( 'Backward', 'Row', 'Col', MIN( M, N ), N, A, IA, JA, $ DESCA, IPIV, IA, 1, DESCIP, IDUM ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCGETRRV * END scalapack-1.8.0/TESTING/LIN/pdgeqlrv.f0000640000175000017500000002404010363532303017046 0ustar muammarmuammar SUBROUTINE PDGEQLRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQLRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PDGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PDGEQLF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension LOCc(N_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PDGEQLF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IIA, IPT, IPV, IPW, IROFF, $ IV, J, JB, JJA, JN, K, MP, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLACPY, $ PDLARFB, PDLARFT, PDLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA+N-K, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * * Handle first block separately * IV = 1 + M - K + IROFF JB = JN - JA - N + K + 1 * * Compute upper triangular matrix T * CALL PDLARFT( 'Backward', 'Columnwise', M-N+JN-JA+1, JB, A, IA, $ JA+N-K, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', M-N+JN-JA+1, JB, A, IA, JA+N-K, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PDLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of A to get block * row of L * CALL PDLASET( 'All', M-K, JB, ZERO, ZERO, A, IA, JA+N-K, $ DESCA ) CALL PDLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-K, $ JA+N-K+1, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+JN-JA+1, JN-JA+1, JB, WORK( IPV ), IROFF+1, 1, $ DESCV, WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * * Loop over the remaining column blocks * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IV = 1 + M - N + J - JA + IROFF * * Compute upper triangular matrix T * CALL PDLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PDLACPY( 'All', M-N+J+JB-JA, JB, A, IA, J, DESCA, $ WORK( IPV ), IROFF+1, 1, DESCV ) CALL PDLASET( 'Lower', JB, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeoes the strict upper triangular part of sub( A ) to get * block row of L * CALL PDLASET( 'All', M-N+J-JA, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PDLASET( 'Upper', JB, JB-1, ZERO, ZERO, A, IA+M-N+J-JA, $ J+1, DESCA ) * * Apply block Householder transformation * CALL PDLARFB( 'Left', 'No transpose', 'Backward', 'Columnwise', $ M-N+J+JB-JA, J+JB-JA, JB, WORK( IPV ), IROFF+1, $ 1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDGEQLRV * END scalapack-1.8.0/TESTING/LIN/pddbdriver.f0000640000175000017500000007570010363532303017360 0ustar muammarmuammar PROGRAM PDDBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PDDBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by DDB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * DOUBLE PRECISION ZERO INTEGER DBLESZ, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, NOPS, NOPS2, SRESID, TMFLOPS, $ TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), MEM( MEMSIZ ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDBMATGEN, $ PDCHEKPAD, PDDBINFO, PDDBLASCHK, PDDBTRF, $ PDDBTRS, PDFILLPAD, PDMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME DOUBLE PRECISION PDLANGE EXTERNAL LSAME, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PDDBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0D+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 ) $ + MAX(BWL,BWU) NB = MAX( NB, 2*MAX(BWL,BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*MAX(BWL,BWU), N ) ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BWL+BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((BWL+BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BWL+BWU+1), N, $ (BWL+BWU+1), NB, 0, 0, $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BWL+BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU) * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BWL+BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PDLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PDDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDBMATGEN( ICTXT, 'G', 'D', BWL, BWU, N, $ (BWL+BWU+1), NB, MEM( IPA ), $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) * CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PDLANGE( '1', (BWL+BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PDDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PDDBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PDCHEKPAD( ICTXT, 'PDDBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PDMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PDFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PDDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PDDBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDDBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PDDBLASCHK( 'N', 'D', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl bwu + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE(BWU)) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+bwu) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU)) $ *DBLE(NRHS)) * * Second calc to represent actual hardware speed * * 2*N_FIRST bwl*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE(BWL)*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST bwl*bwu * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB bwl*bwu flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE(BWL) $ *DBLE(BWU)) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( BWL*BWU*BWL/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*BWL*BWU*BWL) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+bwu) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE(BWU)) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * (bwl+bwu) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE(BWL)+ $ DBLE(BWU)))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * BWL*BWU ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PDDBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pzlltdriver.f0000640000175000017500000011045110363532303017605 0ustar muammarmuammar PROGRAM PZLLTDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PZLLTDRIVER is the main test program for the COMPLEX*16 * ScaLAPACK Cholesky routines. This test driver performs an * A = L*L**H or A = U**H*U factorization and solve, and optionally * performs condition estimation and iterative refinement. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'ScaLAPACK LLt factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LLT.out' output file name (if any) * 6 device out * 'U' define Lower or Upper * 1 number of problems sizes * 31 100 200 values of N * 1 number of NB's * 2 10 24 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 Number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 values of P * 2 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, MEMSIZ, NTESTS, TOTMEM, ZPLXSZ DOUBLE PRECISION ZERO COMPLEX*16 PADVAL PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPREPAD, IPOSTPAD, IPW, IPW2, ITEMP, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LRWORK, LWORK, LW2, MYCOL, MYRHS, MYROW, N, NB, $ NBRHS, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, ANORM1, FRESID, NOPS, RCOND, $ SRESID, SRESID2, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGSUM2D, BLACS_PINFO, PZCHEKPAD, PZFILLPAD, $ PZLAFCHK, PZLASCHK, PZLLTINFO, $ PZMATGEN, PZPOCON, PZPORFS, $ PZPOTRF, PZPOTRRV, PZPOTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PZLANHE EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PZLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PZLLTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPW = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF * * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PZLAFCHK, PZPOTRRV, and * PZLANHE * WORKSIZ = NP * DESCA( NB_ ) * WORKSIZ = MAX( WORKSIZ, DESCA( MB_ ) * DESCA( NB_ ) ) * LCM = ILCM( NPROW, NPCOL ) ITEMP = MAX( 2, 2 * NQ ) + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), LCM / NPROW ) END IF WORKSIZ = MAX( WORKSIZ, $ ICEIL( DBLESZ * ITEMP, ZPLXSZ ) ) WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate a Hermitian positive definite matrix A * CALL PZMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) ANORM1 = PZLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST ) THEN CALL PZMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PZFILLPAD( ICTXT, NP, NQ, $ MEM( IPA0-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform LLt factorization * CALL SLTIMER( 1 ) * CALL PZPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZPOTRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 60 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LLt factorization * CALL PZCHEKPAD( ICTXT, 'PZPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN * * Calculate workspace required for PZPOCON * LWORK = MAX( 1, 2*NP ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, 2*NQ ) LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 60 END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PZPOCON( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LRWORK, INFO ) * IF( CHECK ) THEN CALL PZCHEKPAD( ICTXT, 'PZPOCON', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPOCON', $ LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPOCON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL, $ DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + IPOSTPAD $ + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PZLASCHK * LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PZMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, NPCOL ) * IF( CHECK ) $ CALL PZFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PZMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB0 ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear system via Cholesky factorization * CALL PZPOTRS( UPLO, N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB ), 1, 1, DESCB, $ INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPOTRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPOTRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PZLASCHK( 'Herm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PZPORFS * LWORK = MAX( 1, 2*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, NP ) LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, $ 1, MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PZPORFS( UPLO, N, NRHS, MEM( IPA0 ), $ 1, 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB0 ), 1, 1, $ DESCB, MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LRWORK, INFO ) * * check for memory overwrite * IF( CHECK ) THEN CALL PZCHEKPAD( ICTXT, 'PZPORFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZPORFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PZLASCHK( 'Herm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 4/3 N^3 + 3 N^2 flops for LLt factorization * NOPS = 4.0D+0*(DBLE(N)**3)/3.0D+0 + $ 3.0D+0*(DBLE(N)**2) * * nrhs * 8 N^2 flops for LLt solve. * NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization and * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED * END IF 10 CONTINUE 20 CONTINUE * IF( CHECK .AND. SRESID.GT.THRESH ) THEN * * Compute FRESID = ||A - LL'|| / (||A|| * N * eps) * CALL PZPOTRRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZLAFCHK( 'Symm', 'Diag', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZPOTRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( IAM.EQ.0 ) THEN IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9986 ) 'L*L''', FRESID ELSE WRITE( NOUT, FMT = 9986 ) 'U''*U', FRESID END IF END IF END IF * 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB NRHS NBRHS P Q LLt Time ', $ 'Slv Time MFLOPS CHECK' ) 9994 FORMAT( '---- ---- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 4X, A1, 1X, I5, 1X, I3, 1X, I4, 1X, I5, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZLLTDRIVER * END scalapack-1.8.0/TESTING/LIN/pdlltinfo.f0000640000175000017500000004214510363532303017223 0ustar muammarmuammar SUBROUTINE PDLLTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, THRESH, EST, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDLLTINFO get needed startup information for LLt factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 7, LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'LLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 2 END IF IF( EST ) THEN WORK( 7 ) = 1 ELSE WORK( 7 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 7, 1, WORK, 7 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Ax=b by LLt factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision LLt factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data is stored in ''Upper'// $ ''' or ''Lower'' portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LLt time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 7, 1, WORK, 7, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF IF( WORK( 7 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDLLTINFO * END scalapack-1.8.0/TESTING/LIN/pzgelqrv.f0000640000175000017500000002311210363532303017073 0ustar muammarmuammar SUBROUTINE PZGELQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGELQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from L, Q * computed by PZGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors L and Q computed * by PZGELQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PZGELQF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IL, IN, $ IPT, IPV, IPW, J, JJA, JV, K, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PZLACPY, $ PZLARFB, PZLARFT, PZLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IL, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL DESCSET( DESCV, DESCA( MB_ ), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+K-I, DESCA( MB_ ) ) J = JA + I - IA JV = 1 + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PZLARFT( 'Forward', 'Rowwise', N-J+JA, IB, A, I, J, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'Upper', IB, N-J+JA, A, I, J, DESCA, WORK( IPV ), $ 1, JV, DESCV ) CALL PZLASET( 'Lower', IB, N-J+JA, ZERO, ONE, WORK( IPV ), 1, $ JV, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PZLASET( 'Upper', IB, N-J+JA-1, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-I+IA, N-J+JA, IB, WORK( IPV ), 1, $ JV, DESCV, WORK( IPT ), A, I, J, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) * 10 CONTINUE * * Handle first block separately * IB = IN - IA + 1 * * Compute upper triangular matrix T * CALL PZLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, TAU, $ WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'Upper', IB, N, A, IA, JA, DESCA, WORK( IPV ), 1, $ ICOFF+1, DESCV ) CALL PZLASET( 'Lower', IB, N, ZERO, ONE, WORK, 1, ICOFF+1, DESCV ) * * Zeroes the strict upper triangular part of sub( A ) to get * block column of L * CALL PZLASET( 'Upper', IB, N-1, ZERO, ZERO, A, IA, JA+1, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N, IB, WORK( IPV ), 1, ICOFF+1, DESCV, $ WORK( IPT ), A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZGELQRV * END scalapack-1.8.0/TESTING/LIN/pzlafchk.f0000640000175000017500000002701310363532303017027 0ustar muammarmuammar SUBROUTINE PZLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = (1.0D+0, 0.0D+0) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK, $ LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PZMATGEN, ZMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDWP1 = LDW + 1 LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * IF( LSAME( AFORM, 'H' ) ) THEN * * Handle first block of columns separately * II = 1 ICURROW = IAROW ICURCOL = IACOL JB = JN - JA + 1 * IF( MYCOL.EQ.ICURCOL ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 10, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = DBLE( WORK( II+KK*LDWP1 ) ) 10 CONTINUE END IF CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-JJ, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ), $ IASEED, IIA-1, MP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 20, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = DBLE( WORK( II+KK*LDWP1 ) ) 20 CONTINUE END IF CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * ELSE * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 )*LDA CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) 40 CONTINUE * END IF * * Calculate factor residual * FRESID = PZLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PZLAFCHK * END scalapack-1.8.0/TESTING/LIN/pcgbdriver.f0000640000175000017500000010007410363532303017353 0ustar muammarmuammar PROGRAM PCGBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PCGBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by CGB. This test driver performs an * A = L*U factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTMEM INTEGER, default = 2048. * INTMEM is the size of the integer workspace used in this * driver as input as the IPIV vector. It represents an * upper bound on NB, the blocksize of the data * distribution. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER INTMEM PARAMETER ( INTMEM = 2048 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER CPLXSZ, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BWL, BWU, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, $ I, IAM, IASEED, IBSEED, ICTXT, ICTXTB, $ IERR_TEMP, IMIDPAD, INFO, IPA, IPB, IPOSTPAD, $ IPREPAD, IPW, IPW_SIZE, IPW_SOLVE, $ IPW_SOLVE_SIZE, IP_DRIVER_W, IP_FILLIN, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, MYCOL, MYRHS_SIZE, $ MYROW, N, NB, NBW, NGRIDS, NMAT, NNB, NNBR, $ NNR, NOUT, NP, NPCOL, NPROCS, NPROCS_REAL, $ NPROW, NQ, NRHS, N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER IPIV(INTMEM) INTEGER BWLVAL( NTESTS ), BWUVAL( NTESTS ), DESCA( 7 ), $ DESCA2D( DLEN_ ), DESCB( 7 ), DESCB2D( DLEN_ ), $ IERR( 1 ), NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCBMATGEN, $ PCCHEKPAD, PCDBLASCHK, PCFILLPAD, PCGBINFO, $ PCGBTRF, PCGBTRS, PCMATGEN, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PCLANGE EXTERNAL LSAME, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PCGBINFO( OUTFILE, NOUT, TRANS, NMAT, NVAL, NTESTS, NBW, $ BWLVAL, BWUVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, $ NRVAL, NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, $ NTESTS, QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BWL = BWLVAL( BW_NUM ) IF( BWL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Lower Band', 'bwl', BWL IERR( 1 ) = 1 END IF * BWU = BWUVAL( BW_NUM ) IF( BWU.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Upper Band', 'bwu', BWU IERR( 1 ) = 1 END IF * IF( BWL.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * IF( BWU.GT.N-1 ) THEN IF( IAM.EQ.0 ) THEN IERR( 1 ) = 1 ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*(BWL+BWU)-1)/NPCOL + 1 ) $ + (BWL+BWU) NB = MAX( NB, 2*(BWL+BWU) ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 * IF( NB.GT.INTMEM ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) THEN WRITE( NOUT,* )'You have chosen an ' $ ,'NB > INTMEM in the driver.' WRITE(NOUT, *)'Please edit the driver ' $ ,'and increase the value of INTMEM' ENDIF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (2*BWL+2*BWU+1), (2*BWL+2*BWU+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((2*BWL+2*BWU+1)+10) IMIDPAD = 10 IPOSTPAD = ((2*BWL+2*BWU+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (2*BWL+2*BWU+1), N, $ (2*BWL+2*BWU+1), NB, 0, 0, $ ICTXT,((2*BWL+2*BWU+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((2*BWL+2*BWU+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = 1 * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((2*BWL+2*BWU+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PCLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PCDBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCBMATGEN( ICTXT, 'G', 'N', BWL, BWU, N, $ (2*BWL+2*BWU+1), NB, MEM( IPA+BWL+BWU ), $ ((2*BWL+2*BWU+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((2*BWL+2*BWU+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PCLANGE( '1', (2*BWL+2*BWU+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PCGBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA, IPIV, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PCGBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PCCHEKPAD( ICTXT, 'PCGBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((2*BWL+2*BWU+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = NRHS*(NB+2*BWL+4*BWU) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PCMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PCGBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ), $ 1, DESCA, IPIV, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCGBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PCDBLASCHK( 'N', 'N', TRANS, $ N, BWL, BWU, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA+BWL+BWU ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * 2 N bwl (bwl+bwu) + N (bwl) flops * for LU factorization * NOPS = 2*(DBLE(N)*DBLE(BWL)* $ DBLE((BWL+BWU))) + $ (DBLE(N)*DBLE(BWL)) * * nrhs * 2 N*(bwl+(bwl+bwu)) flops for LU solve. * NOPS = NOPS + $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE((BWL+BWU))) $ *DBLE(NRHS)) * * Multiply by 4 to get complex count * NOPS = NOPS * DBLE(4) * * Second calc to represent actual hardware speed * * 2*N_FIRST (bwl+bwu)*bwu Flops for LU * factorization in proc 1 * NOPS2 = 2*( (DBLE(N_FIRST)* $ DBLE((BWL+BWU))*DBLE(BWU))) * IF ( NPROCS_REAL .GT. 1) THEN * 8 N_LAST (bwl+bwu)*(bwl+bwu) * flops for LU and spike * calc in last processor * NOPS2 = NOPS2 + $ 8*( (DBLE(N_LAST)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 8 NB (bwl+bwu)*(bwl+bwu) flops for LU and spike * calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 8*( (DBLE(NB)*DBLE((BWL+BWU)) $ *DBLE((BWL+BWU))) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-1 ) * $ ( (BWL+BWU)*(BWL+BWU)*(BWL+BWU)/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ 2*( NPROCS_REAL-2 ) * $ (2*(BWL+BWU)*(BWL+BWU)*(BWL+BWU)) ENDIF * * Solve stage * * nrhs*2 n_first* * (bwl+(bwl+bwu)) * flops for L,U solve in proc 1. * NOPS2 = NOPS2 + $ 2* $ DBLE(N_FIRST)* $ DBLE(NRHS) * $ ( DBLE(BWL)+DBLE((BWL+BWU))) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*2 n_last * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ 4* $ (DBLE(N_LAST)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2*nrhs*2 NB * ((bwl+bwu)+(bwl+bwu)) * flops for LU solve in other procs * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( (DBLE(NB)*(DBLE((BWL+BWU))+ $ DBLE((BWL+BWU))))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1)*2*((BWL+BWU)*(BWL+BWU) ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * $ ( 6 * (BWL+BWU)*(BWL+BWU) ) ENDIF * * * Multiply by 4 to get complex count * NOPS2 = NOPS2 * DBLE(4) * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', TRANS, $ N, $ BWL, BWU, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME TR N BWL BWU NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- -------- ', $ '-------- -------- -------- ------' ) 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5, $ 1X,I4,1X,I4,1X,F9.3, $ F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCGBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pdqrt13.f0000640000175000017500000002050010363532303016515 0ustar muammarmuammar SUBROUTINE PDQRT13( SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, $ WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ISEED, JA, M, N, SCALE DOUBLE PRECISION NORMA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDQRT13 generates a full-rank matrix that may be scaled to have * large or small norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCALE (global input) INTEGER * SCALE = 1: normally scaled matrix * SCALE = 2: matrix scaled up * SCALE = 3: matrix scaled down * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * NORMA (global output) DOUBLE PRECISION * The one-norm of A. * * ISEED (global input/global output) INTEGER * Seed for random number generator. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= Nq0, where * * ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), and * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IIA, INFO, $ IROFFA, J, JJA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION AJJ, ASUM, BIGNUM, SMLNUM * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL NUMROC, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PDLABAD, PDLASCL, $ PDMATGEN, PDASUM, PDELGET, PDELSET * .. * .. Intrinsic Functions .. INTRINSIC MOD, SIGN * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * generate the matrix * IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * CALL PDMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), A, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED, IIA-1, MP, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * DO 10 J = JA, JA+N-1 I = IA + J - JA IF( I.LE.IA+M-1 ) THEN CALL PDASUM( M, ASUM, A, IA, J, DESCA, 1 ) CALL PDELGET( 'Column', ' ', AJJ, A, I, J, DESCA ) AJJ = AJJ + SIGN( ASUM, AJJ ) CALL PDELSET( A, I, J, DESCA, AJJ ) END IF 10 CONTINUE * * scaled versions * IF( SCALE.NE.1 ) THEN * NORMA = PDLANGE( 'M', M, N, A, IA, JA, DESCA, WORK ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'Epsilon' ) BIGNUM = ONE / SMLNUM * IF( SCALE.EQ.2 ) THEN * * matrix scaled up * CALL PDLASCL( 'General', NORMA, BIGNUM, M, N, A, IA, $ JA, DESCA, INFO ) * ELSE IF( SCALE.EQ.3 ) THEN * * matrix scaled down * CALL PDLASCL( 'General', NORMA, SMLNUM, M, N, A, IA, $ JA, DESCA, INFO ) * END IF * END IF * NORMA = PDLANGE( 'One-norm', M, N, A, IA, JA, DESCA, WORK ) * RETURN * * End of PDQRT13 * END scalapack-1.8.0/TESTING/LIN/pclsdriver.f0000640000175000017500000013555110363532303017411 0ustar muammarmuammar PROGRAM PCLSDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * Purpose * ======= * * PCLSDRIVER is the main test program for the COMPLEX * SCALAPACK (full rank) Least Squares routines. This test driver solves * full-rank least square problems. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 17 lines: * 'ScaLapack LS solve input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LS.out' output file name (if any) * 6 device out * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 3 number of NRHS's * 2 3 5 values of NRHS * 2 number of NBRHS's * 1 2 values of NBRHS * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL RZERO, RONE COMPLEX ONE, PADVAL, ZERO PARAMETER ( CPLXSZ = 8, REALSZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ) ) PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), RZERO = 0.0E+0, $ RONE = 1.0E+0, ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK, TPSD CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, II, IMIDPAD, $ INFO, IPA, IPB, IPOSTPAD, IPREPAD, IPW, IPW2, $ IPX, ISCALE, ITRAN, ITYPE, J, JJ, K, KFAIL, KK, $ KPASS, KSKIP, KTESTS, LCM, LCMP, LTAU, LWF, $ LWORK, LWS, M, MNP, MNRHSP, MP, MQ, MYCOL, $ MYROW, N, NB, NBRHS, NCOLS, NGRIDS, NMAT, NNB, $ NNBR, NNR, NNRHSQ, NOUT, NP, NPCOL, NPROCS, $ NPROW, NROWS, NQ, NRHS, NRHSP, NRHSQ, WORKSIZ REAL ANORM, BNORM, SRESID, THRESH DOUBLE PRECISION ADDFAC, ADDS, MULFAC, MULTS, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCW( LLD_ ), $ DESCX( DLEN_ ), IERR( 2 ), MVAL( NTESTS ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL RESULT( 2 ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCGELS, PCGEMM, PCLACPY, $ PCLSINFO, PCMATGEN, PSCNRM2, $ PCSSCAL, PCQRT13, PCQRT16, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PCLANGE, PCQRT14, PCQRT17 EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PCLANGE, $ PCQRT14, PCQRT17 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * IASEED = 100 IBSEED = 200 CALL PCLSINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, NTESTS, $ NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 90 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 90 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( ( MYROW.GE.NPROW ).OR.( MYCOL.GE.NPCOL ) ) $ GO TO 90 * DO 80 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 80 END IF * * Loop over different blocking sizes * DO 70 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 70 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) MQ = NUMROC( M, NB, MYCOL, 0, NPCOL ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) MNP = MAX( MP, NP ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 70 END IF * DO 60 ISCALE = 1, 3 * ITYPE = ISCALE * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPX * WORKSIZ = NQ + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPX+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 70 END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Generate the matrix A and calculate its 1-norm * CALL PCQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, MEM( IPW ) ) * IF( CHECK ) THEN CALL PCCHEKPAD( ICTXT, 'PCQRT13', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQRT13', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * DO 50 ITRAN = 1, 2 * IF( ITRAN.EQ.1 ) THEN NROWS = M NCOLS = N TRANS = 'N' TPSD = .FALSE. ELSE NROWS = N NCOLS = M TRANS = 'C' TPSD = .TRUE. END IF * * Loop over the different values for NRHS * DO 40 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 30 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0, $ NPROW ) NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0, $ NPCOL ) * * Define Array descriptor for rhs MAX(M,N)xNRHS * CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MNP ) + IMIDPAD, $ IERR( 1 ) ) IF( TPSD ) THEN CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, MP ) + $ IMIDPAD, IERR( 2 ) ) ELSE CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, NP ) + $ IMIDPAD, IERR( 2 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, $ 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Check for enough memory * IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + $ IPREPAD IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD + $ IPREPAD WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Generation', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate RHS * IF( TPSD ) THEN CALL PCMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ MP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) ELSE CALL PCMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ NP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PCFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PCFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * DO 10 JJ = 1, NRHS CALL PSCNRM2( NCOLS, BNORM, MEM( IPW ), $ 1, JJ, DESCW, 1 ) IF( BNORM.GT.RZERO ) $ CALL PCSSCAL( NCOLS, RONE / BNORM, $ MEM( IPW ), 1, JJ, DESCW, $ 1 ) 10 CONTINUE * CALL PCGEMM( TRANS, 'N', NROWS, NRHS, NCOLS, $ ONE, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ), 1, 1, DESCW, ZERO, $ MEM( IPX ), 1, 1, DESCX ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'Generation', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'Generation', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PCCHEKPAD( ICTXT, 'Generation', $ MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PCCHEKPAD( ICTXT, 'Generation', $ NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Allocate space for copy of rhs * IPB = IPW * IF( TPSD ) THEN CALL DESCINIT( DESCB, N, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, $ IERR( 1 ) ) ELSE CALL DESCINIT( DESCB, M, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, $ IERR( 1 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * IPW = IPB + DESCB( LLD_ )*NRHSQ + $ IPOSTPAD + IPREPAD * END IF * * Calculate the amount of workspace for PCGELS * IF( M.GE.N ) THEN LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0, $ NPCOL ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, $ ( MP + NRHSQ ) * NB ) + NB*NB ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LTAU = NUMROC( MIN(M,N), NB, MYROW, 0, $ NPROW ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP + $ MAX( NQ + NUMROC( NUMROC( N, NB, 0, $ 0, NPROW ), NB, 0, 0, LCMP ), $ NRHSQ ) ) * NB ) + NB*NB END IF * LWORK = LTAU + MAX( LWF, LWS ) WORKSIZ = LWORK + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'solve', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Make the copy of the right hand side * CALL PCLACPY( 'All', NROWS, NRHS, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB ) * IF( TPSD ) THEN CALL PCFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PCFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF CALL PCFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Solve the LS or overdetermined system * CALL PCGELS( TRANS, M, N, NRHS, MEM( IPA ), $ 1, 1, DESCA, MEM( IPX ), 1, 1, $ DESCX, MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGELS', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGELS', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGELS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Regenerate A in place for testing and next * iteration * CALL PCQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, $ MEM( IPW ) ) * * check the solution to rhs * IF( CHECK ) THEN * * Am I going to call PCQRT17 ? * IF( ( M.GE.N .AND. ( .NOT.TPSD ) ) .OR. $ ( M.LT.N .AND. TPSD ) ) THEN * * Call PCQRT17 first, A, X, and B remain * unchanged. Solving LS system * * Check amount of memory for PCQRT17 * IF( TPSD ) THEN WORKSIZ = NP*NRHSQ + NRHSP*MQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ ICEIL( REALSZ*MAX( NQ, MAX( $ MQ, NRHSQ ) ), CPLXSZ ) + $ IPOSTPAD ELSE WORKSIZ = MP*NRHSQ + NRHSP*NQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ ICEIL( REALSZ*MAX( NQ, $ NRHSQ ), CPLXSZ ) + $ IPOSTPAD END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * RESULT( 2 ) = PCQRT17( TRANS, 1, M, N, $ NRHS, $ MEM( IPA ), $ 1, 1, DESCA, $ MEM( IPX ), 1, $ 1, DESCX, $ MEM( IPB ), $ 1, 1, DESCB, $ MEM( IPW ), $ MEM( IPW2 ) ) SRESID = RESULT( 2 ) * CALL PCCHEKPAD( ICTXT, 'PCQRT17', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQRT17', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PCCHEKPAD( ICTXT, 'PCQRT17', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PCCHEKPAD( ICTXT, 'PCQRT17', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PCCHEKPAD( ICTXT, 'PCQRT17', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Call PCQRT16, B will be destroyed. * IF( TPSD ) THEN WORKSIZ = MP + IPOSTPAD ELSE WORKSIZ = NQ + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCQRT16( TRANS, M, N, NRHS, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ), RESULT( 1 ) ) * CALL PCCHEKPAD( ICTXT, 'PCQRT16', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQRT16', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PCCHEKPAD( ICTXT, 'PCQRT16', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PCCHEKPAD( ICTXT, 'PCQRT16', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PCCHEKPAD( ICTXT, 'PCQRT16', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Call PCQRT14 * IF( ( M.GE.N .AND. TPSD ) .OR. $ ( M.LT.N .AND. ( .NOT.TPSD ) ) ) THEN * IPW = IPB * IF( TPSD ) THEN * NNRHSQ = NUMROC( N+NRHS, NB, MYCOL, $ 0, NPCOL ) LTAU = NUMROC( MIN( M, N+NRHS ), NB, $ MYCOL, 0, NPCOL ) LWF = NB * ( NB + MP + NNRHSQ ) WORKSIZ = MP * NNRHSQ + LTAU + LWF + $ IPOSTPAD * ELSE * MNRHSP = NUMROC( M+NRHS, NB, MYROW, $ 0, NPROW ) LTAU = NUMROC( MIN( M+NRHS, N ), NB, $ MYROW, 0, NPROW ) LWF = NB * ( NB + MNRHSP + NQ ) WORKSIZ = MNRHSP * NQ + LTAU + LWF + $ IPOSTPAD * END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PCFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Solve underdetermined system * RESULT( 2 ) = PCQRT14( TRANS, M, N, $ NRHS, $ MEM( IPA ), 1, $ 1, DESCA, $ MEM( IPX ), $ 1, 1, DESCX, $ MEM( IPW ) ) SRESID = RESULT( 2 ) * CALL PCCHEKPAD( ICTXT, 'PCQRT14', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQRT14', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCQRT14', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Print information about the tests that * did not pass the threshold. * PASSED = 'PASSED' DO 20 II = 1, 2 IF( ( RESULT( II ).GE.THRESH ) .AND. $ ( RESULT( II )-RESULT( II ).EQ.0.0E+0 $ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )TRANS, $ M, N, NRHS, NB, ITYPE, II, $ RESULT( II ) KFAIL = KFAIL + 1 PASSED = 'FAILED' ELSE KPASS = KPASS + 1 END IF 20 CONTINUE * ELSE * * By-pass the solve check * KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock * timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN ADDFAC = 2 MULFAC = 6 IF( M.GE.N ) THEN * * NOPS = SOPLA( 'CGEQRF', M, N, 0, 0, * NB ) + SOPLA( 'CUNMQR', M, NRHS, N, * 0, NB ) * MULTS = N*( ( ( 23.D0 / 6.D0 )+M+N / $ 2.D0 )+ N*( M-N / 3.D0 ) ) + $ N*NRHS*( 2.D0*M+2.D0-N ) ADDS = N*( ( 5.D0 / 6.D0 )+N* $ ( 1.D0 / 2.D0+( M-N / 3.D0 ) ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) ELSE * * NOPS = SOPLA( 'CGELQF', M, N, 0, 0, * NB ) + SOPLA( 'CUNMLQ', M, * NRHS, N, 0, NB ) * MULTS = M*( ( ( 29.D0 / 6.D0 )+2.D0*N-M $ / 2.D0 )+M*( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+2.D0-N ) ADDS = M*( ( 5.D0 / 6.D0 )+M / 2.D0+M* $ ( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) END IF NOPS = ADDFAC*ADDS + MULFAC*MULTS * * Calculate total megaflops, for WALL and * CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'WALL', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, WTIME( 1 ), $ TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'CPU ', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, CTIME( 1 ), $ TMFLOPS, PASSED END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE 60 CONTINUE 70 CONTINUE 80 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 90 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ', $ 'LS Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ', $ '--------- -------- ------' ) 9993 FORMAT( A4, 3X, A1, 3X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ I5, 1X, I5, 1X, F9.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( ' TRANS=''', A1, ''', M=', I5, ', N=', I5, ', NRHS=', I4, $ ', NB=', I4, ', type', I2, ', test(', I2, ')=', G12.5 ) * STOP * * End of PCLSDRIVER * END scalapack-1.8.0/TESTING/LIN/pzqrt13.f0000640000175000017500000002061510363532303016552 0ustar muammarmuammar SUBROUTINE PZQRT13( SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, $ WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ISEED, JA, M, N, SCALE DOUBLE PRECISION NORMA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZQRT13 generates a full-rank matrix that may be scaled to have * large or small norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCALE (global input) INTEGER * SCALE = 1: normally scaled matrix * SCALE = 2: matrix scaled up * SCALE = 3: matrix scaled down * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * NORMA (global output) DOUBLE PRECISION * The one-norm of A. * * ISEED (global input/global output) INTEGER * Seed for random number generator. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= Nq0, where * * ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), and * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IIA, INFO, $ IROFFA, J, JJA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION ASUM, BIGNUM, SMLNUM COMPLEX*16 AJJ * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL NUMROC, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PDLABAD, $ PDZASUM, PZLASCL, PZMATGEN, $ PZELGET, PZELSET * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MOD, SIGN * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * generate the matrix * IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), A, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED, IIA-1, MP, $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL ) * DO 10 J = JA, JA+N-1 I = IA + J - JA IF( I.LE.IA+M-1 ) THEN CALL PDZASUM( M, ASUM, A, IA, J, DESCA, 1 ) CALL PZELGET( 'Column', ' ', AJJ, A, I, J, DESCA ) AJJ = AJJ + DCMPLX( SIGN( ASUM, DBLE( AJJ ) ) ) CALL PZELSET( A, I, J, DESCA, AJJ ) END IF 10 CONTINUE * * scaled versions * IF( SCALE.NE.1 ) THEN * NORMA = PZLANGE( 'M', M, N, A, IA, JA, DESCA, WORK ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'Epsilon' ) BIGNUM = ONE / SMLNUM * IF( SCALE.EQ.2 ) THEN * * matrix scaled up * CALL PZLASCL( 'General', NORMA, BIGNUM, M, N, A, IA, $ JA, DESCA, INFO ) * ELSE IF( SCALE.EQ.3 ) THEN * * matrix scaled down * CALL PZLASCL( 'General', NORMA, SMLNUM, M, N, A, IA, $ JA, DESCA, INFO ) * END IF * END IF * NORMA = PZLANGE( 'One-norm', M, N, A, IA, JA, DESCA, WORK ) * RETURN * * End of PZQRT13 * END scalapack-1.8.0/TESTING/LIN/pslsdriver.f0000640000175000017500000013500210363532303017420 0ustar muammarmuammar PROGRAM PSLSDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PSLSDRIVER is the main test program for the REAL * SCALAPACK (full rank) Least Squares routines. This test driver solves * full-rank least square problems. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 17 lines: * 'ScaLapack LS solve input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LS.out' output file name (if any) * 6 device out * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 3 number of NRHS's * 2 3 5 values of NRHS * 2 number of NBRHS's * 1 2 values of NBRHS * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL REAL ONE, ZERO PARAMETER ( REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0 ) PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, TPSD CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, II, IMIDPAD, $ INFO, IPA, IPB, IPOSTPAD, IPREPAD, IPW, IPW2, $ IPX, ISCALE, ITRAN, ITYPE, J, JJ, K, KFAIL, KK, $ KPASS, KSKIP, KTESTS, LCM, LCMP, LTAU, LWF, $ LWORK, LWS, M, MNP, MNRHSP, MP, MQ, MYCOL, $ MYROW, N, NB, NBRHS, NCOLS, NGRIDS, NMAT, NNB, $ NNBR, NNR, NNRHSQ, NOUT, NP, NPCOL, NPROCS, $ NPROW, NROWS, NQ, NRHS, NRHSP, NRHSQ, WORKSIZ REAL ANORM, BNORM, SRESID, THRESH DOUBLE PRECISION ADDFAC, ADDS, MULFAC, MULTS, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCW( LLD_ ), $ DESCX( DLEN_ ), IERR( 2 ), MVAL( NTESTS ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ), RESULT( 2 ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSGELS, PSGEMM, PSLACPY, $ PSLSINFO, PSMATGEN, PSNRM2, PSSCAL, $ PSQRT13, PSQRT16, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PSLANGE, PSQRT14, PSQRT17 EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PSLANGE, $ PSQRT14, PSQRT17 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * IASEED = 100 IBSEED = 200 CALL PSLSINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, NTESTS, $ NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 90 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 90 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( ( MYROW.GE.NPROW ).OR.( MYCOL.GE.NPCOL ) ) $ GO TO 90 * DO 80 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 80 END IF * * Loop over different blocking sizes * DO 70 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 70 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) MQ = NUMROC( M, NB, MYCOL, 0, NPCOL ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) MNP = MAX( MP, NP ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 70 END IF * DO 60 ISCALE = 1, 3 * ITYPE = ISCALE * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPX * WORKSIZ = NQ + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPX+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 70 END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Generate the matrix A and calculate its 1-norm * CALL PSQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, MEM( IPW ) ) * IF( CHECK ) THEN CALL PSCHEKPAD( ICTXT, 'PSQRT13', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQRT13', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * DO 50 ITRAN = 1, 2 * IF( ITRAN.EQ.1 ) THEN NROWS = M NCOLS = N TRANS = 'N' TPSD = .FALSE. ELSE NROWS = N NCOLS = M TRANS = 'T' TPSD = .TRUE. END IF * * Loop over the different values for NRHS * DO 40 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 30 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0, $ NPROW ) NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0, $ NPCOL ) * * Define Array descriptor for rhs MAX(M,N)xNRHS * CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MNP ) + IMIDPAD, $ IERR( 1 ) ) IF( TPSD ) THEN CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, MP ) + $ IMIDPAD, IERR( 2 ) ) ELSE CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, NP ) + $ IMIDPAD, IERR( 2 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, $ 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Check for enough memory * IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + $ IPREPAD IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD + $ IPREPAD WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Generation', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate RHS * IF( TPSD ) THEN CALL PSMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ MP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) ELSE CALL PSMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ NP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PSFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PSFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * DO 10 JJ = 1, NRHS CALL PSNRM2( NCOLS, BNORM, MEM( IPW ), 1, $ JJ, DESCW, 1 ) IF( BNORM.GT.ZERO ) $ CALL PSSCAL( NCOLS, ONE / BNORM, $ MEM( IPW ), 1, JJ, DESCW, $ 1 ) 10 CONTINUE * CALL PSGEMM( TRANS, 'N', NROWS, NRHS, NCOLS, $ ONE, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ), 1, 1, DESCW, ZERO, $ MEM( IPX ), 1, 1, DESCX ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'Generation', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'Generation', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PSCHEKPAD( ICTXT, 'Generation', $ MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PSCHEKPAD( ICTXT, 'Generation', $ NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Allocate space for copy of rhs * IPB = IPW * IF( TPSD ) THEN CALL DESCINIT( DESCB, N, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, $ IERR( 1 ) ) ELSE CALL DESCINIT( DESCB, M, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, $ IERR( 1 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * IPW = IPB + DESCB( LLD_ )*NRHSQ + $ IPOSTPAD + IPREPAD * END IF * * Calculate the amount of workspace for PSGELS * IF( M.GE.N ) THEN LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0, $ NPCOL ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, $ ( MP + NRHSQ ) * NB ) + NB*NB ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LTAU = NUMROC( MIN(M,N), NB, MYROW, 0, $ NPROW ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP + $ MAX( NQ + NUMROC( NUMROC( N, NB, 0, $ 0, NPROW ), NB, 0, 0, LCMP ), $ NRHSQ ) ) * NB ) + NB*NB END IF * LWORK = LTAU + MAX( LWF, LWS ) WORKSIZ = LWORK + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'solve', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Make the copy of the right hand side * CALL PSLACPY( 'All', NROWS, NRHS, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB ) * IF( TPSD ) THEN CALL PSFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PSFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF CALL PSFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Solve the LS or overdetermined system * CALL PSGELS( TRANS, M, N, NRHS, MEM( IPA ), $ 1, 1, DESCA, MEM( IPX ), 1, 1, $ DESCX, MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGELS', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGELS', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGELS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Regenerate A in place for testing and next * iteration * CALL PSQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, $ MEM( IPW ) ) * * check the solution to rhs * IF( CHECK ) THEN * * Am I going to call PSQRT17 ? * IF( ( M.GE.N .AND. ( .NOT.TPSD ) ) .OR. $ ( M.LT.N .AND. TPSD ) ) THEN * * Call PSQRT17 first, A, X, and B remain * unchanged. Solving LS system * * Check amount of memory for PSQRT17 * IF( TPSD ) THEN WORKSIZ = NP*NRHSQ + NRHSP*MQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ MAX( NQ, MAX( MQ, NRHSQ ) ) + $ IPOSTPAD ELSE WORKSIZ = MP*NRHSQ + NRHSP*NQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ MAX( NQ, NRHSQ ) + $ IPOSTPAD END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * RESULT( 2 ) = PSQRT17( TRANS, 1, M, N, $ NRHS, $ MEM( IPA ), $ 1, 1, DESCA, $ MEM( IPX ), 1, $ 1, DESCX, $ MEM( IPB ), $ 1, 1, DESCB, $ MEM( IPW ), $ MEM( IPW2 ) ) SRESID = RESULT( 2 ) * CALL PSCHEKPAD( ICTXT, 'PSQRT17', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQRT17', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PSCHEKPAD( ICTXT, 'PSQRT17', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PSCHEKPAD( ICTXT, 'PSQRT17', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PSCHEKPAD( ICTXT, 'PSQRT17', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Call PSQRT16, B will be destroyed. * IF( TPSD ) THEN WORKSIZ = MP + IPOSTPAD ELSE WORKSIZ = NQ + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSQRT16( TRANS, M, N, NRHS, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ), RESULT( 1 ) ) * CALL PSCHEKPAD( ICTXT, 'PSQRT16', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQRT16', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PSCHEKPAD( ICTXT, 'PSQRT16', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PSCHEKPAD( ICTXT, 'PSQRT16', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PSCHEKPAD( ICTXT, 'PSQRT16', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Call PSQRT14 * IF( ( M.GE.N .AND. TPSD ) .OR. $ ( M.LT.N .AND. ( .NOT.TPSD ) ) ) THEN * IPW = IPB * IF( TPSD ) THEN * NNRHSQ = NUMROC( N+NRHS, NB, MYCOL, $ 0, NPCOL ) LTAU = NUMROC( MIN( M, N+NRHS ), NB, $ MYCOL, 0, NPCOL ) LWF = NB * ( NB + MP + NNRHSQ ) WORKSIZ = MP * NNRHSQ + LTAU + LWF + $ IPOSTPAD * ELSE * MNRHSP = NUMROC( M+NRHS, NB, MYROW, $ 0, NPROW ) LTAU = NUMROC( MIN( M+NRHS, N ), NB, $ MYROW, 0, NPROW ) LWF = NB * ( NB + MNRHSP + NQ ) WORKSIZ = MNRHSP * NQ + LTAU + LWF + $ IPOSTPAD * END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Solve underdetermined system * RESULT( 2 ) = PSQRT14( TRANS, M, N, $ NRHS, $ MEM( IPA ), 1, $ 1, DESCA, $ MEM( IPX ), $ 1, 1, DESCX, $ MEM( IPW ) ) SRESID = RESULT( 2 ) * CALL PSCHEKPAD( ICTXT, 'PSQRT14', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQRT14', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSQRT14', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Print information about the tests that * did not pass the threshold. * PASSED = 'PASSED' DO 20 II = 1, 2 IF( ( RESULT( II ).GE.THRESH ) .AND. $ ( RESULT( II )-RESULT( II ).EQ.0.0E+0 $ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )TRANS, $ M, N, NRHS, NB, ITYPE, II, $ RESULT( II ) KFAIL = KFAIL + 1 PASSED = 'FAILED' ELSE KPASS = KPASS + 1 END IF 20 CONTINUE * ELSE * * By-pass the solve check * KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock * timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN ADDFAC = 1 MULFAC = 1 IF( M.GE.N ) THEN * * NOPS = SOPLA( 'SGEQRF', M, N, 0, 0, * NB ) + SOPLA( 'SORMQR', M, NRHS, N, * 0, NB ) * MULTS = N*( ( ( 23.D0 / 6.D0 )+M+N / $ 2.D0 )+ N*( M-N / 3.D0 ) ) + $ N*NRHS*( 2.D0*M+2.D0-N ) ADDS = N*( ( 5.D0 / 6.D0 )+N* $ ( 1.D0 / 2.D0+( M-N / 3.D0 ) ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) ELSE * * NOPS = SOPLA( 'SGELQF', M, N, 0, 0, * NB ) + SOPLA( 'SORMLQ', M, * NRHS, N, 0, NB ) * MULTS = M*( ( ( 29.D0 / 6.D0 )+2.D0*N-M $ / 2.D0 )+M*( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+2.D0-N ) ADDS = M*( ( 5.D0 / 6.D0 )+M / 2.D0+M* $ ( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) END IF NOPS = ADDFAC*ADDS + MULFAC*MULTS * * Calculate total megaflops, for WALL and * CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'WALL', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, WTIME( 1 ), $ TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'CPU ', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, CTIME( 1 ), $ TMFLOPS, PASSED END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE 60 CONTINUE 70 CONTINUE 80 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 90 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ', $ 'LS Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ', $ '--------- -------- ------' ) 9993 FORMAT( A4, 3X, A1, 3X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ I5, 1X, I5, 1X, F9.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( ' TRANS=''', A1, ''', M=', I5, ', N=', I5, ', NRHS=', I4, $ ', NB=', I4, ', type', I2, ', test(', I2, ')=', G12.5 ) * STOP * * End of PSLSDRIVER * END scalapack-1.8.0/TESTING/LIN/pcpbinfo.f0000640000175000017500000004472710363532303017040 0ustar muammarmuammar SUBROUTINE PCPBINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NBW, $ BWVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, NNR, $ NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCPBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of subdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( UPLO, 'L' ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data represents ''Upper'// $ ''' or ''Lower'' triangular portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'TRANS : Whether solve is to be done with'// $ ' ''Transpose'' of matrix A (T,C) or not (N).' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bw : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bw ', ( BWVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCPBINFO * END scalapack-1.8.0/TESTING/LIN/pdgbinfo.f0000640000175000017500000004433510363532303017023 0ustar muammarmuammar SUBROUTINE PDGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PDGBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDGBINFO * END scalapack-1.8.0/TESTING/LIN/psgeqrrv.f0000640000175000017500000002337510363532303017105 0ustar muammarmuammar SUBROUTINE PSGEQRRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQRRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from Q, R * computed by PSGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors Q and R computed * by PSGEQRF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors computed by PSGEQRF. TAU * is tied to the distributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK = NB_A * ( 2*Mp0 + Nq0 + NB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, IIA, IPT, IPV, IPW, $ IROFF, IV, J, JB, JJA, JL, JN, K, MP, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLACPY, $ PSLARFB, PSLARFT, PSLASET, PB_TOPGET, $ PB_TOPSET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPV = 1 IPT = IPV + MP * DESCA( NB_ ) IPW = IPT + DESCA( NB_ ) * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * K = MIN( M, N ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) * CALL DESCSET( DESCV, M+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, INDXG2P( JL, DESCA( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, MP ) ) * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+K-J, DESCA( NB_ ) ) I = IA + J - JA IV = 1 + J - JA + IROFF * * Compute upper triangular matrix T * CALL PSLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'Lower', M-I+IA, JB, A, I, J, DESCA, WORK( IPV ), $ IV, 1, DESCV ) CALL PSLASET( 'Upper', M-I+IA, JB, ZERO, ONE, WORK( IPV ), IV, $ 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get * block column of R * CALL PSLASET( 'Lower', M-I+IA-1, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-I+IA, N-J+JA, JB, WORK( IPV ), IV, 1, DESCV, $ WORK( IPT ), A, I, J, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle first block separately * JB = JN - JA + 1 * * Compute upper triangular matrix T * CALL PSLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PSLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IROFF+1, 1, DESCV ) CALL PSLASET( 'Upper', M, JB, ZERO, ONE, WORK, IROFF+1, 1, DESCV ) * * Zeroes the strict lower triangular part of sub( A ) to get block * column of R * CALL PSLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Apply block Householder transformation * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', M, $ N, JB, WORK( IPV ), IROFF+1, 1, DESCV, WORK( IPT ), $ A, IA, JA, DESCA, WORK( IPW ) ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSGEQRRV * END scalapack-1.8.0/TESTING/LIN/pzptlaschk.f0000640000175000017500000002754010363532303017415 0ustar muammarmuammar SUBROUTINE PZPTLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX*16 A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PZPTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, PBZTRAN, $ PZMATGEN, ZGAMX2D, ZGEMM, ZGSUM2D, $ ZLASET * .. * .. External Functions .. INTEGER IZAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PZTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (2), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PZBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PZBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF IF( LSAME( UPLO, 'U' ) ) THEN * * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL ZGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.GT.0 ) THEN CALL ZGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PZPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PZMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PZAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDZNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDZNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PZTLASCHK * END scalapack-1.8.0/TESTING/LIN/pzludriver.f0000640000175000017500000012531210363532303017434 0ustar muammarmuammar PROGRAM PZLUDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * PZLUDRIVER is the main test program for the COMPLEX*16 * SCALAPACK LU routines. This test driver performs an LU factorization * and solve. If the input matrix is non-square, only the factorization * is performed. Condition estimation and iterative refinement are * optionally performed. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 2.0, LU factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LU.out' output file name (if any) * 6 device out * 1 number of problems sizes * 31 201 values of M * 31 201 values of N * 1 number of NB's * 2 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 1 4 2 3 8 values of P * 2 4 1 3 2 1 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INTGSZ, DBLESZ, MEMSIZ, NTESTS, TOTMEM, ZPLXSZ DOUBLE PRECISION ZERO COMPLEX*16 PADVAL PARAMETER ( INTGSZ = 4, DBLESZ = 8, TOTMEM = 2000000, $ ZPLXSZ = 16, MEMSIZ = TOTMEM / ZPLXSZ, $ NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPOSTPAD, IPPIV, IPREPAD, IPW, IPW2, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LIPIV, LRWORK, LWORK, LW2, M, MAXMN, $ MINMN, MP, MYCOL, MYRHS, MYROW, N, NB, NBRHS, $ NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, NPCOL, $ NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, ANORM1, FRESID, NOPS, RCOND, $ SRESID, SRESID2, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ MVAL( NTESTS ), NBRVAL( NTESTS ), $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZGECON, PZGERFS, $ PZGETRF, PZGETRRV, PZGETRS, $ PZLAFCHK, PZLASCHK, PZLUINFO, $ PZMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PZLANGE EXTERNAL ICEIL, ILCM, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PZLUINFO( OUTFILE, NOUT, NMAT, MVAL, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, THRESH, $ EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST .AND. M.EQ.N ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPPIV = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPPIV = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF LIPIV = ICEIL( INTGSZ*( MP+NB ), ZPLXSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * IF( CHECK ) THEN * * Calculate the amount of workspace required by the * checking routines PZLANGE, PZGETRRV, and * PZLAFCHK * WORKSIZ = MAX( 2, NQ ) * WORKSIZ = MAX( WORKSIZ, MP*DESCA( NB_ )+ $ NQ*DESCA( MB_ ) ) * WORKSIZ = MAX( WORKSIZ, MP * DESCA( NB_ ) ) * WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate matrix A of Ax = b * CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ MP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, LIPIV, 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) ANORM1 = PZLANGE( '1', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST .AND. M.EQ.N ) THEN CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PZFILLPAD( ICTXT, MP, NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform LU factorization * CALL PZGETRF( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PZGETRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LU factorization * CALL PZCHEKPAD( ICTXT, 'PZGETRF', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * IF( M.NE.N ) THEN * * For non-square matrices, factorization only * NRHS = 0 NBRHS = 0 * IF( CHECK ) THEN * * Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps) * CALL PZGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGETRRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRRV', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( (FRESID-FRESID).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) * * 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU * factorization M >= N * NOPS = 4.0D+0*DBLE(MAXMN)*(DBLE(MINMN)**2) - $ (4.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) + $ (2.0D+0)*DBLE( MAXMN )*DBLE( MINMN ) - $ (3.0D+0)*( DBLE( MINMN )**2 ) * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * WTIME( 2 ) = 0.0D+0 IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ), $ WTIME( 2 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * CTIME( 2 ) = 0.0D+0 IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ), $ CTIME( 2 ), TMFLOPS, PASSED END IF * ELSE * * If M = N * IF( EST ) THEN * * Calculate workspace required for PZGECON * LWORK = MAX( 1, 2*NP ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, 2*NQ ) LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PZGECON( '1', N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LRWORK, INFO ) * IF( CHECK ) THEN CALL PZCHEKPAD( ICTXT, 'PZGECON', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGECON', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGECON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), $ MYCOL, DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + $ IPOSTPAD + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * * Set worksiz: routines requiring most workspace * is PZLASCHK * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PZMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) $ CALL PZFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PZMATGEN( ICTXT, 'No', 'No', $ DESCB( M_ ), DESCB( N_ ), $ DESCB( MB_ ), DESCB( NB_ ), $ MEM( IPB0 ), DESCB( LLD_ ), $ DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, $ 0, MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear sytem via LU factorization * CALL PZGETRS( 'No', N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), MEM( IPB ), $ 1, 1, DESCB, INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGETRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRS', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PZLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( SRESID.LE.THRESH .AND. $ ( SRESID-SRESID ).EQ.0.0D+0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PZGERFS * LWORK = MAX( 1, 2*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, NP ) LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PZGERFS( 'No', N, NRHS, MEM( IPA0 ), 1, $ 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPPIV ), $ MEM( IPB0 ), 1, 1, DESCB, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LRWORK, INFO ) * IF( CHECK ) THEN CALL PZCHEKPAD( ICTXT, 'PZGERFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), $ LIPIV, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGERFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PZLASCHK( 'No', 'N', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 8/3 N^3 - N^2 flops for LU factorization * NOPS = (8.0D+0/3.0D+0)*( DBLE(N)**3 ) - $ DBLE(N)**2 * * nrhs * 8 N^2 flops for LU solve. * NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization * and solve -- for WALL and CPU time, and print * output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * * Print WALL time if supported * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if supported * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) $ THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED END IF 10 CONTINUE 20 CONTINUE * IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN * * Compute fresid = ||A - P*L*U|| / (||A|| * N * eps) * CALL PZGETRRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, $ 1, DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGETRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRRV', LIPIV, $ 1, MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID END IF END IF 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ', $ 'Sol Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PZLUDRIVER * END scalapack-1.8.0/TESTING/LIN/pddtlaschk.f0000640000175000017500000003064310363532303017351 0ustar muammarmuammar SUBROUTINE PDDTLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PDDTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'T', A 'Transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, DGEBR2D, $ DGEBS2D, DGEMM, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, PBDTRAN, PDMATGEN * .. * .. External Functions .. INTEGER IDAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PDTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (3), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.GT.0 ) THEN CALL DGESD2D( ICTXT, 1, 1, A( START+2), LDA, $ MYROW, MYCOL-1 ) ENDIF * IF( MYCOL.LT.NPCOL-1 ) THEN CALL DGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 220 I=0,DESCA( NB_ )-1 A( START+2+(I)*LDA ) = A( START+2+(I+1)*LDA ) 220 CONTINUE * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, 1, 1, $ A( START+2+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * IF( MYCOL.GT.0 ) THEN CALL DGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PDGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (INT_ONE+2)*INT_ONE, INFO ) * * * Regenerate column of B * CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PDTLASCHK * END scalapack-1.8.0/TESTING/LIN/psinvdriver.f0000640000175000017500000007757710430450246017624 0ustar muammarmuammar PROGRAM PSINVDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PSINVDRIVER is the main test program for the REAL * SCALAPACK matrix inversion routines. This test driver computes the * inverse of different kind of matrix and tests the results. * * The program must be driven by a short data file. An annotated example * of a data file can be obtained by deleting the first 3 characters * from the following 14 lines: * 'ScaLAPACK Matrix Inversion Testing input file' * 'PVM machine.' * 'INV.out' output file name (if any) * 6 device out * 5 number of matrix types (next line) * 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD * 4 number of problems sizes * 1000 2000 3000 4000 values of N * 3 number of NB's * 4 30 35 values of NB * 2 number of process grids (ordered P & Q) * 4 2 values of P * 4 4 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INTGSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL, ZERO PARAMETER ( INTGSZ = 4, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER UPLO CHARACTER*3 MTYP CHARACTER*6 PASSED CHARACTER*80 OUTFILE LOGICAL CHECK INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, $ IPPIV, IPREPAD, IPOSTPAD, IPIW, IPW, ITEMP, J, $ K, KTESTS, KPASS, KFAIL, KSKIP, L, LCM, LIPIV, $ LIWORK, LWORK, MYCOL, MYROW, N, NB, NGRIDS, $ NMAT, NMTYP, NNB, NOUT, NP, NPCOL, NPROCS, $ NPROW, NQ, WORKIINV, WORKINV, WORKSIZ REAL ANORM, FRESID, RCOND, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. CHARACTER*3 MATTYP( NTESTS ) INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSGETRF, PSGETRI, $ PSINVCHK, PSINVINFO, PSLASET, $ PSMATGEN, PSPOTRF, PSPOTRI, $ PSTRTRI, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, ILCM, NUMROC REAL PSLANGE, PSLANSY, PSLANTR EXTERNAL ICEIL, ILCM, LSAMEN, NUMROC, PSLANGE, $ PSLANSY, PSLANTR * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP /4*0/ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSINVINFO( OUTFILE, NOUT, NMTYP, MATTYP, NTESTS, NMAT, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Loop over the different matrix types * DO 40 I = 1, NMTYP * MTYP = MATTYP( I ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a general matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is an upper triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'LTR' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a lower triangular matrix.' ELSE IF( LSAMEN( 3, MTYP, 'UPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a symmetric positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the upper triangular part will be '// $ 'referenced.' ELSE IF( LSAMEN( 3, MTYP, 'LPD' ) ) THEN WRITE( NOUT, FMT = 9986 ) $ 'A is a symmetric positive definite matrix.' WRITE( NOUT, FMT = 9986 ) $ 'Only the lower triangular part will be '// $ 'referenced.' END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 J = 1, NGRIDS * NPROW = PVAL( J ) NPCOL = QVAL( J ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 K = 1, NMAT * N = NVAL( K ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 L = 1, NNB * NB = NBVAL( L ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for ScaLAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 * LCM = ILCM( NPROW, NPCOL ) IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Pivots are needed by LU factorization * IPPIV = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + $ IPREPAD LIPIV = ICEIL( INTGSZ * ( NP + NB ), REALSZ ) IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD * LWORK = MAX( 1, NP * DESCA( NB_ ) ) WORKINV = LWORK + IPOSTPAD * * Figure the amount of workspace required by the * general matrix inversion * IF( NPROW.EQ.NPCOL ) THEN LIWORK = NQ + DESCA( NB_ ) ELSE * * change the integer workspace needed for PDGETRI * LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) * * $ ICEIL( ICEIL( DESCA( LLD_ ), * $ DESCA( MB_ ) ), LCM / NPROW ) ) * $ + NQ LIWORK = NUMROC( DESCA( M_ ) + $ DESCA( MB_ ) * NPROW $ + MOD ( 1 - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF WORKIINV = ICEIL( LIWORK*INTGSZ, REALSZ ) + $ IPOSTPAD IPIW = IPW + WORKINV + IPREPAD WORKSIZ = WORKINV + IPREPAD + WORKIINV * ELSE * * No pivots or workspace needed for triangular or * symmetric positive definite matrices. * IPW = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD WORKSIZ = 1 + IPOSTPAD * END IF * IF( CHECK ) THEN * * Figure amount of work space for the norm * computations * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN ITEMP = NQ ELSE ITEMP = 2 * NQ + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), $ LCM / NPROW ) END IF END IF WORKSIZ = MAX( WORKSIZ-IPOSTPAD, ITEMP ) * * Figure the amount of workspace required by the * checking routine * WORKSIZ = MAX( WORKSIZ, 2 * NB * MAX( 1, NP ) ) + $ IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'inversion', $ ( IPW + WORKSIZ ) * REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( LSAMEN( 3, MTYP, 'GEN' ).OR. $ LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Generate a general diagonally dominant matrix A * CALL PSMATGEN( ICTXT, 'N', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Generate a symmetric positive definite matrix * CALL PSMATGEN( ICTXT, 'S', 'D', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, $ NQ, MYROW, MYCOL, NPROW, NPCOL ) * END IF * * Zeros not-referenced part of A, if any. * IF( LSAMEN( 1, MTYP, 'U' ) ) THEN * UPLO = 'U' CALL PSLASET( 'Lower', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 2, 1, DESCA ) * ELSE IF( LSAMEN( 1, MTYP, 'L' ) ) THEN * UPLO = 'L' CALL PSLASET( 'Upper', N-1, N-1, ZERO, ZERO, $ MEM( IPA ), 1, 2, DESCA ) * ELSE * UPLO = 'G' * END IF * * Need 1-norm of A for checking * IF( CHECK ) THEN * CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * CALL PSFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANGE( '1', N, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * ANORM = PSLANTR( '1', UPLO, 'Non unit', N, N, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANTR', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANTR', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * ANORM = PSLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'SY' ) ) THEN * CALL PSFILLPAD( ICTXT, LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANSY( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD,IPOSTPAD, PADVAL ) * END IF * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * Perform LU factorization * CALL SLTIMER( 1 ) CALL PSGETRF( N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGETRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRF', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PSGETRI( N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPPIV ), MEM( IPW ), LWORK, $ MEM( IPIW ), LIWORK, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGETRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRI', LIPIV, 1, $ MEM( IPPIV-IPREPAD ), LIPIV, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRI', $ WORKIINV-IPOSTPAD, 1, $ MEM( IPIW-IPREPAD ), $ WORKIINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGETRI', $ WORKINV-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKINV-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * Perform the general matrix inversion * CALL SLTIMER( 2 ) CALL PSTRTRI( UPLO, 'Non unit', N, MEM( IPA ), 1, $ 1, DESCA, INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSTRTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * Perform Cholesky factorization * CALL SLTIMER( 1 ) CALL PSPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * Perform the symmetric positive definite matrix * inversion * CALL SLTIMER( 2 ) CALL PSPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ INFO ) CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPOTRI', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * END IF * IF( CHECK ) THEN * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Compute fresid = || inv(A)*A-I || * CALL PSINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA, $ IASEED, ANORM, FRESID, RCOND, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSINVCHK', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSINVCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. INFO.EQ.0 .AND. $ ( (FRESID-FRESID) .EQ. 0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 IF( INFO.GT.0 ) THEN PASSED = 'SINGUL' ELSE PASSED = 'FAILED' END IF END IF * ELSE * * Don't perform the checking, only the timing * operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( LSAMEN( 3, MTYP, 'GEN' ) ) THEN * * 2/3 N^3 - 1/2 N^2 flops for LU factorization * NOPS = ( 2.0D+0 / 3.0D+0 )*( DBLE( N )**3 ) - $ ( 1.0D+0 / 2.0D+0 )*( DBLE( N )**2 ) * * 4/3 N^3 - N^2 flops for inversion * NOPS = NOPS + $ ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) - $ DBLE( N )**2 * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'TR' ) ) THEN * * 1/3 N^3 + 2/3 N flops for triangular inversion * CTIME(1) = 0.0D+0 WTIME(1) = 0.0D+0 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N ) ) * ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'PD' ) ) THEN * * 1/3 N^3 + 1/2 N^2 flops for Cholesky * factorization * NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 ) * * 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion * NOPS = NOPS + $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) + $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 ) * END IF * * Figure total megaflops -- factorization and * inversion, for WALL and CPU time, and print * output. * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ) + CTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ) .GE. 0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ RCOND, FRESID, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * 40 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q Fct Time Inv Time ', $ ' MFLOPS Cond Resid CHECK' ) 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ', $ '----------- ------- ------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2, $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( A ) * STOP * * End of PSINVDRIVER * END scalapack-1.8.0/TESTING/LIN/pcqrt17.f0000640000175000017500000003301410363532303016524 0ustar muammarmuammar REAL FUNCTION PCQRT17( TRANS, IRESID, M, N, NRHS, A, $ IA, JA, DESCA, X, IX, JX, $ DESCX, B, IB, JB, DESCB, WORK, $ RWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IRESID, IX, JA, JB, JX, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) COMPLEX A( * ), B( * ), WORK( * ), X( * ) REAL RWORK( * ) * .. * * Purpose * ======= * * PCQRT17 computes the ratio * * || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps) * * where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and * * alpha = ||B|| if IRESID = 1 (zero-residual problem) * alpha = ||R|| if IRESID = 2 (otherwise). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies whether or not the transpose of sub( A ) is used. * = 'N': No transpose, op( sub( A ) ) = sub( A ). * = 'C': Conjugate transpose, op( sub( A ) ) = sub( A' ). * * IRESID (global input) INTEGER * IRESID = 1 indicates zero-residual problem. * IRESID = 2 indicates non-zero residual. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( B ). * If TRANS = 'C', the number of rows of the distributed * submatrix sub( X ). * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * If TRANS = 'N', the number of rows of the distributed * submatrix sub( X ). Otherwise N is the number of rows of * the distributed submatrix sub( B ). * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices sub( X ) and sub( B ). * NRHS >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed M-by-N * submatrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). * If TRANS = 'N', this array contains the local pieces of the * N-by-NRHS distributed submatrix sub( X ). Otherwise, this * array contains the local pieces of the M-by-NRHS distributed * submatrix sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_B,LOCc(JB+NRHS-1)). * If TRANS='N', this array contains the local pieces of the * distributed M-by-NRHS submatrix operand sub( B ). Otherwise, * this array contains the local pieces of the distributed * N-by-NRHS submatrix operand sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0 * otherwise LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0 * * RWORK (local workspace) REAL array, dimension (LRWORK) * LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ), * Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ), * NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER IACOL, IBCOL, IBROW, ICOFFB, ICTXT, INFO, $ IOFFA, IROFFB, ISCL, IW, IW2, JW, JW2, MYCOL, $ NRHSQ, NRHSP, MYROW, NCOLS, NPCOL, NPROW, $ NROWS, NROWSP REAL ERR, NORMA, NORMB, NORMRS, NORMX, SMLNUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), DESCW2( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PCLANGE, PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PCLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PCGEMM, PCLACPY, $ PCLASCL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, REAL * .. * .. Executable Statements .. * PCQRT17 = ZERO * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( LSAME( TRANS, 'N' ) ) THEN NROWS = M NCOLS = N IOFFA = MOD( JA-1, DESCA( NB_ ) ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN NROWS = N NCOLS = M IOFFA = MOD( IA-1, DESCA( MB_ ) ) ELSE CALL PXERBLA( ICTXT, 'PCQRT17', -1 ) RETURN END IF * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.LE.0 ) $ RETURN * IROFFB = MOD( IA-1, DESCA( MB_ ) ) ICOFFB = MOD( JA-1, DESCA( NB_ ) ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) NRHSP = NUMROC( NRHS+IROFFB, DESCB( NB_ ), MYROW, IBROW, NPROW ) NROWSP = NUMROC( NROWS+IROFFB, DESCA( MB_ ), MYROW, IBROW, NPROW ) * * Assign array descriptor DESCW for workspace WORK, where DESCW * holds a copy of the distributed submatrix sub( B ) * CALL DESCSET( DESCW, NROWS+IROFFB, NRHS+ICOFFB, DESCB( MB_ ), $ DESCB( NB_ ), IBROW, IBCOL, ICTXT, MAX( 1, $ NROWSP ) ) * * Assign array descriptor DESCW2 for workspace WORK, where DESCW2 * holds a copy of the distributed submatrix sub( X**T ) * CALL DESCSET( DESCW2, NRHS+ICOFFB, NCOLS+IOFFA, DESCX( NB_ ), $ DESCX( MB_ ), IBROW, IACOL, ICTXT, MAX( 1, $ NRHSP ) ) * NORMA = PCLANGE( 'One-norm', M, N, A, IA, JA, DESCA, RWORK ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'Precision' ) ISCL = 0 * * compute residual and scale it * IW = 1 + IROFFB JW = 1 + ICOFFB CALL PCLACPY( 'All', NROWS, NRHS, B, IB, JB, DESCB, WORK, IW, JW, $ DESCW ) CALL PCGEMM( TRANS, 'No transpose', NROWS, NRHS, NCOLS, $ CMPLX( -ONE ), A, IA, JA, DESCA, X, IX, JX, DESCX, $ CMPLX( ONE ), WORK, IW, JW, DESCW ) NORMRS = PCLANGE( 'Max', NROWS, NRHS, WORK, IW, JW, DESCW, $ RWORK ) IF( NORMRS.GT.SMLNUM ) THEN ISCL = 1 CALL PCLASCL( 'General', NORMRS, ONE, NROWS, NRHS, WORK, $ IW, JW, DESCW, INFO ) END IF * * compute R'*sub( A ) * IW2 = 1 + ICOFFB JW2 = 1 + IOFFA CALL PCGEMM( 'Conjugate transpose', TRANS, NRHS, NCOLS, NROWS, $ CMPLX( ONE ), WORK, IW, JW, DESCW, A, IA, JA, DESCA, $ CMPLX( ZERO ), WORK( NROWSP*NRHSQ+1 ), IW2, JW2, $ DESCW2 ) * * compute and properly scale error * ERR = PCLANGE( 'One-norm', NRHS, NCOLS, WORK( NROWSP*NRHSQ+1 ), $ IW2, JW2, DESCW2, RWORK ) IF( NORMA.NE.ZERO ) $ ERR = ERR / NORMA * IF( ISCL.EQ.1 ) $ ERR = ERR*NORMRS * IF( IRESID.EQ.1 ) THEN NORMB = PCLANGE( 'One-norm', NROWS, NRHS, B, IB, JB, DESCB, $ RWORK ) IF( NORMB.NE.ZERO ) $ ERR = ERR / NORMB ELSE NORMX = PCLANGE( 'One-norm', NCOLS, NRHS, X, IX, JX, DESCX, $ RWORK ) IF( NORMX.NE.ZERO ) $ ERR = ERR / NORMX END IF * PCQRT17 = ERR / ( PSLAMCH( ICTXT, 'Epsilon' ) * $ REAL( MAX( M, N, NRHS ) ) ) * RETURN * * End of PCQRT17 * END scalapack-1.8.0/TESTING/LIN/pcinvchk.f0000640000175000017500000003377710363532303017050 0ustar muammarmuammar SUBROUTINE PCINVCHK( MATTYP, N, A, IA, JA, DESCA, IASEED, ANORM, $ FRESID, RCOND, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IASEED, JA, N REAL ANORM, FRESID, RCOND * .. * .. Array Arguments .. CHARACTER*3 MATTYP INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCINVCHK computes the scaled residual * * || sub( A ) * inv( sub( A ) ) - I || / ( || sub( A ) || * N * eps ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). to check the result * returned by the matrix inversion routines. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MATTYP (global input) CHARACTER*3 * The type of the distributed matrix to be generated: * if MATTYP = 'GEN' then GENeral matrix, * if MATTYP = 'UTR' then Upper TRiangular matrix, * if MATTYP = 'LTR' then Lower TRiangular matrix, * if MATTYP = 'UPD' then (Upper) Hermitian Positive Definite, * if MATTYP = 'LPD' then (Lower) Hermitian Positive Definite. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_A, LOCc(JA+N-1)). On * entry, sub( A ) contains the distributed matrix inverse * computed by PCGETRI, PCPOTRI or PCTRTRI. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * Seed for the random generation of sub( A ). * * ANORM (global input) REAL * The 1-norm of the original matrix sub( A ). * * FRESID (global output) REAL * The inversion residual. * * RCOND (global output) REAL * The condition number of the original distributed matrix. * RCOND = || sub( A ) ||.|| sub( A )^{-1} || where ||A|| * denotes the 1-norm of A. * * WORK (local workspace) COMPLEX array, dimension * MAX(2*LOCr(N_A+MOD(IA-1,MB_A))*MB_A, LDW) * where LDW is the workspace requirement for the norm computa- * tions, see PCLANGE, PCLANHE, PCLANSY and PCLANTR. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER AFORM, DIAG, UPLO INTEGER ICTXT, ICURCOL, ICURROW, II, IIA, IPW, IROFF, $ IW, J, JB, JJA, JN, KK, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL AUXNORM, EPS, NRMINVAXA, TEMP * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCGEMM, $ PCHEMM, PCLASET, PCMATGEN, PCTRMM * .. * .. External Functions .. LOGICAL LSAMEN INTEGER ICEIL, NUMROC REAL PCLANGE, PCLANHE, PCLANTR, PSLAMCH EXTERNAL ICEIL, LSAMEN, NUMROC, PCLANGE, PCLANHE, $ PCLANSY, PCLANTR, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * EPS = PSLAMCH( DESCA( CTXT_ ), 'eps' ) * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Compute the condition number * IF( LSAMEN( 1, MATTYP( 1:1 ), 'U' ) ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PCLANGE( '1', N, N, A, IA, JA, DESCA, WORK ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * AFORM = 'N' DIAG = 'D' AUXNORM = PCLANTR( '1', UPLO, 'Non unit', N, N, A, IA, JA, $ DESCA, WORK ) ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * AFORM = 'H' DIAG = 'D' AUXNORM = PCLANHE( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * END IF RCOND = ANORM*AUXNORM * * Compute inv(A)*A * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ ICURROW, ICURCOL ) * * Define array descriptor for working array WORK * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, ICURROW, NPROW ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), ICURROW, ICURCOL, DESCA( CTXT_ ), $ MAX( 1, NP ) ) IPW = DESCW( LLD_ ) * DESCW( NB_ ) + 1 * IF( MYROW.EQ.ICURROW ) THEN II = IROFF + 1 NP = NP - IROFF ELSE II = 1 END IF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Handle first block separately, regenerate a block of columns of A * IW = IROFF + 1 IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK, $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PCLASET( 'Lower', N-1, JB, ZERO, ZERO, WORK, IW+1, $ 1, DESCW ) ELSE CALL PCLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, WORK, IW, $ 2, DESCW ) END IF ELSE CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), WORK( IPW ), $ DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PCGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, A, $ IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, ZERO, $ WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN * CALL PCTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PCHEMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 KK = 0, JB-1 WORK( II+KK*(DESCW(LLD_)+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) )-ONE 10 CONTINUE END IF * NRMINVAXA = PCLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) * JB = MIN( N-J+JA, DESCA( NB_ ) ) * * regenerate a block of columns of A * IF( MYCOL.EQ.ICURCOL ) THEN IF( LSAMEN( 2, MATTYP( 2:3 ), 'TR' ) ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK, DESCW( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, IIA-1, NP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( LSAMEN( 3, MATTYP, 'UTR' ) ) THEN CALL PCLASET( 'Lower', JA+N-J-1, JB, ZERO, ZERO, $ WORK, IW+J-JA+1, 1, DESCW ) ELSE CALL PCLASET( 'All', J-JA, JB, ZERO, ZERO, WORK, IW, $ 1, DESCW ) CALL PCLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, $ WORK, IW+J-JA, 2, DESCW ) END IF ELSE CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCW( MB_ ), DESCW( NB_ ), $ WORK( IPW ), DESCW( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, $ JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) END IF END IF * * Multiply A^{-1}*A * IF( LSAMEN( 3, MATTYP, 'GEN' ) ) THEN * CALL PCGEMM( 'No tranpose', 'No transpose', N, JB, N, ONE, $ A, IA, JA, DESCA, WORK( IPW ), IW, 1, DESCW, $ ZERO, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP(2:3), 'TR' ) ) THEN * CALL PCTRMM( 'Left', UPLO, 'No tranpose', 'Non unit', N, JB, $ ONE, A, IA, JA, DESCA, WORK, IW, 1, DESCW ) * ELSE IF( LSAMEN( 2, MATTYP( 2:3 ), 'PD' ) ) THEN * CALL PCHEMM( 'Left', UPLO, N, JB, ONE, A, IA, JA, DESCA, $ WORK(IPW), IW, 1, DESCW, ZERO, WORK, IW, 1, $ DESCW ) * END IF * * subtract the identity matrix to the diagonal block of these * cols * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 KK = 0, JB-1 WORK( II+KK*(DESCW( LLD_ )+1) ) = $ WORK( II+KK*(DESCW( LLD_ )+1) ) - ONE 20 CONTINUE END IF * * Compute the 1-norm of these JB cols * TEMP = PCLANGE( '1', N, JB, WORK, IW, 1, DESCW, WORK( IPW ) ) NRMINVAXA = MAX( TEMP, NRMINVAXA ) * IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJA = JJA + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) DESCW( CSRC_ ) = ICURCOL * 30 CONTINUE * * Compute the scaled residual * FRESID = NRMINVAXA / ( N * EPS * ANORM ) * RETURN * * End of PCINVCHK * END scalapack-1.8.0/TESTING/LIN/pclltinfo.f0000640000175000017500000004215010363532303017216 0ustar muammarmuammar SUBROUTINE PCLLTINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, THRESH, EST, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER UPLO CHARACTER*(*) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PCLLTINFO get needed startup information for LLt factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 7, LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'LLT.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 2 END IF IF( EST ) THEN WORK( 7 ) = 1 ELSE WORK( 7 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 7, 1, WORK, 7 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Ax=b by LLt factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision LLt factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LL''|| /'// $ ' (||A|| * eps * N)' ELSE WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - U''U|| /'// $ ' (||A|| * eps * N)' END IF WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether data is stored in ''Upper'// $ ''' or ''Lower'' portion of array A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LLt time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 7, 1, WORK, 7, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF IF( WORK( 7 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCLLTINFO * END scalapack-1.8.0/TESTING/LIN/pzqrt16.f0000640000175000017500000002540510363532303016557 0ustar muammarmuammar SUBROUTINE PZQRT16( TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, $ JX, DESCX, B, IB, JB, DESCB, RWORK, RESID ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, IX, JA, JB, JX, M, N, NRHS DOUBLE PRECISION RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), B( * ), X( * ) * .. * * Purpose * ======= * * PZQRT16 computes the residual for a solution of a system of linear * equations sub( A )*sub( X ) = B or sub( A' )*sub( X ) = B: * RESID = norm(B - sub( A )*sub( X ) ) / * ( max(m,n) * norm(sub( A ) ) * norm(sub( X ) ) * EPS ), * where EPS is the machine epsilon, sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), and sub( X ) denotes * X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations: * = 'N': sub( A )*sub( X ) = sub( B ) * = 'T': sub( A' )*sub( X )= sub( B ), where A' is the * transpose of sub( A ). * = 'C': sub( A' )*sub( X )= B, where A' is the conjugate * transpose of sub( A ). * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local * memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * The original M x N matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input) COMPLEX*16 pointer into the local * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This * array contains the local pieces of the computed solution * distributed vectors for the system of linear equations. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * B (local input/local output) COMPLEX*16 pointer into * the local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributes right hand side vectors for * the system of linear equations. On exit, sub( B ) is over- * written with the difference sub( B ) - sub( A )*sub( X ) or * sub( B ) - sub( A )'*sub( X ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * LWORK >= Nq0 if TRANS = 'N', and LRWORK >= Mp0 otherwise. * * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * RESID (global output) DOUBLE PRECISION * The maximum over the number of right hand sides of * norm( sub( B )- sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER ICTXT, IDUMM, J, MYCOL, MYROW, N1, N2, NPCOL, $ NPROW DOUBLE PRECISION ANORM, BNORM, EPS, XNORM * .. * .. Local Arrays .. DOUBLE PRECISION TEMP( 2 ) * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL LSAME, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDZASUM, $ PZGEMM * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick exit if M = 0 or N = 0 or NRHS = 0 * IF( M.LE.0 .OR. N.LE.0 .OR. NRHS.EQ.0 ) THEN RESID = ZERO RETURN END IF * IF( LSAME( TRANS, 'T' ) .OR. LSAME( TRANS, 'C' ) ) THEN ANORM = PZLANGE( 'I', M, N, A, IA, JA, DESCA, RWORK ) N1 = N N2 = M ELSE ANORM = PZLANGE( '1', M, N, A, IA, JA, DESCA, RWORK ) N1 = M N2 = N END IF * EPS = PDLAMCH( ICTXT, 'Epsilon' ) * * Compute B - sub( A )*sub( X ) (or B - sub( A' )*sub( X ) ) and * store in B. * CALL PZGEMM( TRANS, 'No transpose', N1, NRHS, N2, -CONE, A, IA, $ JA, DESCA, X, IX, JX, DESCX, CONE, B, IB, JB, DESCB ) * * Compute the maximum over the number of right hand sides of * norm( sub( B ) - sub( A )*sub( X ) ) / * ( max(m,n) * norm( sub( A ) ) * norm( sub( X ) ) * EPS ). * RESID = ZERO DO 10 J = 1, NRHS * CALL PDZASUM( N1, BNORM, B, IB, JB+J-1, DESCB, 1 ) CALL PDZASUM( N2, XNORM, X, IX, JX+J-1, DESCX, 1 ) * * Only the process columns owning the vector operands will have * the correct result, the other will have zero. * TEMP( 1 ) = BNORM TEMP( 2 ) = XNORM CALL DGAMX2D( ICTXT, 'All', ' ', 2, 1, TEMP, 2, IDUMM, IDUMM, $ -1, -1, IDUMM ) BNORM = TEMP( 1 ) XNORM = TEMP( 2 ) * * Every processes have ANORM, BNORM and XNORM now. * IF( ANORM.EQ.ZERO .AND. BNORM.EQ.ZERO ) THEN RESID = ZERO ELSE IF( ANORM.LE.ZERO .OR. XNORM.LE.ZERO ) THEN RESID = ONE / EPS ELSE RESID = MAX( RESID, ( ( BNORM / ANORM ) / XNORM ) / $ ( MAX( M, N )*EPS ) ) END IF * 10 CONTINUE * RETURN * * End of PZQRT16 * END scalapack-1.8.0/TESTING/LIN/pcdblaschk.f0000640000175000017500000002616010363532303017325 0ustar muammarmuammar SUBROUTINE PCDBLASCHK( SYMM, UPLO, TRANS, N, BWL, BWU, NRHS, X, $ IX, JX, DESCX, IASEED, A, IA, JA, DESCA, $ IBSEED, ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, TRANS, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ REAL ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) COMPLEX A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PCDBLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'H', sub( A ) is a hermitian distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'H', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * TRANS if TRANS= 'C', A 'Conjugate transpose' is used as the * coefficient matrix in the solve. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) REAL * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) REAL * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) COMPLEX array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,max(max(bwl,bwu)*(max(bwl,bwu)+2),NB))+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER BW, INFO, IPPRODUCT, WORK_MIN REAL DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D, $ CLASET, PBCTRAN, PCMATGEN, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICAMAX, NUMROC REAL PSLAMCH EXTERNAL ICAMAX, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'H' ) ) THEN BW = BWL WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ELSE BW = MAX(BWL, BWU) WORK_MIN = MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PCBLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) RESID = 0.0E+0 DIVISOR = ANORM * EPS * REAL( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (BWL+BWU+1), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'H' )) THEN CALL PCBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PCBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PCGBDCMV( BWL+BWU+1, BWL, BWU, TRANS, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), $ (MAX(BWL,BWU)+2)*MAX(BWL,BWU), INFO ) * * * Regenerate column of B * CALL PCMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PCAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PSCNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PSCNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PCBLASCHK * END scalapack-1.8.0/TESTING/LIN/pzdbmv1.f0000640000175000017500000007613110363532303016615 0ustar muammarmuammar SUBROUTINE PZGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDBTRF and this is stored in AF. If a linear system * is to be solved using PZDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL ZGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL ZTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'C' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL ZTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL ZTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL ZGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL ZTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PZBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pzpbmv1.f0000640000175000017500000007315410363532303016633 0ustar muammarmuammar SUBROUTINE PZPBDCMV( LDBW, BW, UPLO, N, A, JA, DESCA, NRHS, B, IB, $ DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPBTRF and this is stored in AF. If a linear system * is to be solved using PZPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, $ FIRST_PROC, I, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, J, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, OFST, PART_OFFSET, $ PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Pre-calculate bw^2 * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZPBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (BW+2)*BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( UPLO, 'L' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'U', BW, BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to right * CALL ZTRSD2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL ZHBMV( 'L', NUMROC_SIZE, BW, CONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, CZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * CALL ZTRMV( 'U', 'N', 'N', BW, $ A( LLDA*( NUMROC_SIZE-BW )+1+BW ), LLDA-1, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, BW*( BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'L', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL-1 ) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 30 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( UPLO, 'U' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL ZLATCPY( 'L', BW, BW, A( OFST+1 ), $ LLDA-1, WORK( 1 ), BW ) * * Send the triangle to neighboring processor to left * CALL ZTRSD2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), $ BW, MYROW, MYCOL-1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL ZHBMV( 'U', NUMROC_SIZE, BW, CONE, A( OFST+1 ), LLDA, $ B(PART_OFFSET+1), 1, CZERO, X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL ZCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( BW*BW+1+BW-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL ZTRRV2D(ICTXT, 'U', 'N', $ BW, BW, $ WORK( 1 ), BW, MYROW, MYCOL+1 ) * CALL ZTRMV( 'U', 'N', 'N', BW, $ WORK( 1 ), BW, $ WORK( BW*BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( BW*BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL ZGESD2D( ICTXT, BW, 1, $ WORK( BW*BW+1 ), BW, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, BW*( BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL ZCOPY( DL_P_M, B( 1 ), 1, $ WORK( BW*BW+1 ), 1) * CALL ZTRMV( $ 'L', $ 'N', $ 'N', BW, $ A( 1 ), LLDA-1, $ WORK( BW*BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DL_P_M .GT. DL_P_N ) THEN DO 60 I=1, DL_P_M-DL_P_N WORK( BW*BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL ZGESD2D( ICTXT, BW, 1, WORK(BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL ZGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL ZAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL ZGERV2D( ICTXT, BW, 1, WORK( BW*BW+1 ), $ BW, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL ZAXPY( BW, CONE, $ WORK( BW*BW+1 ), 1, $ X( NUMROC_SIZE-BW+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PZBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pdptlaschk.f0000640000175000017500000002743610363532303017373 0ustar muammarmuammar SUBROUTINE PDPTLASCHK( SYMM, UPLO, N, BWL, BWU, NRHS, X, IX, JX, $ DESCX, IASEED, A, IA, JA, DESCA, IBSEED, $ ANORM, RESID, WORK, WORKSIZ ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SYMM, UPLO INTEGER BWL, BWU, IA, IASEED, IBSEED, $ IX, JA, JX, N, NRHS, WORKSIZ DOUBLE PRECISION ANORM, RESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), WORK( * ), X( * ) * .. External Functions .. LOGICAL LSAME * .. * * Purpose * ======= * * PDPTLASCHK computes the residual * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N) * to check the accuracy of the factorization and solve steps in the * LU and Cholesky decompositions, where sub( A ) denotes * A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SYMM (global input) CHARACTER * if SYMM = 'S', sub( A ) is a symmetric distributed band * matrix, otherwise sub( A ) is a general distributed matrix. * * UPLO (global input) CHARACTER * if SYMM = 'S', then * if UPLO = 'L', the lower half of the matrix is stored * if UPLO = 'U', the upper half of the matrix is stored * if SYMM != 'S' or 'H', then * if UPLO = 'D', the matrix is stable during factorization * without interchanges * if UPLO != 'D', the matrix is general * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right-hand-sides, i.e the number of columns * of the distributed matrix sub( X ). NRHS >= 1. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,LOCq(JX+NRHS-1). This array * contains the local pieces of the answer vector(s) sub( X ) of * sub( A ) sub( X ) - B, split up over a column of processes. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IBSEED (global input) INTEGER * The seed number to generate the original matrix B. * * ANORM (global input) DOUBLE PRECISION * The 1-norm or infinity norm of the distributed matrix * sub( A ). * * RESID (global output) DOUBLE PRECISION * The residual error: * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N). * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * IF SYMM='S' * LWORK >= max(5,NB)+2*NB * IF SYMM!='S' or 'H' * LWORK >= max(5,NB)+2*NB * * WORKSIZ (local input) size of WORK. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, $ IIA, IIX, IPB, IPW, $ IXCOL, IXROW, J, JJA, JJX, LDA, $ MYCOL, MYROW, NB, NP, NPCOL, NPROW, NQ INTEGER I, START INTEGER BW, INFO, IPPRODUCT, WORK_MIN DOUBLE PRECISION DIVISOR, EPS, RESID1, NORMX * .. * .. Local Arrays .. * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, DGEBR2D, $ DGEBS2D, DGEMM, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, PBDTRAN, PDMATGEN * .. * .. External Functions .. INTEGER IDAMAX, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get needed initial parameters * ICTXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * IF( LSAME( SYMM, 'S' ) ) THEN BW = BWL START = 1 WORK_MIN = MAX(5,NB)+2*NB ELSE BW = MAX(BWL, BWU) IF( LSAME( UPLO, 'D' )) THEN START = 1 ELSE START = 2 ENDIF WORK_MIN = MAX(5,NB)+2*NB ENDIF * IF ( WORKSIZ .LT. WORK_MIN ) THEN CALL PXERBLA( ICTXT, 'PDTLASCHK', -18 ) RETURN END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) RESID = 0.0D+0 DIVISOR = ANORM * EPS * DBLE( N ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) NP = NUMROC( (2), DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IPB = 1 IPPRODUCT = 1 + DESCA( NB_ ) IPW = 1 + 2*DESCA( NB_ ) * LDA = DESCA( LLD_ ) * * Regenerate A * IF( LSAME( SYMM, 'S' )) THEN CALL PDBMATGEN( ICTXT, UPLO, 'D', BW, BW, N, BW+1, $ DESCA( NB_ ), A, DESCA( LLD_ ), 0, 0, $ IASEED, MYROW, MYCOL, NPROW, NPCOL ) ELSE * CALL PDBMATGEN( ICTXT, 'N', UPLO, BWL, BWU, N, $ DESCA( MB_ ), DESCA( NB_ ), A, $ DESCA( LLD_ ), 0, 0, IASEED, MYROW, $ MYCOL, NPROW, NPCOL ) ENDIF IF( LSAME( UPLO, 'U' ) ) THEN * * * Matrix formed above has the diagonals shifted from what was * input to the tridiagonal routine. Shift them back. * * Send elements to neighboring processors * IF( MYCOL.LT.NPCOL-1 ) THEN CALL DGESD2D( ICTXT, 1, 1, $ A( START+( DESCA( NB_ )-1 )*LDA ), $ LDA, MYROW, MYCOL+1 ) ENDIF * * Shift local elements * DO 230 I=DESCA( NB_ )-1,0,-1 A( START+(I+1)*LDA ) = A( START+(I)*LDA ) 230 CONTINUE * * Receive elements from neighboring processors * IF( MYCOL.GT.0 ) THEN CALL DGERV2D( ICTXT, 1, 1, A( START), LDA, $ MYROW, MYCOL-1 ) ENDIF * ENDIF * * Loop over the rhs * RESID = 0.0 * DO 40 J = 1, NRHS * * Multiply A * current column of X * * CALL PDPBDCMV( BW+1, BW, UPLO, N, A, 1, DESCA, $ 1, X( 1 + (J-1)*DESCX( LLD_ )), 1, DESCX, $ WORK( IPPRODUCT ), WORK( IPW ), (BW+2)*BW, INFO ) * * * Regenerate column of B * CALL PDMATGEN( DESCX( CTXT_ ), 'No', 'No', DESCX( M_ ), $ DESCX( N_ ), DESCX( MB_ ), DESCX( NB_ ), $ WORK( IPB ), DESCX( LLD_ ), DESCX( RSRC_ ), $ DESCX( CSRC_ ), IBSEED, 0, NQ, J-1, 1, MYCOL, $ MYROW, NPCOL, NPROW ) * * Figure || A * X - B || & || X || * CALL PDAXPY( N, -ONE, WORK( IPPRODUCT ), 1, 1, DESCX, 1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * CALL PDNRM2( N, NORMX, $ X, 1, J, DESCX, 1 ) * CALL PDNRM2( N, RESID1, $ WORK( IPB ), 1, 1, DESCX, 1 ) * * * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N) * RESID1 = RESID1 / ( NORMX*DIVISOR ) * RESID = MAX( RESID, RESID1 ) * 40 CONTINUE * RETURN * * End of PDTLASCHK * END scalapack-1.8.0/TESTING/LIN/pcgbmv1.f0000640000175000017500000007612010363532303016567 0ustar muammarmuammar SUBROUTINE PCGBDCMV( LDBW, BWL, BWU, TRANS, N, A, JA, DESCA, NRHS, $ B, IB, DESCB, X, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LDBW, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ), X( * ) * .. * * * Purpose * ======= * * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDBTRF and this is stored in AF. If a linear system * is to be solved using PCDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, DL_N_M, DL_N_N, DL_P_M, DL_P_N, DU_N_M, $ DU_N_N, DU_P_M, DU_P_N, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM2, IDUM3, J, JA_NEW, $ LLDA, LLDB, MAX_BW, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, STORE_M_B, STORE_N_A INTEGER NUMROC_SIZE * .. * .. Local Arrays .. INTEGER PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * ICTXT = DESCA( CTXT_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) LLDA = DESCA( LLD_ ) STORE_N_A = DESCA( N_ ) LLDB = DESCB( LLD_ ) STORE_M_B = DESCB( M_ ) * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCDBDCMV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDBDCMV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBDCMV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * * * Zero out solution to use to accumulate answer * NUMROC_SIZE = $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL) * DO 2279 J=1,NRHS DO 4502 I=1,NUMROC_SIZE X( (J-1)*LLDB + I ) = CZERO 4502 CONTINUE 2279 CONTINUE * DO 5642 I=1, (MAX_BW+2)*MAX_BW WORK( I ) = CZERO 5642 CONTINUE * * Begin main code * * ************************************** * IF ( LSAME( TRANS, 'N' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * * Use main partition in each processor to multiply locally * CALL CGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWL-DL_N_N ), 1 ) * CALL CTRMV( 'U', 'N', 'N', BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 10 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 10 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BWL, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWL, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 20 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 20 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BWU, $ A( 1 ), LLDA-1, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 30 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 30 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BWU, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWU+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * ************************************** * IF ( LSAME( TRANS, 'C' ) ) THEN * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN * DL_P_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DL_P_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) * DU_P_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) DU_P_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * DL_N_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) DL_N_N= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) * DU_N_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) DU_N_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) ENDIF * * IF( MYCOL .GT. 0 ) THEN * ...must send triangle in lower half of matrix to left * * Transpose triangle in preparation for sending * CALL CLATCPY( 'L', BWU, BWU, A( OFST+1 ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to left * CALL CTRSD2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL-1 ) * ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN * ...must send triangle in upper half of matrix to right * * Transpose triangle in preparation for sending * CALL CLATCPY( 'U', BWL, BWL, $ A( LLDA*( NUMROC_SIZE-BWL )+1+BWU+BWL ), $ LLDA-1, WORK( 1 ), MAX_BW ) * * Send the triangle to neighboring processor to right * CALL CTRSD2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), $ MAX_BW, MYROW, MYCOL+1 ) * ENDIF * * Use main partition in each processor to multiply locally * CALL CGBMV( TRANS, NUMROC_SIZE, NUMROC_SIZE, BWL, BWU, CONE, $ A( OFST+1 ), LLDA, B(PART_OFFSET+1), 1, CZERO, $ X( PART_OFFSET+1 ), 1 ) * * * IF ( MYCOL .LT. NPCOL-1 ) THEN * * Do the multiplication of the triangle in the lower half * CALL CCOPY( DL_N_N, $ B( NUMROC_SIZE-DL_N_N+1 ), $ 1, WORK( MAX_BW*MAX_BW+1+BWU-DL_N_N ), 1 ) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'U', 'N', $ BWU, BWU, $ WORK( 1 ), MAX_BW, MYROW, MYCOL+1 ) * CALL CTRMV( 'U', 'N', 'N', BWU, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Zero out extraneous elements caused by TRMV if any * IF( DL_N_M .GT. DL_N_N ) THEN DO 40 I = DL_N_M-DL_N_N, DL_N_M WORK( MAX_BW*MAX_BW+I ) = 0 40 CONTINUE ENDIF * * Send the result to the neighbor * CALL CGESD2D( ICTXT, BWU, 1, $ WORK( MAX_BW*MAX_BW+1 ), BWU, MYROW, MYCOL+1 ) * ENDIF * IF ( MYCOL .GT. 0 ) THEN * DO 50 I=1, MAX_BW*( MAX_BW+2 ) WORK( I ) = CZERO 50 CONTINUE * * Do the multiplication of the triangle in the upper half * * Copy vector to workspace * CALL CCOPY( DU_P_N, B( 1 ), 1, $ WORK( MAX_BW*MAX_BW+1 ), 1) * * Receive the triangle prior to multiplying by it. * CALL CTRRV2D(ICTXT, 'L', 'N', $ BWL, BWL, $ WORK( 1 ), MAX_BW, MYROW, MYCOL-1 ) * CALL CTRMV( $ 'L', $ 'N', $ 'N', BWL, $ WORK( 1 ), MAX_BW, $ WORK( MAX_BW*MAX_BW+1 ), 1 ) * * Zero out extraneous results from TRMV if any * IF( DU_P_N .GT. DU_P_M ) THEN DO 60 I=1, DU_P_N-DU_P_M WORK( MAX_BW*MAX_BW+I ) = 0 60 CONTINUE ENDIF * * Send result back * CALL CGESD2D( ICTXT, BWL, 1, WORK(MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL-1 ) * * Receive vector result from neighboring processor * CALL CGERV2D( ICTXT, BWU, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWU, MYROW, MYCOL-1 ) * * Do addition of received vector * CALL CAXPY( BWU, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( 1 ), 1 ) * ENDIF * * * IF( MYCOL .LT. NPCOL-1 ) THEN * * Receive returned result * CALL CGERV2D( ICTXT, BWL, 1, WORK( MAX_BW*MAX_BW+1 ), $ BWL, MYROW, MYCOL+1 ) * * Do addition of received vector * CALL CAXPY( BWL, CONE, $ WORK( MAX_BW*MAX_BW+1 ), 1, $ X( NUMROC_SIZE-BWL+1 ), 1) * ENDIF * * ENDIF * * End of LSAME if * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * RETURN * * End of PCBhBMV1 * END scalapack-1.8.0/TESTING/LIN/pdluinfo.f0000640000175000017500000004107610363532303017052 0ustar muammarmuammar SUBROUTINE PDLUINFO( SUMMRY, NOUT, NMAT, MVAL, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, $ NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, EST, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL EST CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDNVAL ), NBRVAL( LDNBRVAL ), $ NBVAL( LDNBVAL ), NRVAL( LDNRVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ WORK( * ) * .. * * Purpose * ======= * * PDLUINFO gets needed startup information for LU factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M and N. * * MVAL (global output) INTEGER array, dimension (LDNVAL) * The values of M (number of rows in matrix) to run the code * with. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * M and N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * EST (global output) LOGICAL * Flag indicating if condition estimation and iterative * refinement routines are to be exercised. * * WORK (local workspace) INTEGER array of dimension >= * MAX( 6, 2*LDNVAL+LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL ) * Used to pack all input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='LU.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Read the flag that indicates whether to test the condition * estimation and iterative refinement routines. * READ( NIN, FMT = * ) EST * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NNR WORK( 4 ) = NNBR WORK( 5 ) = NGRIDS IF( EST ) THEN WORK( 6 ) = 1 ELSE WORK( 6 ) = 0 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 6, 1, WORK, 6 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Ax=b by LU factorization.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision LU factorization '// $ 'and solve.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| / '// $ '(||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the '// $ 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'LU time : Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'M ', ( MVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( MVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 6, 1, WORK, 6, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NNR = WORK( 3 ) NNBR = WORK( 4 ) NGRIDS = WORK( 5 ) IF( WORK( 6 ).EQ.1 ) THEN EST = .TRUE. ELSE EST = .FALSE. END IF * I = 2*NMAT + NNB + NNR + NNBR + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDLUINFO * END scalapack-1.8.0/TESTING/LIN/pzdtinfo.f0000640000175000017500000004436110363532303017067 0ustar muammarmuammar SUBROUTINE PZDTINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZDTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZDTINFO * END scalapack-1.8.0/TESTING/LIN/pzgetrrv.f0000640000175000017500000003010210363532303017101 0ustar muammarmuammar SUBROUTINE PZGETRRV( M, N, A, IA, JA, DESCA, IPIV, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZGETRRV reforms sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from the * triangular matrices L and U returned by PZGETRF. It multiplies * an upper triangular matrix stored in the upper triangle of sub( A ) * times the unit lower triangular matrix stored in the lower triangle. * To accomplish this, the routine basically performs the PZGETRF * routine in reverse. * * It computes L*U first, and then apply P: P*L*U => sub( A ). In the * J-th loop, the block column (or column panel), which has the lower * triangular unit matrix L is multiplied with the block row (or row * panel), which contains the upper triangular matrix U. * * ( L1 ) ( 0 0 ) ( L1*U1 L1*U2 ) * A` = L * U + A` = ( ) * (U1 U2) + ( ) = ( ) * ( L2 ) ( 0 A`) ( L2*U1 L2*U2+A` ) * * where L1 is a lower unit triangular matrix and U1 is an upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the distributed matrix sub( A ) * contains the the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. On exit, the original distributed matrix sub( A ) * is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array of dimension (LWORK) * LWORK >= MpA0 * NB_A + NqA0 * MB_A, where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of columns of L, and a block of * rows of U. INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J, $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ), $ DESCU( DLEN_ ), IDUM( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, PB_TOPGET, PB_TOPSET, $ PZGEMM, PZLACPY, PZLAPIV, PZLASET * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IPL = 1 IPU = IPL + MP * DESCA( NB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Define array descriptors for L and U * MN = MIN( M, N ) IL = MAX( ( ( IA+MN-2 ) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) JL = MAX( ( ( JA+MN-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) IAROW = INDXG2P( IL, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) * CALL DESCSET( DESCL, IA+M-IL, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, MP ) ) * CALL DESCSET( DESCU, DESCA( MB_ ), JA+N-JL, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) + DESCA( MB_ ) ) * * DO 10 J = JL, JN+1, -DESCA( NB_ ) * JB = MIN( JA+MN-J, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PZLACPY( 'Lower', M-IL+IA, JB, A, IL, J, DESCA, $ WORK( IPL ), 1, 1, DESCL ) CALL PZLASET( 'Upper', M-IL+IA, JB, ZERO, ONE, WORK( IPL ), $ 1, 1, DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PZLACPY( 'Upper', JB, JA+N-J, A, IL, J, DESCA, $ WORK( IPU ), 1, 1, DESCU ) CALL PZLASET( 'Lower', JB-1, JA+N-J, ZERO, ZERO, $ WORK( IPU ), 2, 1, DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PZLASET( 'Lower', IA+M-IL-1, JB, ZERO, ZERO, A, IL+1, J, $ DESCA ) * * Zero the upper triangular piece of the current block. * CALL PZLASET( 'Upper', JB, JA+N-J, ZERO, ZERO, A, IL, J, $ DESCA ) * * Update the matrix sub( A ). * CALL PZGEMM( 'No transpose', 'No transpose', IA+M-IL, $ JA+N-J, JB, ONE, WORK( IPL ), 1, 1, DESCL, $ WORK( IPU ), 1, 1, DESCU, ONE, A, IL, J, DESCA ) * IL = IL - DESCA( MB_ ) DESCL( M_ ) = DESCL( M_ ) + DESCL( MB_ ) DESCL( RSRC_ ) = MOD( DESCL( RSRC_ ) + NPROW - 1, NPROW ) DESCL( CSRC_ ) = MOD( DESCL( CSRC_ ) + NPCOL - 1, NPCOL ) DESCU( N_ ) = DESCU( N_ ) + DESCU( NB_ ) DESCU( RSRC_ ) = DESCL( RSRC_ ) DESCU( CSRC_ ) = DESCL( CSRC_ ) * 10 CONTINUE * * Handle first block separately * JB = MIN( JN-JA+1, DESCA( NB_ ) ) * * Copy unit lower triangular part of sub( A ) into WORK * CALL PZLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPL ), $ 1, 1, DESCL ) CALL PZLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPL ), 1, 1, $ DESCL ) * * Copy upper triangular part of sub( A ) into WORK(IPU) * CALL PZLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPU ), 1, $ 1, DESCU ) CALL PZLASET( 'Lower', JB-1, N, ZERO, ZERO, WORK( IPU ), 2, 1, $ DESCU ) * * Zero the strict lower triangular piece of the current block. * CALL PZLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Zero the upper triangular piece of the current block. * CALL PZLASET( 'Upper', JB, N, ZERO, ZERO, A, IA, JA, DESCA ) * * Update the matrix sub( A ). * CALL PZGEMM( 'No transpose', 'No transpose', M, N, JB, ONE, $ WORK( IPL ), 1, 1, DESCL, WORK( IPU ), 1, 1, $ DESCU, ONE, A, IA, JA, DESCA ) * * Apply pivots so that sub( A ) = P*L*U * CALL PZLAPIV( 'Backward', 'Row', 'Col', MIN( M, N ), N, A, IA, JA, $ DESCA, IPIV, IA, 1, DESCIP, IDUM ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZGETRRV * END scalapack-1.8.0/TESTING/LIN/psdtinfo.f0000640000175000017500000004435610363532303017064 0ustar muammarmuammar SUBROUTINE PSDTINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSDTINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW NBW = 1 IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSDTINFO * END scalapack-1.8.0/TESTING/LIN/psgbinfo.f0000640000175000017500000004433510363532303017042 0ustar muammarmuammar SUBROUTINE PSGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PSGBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'T' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSGBINFO * END scalapack-1.8.0/TESTING/LIN/pdlsdriver.f0000640000175000017500000013506410363532303017411 0ustar muammarmuammar PROGRAM PDLSDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * Purpose * ======= * * PDLSDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK (full rank) Least Squares routines. This test driver solves * full-rank least square problems. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 17 lines: * 'ScaLapack LS solve input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LS.out' output file name (if any) * 6 device out * 4 number of problems sizes * 55 17 31 201 values of M * 5 71 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 3 number of NRHS's * 2 3 5 values of NRHS * 2 number of NBRHS's * 1 2 values of NBRHS * 7 number of process grids (ordered P & Q) * 1 2 1 4 2 3 8 values of P * 7 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL DOUBLE PRECISION ONE, ZERO PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0 ) PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, TPSD CHARACTER TRANS CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, II, IMIDPAD, $ INFO, IPA, IPB, IPOSTPAD, IPREPAD, IPW, IPW2, $ IPX, ISCALE, ITRAN, ITYPE, J, JJ, K, KFAIL, KK, $ KPASS, KSKIP, KTESTS, LCM, LCMP, LTAU, LWF, $ LWORK, LWS, M, MNP, MNRHSP, MP, MQ, MYCOL, $ MYROW, N, NB, NBRHS, NCOLS, NGRIDS, NMAT, NNB, $ NNBR, NNR, NNRHSQ, NOUT, NP, NPCOL, NPROCS, $ NPROW, NROWS, NQ, NRHS, NRHSP, NRHSQ, WORKSIZ REAL THRESH DOUBLE PRECISION ADDFAC, ADDS, ANORM, BNORM, MULFAC, MULTS, $ NOPS, SRESID, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCW( LLD_ ), $ DESCX( DLEN_ ), IERR( 2 ), MVAL( NTESTS ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), RESULT( 2 ), $ WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDGELS, PDGEMM, PDLACPY, $ PDLSINFO, PDMATGEN, PDNRM2, PDSCAL, $ PDQRT13, PDQRT16, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PDLANGE, PDQRT14, PDQRT17 EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PDLANGE, $ PDQRT14, PDQRT17 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Data Statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * IASEED = 100 IBSEED = 200 CALL PDLSINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, $ NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, NTESTS, $ NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 90 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 90 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( ( MYROW.GE.NPROW ).OR.( MYCOL.GE.NPCOL ) ) $ GO TO 90 * DO 80 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 80 END IF * * Loop over different blocking sizes * DO 70 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 70 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) MQ = NUMROC( M, NB, MYCOL, 0, NPCOL ) NP = NUMROC( N, NB, MYROW, 0, NPROW ) MNP = MAX( MP, NP ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 70 END IF * DO 60 ISCALE = 1, 3 * ITYPE = ISCALE * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPX * WORKSIZ = NQ + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPX+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, $ 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 70 END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Generate the matrix A and calculate its 1-norm * CALL PDQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, MEM( IPW ) ) * IF( CHECK ) THEN CALL PDCHEKPAD( ICTXT, 'PDQRT13', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQRT13', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * DO 50 ITRAN = 1, 2 * IF( ITRAN.EQ.1 ) THEN NROWS = M NCOLS = N TRANS = 'N' TPSD = .FALSE. ELSE NROWS = N NCOLS = M TRANS = 'T' TPSD = .TRUE. END IF * * Loop over the different values for NRHS * DO 40 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 30 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0, $ NPROW ) NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0, $ NPCOL ) * * Define Array descriptor for rhs MAX(M,N)xNRHS * CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MNP ) + IMIDPAD, $ IERR( 1 ) ) IF( TPSD ) THEN CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, MP ) + $ IMIDPAD, IERR( 2 ) ) ELSE CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS, $ 0, 0, ICTXT, MAX( 1, NP ) + $ IMIDPAD, IERR( 2 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, $ 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Check for enough memory * IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + $ IPREPAD IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD + $ IPREPAD WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Generation', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate RHS * IF( TPSD ) THEN CALL PDMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ MP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) ELSE CALL PDMATGEN( ICTXT, 'No', 'No', $ DESCW( M_ ), DESCW( N_ ), $ DESCW( MB_ ), DESCW( NB_ ), $ MEM( IPW ), DESCW( LLD_ ), $ DESCW( RSRC_ ), $ DESCW( CSRC_ ), IBSEED, 0, $ NP, 0, NRHSQ, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PDFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PDFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF END IF * DO 10 JJ = 1, NRHS CALL PDNRM2( NCOLS, BNORM, MEM( IPW ), 1, $ JJ, DESCW, 1 ) IF( BNORM.GT.ZERO ) $ CALL PDSCAL( NCOLS, ONE / BNORM, $ MEM( IPW ), 1, JJ, DESCW, $ 1 ) 10 CONTINUE * CALL PDGEMM( TRANS, 'N', NROWS, NRHS, NCOLS, $ ONE, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ), 1, 1, DESCW, ZERO, $ MEM( IPX ), 1, 1, DESCX ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'Generation', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'Generation', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PDCHEKPAD( ICTXT, 'Generation', $ MP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PDCHEKPAD( ICTXT, 'Generation', $ NP, NRHSQ, $ MEM( IPW-IPREPAD ), $ DESCW( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Allocate space for copy of rhs * IPB = IPW * IF( TPSD ) THEN CALL DESCINIT( DESCB, N, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, $ IERR( 1 ) ) ELSE CALL DESCINIT( DESCB, M, NRHS, NB, $ NBRHS, 0, 0, ICTXT, $ MAX( 1, MP ) + IMIDPAD, $ IERR( 1 ) ) END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * IPW = IPB + DESCB( LLD_ )*NRHSQ + $ IPOSTPAD + IPREPAD * END IF * * Calculate the amount of workspace for PDGELS * IF( M.GE.N ) THEN LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0, $ NPCOL ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, $ ( MP + NRHSQ ) * NB ) + NB*NB ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LTAU = NUMROC( MIN(M,N), NB, MYROW, 0, $ NPROW ) LWF = NB * ( MP + NQ + NB ) LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP + $ MAX( NQ + NUMROC( NUMROC( N, NB, 0, $ 0, NPROW ), NB, 0, 0, LCMP ), $ NRHSQ ) ) * NB ) + NB*NB END IF * LWORK = LTAU + MAX( LWF, LWS ) WORKSIZ = LWORK + IPOSTPAD * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'solve', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Make the copy of the right hand side * CALL PDLACPY( 'All', NROWS, NRHS, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB ) * IF( TPSD ) THEN CALL PDFILLPAD( ICTXT, NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) ELSE CALL PDFILLPAD( ICTXT, MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) END IF CALL PDFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Solve the LS or overdetermined system * CALL PDGELS( TRANS, M, N, NRHS, MEM( IPA ), $ 1, 1, DESCA, MEM( IPX ), 1, 1, $ DESCX, MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGELS', MP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGELS', MNP, $ NRHSQ, MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGELS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Regenerate A in place for testing and next * iteration * CALL PDQRT13( ISCALE, M, N, MEM( IPA ), 1, 1, $ DESCA, ANORM, IASEED, $ MEM( IPW ) ) * * check the solution to rhs * IF( CHECK ) THEN * * Am I going to call PDQRT17 ? * IF( ( M.GE.N .AND. ( .NOT.TPSD ) ) .OR. $ ( M.LT.N .AND. TPSD ) ) THEN * * Call PDQRT17 first, A, X, and B remain * unchanged. Solving LS system * * Check amount of memory for PDQRT17 * IF( TPSD ) THEN WORKSIZ = NP*NRHSQ + NRHSP*MQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ MAX( NQ, MAX( MQ, NRHSQ ) ) + $ IPOSTPAD ELSE WORKSIZ = MP*NRHSQ + NRHSP*NQ IPW2 = IPW + WORKSIZ WORKSIZ = WORKSIZ + $ MAX( NQ, NRHSQ ) + $ IPOSTPAD END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * RESULT( 2 ) = PDQRT17( TRANS, 1, M, N, $ NRHS, $ MEM( IPA ), $ 1, 1, DESCA, $ MEM( IPX ), 1, $ 1, DESCX, $ MEM( IPB ), $ 1, 1, DESCB, $ MEM( IPW ), $ MEM( IPW2 ) ) SRESID = RESULT( 2 ) * CALL PDCHEKPAD( ICTXT, 'PDQRT17', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQRT17', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PDCHEKPAD( ICTXT, 'PDQRT17', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PDCHEKPAD( ICTXT, 'PDQRT17', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PDCHEKPAD( ICTXT, 'PDQRT17', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Call PDQRT16, B will be destroyed. * IF( TPSD ) THEN WORKSIZ = MP + IPOSTPAD ELSE WORKSIZ = NQ + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'MEMORY', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PDQRT16( TRANS, M, N, NRHS, $ MEM( IPA ), 1, 1, DESCA, $ MEM( IPX ), 1, 1, DESCX, $ MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ), RESULT( 1 ) ) * CALL PDCHEKPAD( ICTXT, 'PDQRT16', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQRT16', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) IF( TPSD ) THEN CALL PDCHEKPAD( ICTXT, 'PDQRT16', $ NP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) ELSE CALL PDCHEKPAD( ICTXT, 'PDQRT16', $ MP, NRHSQ, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF CALL PDCHEKPAD( ICTXT, 'PDQRT16', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Call PDQRT14 * IF( ( M.GE.N .AND. TPSD ) .OR. $ ( M.LT.N .AND. ( .NOT.TPSD ) ) ) THEN * IPW = IPB * IF( TPSD ) THEN * NNRHSQ = NUMROC( N+NRHS, NB, MYCOL, $ 0, NPCOL ) LTAU = NUMROC( MIN( M, N+NRHS ), NB, $ MYCOL, 0, NPCOL ) LWF = NB * ( NB + MP + NNRHSQ ) WORKSIZ = MP * NNRHSQ + LTAU + LWF + $ IPOSTPAD * ELSE * MNRHSP = NUMROC( M+NRHS, NB, MYROW, $ 0, NPROW ) LTAU = NUMROC( MIN( M+NRHS, N ), NB, $ MYROW, 0, NPROW ) LWF = NB * ( NB + MNRHSP + NQ ) WORKSIZ = MNRHSP * NQ + LTAU + LWF + $ IPOSTPAD * END IF * * Check for adequate memory for problem * size * IERR( 1 ) = 0 IF( ( IPW+WORKSIZ ).GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'MEMORY', ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PDFILLPAD( ICTXT, $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * * Solve underdetermined system * RESULT( 2 ) = PDQRT14( TRANS, M, N, $ NRHS, $ MEM( IPA ), 1, $ 1, DESCA, $ MEM( IPX ), $ 1, 1, DESCX, $ MEM( IPW ) ) SRESID = RESULT( 2 ) * CALL PDCHEKPAD( ICTXT, 'PDQRT14', $ MP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQRT14', $ MNP, NRHSQ, $ MEM( IPX-IPREPAD ), $ DESCX( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDQRT14', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Print information about the tests that * did not pass the threshold. * PASSED = 'PASSED' DO 20 II = 1, 2 IF( ( RESULT( II ).GE.THRESH ) .AND. $ ( RESULT( II )-RESULT( II ).EQ.0.0E+0 $ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )TRANS, $ M, N, NRHS, NB, ITYPE, II, $ RESULT( II ) KFAIL = KFAIL + 1 PASSED = 'FAILED' ELSE KPASS = KPASS + 1 END IF 20 CONTINUE * ELSE * * By-pass the solve check * KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock * timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN ADDFAC = 1 MULFAC = 1 IF( M.GE.N ) THEN * * NOPS = DOPLA( 'DGEQRF', M, N, 0, 0, * NB ) + DOPLA( 'DORMQR', M, NRHS, N, * 0, NB ) * MULTS = N*( ( ( 23.D0 / 6.D0 )+M+N / $ 2.D0 )+ N*( M-N / 3.D0 ) ) + $ N*NRHS*( 2.D0*M+2.D0-N ) ADDS = N*( ( 5.D0 / 6.D0 )+N* $ ( 1.D0 / 2.D0+( M-N / 3.D0 ) ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) ELSE * * NOPS = DOPLA( 'DGELQF', M, N, 0, 0, * NB ) + DOPLA( 'DORMLQ', M, * NRHS, N, 0, NB ) * MULTS = M*( ( ( 29.D0 / 6.D0 )+2.D0*N-M $ / 2.D0 )+M*( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+2.D0-N ) ADDS = M*( ( 5.D0 / 6.D0 )+M / 2.D0+M* $ ( N-M / 3.D0 ) ) $ + N*NRHS*( 2.D0*M+1.D0-N ) END IF NOPS = ADDFAC*ADDS + MULFAC*MULTS * * Calculate total megaflops, for WALL and * CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'WALL', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, WTIME( 1 ), $ TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) $ 'CPU ', TRANS, M, N, NB, NRHS, $ NBRHS, NPROW, NPCOL, CTIME( 1 ), $ TMFLOPS, PASSED END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE 60 CONTINUE 70 CONTINUE 80 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 90 CONTINUE * * Print out ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ', $ 'LS Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ', $ '--------- -------- ------' ) 9993 FORMAT( A4, 3X, A1, 3X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ I5, 1X, I5, 1X, F9.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( ' TRANS=''', A1, ''', M=', I5, ', N=', I5, ', NRHS=', I4, $ ', NB=', I4, ', type', I2, ', test(', I2, ')=', G12.5 ) * STOP * * End of PDLSDRIVER * END scalapack-1.8.0/TESTING/LIN/pzgerqrv.f0000640000175000017500000002376510363532303017117 0ustar muammarmuammar SUBROUTINE PZGERQRV( M, N, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGERQRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from R, Q * computed by PZGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, sub( A ) contains the the factors R and Q computed * by PZGERQF. On exit, the original matrix is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(M_A). * This array contains the scalar factors TAU of the elementary * reflectors computed by PZGERQF. TAU is tied to the dis- * tributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK = MB_A * ( Mp0 + 2*Nq0 + MB_A ), where * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ) * NB_A, * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ) * MB_A, * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), * NPROW ), * IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), * NPCOL ), * and NUMROC, INDXG2P are ScaLAPACK tool functions. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICOFF, ICTXT, IIA, IN, $ IPT, IPV, IPW, JJA, JV, K, MYCOL, MYROW, NPCOL, $ NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PB_TOPGET, $ PB_TOPSET, PZLACPY, PZLARFB, PZLARFT, $ PZLASET * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * K = MIN( M, N ) IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) * ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+M-K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IPV = 1 IPT = IPV + NQ * DESCA( MB_ ) IPW = IPT + DESCA( MB_ ) * DESCA( MB_ ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * CALL DESCSET( DESCV, DESCA( MB_), N + ICOFF, DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * * Handle first block separately * IB = IN - IA - M + K + 1 JV = 1 + N - K + ICOFF * * Compute upper triangular matrix T * CALL PZLARFT( 'Backward', 'Rowwise', N-M+IN-IA+1, IB, A, IA+M-K, $ JA, DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', IB, N-M+IN-IA+1, A, IA+M-K, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PZLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict lower triangular part of sub( A ) to get block * column of R * CALL PZLASET( 'All', IB, N-K, ZERO, ZERO, A, IA+M-K, JA, $ DESCA ) CALL PZLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, IA+M-K+1, $ JA+N-K, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', IN-IA+1, N-M+IN-IA+1, IB, WORK( IPV ), 1, $ ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * * Loop over the remaining row blocks * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) JV = 1 + N - M + I - IA + ICOFF * * Compute upper triangular matrix T * CALL PZLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace * CALL PZLACPY( 'All', IB, N-M+I+IB-IA, A, I, JA, DESCA, $ WORK( IPV ), 1, ICOFF+1, DESCV ) CALL PZLASET( 'Upper', IB, IB, ZERO, ONE, WORK( IPV ), 1, JV, $ DESCV ) * * Zeoes the strict Lower triangular part of sub( A ) to get * block column of R * CALL PZLASET( 'All', IB, N-M+I-IA, ZERO, ZERO, A, I, JA, $ DESCA ) CALL PZLASET( 'Lower', IB-1, IB, ZERO, ZERO, A, I+1, $ JA+N-M+I-IA, DESCA ) * * Apply block Householder transformation * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I+IB-IA, N-M+I+IB-IA, IB, WORK( IPV ), $ 1, ICOFF+1, DESCV, WORK( IPT ), A, IA, JA, DESCA, $ WORK( IPW ) ) * DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + 1, NPROW ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZGERQRV * END scalapack-1.8.0/TESTING/LIN/pspbdriver.f0000640000175000017500000007371510363532303017417 0ustar muammarmuammar PROGRAM PSPBDRIVER * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Purpose * ======= * * PSPBDRIVER is a test program for the * ScaLAPACK Band Cholesky routines corresponding to the options * indicated by SPB. This test driver performs an * A = L*L**T factorization * and solves a linear system with the factors for 1 or more RHS. * * The program must be driven by a short data file. * Here's an example file: *'ScaLAPACK, Version 1.2, banded linear systems input file' *'PVM.' *'' output file name (if any) *6 device out *'L' define Lower or Upper *9 number of problem sizes *1 5 17 28 37 121 200 1023 2048 3073 values of N *6 number of bandwidths *1 2 4 10 31 64 values of BW *1 number of NB's *-1 3 4 5 values of NB (-1 for automatic choice) *1 number of NRHS's (must be 1) *8 values of NRHS *1 number of NBRHS's (ignored) *1 values of NBRHS (ignored) *6 number of process grids *1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns" *3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 6200000. * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * All arrays used by ScaLAPACK routines are allocated from * this array and referenced by pointers. The integer IPB, * for example, is a pointer to the starting element of MEM for * the solution vector(s) B. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. Parameters .. INTEGER TOTMEM PARAMETER ( TOTMEM = 3000000 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * REAL ZERO INTEGER MEMSIZ, NTESTS, REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER BW, BW_NUM, FILLIN_SIZE, FREE_PTR, H, HH, I, $ IAM, IASEED, IBSEED, ICTXT, ICTXTB, IERR_TEMP, $ IMIDPAD, INFO, IPA, IPB, IPOSTPAD, IPREPAD, $ IPW, IPW_SIZE, IPW_SOLVE, IPW_SOLVE_SIZE, $ IP_DRIVER_W, IP_FILLIN, J, K, KFAIL, KPASS, $ KSKIP, KTESTS, MYCOL, MYRHS_SIZE, MYROW, N, NB, $ NBW, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROCS_REAL, NPROW, NQ, NRHS, $ N_FIRST, N_LAST, WORKSIZ REAL ANORM, SRESID, THRESH DOUBLE PRECISION NOPS, NOPS2, TMFLOPS, TMFLOPS2 * .. * .. Local Arrays .. INTEGER BWVAL( NTESTS ), DESCA( 7 ), DESCA2D( DLEN_ ), $ DESCB( 7 ), DESCB2D( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSBMATGEN, $ PSCHEKPAD, PSFILLPAD, PSMATGEN, PSPBINFO, $ PSPBLASCHK, PSPBTRF, PSPBTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC LOGICAL LSAME REAL PSLANGE EXTERNAL LSAME, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * * * * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 * CALL PSPBINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NBW, $ BWVAL, NTESTS, NNB, NBVAL, NTESTS, NNR, NRVAL, $ NTESTS, NNBR, NBRVAL, NTESTS, NGRIDS, PVAL, NTESTS, $ QVAL, NTESTS, THRESH, MEM, IAM, NPROCS ) * CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * * * Define transpose process grid * CALL BLACS_GET( -1, 0, ICTXTB ) CALL BLACS_GRIDINIT( ICTXTB, 'Column-major', NPCOL, NPROW ) * * Go to bottom of process grid loop if this case doesn't use my * process * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.LT.0 .OR. MYCOL.LT.0 ) THEN GO TO 50 ENDIF * DO 40 J = 1, NMAT * IERR( 1 ) = 0 * N = NVAL( J ) * * Make sure matrix information is correct * IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'size' KSKIP = KSKIP + 1 GO TO 40 END IF * * DO 45 BW_NUM = 1, NBW * IERR( 1 ) = 0 * BW = BWVAL( BW_NUM ) IF( BW.LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Band', 'bw', BW IERR( 1 ) = 1 END IF * IF( BW.GT.N-1 ) THEN IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 45 END IF * DO 30 K = 1, NNB * IERR( 1 ) = 0 * NB = NBVAL( K ) IF( NB.LT.0 ) THEN NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 ) $ + BW NB = MAX( NB, 2*BW ) NB = MIN( N, NB ) END IF * * Make sure NB is legal * IERR( 1 ) = 0 IF( NB.LT.MIN( 2*BW, N ) ) THEN IERR( 1 ) = 1 ENDIF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( (BW+1), (BW+1), $ MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( CHECK ) THEN IPREPAD = ((BW+1)+10) IMIDPAD = 10 IPOSTPAD = ((BW+1)+10) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA2D, (BW+1), N, $ (BW+1), NB, 0, 0, $ ICTXT,((BW+1)+10), IERR( 1 ) ) * * Convert this to 1D descriptor * DESCA( 1 ) = 501 DESCA( 3 ) = N DESCA( 4 ) = NB DESCA( 5 ) = 0 DESCA( 2 ) = ICTXT DESCA( 6 ) = ((BW+1)+10) DESCA( 7 ) = 0 * IERR_TEMP = IERR( 1 ) IERR( 1 ) = 0 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * FREE_PTR = 1 IPB = 0 * * Save room for prepadding FREE_PTR = FREE_PTR + IPREPAD * IPA = FREE_PTR FREE_PTR = FREE_PTR + DESCA2D( LLD_ )* $ DESCA2D( NB_ ) $ + IPOSTPAD * * Add memory for fillin * Fillin space needs to store: * Fillin spike: * Contribution to previous proc's diagonal block of * reduced system: * Off-diagonal block of reduced system: * Diagonal block of reduced system: * FILLIN_SIZE = $ (NB+2*BW)*BW * * Claim memory for fillin * FREE_PTR = FREE_PTR + IPREPAD IP_FILLIN = FREE_PTR FREE_PTR = FREE_PTR + FILLIN_SIZE * * Workspace needed by computational routines: * IPW_SIZE = 0 * * factorization: * IPW_SIZE = BW*BW * * Claim memory for IPW * IPW = FREE_PTR FREE_PTR = FREE_PTR + IPW_SIZE * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'divide and conquer factorization', $ (FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Worksize needed for LAPRNT WORKSIZ = MAX( ((BW+1)+10), NB ) * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines. * * PSLANGE WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) ) * * PSPBLASCHK WORKSIZ = MAX( WORKSIZ, $ MAX(5,MAX(BW*(BW+2),NB))+2*NB ) END IF * FREE_PTR = FREE_PTR + IPREPAD IP_DRIVER_W = FREE_PTR FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD * * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * CALL PSBMATGEN( ICTXT, UPLO, 'B', BW, BW, N, (BW+1), NB, $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED, $ MYROW, MYCOL, NPROW, NPCOL ) * CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ ((BW+1)+10), IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) * * Calculate norm of A for residual error-checking * IF( CHECK ) THEN * ANORM = PSLANGE( '1', (BW+1), $ N, MEM( IPA ), 1, 1, $ DESCA2D, MEM( IP_DRIVER_W ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform factorization * CALL SLTIMER( 1 ) * CALL PSPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA, $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ), $ IPW_SIZE, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) 'PSPBTRF INFO=', INFO ENDIF KFAIL = KFAIL + 1 GO TO 30 END IF * IF( CHECK ) THEN * * Check for memory overwrite in factorization * CALL PSCHEKPAD( ICTXT, 'PSPBTRF', NP, $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * IERR( 1 ) = 0 * NRHS = NRVAL( HH ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0, $ ICTXTB, NB+10, IERR( 1 ) ) * * Convert this to 1D descriptor * DESCB( 1 ) = 502 DESCB( 3 ) = N DESCB( 4 ) = NB DESCB( 5 ) = 0 DESCB( 2 ) = ICTXT DESCB( 6 ) = DESCB2D( LLD_ ) DESCB( 7 ) = 0 * * reset free_ptr to reuse space for right hand sides * IF( IPB .GT. 0 ) THEN FREE_PTR = IPB ENDIF * FREE_PTR = FREE_PTR + IPREPAD IPB = FREE_PTR FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ ) $ + IPOSTPAD * * Allocate workspace for workspace in TRS routine: * IPW_SOLVE_SIZE = (BW*NRHS) * IPW_SOLVE = FREE_PTR FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE * IERR( 1 ) = 0 IF( FREE_PTR.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( FREE_PTR )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, $ IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 15 END IF * MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * * Generate RHS * CALL PSMATGEN(ICTXTB, 'No', 'No', $ DESCB2D( M_ ), DESCB2D( N_ ), $ DESCB2D( MB_ ), DESCB2D( NB_ ), $ MEM( IPB ), $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ), $ DESCB2D( CSRC_ ), $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL, $ MYROW, NPCOL, NPROW ) * IF( CHECK ) THEN CALL PSFILLPAD( ICTXTB, NB, NRHS, $ MEM( IPB-IPREPAD ), $ DESCB2D( LLD_ ), $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * CALL BLACS_BARRIER( ICTXT, 'All') CALL SLTIMER( 2 ) * * Solve linear system via factorization * CALL PSPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1, $ DESCA, MEM( IPB ), 1, DESCB, $ MEM( IP_FILLIN ), FILLIN_SIZE, $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE, $ INFO ) * CALL SLTIMER( 2 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PSPBTRS INFO=', INFO KFAIL = KFAIL + 1 PASSED = 'FAILED' GO TO 20 END IF * IF( CHECK ) THEN * * check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSPBTRS-work', $ WORKSIZ, 1, $ MEM( IP_DRIVER_W-IPREPAD ), $ WORKSIZ, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * SRESID = ZERO * CALL PSPBLASCHK( 'S', UPLO, N, BW, BW, NRHS, $ MEM( IPB ), 1, 1, DESCB2D, $ IASEED, MEM( IPA ), 1, 1, DESCA2D, $ IBSEED, ANORM, SRESID, $ MEM( IP_DRIVER_W ), WORKSIZ ) * IF( IAM.EQ.0 ) THEN IF( SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID END IF * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * END IF * 15 CONTINUE * Skipped tests jump to here to print out "SKIPPED" * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * NOPS = 0 NOPS2 = 0 * N_FIRST = NB NPROCS_REAL = ( N-1 )/NB + 1 N_LAST = MOD( N-1, NB ) + 1 * * NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)* $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 / $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) ) NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW) $ *( -1.D0 /2.D0+DBLE(BW) $ *( -1.D0 / 3.D0 ) ) ) + $ DBLE(N)*( DBLE(BW) / $ 2.D0*( 1.D0+DBLE(BW) ) ) * NOPS = NOPS + $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )* $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)* $ ( DBLE(BW)*( 2*DBLE(N)- $ ( DBLE(BW)+1.D0 ) ) ) * * * Second calc to represent actual hardware speed * * NB bw^2 flops for LLt factorization in 1st proc * NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 ) * IF ( NPROCS_REAL .GT. 1) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in last processor * NOPS2 = NOPS2 + $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) ) ENDIF * IF ( NPROCS_REAL .GT. 2) THEN * 4 NB bw^2 flops for LLt factorization and * spike calc in other processors * NOPS2 = NOPS2 + (NPROCS_REAL-2)* $ 4*( (DBLE(NB)*DBLE(BW)**2) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW ) ENDIF * * * nrhs * 4 n_first*bw flops for LLt solve in proc 1. * NOPS2 = NOPS2 + $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) ) * IF ( NPROCS_REAL .GT. 1 ) THEN * * 2*nrhs*4 n_last*bw flops for LLt solve in last. * NOPS2 = NOPS2 + $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) ) ENDIF * IF ( NPROCS_REAL .GT. 2 ) THEN * * 2 * nrhs * 4 NB*bw flops for LLt solve in others. * NOPS2 = NOPS2 + $ ( NPROCS_REAL-2)*2* $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) ) ENDIF * * Reduced system * NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW ) IF( NPROCS_REAL .GT. 1 ) THEN NOPS2 = NOPS2 + $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW ) ENDIF * * * Calculate total megaflops - factorization and/or * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 1 )+WTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS2 = NOPS2 / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS2 = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, $ N, $ BW, $ NB, NRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ TMFLOPS2, PASSED * END IF 20 CONTINUE * * 30 CONTINUE * NNB loop * 45 CONTINUE * BW[] loop * 40 CONTINUE * NMAT loop * CALL BLACS_GRIDEXIT( ICTXT ) CALL BLACS_GRIDEXIT( ICTXTB ) * 50 CONTINUE * NGRIDS DROPOUT 60 CONTINUE * NGRIDS loop * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UL N BW NB NRHS P Q L*U Time ', $ 'Slv Time MFLOPS MFLOP2 CHECK' ) 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ', $ '-------- ------ ------ ------' ) 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X, $ I5, 1X, I2, 1X, $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PSPBTRS_DRIVER * END * scalapack-1.8.0/TESTING/LIN/pcmatgen.f0000640000175000017500000004632310363532303017030 0ustar muammarmuammar SUBROUTINE PCMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCMATGEN : Parallel Complex Single precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX, pointer into the local memory to * an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB REAL DUMMY * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MOD, REAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSRAND EXTERNAL ICEIL, NUMROC, LSAME, PSRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = CMPLX( PSRAND(0), PSRAND(0) ) 30 CONTINUE IF( SYMM ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), ZERO ) DUMMY = PSRAND(0) END IF DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IF( HERM ) THEN A(IK,JK+J) = CONJG( A(IK+J,JK) ) ELSE A(IK,JK+J) = A(IK+J,JK) END IF 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 IF( SYMM ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ TWO*PSRAND(0) - ONE ) END IF JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 IF( TRAN ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ TWO*PSRAND(0) - ONE ) END IF JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 IF( HERM ) THEN A(IK,JK+J) = CMPLX( $ ABS(REAL(A(IK,JK+J)))+2*MAXMN, ZERO ) ELSE A(IK,JK+J) = CMPLX( ABS(REAL(A(IK,JK+J)))+MAXMN, $ ABS(AIMAG(A(IK,JK+J)))+ MAXMN ) END IF IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PCMATGEN * END scalapack-1.8.0/TESTING/LIN/pzgbinfo.f0000640000175000017500000004434010363532303017045 0ustar muammarmuammar SUBROUTINE PZGBINFO( SUMMRY, NOUT, TRANS, NMAT, NVAL, LDNVAL, NBW, $ BWLVAL, BWUVAL, LDBWVAL, NNB, NBVAL, LDNBVAL, $ NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, $ NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, $ WORK, IAM, NPROCS ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS CHARACTER*(*) SUMMRY INTEGER IAM, $ LDBWVAL, LDNBRVAL, LDNBVAL, LDNRVAL, LDNVAL, $ LDPVAL, LDQVAL, NGRIDS, NMAT, NNB, NNBR, NBW, $ NPROCS, NNR, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER NBRVAL( LDNBRVAL ), NBVAL( LDNBVAL ), $ NRVAL( LDNRVAL ), NVAL( LDNVAL ), $ BWLVAL( LDBWVAL),BWUVAL( LDBWVAL), $ PVAL( LDPVAL ), QVAL(LDQVAL), WORK( * ) * .. * * Purpose * ======= * * PZGBINFO get needed startup information for band factorization * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * NBW (global output) INTEGER * The number of different values that can be used for @bw@. * BWLVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BWL (number of subdiagonals in matrix) to run * the code with. * BWUVAL (global output) INTEGER array, dimension (LDNVAL) * The values of BW (number of supdiagonals in matrix) to run * the code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NNR (global output) INTEGER * The number of different values that can be used for NRHS. * * NRVAL (global output) INTEGER array, dimension(LDNRVAL) * The values of NRHS (# of Right Hand Sides) to run the code * with. * * LDNRVAL (global input) INTEGER * The maximum number of different values that can be used for * NRHS, LDNRVAL >= NNR. * * NNBR (global output) INTEGER * The number of different values that can be used for NBRHS. * * NBRVAL (global output) INTEGER array, dimension (LDNBRVAL) * The values of NBRHS (RHS blocksize) to run the code with. * * LDNBRVAL (global input) INTEGER * The maximum number of different values that can be used for * NBRHS, LDNBRVAL >= NBRVAL. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * Not used (will be returned as all 1s) since proc grid is 1D * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH, perform * factor check only if solve check fails * * WORK (local workspace) INTEGER array of dimension >= * MAX( 8, LDNVAL+2*LDNBVAL+LDNRVAL+LDNBRVAL+LDPVAL+LDQVAL * $ +3*LDNVAL) * Used to pack input arrays in order to send info in one * message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. INTEGER I, ICTXT CHARACTER*79 USRINFO DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'BLU.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get TRANS * READ( NIN, FMT = * ) TRANS * * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get bandwidths * READ( NIN, FMT = * ) NBW IF( NBW.LT.1 .OR. NBW.GT.LDBWVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'BW', LDBWVAL GO TO 20 END IF READ( NIN, FMT = * ) ( BWLVAL( I ), I = 1, NBW ) READ( NIN, FMT = * ) ( BWUVAL( I ), I = 1, NBW ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get values of NRHS * READ( NIN, FMT = * ) NNR IF( NNR.LT.1 .OR. NNR.GT.LDNRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NRHS', LDNRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NRVAL( I ), I = 1, NNR ) * * Get values of NBRHS * READ( NIN, FMT = * ) NNBR IF( NNBR.LT.1 .OR. NNBR.GT.LDNBRVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NBRHS', LDNBRVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBRVAL( I ), I = 1, NNBR ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GO TO 20 END IF * * Processor grid must be 1D so set PVAL to 1 DO 8738 I = 1, NGRIDS PVAL( I ) = 1 8738 CONTINUE * * Get values of Q * READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) I = 1 WORK( I ) = NMAT I = I+1 WORK( I ) = NBW I = I+1 WORK( I ) = NNB I = I+1 WORK( I ) = NNR I = I+1 WORK( I ) = NNBR I = I+1 WORK( I ) = NGRIDS I = I+1 IF( LSAME( TRANS, 'N' ) ) THEN WORK( I ) = 1 ELSE TRANS = 'C' WORK( I ) = 2 END IF I = I+1 * Send number of elements to be sent CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, I-1, 1 ) * Send elements CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NBW, BWLVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NBW, BWUVAL, 1, WORK( I ), 1 ) I = I + NBW CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NNR, NRVAL, 1, WORK( I ), 1 ) I = I + NNR CALL ICOPY( NNBR, NBRVAL, 1, WORK( I ), 1 ) I = I + NNBR CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', I-1, 1, WORK, I-1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK banded linear systems.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision band matrix solve ' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Solve residual = ||Ax - b|| / '// $ '(||x|| * ||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ ' Factorization residual = ||A - LU|| /'// $ ' (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'bwl, bwu : The number of diagonals '// $ 'in the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the column panels the'// $ ' matrix A is split into. [-1 for default]' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The total number of RHS to solve'// $ ' for.' WRITE( NOUT, FMT = 9999 ) $ 'NBRHS : The number of RHS to be put on '// $ 'a column of processes before going' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than'// $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'Fact time: Time in seconds to factor the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'Sol Time: Time in seconds to solve the'// $ ' system.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for factor '// $ 'and solve using sequential operation count.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOP2 : Rough estimate of speed '// $ 'using actual op count (accurate big P,N).' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL(I), I = 1, MIN(NMAT, 10) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL(I), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'bwl ', ( BWLVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWLVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'bwu ', ( BWUVAL(I), I = 1, MIN(NBW, 10) ) IF( NBW.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( BWUVAL(I), I = 11, NBW ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL(I), I = 1, MIN(NNB, 10) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL(I), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'NRHS ', ( NRVAL(I), I = 1, MIN(NNR, 10) ) IF( NNR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NRVAL(I), I = 11, NNR ) WRITE( NOUT, FMT = 9996 ) $ 'NBRHS', ( NBRVAL(I), I = 1, MIN(NNBR, 10) ) IF( NNBR.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBRVAL(I), I = 11, NNBR ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997) ( PVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL(I), I = 1, MIN(NGRIDS, 10) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL(I), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, I, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 NMAT = WORK( I ) I = I+1 NBW = WORK( I ) I = I+1 NNB = WORK( I ) I = I+1 NNR = WORK( I ) I = I+1 NNBR = WORK( I ) I = I+1 NGRIDS = WORK( I ) I = I+1 IF( WORK( I ) .EQ. 1 ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF I = I+1 * I = NMAT + NBW + NNB + NNR + NNBR + 2*NGRIDS I = I + NBW * CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NBW, WORK( I ), 1, BWLVAL, 1 ) I = I + NBW CALL ICOPY( NBW, WORK( I ), 1, BWUVAL, 1 ) I = I + NBW CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NNR, WORK( I ), 1, NRVAL, 1 ) I = I + NNR CALL ICOPY( NNBR, WORK( I ), 1, NBRVAL, 1 ) I = I + NNBR CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) * CALL BLACS_ABORT( ICTXT, 1 ) STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ': ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZGBINFO * END scalapack-1.8.0/TESTING/LIN/pclltdriver.f0000640000175000017500000011044110363532303017555 0ustar muammarmuammar PROGRAM PCLLTDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PCLLTDRIVER is the main test program for the COMPLEX * ScaLAPACK Cholesky routines. This test driver performs an * A = L*L**H or A = U**H*U factorization and solve, and optionally * performs condition estimation and iterative refinement. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'ScaLAPACK LLt factorization input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'LLT.out' output file name (if any) * 6 device out * 'U' define Lower or Upper * 1 number of problems sizes * 31 100 200 values of N * 1 number of NB's * 2 10 24 values of NB * 1 number of NRHS's * 1 values of NRHS * 1 Number of NBRHS's * 1 values of NBRHS * 1 number of process grids (ordered pairs of P & Q) * 2 values of P * 2 values of Q * 1.0 threshold * T (T or F) Test Cond. Est. and Iter. Ref. Routines * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, MEMSIZ, NTESTS, REALSZ, TOTMEM REAL ZERO COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK, EST CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER HH, I, IAM, IASEED, IBSEED, ICTXT, IMIDPAD, $ INFO, IPA, IPA0, IPB, IPB0, IPBERR, IPFERR, $ IPREPAD, IPOSTPAD, IPW, IPW2, ITEMP, J, K, $ KFAIL, KK, KPASS, KSKIP, KTESTS, LCM, LCMQ, $ LRWORK, LWORK, LW2, MYCOL, MYRHS, MYROW, N, NB, $ NBRHS, NGRIDS, NMAT, NNB, NNBR, NNR, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, NRHS, WORKSIZ REAL ANORM, ANORM1, FRESID, RCOND, SRESID, SRESID2, $ THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), IERR( 1 ), $ NBRVAL( NTESTS ), NBVAL( NTESTS ), $ NRVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGSUM2D, BLACS_PINFO, PCCHEKPAD, PCFILLPAD, $ PCLAFCHK, PCLASCHK, PCLLTINFO, $ PCMATGEN, PCPOCON, PCPORFS, $ PCPOTRF, PCPOTRRV, PCPOTRS, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PCLANHE EXTERNAL ICEIL, ILCM, LSAME, NUMROC, PCLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 IBSEED = 200 CALL PCLLTINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NNR, NRVAL, NTESTS, NNBR, NBRVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, EST, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 50 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 50 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 50 * DO 40 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 40 END IF * DO 30 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 30 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 30 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD+1 IF( EST ) THEN IPA0 = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPA0 + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD ELSE IPW = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD END IF * * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PCLAFCHK, PCPOTRRV, and * PCLANHE * WORKSIZ = NP * DESCA( NB_ ) * WORKSIZ = MAX( WORKSIZ, DESCA( MB_ ) * DESCA( NB_ ) ) * LCM = ILCM( NPROW, NPCOL ) ITEMP = MAX( 2, 2 * NQ ) + NP IF( NPROW.NE.NPCOL ) THEN ITEMP = ITEMP + $ NB * ICEIL( ICEIL( NP, NB ), LCM / NPROW ) END IF WORKSIZ = MAX( WORKSIZ, $ ICEIL( REALSZ * ITEMP, CPLXSZ ) ) WORKSIZ = WORKSIZ + IPOSTPAD * ELSE * WORKSIZ = IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'factorization', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 30 END IF * * Generate a Hermitian positive definite matrix A * CALL PCMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) ANORM1 = PCLANHE( '1', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) END IF * IF( EST ) THEN CALL PCMATGEN( ICTXT, 'Herm', 'Diag', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA0 ), $ DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) IF( CHECK ) $ CALL PCFILLPAD( ICTXT, NP, NQ, $ MEM( IPA0-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Perform LLt factorization * CALL SLTIMER( 1 ) * CALL PCPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) 'PCPOTRF INFO=', INFO KFAIL = KFAIL + 1 RCOND = ZERO GO TO 60 END IF * IF( CHECK ) THEN * * Check for memory overwrite in LLt factorization * CALL PCCHEKPAD( ICTXT, 'PCPOTRF', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) END IF * IF( EST ) THEN * * Calculate workspace required for PCPOCON * LWORK = MAX( 1, 2*NP ) + $ MAX( 2, DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), $ NQ + DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, 2*NQ ) LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'cond est', $ ( IPW2+LW2 )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 60 END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) END IF * * Compute condition number of the matrix * CALL PCPOCON( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ ANORM1, RCOND, MEM( IPW ), LWORK, $ MEM( IPW2 ), LRWORK, INFO ) * IF( CHECK ) THEN CALL PCCHEKPAD( ICTXT, 'PCPOCON', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPOCON', $ LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPOCON', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF END IF * * Loop over the different values for NRHS * DO 20 HH = 1, NNR * NRHS = NRVAL( HH ) * DO 10 KK = 1, NNBR * NBRHS = NBRVAL( KK ) * * Initialize Array Descriptor for rhs * CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0, $ ICTXT, MAX( 1, NP )+IMIDPAD, $ IERR( 1 ) ) * * move IPW to allow room for RHS * MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ), MYCOL, $ DESCB( CSRC_ ), NPCOL ) IPB = IPW * IF( EST ) THEN IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD IPFERR = IPB0 + DESCB( LLD_ )*MYRHS + IPOSTPAD $ + IPREPAD IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD ELSE IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD + $ IPREPAD END IF * IF( CHECK ) THEN * * Calculate the amount of workspace required by * the checking routines PCLASCHK * LCMQ = LCM / NPCOL WORKSIZ = MAX( WORKSIZ-IPOSTPAD, $ NQ * NBRHS + NP * NBRHS + $ MAX( MAX( NQ*NB, 2*NBRHS ), $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB, $ 0,0,LCMQ ) ) ) WORKSIZ = IPOSTPAD + WORKSIZ ELSE WORKSIZ = IPOSTPAD END IF * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'solve', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, $ -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate RHS * CALL PCMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, NPCOL ) * IF( CHECK ) $ CALL PCFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * IF( EST ) THEN CALL PCMATGEN( ICTXT, 'No', 'No', DESCB( M_ ), $ DESCB( N_ ), DESCB( MB_ ), $ DESCB( NB_ ), MEM( IPB0 ), $ DESCB( LLD_ ), DESCB( RSRC_ ), $ DESCB( CSRC_ ), IBSEED, 0, NP, 0, $ MYRHS, MYROW, MYCOL, NPROW, $ NPCOL ) * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, 1, MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 2 ) * * Solve linear system via Cholesky factorization * CALL PCPOTRS( UPLO, N, NRHS, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB ), 1, 1, DESCB, $ INFO ) * CALL SLTIMER( 2 ) * IF( CHECK ) THEN * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPOTRS', NP, NQ, $ MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPOTRS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PCLASCHK( 'Herm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, IBSEED, $ ANORM, SRESID, MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * The second test is a NaN trap * IF( ( SRESID.LE.THRESH ).AND. $ ( (SRESID-SRESID).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE KPASS = KPASS + 1 SRESID = SRESID - SRESID PASSED = 'BYPASS' END IF * IF( EST ) THEN * * Calculate workspace required for PCPORFS * LWORK = MAX( 1, 2*NP ) IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD LRWORK = MAX( 1, NP ) LW2 = ICEIL( LRWORK*REALSZ, CPLXSZ ) + $ IPOSTPAD * IERR( 1 ) = 0 IF( IPW2+LW2.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) $ 'iter ref', ( IPW2+LW2 )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, $ 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) $ 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, LWORK, 1, $ MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, LW2-IPOSTPAD, $ 1, MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF * * Use iterative refinement to improve the * computed solution * CALL PCPORFS( UPLO, N, NRHS, MEM( IPA0 ), $ 1, 1, DESCA, MEM( IPA ), 1, 1, $ DESCA, MEM( IPB0 ), 1, 1, $ DESCB, MEM( IPB ), 1, 1, DESCB, $ MEM( IPFERR ), MEM( IPBERR ), $ MEM( IPW ), LWORK, MEM( IPW2 ), $ LRWORK, INFO ) * * check for memory overwrite * IF( CHECK ) THEN CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, $ NQ, MEM( IPA0-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, $ NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', NP, $ MYRHS, $ MEM( IPB0-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', 1, $ MYRHS, $ MEM( IPFERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', 1, $ MYRHS, $ MEM( IPBERR-IPREPAD ), 1, $ IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', LWORK, $ 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCPORFS', $ LW2-IPOSTPAD, 1, $ MEM( IPW2-IPREPAD ), $ LW2-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * check the solution to rhs * CALL PCLASCHK( 'Herm', 'Diag', N, NRHS, $ MEM( IPB ), 1, 1, DESCB, $ IASEED, 1, 1, DESCA, $ IBSEED, ANORM, SRESID2, $ MEM( IPW ) ) * IF( IAM.EQ.0 .AND. SRESID2.GT.THRESH ) $ WRITE( NOUT, FMT = 9985 ) SRESID2 * * check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCLASCHK', NP, $ MYRHS, MEM( IPB-IPREPAD ), $ DESCB( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLASCHK', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, $ PADVAL ) END IF END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 2, 1, $ WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 2, 1, $ CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 4/3 N^3 + 3 N^2 flops for LLt factorization * NOPS = 4.0D+0*(DBLE(N)**3)/3.0D+0 + $ 3.0D+0*(DBLE(N)**2) * * nrhs * 8 N^2 flops for LLt solve. * NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS) * * Calculate total megaflops -- factorization and * solve -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ) + WTIME( 2 ) .GT. 0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( WTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ WTIME( 1 ), WTIME( 2 ), TMFLOPS, $ PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 )+CTIME( 2 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 2 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', UPLO, N, $ NB, NRHS, NBRHS, NPROW, NPCOL, $ CTIME( 1 ), CTIME( 2 ), TMFLOPS, $ PASSED * END IF 10 CONTINUE 20 CONTINUE * IF( CHECK .AND. SRESID.GT.THRESH ) THEN * * Compute FRESID = ||A - LL'|| / (||A|| * N * eps) * CALL PCPOTRRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCLAFCHK( 'Symm', 'Diag', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCPOTRRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGETRRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * IF( IAM.EQ.0 ) THEN IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9986 ) 'L*L''', FRESID ELSE WRITE( NOUT, FMT = 9986 ) 'U''*U', FRESID END IF END IF END IF * 30 CONTINUE 40 CONTINUE CALL BLACS_GRIDEXIT( ICTXT ) 50 CONTINUE * * Print ending messages and close output file * 60 CONTINUE IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB NRHS NBRHS P Q LLt Time ', $ 'Slv Time MFLOPS CHECK' ) 9994 FORMAT( '---- ---- ----- --- ---- ----- ---- ---- -------- ', $ '-------- -------- ------' ) 9993 FORMAT( A4, 4X, A1, 1X, I5, 1X, I3, 1X, I4, 1X, I5, 1X, I4, 1X, $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 ) * STOP * * End of PCLLTDRIVER * END scalapack-1.8.0/TESTING/QR.dat0000640000175000017500000000101410363532303015441 0ustar muammarmuammar'ScaLAPACK, Orthogonal factorizations input file' 'MPI machine' 'QR.out' output file name (if any) 6 device out 6 number of factorizations 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorizations: QR, QL, LQ, RQ, QP, TZ 4 number of problems sizes 2 5 13 15 13 26 30 15 values of M 2 7 8 10 17 20 30 35 values of N 4 number of blocking sizes 4 3 5 5 4 6 values of MB 4 7 3 5 8 2 values of NB 4 number of process grids (ordered pairs P & Q) 1 2 1 4 2 3 8 values of P 1 2 4 1 3 2 1 values of Q 3.0 threshold scalapack-1.8.0/TESTING/TRD.dat0000640000175000017500000000066410363532303015562 0ustar muammarmuammar'ScaLAPACK TRD computation input file' 'MPI machine' 'TRD.out' output file name 6 device out 'L' define Lower or Upper 4 number of problems sizes 16 50 6 11 21 22 23 values of N 4 number of NB's 1 2 3 4 5 values of NB 3 Number of processor grids (ordered pairs of P & Q) 1 1 4 2 1 3 1 values of P 1 4 1 2 3 1 1 values of Q 10.0 threshold scalapack-1.8.0/TESTING/BLLT.dat0000640000175000017500000000166110363532303015664 0ustar muammarmuammar'ScaLAPACK, Version 1.2, banded linear systems input file' 'MPI machine' '' output file name (if any) 6 device out 'U' define Lower or Upper 8 number of problem sizes 3 5 17 28 37 121 200 1023 values of N 6 number of bandwidths 1 2 4 10 31 64 values of BW 1 number of NB's -1 values of NB (-1 for automatic determination) 1 number of NRHS's (must be 1) 4 values of NRHS 1 number of NBRHS's (ignored) 1 values of NBRHS (ignored) 4 number of process grids 1 2 3 4 values of "Number of Process Columns" 3.0 threshold scalapack-1.8.0/TESTING/EVC.dat0000640000175000017500000000077110363532303015545 0ustar muammarmuammar'SCALAPACK NEP (Nonsymmetric Eigenvalue Problem) input file' 'MPI Machine' 'EVC.out' output file name (if any) 6 device out 1 number of problems sizes 100 1000 1500 2000 2500 3000 Probs 1 number of NB's 8 values of NB 4 number of process grids (ordered pairs of P & Q) 1 1 4 2 3 2 2 1 values of P 1 4 1 2 3 1 4 8 values of Q 20.0 threshold scalapack-1.8.0/TESTING/SVD.dat0000640000175000017500000000301410363532303015555 0ustar muammarmuammar'ScaLAPACK Singular Value Decomposition input file' 6 device out 4 maxnodes ' ' 'TEST 1 - test medium matrices - all types and requests' 20.0 Threshold 1 number of matrices 100 number of rows 25 number of columns 1 number of processor configurations (P, Q, NB) 2 values of P (NPROW) 2 values of Q (NPCOL) 8 values of NB ' ' 'TEST 2 - test medium matrices - all processor configurations' 20.0 Threshold 1 number of matrices 80 number of rows 32 number of columns 1 number of processor configurations (P, Q, NB) 2 values of P (NPROW) 2 values of Q (NPCOL) 8 values of NB ' ' 'TEST 3 - test one large matrix' 15.0 Threshold 1 number of matrices 24 number of rows 8 number of columns 1 number of processor configurations (P, Q, NB) 2 values of P (NPROW) 2 values of Q (NPCOL) 8 values of NB ' ' 'End of tests' -1 scalapack-1.8.0/TESTING/LLT.dat0000640000175000017500000000105610363532303015560 0ustar muammarmuammar'ScaLAPACK, LLt factorization input file' 'MPI machine' 'LLT.out' output file name (if any) 6 device out 'U' define Lower or Upper 4 number of problems sizes 4 10 17 13 23 31 57 values of N 3 number of NB's 2 3 4 5 values of NB 3 number of NRHS's 1 3 9 28 values of NRHS 3 number of NBRHS's 1 3 5 7 values of NBRHS 4 number of process grids (ordered pairs P & Q) 1 2 1 4 2 3 8 values of P 1 2 4 1 3 2 1 values of Q 3.0 threshold T (T or F) Test Cond. Est. and Iter. Ref. Routines scalapack-1.8.0/TESTING/BLU.dat0000640000175000017500000000174010363532303015547 0ustar muammarmuammar'ScaLAPACK, Version 1.2, banded linear systems input file' 'MPI machine' '' output file name (if any) 6 device out 'N' define transpose or not 3 number of problem sizes 3 5 17 28 37 121 200 1023 values of N 3 number of bandwidths 1 3 15 6 13 20 values of BWL 1 1 4 18 24 33 values of BWU 1 number of NB's -1 values of NB (-1 for automatic determination) 1 number of NRHS's (must be 1) 4 values of NRHS 1 number of NBRHS's (ignored) 1 values of NBRHS (ignored) 4 number of process grids 1 2 3 4 values of "Number of Process Columns" 3.0 threshold scalapack-1.8.0/TESTING/EIG/0000750000175000017500000000000010605234756015046 5ustar muammarmuammarscalapack-1.8.0/TESTING/EIG/pssytdrv.f0000640000175000017500000004066610363532303017116 0ustar muammarmuammar SUBROUTINE PSSYTDRV( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSSYTDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from Q, the * symmetric tridiagonal matrix T (or D and E), and TAU, which were * computed by PSSYTRD: sub( A ) := Q * T * Q'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of sub( A ). On entry, * if UPLO='U', the diagonal and first superdiagonal of sub( A ) * have the corresponding elements of the tridiagonal matrix T, * and the elements above the first superdiagonal, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO='L', the diagonal and * first subdiagonal of sub( A ) have the corresponding elements * of the tridiagonal matrix T, and the elements below the first * subdiagonal, with the array TAU, represent the orthogonal * matrix Q as a product of elementary reflectors, and the * strictly upper triangular part of sub( A ) is not referenced. * On exit, if UPLO = 'U', the upper triangular part of the * distributed symmetric matrix sub( A ) is recovered. * If UPLO='L', the lower triangular part of the distributed * symmetric matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local input) REAL, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= 2 * NB *( NB + NP ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL EIGHT, HALF, ONE, ZERO PARAMETER ( EIGHT = 8.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER I, IACOL, IAROW, ICTXT, II, IPT, IPV, IPX, $ IPY, J, JB, JJ, JL, K, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW REAL ADDBND, D1, D2, E1, E2 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCT( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, IGSUM2D, $ PSELGET, PSGEMM, PSLACPY, $ PSLARFT, PSLASET, PSSYMM, $ PSSYR2K, PSTRMM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) UPPER = LSAME( UPLO, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = NB * NB + IPT IPX = NB * NP + IPV IPY = NB * NP + IPX * CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * ADDBND = EIGHT * PSLAMCH( ICTXT, 'eps' ) * IF( UPPER ) THEN * CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 10 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PSELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J+1, DESCE ) CALL PSELGET( 'Columnwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * * Compute the upper triangle of sub( A ). * CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * DO 20 K = 0, N-1, NB JB = MIN( NB, N-K ) I = IA + K J = JA + K * * Compute the lower triangular matrix T. * CALL PSLARFT( 'Backward', 'Columnwise', K+JB-1, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PSLACPY( 'All', K+JB-1, JB, A, IA, J, DESCA, $ WORK( IPV ), 1, 1, DESCV ) * IF( K.GT.0 ) THEN CALL PSLASET( 'Lower', JB+1, JB, ZERO, ONE, WORK( IPV ), $ K, 1, DESCV ) ELSE CALL PSLASET( 'Lower', JB, JB-1, ZERO, ONE, WORK( IPV ), $ 1, 2, DESCV ) CALL PSLASET( 'Ge', JB, 1, ZERO, ZERO, WORK( IPV ), 1, $ 1, DESCV ) END IF * * Zero out the strict upper triangular part of A. * IF( K.GT.0 ) THEN CALL PSLASET( 'Ge', K-1, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PSLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, A, I-1, $ J+1, DESCA ) ELSE IF( JB.GT.1 ) THEN CALL PSLASET( 'Upper', JB-2, JB-2, ZERO, ZERO, A, IA, $ J+2, DESCA ) END IF * * (1) X := A * V * T' * CALL PSSYMM( 'Left', 'Upper', K+JB, JB, ONE, A, IA, JA, $ DESCA, WORK( IPV ), 1, 1, DESCV, ZERO, $ WORK( IPX ), 1, 1, DESCV ) CALL PSTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', $ K+JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPX ), 1, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PSGEMM( 'Transpose', 'No transpose', JB, JB, K+JB, ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, ZERO, WORK( IPY ), 1, 1, DESCT ) CALL PSTRMM( 'Left', 'Lower', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PSGEMM( 'No tranpose', 'No transpose', K+JB, JB, JB, $ -HALF, WORK( IPV ), 1, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), 1, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PSSYR2K( 'Upper', 'No transpose', K+JB, JB, -ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, ONE, A, IA, JA, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * ELSE * CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PSELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PSELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * * Compute the lower triangle of sub( A ). * JL = MAX( ( ( JA+N-2 ) / NB ) * NB + 1, JA ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, INDXG2P( IA+JL-JA+1, NB, $ MYROW, DESCA( RSRC_ ), NPROW ), IACOL, ICTXT, $ NB ) * DO 40 J = JL, JA, -NB K = J - JA + 1 I = IA + K - 1 JB = MIN( N-K+1, NB ) * * Compute upper triangular matrix T from TAU. * CALL PSLARFT( 'Forward', 'Columnwise', N-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PSLACPY( 'Lower', N-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), K+1, 1, DESCV ) CALL PSLASET( 'Upper', N-K, JB, ZERO, ONE, WORK( IPV ), $ K+1, 1, DESCV ) CALL PSLASET( 'Ge', 1, JB, ZERO, ZERO, WORK( IPV ), K, 1, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', N-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * (1) X := A * V * T' * CALL PSSYMM( 'Left', 'Lower', N-K+1, JB, ONE, A, I, J, $ DESCA, WORK( IPV ), K, 1, DESCV, ZERO, $ WORK( IPX ), K, 1, DESCV ) CALL PSTRMM( 'Right', 'Upper', 'Transpose', 'Non-Unit', $ N-K+1, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPX ), K, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PSGEMM( 'Transpose', 'No transpose', JB, JB, N-K+1, $ ONE, WORK( IPV ), K, 1, DESCV, WORK( IPX ), $ K, 1, DESCV, ZERO, WORK( IPY ), 1, 1, DESCT ) CALL PSTRMM( 'Left', 'Upper', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PSGEMM( 'No transpose', 'No transpose', N-K+1, JB, JB, $ -HALF, WORK( IPV ), K, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), K, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PSSYR2K( 'Lower', 'No tranpose', N-K+1, JB, -ONE, $ WORK( IPV ), K, 1, DESCV, WORK( IPX ), K, 1, $ DESCV, ONE, A, I, J, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCT( RSRC_ ) = MOD( DESCT( RSRC_ ) + NPROW - 1, NPROW ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + NPCOL - 1, NPCOL ) * 40 CONTINUE * END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PSSYTDRV * END scalapack-1.8.0/TESTING/EIG/pzhetdrv.f0000640000175000017500000004152610363532303017062 0ustar muammarmuammar SUBROUTINE PZHETDRV( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZHETDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from Q, the * Hermitian tridiagonal matrix T (or D and E), and TAU, which were * computed by PZHETRD: sub( A ) := Q * T * Q'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of sub( A ). On entry, * if UPLO='U', the diagonal and first superdiagonal of sub( A ) * have the corresponding elements of the tridiagonal matrix T, * and the elements above the first superdiagonal, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO='L', the diagonal and * first subdiagonal of sub( A ) have the corresponding elements * of the tridiagonal matrix T, and the elements below the first * subdiagonal, with the array TAU, represent the unitary * matrix Q as a product of elementary reflectors, and the * strictly upper triangular part of sub( A ) is not referenced. * On exit, if UPLO = 'U', the upper triangular part of the * distributed Hermitian matrix sub( A ) is recovered. * If UPLO='L', the lower triangular part of the distributed * Hermitian matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= 2 * NB *( NB + NP ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION REIGHT, RONE, RZERO PARAMETER ( REIGHT = 8.0D+0, RONE = 1.0D+0, $ RZERO = 0.0D+0 ) COMPLEX*16 HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER I, IACOL, IAROW, ICTXT, II, IPT, IPV, IPX, $ IPY, J, JB, JJ, JL, K, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW DOUBLE PRECISION ADDBND, D2, E2 COMPLEX*16 D1, E1 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCT( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, IGSUM2D, $ PDELGET, PZELGET, PZGEMM, $ PZHEMM, PZHER2K, PZLACPY, $ PZLARFT, PZLASET, PZTRMM * .. * .. Intrinsic Functions .. INTRINSIC ABS, DCMPLX, MAX, MIN, MOD * .. * .. Executable statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) UPPER = LSAME( UPLO, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = NB * NB + IPT IPX = NB * NP + IPV IPY = NB * NP + IPX * CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * ADDBND = REIGHT * PDLAMCH( ICTXT, 'eps' ) * IF( UPPER ) THEN * CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 10 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PZELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J+1, DESCE ) CALL PZELGET( 'Columnwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1-DCMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1-DCMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * * Compute the upper triangle of sub( A ). * CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * DO 20 K = 0, N-1, NB JB = MIN( NB, N-K ) I = IA + K J = JA + K * * Compute the lower triangular matrix T. * CALL PZLARFT( 'Backward', 'Columnwise', K+JB-1, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PZLACPY( 'All', K+JB-1, JB, A, IA, J, DESCA, $ WORK( IPV ), 1, 1, DESCV ) * IF( K.GT.0 ) THEN CALL PZLASET( 'Lower', JB+1, JB, ZERO, ONE, WORK( IPV ), $ K, 1, DESCV ) ELSE CALL PZLASET( 'Lower', JB, JB-1, ZERO, ONE, WORK( IPV ), $ 1, 2, DESCV ) CALL PZLASET( 'Ge', JB, 1, ZERO, ZERO, WORK( IPV ), 1, $ 1, DESCV ) END IF * * Zero out the strict upper triangular part of A. * IF( K.GT.0 ) THEN CALL PZLASET( 'Ge', K-1, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PZLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, A, I-1, $ J+1, DESCA ) ELSE IF( JB.GT.1 ) THEN CALL PZLASET( 'Upper', JB-2, JB-2, ZERO, ZERO, A, IA, $ J+2, DESCA ) END IF * * (1) X := A * V * T' * CALL PZHEMM( 'Left', 'Upper', K+JB, JB, ONE, A, IA, JA, $ DESCA, WORK( IPV ), 1, 1, DESCV, ZERO, $ WORK( IPX ), 1, 1, DESCV ) CALL PZTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', K+JB, JB, ONE, WORK( IPT ), 1, 1, $ DESCT, WORK( IPX ), 1, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PZGEMM( 'Conjugate transpose', 'No transpose', JB, JB, $ K+JB, ONE, WORK( IPV ), 1, 1, DESCV, $ WORK( IPX ), 1, 1, DESCV, ZERO, WORK( IPY ), $ 1, 1, DESCT ) CALL PZTRMM( 'Left', 'Lower', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PZGEMM( 'No tranpose', 'No transpose', K+JB, JB, JB, $ -HALF, WORK( IPV ), 1, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), 1, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PZHER2K( 'Upper', 'No transpose', K+JB, JB, -ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, RONE, A, IA, JA, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * ELSE * CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PZELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PZELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1-DCMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1-DCMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * * Compute the lower triangle of sub( A ). * JL = MAX( ( ( JA+N-2 ) / NB ) * NB + 1, JA ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, INDXG2P( IA+JL-JA+1, NB, $ MYROW, DESCA( RSRC_ ), NPROW ), IACOL, ICTXT, $ NB ) * DO 40 J = JL, JA, -NB K = J - JA + 1 I = IA + K - 1 JB = MIN( N-K+1, NB ) * * Compute upper triangular matrix T from TAU. * CALL PZLARFT( 'Forward', 'Columnwise', N-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PZLACPY( 'Lower', N-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), K+1, 1, DESCV ) CALL PZLASET( 'Upper', N-K, JB, ZERO, ONE, WORK( IPV ), $ K+1, 1, DESCV ) CALL PZLASET( 'Ge', 1, JB, ZERO, ZERO, WORK( IPV ), K, 1, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', N-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * (1) X := A * V * T' * CALL PZHEMM( 'Left', 'Lower', N-K+1, JB, ONE, A, I, J, $ DESCA, WORK( IPV ), K, 1, DESCV, ZERO, $ WORK( IPX ), K, 1, DESCV ) CALL PZTRMM( 'Right', 'Upper', 'Conjugate transpose', $ 'Non-Unit', N-K+1, JB, ONE, WORK( IPT ), 1, 1, $ DESCT, WORK( IPX ), K, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PZGEMM( 'Conjugate transpose', 'No transpose', JB, JB, $ N-K+1, ONE, WORK( IPV ), K, 1, DESCV, $ WORK( IPX ), K, 1, DESCV, ZERO, WORK( IPY ), $ 1, 1, DESCT ) CALL PZTRMM( 'Left', 'Upper', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PZGEMM( 'No transpose', 'No transpose', N-K+1, JB, JB, $ -HALF, WORK( IPV ), K, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), K, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PZHER2K( 'Lower', 'No tranpose', N-K+1, JB, -ONE, $ WORK( IPV ), K, 1, DESCV, WORK( IPX ), K, 1, $ DESCV, RONE, A, I, J, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCT( RSRC_ ) = MOD( DESCT( RSRC_ ) + NPROW - 1, NPROW ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + NPCOL - 1, NPCOL ) * 40 CONTINUE * END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PZHETDRV * END scalapack-1.8.0/TESTING/EIG/pdlasizesep.f0000640000175000017500000001115210363532303017527 0ustar muammarmuammar* * SUBROUTINE PDLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESUBTST, ISIZESUBTST, SIZETST, $ ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PDLASIZESEP computes the amount of memory needed by * various SEP test routines, as well as SYEVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PDSYEVX * * SIZEMQRLEFT LWORK for the 1st PDORMQR call in PDLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PDORMQR call in PDLAGSY * * SIZEQRF LWORK for PDGEQRF in PDLAGSY * * SIZETMS LWORK for PDLATMS * * SIZEQTQ LWORK for PDSEPQTQ (nexer complex) * * SIZECHK LWORK for PDSEPCHK * * SIZESYEVX LWORK for PDSYEVX * * ISIZESYEVX LIWORK for PDSYEVX * * SIZESUBTST LWORK for PDSUBTST * * ISIZESUBTST LIWORK for PDSUBTST * * SIZETST LWORK for PDTST * * ISIZETST LIWORK for PDTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, LCM, $ LCMQ, LDA, MQ0, MYCOL, MYROW, N, NB, NEIG, NN, $ NNP, NP, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PDSYEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pdseptst.f0000640000175000017500000013524710363532303017066 0ustar muammarmuammar SUBROUTINE PDSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, $ ORDER, ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, IWORK, LIWORK, HETERO, NOUT, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER HETERO, SUBTESTS, UPLO INTEGER INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, LWORK, $ MATTYPE, N, NOUT, ORDER DOUBLE PRECISION ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) DOUBLE PRECISION A( LDA, * ), COPYA( LDA, * ), GAP( * ), $ WIN( * ), WNEW( * ), WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PDSEPTST builds a random matrix, runs PDSYEVX and PDSYEV to * compute the eigenvalues * and eigenvectors and then performs two tests to * determine if the result * is good enough. The two tests are: * |AQ -QL| / (abstol + ulp * norm(A) ) * and * |QT * Q - I| / ulp * norm(A) * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) symmetric matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PDSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) DOUBLE PRECISION array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PDSYEVX * * COPYA (local workspace) DOUBLE PRECISION array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PDSEPCHK and PDSEPQTQ * * W (local workspace) DOUBLE PRECISION array, dimension (N) * On normal exit from PDSYEVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PDLASIZESQP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PDLASIZESQP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION HALF, ONE, TEN, ZERO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, $ TEN = 10.0D+0, HALF = 0.5D+0 ) DOUBLE PRECISION PADVAL PARAMETER ( PADVAL = 19.25D+0 ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IHETERO, IINFO, IL, IMODE, IN, $ INDD, INDWORK, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, ITYPE, IU, J, LLWORK, LSYEVXSIZE, $ MAXSIZE, MINSIZE, MYCOL, MYROW, NB, NGEN, NLOC, $ NNODES, NP, NPCOL, NPROW, NQ, RES, SIZECHK, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, $ SIZESUBTST, SIZESYEV, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE, ISIZESYEVD,SIZESYEVD DOUBLE PRECISION ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL DLARAN, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, DLABAD, DLASRT, $ DLATMS, IGAMX2D, IGEBR2D, IGEBS2D, PDCHEKPAD, $ PDELSET, PDFILLPAD, PDLASET, PDLASIZESQP, $ PDLASIZESYEVX, PDLATMS, PDMATGEN, PDSEPSUBTST, $ PDSQPSUBTST, PDSYEV, SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED EVX' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * Distribute HETERO across processes * IF( IAM.EQ.0 ) THEN IF( LSAME( HETERO, 'Y' ) ) THEN IHETERO = 2 ELSE IHETERO = 1 END IF CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, IHETERO, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, IHETERO, 1, 0, 0 ) END IF IF( IHETERO.EQ.2 ) THEN HETERO = 'Y' ELSE HETERO = 'N' END IF * * Make sure that we have enough memory * CALL PDLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * IF( LWORK.LT.SIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDWORK = INDD + N LLWORK = LWORK - INDWORK + 1 * ULP = PDLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PDLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL DLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / DBLE( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log symmetric, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random symmetric * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N WORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PDLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N WORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PDLASET( 'All', N, N, ZERO, ONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * symmetric, eigenvalues specified * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * symmetric, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PDMATGEN( DESCA( CTXT_ ), 'S', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PDLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( DLARAN( ISEED )*DBLE( NLOC ) ), N-NGEN ) * CALL DLATMS( IN, IN, 'S', ISEED, 'P', WORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PDELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PDELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PDELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PDELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 WORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL DLASRT( 'I', N, WORK( INDD ), IINFO ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PDLASIZESYEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, WORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LSYEVXSIZE = MIN( MAXSIZE, LLWORK ) * CALL PDSEPSUBTST( WKNOWN, 'v', 'a', UPLO, N, VL, VU, IL, IU, $ THRESH, ABSTOL, A, COPYA, Z, 1, 1, DESCA, $ WORK( INDD ), WIN, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, TSTNRM, $ QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE + INT( DLARAN( ISEED )* $ DBLE( VECSIZE-VALSIZE ) ) * CALL PDSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF * * Now that PDSYEVX been tested, we check PDSYEV if we are a * homogeneous machine. * IF( LSAME( HETERO, 'N' ) .AND. LSAME( SUBTESTS, 'N' ) ) THEN PASSED = 'PASSED EV' * * PDSYEV test1: * JOBZ = 'N', eigenvalues only * IF( INFO.NE.0 ) THEN * * If the EVX tests fail, we do not perform the EV tests * PASSED = 'SKIPPED EV' ELSE JOBZ = 'N' * CALL PDSYEV( JOBZ, UPLO, N, A, 1, 1, DESCA, $ WORK( INDWORK ), Z, 1, 1, DESCA, $ WORK( INDWORK ), -1, INFO ) MINSIZE = INT( WORK( INDWORK ) ) * CALL PDSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EV test1' INFO = 1 END IF END IF * * PDSYEV test2: * JOBZ = 'V', eigenvalues and eigenvectors * IF( INFO.EQ.0 ) THEN JOBZ = 'V' * CALL PDSYEV( JOBZ, UPLO, N, A, 1, 1, DESCA, $ WORK( INDWORK ), Z, 1, 1, DESCA, $ WORK( INDWORK ), -1, INFO ) MINSIZE = INT( WORK( INDWORK ) ) * CALL PDSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EV test2' INFO = 1 END IF END IF IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, $ PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF ENDIF * * Now that PDSYEV been tested, we check PDSYEVD if we are a * homogeneous machine. * IF( LSAME( HETERO, 'N' ) .AND. LSAME( SUBTESTS, 'N' ) ) THEN PASSED = 'PASSED EVD' * * PDSYEVD test1: * IF( INFO.NE.0 ) THEN * * If the EV tests fail, we do not perform the EVD tests * PASSED = 'SKIPPED EVD' ELSE * NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) MINSIZE = MAX( 1+6*N+2*NP*NQ, $ 3*N + MAX( NB*( NP+1 ), 3*NB ) ) + 2*N * CALL PDSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, IWORK, ISIZESYEVD, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EVD test1' INFO = 1 END IF END IF IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), TSTNRM, $ QTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, $ PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF END IF RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, $ F8.2, 1X, F8.2, 1X, G9.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 21X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 21X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 32X, $ 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PDSEPDRIVER' ) * * End of PDSEPTST * END scalapack-1.8.0/TESTING/EIG/pdsepreq.f0000640000175000017500000002225510363532303017035 0ustar muammarmuammar* * SUBROUTINE PDSEPREQ( HETERO, NIN, MEM, MEMSIZE, NOUT, ISEED, $ NTESTS, NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER HETERO INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION MEM( MEMSIZE ) * .. * * Purpose * ======= * * PDSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PDSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input ) DOUBLE PRECISION ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, INTGSZ PARAMETER ( DBLESZ = 8, INTGSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IMIDPAD, INITCON, IPOSTPAD, $ IPREPAD, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ LDA, LLWORK, MATSIZE, MATTYPE, MYCOL, MYROW, N, $ NB, NMATSIZES, NMATTYPES, NNODES, NP, NPCOL, $ NPCONFIGS, NPROW, NQ, NUPLOS, ORDER, PCONFIG, $ PTRA, PTRCOPYA, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRW, PTRW2, PTRWORK, PTRZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST, UPLO, ISIZESYEVD, SIZESYEVD DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PDLASIZESQP, PDSEPINFO, PDSEPTST * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PDSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * DO 40 MATSIZE = 1, NMATSIZES * DO 30 PCONFIG = 1, NPCONFIGS * DO 20 MATTYPE = 1, NMATTYPES * DO 10 UPLO = 1, NUPLOS * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, $ MYCOL ) * IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PDLASIZESQP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, $ ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, $ SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + MAX( N, 1 ) + IPREPAD + IPOSTPAD PTRGAP = PTRW2 + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + NPROW*NPCOL + IPREPAD + $ IPOSTPAD PTRICLUS = PTRIFAIL + ICEIL( N+IPREPAD+IPOSTPAD, $ DBLESZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, DBLESZ / INTGSZ ) PTRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, DBLESZ / INTGSZ ) LLWORK = MEMSIZE - PTRWORK + 1 C LLWORK = MEMSIZE - PTRWORK - IPREPAD - C $ IPOSTPAD + 1 NTESTS = NTESTS + 1 IF( LLWORK.LT.SIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PDSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), SUBTESTS, $ THRESH, N, ABSTOL, ISEED, $ MEM( PTRA ), MEM( PTRCOPYA ), $ MEM( PTRZ ), LDA, MEM( PTRW ), $ MEM( PTRW2 ), MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ LLWORK, MEM( PTRIWRK ), $ ISIZETST, HETERO, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * )' PDSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE END IF * * RETURN * * End of PDDSEPREQ * END scalapack-1.8.0/TESTING/EIG/psttrdtester.f0000640000175000017500000006163010363532303017761 0ustar muammarmuammar SUBROUTINE PSTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 24, 2000 * * .. Scalar Arguments .. LOGICAL CHECK INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS, $ TOTMEM REAL THRESH * .. * .. Array Arguments .. INTEGER NVAL( * ) REAL MEM( * ) * .. * * Purpose * ======= * * PSTTRDTESTER tests PSSYTTRD * * Arguments * ========= * * IAM (local input) INTEGER * The local process number * * NPROCS (global input) INTEGER * The number of processors * * CHECK (global input) LOGICAL * Specifies whether the user wants to check the answer * * NOUT (local input) INTEGER * File descriptor * * THRESH (global input) REAL * Acceptable error threshold * * NVAL (global input) INTEGER array dimension NMAT * The matrix sizes to test * * NMAT (global input) INTEGER * The number of matrix sizes to test * * MEM (local input) REAL array dimension MEMSIZ * Where: * MEMSIZ = TOTMEM / REALSZ * * TOTMEM (global input) INTEGER * Number of bytes in MEM * * KPASS (local input/output) INTEGER * The number of tests which passed. Only relevant on * processor 0. * * KFAIL (local input/output) INTEGER * The number of tests which failed. Only relevant on * processor 0. * * KSKIP (local input/output) INTEGER * The number of tests which were skipped. Only relevant on * processor 0. * * ================================================================ * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER REALSZ REAL PADVAL PARAMETER ( REALSZ = 4, PADVAL = -9923.0E+0 ) INTEGER TIMETESTS PARAMETER ( TIMETESTS = 11 ) INTEGER TESTS PARAMETER ( TESTS = 8 ) INTEGER MINTIMEN PARAMETER ( MINTIMEN = 8 ) * .. * .. Local Scalars .. LOGICAL TIME CHARACTER UPLO CHARACTER*6 PASSED INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ LCM, LWMIN, MAXTESTS, MEMSIZ, MYCOL, MYROW, N, $ NB, NDIAG, NGRIDS, NN, NOFFD, NP, NPCOL, NPROW, $ NPS, NQ, SPLITSTIMED, WORKSIZ, WORKTRD REAL ANORM, FRESID DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ), $ BALTEST( TESTS ), BALTIME( TIMETESTS ), $ DESCA( DLEN_ ), DESCD( DLEN_ ), IERR( 1 ), $ INTERTEST( TESTS ), INTERTIME( TIMETESTS ), $ PNBTEST( TESTS ), PNBTIME( TIMETESTS ), $ TWOGEMMTEST( TESTS ), TWOGEMMTIME( TIMETESTS ) DOUBLE PRECISION CTIME( 100 ), WTIME( 100 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGEBR2D, IGEBS2D, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSLAFCHK, PSLATRAN, PSMATGEN, $ PSSYTDRV, PSSYTTRD, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC, PJLAENV REAL PSLANSY EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PJLAENV, PSLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, REAL, SQRT * .. * * .. Scalars in Common .. INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE, $ LLTBLOCK, MINSZ, PNB, TIMEINTERNALS, TIMING, $ TRSBLOCK, TWOGEMMS * .. * .. Common blocks .. COMMON / BLOCKSIZES / GSTBLOCK, LLTBLOCK, BCKBLOCK, $ TRSBLOCK COMMON / MINSIZE / MINSZ COMMON / PJLAENVTIMING / TIMING COMMON / TAILOREDOPTS / PNB, ANB, INTERLEAVE, $ BALANCED, TWOGEMMS COMMON / TIMECONTROL / TIMEINTERNALS * .. * .. Data statements .. DATA BALTIME / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 / DATA INTERTIME / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 / DATA TWOGEMMTIME / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 / DATA ANBTIME / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16, $ 16 / DATA PNBTIME / 32, 32, 32, 32, 32, 32, 32, 32, 32, $ 16, 64 / DATA BALTEST / 0, 0, 0, 0, 1, 1, 1, 1 / DATA INTERTEST / 0, 0, 1, 1, 0, 0, 1, 1 / DATA TWOGEMMTEST / 0, 1, 0, 1, 0, 1, 0, 1 / DATA ANBTEST / 1, 2, 3, 16, 1, 2, 3, 16 / DATA PNBTEST / 1, 16, 8, 1, 16, 8, 1, 16 / * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * IASEED = 100 SPLITSTIMED = 0 NB = 1 UPLO = 'L' MEMSIZ = TOTMEM / REALSZ * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * NGRIDS = INT( SQRT( REAL( NPROCS ) ) ) * DO 30 NN = 1, NGRIDS * NPROW = NN NPCOL = NN IERR( 1 ) = 0 * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * IF( N.GT.MINTIMEN ) THEN * * For timing tests, we perform one or two extra tests. * Both of these extra tests are performed with the * default values for the performance tuning parameters. * The second extra test (which is only performed if * split times are non-zero) is performed with timeinternals * set to 1 (which forces barrier syncs between many * phases of the computation). * TIME = .TRUE. MAXTESTS = TIMETESTS + 2 ELSE TIME = .FALSE. MAXTESTS = TESTS END IF * * DO 10 K = 1, MAXTESTS TIMEINTERNALS = 0 IF( TIME ) THEN IF( K.GE.MAXTESTS-1 ) THEN * * For the last two timings, we let pjlaenv set * the execution path values. These dummy * initializations aren't really necessary, * but they illustrate the fact that these values are * set in xpjlaenv. The dummy call to pjlaenv * has the side effect of setting ANB. * MINSZ = -13 BALANCED = -13 INTERLEAVE = -13 TWOGEMMS = -13 ANB = -13 PNB = -13 TIMING = 1 DUMMY = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, $ 0, 0 ) IF( K.EQ.MAXTESTS ) $ TIMEINTERNALS = 1 ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTIME( K ) INTERLEAVE = INTERTIME( K ) TWOGEMMS = TWOGEMMTIME( K ) ANB = ANBTIME( K ) PNB = PNBTIME( K ) END IF ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTEST( K ) INTERLEAVE = INTERTEST( K ) TWOGEMMS = TWOGEMMTEST( K ) ANB = ANBTEST( K ) PNB = PNBTEST( K ) END IF * * Skip the last test (with timeinternals = 1) if * PSSYTTRD is not collecting the split times. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, 1, $ 0, 0 ) END IF * * IF( SPLITSTIMED.EQ.0 .AND. K.EQ.MAXTESTS ) $ GO TO 10 * * The following hack tests to make sure that PNB need not * be the same on all processes. (Provided that PNB is set * to 1 in the TRD.dat file.) * IF( PNB.EQ.1 ) $ PNB = 1 + IAM * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1, $ INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORKTRD = LWMIN + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ITEMP, 2*( NB+NP )*NB ) WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * * * Generate the matrix A * CALL PSMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PSSYTTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWMIN, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSSYTTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSCHEKPAD( ICTXT, 'PSSYTTRDc', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTTRDd', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTTRDe', WORKTRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PSSYTDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) * * TTRD does not preserve the upper triangular part of A. * The following call to PSLATRAN means that we only * check the lower triangular part of A - QTQ' * CALL PSLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSSYTDRVf', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRVg', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRVh', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRVi', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0E+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9991 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' * * END IF * * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 50, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 50, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'WALL', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'CPU ', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ CTIME( 1 ), TMFLOPS, FRESID, PASSED * * * If split times were collected (in PSSYttrd.f), print * them out. * IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 ).GT.0.0D+0 .OR. $ CTIME( 13 )+CTIME( 15 )+CTIME( 16 ).GT.0.0D+0 ) $ THEN SPLITSTIMED = 1 END IF IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ), $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ), $ WTIME( 15 ) WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ), $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ), $ WTIME( 21 ) * WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ), $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ), $ CTIME( 15 ) WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ), $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ), $ CTIME( 21 ) WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB, $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS END IF END IF 10 CONTINUE 20 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) END IF END IF * * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE RETURN * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) * 9995 FORMAT( 'PSSYTTRD, tailored reduction to tridiagonal form, test.' $ ) 9994 FORMAT( 'TIME N int 2gm bal anb pnb prcs TRD Time ', $ ' MFLOPS Residual CHECK' ) 9993 FORMAT( '---- ---- --- --- --- --- --- ---- -------- ', $ '----------- -------- ------' ) 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9991 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) 9990 FORMAT( 'wsplit1=[wsplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9989 FORMAT( 'wsplit2=[wsplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9988 FORMAT( 'csplit1=[csplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9987 FORMAT( 'csplit2=[csplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9986 FORMAT( 'size_opts=[size_opts;', I4, 1X, I4, 1X, I4, 1X, I4, 1X, $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];' ) 9985 FORMAT( 'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;', $ ' TWOGEMMS=7; TIMEINTERNALS=8;' ) 9984 FORMAT( 'S1_OVERHEAD = 1; % Should be mainly cost of barrier' ) 9983 FORMAT( 'S1_BARRIER = 2; % Cost of barrier' ) 9982 FORMAT( 'S1_UPDCURCOL = 3; % Update the current column' ) 9981 FORMAT( 'S1_HOUSE = 4; % Compute the householder vector' ) 9980 FORMAT( 'S1_SPREAD = 5; % Spread across' ) 9979 FORMAT( 'S1_TRANSPOSE = 6; % Transpose' ) 9978 FORMAT( 'S2_UPDCURBLK = 1; % Update the current block column' ) 9977 FORMAT( 'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' ) 9976 FORMAT( 'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' ) 9975 FORMAT( 'S2_TRANS_SUM = 4; % v = v + vt'' ' ) 9974 FORMAT( 'S2_DOT = 5; % c = v'' * h ' ) 9973 FORMAT( 'S2_R2K = 6; % A = A - v * h'' - h * v'' ' ) * * * End of PSTTRDTESTER * END scalapack-1.8.0/TESTING/EIG/pdnepinfo.f0000640000175000017500000002767710363532303017211 0ustar muammarmuammar SUBROUTINE PDNEPINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDNEPINFO gets needed startup information for PDHSEQR drivers * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * * Implemented by: G. Henry, May 10, 1996 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'NEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QSQ^T by Schur Decomposition.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'real double precision Schur decomposition.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = ||H-QSQ^T|| / ' // $ '(||H|| * eps * N )' WRITE( NOUT, FMT = 9999 ) $ ' Orthogonality residual = ||I - Q^TQ|| / ' // '( eps * N )' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PDNEPINFO * END scalapack-1.8.0/TESTING/EIG/pslasizesep.f0000640000175000017500000001115210363532303017546 0ustar muammarmuammar* * SUBROUTINE PSLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESUBTST, ISIZESUBTST, SIZETST, $ ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PSLASIZESEP computes the amount of memory needed by * various SEP test routines, as well as SYEVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PSSYEVX * * SIZEMQRLEFT LWORK for the 1st PSORMQR call in PSLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PSORMQR call in PSLAGSY * * SIZEQRF LWORK for PSGEQRF in PSLAGSY * * SIZETMS LWORK for PSLATMS * * SIZEQTQ LWORK for PSSEPQTQ (nexer complex) * * SIZECHK LWORK for PSSEPCHK * * SIZESYEVX LWORK for PSSYEVX * * ISIZESYEVX LIWORK for PSSYEVX * * SIZESUBTST LWORK for PSSUBTST * * ISIZESUBTST LIWORK for PSSUBTST * * SIZETST LWORK for PSTST * * ISIZETST LIWORK for PSTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, LCM, $ LCMQ, LDA, MQ0, MYCOL, MYROW, N, NB, NEIG, NN, $ NNP, NP, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PSSYEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pshrdinfo.f0000640000175000017500000003237410363532303017211 0ustar muammarmuammar SUBROUTINE PSHRDINFO( SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ NVHI( LDNVAL ), NVLO( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSHRDINFO get the needed startup information for the Hessenberg * reduction tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for * N, IHI & ILO. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of rows & columns in matrix). * * NVLO (global output) INTEGER array, dimension (LDNVAL) * The values of ILO. * * NVHI (global output) INTEGER array, dimension (LDNVAL) * The values of IHI. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, ILO and IHI. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * 3*LDNVAL+LDNBVAL+2*LDPVAL. Used to pack all input arrays * in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'HRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = * ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'N', LDNVAL GO TO 20 END IF * * Get values of N, ILO, IHI * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVLO( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVHI( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVLO, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVHI, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS -1 CALL IGEBS2D( ICTXT, 'All', ' ', 1, I, WORK, 1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction routine to Hessenberg form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision Hessenberg ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q H Q''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'HRD time : Time in seconds to compute HRD ' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for HRD ' // $ 'reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'ILO ', ( NVLO( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVLO( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'IHI ', ( NVHI( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVHI( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 3*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) * I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVLO, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVHI, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' ILLEGAL INPUT IN FILE ', 40A, '. ABORTING RUN.' ) 9997 FORMAT( ' NUMBER OF VALUES OF ', 5A, $ ' IS LESS THAN 1 OR GREATER ', 'THAN ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G14.7 ) * * End of PSHRDINFO * END scalapack-1.8.0/TESTING/EIG/pdnepfchk.f0000640000175000017500000002605010363532303017151 0ustar muammarmuammar SUBROUTINE PDNEPFCHK( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, $ DESCZ, ANORM, FRESID, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IASEED, IZ, JA, JZ, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) DOUBLE PRECISION A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDNEPFCHK computes the residual * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1) * and ||.|| stands for the infinity norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The order of sub( A ) and sub( Z ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * Z (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this * array contains the local pieces of the N-by-N distributed * matrix sub( Z ). * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK). * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where * * IROFFA = MOD( IA-1, MB_A ), * ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of rows and a block of columns * of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF, $ IW, J, JB, JJA, JN, LDA, LDW, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION EPS * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, DMATADD, INFOG2L, $ PDGEMM, PDLACPY, PDLASET, PDMATGEN * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF LDW = MAX( 1, NP ) * * First compute H <- H * Z**T * CALL DESCSET( DESCW, DESCA( MB_ ), N, DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IA, IA + N - 1, DESCA( MB_ ) IB = MIN( IA+N-I, DESCA( MB_ ) ) * CALL PDLACPY( 'All', IB, N, A, I, JA, DESCA, WORK, 1, 1, $ DESCW ) CALL PDGEMM( 'No transpose', 'Transpose', IB, N, N, ONE, WORK, $ 1, 1, DESCW, Z, IZ, JZ, DESCZ, ZERO, A, I, JA, $ DESCA ) * DESCW( RSRC_ ) = MOD( DESCW( RSRC_ )+1, NPROW ) * 10 CONTINUE * * Then compute H <- Z * H = Z * H0 * Z**T * CALL DESCSET( DESCW, N, DESCA( NB_ ), DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, LDW ) * DO 20 J = JA, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * CALL PDLACPY( 'All', N, JB, A, IA, J, DESCA, WORK, 1, 1, $ DESCW ) CALL PDGEMM( 'No transpose', 'No transpose', N, JB, N, ONE, Z, $ IZ, JZ, DESCZ, WORK, 1, 1, DESCW, ZERO, A, IA, J, $ DESCA ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Compute H - H0 * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA-1 )*LDA IW = 1 JB = JN - JA + 1 DESCW( CSRC_ ) = IACOL * * Handle first block of columns separately * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PDMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ NP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL PDLASET( 'Lower', MAX( 0, N-2 ), JB, ZERO, ZERO, WORK, $ MIN( IW+2, N ), 1, DESCW ) CALL DMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * DO 30 J = JN + 1, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PDMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PDLASET( 'Lower', MAX( 0, N-IW-1 ), JB, ZERO, ZERO, $ WORK, MIN( N, IW+2 ), 1, DESCW ) CALL DMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) 30 CONTINUE * * Calculate factor residual * FRESID = PDLANGE( 'I', N, N, A, IA, JA, DESCA, WORK ) / $ ( N*EPS*ANORM ) * RETURN * * End PDNEPFCHK * END scalapack-1.8.0/TESTING/EIG/pslasizesqp.f0000640000175000017500000001324610363532303017570 0ustar muammarmuammar SUBROUTINE PSLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESYEV, SIZESYEVD, ISIZESYEVD, $ SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 23, 2000 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEV, $ SIZESYEVX, SIZETMS, SIZETST, $ SIZESYEVD, ISIZESYEVD * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PSLASIZESQP computes the amount of memory needed by * various SEP test routines, as well as PSYEVX and PSSYEV * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PSSYEVX or PSSYEV * * SIZEMQRLEFT LWORK for the 1st PSORMQR call in PSLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PSORMQR call in PSLAGSY * * SIZEQRF LWORK for PSGEQRF in PSLAGSY * * SIZETMS LWORK for PSLATMS * * SIZEQTQ LWORK for PSSEPQTQ (nexer complex) * * SIZECHK LWORK for PSSEPCHK * * SIZESYEVX LWORK for PSSYEVX * * ISIZESYEVX LIWORK for PSSYEVX * * SIZESYEV LWORK for PSSYEV * * SIZESYEVD LWORK for PSSYEVD * * ISIZESYEVD LIWORK for PSSYEVD * * SIZESUBTST LWORK for PSSUBTST * * ISIZESUBTST LIWORK for PSSUBTST * * SIZETST LWORK for PSTST * * ISIZETST LIWORK for PSTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, $ LCM, LCMQ, LDA, LDC, MQ0, MYCOL, MYPCOLC, $ MYPROWC, MYROW, N, NB, NEIG, NN, NNP, NP, $ NPCOLC, NPROWC, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE * .. * .. Executable Statements .. * * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_GRIDEXIT * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * * Allow room for the new context created in PSSYEV * CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROW*NPCOL, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) LDC = MAX( 1, NUMROC( N, NB, MYPROWC, 0, NPROW*NPCOL ) ) SIZESYEV = 5*N + MAX( 2*NP0 + MQ0 + NB*NN , 2*NN-2 ) + N*LDC CALL BLACS_GRIDEXIT( CONTEXTC ) * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NN = MAX( N, NB, 2 ) NNP = 3*N + MAX( NB*( NP+1 ), 3*NB ) SIZESYEVD = MAX( NNP, 1+6*N+2*NP*NQ ) + 2*N ISIZESYEVD = 2+7*N+8*NPCOL * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZESYEV ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PSSYEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pzseptst.f0000640000175000017500000012530110363532303017102 0ustar muammarmuammar* * SUBROUTINE PZSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, $ ORDER, ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, $ NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 15, 2002 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, LRWORK, $ LWORK, MATTYPE, N, NOUT, ORDER DOUBLE PRECISION ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX*16 A( LDA, * ), COPYA( LDA, * ), WORK( * ), $ Z( LDA, * ) * .. * * Purpose * ======= * * PZSEPTST builds a random matrix, runs PZHEEVX() to * compute the eigenvalues * and eigenvectors and then performs two tests to * determine if the result * is good enough. The two tests are: * |AQ -QL| / (abstol + ulp * norm(A) ) * and * |QT * Q - I| / ulp * norm(A) * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) Hermitian matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PZSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) COMPLEX*16 array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PZHEEVX * * COPYA (local workspace) COMPLEX*16 array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * Z (local workspace) COMPLEX*16 array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PZSEPCHK and PZSEPQTQ * * W (local workspace) DOUBLE PRECISION array, dimension (N) * On normal exit from PZHEEVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PZLASIZESEP * * RWORK (local workspace) COMPLEX*16 array, dimension (LWORK) * * LRWORK (local input) INTEGER * The length of the array WORK. LRWORK >= RSIZETST as * returned by PZLASIZESEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PZLASIZESEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0, $ HALF = 0.5D+0 ) COMPLEX*16 PADVAL PARAMETER ( PADVAL = ( 19.25D+0, 1.1D+1 ) ) COMPLEX*16 ZZERO PARAMETER ( ZZERO = ( 0.0D+0, 0.0D+0 ) ) COMPLEX*16 ZONE PARAMETER ( ZONE = ( 1.0D+0, 0.0D+0 ) ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDRWORK, INDWORK, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, ITYPE, IU, J, LHEEVXSIZE, LLRWORK, $ LLWORK, MAXSIZE, MYCOL, MYROW, NB, NGEN, NLOC, $ NNODES, NP, NPCOL, NPROW, NQ, RES, RSIZECHK, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZESUBTST, SIZETMS, SIZETST, VALSIZE, VECSIZE, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, NQ0, NP0, $ LHEEVDSIZE DOUBLE PRECISION ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL LSAME, NUMROC, DLARAN, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, DLABAD, DLASRT, $ IGAMX2D, IGEBR2D, IGEBS2D, PZCHEKPAD, PZELSET, $ PZFILLPAD, PZLASET, PZLASIZEHEEVX, PZLASIZESEP, $ PZLATMS, PZMATGEN, PZSEPSUBTST, SLCOMBINE, $ ZLATMS * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED EVX' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * IF( LRWORK.LT.RSIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDRWORK = INDD + N INDWORK = 1 LLWORK = LWORK - INDWORK + 1 LLRWORK = LRWORK - INDRWORK + 1 * ULP = PDLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PDLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL DLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / DBLE( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log Hermitian, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random Hermitian * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N RWORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PZLASET( 'All', N, N, ZZERO, ZZERO, COPYA, 1, 1, $ DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N RWORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PZLASET( 'All', N, N, ZZERO, ZONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Hermitian, eigenvalues specified * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * Hermitian, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PZMATGEN( DESCA( CTXT_ ), 'H', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PZLASET( 'All', N, N, ZZERO, ZZERO, COPYA, 1, 1, $ DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( DLARAN( ISEED )*DBLE( NLOC ) ), N-NGEN ) * CALL ZLATMS( IN, IN, 'S', ISEED, 'P', RWORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PZELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PZELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PZELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PZELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 RWORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL DLASRT( 'I', N, RWORK( INDD ), IINFO ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PZLASIZEHEEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, RWORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LHEEVXSIZE = MIN( MAXSIZE, LLRWORK ) * CALL PZSEPSUBTST( WKNOWN, 'v', 'a', UPLO, N, VL, VU, IL, IU, $ THRESH, ABSTOL, A, COPYA, Z, 1, 1, DESCA, $ RWORK( INDD ), WIN, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK( INDWORK ), LLWORK, $ RWORK( INDRWORK ), LLRWORK, LHEEVXSIZE, $ IWORK, ISIZEHEEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * res =0 IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LHEEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PZSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF * * Now that PZHEEVX been tested, we check PZHEEVD * PASSED = 'PASSED EEVD' * * PZHEEVD test1: * IF( INFO.EQ.0 ) THEN * NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( MAX( N, 1 ), NB, 0, 0, NPCOL ) LHEEVDSIZE = 1 + 9*N + 3*NP0*NQ0 ISIZEHEEVD = MAX( 1, 2+7*N+8*NPCOL ) * CALL PZSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, COPYA, Z, $ 1, 1, DESCA, WIN, WNEW, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVDSIZE, IWORK, ISIZEHEEVD, RES, TSTNRM, $ QTQNRM, NOUT ) * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( RES.NE.0 ) THEN PASSED = 'FAILED EEVD' INFO = 1 END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 21X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 21X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 32X, $ 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PZSEPDRIVER' ) * * End of PZSEPTST * END scalapack-1.8.0/TESTING/EIG/pdlatran.f0000640000175000017500000001561710363532303017023 0ustar muammarmuammar SUBROUTINE PDLATRAN( N, NB, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, JA, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * * ======= * * PDLATRAN transpose a lower triangular matrix on to the upper * triangular portion of the same matrix. * * This is an auxiliary routine called by PDSYTRD. * * Notes * ===== * * IA must equal 1 * JA must equal 1 * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( RSRC_ ) must equal 1 * DESCA( CSRC_ ) must equal 1 * * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * NB (global input) INTEGER * The number of rows and columns to be transposed with each * message sent. NB has no impact on the result, it is striclty * a performance tuning parameter. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). On entry, the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. On exit, the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is undefined (and may have been modified). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * Must be equal to 1. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * Must be equal to 1. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( ICTXT_ ) must point to a square process grid * i.e. one where NPROW is equal to NPCOL * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * Where: * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND, $ LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND, $ MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL, $ MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB, $ STARTCOL, STARTROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DTRRV2D, DTRSD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Further details * * Because the processor grid is square each process needs only send * data to its transpose process. (Likewsie it need only receive * data from its transpose process.) Because the data decomposition * is cyclic, the local portion of the array is triangular. * * This routine requires that the data be buffered (i.e. copied) * on the sending process (because of the triangular shape) and * unbuffered on the receiving process. Hence, two local memory to * memory copies are performed within the communications routines * followed by a memory to memory copy outside of the communications * routines. It would be nice to avoid having back to back memory * to memory copies (as we do presently on the receiving processor). * This could be done by packaging the data ourselves in the sender * and then unpacking it directly into the matrix. However, this * code seems cleaner and so since this routine is not a significant * performance bottleneck we have left it this way. * * * * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * * IF( MYROW.EQ.MYCOL ) THEN * DO 20 J = 1, NP DO 10 I = J + 1, NQ A( J+( I-1 )*LDA ) = A( I+( J-1 )*LDA ) 10 CONTINUE 20 CONTINUE * ELSE IF( MYROW.GT.MYCOL ) THEN STARTROW = 1 STARTCOL = 2 ELSE IF( MYROW.EQ.MYCOL ) THEN STARTROW = 2 STARTCOL = 2 ELSE STARTROW = 2 STARTCOL = 1 END IF END IF * DO 50 JJ = 1, MAX( NP, NQ ), NB MINJSEND = STARTCOL + JJ - 1 MINJRECV = STARTROW + JJ - 1 MAXJSEND = MIN( MINJSEND+NB-1, NQ ) MAXJRECV = MIN( MINJRECV+NB-1, NP ) * SENDNB = MAXJSEND - MINJSEND + 1 RECVNB = MAXJRECV - MINJRECV + 1 * MINISEND = 1 MINIRECV = 1 MAXISEND = MIN( NP, JJ+SENDNB-1 ) MAXIRECV = MIN( NQ, JJ+RECVNB-1 ) * ISEND = MAXISEND - MINISEND + 1 IRECV = MAXIRECV - MINIRECV + 1 JSEND = MAXJSEND - MINJSEND + 1 JRECV = MAXJRECV - MINJRECV + 1 * * * DO 40 J = MINJRECV, MAXJRECV DO 30 I = MINIRECV, MAXIRECV + J - MAXJRECV WORK( I+( J-MINJRECV )*IRECV ) = A( J+( I-1 )*LDA ) 30 CONTINUE 40 CONTINUE * IF( IRECV.GT.0 .AND. JRECV.GT.0 ) $ CALL DTRSD2D( ICTXT, 'U', 'N', IRECV, JRECV, WORK, IRECV, $ MYCOL, MYROW ) * IF( ISEND.GT.0 .AND. JSEND.GT.0 ) $ CALL DTRRV2D( ICTXT, 'U', 'N', ISEND, JSEND, $ A( MINISEND+( MINJSEND-1 )*LDA ), LDA, $ MYCOL, MYROW ) * * 50 CONTINUE * END IF * RETURN * * End of PDLATRD * END scalapack-1.8.0/TESTING/EIG/psgsepreq.f0000640000175000017500000002435410602576752017241 0ustar muammarmuammar* * SUBROUTINE PSGSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL MEM( MEMSIZE ) * .. * * Purpose * ======= * * PSGSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PSGSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) REAL ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL FIVE PARAMETER ( FIVE = 5.0E+0 ) INTEGER REALSZ, INTGSZ PARAMETER ( REALSZ = 4, INTGSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IBTYPE, IMIDPAD, INITCON, $ IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, LDA, LLWORK, MATSIZE, MATTYPE, MYCOL, $ MYROW, N, NB, NIBTYPES, NMATSIZES, NMATTYPES, $ NNODES, NP, NPCOL, NPCONFIGS, NPROW, NQ, $ NUPLOS, ORDER, PCONFIG, PTRA, PTRB, PTRCOPYA, $ PTRCOPYB, PTRGAP, PTRICLUS, PTRIFAIL, PTRIWRK, $ PTRW, PTRW2, PTRWORK, PTRZ, RES, SIZECHK, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, $ SIZESUBTST, SIZESYEVX, SIZETMS, SIZETST, UPLO REAL ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PSGSEPTST, PSLASIZEGSEP, PSSEPINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PSSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * Thresholds must be bigger for the generalized problem. * THRESH = THRESH*FIVE * DO 50 MATSIZE = 1, NMATSIZES * DO 40 PCONFIG = 1, NPCONFIGS * DO 30 MATTYPE = 1, NMATTYPES * DO 20 UPLO = 1, NUPLOS IF( LSAME( SUBTESTS, 'Y' ) ) THEN NIBTYPES = 3 ELSE NIBTYPES = 1 END IF DO 10 IBTYPE = 1, NIBTYPES * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, $ NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, $ MYROW, MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PSLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, $ ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, $ ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYB = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRB = PTRCOPYB + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRB + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRGAP = PTRW2 + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + NPROW*NPCOL + IPREPAD + $ IPOSTPAD PTRICLUS = PTRIFAIL + $ ICEIL( N+IPREPAD+IPOSTPAD, $ REALSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, REALSZ / INTGSZ ) PTRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, REALSZ / INTGSZ ) LLWORK = MEMSIZE - PTRWORK - IPOSTPAD - $ IPREPAD + 1 NTESTS = NTESTS + 1 IF( LLWORK.LT.SIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PSGSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), $ IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, $ MEM( PTRA ), $ MEM( PTRCOPYA ), $ MEM( PTRB ), $ MEM( PTRCOPYB ), $ MEM( PTRZ ), LDA, $ MEM( PTRW ), MEM( PTRW2 ), $ MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ LLWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * ) $ ' pSGSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE 50 CONTINUE END IF * * RETURN * * End of PSDGSEPREQ * END scalapack-1.8.0/TESTING/EIG/Makefile0000640000175000017500000002546610363532303016512 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Eigenroutine Testing Makefile # # Creation date: February 20, 2000 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc shrdexe = $(TESTINGdir)/xshrd dhrdexe = $(TESTINGdir)/xdhrd chrdexe = $(TESTINGdir)/xchrd zhrdexe = $(TESTINGdir)/xzhrd strdexe = $(TESTINGdir)/xstrd dtrdexe = $(TESTINGdir)/xdtrd ctrdexe = $(TESTINGdir)/xctrd ztrdexe = $(TESTINGdir)/xztrd sbrdexe = $(TESTINGdir)/xsbrd dbrdexe = $(TESTINGdir)/xdbrd cbrdexe = $(TESTINGdir)/xcbrd zbrdexe = $(TESTINGdir)/xzbrd ssepexe = $(TESTINGdir)/xssep dsepexe = $(TESTINGdir)/xdsep csepexe = $(TESTINGdir)/xcsep zsepexe = $(TESTINGdir)/xzsep sgsepexe = $(TESTINGdir)/xsgsep dgsepexe = $(TESTINGdir)/xdgsep cgsepexe = $(TESTINGdir)/xcgsep zgsepexe = $(TESTINGdir)/xzgsep ssvdexe = $(TESTINGdir)/xssvd dsvdexe = $(TESTINGdir)/xdsvd snepexe = $(TESTINGdir)/xsnep dnepexe = $(TESTINGdir)/xdnep cnepexe = $(TESTINGdir)/xcnep znepexe = $(TESTINGdir)/xznep cevcexe = $(TESTINGdir)/xcevc zevcexe = $(TESTINGdir)/xzevc smatgen = psmatgen.o pmatgeninc.o dmatgen = pdmatgen.o pmatgeninc.o cmatgen = pcmatgen.o pmatgeninc.o zmatgen = pzmatgen.o pmatgeninc.o sbrd = psbrddriver.o psbrdinfo.o psgebdrv.o pslafchk.o $(smatgen) dbrd = pdbrddriver.o pdbrdinfo.o pdgebdrv.o pdlafchk.o $(dmatgen) cbrd = pcbrddriver.o pcbrdinfo.o pcgebdrv.o pclafchk.o $(cmatgen) zbrd = pzbrddriver.o pzbrdinfo.o pzgebdrv.o pzlafchk.o $(zmatgen) shrd = pshrddriver.o pshrdinfo.o psgehdrv.o pslafchk.o $(smatgen) dhrd = pdhrddriver.o pdhrdinfo.o pdgehdrv.o pdlafchk.o $(dmatgen) chrd = pchrddriver.o pchrdinfo.o pcgehdrv.o pclafchk.o $(cmatgen) zhrd = pzhrddriver.o pzhrdinfo.o pzgehdrv.o pzlafchk.o $(zmatgen) strd = pstrddriver.o psttrdtester.o pslatran.o pstrdinfo.o pssytdrv.o \ pslafchk.o xpjlaenv.o $(smatgen) dtrd = pdtrddriver.o pdttrdtester.o pdlatran.o pdtrdinfo.o pdsytdrv.o \ pdlafchk.o xpjlaenv.o $(dmatgen) ctrd = pctrddriver.o pcttrdtester.o pclatran.o pctrdinfo.o pchetdrv.o \ pclafchk.o xpjlaenv.o $(cmatgen) ztrd = pztrddriver.o pzttrdtester.o pzlatran.o pztrdinfo.o pzhetdrv.o \ pzlafchk.o xpjlaenv.o $(zmatgen) ssvd = pssvddriver.o pslagge.o pssvdchk.o pssvdcmp.o pssvdtst.o $(smatgen) dsvd = pdsvddriver.o pdlagge.o pdsvdchk.o pdsvdcmp.o pdsvdtst.o $(dmatgen) ssep = psseptst.o pssepsubtst.o pssepchk.o pssepqtq.o pslagsy.o \ pslatms.o pslasizesep.o pslasizesyevx.o pssepdriver.o \ pssepreq.o pssepinfo.o pslasizesyev.o \ pssqpsubtst.o pslasizesqp.o pssdpsubtst.o $(smatgen) dsep = pdseptst.o pdsepsubtst.o pdsepchk.o pdsepqtq.o pdlagsy.o \ pdlatms.o pdlasizesep.o pdlasizesyevx.o pdsepdriver.o \ pdsepreq.o pdsepinfo.o pdlasizesyev.o \ pdsqpsubtst.o pdlasizesqp.o pdsdpsubtst.o $(dmatgen) csep = pcseptst.o pcsepsubtst.o pcsepchk.o pcsepqtq.o pclagsy.o \ pclatms.o pclasizesep.o pclasizeheevx.o pcsepdriver.o \ pcsepreq.o pssepinfo.o pcsdpsubtst.o $(cmatgen) zsep = pzseptst.o pzsepsubtst.o pzsepchk.o pzsepqtq.o pzlagsy.o \ pzlatms.o pzlasizesep.o pzlasizeheevx.o pzsepdriver.o \ pzsepreq.o pdsepinfo.o pzsdpsubtst.o $(zmatgen) sgsep = psgseptst.o psgsepsubtst.o psgsepchk.o pslagsy.o \ pslatms.o pslasizesyevx.o pslasizegsep.o pslasizesep.o \ psgsepdriver.o psgsepreq.o pssepinfo.o $(smatgen) dgsep = pdgseptst.o pdgsepsubtst.o pdgsepchk.o pdlagsy.o \ pdlatms.o pdlasizesyevx.o pdlasizegsep.o pdlasizesep.o \ pdgsepdriver.o pdgsepreq.o pdsepinfo.o $(dmatgen) cgsep = pcgseptst.o pcgsepsubtst.o pcgsepchk.o pclagsy.o \ pclatms.o pclasizegsep.o pclasizeheevx.o pclasizesep.o \ pcgsepdriver.o pcgsepreq.o pssepinfo.o $(cmatgen) zgsep = pzgseptst.o pzgsepsubtst.o pzgsepchk.o pzlagsy.o \ pzlatms.o pzlasizegsep.o pzlasizeheevx.o pzlasizesep.o \ pzgsepdriver.o pzgsepreq.o pdsepinfo.o $(zmatgen) snep = psnepdriver.o psnepinfo.o psnepfchk.o $(smatgen) dnep = pdnepdriver.o pdnepinfo.o pdnepfchk.o $(dmatgen) cnep = pcnepdriver.o pcnepinfo.o pcnepfchk.o $(cmatgen) znep = pznepdriver.o pznepinfo.o pznepfchk.o $(zmatgen) cevc = pcevcdriver.o pcevcinfo.o pcget22.o $(cmatgen) zevc = pzevcdriver.o pzevcinfo.o pzget22.o $(zmatgen) all : single double complex complex16 single: $(shrdexe) $(strdexe) $(sbrdexe) $(ssepexe) $(sgsepexe) \ $(snepexe) $(ssvdexe) double: $(dhrdexe) $(dtrdexe) $(dbrdexe) $(dsepexe) $(dgsepexe) \ $(dnepexe) $(dsvdexe) complex: $(chrdexe) $(ctrdexe) $(cbrdexe) $(csepexe) $(cgsepexe) \ $(cnepexe) $(cevcexe) complex16: $(zhrdexe) $(ztrdexe) $(zbrdexe) $(zsepexe) $(zgsepexe) \ $(znepexe) $(zevcexe) $(TESTINGdir)/BRD.dat: ../BRD.dat cp ../BRD.dat $(TESTINGdir) $(sbrdexe) : $(SCALAPACKLIB) $(sbrd) $(F77LOADER) $(F77LOADFLAGS) -o $(sbrdexe) $(sbrd) $(LIBS) $(MAKE) $(TESTINGdir)/BRD.dat $(dbrdexe) : $(SCALAPACKLIB) $(dbrd) $(F77LOADER) $(F77LOADFLAGS) -o $(dbrdexe) $(dbrd) $(LIBS) $(MAKE) $(TESTINGdir)/BRD.dat $(cbrdexe) : $(SCALAPACKLIB) $(cbrd) $(F77LOADER) $(F77LOADFLAGS) -o $(cbrdexe) $(cbrd) $(LIBS) $(MAKE) $(TESTINGdir)/BRD.dat $(zbrdexe) : $(SCALAPACKLIB) $(zbrd) $(F77LOADER) $(F77LOADFLAGS) -o $(zbrdexe) $(zbrd) $(LIBS) $(MAKE) $(TESTINGdir)/BRD.dat $(TESTINGdir)/HRD.dat: ../HRD.dat cp ../HRD.dat $(TESTINGdir) $(shrdexe) : $(SCALAPACKLIB) $(shrd) $(F77LOADER) $(F77LOADFLAGS) -o $(shrdexe) $(shrd) $(LIBS) $(MAKE) $(TESTINGdir)/HRD.dat $(dhrdexe) : $(SCALAPACKLIB) $(dhrd) $(F77LOADER) $(F77LOADFLAGS) -o $(dhrdexe) $(dhrd) $(LIBS) $(MAKE) $(TESTINGdir)/HRD.dat $(chrdexe) : $(SCALAPACKLIB) $(chrd) $(F77LOADER) $(F77LOADFLAGS) -o $(chrdexe) $(chrd) $(LIBS) $(MAKE) $(TESTINGdir)/HRD.dat $(zhrdexe) : $(SCALAPACKLIB) $(zhrd) $(F77LOADER) $(F77LOADFLAGS) -o $(zhrdexe) $(zhrd) $(LIBS) $(MAKE) $(TESTINGdir)/HRD.dat $(TESTINGdir)/TRD.dat: ../TRD.dat cp ../TRD.dat $(TESTINGdir) $(strdexe) : $(SCALAPACKLIB) $(strd) $(F77LOADER) $(F77LOADFLAGS) -o $(strdexe) $(strd) $(LIBS) $(MAKE) $(TESTINGdir)/TRD.dat $(dtrdexe) : $(SCALAPACKLIB) $(dtrd) $(F77LOADER) $(F77LOADFLAGS) -o $(dtrdexe) $(dtrd) $(LIBS) $(MAKE) $(TESTINGdir)/TRD.dat $(ctrdexe) : $(SCALAPACKLIB) $(ctrd) $(F77LOADER) $(F77LOADFLAGS) -o $(ctrdexe) $(ctrd) $(LIBS) $(MAKE) $(TESTINGdir)/TRD.dat $(ztrdexe) : $(SCALAPACKLIB) $(ztrd) $(F77LOADER) $(F77LOADFLAGS) -o $(ztrdexe) $(ztrd) $(LIBS) $(MAKE) $(TESTINGdir)/TRD.dat $(TESTINGdir)/SVD.dat: ../SVD.dat cp ../SVD.dat $(TESTINGdir) $(ssvdexe) : $(SCALAPACKLIB) $(ssvd) $(F77LOADER) $(F77LOADFLAGS) -o $(ssvdexe) $(ssvd) $(RLIBS) $(MAKE) $(TESTINGdir)/SVD.dat $(dsvdexe) : $(SCALAPACKLIB) $(dsvd) $(F77LOADER) $(F77LOADFLAGS) -o $(dsvdexe) $(dsvd) $(RLIBS) $(MAKE) $(TESTINGdir)/SVD.dat $(TESTINGdir)/SEP.dat: ../SEP.dat cp ../SEP.dat $(TESTINGdir) $(ssepexe) : $(SCALAPACKLIB) $(ssep) $(F77LOADER) $(F77LOADFLAGS) -o $(ssepexe) $(ssep) $(RLIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(dsepexe) : $(SCALAPACKLIB) $(dsep) $(F77LOADER) $(F77LOADFLAGS) -o $(dsepexe) $(dsep) $(RLIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(csepexe) : $(SCALAPACKLIB) $(csep) $(F77LOADER) $(F77LOADFLAGS) -o $(csepexe) $(csep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(zsepexe) : $(SCALAPACKLIB) $(zsep) $(F77LOADER) $(F77LOADFLAGS) -o $(zsepexe) $(zsep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(sgsepexe) : $(SCALAPACKLIB) $(sgsep) $(F77LOADER) $(F77LOADFLAGS) -o $(sgsepexe) $(sgsep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(dgsepexe) : $(SCALAPACKLIB) $(dgsep) $(F77LOADER) $(F77LOADFLAGS) -o $(dgsepexe) $(dgsep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(cgsepexe) : $(SCALAPACKLIB) $(cgsep) $(F77LOADER) $(F77LOADFLAGS) -o $(cgsepexe) $(cgsep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(zgsepexe) : $(SCALAPACKLIB) $(zgsep) $(F77LOADER) $(F77LOADFLAGS) -o $(zgsepexe) $(zgsep) $(LIBS) $(MAKE) $(TESTINGdir)/SEP.dat $(TESTINGdir)/NEP.dat: ../NEP.dat cp ../NEP.dat $(TESTINGdir) $(snepexe) : $(SCALAPACKLIB) $(snep) $(F77LOADER) $(F77LOADFLAGS) -o $(snepexe) $(snep) $(LIBS) $(MAKE) $(TESTINGdir)/NEP.dat $(dnepexe) : $(SCALAPACKLIB) $(dnep) $(F77LOADER) $(F77LOADFLAGS) -o $(dnepexe) $(dnep) $(LIBS) $(MAKE) $(TESTINGdir)/NEP.dat $(cnepexe) : $(SCALAPACKLIB) $(cnep) $(F77LOADER) $(F77LOADFLAGS) -o $(cnepexe) $(cnep) $(LIBS) $(MAKE) $(TESTINGdir)/NEP.dat $(znepexe) : $(SCALAPACKLIB) $(znep) $(F77LOADER) $(F77LOADFLAGS) -o $(znepexe) $(znep) $(LIBS) $(MAKE) $(TESTINGdir)/NEP.dat $(TESTINGdir)/EVC.dat: ../EVC.dat cp ../EVC.dat $(TESTINGdir) $(cevcexe) : $(SCALAPACKLIB) $(cevc) $(F77LOADER) $(F77LOADFLAGS) -o $(cevcexe) $(cevc) $(LIBS) $(MAKE) $(TESTINGdir)/EVC.dat $(zevcexe) : $(SCALAPACKLIB) $(zevc) $(F77LOADER) $(F77LOADFLAGS) -o $(zevcexe) $(zevc) $(LIBS) $(MAKE) $(TESTINGdir)/EVC.dat $(sbrd): $(FRC) $(dbrd): $(FRC) $(cbrd): $(FRC) $(zbrd): $(FRC) $(shrd): $(FRC) $(dhrd): $(FRC) $(chrd): $(FRC) $(zhrd): $(FRC) $(strd): $(FRC) $(dtrd): $(FRC) $(ctrd): $(FRC) $(ztrd): $(FRC) $(ssvd): $(FRC) $(dsvd): $(FRC) $(ssep): $(FRC) $(dsep): $(FRC) $(csep): $(FRC) $(zsep): $(FRC) $(sgsep): $(FRC) $(dgsep): $(FRC) $(cgsep): $(FRC) $(zgsep): $(FRC) $(snep): $(FRC) $(dnep): $(FRC) $(cnep): $(FRC) $(znep): $(FRC) $(cevc): $(FRC) $(zevc): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o psbrddriver.o: psbrddriver.f $(F77) $(DRVOPTS) -c $< pdbrddriver.o: pdbrddriver.f $(F77) $(DRVOPTS) -c $< pcbrddriver.o: pcbrddriver.f $(F77) $(DRVOPTS) -c $< pzbrddriver.o: pzbrddriver.f $(F77) $(DRVOPTS) -c $< pshrddriver.o: pshrddriver.f $(F77) $(DRVOPTS) -c $< pdhrddriver.o: pdhrddriver.f $(F77) $(DRVOPTS) -c $< pchrddriver.o: pchrddriver.f $(F77) $(DRVOPTS) -c $< pzhrddriver.o: pzhrddriver.f $(F77) $(DRVOPTS) -c $< pstrddriver.o: pstrddriver.f $(F77) $(DRVOPTS) -c $< pdtrddriver.o: pdtrddriver.f $(F77) $(DRVOPTS) -c $< pctrddriver.o: pctrddriver.f $(F77) $(DRVOPTS) -c $< pztrddriver.o: pztrddriver.f $(F77) $(DRVOPTS) -c $< pssvddriver.o: pssvddriver.f $(F77) $(DRVOPTS) -c $< pdsvddriver.o: pdsvddriver.f $(F77) $(DRVOPTS) -c $< pssepdriver.o: pssepdriver.f $(F77) $(DRVOPTS) -c $< pdsepdriver.o: pdsepdriver.f $(F77) $(DRVOPTS) -c $< pcsepdriver.o: pcsepdriver.f $(F77) $(DRVOPTS) -c $< pzsepdriver.o: pzsepdriver.f $(F77) $(DRVOPTS) -c $< psgsepdriver.o: psgsepdriver.f $(F77) $(DRVOPTS) -c $< pdgsepdriver.o: pdgsepdriver.f $(F77) $(DRVOPTS) -c $< pcgsepdriver.o: pcgsepdriver.f $(F77) $(DRVOPTS) -c $< pzgsepdriver.o: pzgsepdriver.f $(F77) $(DRVOPTS) -c $< psnepdriver.o: psnepdriver.f $(F77) $(DRVOPTS) -c $< pdnepdriver.o: pdnepdriver.f $(F77) $(DRVOPTS) -c $< pcnepdriver.o: pcnepdriver.f $(F77) $(DRVOPTS) -c $< pznepdriver.o: pznepdriver.f $(F77) $(DRVOPTS) -c $< pcevcdriver.o: pcevcdriver.f $(F77) $(DRVOPTS) -c $< pzevcdriver.o: pzevcdriver.f $(F77) $(DRVOPTS) -c $< .f.o : ; $(F77) -c $(F77FLAGS) $*.f scalapack-1.8.0/TESTING/EIG/pdlasizesqp.f0000640000175000017500000001324710363532303017552 0ustar muammarmuammar SUBROUTINE PDLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESYEV, SIZESYEVD, ISIZESYEVD, $ SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 23, 2000 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVD, $ ISIZESYEVX, ISIZETST, SIZECHK, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, SIZESUBTST, $ SIZESYEV, SIZESYEVD, SIZESYEVX, SIZETMS, $ SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PDLASIZESQP computes the amount of memory needed by * various SEP test routines, as well as PDYEVX and PDSYEV * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PDSYEVX or PDSYEV * * SIZEMQRLEFT LWORK for the 1st PDORMQR call in PDLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PDORMQR call in PDLAGSY * * SIZEQRF LWORK for PDGEQRF in PDLAGSY * * SIZETMS LWORK for PDLATMS * * SIZEQTQ LWORK for PDSEPQTQ (nexer complex) * * SIZECHK LWORK for PDSEPCHK * * SIZESYEVX LWORK for PDSYEVX * * ISIZESYEVX LIWORK for PDSYEVX * * SIZESYEV LWORK for PDSYEV * * SIZESYEVD LWORK for PSSYEVD * * ISIZESYEVD LIWORK for PSSYEVD * * SIZESUBTST LWORK for PDSUBTST * * ISIZESUBTST LIWORK for PDSUBTST * * SIZETST LWORK for PDTST * * ISIZETST LIWORK for PDTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, $ LCM, LCMQ, LDA, LDC, MQ0, MYCOL, MYPCOLC, $ MYPROWC, MYROW, N, NB, NEIG, NN, NNP, NP, $ NPCOLC, NPROWC, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE * .. * .. Executable Statements .. * * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_GRIDEXIT * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * * Allow room for the new context created in PDSYEV * CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROW*NPCOL, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) LDC = MAX( 1, NUMROC( N, NB, MYPROWC, 0, NPROW*NPCOL ) ) SIZESYEV = 5*N + MAX( 2*NP0 + MQ0 + NB*NN , 2*NN-2 ) + N*LDC CALL BLACS_GRIDEXIT( CONTEXTC ) * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NN = MAX( N, NB, 2 ) NNP = 3*N + MAX( NB*( NP+1 ), 3*NB ) SIZESYEVD = MAX( NNP, 1+6*N+2*NP*NQ ) + 2*N ISIZESYEVD = 2+7*N+8*NPCOL * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZESYEV ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PDSYEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pcsepdriver.f0000640000175000017500000002356310363532303017543 0ustar muammarmuammar* * PROGRAM PCSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel COMPLEX Hermitian eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * COMPLEX words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PCHEEVX, the expert driver for the parallel * Hermitian eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, CPLXSZ, NIN PARAMETER ( TOTMEM = 2000000, CPLXSZ = 8, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / CPLXSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Functions .. REAL SLAMCH EXTERNAL SLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PCSEPREQ, PSLACHKIEEE, PSLASNBT * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PSLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PSLACHKIEEE( ISIEEE, SLAMCH( 'O' ), SLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Hermitian Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'Hermitian eigenvalue routine: PCHEEVX.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 )' ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )' ||Q^T*Q - I|| ' // '/ (N * eps)' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pCSEPtst.f).' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pCSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )'QTQ : ||Q^T*Q - I||/ (N * eps)' WRITE( NOUT, FMT = 9999 ) $ ' : when the adjusted QTQ exceeds THRESH', $ ' the adjusted QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' : otherwise the true QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ 'If NT>1, CHK and QTQ are the max over all ' // $ 'eigen request tests' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PCSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP SUB WALL CPU ', $ ' CHK QTQ CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- --- -------- --------', $ ' --------- --------- -----' ) * * End of PCSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pzsepqtq.f0000640000175000017500000002541710363532303017104 0ustar muammarmuammar* * SUBROUTINE PZSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, $ QTQNRM, INFO, RES ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES DOUBLE PRECISION QTQNRM, THRESH * .. * .. Array Arguments .. * INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ), $ PROCDIST( * ) DOUBLE PRECISION GAP( * ), WORK( * ) COMPLEX*16 C( * ), Q( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * Compute |I - QT * Q| / (ulp * n) * * Arguments * ========= * * NP = number of local rows in C * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in Q * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) COMPLEX*16 array, * global dimension (MS, NV), local dimension (LDQ, NQ) * * Contains the eigenvectors as computed by PZSTEIN * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) COMPLEX*16 array, * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ) * * Accumulator for computing I - QT * Q * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (input) DOUBLE PRECISION array, dimension (NV) * All procesors have an identical copy of W() * * Contains the computed eigenvalues * * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1) * Identifies which eigenvectors are the last to be computed * by a given process * * ICLUSTR (global input) INTEGER array dimension (2*P) * This input array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace. * This should be the output of PZSTEIN. * * GAP (global input) DOUBLE PRECISION array, dimension (P) * This input array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * Where: * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / EPS * * RES (global output) INTEGER * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE, NEGONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, $ NEGONE = -1.0D+0 ) * .. * .. Intrinsic Functions .. * INTRINSIC DBLE, DCMPLX, MAX * .. * .. Local Scalars .. INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN, $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW DOUBLE PRECISION NORM, QTQNRM2, ULP * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL NUMROC, PDLAMCH, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZGEMM, $ PZLASET, PZMATADD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * RES = 0 ULP = PDLAMCH( DESCC( CTXT_ ), 'P' ) * CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO ) CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO ) * IF( INFO.EQ.0 ) THEN NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * IF( IQ.NE.1 ) THEN INFO = -5 ELSE IF( JQ.NE.1 ) THEN INFO = -6 ELSE IF( IC.NE.1 ) THEN INFO = -9 ELSE IF( JC.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -16 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCC( CTXT_ ), 'PZSEPQTQ', -INFO ) RETURN END IF * * C = Identity matrix * CALL PZLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC ) * * C = C - QT * Q * IF( NV*MS.GT.0 ) THEN CALL PZGEMM( 'Conjugate transpose', 'N', NV, NV, MS, NEGONE, Q, $ 1, 1, DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC ) END IF * * Allow for poorly orthogonalized eigenvectors for large clusters * NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) QTQNRM = NORM / ( DBLE( MAX( MS, 1 ) )*ULP ) * CLUSTER = 1 10 CONTINUE DO 20 FIRSTP = 1, NPROW*NPCOL IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE * IMIN = ICLUSTR( 2*CLUSTER-1 ) JMAX = ICLUSTR( 2*CLUSTER ) * * IF( IMIN.EQ.0 ) $ GO TO 60 * DO 40 NEXTP = FIRSTP, NPROW*NPCOL IMAX = PROCDIST( NEXTP ) JMIN = IMAX + 1 * * CALL PZMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN, $ DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C, $ IMIN, JMIN, DESCC ) CALL PZMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN, $ DESCC, DCMPLX( GAP( CLUSTER ) / 0.01D+0 ), C, $ JMIN, IMIN, DESCC ) IMIN = IMAX * IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) ) $ GO TO 50 40 CONTINUE 50 CONTINUE * CLUSTER = CLUSTER + 1 GO TO 10 60 CONTINUE * * Compute the norm of C * NORM = PZLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) * QTQNRM2 = NORM / ( DBLE( MAX( MS, 1 ) )*ULP ) * IF( QTQNRM2.GT.THRESH ) THEN RES = 1 QTQNRM = QTQNRM2 END IF RETURN * * End of PZSEPQTQ * END scalapack-1.8.0/TESTING/EIG/pcsepreq.f0000640000175000017500000002314510363532303017033 0ustar muammarmuammar* * SUBROUTINE PCSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX MEM( MEMSIZE ) * .. * * Purpose * ======= * * PCSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PCSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) COMPLEX ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, INTGSZ PARAMETER ( CPLXSZ = 8, INTGSZ = 4 ) INTEGER REALSZ PARAMETER ( REALSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IMIDPAD, INITCON, IPOSTPAD, $ IPREPAD, ISIZEHEEVX, ISIZESUBTST, ISIZETST, $ LDA, LLRWORK, MATSIZE, MATTYPE, MYCOL, MYROW, $ N, NB, NMATSIZES, NMATTYPES, NNODES, NP, NPCOL, $ NPCONFIGS, NPROW, NQ, NUPLOS, ORDER, PCONFIG, $ PTRA, PTRCOPYA, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRRWORK, PTRW, PTRW2, PTRWORK, PTRZ, $ RES, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, UPLO,SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD REAL ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PCLASIZESEP, PCSEPTST, PSSEPINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PSSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * DO 40 MATSIZE = 1, NMATSIZES * DO 30 PCONFIG = 1, NPCONFIGS * DO 20 MATTYPE = 1, NMATTYPES * DO 10 UPLO = 1, NUPLOS * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, $ MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, $ RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, CPLXSZ / REALSZ ) PTRWORK = PTRW2 + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, CPLXSZ / REALSZ ) PTRGAP = PTRWORK + SIZETST + IPREPAD + IPOSTPAD PTRIFAIL = PTRGAP + ICEIL( NPROW*NPCOL+IPREPAD+ $ IPOSTPAD, CPLXSZ / REALSZ ) PTRICLUS = PTRIFAIL + ICEIL( N+IPREPAD+IPOSTPAD, $ CPLXSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, CPLXSZ / INTGSZ ) PTRRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, CPLXSZ / INTGSZ ) LLRWORK = ( MEMSIZE-PTRRWORK+1 )*CPLXSZ / REALSZ C LLRWORK = ( MEMSIZE-PTRRWORK-IPREPAD-IPOSTPAD+1) C $ *CPLXSZ / REALSZ NTESTS = NTESTS + 1 IF( LLRWORK.LT.RSIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PCSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), SUBTESTS, $ THRESH, N, ABSTOL, ISEED, $ MEM( PTRA ), MEM( PTRCOPYA ), $ MEM( PTRZ ), LDA, MEM( PTRW ), $ MEM( PTRW2 ), MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ SIZETST, MEM( PTRRWORK ), $ LLRWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT=*)'pCSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE END IF * * RETURN * * End of PCDSEPREQ * END scalapack-1.8.0/TESTING/EIG/pdsvdtst.f0000640000175000017500000005613010363532303017064 0ustar muammarmuammar SUBROUTINE PDSVDTST( M, N, NPROW, NPCOL, NB, ISEED, THRESH, WORK, $ RESULT, LWORK, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LWORK, M, N, NB, NOUT, NPCOL, NPROW DOUBLE PRECISION THRESH * .. * .. Array Arguments .. INTEGER ISEED( 4 ), RESULT( 9 ) DOUBLE PRECISION WORK( * ) * .. * * Purpose * ======= * * PDSVDTST checks the singular value decomposition (SVD) routine * PDGESVD. PDGESVD factors A = U diag(S) VT, where U and VT are * orthogonal and diag(S) is diagonal with the entries of the array * S on its diagonal. The entries of S are the singular values, stored * in decreasing order. U and VT can be optionally not computed, * computed and overwritten on A, or computed partially. * * A is M by N. Let SIZE = min( M, N ). S has dimension SIZE by SIZE. * U is M by SIZE and VT is SIZE by N. PDGESVD optionally calculates * U and VT, depending on the values of its parameters JOBU and JOBVT. * There are four possible combinations of "job" parameters for a call * to PDGESVD, that correspond to four values of internal index JOBTYPE. * The table below shows the mapping between "job" parameters of * PDGESVD and respective values of the index JOBTYPE together * with matrices computed for each type of the job. * * * | JOBU = 'V' | JOBU = 'N' * ---------- ------------------------------------------- * JOBVT = 'V'| JOBTYPE = 1 | JOBTYPE = 3 * | U1, S1, VT1 | S3, VT3 * ---------- ------------------------------------------ * JOBVT = 'N'| JOBTYPE = 2 | JOBTYPE = 4 * | U2, S2 | S4 * * * When PDSVDTST is called, a number of matrix "types" are specified. * For each type of matrix, and for the minimal workspace as well as * for larger than minimal workspace an M x N matrix "A" with known * singular values is generated and used to test the SVD routines. * For each matrix, A will be factored as A = U diag(S) VT and the * following 9 tests computed: * * (1) | A - U1 diag(S1) VT1 | / ( |A| max(M,N) ulp ) * * (2) | I - U1'U1 | / ( M ulp ) * * (3) | I - VT1 VT1' | / ( N ulp ), * * (4) S1 contains SIZE nonnegative values in decreasing order. * (Return 0 if true, 1/ULP if false.) * * (5) | S1 - S2 | / ( SIZE ulp |S| ) * * (6) | U1 - U2 | / ( M ulp ) * * (7) | S1 - S3 | / ( SIZE ulp |S| ) * * (8) | VT1 - VT3 | / ( N ulp ) * * (9) | S1 - S4 | / ( SIZE ulp |S| ) * * Currently, the list of possible matrix types is: * * (1) The zero matrix. * * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP. * (ULP = (first number larger than 1) - 1 ) * * (4) A matrix of the form U D VT, where U, VT are orthogonal and * D has evenly spaced entries 1, ..., ULP. * * (5) Same as (4), but multiplied by SQRT( overflow threshold ) * * (6) Same as (4), but multiplied by SQRT( underflow threshold ) * * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========== * * M (global input) INTEGER dimension * The value of the matrix row dimension. * * N (global input) INTEGER dimension * The value of the matrix column dimension. * * NPROW (global input) INTEGER * Number of process rows * * NPCOL (global input) INTEGER * Number of process columns * * NB (global input) INTEGER * The block size of the matrix A. NB >=1. * * ISEED (global input/local output) INTEGER array, dimension (4) * On entry, the seed of the random number generator. The array * elements should be between 0 and 4095; if not they will be * reduced mod 4096. Also, ISEED(4) must be odd. * On exit, ISEED is changed and can be used in the next call to * SDRVBD to continue the same random number sequence. * * THRESH (global input) DOUBLE PRECISION * The threshold value for the test ratios. A result is * included in the output file if RESULT >= THRESH. The test * ratios are scaled to be O(1), so THRESH should be a small * multiple of 1, e.g., 10 or 100. To have every test ratio * printed, use THRESH = 0. * * RESULT (global input/output) INTEGER array of dimension 9. Initially * RESULT( I ) = 0. On the output, RESULT ( I ) = 1 if test I * ( see above ) wasn't passed. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * Dimension of the array WORK. It is defined as follows * LWORK = 1 + 2*LDA*NQ + 3*SIZE + * MAX(WPDLAGGE, LDU*SIZEQ + LDVT*NQ + MAX(LDU*SIZEQ, LDVT*NQ) * + WPDGESVD + MAX( WPDSVDCHK, WPDSVDCMP)), * where WPDLAGGE, WPDGESVD, WPDSVDCHK, WPDSVDCMP are amounts * of workspace required respectively by PDLAGGE, PDGESVD, * PDSVDCHK, PDSVDCMP. * Here * LDA = NUMROC( M, NB, MYROW, 0, NPROW ), LDU = LDA, * LDVT = NUMROC( SIZE, NB, MYROW, 0, NPROW ), * NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ), * SIZEQ = NUMROC( SIZE, NB, MYCOL, 0, NPCOL ). * Values of the variables WPDLAGGE, WPDGESVD, WPDSVDCHK, * WPDSVDCMP are found by "dummy" calls to * the respective routines. In every "dummy" call, variable * LWORK is set to -1, thus causing respective routine * immediately return required workspace in WORK(1) without * executing any calculations * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_, NTYPES PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9, NTYPES = 6 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. CHARACTER HETERO, JOBU, JOBVT INTEGER CONTEXT, DINFO, I, IA, IAM, INFO, ITYPE, IU, $ IVT, JA, JOBTYPE, JU, JVT, LDA, LDU, LDVT, $ LLWORK, LWMIN, MYCOL, MYROW, NNODES, NQ, PASS, $ PTRA, PTRAC, PTRD, PTRWORK, PTRS, PTRSC, PTRU, $ PTRUC, PTRVT, PTRVTC, SETHET, SIZE, SIZEQ, $ WPDGESVD, WPDLAGGE, WPDSVDCHK, WPDSVDCMP DOUBLE PRECISION CHK, DELTA, H, MTM, OVFL, RTOVFL, RTUNFL, ULP, $ UNFL * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ BLACS_SET, $ DESCINIT, DGAMN2D, DGAMX2D, DLABAD, DSCAL, $ IGAMN2D, IGAMX2D, IGEBR2D, IGEBS2D, PDELSET, $ PDGESVD, PDLACPY, PDLAGGE, PDLASET, PDSVDCHK, $ PDSVDCMP, PXERBLA, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL NUMROC, PDLAMCH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCU( DLEN_ ), $ DESCVT( DLEN_ ), ITMP( 2 ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*DTYPE_*LLD_*MB_*M_*NB_*N_*RSRC_.LT.0 ) $ RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * If this process is not a part of the contex, bail out now. * IF( ( MYROW.GE.NPROW ) .OR. ( MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL ) .OR. ( MYCOL.LT.0 ) )GO TO 110 CALL BLACS_SET( CONTEXT, 15, 1 ) INFO = 0 * * Check input parameters. * IF( M.LE.0 ) THEN INFO = -1 ELSE IF( N.LE.0 ) THEN INFO = -2 ELSE IF( NPROW.LE.0 ) THEN INFO = -3 ELSE IF( NPCOL.LE.0 ) THEN INFO = -4 ELSE IF( NB.LE.0 ) THEN INFO = -5 ELSE IF( THRESH.LE.0 ) THEN INFO = -7 END IF * SIZE = MIN( M, N ) * * Initialize matrix descriptors. * IA = 1 JA = 1 IU = 1 JU = 1 IVT = 1 JVT = 1 * LDA = NUMROC( M, NB, MYROW, 0, NPROW ) LDA = MAX( 1, LDA ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) LDU = LDA SIZEQ = NUMROC( SIZE, NB, MYCOL, 0, NPCOL ) LDVT = NUMROC( SIZE, NB, MYROW, 0, NPROW ) LDVT = MAX( 1, LDVT ) CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, CONTEXT, LDA, DINFO ) CALL DESCINIT( DESCU, M, SIZE, NB, NB, 0, 0, CONTEXT, LDU, DINFO ) CALL DESCINIT( DESCVT, SIZE, N, NB, NB, 0, 0, CONTEXT, LDVT, $ DINFO ) * * Set some pointers to work array in order to do "dummy" calls. * PTRA = 2 PTRAC = PTRA + LDA*NQ PTRD = PTRAC + LDA*NQ PTRS = PTRD + SIZE PTRSC = PTRS + SIZE PTRWORK = PTRSC + SIZE * PTRU = PTRWORK PTRVT = PTRWORK PTRUC = PTRWORK PTRVTC = PTRWORK * * "Dummy" calls -- return required workspace in work(1) without * any calculation. * CALL PDLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, DESCA, $ ISEED, SIZE, WORK( PTRWORK ), -1, DINFO ) WPDLAGGE = INT( WORK( PTRWORK ) ) * CALL PDGESVD( 'V', 'V', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRS ), WORK( PTRU ), IU, JU, DESCU, $ WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRWORK ), -1, DINFO ) WPDGESVD = INT( WORK( PTRWORK ) ) * CALL PDSVDCHK( M, N, WORK( PTRAC ), IA, JA, DESCA, WORK( PTRUC ), $ IU, JU, DESCU, WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRS ), THRESH, WORK( PTRWORK ), -1, $ RESULT, CHK, MTM ) WPDSVDCHK = INT( WORK( PTRWORK ) ) * CALL PDSVDCMP( M, N, 1, WORK( PTRS ), WORK( PTRSC ), WORK( PTRU ), $ WORK( PTRUC ), IU, JU, DESCU, WORK( PTRVT ), $ WORK( PTRVTC ), IVT, JVT, DESCVT, THRESH, $ RESULT, DELTA, WORK( PTRWORK ), -1 ) WPDSVDCMP = INT( WORK( PTRWORK ) ) * * Calculation of workspace at last. * LWMIN = 1 + 2*LDA*NQ + 3*SIZE + $ MAX( WPDLAGGE, LDU*SIZEQ+LDVT*NQ+MAX( LDU*SIZEQ, $ LDVT*NQ )+WPDGESVD+MAX( WPDSVDCHK, WPDSVDCMP ) ) WORK( 1 ) = LWMIN * * If this is a "dummy" call, return. * IF( LWORK.EQ.-1 ) $ GO TO 120 IF( INFO.EQ.0 ) THEN IF( LWORK.LT.LWMIN ) THEN INFO = -10 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSVDTST', -INFO ) RETURN END IF * ULP = PDLAMCH( CONTEXT, 'P' ) UNFL = PDLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL DLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF * * Loop over matrix types. * DO 100 ITYPE = 1, NTYPES * PASS = 0 SETHET = 0 PTRWORK = PTRSC + SIZE LLWORK = LWORK - PTRWORK + 1 * * Compute A. * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix. * DO 10 I = 1, SIZE WORK( PTRD+I-1 ) = ZERO 10 CONTINUE * CALL PDLASET( 'All', M, N, ZERO, ZERO, WORK( PTRA ), $ IA, JA, DESCA ) * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix. * DO 20 I = 1, SIZE WORK( PTRD+I-1 ) = ONE 20 CONTINUE * CALL PDLASET( 'All', M, N, ZERO, ONE, WORK( PTRA ), $ IA, JA, DESCA ) * ELSE IF( ITYPE.GT.2 ) THEN * * Preset Singular Values. * IF( SIZE.NE.1 ) THEN H = ( ULP-1 ) / ( SIZE-1 ) DO 30 I = 1, SIZE WORK( PTRD+I-1 ) = 1 + H*( I-1 ) 30 CONTINUE ELSE WORK( PTRD ) = 1 END IF * IF( ITYPE.EQ.3 ) THEN * * Diagonal Matrix with specified singular values. * CALL PDLASET( 'All', M, N, ZERO, ZERO, WORK( PTRA ), $ IA, JA, DESCA ) * DO 40 I = 1, SIZE CALL PDELSET( WORK( PTRA ), I, I, DESCA, $ WORK( PTRD+I-1 ) ) 40 CONTINUE * ELSE IF( ITYPE.EQ.4 ) THEN * * General matrix with specified singular values. * CALL PDLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Singular values scaled by overflow. * CALL DSCAL( SIZE, RTOVFL, WORK( PTRD ), 1 ) * CALL PDLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * ELSE IF( ITYPE.EQ.6 ) THEN * * Singular values scaled by underflow. * CALL DSCAL( SIZE, RTUNFL, WORK( PTRD ), 1 ) CALL PDLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * END IF * END IF * * Set mapping between JOBTYPE and calling parameters of * PDGESVD, reset pointers to WORK array to save space. * DO 80 JOBTYPE = 1, 4 * IF( JOBTYPE.EQ.1 ) THEN JOBU = 'V' JOBVT = 'V' PTRVT = PTRU + LDU*SIZEQ PTRUC = PTRVT + LDVT*NQ PTRWORK = PTRUC + LDU*SIZEQ LLWORK = LWORK - PTRWORK + 1 ELSE IF( JOBTYPE.EQ.2 ) THEN JOBU = 'V' JOBVT = 'N' ELSE IF( JOBTYPE.EQ.3 ) THEN JOBU = 'N' JOBVT = 'V' PTRVTC = PTRUC PTRWORK = PTRVTC + LDVT*NQ LLWORK = LWORK - PTRWORK + 1 ELSE IF( JOBTYPE.EQ.4 ) THEN JOBU = 'N' JOBVT = 'N' PTRWORK = PTRUC LLWORK = LWORK - PTRWORK + 1 END IF * * Duplicate matrix A. * CALL PDLACPY( 'A', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRAC ), IA, JA, DESCA ) * * Test SVD calculation with minimum amount of workspace * calculated earlier. * IF( JOBTYPE.EQ.1 ) THEN * * Run SVD. * CALL SLBOOT CALL BLACS_BARRIER( CONTEXT, 'All' ) CALL SLTIMER( 1 ) * CALL PDGESVD( JOBU, JOBVT, M, N, WORK( PTRAC ), IA, JA, $ DESCA, WORK( PTRS ), WORK( PTRU ), IU, JU, $ DESCU, WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRWORK ), WPDGESVD, INFO ) * CALL SLTIMER( 1 ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 1, 1, CTIME ) * * Check INFO. Different INFO for different processes mean * something went wrong. * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, $ 1, -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), $ 1, 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' GO TO 120 END IF END IF * * If INFO is negative PXERBLA tells you. So the only thing * is to check for positive INFO -- detected heterogeneous * system. * IF( INFO.EQ.( SIZE+1 ) ) THEN HETERO = 'P' SETHET = 1 END IF * * If INFO was fine do more exhaustive check. * IF( INFO.EQ.ZERO ) THEN * DO 50 I = 1, SIZE WORK( I+PTRWORK ) = WORK( I+PTRS-1 ) WORK( I+SIZE+PTRWORK ) = WORK( I+PTRS-1 ) 50 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', SIZE, 1, $ WORK( 1+PTRWORK ), SIZE, 1, 1, -1, -1, $ 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', SIZE, 1, $ WORK( 1+SIZE+PTRWORK ), SIZE, 1, 1, -1, $ -1, 0 ) * DO 60 I = 1, SIZE IF( ABS( WORK( I+PTRWORK )-WORK( SIZE+I+ $ PTRWORK ) ).GT.ZERO ) THEN WRITE( NOUT, FMT = * )'I= ', I, ' MIN=', $ WORK( I+PTRWORK ), ' MAX=', $ WORK( SIZE+I+PTRWORK ) HETERO = 'T' SETHET = 1 GO TO 70 END IF * 60 CONTINUE 70 CONTINUE * END IF * IF( SETHET.NE.1 ) $ HETERO = 'N' * * Need to copy A again since AC was overwritten by PDGESVD. * CALL PDLACPY( 'A', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRAC ), IA, JA, DESCA ) * * PDSVDCHK overwrites U. So before the call to PDSVDCHK * U is copied to UC and a pointer to UC is passed to * PDSVDCHK. * CALL PDLACPY( 'A', M, SIZE, WORK( PTRU ), IU, JU, DESCU, $ WORK( PTRUC ), IU, JU, DESCU ) * * Run tests 1 - 4. * CALL PDSVDCHK( M, N, WORK( PTRAC ), IA, JA, DESCA, $ WORK( PTRUC ), IU, JU, DESCU, $ WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRS ), THRESH, WORK( PTRWORK ), $ LLWORK, RESULT, CHK, MTM ) * ELSE * * Once again test PDGESVD with min workspace. * CALL PDGESVD( JOBU, JOBVT, M, N, WORK( PTRAC ), IA, JA, $ DESCA, WORK( PTRSC ), WORK( PTRUC ), IU, $ JU, DESCU, WORK( PTRVTC ), IVT, JVT, $ DESCVT, WORK( PTRWORK ), WPDGESVD, INFO ) * CALL PDSVDCMP( M, N, JOBTYPE, WORK( PTRS ), $ WORK( PTRSC ), WORK( PTRU ), $ WORK( PTRUC ), IU, JU, DESCU, $ WORK( PTRVT ), WORK( PTRVTC ), IVT, JVT, $ DESCVT, THRESH, RESULT, DELTA, $ WORK( PTRWORK ), LLWORK ) * END IF * 80 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN DO 90 I = 1, 9 IF( RESULT( I ).EQ.1 ) THEN PASS = 1 WRITE( NOUT, FMT = * )'Test I = ', I, 'has failed' WRITE( NOUT, FMT = * )' ' END IF 90 CONTINUE IF( PASS.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Passed', WTIME( 1 ), $ CTIME( 1 ), M, N, NPROW, NPCOL, NB, ITYPE, CHK, MTM, $ DELTA, HETERO END IF END IF 100 CONTINUE CALL BLACS_GRIDEXIT( CONTEXT ) 110 CONTINUE * 9999 FORMAT( A6, 2E10.4, 2I6, 2I4, I5, I6, 3F6.2, 4X, A1 ) 120 CONTINUE * * End of PDSVDTST * RETURN END scalapack-1.8.0/TESTING/EIG/pcseptst.f0000640000175000017500000012522210363532303017055 0ustar muammarmuammar * SUBROUTINE PCSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, $ ORDER, ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, $ NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 15, 2002 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, LRWORK, $ LWORK, MATTYPE, N, NOUT, ORDER REAL ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) REAL GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX A( LDA, * ), COPYA( LDA, * ), WORK( * ), $ Z( LDA, * ) * .. * * Purpose * ======= * * PCSEPTST builds a random matrix, runs PCHEEVX() to * compute the eigenvalues * and eigenvectors and then performs two tests to * determine if the result * is good enough. The two tests are: * |AQ -QL| / (abstol + ulp * norm(A) ) * and * |QT * Q - I| / ulp * norm(A) * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) Hermitian matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PCSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) COMPLEX array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PCHEEVX * * COPYA (local workspace) COMPLEX array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * Z (local workspace) COMPLEX array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PCSEPCHK and PCSEPQTQ * * W (local workspace) REAL array, dimension (N) * On normal exit from PCHEEVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) COMPLEX array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PCLASIZESEP * * RWORK (local workspace) COMPLEX array, dimension (LWORK) * * LRWORK (local input) INTEGER * The length of the array WORK. LRWORK >= RSIZETST as * returned by PCLASIZESEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PCLASIZESEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0, $ HALF = 0.5E+0 ) COMPLEX PADVAL PARAMETER ( PADVAL = ( 19.25E+0, 1.1E+1 ) ) COMPLEX CZERO PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDRWORK, INDWORK, ISIZEHEEVD, ISIZEHEEVX, $ ISIZESUBTST, ISIZETST, ITYPE, IU, J, $ LHEEVDSIZE, LHEEVXSIZE, LLRWORK, LLWORK, $ MAXSIZE, MYCOL, MYROW, NB, NGEN, NLOC, NNODES, $ NP, NP0, NPCOL, NPROW, NQ, NQ0, RES, RSIZECHK, $ RSIZEHEEVD, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, SIZEHEEVD, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, VALSIZE, VECSIZE REAL ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, CLATMS, IGAMX2D, $ IGEBR2D, IGEBS2D, PCCHEKPAD, PCELSET, $ PCFILLPAD, PCLASET, PCLASIZEHEEVX, PCLASIZESEP, $ PCLATMS, PCMATGEN, PCSDPSUBTST, PCSEPSUBTST, $ SLABAD, SLASRT, SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED EEVX' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, SIZESUBTST, $ RSIZESUBTST, ISIZESUBTST, SIZETST, RSIZETST, $ ISIZETST ) * IF( LRWORK.LT.RSIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDRWORK = INDD + N INDWORK = 1 LLWORK = LWORK - INDWORK + 1 LLRWORK = LRWORK - INDRWORK + 1 * ULP = PSLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PSLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL SLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / REAL( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log Hermitian, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random Hermitian * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N RWORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PCLASET( 'All', N, N, CZERO, CZERO, COPYA, 1, 1, $ DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N RWORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PCLASET( 'All', N, N, CZERO, CONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Hermitian, eigenvalues specified * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * Hermitian, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PCMATGEN( DESCA( CTXT_ ), 'H', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PCLASET( 'All', N, N, CZERO, CZERO, COPYA, 1, 1, $ DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( SLARAN( ISEED )*REAL( NLOC ) ), N-NGEN ) * CALL CLATMS( IN, IN, 'S', ISEED, 'P', RWORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PCELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PCELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PCELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PCELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 RWORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL SLASRT( 'I', N, RWORK( INDD ), IINFO ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PCLASIZEHEEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, RWORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LHEEVXSIZE = MIN( MAXSIZE, LLRWORK ) * CALL PCSEPSUBTST( WKNOWN, 'v', 'a', UPLO, N, VL, VU, IL, IU, $ THRESH, ABSTOL, A, COPYA, Z, 1, 1, DESCA, $ RWORK( INDD ), WIN, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK( INDWORK ), LLWORK, $ RWORK( INDRWORK ), LLRWORK, LHEEVXSIZE, $ IWORK, ISIZEHEEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LHEEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PCSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF * * Now that PCHEEVX been tested, we check PCHEEVD * PASSED = 'PASSED EEVD' * * PCHEEVD test1: * IF( INFO.EQ.0 ) THEN * NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( MAX( N, 1 ), NB, 0, 0, NPCOL ) LHEEVDSIZE = 1 + 9*N + 3*NP0*NQ0 ISIZEHEEVD = MAX( 1, 2+7*N+8*NPCOL ) * CALL PCSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, COPYA, Z, $ 1, 1, DESCA, WIN, WNEW, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK, LRWORK, $ LHEEVDSIZE, IWORK, ISIZEHEEVD, RES, TSTNRM, $ QTQNRM, NOUT ) * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM IF( RES.NE.0 ) THEN PASSED = 'FAILED EEVD' INFO = 1 END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 21X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 21X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 32X, $ 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PCSEPDRIVER' ) * * End of PCSEPTST * END scalapack-1.8.0/TESTING/EIG/pclafchk.f0000640000175000017500000002675510363532303016776 0ustar muammarmuammar SUBROUTINE PCLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = (1.0E+0, 0.0E+0) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK, $ LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CMATADD, INFOG2L, PCMATGEN * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH, PCLANGE EXTERNAL ICEIL, LSAME, NUMROC, PCLANGE, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDWP1 = LDW + 1 LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * IF( LSAME( AFORM, 'H' ) ) THEN * * Handle first block of columns separately * II = 1 ICURROW = IAROW ICURCOL = IACOL JB = JN - JA + 1 * IF( MYCOL.EQ.ICURCOL ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 10, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) ) 10 CONTINUE END IF CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-JJ, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ), $ IASEED, IIA-1, MP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 20, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = REAL( WORK( II+KK*LDWP1 ) ) 20 CONTINUE END IF CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * ELSE * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 )*LDA CALL PCMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL CMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) 40 CONTINUE * END IF * * Calculate factor residual * FRESID = PCLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PCLAFCHK * END scalapack-1.8.0/TESTING/EIG/psgrptseptst.f0000640000175000017500000000530110363532303017765 0ustar muammarmuammar PROGRAM PSRPTGSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * Repeat generalized parallel symmetric eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS, UPLO INTEGER CONTEXT, IAM, IBTYPE, INFO, IPOSTPAD, IPREPAD, $ LDA, MATTYPE, N, NB, NPCOL, NPROCS, NPROW REAL ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) REAL A( MAXN*MAXN ), B( MAXN, MAXN ), $ COPYA( MAXN*MAXN ), COPYB( MAXN, MAXN ), $ GAP( MAXN ), WIN( MAXN ), WNEW( MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PSGSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * These lines should be replaced by the output from pxGSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 IBTYPE = 1 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351E-37 THRESH = .350000E+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PSGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, A, COPYA, B, COPYB, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PSRPTGSEPTST * END scalapack-1.8.0/TESTING/EIG/pzgehdrv.f0000640000175000017500000001776010602576752017064 0ustar muammarmuammar SUBROUTINE PZGEHDRV( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEHDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from the * unitary matrix Q, the Hessenberg matrix, and the array TAU returned * by PZGEHRD: * sub( A ) := Q * H * Q' * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows and columns 1:ILO-1 and IHI+1:N. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) reduced to Hessenberg * form by PZGEHRD. The upper triangle and the first sub- * diagonal of sub( A ) contain the upper Hessenberg matrix H, * and the elements below the first subdiagonal, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors. On exit, the original distributed * N-by-N matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16 array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors returned by * PZGEHRD. TAU is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK). * LWORK >= NB*NB + NB*IHLP + MAX[ NB*( IHLP+INLQ ), * NB*( IHLQ + MAX[ IHIP, * IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, * NPCOL ), NB, 0, 0, LCMQ ) ] ) ] * * where NB = MB_A = NB_A, * LCM is the least common multiple of NPROW and NPCOL, * LCM = ILCM( NPROW, NPCOL ), LCMQ = LCM / NPCOL, * * IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * IHLP = NUMROC( IHI-ILO+IROFFA+1, NB, MYROW, ILROW, NPROW ), * IHLQ = NUMROC( IHI-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ), * INLQ = NUMROC( N-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ). * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IHLP, II, IOFF, IPT, $ IPV, IPW, IV, J, JB, JJ, JL, K, MYCOL, MYROW, $ NB, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PZLARFB, $ PZLARFT, PZLACPY, PZLASET * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * NB = DESCA( MB_ ) IOFF = MOD( IA+ILO-2, NB ) CALL INFOG2L( IA+ILO-1, JA+ILO-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = IPT + NB * NB IPW = IPV + IHLP * NB JL = MAX( ( ( JA+IHI-2 ) / NB ) * NB + 1, JA + ILO - 1 ) CALL DESCSET( DESCV, IHI-ILO+IOFF+1, NB, NB, NB, IAROW, $ INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, IHLP ) ) * DO 10 J = JL, ILO+JA+NB-IOFF-1, -NB JB = MIN( JA+IHI-J-1, NB ) I = IA + J - JA K = I - IA + 1 IV = K - ILO + IOFF + 1 * * Compute upper triangular matrix T from TAU. * CALL PZLARFT( 'Forward', 'Columnwise', IHI-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'All', IHI-K, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', IHI-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-K, N-K+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI, IHI-K, JB, WORK( IPV ), IV+1, $ 1, DESCV, WORK( IPT ), A, IA, J+1, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the first block separately * IV = IOFF + 1 I = IA + ILO - 1 J = JA + ILO - 1 JB = MIN( NB-IOFF, JA+IHI-J-1 ) * * Compute upper triangular matrix T from TAU. * CALL PZLARFT( 'Forward', 'Columnwise', IHI-ILO, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'All', IHI-ILO, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * IF( IHI-ILO.GT.0 ) $ CALL PZLASET( 'Lower', IHI-ILO-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-ILO, N-ILO+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI, IHI-ILO, JB, WORK( IPV ), IV+1, $ 1, DESCV, WORK( IPT ), A, IA, J+1, DESCA, $ WORK( IPW ) ) * RETURN * * End of PZGEHDRV * END scalapack-1.8.0/TESTING/EIG/pclasizesep.f0000640000175000017500000001350610363532303017533 0ustar muammarmuammar SUBROUTINE PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZEHEEVD, RSIZEHEEVD, $ ISIZEHEEVD, SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, RSIZETST, ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZEHEEVD, ISIZEHEEVX, $ ISIZESUBTST, ISIZETST, RSIZECHK, RSIZEHEEVD, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVD, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PCLASIZESEP computes the amount of memory needed by * various SEP test routines, as well as HEEVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PCHEEVX * * SIZEMQRLEFT LWORK for the 1st PCUNMQR call in PCLAGHE * * SIZEMQRRIGHT LWORK for the 2nd PCUNMQR call in PCLAGHE * * SIZEQRF LWORK for PCGEQRF in PCLAGHE * * SIZETMS LWORK for PCLATMS * * RSIZEQTQ LWORK for PCSEPQTQ (nexer complex) * * RSIZECHK LWORK for PCSEPCHK * * SIZEHEEVX LWORK for PCHEEVX * * RSIZEHEEVX LRWORK for PCHEEVX * * ISIZEHEEVX LIWORK for PCHEEVX * * SIZEHEEVD LWORK for PCHEEVD * * RSIZEHEEVD LRWORK for PCHEEVD * * ISIZEHEEVD LIWORK for PCHEEVD * * SIZESUBTST LWORK for PCSUBTST * * RSIZESUBTST LRWORK for PCSUBTST * * ISIZESUBTST LIWORK for PCSUBTST * * SIZETST LWORK for PCTST * * RSIZETST LRWORK for PCTST * * ISIZETST LIWORK for PCTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ANB, CSRC_A, IACOL, IAROW, ICOFFA, ICTXT, $ IROFFA, LCM, LCMQ, LDA, MQ0, MYCOL, MYROW, N, $ NB, NEIG, NHETRD_LWOPT, NN, NNP, NP, NP0, $ NPCOL, NPROW, NPS, NQ, RSRC_A, SIZECHK, $ SIZEQTQ, SQNPC * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, REAL, SQRT * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = 0 RSIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) RSIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZEHEEVX = N + ( NP0+MQ0+NB )*NB RSIZEHEEVX = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZEHEEVX = 6*NNP * ICTXT = DESCA( CTXT_ ) ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( REAL( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS * SIZEHEEVX = MAX( SIZEHEEVX, N+NHETRD_LWOPT ) * SIZEHEEVD = SIZEHEEVX RSIZEHEEVD = 7*N + 3*NP0*MQ0 ISIZEHEEVD = 7*N + 8*NPCOL + 2 SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZEHEEVX, $ SIZEHEEVD ) + IPREPAD + IPOSTPAD RSIZESUBTST = MAX( RSIZEHEEVX, RSIZEHEEVD, RSIZEQTQ, RSIZECHK ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = MAX( ISIZEHEEVX, ISIZEHEEVD ) + IPREPAD + IPOSTPAD * * Allow room for A, COPYA and Z and WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Room for DIAG, WIN, WNEW, GAP and RWORK * RSIZETST = 4*( N+IPREPAD+IPOSTPAD ) + RSIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PCHEEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pdgrptseptst.f0000640000175000017500000000530610363532303017753 0ustar muammarmuammar* * PROGRAM PDRPTGSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Repeat generalized parallel symmetric eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS, UPLO INTEGER CONTEXT, IAM, IBTYPE, INFO, IPOSTPAD, IPREPAD, $ LDA, MATTYPE, N, NB, NPCOL, NPROCS, NPROW DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) DOUBLE PRECISION A( MAXN*MAXN ), B( MAXN, MAXN ), $ COPYA( MAXN*MAXN ), COPYB( MAXN, MAXN ), $ GAP( MAXN ), WIN( MAXN ), WNEW( MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PDGSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * These lines should be replaced by the output from pxGSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 IBTYPE = 1 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351D-37 THRESH = .350000D+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PDGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, A, COPYA, B, COPYB, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PDRPTGSEPTST * END scalapack-1.8.0/TESTING/EIG/pcnepdriver.f0000640000175000017500000005031010363532303017524 0ustar muammarmuammar PROGRAM PCNEPDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * Purpose * ======= * * PCNEPDRIVER is the main test program for the COMPLEX * SCALAPACK NEP routines. This test driver performs a Schur * decomposition followed by residual check of a Hessenberg matrix. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * CPLXSZ INTEGER, default = 8 bytes. * CPLXSZ indicate the length in bytes on the given platform * for a single precision complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * Further Details * =============== * * Contributed by Mark Fahey, March 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, TOTMEM, MEMSIZ, NTESTS PARAMETER ( CPLXSZ = 16, TOTMEM = 200000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20 ) COMPLEX PADVAL, ZERO, ONE PARAMETER ( PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPREPAD, IPW, IPWR, IPC, J, K, KFAIL, $ KPASS, KSKIP, KTESTS, LDA, LDWORK, LDZ, LWORK, $ MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, $ NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH REAL ANORM, FRESID, QRESID, ZNORM DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IDUM( 1 ), $ IERR( 2 ), NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCGEMM, PCLAHQR, PCLASET, PCMATGEN, $ PCNEPFCHK, PCNEPINFO, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH, PCLANGE, PCLANHS EXTERNAL ILCM, NUMROC, PSLAMCH, PCLANGE, PCLANHS * .. * .. Intrinsic Functions .. INTRINSIC REAL, MAX, MIN * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCNEPINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) LDWORK = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAE+1 ) * IPA = IPREPAD + 1 IPC = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPC + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPWR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX( 2*N, ( 8*ILCM( NPROW, NPCOL )+2 )** $ 2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCNEPFCHK and PCLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate matrix Z = In * CALL PCLASET( 'All', N, N, ZERO, ONE, MEM( IPC ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PCMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PCLASET( 'Lower', MAX( 0, N-2 ), MAX( 0, N-2 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 3 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPC-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform NEP factorization * CALL PCLAHQR( .TRUE., .TRUE., N, 1, N, MEM( IPA ), DESCA, $ MEM( IPWR ), 1, N, MEM( IPC ), DESCZ, $ MEM( IPW ), LWORK, IDUM, 0, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PCLAHQR INFO=', INFO KFAIL = KFAIL + 1 GO TO 10 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PCCHEKPAD( ICTXT, 'PCLAHQR (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLAHQR (Z)', NP, NQ, $ MEM( IPC-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLAHQR (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLAHQR (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || Z * H * Z**T - H0 || / ( N*|| H0 ||*EPS ) * CALL PCNEPFCHK( N, MEM( IPA ), 1, 1, DESCA, IASEED, $ MEM( IPC ), 1, 1, DESCZ, ANORM, $ FRESID, MEM( IPW ) ) * CALL PCCHEKPAD( ICTXT, 'PCNEPFCHK (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCNEPFCHK (Z)', NP, NQ, $ MEM( IPC-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCNEPFCHK (WORK)', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || (Z**T)*Z - In ||_1 * CALL PCLASET( 'All', N, N, ZERO, ONE, MEM( IPA ), 1, $ 1, DESCA ) CALL PCGEMM( 'Cong Tran', 'No transpose', N, N, N, $ -ONE, MEM( IPC ), 1, 1, DESCZ, $ MEM( IPC ), 1, 1, DESCZ, ONE, MEM( IPA ), $ 1, 1, DESCA ) ZNORM = PCLANGE( '1', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) QRESID = ZNORM / ( REAL( N )*PSLAMCH( ICTXT, 'P' ) ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0E+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 18 N^3 flops for PxLAHQR * NOPS = 18.0D+0*DBLE( N )**3 * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H - Q*S*Q^T|| / (||H|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Q^T*Q - I|| / ( N * eps ) ', G25.7 ) * STOP * * End of PCNEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pdlagge.f0000640000175000017500000003075310363532303016617 0ustar muammarmuammar SUBROUTINE PDLAGGE( M, N, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION A( * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAGGE generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal * matrices: A = U*D*VT. * * This is just a quick implementation which will be replaced in the * future. The random matrix A1(m,n) is generated and random left * orthogonal matrix U(m,m) is obtained by running QR on A1: * A1(m,n) = U(m,m)*R, * where U(m,m) is a product of min(m,n) Householder rotations. * Afterwards the space of A1 is reused for a second random matrix * A2(m,n), which is used to obtain the right orthogonal matrix VT(n,n) * by running LQ on A2: * A2(m,n) = L*VT(n,n). * This requires vastly more computation than necessary, but not * significantly more communication than is used in the rest of this * routine, and hence is not that much slower than an efficient * solution. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * Number of rows of the matrix A. M >= 0. * * N (global input) INTEGER * Number of columns of matrix A. N >= 0. * * D (local input) DOUBLE PRECISION array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) DOUBLE PRECISION array * Global dimension (M, N), local dimension (MP, NQ) * * IA (global input) INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. On exit, the seed is updated and will remain identical * on all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= MAX( QR_WORK, LQ_WORK ) * QR_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU1 ) + * MAX( SIZEMQRLEFT, SIZEQRF) * LQ_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU2) + * MAX( SIZEMLQRIGHT, SIZEQRF ) * Where: * LDAA = DESCA( LLD_ ) * MB_A = DESCA( MB_ ) * NB_A = DESCA( NB_ ) * RSRC_A = DESCA( RSRC_ ) * CSRC_A = DESCA( CSRC_ ) * LCM = ILCM( NPROW, NPCOL ) * LCMQ = LCM / NPCOL * IROFFA = MOD( IA-1, MB_A ) * ICOFFA = MOD( JA-1, NB_A ) * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * MP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) * NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) * DTAU1 = NUMROC( JA + SIZE- 1, NB_A, MYCOL, IACOL, NPROW ) * DTAU2 = NUMROC( IA + SIZE- 1, MB_A, MYROW, IAROW, NPROW ) * SIZEMQRLEFT = MAX( (MB_A*(MB_A-1))/2, ( MP + NQ ) * MB_A ) * + ( MP + NB_A ) * NB_A * SIZEMLQRIGHT = MAX( (MB_A*(MB_A-1))/2, (MP + NQ)*MB_A ) + * MB_A * MB_A * SIZEQRF = NB_A*NP + MB_A*NQ + NB_A*NB_A * * INFO (local output) INTEGER * * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, DTAU1, DTAU2, I, IACOL, IAROW, ICOFFA, $ IROFFA, LCM, LCMQ, LDAA, LQ_WORK, LWMIN, MB_A, $ MP, MYCOL, MYROW, NB_A, NPCOL, NPROW, NQ, $ PTR2AA, PTR2TAU, PTR2WORK, QR_WORK, RSRC_A, $ SIZE, SIZELQF, SIZEMLQRIGHT, SIZEMQRLEFT, $ SIZEQRF * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PDELSET, PDGELQF, $ PDGEQRF, PDLASET, PDMATGEN, PDORMLQ, PDORMQR, $ PXERBLA * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*DLEN_*DTYPE_*M_*N_.LT.0 )RETURN * * Initialize grid information. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK. * INFO = 0 SIZE = MIN( M, N ) IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 8, INFO ) END IF * Calculation of a minimum workspace. LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) DTAU1 = NUMROC( JA+SIZE-1, NB_A, MYCOL, IACOL, NPCOL ) DTAU2 = NUMROC( IA+SIZE-1, MB_A, MYROW, IAROW, NPROW ) MP = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) * SIZEMQRLEFT = MAX( ( MB_A*( MB_A-1 ) ) / 2, ( MP+NQ )*MB_A ) + $ ( MP+NB_A )*NB_A SIZEMLQRIGHT = MAX( ( MB_A*( MB_A-1 ) ) / 2, ( MP+NQ )*MB_A ) + $ MB_A*MB_A SIZEQRF = NB_A*MP + MB_A*NQ + NB_A*NB_A + 100 SIZELQF = NB_A*( MP+NQ+NB_A ) + 100 * QR_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU1 ) + $ MAX( SIZEMQRLEFT, SIZEQRF ) LQ_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU2 ) + $ MAX( SIZEMLQRIGHT, SIZELQF ) LWMIN = MAX( QR_WORK, LQ_WORK ) WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 20 * * Test the input arguments. * IF( INFO.EQ.0 ) THEN IF( SIZE.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDLAGGE', -INFO ) RETURN END IF * * Build a diagonal matrix A with the eigenvalues specified in D. * CALL PDLASET( 'Full', M, N, ZERO, ZERO, A, IA, JA, DESCA ) DO 10 I = 1, SIZE CALL PDELSET( A, I, I, DESCA, D( I ) ) 10 CONTINUE * * Local dimension of array TAU in tis case is LOCc(JA+MIN(M,N)-1). * PTR2AA = 2 PTR2TAU = PTR2AA + LDAA*MAX( 1, NQ ) + 100 PTR2WORK = PTR2TAU + MAX( 1, DTAU1 ) + 100 * CALL PDLASET( 'All', M, N, ZERO, ZERO, WORK( PTR2AA ), IA, JA, $ DESCA ) * * Build a random matrix AA1. * CALL PDMATGEN( DESCA( CTXT_ ), 'N', 'N', M, N, DESCA( MB_ ), $ DESCA( NB_ ), WORK( PTR2AA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), 0, MP, $ 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Produce QR decomposition AA1 -> U*R. * CALL PDGEQRF( M, N, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), WORK( PTR2WORK ), SIZEQRF, INFO ) * * A = U*A. * CALL PDORMQR( 'L', 'N', M, N, SIZE, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), A, IA, JA, DESCA, WORK( PTR2WORK ), $ SIZEMQRLEFT, INFO ) * * Reinitialize pointer to WORK array. Dimension of array TAU in * this case is LOCr(IA+MIN(M,N)-1). * PTR2WORK = PTR2TAU + MAX( 1, DTAU2 ) + 100 * * Use the same workspace to generate a random matrix AA2. * CALL PDMATGEN( DESCA( CTXT_ ), 'N', 'N', M, N, DESCA( MB_ ), $ DESCA( NB_ ), WORK( PTR2AA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 2 ), 0, MP, $ 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Produce LQ decomposition of random matrix AA2 -> L*VT. * CALL PDGELQF( M, N, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), WORK( PTR2WORK ), SIZELQF, INFO ) * * Calculate A = A*VT. * CALL PDORMLQ( 'R', 'N', M, N, SIZE, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), A, IA, JA, DESCA, WORK( PTR2WORK ), $ SIZEMLQRIGHT, INFO ) * * End of PDLAGGE * 20 CONTINUE RETURN END scalapack-1.8.0/TESTING/EIG/pstrdinfo.f0000640000175000017500000003224110363532303017216 0ustar muammarmuammar SUBROUTINE PSTRDINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY*(*) INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSTRDINFO gets needed startup information for the symmetric * tridiagonal reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL PSLAMCH, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='TRD.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GOTO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GOTO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 4 ) = 1 ELSE WORK( 4 ) = 2 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction Routine to symmetric '// $ 'tridiagonal form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision symmetric '// $ 'tridiagonal' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - QTQ''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether the ''Upper'' or ''Low'// $ 'er'' part of A is to be referenced.' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'TRD time : Time in seconds to reduce the'// $ ' matrix to tridiagonal form.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'symmetric tridiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) IF( WORK( 4 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PSTRDINFO * END scalapack-1.8.0/TESTING/EIG/pdsepdriver.f0000640000175000017500000002526110363532303017541 0ustar muammarmuammar* * PROGRAM PDSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel DOUBLE PRECISION symmetric eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and DBLSIZ * indicate the length in bytes on the given platform for an integer * and a double precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * DOUBLE is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * DOUBLE PRECISION words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PDSYEVX, the expert driver for the parallel * symmetric eigenvalue problem, PDSYEV and PDSYEVD. We would like * to cover all possible combinations of: matrix size, process * configuration (nprow and npcol), block size (nb), * matrix type (??), range of eigenvalue (all, by value, * by position), sorting options, and upper vs. lower storage. * * As PDSYEV returns an error message when heterogeneity is detected, * the PDSYEV tests can be suppressed by changing the appropiate * entry in the input file. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, DBLESZ, NIN PARAMETER ( TOTMEM = 2000000, DBLESZ = 8, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / DBLESZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Functions .. DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PDLACHKIEEE, PDLASNBT, PDSEPREQ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PDLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PDLACHKIEEE( ISIEEE, DLAMCH( 'O' ), DLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK symmetric Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'symmetric eigenvalue routine: PDSYEVX & '// $ ' PDSYEV & PDSYEVD.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 )' ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )' ||Q^T*Q - I|| ' // '/ (N * eps)' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see PDSEPtst.f).' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see PDSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )'QTQ : ||Q^T*Q - I||/ (N * eps)' WRITE( NOUT, FMT = 9999 ) $ ' : when the adjusted QTQ exceeds THRESH', $ ' the adjusted QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' : otherwise the true QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' If NT>1, CHK and QTQ are the max over all ' // $ 'eigen request tests' WRITE( NOUT, FMT = 9999 ) $ 'TEST : EVX - testing PDSYEVX, EV - testing PDSYEV, '// $ 'EVD - testing PDSYEVD' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PDSEPREQ( HETERO, NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * c CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', ' ') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP SUB WALL CPU ', $ ' CHK QTQ CHECK TEST' ) 9978 FORMAT( ' ----- --- --- --- --- --- -------- --------', $ ' --------- --------- ----- ----' ) * * End of PDSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pzsepsubtst.f0000640000175000017500000007274210363532303017626 0ustar muammarmuammar* * SUBROUTINE PZSEPSUBTST( WKNOWN, JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, IA, JA, $ DESCA, WIN, WNEW, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK, LWORK, RWORK, $ LRWORK, LWORK1, IWORK, LIWORK, RESULT, $ TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, IPOSTPAD, IPREPAD, IU, JA, LIWORK, $ LRWORK, LWORK, LWORK1, N, NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX*16 A( * ), COPYA( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZSEPSUBTST calls PZHEEVX and then tests the output of * PZHEEVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PZHEEVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PZSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PZSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PZSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX*16 array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PZHEEVX for a description of block cyclic layout. * The test matrix, which is then modified by PZHEEVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX*16 array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) COMPLEX*16 array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PZSEPCHK and PZSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as copmuted by this call to PZHEEVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PZHEEVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PZHEEVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0 ) COMPLEX*16 ZPADVAL PARAMETER ( ZPADVAL = ( 13.989D+0, 1.93D+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZESUBTST, SIZETMS, SIZETST, VALSIZE, $ VECSIZE, SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD DOUBLE PRECISION EPS, EPSNORMA, ERROR, MAXERROR, MAXVU, $ MINERROR, MINVL, NORMWIN, OLDVL, OLDVU, ORFAC, $ SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PZLANHE EXTERNAL LSAME, NUMROC, PDLAMCH, PZLANHE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGAMN2D, DGAMX2D, $ IGAMN2D, IGAMX2D, PDCHEKPAD, PDFILLPAD, $ PICHEKPAD, PIFILLPAD, PZCHEKPAD, PZELSET, $ PZFILLPAD, PZHEEVX, PZLASIZEHEEVX, PZLASIZESEP, $ PZSEPCHK, PZSEPQTQ, SLBOOT, SLTIMER, ZLACPY * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63D+0, 1.1D+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 40 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 50 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 50 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 160 RESULT = 0 * * * DSEED is not used in this call to PZLASIZEHEEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PZLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL ZLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PZFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, ZPADVAL ) * CALL PZFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, ZPADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PZFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, ZPADVAL+4.1D+0 ) * * Make sure that PZHEEVX does not cheat (i.e. use answers * already computed.) * DO 70 I = 1, N, 1 DO 60 J = 1, MAXEIGS, 1 CALL PZELSET( Z( 1+IPREPAD ), I, J, DESCA, $ ( 13.0D+0, 1.34D+0 ) ) 60 CONTINUE 70 CONTINUE * ORFAC = -1.0D+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PZHEEVX( JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ VL, VU, IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), $ ORFAC, Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), SIZEHEEVX, RWORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, $ IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, ZPADVAL ) * CALL PZCHEKPAD( DESCZ( CTXT_ ), 'PZHEEVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ ZPADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, ZPADVAL+4.1D+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEEVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 80 I = 1, M RWORK( I ) = WNEW( I+IPREPAD ) RWORK( I+M ) = WNEW( I+IPREPAD ) 80 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, RWORK, M, $ 1, 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ RWORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 90 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( RWORK( I )-RWORK( M+ $ I ) ).GT.FIVE*EPS*ABS( RWORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 90 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 100 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 110 NCLUSTERS = NCLUSTERS + 1 100 CONTINUE 110 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 120 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 120 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 130 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 130 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 160 * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PZLANHE( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ RWORK )*EPS END IF * * Note that a couple key variables get redefined in PZSEPCHK * as described by this table: * * PZSEPTST name PZSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PDFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, $ RSIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * CALL PZSEPCHK( N, NZ, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), RWORK( 1+IPREPAD ), $ RSIZECHK, TSTNRM, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZSEPCHK-rWORK', RSIZECHK, $ 1, RWORK, RSIZECHK, IPREPAD, IPOSTPAD, $ 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * * Perform the |QTQ - I| test * CALL PDFILLPAD( DESCA( CTXT_ ), RSIZEQTQ, 1, RWORK, $ RSIZEQTQ, IPREPAD, IPOSTPAD, 4.3D+0 ) * * CALL PZSEPQTQ( N, NZ, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1+IPREPAD+1 ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), RWORK( IPREPAD+1 ), $ RSIZEQTQ, QTQNRM, INFO, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZSEPQTQ-rWORK', RSIZEQTQ, $ 1, RWORK, RSIZEQTQ, IPREPAD, IPOSTPAD, $ 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 150 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 140 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 150 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxHEEVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 160 CONTINUE * * RETURN * 9999 FORMAT( 'PZHEEVX returned INFO=', I7 ) 9998 FORMAT( 'PZSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PZSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PZHEEVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PZHEEVX' ) 9981 FORMAT( 'NZ altered by PZHEEVX with JOBZ=N' ) * * End of PZSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pztrdinfo.f0000640000175000017500000003224710363532303017233 0ustar muammarmuammar SUBROUTINE PZTRDINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 27, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY*(*) INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZTRDINFO gets needed startup information for the Hermitian * tridiagonal reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='TRD.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GOTO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GOTO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 4 ) = 1 ELSE WORK( 4 ) = 2 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction Routine to Hermitian '// $ 'tridiagonal form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex double precision Hermitian '// $ 'tridiagonal' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - QTQ''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether the ''Upper'' or ''Low'// $ 'er'' part of A is to be referenced.' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'TRD time : Time in seconds to reduce the'// $ ' matrix to tridiagonal form.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'Hermitian tridiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) IF( WORK( 4 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PZTRDINFO * END scalapack-1.8.0/TESTING/EIG/pslasizegsep.f0000640000175000017500000001111310363532303017712 0ustar muammarmuammar* * SUBROUTINE PSLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESUBTST, ISIZESUBTST, SIZETST, $ ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PSLASIZEGSEP computes the amount of memory needed by * various GSEP test routines, as well as SYGVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PSSYGVX * * SIZEMQRLEFT LWORK for the 1st PSORMQR call in PSLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PSORMQR call in PSLAGSY * * SIZEQRF LWORK for PSGEQRF in PSLAGSY * * SIZETMS LWORK for PSLATMS * * SIZEQTQ LWORK for PSSEPQTQ (nexer complex) * * SIZECHK LWORK for PSGSEPCHK * * SIZESYEVX LWORK for PSSYGVX * * ISIZESYEVX LIWORK for PSSYGVX * * SIZESUBTST LWORK for PSSUBTST * * ISIZESUBTST LIWORK for PSSUBTST * * SIZETST LWORK for PSTST * * ISIZETST LIWORK for PSTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, LCM, $ LCMQ, LDA, MQ0, MYCOL, MYROW, N, NB, NEIG, NN, $ NNP, NP, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 5*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PSSYGVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pssepdriver.f0000640000175000017500000002515710363532303017564 0ustar muammarmuammar* * PROGRAM PSSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel REAL symmetric eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and REALIZ * indicate the length in bytes on the given platform for an integer * and a double precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * REAL is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * REAL words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PSSYEVX, the expert driver for the parallel * symmetric eigenvalue problem, and PSSYEV. We would like to cover * all possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * As PSSYEV returns an error message when heterogeneity is detected, * the PSSYEV tests can be suppressed by changing the appropiate * entry in the input file. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, REALSZ, NIN PARAMETER ( TOTMEM = 2000000, REALSZ = 8, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / REALSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) REAL MEM( MEMSIZ ) * .. * .. External Functions .. REAL SLAMCH EXTERNAL SLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PSLACHKIEEE, PSLASNBT, PSSEPREQ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PSLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PSLACHKIEEE( ISIEEE, SLAMCH( 'O' ), SLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK symmetric Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'symmetric eigenvalue routine: PSSYEVX & PSSYEV & ' // $ ' PSSYEVD.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 )' ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )' ||Q^T*Q - I|| ' // '/ (N * eps)' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see PSSEPtst.f).' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see PSSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )'QTQ : ||Q^T*Q - I||/ (N * eps)' WRITE( NOUT, FMT = 9999 ) $ ' : when the adjusted QTQ exceeds THRESH', $ ' the adjusted QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' : otherwise the true QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' If NT>1, CHK and QTQ are the max over all ' // $ 'eigen request tests' WRITE( NOUT, FMT = 9999 ) $ 'TEST : EVX - testing PSSYEVX, EV - testing PSSYEV' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 1454 ISEED( 2 ) = 3834 ISEED( 3 ) = 2203 ISEED( 4 ) = 583 * CALL PSSEPREQ( HETERO, NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP SUB WALL CPU ', $ ' CHK QTQ CHECK TEST' ) 9978 FORMAT( ' ----- --- --- --- --- --- -------- --------', $ ' --------- --------- ----- ----' ) * * End of PSSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/listing0000640000175000017500000000067410363532303016440 0ustar muammarmuammar* orthogonal matrix Q, the Hessenberg matrix, and the array TAU returned INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, DOUBLE PRECISION ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, * orthogonal matrix Q, the Hessenberg matrix, and the array TAU returned INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, REAL ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, scalapack-1.8.0/TESTING/EIG/pzlatran.f0000640000175000017500000001570010363532303017042 0ustar muammarmuammar SUBROUTINE PZLATRAN( N, NB, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, JA, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * * ======= * * PZLATRAN transpose a lower triangular matrix on to the upper * triangular portion of the same matrix. * * This is an auxiliary routine called by PZHETRD. * * Notes * ===== * * IA must equal 1 * JA must equal 1 * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( RSRC_ ) must equal 1 * DESCA( CSRC_ ) must equal 1 * * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * NB (global input) INTEGER * The number of rows and columns to be transposed with each * message sent. NB has no impact on the result, it is striclty * a performance tuning parameter. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). On entry, the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. On exit, the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is undefined (and may have been modified). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * Must be equal to 1. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * Must be equal to 1. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( ICTXT_ ) must point to a square process grid * i.e. one where NPROW is equal to NPCOL * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * Where: * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND, $ LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND, $ MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL, $ MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB, $ STARTCOL, STARTROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZTRRV2D, ZTRSD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Further details * * Because the processor grid is square each process needs only send * data to its transpose process. (Likewsie it need only receive * data from its transpose process.) Because the data decomposition * is cyclic, the local portion of the array is triangular. * * This routine requires that the data be buffered (i.e. copied) * on the sending process (because of the triangular shape) and * unbuffered on the receiving process. Hence, two local memory to * memory copies are performed within the communications routines * followed by a memory to memory copy outside of the communications * routines. It would be nice to avoid having back to back memory * to memory copies (as we do presently on the receiving processor). * This could be done by packaging the data ourselves in the sender * and then unpacking it directly into the matrix. However, this * code seems cleaner and so since this routine is not a significant * performance bottleneck we have left it this way. * * * * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * * IF( MYROW.EQ.MYCOL ) THEN * DO 20 J = 1, NP DO 10 I = J + 1, NQ A( J+( I-1 )*LDA ) = DCONJG( A( I+( J-1 )*LDA ) ) 10 CONTINUE 20 CONTINUE * ELSE IF( MYROW.GT.MYCOL ) THEN STARTROW = 1 STARTCOL = 2 ELSE IF( MYROW.EQ.MYCOL ) THEN STARTROW = 2 STARTCOL = 2 ELSE STARTROW = 2 STARTCOL = 1 END IF END IF * DO 50 JJ = 1, MAX( NP, NQ ), NB MINJSEND = STARTCOL + JJ - 1 MINJRECV = STARTROW + JJ - 1 MAXJSEND = MIN( MINJSEND+NB-1, NQ ) MAXJRECV = MIN( MINJRECV+NB-1, NP ) * SENDNB = MAXJSEND - MINJSEND + 1 RECVNB = MAXJRECV - MINJRECV + 1 * MINISEND = 1 MINIRECV = 1 MAXISEND = MIN( NP, JJ+SENDNB-1 ) MAXIRECV = MIN( NQ, JJ+RECVNB-1 ) * ISEND = MAXISEND - MINISEND + 1 IRECV = MAXIRECV - MINIRECV + 1 JSEND = MAXJSEND - MINJSEND + 1 JRECV = MAXJRECV - MINJRECV + 1 * * * DO 40 J = MINJRECV, MAXJRECV DO 30 I = MINIRECV, MAXIRECV + J - MAXJRECV WORK( I+( J-MINJRECV )*IRECV ) $ = DCONJG( A( J+( I-1 )*LDA ) ) 30 CONTINUE 40 CONTINUE * IF( IRECV.GT.0 .AND. JRECV.GT.0 ) $ CALL ZTRSD2D( ICTXT, 'U', 'N', IRECV, JRECV, WORK, IRECV, $ MYCOL, MYROW ) * IF( ISEND.GT.0 .AND. JSEND.GT.0 ) $ CALL ZTRRV2D( ICTXT, 'U', 'N', ISEND, JSEND, $ A( MINISEND+( MINJSEND-1 )*LDA ), LDA, $ MYCOL, MYROW ) * * 50 CONTINUE * END IF * RETURN * * End of PZLATRD * END scalapack-1.8.0/TESTING/EIG/pdsvdchk.f0000640000175000017500000003301310363532303017012 0ustar muammarmuammar SUBROUTINE PDSVDCHK( M, N, A, IA, JA, DESCA, U, IU, JU, DESCU, VT, $ IVT, JVT, DESCVT, S, THRESH, WORK, LWORK, $ RESULT, CHK, MTM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IU, IVT, JA, JU, JVT, LWORK, M, N DOUBLE PRECISION CHK, MTM, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCU( * ), DESCVT( * ), $ RESULT( * ) DOUBLE PRECISION A( * ), S( * ), U( * ), VT( * ), WORK( * ) * .. * * Purpose * ======= * * For given two-dimensional matrices A, U, VT, and one-dimensional * array D compute the following four tests: * * (1) | A - U*diag(S) VT | / ( |A| max(M,N) ulp ) * * (2) | I - U'*U | / ( M ulp ) * * (3) | I - VT*VT' | / ( N ulp ), * * (4) S contains SIZE = MIN( M, N ) nonnegative values in * decreasing order. * It then compares result of computations (1)-(3) * with TRESH and returns results of comparisons and test (4) in * RESULT(I). When the i-th test fails, value of RESULT( I ) is set * to 1. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZEP = number of local rows in VT * SIZEQ = number of local columns in U * * M (global input) INTEGER * Matrix size. * The number of global rows in A and U and * * N (global input) INTEGER * The number of global columns in A and VT. * * A (input) block cyclic distributed DOUBLE PRECISION array, * global dimension (M, N), local dimension (DESCA( DLEN_ ), NQ) * Contains the original test matrix. * * IA (global input) INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * U (local input) DOUBLE PRECISION array * global dimension (M, SIZE), local dimension * (DESCU( DLEN_ ), SIZEQ) * Contains left singular vectors of matrix A. * * IU (global input) INTEGER * The global row index of the submatrix of the distributed * matrix U to operate on. * * JU (global input) INTEGER * The global column index of the submatrix of the distributed * matrix U to operate on. * * DESCU (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local input) DOUBLE PRECISION array * global dimension (SIZE, N), local dimension * (DESCVT( DLEN_ ), NQ) * Contains right singular vectors of matrix A. * * IVT (global input) INTEGER * The global row index of the submatrix of the distributed * matrix VT to operate on. * * JVT (global input) INTEGER * The global column index of the submatrix of the distributed * matrix VT to operate on. * * DESCVT (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * S (global input) DOUBLE PRECISION array, dimension (SIZE) * Contains the computed singular values * * THRESH (input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 1 + SIZEQ*SIZEP + MAX[WORK(pdlange(size,size)), * WORK(pdlange(m,n))], * where * SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ), * SIZEP = NUMROC( SIZE, DESCVT( MB_ ), MYROW, 0, NPROW ), * and worekspaces required to call pdlange are * WORK(pdlange(size,size)) < MAX(SIZEQ0,2) < SIZEB +2, * WORK(pdlange(m,n)) < MAX(NQ0,2) < SIZEB +2, * SIZEB = MAX(M, N) * Finally, upper limit on required workspace is * LWORK > 1 + SIZEQ*SIZEP + SIZEB + 2 * * RESULT (global input/output) INTEGER array. Four first elements of * the array are set to 0 or 1 depending on passing four * respective tests ( see above in Purpose ). The elements of * RESULT are set to * 0 if the test passes i.e. * | A - U*diag(S)*VT | / ( |A| max(M,N) ulp ) <= THRESH * 1 if the test fails i.e. * | A - U*diag(S)*VT | / ( |A| max(M,N) ulp ) > THRESH * * CHK (global output) DOUBLE PRECISION * value of the | A - U*diag(S) VT | / ( |A| max(M,N) ulp ) * * MTM (global output) DOUBLE PRECISION * maximum of the two values: * | I - U'*U | / ( M ulp ) and | I - VT*VT' | / ( N ulp ) * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, MONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, MONE = -1.0D0 ) * .. * .. Local Scalars .. INTEGER I, INFO, LDR, LOCALCOL, LWMIN, MP, MX, MYCOL, $ MYROW, NPCOL, NPROW, NQ, PCOL, PTRR, PTRWORK, $ SIZE, SIZEP, SIZEPOS, SIZEQ DOUBLE PRECISION FIRST, NORMA, NORMAI, NORMU, NORMVT, SECOND, $ THRESHA, ULP * .. * .. Local Arrays .. INTEGER DESCR( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCINIT, DSCAL, $ PDELSET, PDGEMM, PDLASET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*DTYPE_*M_*N_*RSRC_.LT.0 ) RETURN * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 SIZE = MIN( M, N ) * * Sizepos is a number of parameters to pdsvdchk plus one. It's used * for the error reporting. * SIZEPOS = 22 IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( M, 1, SIZE, SIZEPOS, IU, JU, DESCU, 10, INFO ) CALL CHK1MAT( SIZE, SIZEPOS, N, 2, IVT, JVT, DESCVT, 14, INFO ) END IF * IF( INFO.EQ.0 ) THEN * * Calculate workspace * MP = NUMROC( M, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) SIZEP = NUMROC( SIZE, DESCVT( MB_ ), MYROW, 0, NPROW ) SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ) MX = MAX( SIZEQ, NQ ) LWMIN = 2 + SIZEQ*SIZEP + MAX( 2, MX ) WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 40 IF( LWORK.LT.LWMIN ) THEN INFO = -18 ELSE IF( THRESH.LE.0 ) THEN INFO = -16 END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSVDCHK', -INFO ) RETURN END IF * LDR = MAX( 1, SIZEP ) ULP = PDLAMCH( DESCA( CTXT_ ), 'P' ) NORMAI = PDLANGE( '1', M, N, A, IA, JA, DESCA, WORK ) * * Allocate array R of global dimension SIZE x SIZE for testing * PTRR = 2 PTRWORK = PTRR + SIZEQ*SIZEP * CALL DESCINIT( DESCR, SIZE, SIZE, DESCVT( MB_ ), DESCU( NB_ ), 0, $ 0, DESCA( CTXT_ ), LDR, INFO ) * * Test 2. Form identity matrix R and make check norm(U'*U - I ) * CALL PDLASET( 'Full', SIZE, SIZE, ZERO, ONE, WORK( PTRR ), 1, 1, $ DESCR ) CALL PDGEMM( 'T', 'N', SIZE, SIZE, M, ONE, U, IU, JU, DESCU, U, $ IU, JU, DESCU, MONE, WORK( PTRR ), 1, 1, DESCR ) * NORMU = PDLANGE( '1', SIZE, SIZE, WORK( PTRR ), 1, 1, DESCR, $ WORK( PTRWORK ) ) * NORMU = NORMU / ULP / SIZE / THRESH IF( NORMU.GT.1. ) $ RESULT( 2 ) = 1 * * Test3. Form identity matrix R and check norm(VT*VT' - I ) * CALL PDLASET( 'Full', SIZE, SIZE, ZERO, ONE, WORK( PTRR ), 1, 1, $ DESCR ) CALL PDGEMM( 'N', 'T', SIZE, SIZE, N, ONE, VT, IVT, JVT, DESCVT, $ VT, IVT, JVT, DESCVT, MONE, WORK( PTRR ), $ 1, 1, DESCR ) NORMVT = PDLANGE( '1', SIZE, SIZE, WORK( PTRR ), 1, 1, DESCR, $ WORK( PTRWORK ) ) * NORMVT = NORMVT / ULP / SIZE / THRESH IF( NORMVT.GT.1. ) $ RESULT( 3 ) = 1 * MTM = MAX( NORMVT, NORMU )*THRESH * * Test 1. * Initialize R = diag( S ) * CALL PDLASET( 'Full', SIZE, SIZE, ZERO, ZERO, WORK( PTRR ), 1, 1, $ DESCR ) * DO 10 I = 1, SIZE CALL PDELSET( WORK( PTRR ), I, I, DESCR, S( I ) ) 10 CONTINUE * * Calculate U = U*R * DO 20 I = 1, SIZE PCOL = INDXG2P( I, DESCU( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( I, DESCU( NB_ ), 0, 0, NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CALL DSCAL( MP, S( I ), U( ( LOCALCOL-1 )*DESCU( LLD_ )+1 ), $ 1 ) END IF 20 CONTINUE * * Calculate A = U*VT - A * CALL PDGEMM( 'N', 'N', M, N, SIZE, ONE, U, IU, JU, DESCU, VT, $ IVT, JVT, DESCVT, MONE, A, IA, JA, DESCA ) * NORMA = PDLANGE( '1', M, N, A, IA, JA, DESCA, WORK( PTRWORK ) ) THRESHA = NORMAI*MAX( M, N )*ULP*THRESH * IF( NORMA.GT.THRESHA ) $ RESULT( 1 ) = 1 * IF( THRESHA.EQ.0 ) THEN CHK = 0.0D0 ELSE CHK = NORMA / THRESHA*THRESH END IF * * Test 4. * DO 30 I = 1, SIZE - 1 FIRST = S( I ) SECOND = S( I+1 ) IF( FIRST.LT.SECOND ) $ RESULT( 4 ) = 1 30 CONTINUE 40 CONTINUE RETURN END scalapack-1.8.0/TESTING/EIG/pdtrdinfo.f0000640000175000017500000003224110363532303017177 0ustar muammarmuammar SUBROUTINE PDTRDINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY*(*) INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDTRDINFO gets needed startup information for the symmetric * tridiagonal reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='TRD.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GOTO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GOTO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 4 ) = 1 ELSE WORK( 4 ) = 2 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction Routine to symmetric '// $ 'tridiagonal form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision symmetric '// $ 'tridiagonal' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - QTQ''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether the ''Upper'' or ''Low'// $ 'er'' part of A is to be referenced.' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'TRD time : Time in seconds to reduce the'// $ ' matrix to tridiagonal form.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'symmetric tridiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) IF( WORK( 4 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDTRDINFO * END scalapack-1.8.0/TESTING/EIG/pdlafchk.f0000640000175000017500000002226010363532303016762 0ustar muammarmuammar SUBROUTINE PDLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, IROFF, $ JB, JJ, JJA, JN, LDA, LDW, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DMATADD, INFOG2L, PDMATGEN * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL DMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 10 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 ) * LDA CALL PDMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJ-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL DMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) 10 CONTINUE * * Calculate factor residual * FRESID = PDLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PDLAFCHK * END scalapack-1.8.0/TESTING/EIG/pzgebdrv.f0000640000175000017500000004725610363532303017045 0ustar muammarmuammar SUBROUTINE PZGEBDRV( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEBDRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from sub( A ), * Q, P returned by PZGEBRD: * * sub( A ) := Q * B * P'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of sub( A ) * as returned by PZGEBRD. On exit, the original distribu- * ted matrix sub( A ) is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local input) COMPLEX*16 array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local input) COMPLEX*16 array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= 2*NB*( MP + NQ + NB ) * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MP = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION REIGHT, RZERO PARAMETER ( REIGHT = 8.0D+0, RZERO = 0.0D+0 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IIA, IL, IPTP, IPTQ, $ IPV, IPW, IPWK, IOFF, IV, J, JB, JJA, JL, JV, $ K, MN, MP, MYCOL, MYROW, NB, NPCOL, NPROW, NQ DOUBLE PRECISION ADDBND, D2, E2 COMPLEX*16 D1, E1 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, IGSUM2D, INFOG2L, $ PDELGET, PZLACPY, PZLARFB, PZLARFT, $ PZLASET, PZELGET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXG2P, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) IOFF = MOD( IA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) IPV = 1 IPW = IPV + MP*NB IPTP = IPW + NQ*NB IPTQ = IPTP + NB*NB IPWK = IPTQ + NB*NB * IV = 1 JV = 1 MN = MIN( M, N ) IL = MAX( ( (IA+MN-2) / NB )*NB + 1, IA ) JL = MAX( ( (JA+MN-2) / NB )*NB + 1, JA ) IAROW = INDXG2P( IL, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, IA+M-IL, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCW, NB, JA+N-JL, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * ADDBND = REIGHT * PDLAMCH( ICTXT, 'eps' ) * * When A is an upper bidiagonal form * IF( M.GE.N ) THEN * CALL DESCSET( DESCD, 1, JA+MN-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * DO 10 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PZELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, IA+J, 1, DESCE ) CALL PZELGET( 'Rowwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1-DCMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1-DCMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * DO 20 J = JL, JA+NB-IOFF, -NB JB = MIN( JA+N-J, NB ) I = IA + J - JA K = I - IA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PZLARFT( 'Forward', 'Columnwise', M-K+1, JB, A, I, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Lower', M-K+1, JB, A, I, J, DESCA, $ WORK( IPV ), IV, JV, DESCV ) CALL PZLASET( 'Upper', M-K+1, JB, ZERO, ONE, WORK( IPV ), $ IV, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', M-K, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PZLARFT( 'Forward', 'Rowwise', N-K, JB, A, I, J+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Upper', JB, N-K, A, I, J+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PZLASET( 'Lower', JB, N-K, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PZLASET( 'Upper', JB, N-K-1, ZERO, ZERO, A, I, J+2, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PZLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K+1, N-K+1, JB, WORK( IPV ), $ IV, JV, DESCV, WORK( IPTQ ), A, I, J, DESCA, $ WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-K+1, N-K, JB, WORK( IPW ), IV, $ JV+1, DESCW, WORK( IPTP ), A, I, J+1, DESCA, $ WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 20 CONTINUE * * Handle first block separately * JB = MIN( N, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PZLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IV, JV, DESCV ) CALL PZLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPV ), IV, JV, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PZLARFT( 'Forward', 'Rowwise', N-1, JB, A, IA, JA+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Upper', JB, N-1, A, IA, JA+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PZLASET( 'Lower', JB, N-1, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PZLASET( 'Upper', JB, N-2, ZERO, ZERO, A, IA, JA+2, $ DESCA ) * * Apply block Householder transformation from left. * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N, JB, WORK( IPV ), IV, JV, DESCV, $ WORK( IPTQ ), A, IA, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right. * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N-1, JB, WORK( IPW ), IV, JV+1, $ DESCW, WORK( IPTP ), A, IA, JA+1, DESCA, $ WORK( IPWK ) ) * ELSE * CALL DESCSET( DESCD, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MN-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PDELGET( ' ', ' ', D2, D, IA+J, 1, DESCD ) CALL PZELGET( 'Rowwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PZELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1-DCMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1-DCMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * DO 40 I = IL, IA+NB-IOFF, -NB JB = MIN( IA+M-I, NB ) J = JA + I - IA K = J - JA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PZLARFT( 'Forward', 'Columnwise', M-K, JB, A, I+1, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Lower', M-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PZLASET( 'Upper', M-K, JB, ZERO, ONE, WORK( IPV ), $ IV+1, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', M-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PZLARFT( 'Forward', 'Rowwise', N-K+1, JB, A, I, J, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Upper', JB, N-K+1, A, I, J, DESCA, $ WORK( IPW ), IV, JV, DESCW ) CALL PZLASET( 'Lower', JB, N-K+1, ZERO, ONE, WORK( IPW ), $ IV, JV, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PZLASET( 'Upper', JB, N-K, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PZLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K, N-K+1, JB, WORK( IPV ), $ IV+1, JV, DESCV, WORK( IPTQ ), A, I+1, J, $ DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-K+1, N-K+1, JB, WORK( IPW ), IV, $ JV, DESCW, WORK( IPTP ), A, I, J, DESCA, $ WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 40 CONTINUE * * Handle first block separately * JB = MIN( M, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PZLARFT( 'Forward', 'Columnwise', M-1, JB, A, IA+1, JA, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Lower', M-1, JB, A, IA+1, JA, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PZLASET( 'Upper', M-1, JB, ZERO, ONE, WORK( IPV ), IV+1, $ JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PZLASET( 'Lower', M-2, JB, ZERO, ZERO, A, IA+2, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PZLARFT( 'Forward', 'Rowwise', N, JB, A, IA, JA, DESCA, $ TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PZLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPW ), $ IV, JV, DESCW ) CALL PZLASET( 'Lower', JB, N, ZERO, ONE, WORK( IPW ), IV, JV, $ DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PZLASET( 'Upper', JB, N-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Apply block Householder transformation from left * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-1, N, JB, WORK( IPV ), IV+1, JV, DESCV, $ WORK( IPTQ ), A, IA+1, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N, JB, WORK( IPW ), IV, JV, DESCW, $ WORK( IPTP ), A, IA, JA, DESCA, WORK( IPWK ) ) END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PZGEBDRV * END scalapack-1.8.0/TESTING/EIG/pzttrdtester.f0000640000175000017500000006215010363532303017766 0ustar muammarmuammar SUBROUTINE PZTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 24, 2000 * * .. Scalar Arguments .. LOGICAL CHECK INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS, $ TOTMEM REAL THRESH * .. * .. Array Arguments .. INTEGER NVAL( * ) COMPLEX*16 MEM( * ) * .. * * Purpose * ======= * * PZTTRDTESTER tests PZHETTRD * * Arguments * ========= * * IAM (local input) INTEGER * The local process number * * NPROCS (global input) INTEGER * The number of processors * * CHECK (global input) LOGICAL * Specifies whether the user wants to check the answer * * NOUT (local input) INTEGER * File descriptor * * THRESH (global input) DOUBLE PRECISION * Acceptable error threshold * * NVAL (global input) INTEGER array dimension NMAT * The matrix sizes to test * * NMAT (global input) INTEGER * The number of matrix sizes to test * * MEM (local input) COMPLEX*16 array dimension MEMSIZ * Where: * MEMSIZ = TOTMEM / ZPLXSZ * * TOTMEM (global input) INTEGER * Number of bytes in MEM * * KPASS (local input/output) INTEGER * The number of tests which passed. Only relevant on * processor 0. * * KFAIL (local input/output) INTEGER * The number of tests which failed. Only relevant on * processor 0. * * KSKIP (local input/output) INTEGER * The number of tests which were skipped. Only relevant on * processor 0. * * ================================================================ * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( DBLESZ = 8, ZPLXSZ = 16, $ PADVAL = ( -9923.0D+0, -9924.0D+0 ) ) INTEGER TIMETESTS PARAMETER ( TIMETESTS = 11 ) INTEGER TESTS PARAMETER ( TESTS = 8 ) INTEGER MINTIMEN PARAMETER ( MINTIMEN = 8 ) * .. * .. Local Scalars .. LOGICAL TIME CHARACTER UPLO CHARACTER*6 PASSED INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ LCM, LWMIN, MAXTESTS, MEMSIZ, MYCOL, MYROW, N, $ NB, NDIAG, NGRIDS, NN, NOFFD, NP, NPCOL, NPROW, $ NPS, NQ, SPLITSTIMED, WORKSIZ, WORKTRD DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ), $ BALTEST( TESTS ), BALTIME( TIMETESTS ), $ DESCA( DLEN_ ), DESCD( DLEN_ ), IERR( 1 ), $ INTERTEST( TESTS ), INTERTIME( TIMETESTS ), $ PNBTEST( TESTS ), PNBTIME( TIMETESTS ), $ TWOGEMMTEST( TESTS ), TWOGEMMTIME( TIMETESTS ) DOUBLE PRECISION CTIME( 100 ), WTIME( 100 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGEBR2D, IGEBS2D, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZHETDRV, PZHETTRD, PZLAFCHK, $ PZLATRAN, PZMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC, PJLAENV DOUBLE PRECISION PZLANHE EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PJLAENV, PZLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, SQRT * .. * * .. Scalars in Common .. INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE, $ LLTBLOCK, MINSZ, PNB, TIMEINTERNALS, TIMING, $ TRSBLOCK, TWOGEMMS * .. * .. Common blocks .. COMMON / BLOCKSIZES / GSTBLOCK, LLTBLOCK, BCKBLOCK, $ TRSBLOCK COMMON / MINSIZE / MINSZ COMMON / PJLAENVTIMING / TIMING COMMON / TAILOREDOPTS / PNB, ANB, INTERLEAVE, $ BALANCED, TWOGEMMS COMMON / TIMECONTROL / TIMEINTERNALS * .. * .. Data statements .. DATA BALTIME / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 / DATA INTERTIME / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 / DATA TWOGEMMTIME / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 / DATA ANBTIME / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16, $ 16 / DATA PNBTIME / 32, 32, 32, 32, 32, 32, 32, 32, 32, $ 16, 64 / DATA BALTEST / 0, 0, 0, 0, 1, 1, 1, 1 / DATA INTERTEST / 0, 0, 1, 1, 0, 0, 1, 1 / DATA TWOGEMMTEST / 0, 1, 0, 1, 0, 1, 0, 1 / DATA ANBTEST / 1, 2, 3, 16, 1, 2, 3, 16 / DATA PNBTEST / 1, 16, 8, 1, 16, 8, 1, 16 / * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * IASEED = 100 SPLITSTIMED = 0 NB = 1 UPLO = 'L' MEMSIZ = TOTMEM / ZPLXSZ * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * NGRIDS = INT( SQRT( DBLE( NPROCS ) ) ) * DO 30 NN = 1, NGRIDS * NPROW = NN NPCOL = NN IERR( 1 ) = 0 * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * IF( N.GT.MINTIMEN ) THEN * * For timing tests, we perform one or two extra tests. * Both of these extra tests are performed with the * default values for the performance tuning parameters. * The second extra test (which is only performed if * split times are non-zero) is performed with timeinternals * set to 1 (which forces barrier syncs between many * phases of the computation). * TIME = .TRUE. MAXTESTS = TIMETESTS + 2 ELSE TIME = .FALSE. MAXTESTS = TESTS END IF * * DO 10 K = 1, MAXTESTS TIMEINTERNALS = 0 IF( TIME ) THEN IF( K.GE.MAXTESTS-1 ) THEN * * For the last two timings, we let pjlaenv set * the execution path values. These dummy * initializations aren't really necessary, * but they illustrate the fact that these values are * set in xpjlaenv. The dummy call to pjlaenv * has the side effect of setting ANB. * MINSZ = -13 BALANCED = -13 INTERLEAVE = -13 TWOGEMMS = -13 ANB = -13 PNB = -13 TIMING = 1 DUMMY = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, $ 0, 0 ) IF( K.EQ.MAXTESTS ) $ TIMEINTERNALS = 1 ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTIME( K ) INTERLEAVE = INTERTIME( K ) TWOGEMMS = TWOGEMMTIME( K ) ANB = ANBTIME( K ) PNB = PNBTIME( K ) END IF ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTEST( K ) INTERLEAVE = INTERTEST( K ) TWOGEMMS = TWOGEMMTEST( K ) ANB = ANBTEST( K ) PNB = PNBTEST( K ) END IF * * Skip the last test (with timeinternals = 1) if * PZHETTRD is not collecting the split times. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, 1, $ 0, 0 ) END IF * * IF( SPLITSTIMED.EQ.0 .AND. K.EQ.MAXTESTS ) $ GO TO 10 * * The following hack tests to make sure that PNB need not * be the same on all processes. (Provided that PNB is set * to 1 in the TRD.dat file.) * IF( PNB.EQ.1 ) $ PNB = 1 + IAM * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1, $ INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF NDIAG = ICEIL( DBLESZ*NDIAG, ZPLXSZ ) NOFFD = ICEIL( DBLESZ*NOFFD, ZPLXSZ ) * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORKTRD = LWMIN + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ICEIL( DBLESZ*ITEMP, ZPLXSZ ), $ 2*( NB+NP )*NB ) WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * * * Generate the matrix A * CALL PZMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PZHETTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWMIN, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZHETTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZCHEKPAD( ICTXT, 'PZHETTRDc', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETTRDd', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETTRDe', WORKTRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PZHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) * * TTRD does not preserve the upper triangular part of A. * The following call to PZLATRAN means that we only * check the lower triangular part of A - QTQ' * CALL PZLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZHETDRVf', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRVg', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRVh', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRVi', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0D+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9991 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' * * END IF * * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 50, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 50, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'WALL', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'CPU ', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ CTIME( 1 ), TMFLOPS, FRESID, PASSED * * * If split times were collected (in PZHEttrd.f), print * them out. * IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 ).GT.0.0D+0 .OR. $ CTIME( 13 )+CTIME( 15 )+CTIME( 16 ).GT.0.0D+0 ) $ THEN SPLITSTIMED = 1 END IF IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ), $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ), $ WTIME( 15 ) WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ), $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ), $ WTIME( 21 ) * WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ), $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ), $ CTIME( 15 ) WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ), $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ), $ CTIME( 21 ) WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB, $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS END IF END IF 10 CONTINUE 20 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) END IF END IF * * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE RETURN * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) * 9995 FORMAT( 'PZHETTRD, tailored reduction to tridiagonal form, test.' $ ) 9994 FORMAT( 'TIME N int 2gm bal anb pnb prcs TRD Time ', $ ' MFLOPS Residual CHECK' ) 9993 FORMAT( '---- ---- --- --- --- --- --- ---- -------- ', $ '----------- -------- ------' ) 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9991 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) 9990 FORMAT( 'wsplit1=[wsplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9989 FORMAT( 'wsplit2=[wsplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9988 FORMAT( 'csplit1=[csplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9987 FORMAT( 'csplit2=[csplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9986 FORMAT( 'size_opts=[size_opts;', I4, 1X, I4, 1X, I4, 1X, I4, 1X, $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];' ) 9985 FORMAT( 'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;', $ ' TWOGEMMS=7; TIMEINTERNALS=8;' ) 9984 FORMAT( 'S1_OVERHEAD = 1; % Should be mainly cost of barrier' ) 9983 FORMAT( 'S1_BARRIER = 2; % Cost of barrier' ) 9982 FORMAT( 'S1_UPDCURCOL = 3; % Update the current column' ) 9981 FORMAT( 'S1_HOUSE = 4; % Compute the householder vector' ) 9980 FORMAT( 'S1_SPREAD = 5; % Spread across' ) 9979 FORMAT( 'S1_TRANSPOSE = 6; % Transpose' ) 9978 FORMAT( 'S2_UPDCURBLK = 1; % Update the current block column' ) 9977 FORMAT( 'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' ) 9976 FORMAT( 'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' ) 9975 FORMAT( 'S2_TRANS_SUM = 4; % v = v + vt'' ' ) 9974 FORMAT( 'S2_DOT = 5; % c = v'' * h ' ) 9973 FORMAT( 'S2_R2K = 6; % A = A - v * h'' - h * v'' ' ) * * * End of PZTTRDTESTER * END scalapack-1.8.0/TESTING/EIG/pdsepsubtst.f0000640000175000017500000007054410363532303017576 0ustar muammarmuammar SUBROUTINE PDSEPSUBTST( WKNOWN, JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, IA, JA, $ DESCA, WIN, WNEW, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ IWORK, LIWORK, RESULT, TSTNRM, QTQNRM, $ NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 15, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, IPOSTPAD, IPREPAD, IU, JA, LIWORK, $ LWORK, LWORK1, N, NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) DOUBLE PRECISION A( * ), COPYA( * ), GAP( * ), WIN( * ), $ WNEW( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSEPSUBTST calls PDSYEVX and then tests the output of * PDSYEVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PDSYEVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PDSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PDSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PDSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) DOUBLE PRECISION array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PDSYEVX for a description of block cyclic layout. * The test matrix, which is then modified by PDSYEVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) DOUBLE PRECISION array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PDSEPCHK and PDSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as copmuted by this call to PDSYEVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PDSYEVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PDSYEVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0 ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE DOUBLE PRECISION EPS, EPSNORMA, ERROR, MAXERROR, MAXVU, $ MINERROR, MINVL, NORMWIN, OLDVL, OLDVU, ORFAC, $ SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, NUMROC, PDLAMCH, PDLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGAMN2D, DGAMX2D, $ DLACPY, IGAMN2D, IGAMX2D, PDCHEKPAD, PDELSET, $ PDFILLPAD, PDLASIZESEP, PDLASIZESYEVX, $ PDSEPCHK, PDSEPQTQ, PDSYEVX, PICHEKPAD, $ PIFILLPAD, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PDLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 30 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 40 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 40 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * * * DSEED is not used in this call to PDLASIZESYEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PDLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL DLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PDFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * * Make sure that PDSYEVX does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, MAXEIGS, 1 CALL PDELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0D+0 ) 50 CONTINUE 60 CONTINUE * ORFAC = -1.0D+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PDSYEVX( JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ VL, VU, IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), $ ORFAC, Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, IWORK( 1+IPREPAD ), $ LIWORK, IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PDCHEKPAD( DESCZ( CTXT_ ), 'PDSYEVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYEVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 70 I = 1, M WORK( I ) = WNEW( I+IPREPAD ) WORK( I+M ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, WORK, M, 1, $ 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ WORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 80 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( WORK( I )-WORK( M+ $ I ) ).GT.FIVE*EPS*ABS( WORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 80 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 90 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 100 NCLUSTERS = NCLUSTERS + 1 90 CONTINUE 100 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 110 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 110 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 120 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 120 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 150 * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PDLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PDSEPCHK * as described by this table: * * PDSEPTST name PDSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * CALL PDSEPCHK( N, NZ, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * * Perform the |QTQ - I| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * * CALL PDSEPQTQ( N, NZ, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1+IPREPAD+1 ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), WORK( IPREPAD+1 ), SIZEQTQ, $ QTQNRM, INFO, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 140 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 130 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 130 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 140 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxSYEVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PDSYEVX returned INFO=', I7 ) 9998 FORMAT( 'PDSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PDSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PDSYEVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PDSYEVX' ) 9981 FORMAT( 'NZ altered by PDSYEVX with JOBZ=N' ) * * End of PDSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/psgsepsubtst.f0000640000175000017500000007050410363532303017760 0ustar muammarmuammar* * SUBROUTINE PSGSEPSUBTST( WKNOWN, IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, IA, JA, DESCA, WIN, WNEW, $ IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, LWORK1, IWORK, LIWORK, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IBTYPE, IL, IPOSTPAD, IPREPAD, IU, JA, $ LIWORK, LWORK, LWORK1, N, NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) REAL A( * ), B( * ), COPYA( * ), COPYB( * ), $ GAP( * ), WIN( * ), WNEW( * ), WORK( * ), $ Z( * ) * .. * * Purpose * ======= * * PSGSEPSUBTST calls PSSYGVX and then tests the output of * PSSYGVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < THRESH * |QT * Q - I| / eps * norm(A) < THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PSSYGVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PSGSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PSGSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PSSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) REAL array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PSSYGVX for a description of block cyclic layout. * The test matrix, which is then modified by PSSYGVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) REAL array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * B (local workspace) REAL array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PSSYGVX * * COPYB (local input) REAL array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PSGSEPCHK and PSSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as copmuted by this call to PSSYGVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) REAL array, * dimension (NPROW*NPCOL) * * WORK (local workspace) REAL array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PSSYGVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PSSYGVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / |A|*N*EPS * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0 ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE REAL EPS, ERROR, MAXERROR, MAXVU, MINERROR, MINVL, $ NORMWIN, OLDVL, OLDVU, ORFAC, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH EXTERNAL LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, IGAMN2D, IGAMX2D, $ PICHEKPAD, PIFILLPAD, PSCHEKPAD, PSELSET, $ PSFILLPAD, PSGSEPCHK, PSLASIZEGSEP, $ PSLASIZESYEVX, PSSYGVX, SGAMN2D, SGAMX2D, $ SLACPY, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PSLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 30 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 40 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 40 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * * * DSEED is not used in this call to PSLASIZESYEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PSLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL SLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL SLACPY( 'A', NP, NQ, COPYB, DESCA( LLD_ ), B( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PSFILLPAD( DESCA( CTXT_ ), NP, NQ, B, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0E+2 ) * CALL PSFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * * Make sure that PSSYGVX does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, MAXEIGS, 1 CALL PSELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0E+0 ) 50 CONTINUE 60 CONTINUE * ORFAC = -1.0E+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PSSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, $ JA, DESCA, B( 1+IPREPAD ), IA, JA, DESCA, VL, VU, $ IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), ORFAC, $ Z( 1+IPREPAD ), IA, JA, DESCA, WORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, $ IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-B', NP, NQ, B, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0E+2 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PSCHEKPAD( DESCZ( CTXT_ ), 'PSSYGVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYGVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 70 I = 1, M WORK( I ) = WNEW( I+IPREPAD ) WORK( I+M ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, WORK, M, 1, $ 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ WORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 80 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( WORK( I )-WORK( M+ $ I ) ).GT.FIVE*EPS*ABS( WORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 80 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 90 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 100 NCLUSTERS = NCLUSTERS + 1 90 CONTINUE 100 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 110 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 110 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 120 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 120 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 150 * * Note that a couple key variables get redefined in PSGSEPCHK * as described by this table: * * PSGSEPTST name PSGSEPCHK name * ------------- ------------- * COPYA A * Z Q * B B * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the residual check * CALL PSFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * CALL PSGSEPCHK( IBTYPE, N, NZ, COPYA, IA, JA, DESCA, COPYB, $ IA, JA, DESCA, THRESH, Z( 1+IPREPAD ), IA, $ JA, DESCZ, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSGSEPCHK-WORK', SIZECHK, $ 1, WORK, SIZECHK, IPREPAD, IPOSTPAD, $ 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 140 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 130 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 130 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 140 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxSYGVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PSSYGVX returned INFO=', I7 ) 9998 FORMAT( 'PSSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PSGSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PSSYGVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PSSYGVX' ) 9981 FORMAT( 'NZ altered by PSSYGVX with JOBZ=N' ) * * End of PSGSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/psgsepchk.f0000640000175000017500000003102510363532303017174 0ustar muammarmuammar* * SUBROUTINE PSGSEPCHK( IBTYPE, MS, NV, A, IA, JA, DESCA, B, IB, JB, $ DESCB, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, W, WORK, LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, IBTYPE, IC, IQ, JA, JB, JC, JQ, LWORK, $ MS, NV, RESULT REAL THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCC( * ), DESCQ( * ) REAL A( * ), B( * ), C( * ), Q( * ), W( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PSGSEPCHK checks a decomposition of the form * * A Q = B Q D or * A B Q = Q D or * B A Q = Q D * * where A is a symmetric matrix, B is * symmetric positive definite, Q is orthogonal, and D is diagonal. * * One of the following test ratios is computed: * * IBTYPE = 1: TSTNRM = | A Q - B Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 2: TSTNRM = | A B Q - Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 3: TSTNRM = | B A Q - Q D | / ( |A| |Q| n ulp ) * * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, B and Q * MQ = number of local columns in A * NQ = number of local columns in B and Q * * IBTYPE (input) INTEGER * The form of the symmetric generalized eigenproblem. * = 1: A*Q = (lambda)*B*Q * = 2: A*B*Q = (lambda)*Q * = 3: B*A*Q = (lambda)*Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, B, C and Q * Also, the number of columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) REAL pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer to an * array in local memory of dimension (LLD_B, LOCc(JB+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix B * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix B. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), NQ) * * Contains the eigenvectors as computed by PSSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * * RESULT (global output) INTEGER * 0 if the test passes * 1 if the test fails * * .. Local Scalars .. * INTEGER I, INFO, MYCOL, MYROW, NPCOL, NPROW, NQ REAL ANORM, ULP * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) REAL CONE, CNEGONE, CZERO PARAMETER ( CONE = 1.0E+0, CNEGONE = -1.0E+0, $ CZERO = 0.0E+0 ) * .. * .. External Functions .. INTEGER NUMROC REAL PSLANGE, SLAMCH EXTERNAL NUMROC, PSLANGE, SLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PSGEMM, PSSCAL, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( MS, 1, MS, 2, IB, JB, DESCB, 11, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 16, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IB, JB, DESCB, 20, INFO ) * IF( INFO.EQ.0 ) THEN * NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -14 ELSE IF( JQ.NE.1 ) THEN INFO = -15 ELSE IF( IA.NE.1 ) THEN INFO = -5 ELSE IF( JA.NE.1 ) THEN INFO = -6 ELSE IF( IB.NE.1 ) THEN INFO = -9 ELSE IF( JB.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.NQ ) THEN INFO = -23 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSGSEPCHK', -INFO ) RETURN END IF * RESULT = 0 ULP = SLAMCH( 'Epsilon' ) * * Compute product of Max-norms of A and Q. * ANORM = PSLANGE( 'M', MS, MS, A, IA, JA, DESCA, WORK )* $ PSLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) IF( ANORM.EQ.ZERO ) $ ANORM = ONE * IF( IBTYPE.EQ.1 ) THEN * * Norm of AQ - BQD * * C = AQ * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 10 I = 1, NV CALL PSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 10 CONTINUE * * C = C - BQ (i.e. AQ-BQD) * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CNEGONE, C, IC, JC, DESCC ) * TSTNRM = ( PSLANGE( 'M', MS, NV, C, IC, JC, DESCC, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * * ELSE IF( IBTYPE.EQ.2 ) THEN * * Norm of ABQ - QD * * * C = BQ * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 20 I = 1, NV CALL PSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 20 CONTINUE * * Q = AC - Q * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PSLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * Norm of BAQ - QD * * * C = AQ * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 30 I = 1, NV CALL PSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 30 CONTINUE * * Q = BC - Q * CALL PSGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PSLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * END IF * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0E0 ) ) THEN RESULT = 1 END IF RETURN * * End of PSGSEPCHK * END scalapack-1.8.0/TESTING/EIG/psseptst.f0000640000175000017500000013511710363532303017101 0ustar muammarmuammar SUBROUTINE PSSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, $ ORDER, ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, IWORK, LIWORK, HETERO, NOUT, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER HETERO, SUBTESTS, UPLO INTEGER INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, LWORK, $ MATTYPE, N, NOUT, ORDER REAL ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) REAL A( LDA, * ), COPYA( LDA, * ), GAP( * ), $ WIN( * ), WNEW( * ), WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PSSEPTST builds a random matrix, runs PSSYEVX and PSSYEV to * compute the eigenvalues * and eigenvectors and then performs two tests to * determine if the result * is good enough. The two tests are: * |AQ -QL| / (abstol + ulp * norm(A) ) * and * |QT * Q - I| / ulp * norm(A) * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) symmetric matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PSSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) REAL array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PSSYEVX * * COPYA (local workspace) REAL array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PSSEPCHK and PSSEPQTQ * * W (local workspace) REAL array, dimension (N) * On normal exit from PSSYEVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PSLASIZESQP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PSLASIZESQP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL HALF, ONE, TEN, ZERO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, $ TEN = 10.0E+0, HALF = 0.5E+0 ) REAL PADVAL PARAMETER ( PADVAL = 19.25E+0 ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IHETERO, IINFO, IL, IMODE, IN, $ INDD, INDWORK, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, ITYPE, IU, J, LLWORK, LSYEVXSIZE, $ MAXSIZE, MINSIZE, MYCOL, MYROW, NB, NGEN, NLOC, $ NNODES, NP, NPCOL, NPROW, NQ, RES, SIZECHK, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, $ SIZESUBTST, SIZESYEV, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE,ISIZESYEVD, SIZESYEVD REAL ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, IGAMX2D, IGEBR2D, $ IGEBS2D, PSCHEKPAD, PSELSET, PSFILLPAD, $ PSLASET, PSLASIZESQP, PSLASIZESYEVX, PSLATMS, $ PSMATGEN, PSSEPSUBTST, PSSQPSUBTST, PSSYEV, $ SLABAD, SLASRT, SLATMS, SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, INT, MAX, MIN, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED EVX' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * Distribute HETERO across processes * IF( IAM.EQ.0 ) THEN IF( LSAME( HETERO, 'Y' ) ) THEN IHETERO = 2 ELSE IHETERO = 1 END IF CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, IHETERO, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, IHETERO, 1, 0, 0 ) END IF IF( IHETERO.EQ.2 ) THEN HETERO = 'Y' ELSE HETERO = 'N' END IF * * Make sure that we have enough memory * CALL PSLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, $ SIZESUBTST, ISIZESUBTST, SIZETST, ISIZETST ) * IF( LWORK.LT.SIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDWORK = INDD + N LLWORK = LWORK - INDWORK + 1 * ULP = PSLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PSLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL SLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / REAL( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log symmetric, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random symmetric * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N WORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PSLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N WORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PSLASET( 'All', N, N, ZERO, ONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * symmetric, eigenvalues specified * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * symmetric, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PSMATGEN( DESCA( CTXT_ ), 'S', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PSLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( SLARAN( ISEED )*REAL( NLOC ) ), N-NGEN ) * CALL SLATMS( IN, IN, 'S', ISEED, 'P', WORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PSELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PSELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PSELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PSELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 WORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL SLASRT( 'I', N, WORK( INDD ), IINFO ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PSLASIZESYEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, WORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LSYEVXSIZE = MIN( MAXSIZE, LLWORK ) * CALL PSSEPSUBTST( WKNOWN, 'v', 'a', UPLO, N, VL, VU, IL, IU, $ THRESH, ABSTOL, A, COPYA, Z, 1, 1, DESCA, $ WORK( INDD ), WIN, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, TSTNRM, $ QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE + INT( SLARAN( ISEED )* $ REAL( VECSIZE-VALSIZE ) ) * CALL PSSEPSUBTST( .TRUE., JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, 1, 1, $ DESCA, WIN( 1+IPREPAD ), WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, $ IWORK, ISIZESYEVX, RES, TSTNRM, QTQNRM, $ NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF * * Now that PSSYEVX been tested, we check PSSYEV if we are a * homogeneous machine. * IF( LSAME( HETERO, 'N' ) .AND. LSAME( SUBTESTS, 'N' ) ) THEN PASSED = 'PASSED EV' * * PSSYEV test1: * JOBZ = 'N', eigenvalues only * IF( INFO.NE.0 ) THEN * * If the EVX tests fail, we do not perform the EV tests * PASSED = 'SKIPPED EV' ELSE JOBZ = 'N' * CALL PSSYEV( JOBZ, UPLO, N, A, 1, 1, DESCA, $ WORK( INDWORK ), Z, 1, 1, DESCA, $ WORK( INDWORK ), -1, INFO ) MINSIZE = INT( WORK( INDWORK ) ) * CALL PSSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EV test1' INFO = 1 END IF END IF * * PSSYEV test2: * JOBZ = 'V', eigenvalues and eigenvectors * IF( INFO.EQ.0 ) THEN JOBZ = 'V' * CALL PSSYEV( JOBZ, UPLO, N, A, 1, 1, DESCA, $ WORK( INDWORK ), Z, 1, 1, DESCA, $ WORK( INDWORK ), -1, INFO ) MINSIZE = INT( WORK( INDWORK ) ) * CALL PSSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EV test2' INFO = 1 END IF END IF IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ MAXQTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, $ PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF END IF * * Now that PSSYEV been tested, we check PSSYEVD if we are a * homogeneous machine. * IF( LSAME( HETERO, 'N' ) .AND. LSAME( SUBTESTS, 'N' ) ) THEN PASSED = 'PASSED EVD' * * PSSYEVD test1: * IF( INFO.NE.0 ) THEN * * If the EV tests fail, we do not perform the EVD tests * PASSED = 'SKIPPED EVD' ELSE * NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) MINSIZE = MAX( 1+6*N+2*NP*NQ, $ 3*N + MAX( NB*( NP+1 ), 3*NB ) ) + 2*N * CALL PSSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, 1, 1, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ MINSIZE, IWORK, ISIZESYEVD, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAIL EVD test1' INFO = 1 END IF END IF IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ), TSTNRM, $ QTQNRM, PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ), MAXTSTNRM, MAXQTQNRM, $ PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ SUBTESTS END IF END IF END IF RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, $ F8.2, 1X, F8.2, 1X, G9.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 1X, G9.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, F8.2, $ 1X, F8.2, 21X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 1X, 8X, $ 1X, F8.2, 21X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, A1, 32X, $ 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PSSEPDRIVER' ) * * End of PSSEPTST * END scalapack-1.8.0/TESTING/EIG/pzgsepsubtst.f0000640000175000017500000007272010363532303017771 0ustar muammarmuammar* * SUBROUTINE PZGSEPSUBTST( WKNOWN, IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, IA, JA, DESCA, WIN, WNEW, $ IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, RWORK, LRWORK, LWORK1, $ IWORK, LIWORK, RESULT, TSTNRM, QTQNRM, $ NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IBTYPE, IL, IPOSTPAD, IPREPAD, IU, JA, $ LIWORK, LRWORK, LWORK, LWORK1, N, NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX*16 A( * ), B( * ), COPYA( * ), COPYB( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZGSEPSUBTST calls PZHEGVX and then tests the output of * PZHEGVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < THRESH * |QT * Q - I| / eps * norm(A) < THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PZHEGVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PZGSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PZGSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PZSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX*16 array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PZHEGVX for a description of block cyclic layout. * The test matrix, which is then modified by PZHEGVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX*16 array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * B (local workspace) COMPLEX*16 array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PZHEGVX * * COPYB (local input) COMPLEX*16 array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) COMPLEX*16 array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PZGSEPCHK and PZSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as copmuted by this call to PZHEGVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PZHEGVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PZHEGVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / |A|*N*EPS * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0 ) COMPLEX*16 ZPADVAL PARAMETER ( ZPADVAL = ( 13.989D+0, 1.93D+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZESUBTST, SIZETMS, SIZETST, VALSIZE, $ VECSIZE DOUBLE PRECISION EPS, ERROR, MAXERROR, MAXVU, MINERROR, MINVL, $ NORMWIN, OLDVL, OLDVU, ORFAC, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGAMN2D, DGAMX2D, $ IGAMN2D, IGAMX2D, PDCHEKPAD, PDFILLPAD, $ PICHEKPAD, PIFILLPAD, PZCHEKPAD, PZELSET, $ PZFILLPAD, PZGSEPCHK, PZHEGVX, PZLASIZEGSEP, $ PZLASIZEHEEVX, SLBOOT, SLTIMER, ZLACPY * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PZLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63D+0, 1.1D+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 40 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 50 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 50 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 160 RESULT = 0 * * * DSEED is not used in this call to PZLASIZEHEEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PZLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL ZLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL ZLACPY( 'A', NP, NQ, COPYB, DESCA( LLD_ ), B( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PZFILLPAD( DESCA( CTXT_ ), NP, NQ, B, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, ZPADVAL+1.0D+2 ) * CALL PZFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, ZPADVAL ) * CALL PZFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, ZPADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PZFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, ZPADVAL+4.1D+0 ) * * Make sure that PZHEGVX does not cheat (i.e. use answers * already computed.) * DO 70 I = 1, N, 1 DO 60 J = 1, MAXEIGS, 1 CALL PZELSET( Z( 1+IPREPAD ), I, J, DESCA, $ ( 13.0D+0, 1.34D+0 ) ) 60 CONTINUE 70 CONTINUE * ORFAC = -1.0D+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PZHEGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, $ JA, DESCA, B( 1+IPREPAD ), IA, JA, DESCA, VL, VU, $ IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), ORFAC, $ Z( 1+IPREPAD ), IA, JA, DESCA, WORK( 1+IPREPAD ), $ SIZEHEEVX, RWORK( 1+IPREPAD ), LWORK1, $ IWORK( 1+IPREPAD ), LIWORK, IFAIL( 1+IPREPAD ), $ ICLUSTR( 1+IPREPAD ), GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-B', NP, NQ, B, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ ZPADVAL+1.0D+2 ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, ZPADVAL ) * CALL PZCHEKPAD( DESCZ( CTXT_ ), 'PZHEGVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ ZPADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, ZPADVAL+4.1D+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEGVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 80 I = 1, M RWORK( I ) = WNEW( I+IPREPAD ) RWORK( I+M ) = WNEW( I+IPREPAD ) 80 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, RWORK, M, $ 1, 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ RWORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 90 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( RWORK( I )-RWORK( M+ $ I ) ).GT.FIVE*EPS*ABS( RWORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 90 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 100 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 110 NCLUSTERS = NCLUSTERS + 1 100 CONTINUE 110 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 120 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 120 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 130 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 130 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 160 * * Note that a couple key variables get redefined in PZGSEPCHK * as described by this table: * * PZGSEPTST name PZGSEPCHK name * ------------- ------------- * COPYA A * Z Q * B B * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the residual check * CALL PDFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, $ RSIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * CALL PZGSEPCHK( IBTYPE, N, NZ, COPYA, IA, JA, DESCA, COPYB, $ IA, JA, DESCA, THRESH, Z( 1+IPREPAD ), IA, $ JA, DESCZ, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), RWORK( 1+IPREPAD ), $ RSIZECHK, TSTNRM, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZGSEPCHK-rWORK', RSIZECHK, $ 1, RWORK, RSIZECHK, IPREPAD, IPOSTPAD, $ 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 150 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 140 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 150 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxHEGVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 160 CONTINUE * * RETURN * 9999 FORMAT( 'PZHEGVX returned INFO=', I7 ) 9998 FORMAT( 'PZSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PZGSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PZHEGVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PZHEGVX' ) 9981 FORMAT( 'NZ altered by PZHEGVX with JOBZ=N' ) * * End of PZGSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pzsepdriver.f0000640000175000017500000002440010363532303017561 0ustar muammarmuammar* * PROGRAM PZSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel COMPLEX*16 Hermitian eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and DBLSIZ * indicate the length in bytes on the given platform for an integer * and a double precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * DOUBLE is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * COMPLEX*16 words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PZHEEVX, the expert driver for the parallel * Hermitian eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, ZPLXSZ, NIN PARAMETER ( TOTMEM = 2000000, ZPLXSZ = 16, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / ZPLXSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Functions .. DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PDLACHKIEEE, PDLASNBT, PZSEPREQ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PDLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PDLACHKIEEE( ISIEEE, DLAMCH( 'O' ), DLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Hermitian Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'Hermitian eigenvalue routine: PZHEEVX.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 )' ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )' ||Q^T*Q - I|| ' // '/ (N * eps)' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pZSEPtst.f).' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pZSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : ||AQ - QL|| ' // $ '/ ((abstol + ||A|| * eps) * N)' WRITE( NOUT, FMT = 9999 )'QTQ : ||Q^T*Q - I||/ (N * eps)' WRITE( NOUT, FMT = 9999 ) $ ' : when the adjusted QTQ exceeds THRESH', $ ' the adjusted QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ ' : otherwise the true QTQ norm is printed' WRITE( NOUT, FMT = 9999 ) $ 'If NT>1, CHK and QTQ are the max over all ' // $ 'eigen request tests' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PZSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP SUB WALL CPU ', $ ' CHK QTQ CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- --- -------- --------', $ ' --------- --------- -----' ) * * End of PZSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pchrdinfo.f0000640000175000017500000003237710363532303017174 0ustar muammarmuammar SUBROUTINE PCHRDINFO( SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ NVHI( LDNVAL ), NVLO( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCHRDINFO get the needed startup information for the Hessenberg * reduction tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for * N, IHI & ILO. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of rows & columns in matrix). * * NVLO (global output) INTEGER array, dimension (LDNVAL) * The values of ILO. * * NVHI (global output) INTEGER array, dimension (LDNVAL) * The values of IHI. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, ILO and IHI. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * 3*LDNVAL+LDNBVAL+2*LDPVAL. Used to pack all input arrays * in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'HRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = * ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'N', LDNVAL GO TO 20 END IF * * Get values of N, ILO, IHI * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVLO( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVHI( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVLO, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVHI, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS -1 CALL IGEBS2D( ICTXT, 'All', ' ', 1, I, WORK, 1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction routine to Hessenberg form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision Hessenberg ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q H Q''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'HRD time : Time in seconds to compute HRD ' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for HRD ' // $ 'reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'ILO ', ( NVLO( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVLO( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'IHI ', ( NVHI( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVHI( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 3*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) * I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVLO, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVHI, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' ILLEGAL INPUT IN FILE ', 40A, '. ABORTING RUN.' ) 9997 FORMAT( ' NUMBER OF VALUES OF ', 5A, $ ' IS LESS THAN 1 OR GREATER ', 'THAN ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G14.7 ) * * End of PCHRDINFO * END scalapack-1.8.0/TESTING/EIG/pssepsubtst.f0000640000175000017500000006745110363532303017620 0ustar muammarmuammar SUBROUTINE PSSEPSUBTST( WKNOWN, JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, IA, JA, $ DESCA, WIN, WNEW, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ IWORK, LIWORK, RESULT, TSTNRM, QTQNRM, $ NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 15, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, IPOSTPAD, IPREPAD, IU, JA, LIWORK, $ LWORK, LWORK1, N, NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) REAL A( * ), COPYA( * ), GAP( * ), WIN( * ), $ WNEW( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSEPSUBTST calls PSSYEVX and then tests the output of * PSSYEVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PSSYEVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PSSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PSSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PSSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) REAL array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PSSYEVX for a description of block cyclic layout. * The test matrix, which is then modified by PSSYEVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) REAL array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PSSEPCHK and PSSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as copmuted by this call to PSSYEVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) REAL array, * dimension (NPROW*NPCOL) * * WORK (local workspace) REAL array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PSSYEVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PSSYEVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0 ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. INTEGER I, IAM, INDIWRK, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE REAL EPS, EPSNORMA, ERROR, MAXERROR, MAXVU, $ MINERROR, MINVL, NORMWIN, OLDVL, OLDVU, ORFAC, $ SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, PSLANSY EXTERNAL LSAME, NUMROC, PSLAMCH, PSLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, IGAMN2D, IGAMX2D, $ PICHEKPAD, PIFILLPAD, PSCHEKPAD, PSELSET, $ PSFILLPAD, PSLASIZESEP, PSLASIZESYEVX, $ PSSEPCHK, PSSEPQTQ, PSSYEVX, SGAMN2D, SGAMX2D, $ SLACPY, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PSLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 30 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 40 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 40 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * * * DSEED is not used in this call to PSLASIZESYEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PSLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL SLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PSFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * * Make sure that PSSYEVX does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, MAXEIGS, 1 CALL PSELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0E+0 ) 50 CONTINUE 60 CONTINUE * ORFAC = -1.0E+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PSSYEVX( JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ VL, VU, IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), $ ORFAC, Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, IWORK( 1+IPREPAD ), $ LIWORK, IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PSCHEKPAD( DESCZ( CTXT_ ), 'PSSYEVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PSSYEVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 70 I = 1, M WORK( I ) = WNEW( I+IPREPAD ) WORK( I+M ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, WORK, M, 1, $ 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ WORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 80 I = 1, M IF( RESULT.EQ.0 .AND. WORK( I ).NE.WORK( M+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 80 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 90 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 100 NCLUSTERS = NCLUSTERS + 1 90 CONTINUE 100 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 110 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 110 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 120 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 120 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 150 * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PSLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PSSEPCHK * as described by this table: * * PSSEPTST name PSSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * CALL PSSEPCHK( N, NZ, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * * Perform the |QTQ - I| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * * CALL PSSEPQTQ( N, NZ, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1+IPREPAD+1 ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), WORK( IPREPAD+1 ), SIZEQTQ, $ QTQNRM, INFO, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 140 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * IF( .NOT.LSAME( RANGE, 'V' ) .OR. $ ( MYIL.EQ.1 .OR. ( WIN( MYIL-1 ).LT.VL+NORMWIN*FIVE* $ THRESH*EPS ) ) ) THEN IF( .NOT.LSAME( RANGE, 'V' ) .OR. $ ( MYIL.EQ.N-M+1 .OR. ( WIN( MYIL+M ).GT.VU- $ NORMWIN*FIVE*THRESH*EPS ) ) ) THEN * * Make sure that the eigenvalues that we report are OK * DO 130 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 130 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 140 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxSYEVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PSSYEVX returned INFO=', I7 ) 9998 FORMAT( 'PSSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PSSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PSSYEVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PSSYEVX' ) 9981 FORMAT( 'NZ altered by PSSYEVX with JOBZ=N' ) * * End of PSSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pdmatgen.f0000640000175000017500000004312410363532303017007 0ustar muammarmuammar SUBROUTINE PDMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDMATGEN : Parallel Real Double precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) DOUBLE PRECISION, pointer into the local * memory to an array of dimension ( LDA, * ) containing the * local pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDRAND EXTERNAL ICEIL, NUMROC, LSAME, PDRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = ONE - TWO*PDRAND(0) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = ONE - TWO*PDRAND(0) 30 CONTINUE A(IK,JK) = ONE - TWO*PDRAND(0) DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = ONE - TWO*PDRAND(0) A(IK,JK+J) = A(IK+J,JK) 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 A(IK,JK) = ONE - TWO*PDRAND(0) JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 A(IK,JK) = ONE - TWO*PDRAND(0) JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = ONE - TWO*PDRAND(0) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 A(IK,JK+J) = ABS(A(IK,JK+J)) + MAXMN IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PDMATGEN * END scalapack-1.8.0/TESTING/EIG/pzlasizesep.f0000640000175000017500000001351010363532303017555 0ustar muammarmuammar SUBROUTINE PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZEHEEVD, RSIZEHEEVD, $ ISIZEHEEVD, SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, RSIZETST, ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZEHEEVD, ISIZEHEEVX, $ ISIZESUBTST, ISIZETST, RSIZECHK, RSIZEHEEVD, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVD, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PZLASIZESEP computes the amount of memory needed by * various SEP test routines, as well as HEEVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PZHEEVX * * SIZEMQRLEFT LWORK for the 1st PZUNMQR call in PZLAGHE * * SIZEMQRRIGHT LWORK for the 2nd PZUNMQR call in PZLAGHE * * SIZEQRF LWORK for PZGEQRF in PZLAGHE * * SIZETMS LWORK for PZLATMS * * RSIZEQTQ LWORK for PZSEPQTQ (nexer complex) * * RSIZECHK LWORK for PZSEPCHK * * SIZEHEEVX LWORK for PZHEEVX * * RSIZEHEEVX LRWORK for PZHEEVX * * ISIZEHEEVX LIWORK for PZHEEVX * * SIZEHEEVD LWORK for PCHEEVD * * RSIZEHEEVD LRWORK for PCHEEVD * * ISIZEHEEVD LIWORK for PCHEEVD * * SIZESUBTST LWORK for PZSUBTST * * RSIZESUBTST LRWORK for PZSUBTST * * ISIZESUBTST LIWORK for PZSUBTST * * SIZETST LWORK for PZTST * * RSIZETST LRWORK for PZTST * * ISIZETST LIWORK for PZTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ANB, CSRC_A, IACOL, IAROW, ICOFFA, ICTXT, $ IROFFA, LCM, LCMQ, LDA, MQ0, MYCOL, MYROW, N, $ NB, NEIG, NHETRD_LWOPT, NN, NNP, NP, NP0, $ NPCOL, NPROW, NPS, NQ, RSRC_A, SIZECHK, $ SIZEQTQ, SQNPC * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, SQRT * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = 0 RSIZEQTQ = 2 + MAX( DESCA( MB_ ), 2 )*( 2*NP0+MQ0 ) RSIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZEHEEVX = N + ( NP0+MQ0+NB )*NB RSIZEHEEVX = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZEHEEVX = 6*NNP * ICTXT = DESCA( CTXT_ ) ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS * SIZEHEEVX = MAX( SIZEHEEVX, N+NHETRD_LWOPT ) * SIZEHEEVD = SIZEHEEVX RSIZEHEEVD = 7*N + 3*NP0*MQ0 ISIZEHEEVD = 7*N + 8*NPCOL + 2 SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZEHEEVX, $ SIZEHEEVD ) + IPREPAD + IPOSTPAD RSIZESUBTST = MAX( RSIZEHEEVX, RSIZEHEEVD, RSIZEQTQ, RSIZECHK ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = MAX( ISIZEHEEVX, ISIZEHEEVD ) + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Room for DIAG, WIN, WNEW, GAP and RWORK * RSIZETST = 4*( N+IPREPAD+IPOSTPAD ) + RSIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PZHEEVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pcsepsubtst.f0000640000175000017500000007253510363532303017577 0ustar muammarmuammar* * SUBROUTINE PCSEPSUBTST( WKNOWN, JOBZ, RANGE, UPLO, N, VL, VU, IL, $ IU, THRESH, ABSTOL, A, COPYA, Z, IA, JA, $ DESCA, WIN, WNEW, IFAIL, ICLUSTR, GAP, $ IPREPAD, IPOSTPAD, WORK, LWORK, RWORK, $ LRWORK, LWORK1, IWORK, LIWORK, RESULT, $ TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, IPOSTPAD, IPREPAD, IU, JA, LIWORK, $ LRWORK, LWORK, LWORK1, N, NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) REAL GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX A( * ), COPYA( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCSEPSUBTST calls PCHEEVX and then tests the output of * PCHEEVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PCHEEVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PCSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PCSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PCSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PCHEEVX for a description of block cyclic layout. * The test matrix, which is then modified by PCHEEVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) COMPLEX array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PCSEPCHK and PCSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as copmuted by this call to PCHEEVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) REAL array, * dimension (NPROW*NPCOL) * * WORK (local workspace) COMPLEX array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) REAL array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PCHEEVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PCHEEVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0 ) COMPLEX CPADVAL PARAMETER ( CPADVAL = ( 13.989E+0, 1.93E+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZESUBTST, SIZETMS, SIZETST, VALSIZE, $ VECSIZE, SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD REAL EPS, EPSNORMA, ERROR, MAXERROR, MAXVU, $ MINERROR, MINVL, NORMWIN, OLDVL, OLDVU, ORFAC, $ SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PCLANHE, PSLAMCH EXTERNAL LSAME, NUMROC, PCLANHE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLACPY, DESCINIT, IGAMN2D, $ IGAMX2D, PCCHEKPAD, PCELSET, PCFILLPAD, $ PCHEEVX, PCLASIZEHEEVX, PCLASIZESEP, PCSEPCHK, $ PCSEPQTQ, PICHEKPAD, PIFILLPAD, PSCHEKPAD, $ PSFILLPAD, SGAMN2D, SGAMX2D, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63E+0, 1.1E+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 40 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 50 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 50 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 160 RESULT = 0 * * * DSEED is not used in this call to PCLASIZEHEEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PCLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL CLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PCFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL ) * CALL PCFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PCFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, CPADVAL+4.1E+0 ) * * Make sure that PCHEEVX does not cheat (i.e. use answers * already computed.) * DO 70 I = 1, N, 1 DO 60 J = 1, MAXEIGS, 1 CALL PCELSET( Z( 1+IPREPAD ), I, J, DESCA, $ ( 13.0E+0, 1.34E+0 ) ) 60 CONTINUE 70 CONTINUE * ORFAC = -1.0E+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PCHEEVX( JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ VL, VU, IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), $ ORFAC, Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), SIZEHEEVX, RWORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, $ IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, CPADVAL ) * CALL PCCHEKPAD( DESCZ( CTXT_ ), 'PCHEEVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ CPADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, CPADVAL+4.1E+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEEVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 80 I = 1, M RWORK( I ) = WNEW( I+IPREPAD ) RWORK( I+M ) = WNEW( I+IPREPAD ) 80 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, RWORK, M, $ 1, 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ RWORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 90 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( RWORK( I )-RWORK( M+ $ I ) ).GT.FIVE*EPS*ABS( RWORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 90 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 100 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 110 NCLUSTERS = NCLUSTERS + 1 100 CONTINUE 110 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 120 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 120 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 130 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 130 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 160 * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PCLANHE( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ RWORK )*EPS END IF * * Note that a couple key variables get redefined in PCSEPCHK * as described by this table: * * PCSEPTST name PCSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PSFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, $ RSIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * CALL PCSEPCHK( N, NZ, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), RWORK( 1+IPREPAD ), $ RSIZECHK, TSTNRM, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCSEPCHK-rWORK', RSIZECHK, $ 1, RWORK, RSIZECHK, IPREPAD, IPOSTPAD, $ 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * * Perform the |QTQ - I| test * CALL PSFILLPAD( DESCA( CTXT_ ), RSIZEQTQ, 1, RWORK, $ RSIZEQTQ, IPREPAD, IPOSTPAD, 4.3E+0 ) * * CALL PCSEPQTQ( N, NZ, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1+IPREPAD+1 ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), RWORK( IPREPAD+1 ), $ RSIZEQTQ, QTQNRM, INFO, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCSEPQTQ-rWORK', RSIZEQTQ, $ 1, RWORK, RSIZEQTQ, IPREPAD, IPOSTPAD, $ 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 150 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 140 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 150 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxHEEVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 160 CONTINUE * * RETURN * 9999 FORMAT( 'PCHEEVX returned INFO=', I7 ) 9998 FORMAT( 'PCSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PCSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PCHEEVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PCHEEVX' ) 9981 FORMAT( 'NZ altered by PCHEEVX with JOBZ=N' ) * * End of PCSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pssvdchk.f0000640000175000017500000003261210363532303017035 0ustar muammarmuammar SUBROUTINE PSSVDCHK( M, N, A, IA, JA, DESCA, U, IU, JU, DESCU, VT, $ IVT, JVT, DESCVT, S, THRESH, WORK, LWORK, $ RESULT, CHK, MTM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IU, IVT, JA, JU, JVT, LWORK, M, N REAL CHK, MTM, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCU( * ), DESCVT( * ), $ RESULT( * ) REAL A( * ), S( * ), U( * ), VT( * ), WORK( * ) * .. * * Purpose * ======= * * For given two-dimensional matrices A, U, VT, and one-dimensional * array D compute the following four tests: * * (1) | A - U*diag(S) VT | / ( |A| max(M,N) ulp ) * * (2) | I - U'*U | / ( M ulp ) * * (3) | I - VT*VT' | / ( N ulp ), * * (4) S contains SIZE = MIN( M, N ) nonnegative values in * decreasing order. * It then compares result of computations (1)-(3) * with TRESH and returns results of comparisons and test (4) in * RESULT(I). When the i-th test fails, value of RESULT( I ) is set * to 1. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZEP = number of local rows in VT * SIZEQ = number of local columns in U * * M (global input) INTEGER * Matrix size. * The number of global rows in A and U and * * N (global input) INTEGER * The number of global columns in A and VT. * * A (input) block cyclic distributed REAL array, * global dimension (M, N), local dimension (DESCA( DLEN_ ), NQ) * Contains the original test matrix. * * IA (global input) INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * U (local input) REAL array * global dimension (M, SIZE), local dimension * (DESCU( DLEN_ ), SIZEQ) * Contains left singular vectors of matrix A. * * IU (global input) INTEGER * The global row index of the submatrix of the distributed * matrix U to operate on. * * JU (global input) INTEGER * The global column index of the submatrix of the distributed * matrix U to operate on. * * DESCU (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local input) REAL array * global dimension (SIZE, N), local dimension * (DESCVT( DLEN_ ), NQ) * Contains right singular vectors of matrix A. * * IVT (global input) INTEGER * The global row index of the submatrix of the distributed * matrix VT to operate on. * * JVT (global input) INTEGER * The global column index of the submatrix of the distributed * matrix VT to operate on. * * DESCVT (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * S (global input) REAL array, dimension (SIZE) * Contains the computed singular values * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 1 + SIZEQ*SIZEP + MAX[WORK(pdlange(size,size)), * WORK(pdlange(m,n))], * where * SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ), * SIZEP = NUMROC( SIZE, DESCVT( MB_ ), MYROW, 0, NPROW ), * and worekspaces required to call pdlange are * WORK(pdlange(size,size)) < MAX(SIZEQ0,2) < SIZEB +2, * WORK(pdlange(m,n)) < MAX(NQ0,2) < SIZEB +2, * SIZEB = MAX(M, N) * Finally, upper limit on required workspace is * LWORK > 1 + SIZEQ*SIZEP + SIZEB + 2 * * RESULT (global input/output) INTEGER array. Four first elements of * the array are set to 0 or 1 depending on passing four * respective tests ( see above in Purpose ). The elements of * RESULT are set to * 0 if the test passes i.e. * | A - U*diag(S)*VT | / ( |A| max(M,N) ulp ) <= THRESH * 1 if the test fails i.e. * | A - U*diag(S)*VT | / ( |A| max(M,N) ulp ) > THRESH * * CHK (global output) REAL * value of the | A - U*diag(S) VT | / ( |A| max(M,N) ulp ) * * MTM (global output) REAL * maximum of the two values: * | I - U'*U | / ( M ulp ) and | I - VT*VT' | / ( N ulp ) * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, MONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, MONE = -1.0E0 ) * .. * .. Local Scalars .. INTEGER I, INFO, LDR, LOCALCOL, LWMIN, MP, MX, MYCOL, $ MYROW, NPCOL, NPROW, NQ, PCOL, PTRR, PTRWORK, $ SIZE, SIZEP, SIZEPOS, SIZEQ REAL FIRST, NORMA, NORMAI, NORMU, NORMVT, SECOND, $ THRESHA, ULP * .. * .. Local Arrays .. INTEGER DESCR( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PSLAMCH, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCINIT, SSCAL, $ PSELSET, PSGEMM, PSLASET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 SIZE = MIN( M, N ) * * Sizepos is a number of parameters to pdsvdchk plus one. It's used * for the error reporting. * SIZEPOS = 22 IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( M, 1, SIZE, SIZEPOS, IU, JU, DESCU, 10, INFO ) CALL CHK1MAT( SIZE, SIZEPOS, N, 2, IVT, JVT, DESCVT, 14, INFO ) END IF * IF( INFO.EQ.0 ) THEN * * Calculate workspace * MP = NUMROC( M, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) SIZEP = NUMROC( SIZE, DESCVT( MB_ ), MYROW, 0, NPROW ) SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ) MX = MAX( SIZEQ, NQ ) LWMIN = 2 + SIZEQ*SIZEP + MAX( 2, MX ) WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 40 IF( LWORK.LT.LWMIN ) THEN INFO = -18 ELSE IF( THRESH.LE.0 ) THEN INFO = -16 END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSSVDCHK', -INFO ) RETURN END IF * LDR = MAX( 1, SIZEP ) ULP = PSLAMCH( DESCA( CTXT_ ), 'P' ) NORMAI = PSLANGE( '1', M, N, A, 1, 1, DESCA, WORK ) * * Allocate array R of global dimension SIZE x SIZE for testing * PTRR = 2 PTRWORK = PTRR + SIZEQ*SIZEP * CALL DESCINIT( DESCR, SIZE, SIZE, DESCVT( MB_ ), DESCU( NB_ ), 0, $ 0, DESCA( CTXT_ ), LDR, INFO ) * * Test 2. Form identity matrix R and make check norm(U'*U - I ) * CALL PSLASET( 'Full', SIZE, SIZE, ZERO, ONE, WORK( PTRR ), 1, 1, $ DESCR ) CALL PSGEMM( 'T', 'N', SIZE, SIZE, M, ONE, U, 1, 1, DESCU, U, 1, $ 1, DESCU, MONE, WORK( PTRR ), 1, 1, DESCR ) * NORMU = PSLANGE( '1', SIZE, SIZE, WORK( PTRR ), 1, 1, DESCR, $ WORK( PTRWORK ) ) * NORMU = NORMU / ULP / SIZE / THRESH IF( NORMU.GT.1. ) $ RESULT( 2 ) = 1 * * Test3. Form identity matrix R and check norm(VT*VT' - I ) * CALL PSLASET( 'Full', SIZE, SIZE, ZERO, ONE, WORK( PTRR ), 1, 1, $ DESCR ) CALL PSGEMM( 'N', 'T', SIZE, SIZE, N, ONE, VT, 1, 1, DESCVT, VT, $ 1, 1, DESCVT, MONE, WORK( PTRR ), 1, 1, DESCR ) NORMVT = PSLANGE( '1', SIZE, SIZE, WORK( PTRR ), 1, 1, DESCR, $ WORK( PTRWORK ) ) * NORMVT = NORMVT / ULP / SIZE / THRESH IF( NORMVT.GT.1. ) $ RESULT( 3 ) = 1 * MTM = MAX( NORMVT, NORMU )*THRESH * * Test 1. * Initialize R = diag( S ) * CALL PSLASET( 'Full', SIZE, SIZE, ZERO, ZERO, WORK( PTRR ), 1, 1, $ DESCR ) * DO 10 I = 1, SIZE CALL PSELSET( WORK( PTRR ), I, I, DESCR, S( I ) ) 10 CONTINUE * * Calculate U = U*R * DO 20 I = 1, SIZE PCOL = INDXG2P( I, DESCU( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( I, DESCU( NB_ ), 0, 0, NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CALL SSCAL( MP, S( I ), U( ( LOCALCOL-1 )*DESCU( LLD_ )+1 ), $ 1 ) END IF 20 CONTINUE * * Calculate A = U*VT - A * CALL PSGEMM( 'N', 'N', M, N, SIZE, ONE, U, 1, 1, DESCU, VT, 1, 1, $ DESCVT, MONE, A, 1, 1, DESCA ) * NORMA = PSLANGE( '1', M, N, A, 1, 1, DESCA, WORK( PTRWORK ) ) THRESHA = NORMAI*MAX( M, N )*ULP*THRESH * IF( NORMA.GT.THRESHA ) $ RESULT( 1 ) = 1 * IF( THRESHA.EQ.0 ) THEN CHK = 0.0E0 ELSE CHK = NORMA / THRESHA*THRESH END IF * * Test 4. * DO 30 I = 1, SIZE - 1 FIRST = S( I ) SECOND = S( I+1 ) IF( FIRST.LT.SECOND ) $ RESULT( 4 ) = 1 30 CONTINUE 40 CONTINUE RETURN END scalapack-1.8.0/TESTING/EIG/pcgsepreq.f0000640000175000017500000002554010602576752017217 0ustar muammarmuammar* * SUBROUTINE PCGSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX MEM( MEMSIZE ) * .. * * Purpose * ======= * * PCGSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PCGSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) COMPLEX ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL FIVE PARAMETER ( FIVE = 5.0E+0 ) INTEGER CPLXSZ, INTGSZ PARAMETER ( CPLXSZ = 8, INTGSZ = 4 ) INTEGER REALSZ PARAMETER ( REALSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IBTYPE, IMIDPAD, INITCON, $ IPOSTPAD, IPREPAD, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, LDA, LLRWORK, MATSIZE, MATTYPE, $ MYCOL, MYROW, N, NB, NIBTYPES, NMATSIZES, $ NMATTYPES, NNODES, NP, NPCOL, NPCONFIGS, NPROW, $ NQ, NUPLOS, ORDER, PCONFIG, PTRA, PTRB, $ PTRCOPYA, PTRCOPYB, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRRWORK, PTRW, PTRW2, PTRWORK, PTRZ, $ RES, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, UPLO REAL ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PCGSEPTST, PCLASIZEGSEP, PSSEPINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PSSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * Thresholds must be bigger for the generalized problem. * THRESH = THRESH*FIVE * DO 50 MATSIZE = 1, NMATSIZES * DO 40 PCONFIG = 1, NPCONFIGS * DO 30 MATTYPE = 1, NMATTYPES * DO 20 UPLO = 1, NUPLOS IF( LSAME( SUBTESTS, 'Y' ) ) THEN NIBTYPES = 3 ELSE NIBTYPES = 1 END IF DO 10 IBTYPE = 1, NIBTYPES * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, $ NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, $ MYROW, MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PCLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, $ RSIZEQTQ, RSIZECHK, $ SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZESUBTST, $ RSIZESUBTST, ISIZESUBTST, $ SIZETST, RSIZETST, $ ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYB = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRB = PTRCOPYB + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRB + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, CPLXSZ / REALSZ ) PTRWORK = PTRW2 + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, CPLXSZ / REALSZ ) PTRGAP = PTRWORK + SIZETST + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + ICEIL( NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, $ CPLXSZ / REALSZ ) PTRICLUS = PTRIFAIL + $ ICEIL( N+IPREPAD+IPOSTPAD, $ CPLXSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, CPLXSZ / INTGSZ ) PTRRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, CPLXSZ / INTGSZ ) LLRWORK = ( MEMSIZE - PTRRWORK - IPOSTPAD - $ IPREPAD + 1 )* ( CPLXSZ / REALSZ ) NTESTS = NTESTS + 1 IF( LLRWORK.LT.RSIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PCGSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), $ IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, $ MEM( PTRA ), $ MEM( PTRCOPYA ), $ MEM( PTRB ), $ MEM( PTRCOPYB ), $ MEM( PTRZ ), LDA, $ MEM( PTRW ), MEM( PTRW2 ), $ MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ SIZETST, MEM( PTRRWORK ), $ LLRWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * ) $ ' pCGSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE 50 CONTINUE END IF * * RETURN * * End of PCDGSEPREQ * END scalapack-1.8.0/TESTING/EIG/pzmatgen.f0000640000175000017500000004635310363532303017044 0ustar muammarmuammar SUBROUTINE PZMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZMATGEN : Parallel Complex Double precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX*16, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB DOUBLE PRECISION DUMMY * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDRAND EXTERNAL ICEIL, NUMROC, LSAME, PDRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = DCMPLX( PDRAND(0), PDRAND(0) ) 30 CONTINUE IF( SYMM ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), ZERO ) DUMMY = PDRAND(0) END IF DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IF( HERM ) THEN A(IK,JK+J) = DCONJG( A(IK+J,JK) ) ELSE A(IK,JK+J) = A(IK+J,JK) END IF 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 IF( SYMM ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ TWO*PDRAND(0) - ONE ) END IF JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 IF( TRAN ) THEN A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) ELSE A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ TWO*PDRAND(0) - ONE ) END IF JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = DCMPLX( ONE - TWO*PDRAND(0), $ ONE - TWO*PDRAND(0) ) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 IF( HERM ) THEN A(IK,JK+J) = DCMPLX( $ ABS(DBLE(A(IK,JK+J)))+2*MAXMN, ZERO ) ELSE A(IK,JK+J)= DCMPLX( ABS(DBLE(A(IK,JK+J)))+MAXMN, $ ABS(DIMAG(A(IK,JK+J)))+ MAXMN ) END IF IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PZMATGEN * END scalapack-1.8.0/TESTING/EIG/pcsepqtq.f0000640000175000017500000002531210363532303017047 0ustar muammarmuammar* * SUBROUTINE PCSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, $ QTQNRM, INFO, RES ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES REAL QTQNRM, THRESH * .. * .. Array Arguments .. * INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ), $ PROCDIST( * ) REAL GAP( * ), WORK( * ) COMPLEX C( * ), Q( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * Compute |I - QT * Q| / (ulp * n) * * Arguments * ========= * * NP = number of local rows in C * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in Q * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) COMPLEX array, * global dimension (MS, NV), local dimension (LDQ, NQ) * * Contains the eigenvectors as computed by PCSTEIN * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) COMPLEX array, * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ) * * Accumulator for computing I - QT * Q * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (input) REAL array, dimension (NV) * All procesors have an identical copy of W() * * Contains the computed eigenvalues * * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1) * Identifies which eigenvectors are the last to be computed * by a given process * * ICLUSTR (global input) INTEGER array dimension (2*P) * This input array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace. * This should be the output of PCSTEIN. * * GAP (global input) REAL array, dimension (P) * This input array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * Where: * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * * QTQNRM (global output) REAL * |QTQ -I| / EPS * * RES (global output) INTEGER * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE, NEGONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, $ NEGONE = -1.0E+0 ) * .. * .. Intrinsic Functions .. * INTRINSIC CMPLX, MAX, REAL * .. * .. Local Scalars .. INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN, $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW REAL NORM, QTQNRM2, ULP * .. * .. External Functions .. INTEGER NUMROC REAL PCLANGE, PSLAMCH EXTERNAL NUMROC, PCLANGE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCGEMM, PCLASET, $ PCMATADD, PXERBLA * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * RES = 0 ULP = PSLAMCH( DESCC( CTXT_ ), 'P' ) * CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO ) CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO ) * IF( INFO.EQ.0 ) THEN NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * IF( IQ.NE.1 ) THEN INFO = -5 ELSE IF( JQ.NE.1 ) THEN INFO = -6 ELSE IF( IC.NE.1 ) THEN INFO = -9 ELSE IF( JC.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -16 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCC( CTXT_ ), 'PCSEPQTQ', -INFO ) RETURN END IF * * C = Identity matrix * CALL PCLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC ) * * C = C - QT * Q * IF( NV*MS.GT.0 ) THEN CALL PCGEMM( 'Conjugate transpose', 'N', NV, NV, MS, NEGONE, Q, $ 1, 1, DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC ) END IF * * Allow for poorly orthogonalized eigenvectors for large clusters * NORM = PCLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) QTQNRM = NORM / ( REAL( MAX( MS, 1 ) )*ULP ) * CLUSTER = 1 10 CONTINUE DO 20 FIRSTP = 1, NPROW*NPCOL IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE * IMIN = ICLUSTR( 2*CLUSTER-1 ) JMAX = ICLUSTR( 2*CLUSTER ) * * IF( IMIN.EQ.0 ) $ GO TO 60 * DO 40 NEXTP = FIRSTP, NPROW*NPCOL IMAX = PROCDIST( NEXTP ) JMIN = IMAX + 1 * * CALL PCMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN, $ DESCC, CMPLX( GAP( CLUSTER ) / 0.01E+0 ), C, $ IMIN, JMIN, DESCC ) CALL PCMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN, $ DESCC, CMPLX( GAP( CLUSTER ) / 0.01E+0 ), C, $ JMIN, IMIN, DESCC ) IMIN = IMAX * IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) ) $ GO TO 50 40 CONTINUE 50 CONTINUE * CLUSTER = CLUSTER + 1 GO TO 10 60 CONTINUE * * Compute the norm of C * NORM = PCLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) * QTQNRM2 = NORM / ( REAL( MAX( MS, 1 ) )*ULP ) * IF( QTQNRM2.GT.THRESH ) THEN RES = 1 QTQNRM = QTQNRM2 END IF RETURN * * End of PCSEPQTQ * END scalapack-1.8.0/TESTING/EIG/pcsdpsubtst.f0000640000175000017500000004011010363532303017556 0ustar muammarmuammar SUBROUTINE PCSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, COPYA, $ Z, IA, JA, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK, LWORK, RWORK, LRWORK, $ LWORK1, IWORK, LIWORK, RESULT, TSTNRM, $ QTQNRM, NOUT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 28, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LIWORK, LRWORK, $ LWORK, LWORK1, N, NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) REAL RWORK( * ), WIN( * ), WNEW( * ) COMPLEX A( * ), COPYA( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCSDPSUBTST calls PCHEEVD and then tests the output of * PCHEEVD * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PCHEEVD when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PCSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PCHEEVD for a description of block cyclic layout. * The test matrix, which is then modified by PCHEEVD * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) COMPLEX array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PCSEPCHK and PCSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as copmuted by this call to PCHEEVD * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) COMPLEX array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) REAL array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PCHEEVD * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PCHEEVD * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0 ) COMPLEX CPADVAL PARAMETER ( CPADVAL = ( 13.989E+0, 1.93E+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) COMPLEX CZERO, CONE, CNEGONE PARAMETER ( CZERO = 0.0E+0, CONE = 1.0E+0, $ CNEGONE = -1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZEHEEVD, ISIZEHEEVX, $ ISIZESUBTST, ISIZETST, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ, RES, RSIZECHK, RSIZEHEEVD, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVD, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST REAL EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, NORM, $ NORMWIN, SAFMIN, ULP * .. * .. Local Arrays .. INTEGER ITMP( 2 ) * .. * .. External Functions .. * INTEGER NUMROC REAL PCLANGE, PCLANHE, PSLAMCH EXTERNAL NUMROC, PCLANGE, PCLANHE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLACPY, IGAMN2D, IGAMX2D, $ PCCHEKPAD, PCFILLPAD, PCGEMM, PCHEEVD, PCLASET, $ PCLASIZESEP, PCSEPCHK, PICHEKPAD, PIFILLPAD, $ PSCHEKPAD, PSFILLPAD, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, SIZESUBTST, $ RSIZESUBTST, ISIZESUBTST, SIZETST, RSIZETST, $ ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63E+0, 1.1E+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 40 CONTINUE * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 60 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL CLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PCFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL ) * CALL PCFILLPAD( DESCA( CTXT_ ), NP, NQ, Z, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PCFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, CPADVAL+4.1E+0 ) * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) * CALL PCHEEVD( 'V', UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), SIZEHEEVD, RWORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, CPADVAL ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-Z', NP, NQ, Z, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ CPADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, CPADVAL+4.1E+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEEVD-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * * Check INFO * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PCLANHE( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ RWORK )*EPS END IF * * Note that a couple key variables get redefined in PCSEPCHK * as described by this table: * * PCSEPTST name PCSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * Perform the |AQ - QE| test * CALL PSFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, RSIZECHK, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * CALL PCSEPCHK( N, N, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCA, A( 1+IPREPAD ), $ IA, JA, DESCA, WNEW( 1+IPREPAD ), $ RWORK( 1+IPREPAD ), RSIZECHK, TSTNRM, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCSDPCHK-rWORK', RSIZECHK, 1, $ RWORK, RSIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RES.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9995 ) END IF * * Perform the |QTQ - I| test * CALL PSFILLPAD( DESCA( CTXT_ ), RSIZEQTQ, 1, RWORK, RSIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * * RES = 0 ULP = PSLAMCH( DESCA( CTXT_ ), 'P' ) CALL PCLASET( 'A', N, N, CZERO, CONE, A( 1+IPREPAD ), IA, JA, $ DESCA ) CALL PCGEMM( 'Conjugate transpose', 'N', N, N, N, CNEGONE, $ Z( 1+IPREPAD ), IA, JA, DESCA, Z( 1+IPREPAD ), IA, $ JA, DESCA, CONE, A( 1+IPREPAD ), IA, JA, DESCA ) NORM = PCLANGE( '1', N, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ) ) QTQNRM = NORM / ( REAL( MAX( N, 1 ) )*ULP ) IF( QTQNRM.GT.THRESH ) THEN RES = 1 END IF CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCSEPQTQ-rWORK', RSIZEQTQ, 1, $ RWORK, RSIZEQTQ, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RES.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9994 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0.0 * DO 50 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 50 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF * * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 60 CONTINUE * RETURN * 9999 FORMAT( 'PCHEEVD returned INFO=', I7 ) 9998 FORMAT( 'PCSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PCSDPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PCHEEVD returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'PCHEEVD failed the |AQ -QE| test' ) 9994 FORMAT( 'PCHEEVD failed the |QTQ -I| test' ) * * End of PCSDPSUBTST * END scalapack-1.8.0/TESTING/EIG/pzevcinfo.f0000640000175000017500000003005010363532303017205 0ustar muammarmuammar SUBROUTINE PZEVCINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZEVCINFO gets needed startup information for PZTREVC driver * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Further Details * =============== * * Implemented by: M. Fahey, June 2000 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'EVC.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK nonsymmetric eigenvector calculation.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'complex double precision eigenvector calculation.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = max( ||T*R-R*D||/(||H||*eps*N)' // $ ' , ||T^H*L-L*D^H||/(||H||*eps*N) )' WRITE( NOUT, FMT = 9999 ) $ ' Normalization residual = max(max_j(max|R(j)|-1),' // $ ' max_j(max|L(j)|-1))/(eps*N)' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PZEVCINFO * END scalapack-1.8.0/TESTING/EIG/pzgseptst.f0000640000175000017500000012265310363532303017260 0ustar muammarmuammar* * SUBROUTINE PZGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, $ THRESH, ORDER, ABSTOL, ISEED, A, COPYA, B, $ COPYB, Z, LDA, WIN, WNEW, IFAIL, ICLUSTR, $ GAP, IPREPAD, IPOSTPAD, WORK, LWORK, RWORK, $ LRWORK, IWORK, LIWORK, NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER IBTYPE, INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, $ LRWORK, LWORK, MATTYPE, N, NOUT, ORDER DOUBLE PRECISION ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX*16 A( LDA, * ), B( LDA, * ), COPYA( LDA, * ), $ COPYB( LDA, * ), WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PZGSEPTST builds a random matrix A, and a well conditioned * matrix B, runs PZHEGVX() to compute the eigenvalues * and eigenvectors and then calls PZHEGVCHK to compute * the residual. * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) Hermitian matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PZSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) COMPLEX*16 array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PZHEGVX * * COPYA (local workspace) COMPLEX*16 array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * B (local workspace) COMPLEX*16 array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PZHEGVX * * COPYB (local workspace) COMPLEX*16 array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) COMPLEX*16 array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PZGSEPCHK * * W (local workspace) DOUBLE PRECISION array, dimension (N) * On normal exit from PZHEGVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PZLASIZEGSEP * * RWORK (local workspace) COMPLEX*16 array, dimension (LWORK) * * LRWORK (local input) INTEGER * The length of the array WORK. LRWORK >= RSIZETST as * returned by PZLASIZEGSEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PZLASIZEGSEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0, $ HALF = 0.5D+0 ) COMPLEX*16 PADVAL PARAMETER ( PADVAL = ( 19.25D+0, 1.1D+1 ) ) COMPLEX*16 ZZERO PARAMETER ( ZZERO = ( 0.0D+0, 0.0D+0 ) ) COMPLEX*16 ZONE PARAMETER ( ZONE = ( 1.0D+0, 0.0D+0 ) ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDRWORK, INDWORK, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, ITYPE, IU, J, LHEEVXSIZE, LLRWORK, $ LLWORK, MAXSIZE, MYCOL, MYROW, NB, NGEN, NLOC, $ NNODES, NP, NPCOL, NPROW, NQ, RES, RSIZECHK, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZESUBTST, SIZETMS, SIZETST, VALSIZE, VECSIZE DOUBLE PRECISION ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL LSAME, NUMROC, DLARAN, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, DLABAD, DLASRT, $ IGAMX2D, IGEBR2D, IGEBS2D, PZCHEKPAD, PZELSET, $ PZFILLPAD, PZGSEPSUBTST, PZLASET, PZLASIZEGSEP, $ PZLASIZEHEEVX, PZLATMS, PZMATGEN, SLCOMBINE, $ ZLATMS * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED ' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PZLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * IF( LRWORK.LT.RSIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDRWORK = INDD + N INDWORK = 1 LLWORK = LWORK - INDWORK + 1 LLRWORK = LRWORK - INDRWORK + 1 * ULP = PDLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PDLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL DLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / DBLE( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log Hermitian, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random Hermitian * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N RWORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PZLASET( 'All', N, N, ZZERO, ZZERO, COPYA, 1, 1, $ DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N RWORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PZLASET( 'All', N, N, ZZERO, ZONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Hermitian, eigenvalues specified * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * Hermitian, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PZMATGEN( DESCA( CTXT_ ), 'H', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PZLASET( 'All', N, N, ZZERO, ZZERO, COPYA, 1, 1, $ DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( DLARAN( ISEED )*DBLE( NLOC ) ), N-NGEN ) * CALL ZLATMS( IN, IN, 'S', ISEED, 'P', RWORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PZELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PZELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PZELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PZELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 RWORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0D+0 ) * CALL PZLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL DLASRT( 'I', N, RWORK( INDD ), IINFO ) * * Create the B matrix * CALL PZFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.3D+0 ) * ANORM = ONE * * Update ISEED so that {ZLAGSY creates a different Q * ISEED( 4 ) = MOD( ISEED( 4 )+257, 4096 ) ISEED( 3 ) = MOD( ISEED( 3 )+192, 4096 ) ISEED( 2 ) = MOD( ISEED( 2 )+35, 4096 ) ISEED( 1 ) = MOD( ISEED( 1 )+128, 4096 ) CALL PZLATMS( N, N, 'S', ISEED, 'P', RWORK( INDD ), 3, TEN, $ ANORM, N, N, 'N', COPYB, 1, 1, DESCA, ORDER, $ WORK( INDWORK+IPREPAD ), SIZETMS, IINFO ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZLATMS5-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.3D+0 ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PZLASIZEHEEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, RWORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LHEEVXSIZE = MIN( MAXSIZE, LRWORK ) WKNOWN = .FALSE. * CALL PZGSEPSUBTST( WKNOWN, IBTYPE, 'v', 'a', UPLO, N, VL, VU, $ IL, IU, THRESH, ABSTOL, A, COPYA, B, COPYB, $ Z, 1, 1, DESCA, RWORK( INDD ), WIN, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK( INDRWORK ), $ LLRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PZLASIZEHEEVX to choose IL and IU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LHEEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PZLASIZEHEEVX to choose VL and VU for us. * CALL PZLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PZGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9984 )IBTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ), MAXTSTNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS END IF END IF * 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 11X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 11X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 22X, 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PZGSEPDRIVER' ) * * End of PZGSEPTST * END scalapack-1.8.0/TESTING/EIG/psnepinfo.f0000640000175000017500000002767710363532303017230 0ustar muammarmuammar SUBROUTINE PSNEPINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSNEPINFO gets needed startup information for PDHSEQR drivers * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * * Implemented by: G. Henry, May 10, 1996 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'NEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QSQ^T by Schur Decomposition.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'real single precision Schur decomposition.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = ||H-QSQ^T|| / ' // $ '(||H|| * eps * N )' WRITE( NOUT, FMT = 9999 ) $ ' Orthogonality residual = ||I - Q^TQ|| / ' // '( eps * N )' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PSNEPINFO * END scalapack-1.8.0/TESTING/EIG/psnepfchk.f0000640000175000017500000002575410363532303017202 0ustar muammarmuammar SUBROUTINE PSNEPFCHK( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, $ DESCZ, ANORM, FRESID, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IASEED, IZ, JA, JZ, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) REAL A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSNEPFCHK computes the residual * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1) * and ||.|| stands for the infinity norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The order of sub( A ) and sub( Z ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * Z (local input) REAL pointer into the local memory * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this * array contains the local pieces of the N-by-N distributed * matrix sub( Z ). * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) REAL array, dimension (LWORK). * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where * * IROFFA = MOD( IA-1, MB_A ), * ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of rows and a block of columns * of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF, $ IW, J, JB, JJA, JN, LDA, LDW, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL EPS * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, SMATADD, INFOG2L, $ PSGEMM, PSLACPY, PSLASET, PSMATGEN * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF LDW = MAX( 1, NP ) * * First compute H <- H * Z**T * CALL DESCSET( DESCW, DESCA( MB_ ), N, DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IA, IA + N - 1, DESCA( MB_ ) IB = MIN( IA+N-I, DESCA( MB_ ) ) * CALL PSLACPY( 'All', IB, N, A, I, JA, DESCA, WORK, 1, 1, $ DESCW ) CALL PSGEMM( 'No transpose', 'Transpose', IB, N, N, ONE, WORK, $ 1, 1, DESCW, Z, IZ, JZ, DESCZ, ZERO, A, I, JA, $ DESCA ) * DESCW( RSRC_ ) = MOD( DESCW( RSRC_ )+1, NPROW ) * 10 CONTINUE * * Then compute H <- Z * H = Z * H0 * Z**T * CALL DESCSET( DESCW, N, DESCA( NB_ ), DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, LDW ) * DO 20 J = JA, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * CALL PSLACPY( 'All', N, JB, A, IA, J, DESCA, WORK, 1, 1, $ DESCW ) CALL PSGEMM( 'No transpose', 'No transpose', N, JB, N, ONE, Z, $ IZ, JZ, DESCZ, WORK, 1, 1, DESCW, ZERO, A, IA, J, $ DESCA ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Compute H - H0 * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA-1 )*LDA IW = 1 JB = JN - JA + 1 DESCW( CSRC_ ) = IACOL * * Handle first block of columns separately * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PSMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ NP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL PSLASET( 'Lower', MAX( 0, N-2 ), JB, ZERO, ZERO, WORK, $ MIN( IW+2, N ), 1, DESCW ) CALL SMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * DO 30 J = JN + 1, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PSMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PSLASET( 'Lower', MAX( 0, N-IW-1 ), JB, ZERO, ZERO, $ WORK, MIN( N, IW+2 ), 1, DESCW ) CALL SMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) 30 CONTINUE * * Calculate factor residual * FRESID = PSLANGE( 'I', N, N, A, IA, JA, DESCA, WORK ) / $ ( N*EPS*ANORM ) * RETURN * * End PSNEPFCHK * END scalapack-1.8.0/TESTING/EIG/pdsepinfo.f0000640000175000017500000003053110363532303017175 0ustar muammarmuammar* * SUBROUTINE PDSEPINFO( CONTEXT, IAM, NIN, NOUT, MAXSETSIZE, $ NMATSIZES, MATSIZES, NUPLOS, UPLOS, $ NPCONFIGS, NPROWS, NPCOLS, NBS, NMATTYPES, $ MATTYPES, MAXTYPE, SUBTESTS, THRESH, ORDER, $ ABSTOL, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PDSEPINFO reads the input test data file (INFILE), copies the * information therein to all processes and returns this information * in the corresponding parameters. * * Arguments * ========= * * CONTEXT (global input) INTEGER * BLACS Context * * IAM (local input) INTEGER * process number. * IAM.EQ.0 on the proceesor that performs I/O * * NIN (global input) INTEGER * The unit number of the input file. * * NOUT (global output) INTEGER * The unit number for output file. * if NOUT = 6, ouput to screen, * if NOUT = 0, output to stderr * Only defined for process 0. * * MAXSETSIZE (global output) INTEGER * Maximum set size. Size of the following arrays: * MATSIZES, MATTYPES, NBS, NPCOLS, NPROWS * * NMATSIZES (global output) INTEGER * Number of matrix sizes to test * * MATSIZES (global output) INTEGER array dimension MAXSETSIZE * Matrix sizes to test * * NUPLOS (global output) INTEGER * Number of UPLO values to test * * UPLOS (global output) CHARACTER*1 array dimension 2 * Values of UPLO to test * * NPCONFIGS (global output) INTEGER * Number of process configuratins (NPROW, NPCOL, NB) * * NPROWS (global output) INTEGER array dimension MAXSETSIZE * Values of NPROW to test * * NPCOLS (global output) INTEGER array dimension MAXSETSIZE * Values of NPCOL to test * * NBS (global output) INTEGER array dimension MAXSETSIZE * Values of NB to test * * NMATTYPES (global output) INTEGER * Number of matrix types to test * * MATTYPES (global output) INTEGER array dimension MAXSETSIZE * Matrix types to test * Refer to PDSEPTST for a complete description of the * supported matrix types. * * MAXTYPE (global input) INTEGER * Maximum allowed matrix type * * SUBTESTS (global output) CHARACTER * 'N' = Do not perform subtests * 'Y' = Perfrom subtests * * * THRESH (global output) @(tupc) * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * ( THRESH is set to 1/10 of the value defined in the .dat * file when NOUT = 13. THRESH is set to 1/20 of the value * defined in the .dat file when NOUT = 14. This allows us * to specify more stringent criteria for our internal testing ) * * ORDER (global output) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global output) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PDSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * If ( ABSTOL .EQ. 0 in SEP.dat, it is set to * 2.0 * PDLAMCH( 'u' ) in this routine. * * INFO (global output) INTEGER * 0 = normal return * -1 = end of file * -2 = incorrrect data specification * * .. Scalar Arguments .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, INFO, MAXSETSIZE, MAXTYPE, NIN, $ NMATSIZES, NMATTYPES, NOUT, NPCONFIGS, NUPLOS, $ ORDER DOUBLE PRECISION ABSTOL, THRESH * .. * .. Array Arguments .. CHARACTER UPLOS( 2 ) INTEGER MATSIZES( MAXSETSIZE ), MATTYPES( MAXSETSIZE ), $ NBS( MAXSETSIZE ), NPCOLS( MAXSETSIZE ), $ NPROWS( MAXSETSIZE ) * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION TWO, TEN, TWENTY PARAMETER ( TWO = 2.0D0, TEN = 10.0D0, TWENTY = 20.0D0 ) * .. * .. Local Scalars .. CHARACTER*80 TESTSUMMRY INTEGER I, ISUBTESTS * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * * .. External Subroutines .. EXTERNAL DGEBR2D, DGEBS2D, IGEBR2D, IGEBS2D * .. * * .. Local Arrays .. INTEGER IUPLOS( 2 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = 9997 )TESTSUMMRY TESTSUMMRY = ' ' READ( NIN, FMT = 9997 )TESTSUMMRY WRITE( NOUT, FMT = 9997 )TESTSUMMRY END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATSIZES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1, 0, 0 ) END IF IF( NMATSIZES.EQ.-1 ) THEN INFO = -1 GO TO 70 END IF IF( NMATSIZES.LT.1 .OR. NMATSIZES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Matrix size', NMATSIZES, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MATSIZES( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MATSIZES, 1, $ 0, 0 ) END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NUPLOS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NUPLOS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NUPLOS, 1, 0, 0 ) END IF IF( NUPLOS.LT.1 .OR. NUPLOS.GT.2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# of UPLOs', NUPLOS, 1, 2 END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( UPLOS( I ), I = 1, NUPLOS ) DO 10 I = 1, NUPLOS IF( LSAME( UPLOS( I ), 'L' ) ) THEN IUPLOS( I ) = 1 ELSE IUPLOS( I ) = 2 END IF 10 CONTINUE CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NUPLOS, IUPLOS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NUPLOS, IUPLOS, 1, 0, 0 ) END IF DO 20 I = 1, NUPLOS IF( IUPLOS( I ).EQ.1 ) THEN UPLOS( I ) = 'L' ELSE UPLOS( I ) = 'U' END IF 20 CONTINUE * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NPCONFIGS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1, 0, 0 ) END IF IF( NPCONFIGS.LT.1 .OR. NPCONFIGS.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# proc configs', NPCONFIGS, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPROWS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1, 0, $ 0 ) END IF DO 30 I = 1, NPCONFIGS IF( NPROWS( I ).LE.0 ) $ INFO = -2 30 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NPROW' END IF GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPCOLS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1, 0, $ 0 ) END IF DO 40 I = 1, NPCONFIGS IF( NPCOLS( I ).LE.0 ) $ INFO = -2 40 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NPCOL' END IF GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NBS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1, 0, 0 ) END IF DO 50 I = 1, NPCONFIGS IF( NBS( I ).LE.0 ) $ INFO = -2 50 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NB' END IF GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATTYPES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATTYPES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATTYPES, 1, 0, 0 ) END IF IF( NMATTYPES.LT.1 .OR. NMATTYPES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'matrix types', NMATTYPES, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MATTYPES( I ), I = 1, NMATTYPES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATTYPES, MATTYPES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATTYPES, MATTYPES, 1, $ 0, 0 ) END IF * DO 60 I = 1, NMATTYPES IF( MATTYPES( I ).LT.1 .OR. MATTYPES( I ).GT.MAXTYPE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'matrix type', MATTYPES( I ), $ 1, MAXTYPE END IF MATTYPES( I ) = 1 END IF 60 CONTINUE * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )SUBTESTS IF( LSAME( SUBTESTS, 'Y' ) ) THEN ISUBTESTS = 2 ELSE ISUBTESTS = 1 END IF CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, ISUBTESTS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, ISUBTESTS, 1, 0, 0 ) END IF IF( ISUBTESTS.EQ.2 ) THEN SUBTESTS = 'Y' ELSE SUBTESTS = 'N' END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )THRESH IF( NOUT.EQ.13 ) $ THRESH = THRESH / TEN IF( NOUT.EQ.14 ) $ THRESH = THRESH / TWENTY CALL DGEBS2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1 ) ELSE CALL DGEBR2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) END IF * ORDER = 0 * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )ABSTOL CALL DGEBS2D( CONTEXT, 'All', ' ', 1, 1, ABSTOL, 1 ) ELSE CALL DGEBR2D( CONTEXT, 'All', ' ', 1, 1, ABSTOL, 1, 0, 0 ) END IF IF( ABSTOL.LT.0 ) $ ABSTOL = TWO*PDLAMCH( CONTEXT, 'U' ) * INFO = 0 * 70 CONTINUE RETURN * 9999 FORMAT( A20, ' is:', I5, ' must be between:', I5, ' and', I5 ) 9998 FORMAT( A20, ' is:', I5, ' must be:', I5, ' or', I5 ) 9997 FORMAT( A ) 9996 FORMAT( A20, ' must be positive' ) * * End of PDSEPINFO * END scalapack-1.8.0/TESTING/EIG/pdsvdcmp.f0000640000175000017500000002675110363532303017037 0ustar muammarmuammar SUBROUTINE PDSVDCMP( M, N, JOBTYPE, S, SC, U, UC, IU, JU, DESCU, $ VT, VTC, IVT, JVT, DESCVT, THRESH, RESULT, $ DELTA, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IU, IVT, JOBTYPE, JU, JVT, LWORK, M, N DOUBLE PRECISION DELTA, THRESH * .. * .. Array Arguments .. INTEGER DESCU( * ), DESCVT( * ), RESULT( * ) DOUBLE PRECISION S( * ), SC( * ), U( * ), UC( * ), VT( * ), $ VTC( * ), WORK( * ) * .. * * Purpose * ======== * Testing how accurately "full" and "partial" decomposition options * provided by PDGESVD correspond to each other. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========== * * M (global input) INTEGER * Number of rows of the distributed matrix, for which * SVD was calculated * * N (global input) INTEGER * Number of columns of the distributed matrix, for which * SVD was calculated * * JOBTYPE (global input) INTEGER * Depending on the value of this parameter, * the following comparisons are performed: * * JOBTYPE | COMPARISON * ------------------------------------------- * 2 | | U - UC | / ( M ulp ) > THRESH, * 3 | | VT - VTC | / ( N ulp ) > THRESH * * In addition, for JOBTYPE = 2:4 comparison * | S1 - S2 | / ( SIZE ulp |S| ) > THRESH * is performed. Positive result of any of the comparisons * typically indicates erroneous computations and sets * to one corresponding element of array RESULT * * S (global input) DOUBLE PRECISION array of singular values * calculated for JOBTYPE equal to 1 * * SC (global input) DOUBLE PRECISION array of singular values * calculated for JOBTYPE nonequal to 1 * * U (local input) DOUBLE PRECISION array of left singular * vectors calculated for JOBTYPE equal to 1, local * dimension (MP, SIZEQ), global dimension (M, SIZE) * * UC (local input) DOUBLE PRECISION array of left singular * vectors calculated for JOBTYPE non equal to 1, local * dimension (MP, SIZEQ), global dimension (M, SIZE) * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U and UC * * V (local input) DOUBLE PRECISION array of right singular * vectors calculated for JOBTYPE equal to 1, local * dimension (SIZEP, NQ), global dimension (SIZE, N) * * VC (local input) DOUBLE PRECISION array of right singular * vectors calculated for JOBTYPE non equal to 1, local * dimension (SIZEP, NQ), global dimension (SIZE, N) * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT and * VTC * * THRESH (global input) DOUBLE PRECISION * The threshold value for the test ratios. A result is * included in the output file if RESULT >= THRESH. The test * ratios are scaled to be O(1), so THRESH should be a small * multiple of 1, e.g., 10 or 100. To have every test ratio * printed, use THRESH = 0. * * RESULT (global input/output) INTEGER array. * Every nonzero entry corresponds to erroneous computation. * * DELTA (global output) DOUBLE PRECISION * maximum of the available of the following three values * | U - UC | / ( M ulp THRESH ), * | VT - VT | / ( N ulp THRESH ), * | S1 - S2 | / ( SIZE ulp |S| THRESH ) * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COLPTR, I, INFO, J, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW, NQ, RESULTS, SIZE, SIZEPOS, SIZEQ DOUBLE PRECISION ACCUR, CMP, NORMDIFS, NORMDIFU, NORMDIFV, $ NORMS, ULP * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION DLANGE, PDLAMCH, PDLANGE EXTERNAL NUMROC, DLANGE, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*DLEN_*DTYPE_*MB_*M_*N_*RSRC_.LT.0 ) $ RETURN * RESULTS = 0 NORMDIFS = 0 NORMDIFU = 0 NORMDIFV = 0 SIZE = MIN( M, N ) * * Sizepos is a number of parameters to pdsvdcmp plus one. It's used * for the error reporting. * SIZEPOS = 17 INFO = 0 CALL BLACS_GRIDINFO( DESCU( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, SIZE, SIZEPOS, 1, 1, DESCU, 8, INFO ) CALL CHK1MAT( SIZE, SIZEPOS, N, 2, 1, 1, DESCVT, 11, INFO ) END IF * IF( INFO.EQ.0 ) THEN * * Calculate workspace. * SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ) NQ = NUMROC( N, DESCVT( NB_ ), MYCOL, 0, NPCOL ) LWMIN = MAX( SIZEQ, NQ ) + 4 WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 60 IF( LWORK.LT.LWMIN ) THEN INFO = -16 ELSE IF( THRESH.LE.0 ) THEN INFO = -12 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCU( CTXT_ ), 'PDSVDCMP', -INFO ) RETURN END IF * ULP = PDLAMCH( DESCU( CTXT_ ), 'P' ) * * Make comparison of singular values. * NORMS = DLANGE( '1', SIZE, 1, S, SIZE, WORK ) DO 10 I = 1, SIZE SC( I ) = S( I ) - SC( I ) 10 CONTINUE * NORMDIFS = DLANGE( '1', SIZE, 1, SC, SIZE, WORK ) ACCUR = ULP*SIZE*NORMS*THRESH * IF( NORMDIFS.GT.ACCUR ) $ RESULTS = 1 IF( NORMDIFS.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFS = 0 ELSE NORMDIFS = NORMDIFS / ACCUR END IF * IF( JOBTYPE.EQ.2 ) THEN * RESULT( 5 ) = RESULTS ACCUR = ULP*M*THRESH DO 30 J = 1, SIZEQ COLPTR = DESCU( LLD_ )*( J-1 ) DO 20 I = 1, DESCU( LLD_ ) UC( I+COLPTR ) = U( I+COLPTR ) - UC( I+COLPTR ) 20 CONTINUE 30 CONTINUE * NORMDIFU = PDLANGE( '1', M, SIZE, UC, IU, JU, DESCU, WORK ) * IF( NORMDIFU.GE.ACCUR ) $ RESULT( 6 ) = 1 IF( NORMDIFU.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFU = 0 ELSE NORMDIFU = NORMDIFU / ACCUR END IF * ELSE IF( JOBTYPE.EQ.3 ) THEN * RESULT( 7 ) = RESULTS ACCUR = ULP*N*THRESH DO 50 J = 1, NQ COLPTR = DESCVT( LLD_ )*( J-1 ) DO 40 I = 1, DESCVT( LLD_ ) VTC( I+COLPTR ) = VT( I+COLPTR ) - VTC( I+COLPTR ) 40 CONTINUE 50 CONTINUE * NORMDIFV = PDLANGE( '1', SIZE, N, VTC, IVT, JVT, DESCVT, WORK ) * IF( NORMDIFV.GE.ACCUR ) $ RESULT( 8 ) = 1 * IF( NORMDIFV.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFV = 0 ELSE NORMDIFV = NORMDIFV / ACCUR END IF * ELSE IF( JOBTYPE.EQ.4 ) THEN * RESULT( 9 ) = RESULTS * END IF * CMP = MAX( NORMDIFV, NORMDIFU ) DELTA = MAX( CMP, NORMDIFS ) * 60 CONTINUE * * End of PDSVDCMP * RETURN END scalapack-1.8.0/TESTING/EIG/pdsdpsubtst.f0000640000175000017500000004056010363532303017570 0ustar muammarmuammar SUBROUTINE PDSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, IA, JA, DESCA, WIN, WNEW, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ IWORK, LIWORK, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 16, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LWORK, LWORK1, N, $ NOUT, RESULT, LIWORK DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) DOUBLE PRECISION A( * ), COPYA( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSDPSUBTST calls PDSYEVD and then tests the output of * PDSYEVD * The following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PDSYEVD when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. * * A (local workspace) DOUBLE PRECISION array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PDSYEVD for a description of block cyclic layout. * The test matrix, which is then modified by PDSYEVD * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) DOUBLE PRECISION array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PDSEPCHK and PDSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as computed by this call to PDSYEVD. * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PDSYEVD * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PDSYEVD * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION FIVE, NEGONE, PADVAL, ZERO PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, MINSIZE, MQ, MYCOL, MYROW, $ NP, NPCOL, NPROW, NQ, RESAQ, RESQTQ, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST, SIZESYEVD, ISIZESYEVD, $ TRILWMIN DOUBLE PRECISION EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, $ NORMWIN, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, NUMROC, PDLAMCH, PDLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, IGAMN2D, IGAMX2D, $ PDCHEKPAD, PDELSET, PDFILLPAD, PDLASIZESQP, $ PDSEPCHK, PDSEPQTQ, PDSYEVD, DGAMN2D, $ DGAMX2D, DLACPY, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PDLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 30 CONTINUE * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Find the amount of workspace needed with or without eigenvectors. * TRILWMIN = 3*N + MAX( DESCA( NB_ )*( NP+1 ), 3*DESCA( NB_ ) ) MINSIZE = MAX( 1 + 6*N + 2*NP*NQ, TRILWMIN ) + 2*N * CALL DLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PDFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * * Make sure that PDSYEVD does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, N, 1 CALL PDELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0D+0 ) 50 CONTINUE 60 CONTINUE * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PDSYEVD( 'V', UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, IWORK( 1+IPREPAD ), $ LIWORK, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVD-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PDCHEKPAD( DESCZ( CTXT_ ), 'PDSYEVD-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVD-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEVD-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )INFO IF( INFO.EQ.(N+1) ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 END IF ELSE IF( INFO.EQ.14 .AND. LWORK1.GE.MINSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * IF( RESULT.EQ.0 .OR. INFO.GT.N ) THEN * * Make sure that different processes return the same eigenvalues. * This is a more exhaustive check that provided by PDSYEVD. * DO 70 I = 1, N WORK( I ) = WNEW( I+IPREPAD ) WORK( I+N ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', N, 1, WORK, N, 1, $ 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', N, 1, $ WORK( 1+N ), N, 1, 1, -1, -1, 0 ) * DO 80 I = 1, N * IF( ABS( WORK( I )-WORK( N+I ) ).GT.ZERO ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 GO TO 90 END IF 80 CONTINUE 90 CONTINUE END IF * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PDLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PDSEPCHK * as described by this table: * * PDSEPTST name PDSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * * * Perform the |AQ - QE| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * RESAQ = 0 * CALL PDSEPCHK( N, N, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RESAQ ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RESAQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9993 ) END IF * * Perform the |QTQ - I| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * RESQTQ = 0 * * DO 40 I = 1, 2 IWORK( IPREPAD + I ) = 0 40 CONTINUE CALL PDSEPQTQ( N, N, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1 ), IWORK( 1 ), WORK( 1 ), $ WORK( IPREPAD+1 ), SIZEQTQ, QTQNRM, INFO, $ RESQTQ ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RESQTQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9992 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF ENDIF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0 * DO 140 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PDSYEVD returned INFO=', I7 ) 9998 FORMAT( 'PDSEPQTQ in PDSDPSUBTST returned INFO=', I7 ) 9997 FORMAT( 'PDSDPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PDSYEVD returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'Different processes return different eigenvalues' ) 9994 FORMAT( 'Heterogeneity detected by PDSYEVD' ) 9993 FORMAT( 'PDSYEVD failed the |AQ -QE| test' ) 9992 FORMAT( 'PDSYEVD failed the |QTQ -I| test' ) * * End of PDSDPSUBTST * END scalapack-1.8.0/TESTING/EIG/pssqpsubtst.f0000640000175000017500000004057310363532303017630 0ustar muammarmuammar* * SUBROUTINE PSSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, IA, JA, DESCA, WIN, WNEW, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LWORK, LWORK1, N, $ NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), COPYA( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSQPSUBTST calls PSSYEV and then tests the output of * PSSYEV * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PSSYEV when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PSSQPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. * * A (local workspace) REAL array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PSSYEV for a description of block cyclic layout. * The test matrix, which is then modified by PSSYEV * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) REAL array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PSSEPCHK and PSSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as computed by this call to PSSYEV. * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) REAL array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PSSYEV * * RESULT (global output) INTEGER * The result of this call to PSSYEV * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL FIVE, NEGONE, PADVAL, ZERO PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, EIGS, MINSIZE, MQ, MYCOL, MYROW, $ NP, NPCOL, NPROW, NQ, RESAQ, RESQTQ, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST, SIZESYEVD, ISIZESYEVD REAL EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, $ NORMWIN, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), ITMP( 2 ), $ IWORK( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, PSLANSY EXTERNAL LSAME, NUMROC, PSLAMCH, PSLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, IGAMN2D, IGAMX2D, $ PSCHEKPAD, PSELSET, PSFILLPAD, PSLASIZESQP, $ PSSEPCHK, PSSEPQTQ, PSSYEV, SGAMN2D, SGAMX2D, $ SLACPY, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PSLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 30 CONTINUE * DO 40 I = 1, 2 IWORK( I ) = 0 40 CONTINUE * IF( LSAME( JOBZ, 'N' ) ) THEN EIGS = 0 ELSE EIGS = N END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( EIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Find the amount of workspace needed with or without eigenvectors. * CALL PSLASIZESYEV( JOBZ, N, DESCA, MINSIZE ) * CALL SLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PSFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * * Make sure that PSSYEV does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, EIGS, 1 CALL PSELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0E+0 ) 50 CONTINUE 60 CONTINUE * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PSSYEV( JOBZ, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEV-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PSCHEKPAD( DESCZ( CTXT_ ), 'PSSYEV-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEV-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEV-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )INFO IF( INFO.EQ.(N+1) ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 END IF ELSE IF( INFO.EQ.14 .AND. LWORK1.GE.MINSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * IF( RESULT.EQ.0 .OR. INFO.GT.N ) THEN * * Make sure that different processes return the same eigenvalues. * This is a more exhaustive check that provided by PSSYEV. * DO 70 I = 1, N WORK( I ) = WNEW( I+IPREPAD ) WORK( I+N ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', N, 1, WORK, N, 1, $ 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', N, 1, $ WORK( 1+N ), N, 1, 1, -1, -1, 0 ) * DO 80 I = 1, N * IF( ABS( WORK( I )-WORK( N+I ) ).GT.ZERO ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 GO TO 90 END IF 80 CONTINUE 90 CONTINUE END IF * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PSLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PSSEPCHK * as described by this table: * * PSSEPTST name PSSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * RESAQ = 0 * CALL PSSEPCHK( N, N, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RESAQ ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RESAQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9993 ) END IF * * Perform the |QTQ - I| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * RESQTQ = 0 * CALL PSSEPQTQ( N, N, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1 ), IWORK( 1 ), WORK( 1 ), $ WORK( IPREPAD+1 ), SIZEQTQ, QTQNRM, INFO, $ RESQTQ ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RESQTQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9992 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0 * DO 140 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PSSYEV returned INFO=', I7 ) 9998 FORMAT( 'PSSEPQTQ in PSSQPSUBTST returned INFO=', I7 ) 9997 FORMAT( 'PSSQPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PSSYEV returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'Different processes return different eigenvalues' ) 9994 FORMAT( 'Heterogeneity detected by PSSYEV' ) 9993 FORMAT( 'PSSYEV failed the |AQ -QE| test' ) 9992 FORMAT( 'PSSYEV failed the |QTQ -I| test' ) * * End of PSSQPSUBTST * END scalapack-1.8.0/TESTING/EIG/pcnepinfo.f0000640000175000017500000002776010363532303017201 0ustar muammarmuammar SUBROUTINE PCNEPINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCNEPINFO gets needed startup information for PCHSEQR drivers * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Further Details * =============== * * Implemented by: M. Fahey, June 2000 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a complex * single precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'NEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QSQ^H by Schur Decomposition.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'complex single precision Schur decomposition.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = ||H-QSQ^H|| / ' // $ '(||H|| * eps * N )' WRITE( NOUT, FMT = 9999 ) $ ' Orthogonality residual = ||I - Q^HQ|| / ' // '( eps * N )' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PCNEPINFO * END scalapack-1.8.0/TESTING/EIG/pznepfchk.f0000640000175000017500000002624410363532303017204 0ustar muammarmuammar SUBROUTINE PZNEPFCHK( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, $ DESCZ, ANORM, FRESID, WORK ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. INTEGER IA, IASEED, IZ, JA, JZ, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) COMPLEX*16 A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZNEPFCHK computes the residual * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1) * and ||.|| stands for the infinity norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The order of sub( A ) and sub( Z ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * Z (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this * array contains the local pieces of the N-by-N distributed * matrix sub( Z ). * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK). * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where * * IROFFA = MOD( IA-1, MB_A ), * ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of rows and a block of columns * of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * Further Details * =============== * * Contributed by Mark Fahey, March, 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF, $ IW, J, JB, JJA, JN, LDA, LDW, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION EPS * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PZGEMM, $ PZLACPY, PZLASET, PZMATGEN, ZMATADD * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF LDW = MAX( 1, NP ) * * First compute H <- H * Z**T * CALL DESCSET( DESCW, DESCA( MB_ ), N, DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IA, IA + N - 1, DESCA( MB_ ) IB = MIN( IA+N-I, DESCA( MB_ ) ) * CALL PZLACPY( 'All', IB, N, A, I, JA, DESCA, WORK, 1, 1, $ DESCW ) CALL PZGEMM( 'No transpose', 'Cong Tran', IB, N, N, ONE, WORK, $ 1, 1, DESCW, Z, IZ, JZ, DESCZ, ZERO, A, I, JA, $ DESCA ) * DESCW( RSRC_ ) = MOD( DESCW( RSRC_ )+1, NPROW ) * 10 CONTINUE * * Then compute H <- Z * H = Z * H0 * Z**T * CALL DESCSET( DESCW, N, DESCA( NB_ ), DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, LDW ) * DO 20 J = JA, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * CALL PZLACPY( 'All', N, JB, A, IA, J, DESCA, WORK, 1, 1, $ DESCW ) CALL PZGEMM( 'No transpose', 'No transpose', N, JB, N, ONE, Z, $ IZ, JZ, DESCZ, WORK, 1, 1, DESCW, ZERO, A, IA, J, $ DESCA ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Compute H - H0 * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA-1 )*LDA IW = 1 JB = JN - JA + 1 DESCW( CSRC_ ) = IACOL * * Handle first block of columns separately * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ NP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL PZLASET( 'Lower', MAX( 0, N-2 ), JB, ZERO, ZERO, WORK, $ MIN( IW+2, N ), 1, DESCW ) CALL ZMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * DO 30 J = JN + 1, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PZLASET( 'Lower', MAX( 0, N-IW-1 ), JB, ZERO, ZERO, $ WORK, MIN( N, IW+2 ), 1, DESCW ) CALL ZMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) 30 CONTINUE * * Calculate factor residual * FRESID = PZLANGE( 'I', N, N, A, IA, JA, DESCA, WORK ) / $ ( N*EPS*ANORM ) * RETURN * * End PZNEPFCHK * END scalapack-1.8.0/TESTING/EIG/pslasizesyevx.f0000640000175000017500000001720410363532303020141 0ustar muammarmuammar* * SUBROUTINE PSLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER RANGE INTEGER IL, IU, MAXSIZE, N, VALSIZE, VECSIZE REAL VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL WIN( * ) * .. * * Purpose * ======= * * PSLASIZESYEVX computes the amount of memory needed by PSSYEVX * to ensure: * 1) Orthogonal Eigenvectors * 2) Eigenvectors * 3) Eigenvalues * * Arguments * ========= * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * VL (global input/output ) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VL is set * to a random value near an entry in WIN * * VU (global input/output ) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VU is set * to a random value near an entry in WIN * * IL (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * If IL < 0, RANGE='I' and WKNOWN = .TRUE., IL is set * to a random value from 1 to N * * IU (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * If IU < 0, RANGE='I' and WKNOWN = .TRUE., IU is set * to a random value from IL to N * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * ISEED is not touched unless IL, IU, VL or VU are modified. * * WIN (global input) REAL array, dimension (N) * If WKNOWN=1, WIN contains the eigenvalues of the matrix. * * MAXSIZE (global output) INTEGER * Workspace required to guarantee that PSSYEVX will return * orthogonal eigenvectors. IF WKNOWN=0, MAXSIZE is set to a * a value which guarantees orthogonality no matter what the * spectrum is. If WKNOWN=1, MAXSIZE is set to a value which * guarantees orthogonality on a matrix with eigenvalues given * by WIN. * * VECSIZE (global output) INTEGER * Workspace required to guarantee that PSSYEVX * will compute eigenvectors. * * VALSIZE (global output) INTEGER * Workspace required to guarantee that PSSYEVX * will compute eigenvalues. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL TWENTY PARAMETER ( TWENTY = 20.0E0 ) * .. * .. Local Scalars .. * INTEGER CLUSTERSIZE, I, ILMIN, IUMAX, MAXCLUSTERSIZE, $ MQ0, MYCOL, MYIL, MYIU, MYROW, NB, NEIG, NN, $ NP0, NPCOL, NPROW REAL ANORM, EPS, ORFAC, SAFMIN, VLMIN, VUMAX * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, ICEIL, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, REAL * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ORFAC = 1.0E-3 * * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe Minimum' ) NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * VALSIZE = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) * IF( WKNOWN ) THEN ANORM = SAFMIN / EPS IF( N.GE.1 ) $ ANORM = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), ANORM ) * IF( LSAME( RANGE, 'I' ) ) THEN IF( IL.LT.0 ) $ IL = INT( SLARAN( ISEED )*REAL( N ) ) + 1 IF( IU.LT.0 ) $ IU = INT( SLARAN( ISEED )*REAL( N-IL ) ) + IL IF( N.EQ.0 ) $ IU = 0 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IF( VL.GT.VU ) THEN MYIL = INT( SLARAN( ISEED )*REAL( N ) ) + 1 MYIU = INT( SLARAN( ISEED )*REAL( N-MYIL ) ) + MYIL VL = WIN( MYIL ) + TWENTY*EPS*ABS( WIN( MYIL ) ) VU = WIN( MYIU ) + TWENTY*EPS*ABS( WIN( MYIU ) ) VU = MAX( VU, VL+EPS*TWENTY*ABS( VL )+SAFMIN ) END IF END IF * END IF IF( LSAME( RANGE, 'V' ) ) THEN * * Compute ILMIN, IUMAX (based on VL, VU and WIN) * IF( WKNOWN ) THEN VLMIN = VL - TWENTY*EPS*ANORM VUMAX = VU + TWENTY*EPS*ANORM ILMIN = 1 IUMAX = 0 DO 10 I = 1, N IF( WIN( I ).LT.VLMIN ) $ ILMIN = ILMIN + 1 IF( WIN( I ).LT.VUMAX ) $ IUMAX = IUMAX + 1 10 CONTINUE ELSE ILMIN = 1 IUMAX = N END IF ELSE IF( LSAME( RANGE, 'I' ) ) THEN ILMIN = IL IUMAX = IU ELSE IF( LSAME( RANGE, 'A' ) ) THEN ILMIN = 1 IUMAX = N END IF * NEIG = IUMAX - ILMIN + 1 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) VECSIZE = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN * IF( WKNOWN ) THEN CLUSTERSIZE = 1 MAXCLUSTERSIZE = 1 DO 20 I = ILMIN + 1, IUMAX IF( ( WIN( I )-WIN( I-1 ) ).LT.ORFAC*2*ANORM ) THEN CLUSTERSIZE = CLUSTERSIZE + 1 IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE CLUSTERSIZE = 1 END IF 20 CONTINUE IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE MAXCLUSTERSIZE = N END IF * MAXSIZE = VECSIZE + MAX( ( MAXCLUSTERSIZE-1 ), 0 )*N * * RETURN * * End of PSLASIZESYEVX * END scalapack-1.8.0/TESTING/EIG/pzlagsy.f0000640000175000017500000002573210363532303016706 0ustar muammarmuammar* * SUBROUTINE PZLAGHE( N, K, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * PZLAGHE generates a real Hermitian matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. * * This is just a quick implementation which will be replaced in the * future. The random orthogonal matrix is computed by creating a * random matrix and running QR on it. This requires vastly more * computation than necessary, but not significantly more communication * than is used in the rest of this rouinte, and hence is not that much * slower than an efficient solution. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix A. N >= 0. * * K (global input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * ### K must be 0 or N-1, 0 < K < N-1 is not supported yet. * * D (global input) COMPLEX*16 array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) COMPLEX*16 array * Global dimension (N, N), local dimension (NP, NQ) * The generated n by n Hermitian matrix A (the full matrix is * stored). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated and will remain identical on * all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PZLASIZESEP * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZZERO PARAMETER ( ZZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, II, IIROW, $ INDAA, INDTAU, INDWORK, IPOSTPAD, IPREPAD, $ IROFFA, ISIZEHEEVX, ISIZESUBTST, ISIZETST, $ JJCOL, LDAA, LII, LIII, LJJ, LJJJ, LWMIN, MAXI, $ MB_A, MYCOL, MYROW, NB_A, NP, NPCOL, NPROW, NQ, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, RSRC_A, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST,SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZGEQRF, $ PZLASIZESEP, PZMATGEN, PZUNMQR, ZLASET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. * INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Initialize grid information * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) END IF * LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) NP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) IPREPAD = 0 IPOSTPAD = 0 CALL PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) LWMIN = SIZETMS * * Test the input arguments * IF( INFO.EQ.0 ) THEN IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( N.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZLAGHE', -INFO ) RETURN END IF * INDAA = 1 INDTAU = INDAA + LDAA*MAX( 1, NQ ) INDWORK = INDTAU + MAX( 1, NQ ) * IF( K.NE.0 ) THEN CALL ZLASET( 'A', LDAA, NQ, ZZERO, ZZERO, WORK( INDAA ), LDAA ) * * * Build a random matrix * * CALL PZMATGEN( DESCA( CTXT_ ), 'N', 'N', N, ORDER, $ DESCA( MB_ ), DESCA( NB_ ), WORK( INDAA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ ISEED( 1 ), 0, NP, 0, NQ, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PZGEQRF( N, ORDER, WORK( INDAA ), IA, JA, DESCA, $ WORK( INDTAU ), WORK( INDWORK ), SIZEQRF, INFO ) * END IF * * Build a diagonal matrix A with the eigenvalues specified in D * CALL ZLASET( 'A', NP, NQ, ZZERO, ZZERO, A, DESCA( LLD_ ) ) * IIROW = 0 JJCOL = 0 LII = 1 LJJ = 1 * DO 20 II = 1, N, DESCA( MB_ ) MAXI = MIN( N, II+DESCA( MB_ )-1 ) IF( ( MYROW.EQ.IIROW ) .AND. ( MYCOL.EQ.JJCOL ) ) THEN LIII = LII LJJJ = LJJ DO 10 I = II, MAXI A( LIII+( LJJJ-1 )*DESCA( LLD_ ) ) = D( I ) LIII = LIII + 1 LJJJ = LJJJ + 1 10 CONTINUE END IF IF( MYROW.EQ.IIROW ) $ LII = LII + DESCA( MB_ ) IF( MYCOL.EQ.JJCOL ) $ LJJ = LJJ + DESCA( MB_ ) IIROW = MOD( IIROW+1, NPROW ) JJCOL = MOD( JJCOL+1, NPCOL ) 20 CONTINUE * * A = Q * A * IF( K.NE.0 ) THEN * CALL PZUNMQR( 'L', 'Conjugate transpose', N, N, ORDER, $ WORK( INDAA ), IA, JA, DESCA, WORK( INDTAU ), A, $ IA, JA, DESCA, WORK( INDWORK ), SIZEMQRLEFT, $ INFO ) * * * A = A * Q' * * CALL PZUNMQR( 'R', 'N', N, N, ORDER, WORK( INDAA ), IA, JA, $ DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRRIGHT, INFO ) * END IF * * End of PZLAGHE * END scalapack-1.8.0/TESTING/EIG/pctrdinfo.f0000640000175000017500000003224410363532303017201 0ustar muammarmuammar SUBROUTINE PCTRDINFO( SUMMRY, NOUT, UPLO, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY*(*) INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCTRDINFO gets needed startup information for the Hermitian * tridiagonal reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * UPLO (global output) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL PSLAMCH, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='TRD.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get UPLO * READ( NIN, FMT = * ) UPLO * * Get number of matrices and their dimensions * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'N', LDNVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'NB', LDNBVAL GOTO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDPVAL GOTO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 ) 'Grids', LDQVAL GOTO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS IF( LSAME( UPLO, 'L' ) ) THEN WORK( 4 ) = 1 ELSE WORK( 4 ) = 2 END IF CALL IGEBS2D( ICTXT, 'All', ' ', 4, 1, WORK, 4 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction Routine to Hermitian '// $ 'tridiagonal form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision Hermitian '// $ 'tridiagonal' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - QTQ''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'UPLO : Whether the ''Upper'' or ''Low'// $ 'er'' part of A is to be referenced.' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ 'than THRESH, CHECK is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ 'TRD time : Time in seconds to reduce the'// $ ' matrix to tridiagonal form.' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'Hermitian tridiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9999 ) $ ' UPLO : '//UPLO WRITE( NOUT, FMT = 9996 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) EPS WRITE( NOUT, FMT = 9998 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 4, 1, WORK, 4, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) IF( WORK( 4 ).EQ.1 ) THEN UPLO = 'L' ELSE UPLO = 'U' END IF * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9993 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PCTRDINFO * END scalapack-1.8.0/TESTING/EIG/psgseptst.f0000640000175000017500000012120510363532303017241 0ustar muammarmuammar* * SUBROUTINE PSGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, $ THRESH, ORDER, ABSTOL, ISEED, A, COPYA, B, $ COPYB, Z, LDA, WIN, WNEW, IFAIL, ICLUSTR, $ GAP, IPREPAD, IPOSTPAD, WORK, LWORK, IWORK, $ LIWORK, NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER IBTYPE, INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, $ LWORK, MATTYPE, N, NOUT, ORDER REAL ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) REAL A( LDA, * ), B( LDA, * ), COPYA( LDA, * ), $ COPYB( LDA, * ), GAP( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PSGSEPTST builds a random matrix A, and a well conditioned * matrix B, runs PSSYGVX() to compute the eigenvalues * and eigenvectors and then calls PSSYGVCHK to compute * the residual. * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) symmetric matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PSSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) REAL array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PSSYGVX * * COPYA (local workspace) REAL array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * B (local workspace) REAL array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PSSYGVX * * COPYB (local workspace) REAL array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PSGSEPCHK * * W (local workspace) REAL array, dimension (N) * On normal exit from PSSYGVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PSLASIZEGSEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PSLASIZEGSEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0, $ HALF = 0.5E+0 ) REAL PADVAL PARAMETER ( PADVAL = 19.25E+0 ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDWORK, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ ITYPE, IU, J, LLWORK, LSYEVXSIZE, MAXSIZE, $ MYCOL, MYROW, NB, NGEN, NLOC, NNODES, NP, $ NPCOL, NPROW, NQ, RES, SIZECHK, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, SIZESUBTST, $ SIZESYEVX, SIZETMS, SIZETST, VALSIZE, VECSIZE REAL ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, IGAMX2D, IGEBR2D, $ IGEBS2D, PSCHEKPAD, PSELSET, PSFILLPAD, $ PSGSEPSUBTST, PSLASET, PSLASIZEGSEP, $ PSLASIZESYEVX, PSLATMS, PSMATGEN, SLABAD, $ SLASRT, SLATMS, SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, MOD, REAL, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED ' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PSLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * IF( LWORK.LT.SIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDWORK = INDD + N LLWORK = LWORK - INDWORK + 1 * ULP = PSLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PSLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL SLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / REAL( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log symmetric, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random symmetric * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N WORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PSLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N WORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PSLASET( 'All', N, N, ZERO, ONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * symmetric, eigenvalues specified * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * symmetric, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PSMATGEN( DESCA( CTXT_ ), 'S', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PSLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( SLARAN( ISEED )*REAL( NLOC ) ), N-NGEN ) * CALL SLATMS( IN, IN, 'S', ISEED, 'P', WORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PSELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PSELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PSELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PSELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 WORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0E+0 ) * CALL PSLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL SLASRT( 'I', N, WORK( INDD ), IINFO ) * * Create the B matrix * CALL PSFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.3E+0 ) * ANORM = ONE * * Update ISEED so that {SLAGSY creates a different Q * ISEED( 4 ) = MOD( ISEED( 4 )+257, 4096 ) ISEED( 3 ) = MOD( ISEED( 3 )+192, 4096 ) ISEED( 2 ) = MOD( ISEED( 2 )+35, 4096 ) ISEED( 1 ) = MOD( ISEED( 1 )+128, 4096 ) CALL PSLATMS( N, N, 'S', ISEED, 'P', WORK( INDD ), 3, TEN, $ ANORM, N, N, 'N', COPYB, 1, 1, DESCA, ORDER, $ WORK( INDWORK+IPREPAD ), SIZETMS, IINFO ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSLATMS5-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.3E+0 ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PSLASIZESYEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, WORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LSYEVXSIZE = MIN( MAXSIZE, LWORK ) WKNOWN = .FALSE. * CALL PSGSEPSUBTST( WKNOWN, IBTYPE, 'v', 'a', UPLO, N, VL, VU, $ IL, IU, THRESH, ABSTOL, A, COPYA, B, COPYB, $ Z, 1, 1, DESCA, WORK( INDD ), WIN, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, IWORK, $ ISIZESYEVX, RES, TSTNRM, QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PSLASIZESYEVX to choose IL and IU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PSLASIZESYEVX to choose VL and VU for us. * CALL PSLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE + INT( SLARAN( ISEED )* $ REAL( VECSIZE-VALSIZE ) ) * CALL PSGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9984 )IBTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ), MAXTSTNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS END IF END IF * 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 11X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 11X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 22X, 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PSGSEPDRIVER' ) * * End of PSGSEPTST * END scalapack-1.8.0/TESTING/EIG/pslasizesyev.f0000640000175000017500000001027210363532303017747 0ustar muammarmuammar* * SUBROUTINE PSLASIZESYEV( JOBZ, N, DESCA, MINSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER JOBZ INTEGER MINSIZE, N * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PSLASIZESYEV computes the amount of memory needed by PSSYEV * to calculate: * 1) Eigenvectors and eigenvalues if JOBZ = 'V' * 2) Eigenvalues only if JOBZ = 'N' * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * MINSIZE (global output) INTEGER * Workspace required for PSSYEV to: * 1) Eigenvectors and eigenvalues if JOBZ = 'V' * 2) Eigenvalues only if JOBZ = 'N' * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL WANTZ INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, $ LCM, LCMQ, LDC, MQ0, MYCOL, MYPCOLC, MYPROWC, $ MYROW, NB, NN, NP, NP0, NPCOL, NPCOLC, NPROCS, $ NPROW, NPROWC, NQ, NRC, QRMEM, RSRC_A, $ SIZEMQRLEFT, SIZEMQRRIGHT * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, SL_GRIDRESHAPE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_GRIDEXIT * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) NB = DESCA( MB_ ) N = DESCA( M_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( NN, NB, 0, 0, NPCOL ) NPROCS = NPROW*NPCOL WANTZ = LSAME( JOBZ, 'V' ) LDC = 0 * * Create the new context that is used in PSSYEV * IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) LDC = MAX( 1, NRC ) CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compute the total amount of space needed * IF( WANTZ ) THEN QRMEM = 5*N + MAX( 2*NP0 +MQ0 + NB*NN, 2*NN-2 ) + N*LDC MINSIZE = MAX ( SIZEMQRLEFT, SIZEMQRRIGHT, QRMEM ) ELSE MINSIZE = 5*N + 2*NP0 +MQ0 + NB*NN END IF * RETURN * * End of PSLASIZESYEV * END scalapack-1.8.0/TESTING/EIG/pslatran.f0000640000175000017500000001561710363532303017042 0ustar muammarmuammar SUBROUTINE PSLATRAN( N, NB, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, JA, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * * ======= * * PSLATRAN transpose a lower triangular matrix on to the upper * triangular portion of the same matrix. * * This is an auxiliary routine called by PSSYTRD. * * Notes * ===== * * IA must equal 1 * JA must equal 1 * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( RSRC_ ) must equal 1 * DESCA( CSRC_ ) must equal 1 * * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * NB (global input) INTEGER * The number of rows and columns to be transposed with each * message sent. NB has no impact on the result, it is striclty * a performance tuning parameter. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). On entry, the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. On exit, the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is undefined (and may have been modified). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * Must be equal to 1. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * Must be equal to 1. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( ICTXT_ ) must point to a square process grid * i.e. one where NPROW is equal to NPCOL * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * Where: * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND, $ LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND, $ MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL, $ MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB, $ STARTCOL, STARTROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, STRRV2D, STRSD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Further details * * Because the processor grid is square each process needs only send * data to its transpose process. (Likewsie it need only receive * data from its transpose process.) Because the data decomposition * is cyclic, the local portion of the array is triangular. * * This routine requires that the data be buffered (i.e. copied) * on the sending process (because of the triangular shape) and * unbuffered on the receiving process. Hence, two local memory to * memory copies are performed within the communications routines * followed by a memory to memory copy outside of the communications * routines. It would be nice to avoid having back to back memory * to memory copies (as we do presently on the receiving processor). * This could be done by packaging the data ourselves in the sender * and then unpacking it directly into the matrix. However, this * code seems cleaner and so since this routine is not a significant * performance bottleneck we have left it this way. * * * * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * * IF( MYROW.EQ.MYCOL ) THEN * DO 20 J = 1, NP DO 10 I = J + 1, NQ A( J+( I-1 )*LDA ) = A( I+( J-1 )*LDA ) 10 CONTINUE 20 CONTINUE * ELSE IF( MYROW.GT.MYCOL ) THEN STARTROW = 1 STARTCOL = 2 ELSE IF( MYROW.EQ.MYCOL ) THEN STARTROW = 2 STARTCOL = 2 ELSE STARTROW = 2 STARTCOL = 1 END IF END IF * DO 50 JJ = 1, MAX( NP, NQ ), NB MINJSEND = STARTCOL + JJ - 1 MINJRECV = STARTROW + JJ - 1 MAXJSEND = MIN( MINJSEND+NB-1, NQ ) MAXJRECV = MIN( MINJRECV+NB-1, NP ) * SENDNB = MAXJSEND - MINJSEND + 1 RECVNB = MAXJRECV - MINJRECV + 1 * MINISEND = 1 MINIRECV = 1 MAXISEND = MIN( NP, JJ+SENDNB-1 ) MAXIRECV = MIN( NQ, JJ+RECVNB-1 ) * ISEND = MAXISEND - MINISEND + 1 IRECV = MAXIRECV - MINIRECV + 1 JSEND = MAXJSEND - MINJSEND + 1 JRECV = MAXJRECV - MINJRECV + 1 * * * DO 40 J = MINJRECV, MAXJRECV DO 30 I = MINIRECV, MAXIRECV + J - MAXJRECV WORK( I+( J-MINJRECV )*IRECV ) = A( J+( I-1 )*LDA ) 30 CONTINUE 40 CONTINUE * IF( IRECV.GT.0 .AND. JRECV.GT.0 ) $ CALL STRSD2D( ICTXT, 'U', 'N', IRECV, JRECV, WORK, IRECV, $ MYCOL, MYROW ) * IF( ISEND.GT.0 .AND. JSEND.GT.0 ) $ CALL STRRV2D( ICTXT, 'U', 'N', ISEND, JSEND, $ A( MINISEND+( MINJSEND-1 )*LDA ), LDA, $ MYCOL, MYROW ) * * 50 CONTINUE * END IF * RETURN * * End of PSLATRD * END scalapack-1.8.0/TESTING/EIG/pchrddriver.f0000640000175000017500000004543510363532303017533 0ustar muammarmuammar PROGRAM PCHRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PCHRDDRIVER is the main test program for the COMPLEX * ScaLAPACK HRD (Hessenberg Reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 14 lines: * 'ScaLAPACK HRD input file' * 'PVM machine' * 'HRD.out' output file name (if any) * 6 device out * 2 number of problems sizes * 100 101 values of N * 2 1 values of ILO * 99 101 values of IHI * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, MEMSIZ, NTESTS, TOTMEM COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IHI, IHIP, IHLP, IHLQ, $ ILCOL, ILO, ILROW, INFO, INLQ, IMIDPAD, IPA, $ IPT, IPW, IPOSTPAD, IPREPAD, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LCMQ, LOFF, $ LWORK, MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, $ NPROCS, NOUT, NP, NPCOL, NPROW, NQ, WORKHRD, $ WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), NVHI( NTESTS ), NVLO( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINIT, BLACS_GRIDINFO, $ DESCINIT, IGSUM2D, BLACS_PINFO, PCFILLPAD, $ PCLAFCHK, PCGEHDRV, PCGEHRD, $ PCHRDINFO, PCMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC REAL PCLANGE EXTERNAL ILCM, INDXG2P, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCHRDINFO( OUTFILE, NOUT, NMAT, NVAL, NVLO, NVHI, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GOTO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) ILO = NVLO( J ) IHI = NVHI( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPT = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * IHIP = NUMROC( IHI, NB, MYROW, DESCA( RSRC_ ), NPROW ) LOFF = MOD( ILO-1, NB ) ILROW = INDXG2P( ILO, NB, MYROW, DESCA( RSRC_ ), NPROW ) ILCOL = INDXG2P( ILO, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) IHLP = NUMROC( IHI-ILO+LOFF+1, NB, MYROW, ILROW, NPROW ) INLQ = NUMROC( N-ILO+LOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWORK = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) WORKHRD = LWORK + IPOSTPAD WORKSIZ = WORKHRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IHLQ = NUMROC( IHI-ILO+LOFF+1, NB, MYCOL, ILCOL, $ NPCOL ) ITEMP = NB*MAX( IHLP+INLQ, IHLQ+MAX( IHIP, $ IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ) ) ) WORKSIZ = MAX( NB*NB + NB*IHLP + ITEMP, NB * NP ) + $ IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Hessenberg reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate A * CALL PCMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, 0, NP, 0, NQ, MYROW, MYCOL, $ NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), $ NQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKHRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKHRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce Hessenberg form * CALL PCGEHRD( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGEHRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEHRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEHRD', WORKHRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKHRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps) * CALL PCGEHDRV( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ) ) CALL PCLAFCHK( 'No', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGEHDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEHDRV', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEHDRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0E+0 ) $ THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * HRD requires 40/3 * N^3 floating point ops. (flops) * more precisely, * HRD requires 16/3*(IHI-ILO)^3+8*IHI*(IHI-ILO)^2 flops * NOPS = DBLE( IHI-ILO ) NOPS = NOPS * NOPS * $ ( 8.0D0*DBLE( IHI ) + (16.0D0/3.0D0)*NOPS ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, ILO, IHI, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, $ PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, ILO, IHI, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, $ PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PCHRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pdsvddriver.f0000640000175000017500000002601610363532303017545 0ustar muammarmuammar PROGRAM PDSVDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * Parallel Double precision singular value decomposition test driver. * * INPUT: * ===== * This routine tests PDGESVD, the parallel singular value * decomposition solver. We would like to cover possible combinations * of: matrix size, process configuration (nprow and npcol), block * size (nb), matrix type, and workspace available. * * Current format of the input file SVD.dat lists the following: * device out * Threshold * number of matrices * number of rows for every matrix * number of columns for every matrix * number of process configurations (P, Q, NB) * values of P (NPROW) for every configuration * values of Q (NPCOL) for every configuration * values of NB for every configuration. * Here threshold is an integer constant with a value between 1 and * 100, which meaning is explained in comments to PDSVDTST. * * WHAT IT DOES: * ============ * PSVDDRIVER checks floating-point arithmetic and parameters * provided by the user in initialization file SVD.dat. It reads and * broadcasts to all process parameters required to run actual testing * code PSVDTST. In case all tests are successful it tells you so. For * the actual "meat" of the tests see comments to PSVDTST. * *======================================================================= * * .. Local Scalars .. CHARACTER*80 SUMMARY INTEGER CONTEXT, ERR, I, IAM, J, K, LWORK, MAXNODES, $ NMATSIZES, NOUT, NPCONFIGS, NPROCS DOUBLE PRECISION THRESH * .. * .. Parameters .. INTEGER MAXSETSIZE, NIN, DBLSIZ, TOTMEM, MEMSIZ PARAMETER ( MAXSETSIZE = 50, NIN = 11, DBLSIZ = 8, $ TOTMEM = 2000000, MEMSIZ = TOTMEM / DBLSIZ ) * .. * .. Local Arrays .. INTEGER ISEED( 4 ), MM( MAXSETSIZE ), $ NBS( MAXSETSIZE ), NN( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ), $ RESULT( 9 ) DOUBLE PRECISION WORK( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ DGEBR2D, DGEBS2D, IGEBR2D, IGEBS2D, PDSVDTST * .. * .. Executable Statements .. * * Get starting information. * CALL BLACS_PINFO( IAM, NPROCS ) * * Open file and skip data header; read output device. * IF( IAM.EQ.0 ) THEN OPEN( UNIT = NIN, FILE = 'SVD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )NOUT READ( NIN, FMT = * )MAXNODES END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * * Initialize variables, arrays, and grids. * ERR = 0 NMATSIZES = 0 NPCONFIGS = 0 LWORK = MEMSIZ ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = 9986 ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) WRITE( NOUT, FMT = 9972 ) WRITE( NOUT, FMT = 9971 ) WRITE( NOUT, FMT = 9970 ) WRITE( NOUT, FMT = 9969 ) WRITE( NOUT, FMT = 9968 ) WRITE( NOUT, FMT = 9967 ) WRITE( NOUT, FMT = 9966 ) WRITE( NOUT, FMT = 9965 ) END IF * * Process 0 reads values in input file and broadcasts them to * all other processes. * 10 CONTINUE IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )THRESH WRITE( NOUT, FMT = 9965 )SUMMARY CALL DGEBS2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1 ) ELSE CALL DGEBR2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) END IF IF( THRESH.EQ.-1 ) THEN GO TO 80 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATSIZES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1, 0, 0 ) END IF * Deal with error IF( NMATSIZES.LT.1 .OR. NMATSIZES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Matrix size', NMATSIZES, 1, $ MAXSETSIZE END IF ERR = -1 GO TO 80 END IF * * Read array of MATSIZES. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MM( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MM, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MM, 1, 0, 0 ) END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NN( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, NN, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, NN, 1, 0, 0 ) END IF * * Read and broadcast NPCONFIGS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NPCONFIGS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1, 0, 0 ) END IF * Deal with error IF( NPCONFIGS.LT.1 .OR. NPCONFIGS.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# proc configs', NPCONFIGS, 1, $ MAXSETSIZE END IF ERR = -1 GO TO 80 END IF * * Read and broadcast array of NPROWS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPROWS( I ), I = 1, NPCONFIGS ) * CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1, 0, $ 0 ) END IF * Deal with error DO 20 I = 1, NPCONFIGS IF( NPROWS( I ).LE.0 ) $ ERR = -1 20 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NPROW' END IF GO TO 80 END IF * * Read and broadcast array of NPCOLS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPCOLS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1, 0, $ 0 ) END IF * * Deal with error. * DO 30 I = 1, NPCONFIGS IF( NPCOLS( I ).LE.0 ) $ ERR = -1 30 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NPCOL' END IF GO TO 80 END IF * * Read and broadcast array of NBs. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NBS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1, 0, 0 ) END IF * Deal with error DO 40 I = 1, NPCONFIGS IF( NBS( I ).LE.0 ) $ ERR = -1 40 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NB' END IF GO TO 80 END IF * DO 70 J = 1, NMATSIZES DO 60 I = 1, NPCONFIGS * DO 50 K = 1, 9 RESULT( K ) = 0 50 CONTINUE CALL PDSVDTST( MM( J ), NN( J ), NPROWS( I ), NPCOLS( I ), $ NBS( I ), ISEED, THRESH, WORK, RESULT, LWORK, $ NOUT ) * 60 CONTINUE 70 CONTINUE * GO TO 10 * 80 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * End of PDSVDDRIVER * 9999 FORMAT( A20, ' is:', I5, ' must be between:', I5, ' and', I5 ) 9998 FORMAT( A ) 9997 FORMAT( A20, ' must be positive' ) 9996 FORMAT( A ) 9995 FORMAT( 'M = ', I5, ' N = ', I5, ' NPOW = ', I5, 'NPCOL = ', I5, $ ' NB = ', I5 ) * 9994 FORMAT( 'Test #', I5, 'for this configuration has failed' ) 9993 FORMAT( 'All test passed for this configuration' ) 9992 FORMAT( ' ' ) 9991 FORMAT( 'Running tests of the parallel singular value ', $ 'decomposition routine: PDGESVD' ) 9990 FORMAT( 'The following scaled residual checks will be', $ 'computed:' ) 9989 FORMAT( ' || A - U*diag(S)*VT ||/( ||A||*max(M,N)*ulp )' ) 9988 FORMAT( ' || I - UT*U ||/( M*ulp )' ) 9987 FORMAT( ' || I - VT*V ||/( N*ulp )' ) 9986 FORMAT( ' ' ) 9985 FORMAT( 'An explanation of the input/output parameters', $ ' follows:' ) 9984 FORMAT( 'RESULT : passed; or an indication of which', $ ' jobtype test failed' ) 9983 FORMAT( 'M : The number of rows of the matrix A.' ) 9982 FORMAT( 'N : The number of columns of the matrix A.' ) 9981 FORMAT( 'P : The number of process rows.' ) 9980 FORMAT( 'Q : The number of process columns.' ) 9979 FORMAT( 'NB : The size of the square blocks the', $ ' matrix A is split into.' ) 9978 FORMAT( 'THRESH : If a residual value is less than ', $ ' THRESH, RESULT is flagged as PASSED.' ) 9977 FORMAT( 'MTYPE : matrix type (see pdsvdtst.f).' ) 9976 FORMAT( 'CHK : || A - U*diag(S)*VT ||/( ||A||', $ '*max(M,N)*ulp )' ) 9975 FORMAT( 'MTM : maximum of two values:',/, $ ' || I - UT*U ||/( M*ulp ) and', $ ' || I - VT*V ||/( N*ulp )' ) 9974 FORMAT( 'DELTA : maximum of three values:',/, $ ' || U - UC ||/( M*ulp*THRESH ),' ) 9973 FORMAT( ' || VT - VTC ||/( N*ulp*THRESH ), and' ) 9972 FORMAT( ' || S - SC || / ( SIZE*ulp*|S|*THRESH ), ' ) 9971 FORMAT( ' where UC, VTC, SC are singular vectors ', $ 'and values' ) 9970 FORMAT( ' for JOBTYPE.NE.1 (see pdsvdcmp.f) ' ) 9969 FORMAT( 'HET : P if heterogeneity was detected by PDGESVD' ) 9968 FORMAT( ' T if detected by the PDSVSTST, N if', $ ' undetected' ) 9967 FORMAT( ' ' ) 9966 FORMAT( 'RESULT WALL CPU M N P Q', $ ' NB MTYPE CHK MTM DELTA HET' ) 9965 FORMAT( A ) END scalapack-1.8.0/TESTING/EIG/pshrddriver.f0000640000175000017500000004537010363532303017551 0ustar muammarmuammar PROGRAM PSHRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PSHRDDRIVER is the main test program for the REAL * ScaLAPACK HRD (Hessenberg Reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 14 lines: * 'ScaLAPACK HRD input file' * 'PVM machine' * 'HRD.out' output file name (if any) * 6 device out * 2 number of problems sizes * 100 101 values of N * 2 1 values of ILO * 99 101 values of IHI * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL PARAMETER ( REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IHI, IHIP, IHLP, IHLQ, $ ILCOL, ILO, ILROW, INFO, INLQ, IMIDPAD, IPA, $ IPT, IPW, IPOSTPAD, IPREPAD, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LCMQ, LOFF, $ LWORK, MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, $ NPROCS, NOUT, NP, NPCOL, NPROW, NQ, WORKHRD, $ WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), NVHI( NTESTS ), NVLO( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINIT, BLACS_GRIDINFO, $ DESCINIT, IGSUM2D, BLACS_PINFO, PSFILLPAD, $ PSLAFCHK, PSGEHDRV, PSGEHRD, $ PSHRDINFO, PSMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC REAL PSLANGE EXTERNAL ILCM, INDXG2P, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSHRDINFO( OUTFILE, NOUT, NMAT, NVAL, NVLO, NVHI, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GOTO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) ILO = NVLO( J ) IHI = NVHI( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPT = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * IHIP = NUMROC( IHI, NB, MYROW, DESCA( RSRC_ ), NPROW ) LOFF = MOD( ILO-1, NB ) ILROW = INDXG2P( ILO, NB, MYROW, DESCA( RSRC_ ), NPROW ) ILCOL = INDXG2P( ILO, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) IHLP = NUMROC( IHI-ILO+LOFF+1, NB, MYROW, ILROW, NPROW ) INLQ = NUMROC( N-ILO+LOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWORK = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) WORKHRD = LWORK + IPOSTPAD WORKSIZ = WORKHRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IHLQ = NUMROC( IHI-ILO+LOFF+1, NB, MYCOL, ILCOL, $ NPCOL ) ITEMP = NB*MAX( IHLP+INLQ, IHLQ+MAX( IHIP, $ IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ) ) ) WORKSIZ = MAX( NB*NB + NB*IHLP + ITEMP, NB * NP ) + $ IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Hessenberg reduction', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate A * CALL PSMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, 0, NP, 0, NQ, MYROW, MYCOL, $ NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), $ NQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKHRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKHRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce Hessenberg form * CALL PSGEHRD( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGEHRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEHRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEHRD', WORKHRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKHRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps) * CALL PSGEHDRV( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ) ) CALL PSLAFCHK( 'No', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGEHDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEHDRV', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEHDRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0E+0 ) $ THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * HRD requires 10/3 * N^3 floating point ops. (flops) * more precisely, * HRD requires 4/3*(IHI-ILO)^3 + 2*IHI*(IHI-ILO)^2 flops * NOPS = DBLE( IHI-ILO ) NOPS = NOPS * NOPS * $ ( 2.0D0*DBLE( IHI ) + (4.0D0/3.0D0)*NOPS ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, ILO, IHI, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, $ PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, ILO, IHI, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, $ PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PSHRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pclagsy.f0000640000175000017500000002572110363532303016655 0ustar muammarmuammar* * SUBROUTINE PCLAGHE( N, K, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL D( * ) COMPLEX A( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * PCLAGHE generates a real Hermitian matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. * * This is just a quick implementation which will be replaced in the * future. The random orthogonal matrix is computed by creating a * random matrix and running QR on it. This requires vastly more * computation than necessary, but not significantly more communication * than is used in the rest of this rouinte, and hence is not that much * slower than an efficient solution. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix A. N >= 0. * * K (global input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * ### K must be 0 or N-1, 0 < K < N-1 is not supported yet. * * D (global input) COMPLEX array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) COMPLEX array * Global dimension (N, N), local dimension (NP, NQ) * The generated n by n Hermitian matrix A (the full matrix is * stored). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated and will remain identical on * all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) COMPLEX array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PCLASIZESEP * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX CZERO PARAMETER ( CZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, II, IIROW, $ INDAA, INDTAU, INDWORK, IPOSTPAD, IPREPAD, $ IROFFA, ISIZEHEEVX, ISIZESUBTST, ISIZETST, $ JJCOL, LDAA, LII, LIII, LJJ, LJJJ, LWMIN, MAXI, $ MB_A, MYCOL, MYROW, NB_A, NP, NPCOL, NPROW, NQ, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, RSRC_A, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, CLASET, PCGEQRF, $ PCLASIZESEP, PCMATGEN, PCUNMQR, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. * INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Initialize grid information * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) END IF * LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) NP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) IPREPAD = 0 IPOSTPAD = 0 CALL PCLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) LWMIN = SIZETMS * * Test the input arguments * IF( INFO.EQ.0 ) THEN IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( N.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCLAGHE', -INFO ) RETURN END IF * INDAA = 1 INDTAU = INDAA + LDAA*MAX( 1, NQ ) INDWORK = INDTAU + MAX( 1, NQ ) * IF( K.NE.0 ) THEN CALL CLASET( 'A', LDAA, NQ, CZERO, CZERO, WORK( INDAA ), LDAA ) * * * Build a random matrix * * CALL PCMATGEN( DESCA( CTXT_ ), 'N', 'N', N, ORDER, $ DESCA( MB_ ), DESCA( NB_ ), WORK( INDAA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ ISEED( 1 ), 0, NP, 0, NQ, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PCGEQRF( N, ORDER, WORK( INDAA ), IA, JA, DESCA, $ WORK( INDTAU ), WORK( INDWORK ), SIZEQRF, INFO ) * END IF * * Build a diagonal matrix A with the eigenvalues specified in D * CALL CLASET( 'A', NP, NQ, CZERO, CZERO, A, DESCA( LLD_ ) ) * IIROW = 0 JJCOL = 0 LII = 1 LJJ = 1 * DO 20 II = 1, N, DESCA( MB_ ) MAXI = MIN( N, II+DESCA( MB_ )-1 ) IF( ( MYROW.EQ.IIROW ) .AND. ( MYCOL.EQ.JJCOL ) ) THEN LIII = LII LJJJ = LJJ DO 10 I = II, MAXI A( LIII+( LJJJ-1 )*DESCA( LLD_ ) ) = D( I ) LIII = LIII + 1 LJJJ = LJJJ + 1 10 CONTINUE END IF IF( MYROW.EQ.IIROW ) $ LII = LII + DESCA( MB_ ) IF( MYCOL.EQ.JJCOL ) $ LJJ = LJJ + DESCA( MB_ ) IIROW = MOD( IIROW+1, NPROW ) JJCOL = MOD( JJCOL+1, NPCOL ) 20 CONTINUE * * A = Q * A * IF( K.NE.0 ) THEN * CALL PCUNMQR( 'L', 'Conjugate transpose', N, N, ORDER, $ WORK( INDAA ), IA, JA, DESCA, WORK( INDTAU ), A, $ IA, JA, DESCA, WORK( INDWORK ), SIZEMQRLEFT, $ INFO ) * * * A = A * Q' * * CALL PCUNMQR( 'R', 'N', N, N, ORDER, WORK( INDAA ), IA, JA, $ DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRRIGHT, INFO ) * END IF * * End of PCLAGHE * END scalapack-1.8.0/TESTING/EIG/pcbrddriver.f0000640000175000017500000005006410363532303017517 0ustar muammarmuammar PROGRAM PCBRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PCBRDDRIVER is the main test program for the COMPLEX * ScaLAPACK BRD (bidiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK BRD computation input file' * 'PVM machine' * 'BRD.out' output file name * 6 device out * 3 number of problems sizes * 16 20 18 values of M * 16 18 20 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, MEMSIZ, NTESTS, TOTMEM, REALSZ COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, TOTMEM = 2000000, REALSZ = 8, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPTP, IPTQ, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LWORK, M, MAXMN, $ MINMN, MNP, MNQ, MP, MYCOL, MYROW, N, NB, $ NDIAG, NGRIDS, NMAT, NNB, NOFFD, NOUT, NPCOL, $ NPROCS, NPROW, NQ, WORKBRD, WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ MVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCBRDINFO, PCFILLPAD, PCLAFCHK, $ PCMATGEN, PCGEBDRV, PCGEBRD, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PCLANGE EXTERNAL ICEIL, NUMROC, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCBRDINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * * Go to bottom of loop if this case doesn't use my process * DO 20 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), NB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IF( M.GE.N ) THEN NDIAG = MNQ NOFFD = MNP NDIAG = ICEIL( REALSZ*NDIAG, CPLXSZ ) NOFFD = ICEIL( REALSZ*NOFFD, CPLXSZ ) ELSE NDIAG = MNP NOFFD = NUMROC( MIN( M, N )-1, NB, MYCOL, 0, NPCOL ) NDIAG = ICEIL( REALSZ*NDIAG, CPLXSZ ) NOFFD = ICEIL( REALSZ*NOFFD, CPLXSZ ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPTQ = IPE + NOFFD + IPOSTPAD + IPREPAD IPTP = IPTQ + MNQ + IPOSTPAD + IPREPAD IPW = IPTP + MNP + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = NB*( MP+NQ+1 ) + NQ WORKBRD = LWORK + IPOSTPAD WORKSIZ = WORKBRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN WORKSIZ = MAX( LWORK, 2*NB*( MP+NQ+NB ) ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Bidiagonal reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PCMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, MNQ, 1, MEM( IPTQ-IPREPAD ), $ MNQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, MNP, 1, MEM( IPTP-IPREPAD ), $ MNP, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKBRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKBRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to bidiagonal form * CALL PCGEBRD( M, N, MEM( IPA ), 1, 1, DESCA, MEM( IPD ), $ MEM( IPE ), MEM( IPTQ ), MEM( IPTP ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGEBRD', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBRD', MNQ, 1, $ MEM( IPTQ-IPREPAD ), MNQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBRD', MNP, 1, $ MEM( IPTP-IPREPAD ), MNP, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBRD', WORKBRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKBRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A-Q*B*P|| / (||A|| * N * eps) * CALL PCGEBDRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPTQ ), $ MEM( IPTP ), MEM( IPW ), IERR( 1 ) ) CALL PCLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCGEBDRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGEBDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0E+0 $ .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID * KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * ) $ 'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * BRD requires 32/3 N^3 floating point operations * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) NOPS = 16.0D+0 * DBLE( MINMN ) * DBLE( MINMN ) * $ ( DBLE( MAXMN ) - DBLE( MINMN ) / 3.0D+0 ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB P Q BRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*B*P|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PCBRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pcrptseptst.f0000640000175000017500000000523510363532303017604 0ustar muammarmuammar PROGRAM PCRPTSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * Repeat parallel Hermitian eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS, UPLO INTEGER CONTEXT, IAM, INFO, IPOSTPAD, IPREPAD, LDA, $ MATTYPE, N, NB, NPCOL, NPROCS, NPROW REAL ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) REAL GAP( MAXN ), RWORK( LWORK ), WIN( MAXN ), $ WNEW( MAXN ) COMPLEX A( MAXN*MAXN ), COPYA( MAXN*MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PCSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * These lines should be replaced by the output from pxSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351E-37 THRESH = .350000E+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PCSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, N, $ ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, WORK, $ LWORK-IPREPAD-IPOSTPAD, RWORK, $ LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PCRPTSEPTST * END scalapack-1.8.0/TESTING/EIG/pssvddriver.f0000640000175000017500000002601010363532303017556 0ustar muammarmuammar PROGRAM PSSVDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======== * * Parallel Real singular value decomposition test driver. * * INPUT: * ===== * This routine tests PDGESVD, the parallel singular value * decomposition solver. We would like to cover possible combinations * of: matrix size, process configuration (nprow and npcol), block * size (nb), matrix type, and workspace available. * * Current format of the input file SVD.dat lists the following: * device out * Threshold * number of matrices * number of rows for every matrix * number of columns for every matrix * number of process configurations (P, Q, NB) * values of P (NPROW) for every configuration * values of Q (NPCOL) for every configuration * values of NB for every configuration. * Here threshold is an integer constant with a value between 1 and * 100, which meaning is explained in comments to PSSVDTST. * * WHAT IT DOES: * ============ * PSVDDRIVER checks floating-point arithmetic and parameters * provided by the user in initialization file SVD.dat. It reads and * broadcasts to all process parameters required to run actual testing * code PSVDTST. In case all tests are successful it tells you so. For * the actual "meat" of the tests see comments to PSVDTST. * *======================================================================= * * .. Local Scalars .. CHARACTER*80 SUMMARY INTEGER CONTEXT, ERR, I, IAM, J, K, LWORK, MAXNODES, $ NMATSIZES, NOUT, NPCONFIGS, NPROCS REAL THRESH * .. * .. Parameters .. INTEGER MAXSETSIZE, NIN, DBLSIZ, TOTMEM, MEMSIZ PARAMETER ( MAXSETSIZE = 50, NIN = 11, DBLSIZ = 8, $ TOTMEM = 2000000, MEMSIZ = TOTMEM / DBLSIZ ) * .. * .. Local Arrays .. INTEGER ISEED( 4 ), MM( MAXSETSIZE ), $ NBS( MAXSETSIZE ), NN( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ), $ RESULT( 9 ) REAL WORK( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ SGEBR2D, SGEBS2D, IGEBR2D, IGEBS2D, PSSVDTST * .. * .. Executable Statements .. * * Get starting information. * CALL BLACS_PINFO( IAM, NPROCS ) * * Open file and skip data header; read output device. * IF( IAM.EQ.0 ) THEN OPEN( UNIT = NIN, FILE = 'SVD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )NOUT READ( NIN, FMT = * )MAXNODES END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * * Initialize variables, arrays, and grids. * ERR = 0 NMATSIZES = 0 NPCONFIGS = 0 LWORK = MEMSIZ ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = 9986 ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) WRITE( NOUT, FMT = 9972 ) WRITE( NOUT, FMT = 9971 ) WRITE( NOUT, FMT = 9970 ) WRITE( NOUT, FMT = 9969 ) WRITE( NOUT, FMT = 9968 ) WRITE( NOUT, FMT = 9967 ) WRITE( NOUT, FMT = 9966 ) WRITE( NOUT, FMT = 9965 ) END IF * * Process 0 reads values in input file and broadcasts them to * all other processes. * 10 CONTINUE IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )SUMMARY READ( NIN, FMT = * )THRESH WRITE( NOUT, FMT = 9965 )SUMMARY CALL SGEBS2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1 ) ELSE CALL SGEBR2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) END IF IF( THRESH.EQ.-1 ) THEN GO TO 80 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATSIZES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1, 0, 0 ) END IF * Deal with error IF( NMATSIZES.LT.1 .OR. NMATSIZES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Matrix size', NMATSIZES, 1, $ MAXSETSIZE END IF ERR = -1 GO TO 80 END IF * * Read array of MATSIZES. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MM( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MM, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MM, 1, 0, 0 ) END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NN( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, NN, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, NN, 1, 0, 0 ) END IF * * Read and broadcast NPCONFIGS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NPCONFIGS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1, 0, 0 ) END IF * Deal with error IF( NPCONFIGS.LT.1 .OR. NPCONFIGS.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# proc configs', NPCONFIGS, 1, $ MAXSETSIZE END IF ERR = -1 GO TO 80 END IF * * Read and broadcast array of NPROWS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPROWS( I ), I = 1, NPCONFIGS ) * CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1, 0, $ 0 ) END IF * Deal with error DO 20 I = 1, NPCONFIGS IF( NPROWS( I ).LE.0 ) $ ERR = -1 20 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NPROW' END IF GO TO 80 END IF * * Read and broadcast array of NPCOLS. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPCOLS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1, 0, $ 0 ) END IF * * Deal with error. * DO 30 I = 1, NPCONFIGS IF( NPCOLS( I ).LE.0 ) $ ERR = -1 30 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NPCOL' END IF GO TO 80 END IF * * Read and broadcast array of NBs. * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NBS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1, 0, 0 ) END IF * Deal with error DO 40 I = 1, NPCONFIGS IF( NBS( I ).LE.0 ) $ ERR = -1 40 CONTINUE IF( ERR.EQ.-1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 )' NB' END IF GO TO 80 END IF * DO 70 J = 1, NMATSIZES DO 60 I = 1, NPCONFIGS * DO 50 K = 1, 9 RESULT( K ) = 0 50 CONTINUE CALL PSSVDTST( MM( J ), NN( J ), NPROWS( I ), NPCOLS( I ), $ NBS( I ), ISEED, THRESH, WORK, RESULT, LWORK, $ NOUT ) * 60 CONTINUE 70 CONTINUE * GO TO 10 * 80 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * End of PSSVDDRIVER * 9999 FORMAT( A20, ' is:', I5, ' must be between:', I5, ' and', I5 ) 9998 FORMAT( A ) 9997 FORMAT( A20, ' must be positive' ) 9996 FORMAT( A ) 9995 FORMAT( 'M = ', I5, ' N = ', I5, ' NPOW = ', I5, 'NPCOL = ', I5, $ ' NB = ', I5 ) * 9994 FORMAT( 'Test #', I5, 'for this configuration has failed' ) 9993 FORMAT( 'All test passed for this configuration' ) 9992 FORMAT( ' ' ) 9991 FORMAT( 'Running tests of the parallel singular value ', $ 'decomposition routine: PSGESVD' ) 9990 FORMAT( 'The following scaled residual checks will be', $ 'computed:' ) 9989 FORMAT( ' || A - U*diag(S)*VT ||/( ||A||*max(M,N)*ulp )' ) 9988 FORMAT( ' || I - UT*U ||/( M*ulp )' ) 9987 FORMAT( ' || I - VT*V ||/( N*ulp )' ) 9986 FORMAT( ' ' ) 9985 FORMAT( 'An explanation of the input/output parameters', $ ' follows:' ) 9984 FORMAT( 'RESULT : passed; or an indication of which', $ ' jobtype test failed' ) 9983 FORMAT( 'M : The number of rows of the matrix A.' ) 9982 FORMAT( 'N : The number of columns of the matrix A.' ) 9981 FORMAT( 'P : The number of process rows.' ) 9980 FORMAT( 'Q : The number of process columns.' ) 9979 FORMAT( 'NB : The size of the square blocks the', $ ' matrix A is split into.' ) 9978 FORMAT( 'THRESH : If a residual value is less than ', $ ' THRESH, RESULT is flagged as PASSED.' ) 9977 FORMAT( 'MTYPE : matrix type (see pssvdtst.f).' ) 9976 FORMAT( 'CHK : || A - U*diag(S)*VT ||/( ||A||', $ '*max(M,N)*ulp )' ) 9975 FORMAT( 'MTM : maximum of two values:',/, $ ' || I - UT*U ||/( M*ulp ) and', $ ' || I - VT*V ||/( N*ulp )' ) 9974 FORMAT( 'DELTA : maximum of three values:',/, $ ' || U - UC ||/( M*ulp*THRESH ),' ) 9973 FORMAT( ' || VT - VTC ||/( N*ulp*THRESH ), and' ) 9972 FORMAT( ' || S - SC || / ( SIZE*ulp*|S|*THRESH ), ' ) 9971 FORMAT( ' where UC, VTC, SC are singular vectors ', $ 'and values' ) 9970 FORMAT( ' for JOBTYPE.NE.1 (see pdsvdcmp.f) ' ) 9969 FORMAT( 'HET : P if heterogeneity was detected by PDGESVD' ) 9968 FORMAT( ' T if detected by the PDSVSTST, N if', $ ' undetected' ) 9967 FORMAT( ' ' ) 9966 FORMAT( 'RESULT WALL CPU M N P Q', $ ' NB MTYPE CHK MTM DELTA HET' ) 9965 FORMAT( A ) END scalapack-1.8.0/TESTING/EIG/pdnepdriver.f0000640000175000017500000005050710363532303017535 0ustar muammarmuammar PROGRAM PDNEPDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PDNEPDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK NEP routines. This test driver performs a Schur * decomposition followed by residual check of a Hessenberg matrix. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * DBLESZ INTEGER, default = 8 bytes. * DBLESZ indicate the length in bytes on the given platform * for a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, TOTMEM, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL, ZERO, ONE PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0, $ ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPREPAD, IPW, IPWI, IPWR, IPZ, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LDA, LDZ, LWORK, $ MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, $ NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, QRESID, TMFLOPS, ZNORM * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ), $ IDUM( 1 ), NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDGEMM, PDLAHQR, PDLASET, PDMATGEN, $ PDNEPFCHK, PDNEPINFO, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE, PDLANHS EXTERNAL ILCM, NUMROC, PDLAMCH, PDLANGE, PDLANHS * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDNEPINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWI = IPWR + N + IPOSTPAD + IPREPAD IPW = IPWI + N + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX(2*N, (8*ILCM(NPROW,NPCOL)+2)**2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PDNEPFCHK and PDLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate matrix Z = In * CALL PDLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PDLASET( 'Lower', MAX( 0, N-2 ), MAX( 0, N-2 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 3 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PDFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, N, 1, MEM( IPWI-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform NEP factorization * CALL PDLAHQR( .TRUE., .TRUE., N, 1, N, MEM( IPA ), DESCA, $ MEM( IPWR ), MEM( IPWI ), 1, N, MEM( IPZ ), $ DESCZ, MEM( IPW ), LWORK, IDUM, 0, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PDLAHQR INFO=', INFO KFAIL = KFAIL + 1 GO TO 10 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PDCHEKPAD( ICTXT, 'PDLAHQR (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLAHQR (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLAHQR (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLAHQR (WI)', N, 1, $ MEM( IPWI-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLAHQR (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || Z * H * Z**T - H0 || / ( N*|| H0 ||*EPS ) * CALL PDNEPFCHK( N, MEM( IPA ), 1, 1, DESCA, IASEED, $ MEM( IPZ ), 1, 1, DESCZ, ANORM, $ FRESID, MEM( IPW ) ) * CALL PDCHEKPAD( ICTXT, 'PDNEPFCHK (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDNEPFCHK (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDNEPFCHK (WORK)', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || (Z**T)*Z - In ||_1 * CALL PDLASET( 'All', N, N, ZERO, ONE, MEM( IPA ), 1, $ 1, DESCA ) CALL PDGEMM( 'Transpose', 'No transpose', N, N, N, $ -ONE, MEM( IPZ ), 1, 1, DESCZ, $ MEM( IPZ ), 1, 1, DESCZ, ONE, MEM( IPA ), $ 1, 1, DESCA ) ZNORM = PDLANGE( '1', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) QRESID = ZNORM / ( DBLE( N )*PDLAMCH( ICTXT, 'P' ) ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0D+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 18 N^3 flops for PxLAHQR * NOPS = 18.0D+0*DBLE( N )**3 * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H - Q*S*Q^T|| / (||H|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Q^T*Q - I|| / ( N * eps ) ', G25.7 ) * STOP * * End of PDNEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pdgsepchk.f0000640000175000017500000003102510363532303017155 0ustar muammarmuammar* * SUBROUTINE PDGSEPCHK( IBTYPE, MS, NV, A, IA, JA, DESCA, B, IB, JB, $ DESCB, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, W, WORK, LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, IBTYPE, IC, IQ, JA, JB, JC, JQ, LWORK, $ MS, NV, RESULT DOUBLE PRECISION THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCC( * ), DESCQ( * ) DOUBLE PRECISION A( * ), B( * ), C( * ), Q( * ), W( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PDGSEPCHK checks a decomposition of the form * * A Q = B Q D or * A B Q = Q D or * B A Q = Q D * * where A is a symmetric matrix, B is * symmetric positive definite, Q is orthogonal, and D is diagonal. * * One of the following test ratios is computed: * * IBTYPE = 1: TSTNRM = | A Q - B Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 2: TSTNRM = | A B Q - Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 3: TSTNRM = | B A Q - Q D | / ( |A| |Q| n ulp ) * * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, B and Q * MQ = number of local columns in A * NQ = number of local columns in B and Q * * IBTYPE (input) INTEGER * The form of the symmetric generalized eigenproblem. * = 1: A*Q = (lambda)*B*Q * = 2: A*B*Q = (lambda)*Q * = 3: B*A*Q = (lambda)*Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, B, C and Q * Also, the number of columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) REAL pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer to an * array in local memory of dimension (LLD_B, LOCc(JB+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix B * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix B. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), NQ) * * Contains the eigenvectors as computed by PSSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * * RESULT (global output) INTEGER * 0 if the test passes * 1 if the test fails * * .. Local Scalars .. * INTEGER I, INFO, MYCOL, MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION ANORM, ULP * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) DOUBLE PRECISION CONE, CNEGONE, CZERO PARAMETER ( CONE = 1.0D+0, CNEGONE = -1.0D+0, $ CZERO = 0.0D+0 ) * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION DLAMCH, PDLANGE EXTERNAL NUMROC, DLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PDGEMM, PDSCAL, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( MS, 1, MS, 2, IB, JB, DESCB, 11, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 16, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IB, JB, DESCB, 20, INFO ) * IF( INFO.EQ.0 ) THEN * NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -14 ELSE IF( JQ.NE.1 ) THEN INFO = -15 ELSE IF( IA.NE.1 ) THEN INFO = -5 ELSE IF( JA.NE.1 ) THEN INFO = -6 ELSE IF( IB.NE.1 ) THEN INFO = -9 ELSE IF( JB.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.NQ ) THEN INFO = -23 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDGSEPCHK', -INFO ) RETURN END IF * RESULT = 0 ULP = DLAMCH( 'Epsilon' ) * * Compute product of Max-norms of A and Q. * ANORM = PDLANGE( 'M', MS, MS, A, IA, JA, DESCA, WORK )* $ PDLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) IF( ANORM.EQ.ZERO ) $ ANORM = ONE * IF( IBTYPE.EQ.1 ) THEN * * Norm of AQ - BQD * * C = AQ * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 10 I = 1, NV CALL PDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 10 CONTINUE * * C = C - BQ (i.e. AQ-BQD) * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CNEGONE, C, IC, JC, DESCC ) * TSTNRM = ( PDLANGE( 'M', MS, NV, C, IC, JC, DESCC, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * * ELSE IF( IBTYPE.EQ.2 ) THEN * * Norm of ABQ - QD * * * C = BQ * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 20 I = 1, NV CALL PDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 20 CONTINUE * * Q = AC - Q * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PDLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * Norm of BAQ - QD * * * C = AQ * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 30 I = 1, NV CALL PDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 30 CONTINUE * * Q = BC - Q * CALL PDGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PDLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * END IF * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0D0 ) ) THEN RESULT = 1 END IF RETURN * * End of PDGSEPCHK * END scalapack-1.8.0/TESTING/EIG/pcget22.f0000640000175000017500000002244410363532303016460 0ustar muammarmuammar SUBROUTINE PCGET22( TRANSA, TRANSE, TRANSW, N, A, DESCA, E, DESCE, $ W, WORK, DESCW, RWORK, RESULT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER TRANSA, TRANSE, TRANSW INTEGER N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCE( * ), DESCW( * ) REAL RESULT( 2 ), RWORK( * ) COMPLEX A( * ), E( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PCGET22 does an eigenvector check. * * The basic test is: * * RESULT(1) = | A E - E W | / ( |A| |E| ulp ) * * using the 1-norm. It also tests the normalization of E: * * RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp ) * j * * where E(j) is the j-th eigenvector, and m-norm is the max-norm of a * vector. The max-norm of a complex n-vector x in this case is the * maximum of |re(x(i)| + |im(x(i)| over i = 1, ..., n. * * Arguments * ========== * * TRANSA (input) CHARACTER*1 * Specifies whether or not A is transposed. * = 'N': No transpose * = 'T': Transpose * = 'C': Conjugate transpose * * TRANSE (input) CHARACTER*1 * Specifies whether or not E is transposed. * = 'N': No transpose, eigenvectors are in columns of E * = 'T': Transpose, eigenvectors are in rows of E * = 'C': Conjugate transpose, eigenvectors are in rows of E * * TRANSW (input) CHARACTER*1 * Specifies whether or not W is transposed. * = 'N': No transpose * = 'T': Transpose, same as TRANSW = 'N' * = 'C': Conjugate transpose, use -WI(j) instead of WI(j) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * A (input) COMPLEX array, dimension (*) * The matrix whose eigenvectors are in E. * * DESCA (input) INTEGER array, dimension(*) * * E (input) COMPLEX array, dimension (*) * The matrix of eigenvectors. If TRANSE = 'N', the eigenvectors * are stored in the columns of E, if TRANSE = 'T' or 'C', the * eigenvectors are stored in the rows of E. * * DESCE (input) INTEGER array, dimension(*) * * W (input) COMPLEX array, dimension (N) * The eigenvalues of A. * * WORK (workspace) COMPLEX array, dimension (*) * DESCW (input) INTEGER array, dimension(*) * * RWORK (workspace) REAL array, dimension (N) * * RESULT (output) REAL array, dimension (2) * RESULT(1) = | A E - E W | / ( |A| |E| ulp ) * RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp ) * j * Further Details * =============== * * Contributed by Mark Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER NORMA, NORME INTEGER ICOL, II, IROW, ITRNSE, ITRNSW, J, JCOL, JJ, $ JROW, JVEC, LDA, LDE, LDW, MB, MYCOL, MYROW, $ NB, NPCOL, NPROW, CONTXT, RA, CA, RSRC, CSRC REAL ANORM, ENORM, ENRMAX, ENRMIN, ERRNRM, TEMP1, $ ULP, UNFL COMPLEX CDUM, WTEMP * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH, PCLANGE EXTERNAL LSAME, PSLAMCH, PCLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGAMN2D, SGAMX2D, INFOG2L, $ PCAXPY, PCGEMM, PCLASET * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CONJG, AIMAG, MAX, MIN * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * * Initialize RESULT (in case N=0) * RESULT( 1 ) = ZERO RESULT( 2 ) = ZERO IF( N.LE.0 ) $ RETURN * CONTXT = DESCA( CTXT_ ) RSRC = DESCA( RSRC_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) MB = DESCA( MB_ ) LDA = DESCA( LLD_ ) LDE = DESCE( LLD_ ) LDW = DESCW( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * UNFL = PSLAMCH( CONTXT, 'Safe minimum' ) ULP = PSLAMCH( CONTXT, 'Precision' ) * ITRNSE = 0 ITRNSW = 0 NORMA = 'O' NORME = 'O' * IF( LSAME( TRANSA, 'T' ) .OR. LSAME( TRANSA, 'C' ) ) THEN NORMA = 'I' END IF * IF( LSAME( TRANSE, 'T' ) ) THEN ITRNSE = 1 NORME = 'I' ELSE IF( LSAME( TRANSE, 'C' ) ) THEN ITRNSE = 2 NORME = 'I' END IF * IF( LSAME( TRANSW, 'C' ) ) THEN ITRNSW = 1 END IF * * Normalization of E: * ENRMIN = ONE / ULP ENRMAX = ZERO IF( ITRNSE.EQ.0 ) THEN DO 20 JVEC = 1, N TEMP1 = ZERO DO 10 J = 1, N CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN TEMP1 = MAX( TEMP1, CABS1( E( ( ICOL-1 )*LDE+ $ IROW ) ) ) END IF 10 CONTINUE IF( MYCOL.EQ.JJ ) THEN CALL SGAMX2D( CONTXT, 'Col', ' ', 1, 1, TEMP1, 1, RA, CA, $ -1, -1, -1 ) ENRMIN = MIN( ENRMIN, TEMP1 ) ENRMAX = MAX( ENRMAX, TEMP1 ) END IF 20 CONTINUE CALL SGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA, -1, $ -1, -1 ) CALL SGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA, -1, $ -1, -1 ) ELSE DO 40 J = 1, N TEMP1 = ZERO DO 30 JVEC = 1, N CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN TEMP1 = MAX( TEMP1, CABS1( E( ( ICOL-1 )*LDE+ $ IROW ) ) ) END IF 30 CONTINUE IF( MYROW.EQ.II ) THEN CALL SGAMX2D( CONTXT, 'Row', ' ', 1, 1, TEMP1, 1, RA, CA, $ -1, -1, -1 ) ENRMIN = MIN( ENRMIN, TEMP1 ) ENRMAX = MAX( ENRMAX, TEMP1 ) END IF 40 CONTINUE CALL SGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA, -1, $ -1, -1 ) CALL SGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA, -1, $ -1, -1 ) END IF * * Norm of A: * ANORM = MAX( PCLANGE( NORMA, N, N, A, 1, 1, DESCA, RWORK ), UNFL ) * * Norm of E: * ENORM = MAX( PCLANGE( NORME, N, N, E, 1, 1, DESCE, RWORK ), ULP ) * * Norm of error: * * Error = AE - EW * CALL PCLASET( 'Full', N, N, CZERO, CZERO, WORK, 1, 1, DESCW ) * DO 60 JCOL = 1, N IF( ITRNSW.EQ.0 ) THEN WTEMP = W( JCOL ) ELSE WTEMP = CONJG( W( JCOL ) ) END IF * IF( ITRNSE.EQ.0 ) THEN CALL PCAXPY( N, WTEMP, E, 1, JCOL, DESCE, 1, WORK, 1, JCOL, $ DESCW, 1 ) ELSE IF( ITRNSE.EQ.1 ) THEN CALL PCAXPY( N, WTEMP, E, JCOL, 1, DESCE, N, WORK, 1, JCOL, $ DESCW, 1 ) ELSE CALL PCAXPY( N, CONJG( WTEMP ), E, JCOL, 1, DESCE, N, WORK, $ 1, JCOL, DESCW, 1 ) DO 50 JROW = 1, N CALL INFOG2L( JROW, JCOL, DESCW, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN WORK( ( JCOL-1 )*LDW+JROW ) $ = CONJG( WORK( ( JCOL-1 )*LDW+JROW ) ) END IF 50 CONTINUE END IF 60 CONTINUE * CALL PCGEMM( TRANSA, TRANSE, N, N, N, CONE, A, 1, 1, DESCA, E, 1, $ 1, DESCE, -CONE, WORK, 1, 1, DESCW ) * ERRNRM = PCLANGE( 'One', N, N, WORK, 1, 1, DESCW, RWORK ) / ENORM * * Compute RESULT(1) (avoiding under/overflow) * IF( ANORM.GT.ERRNRM ) THEN RESULT( 1 ) = ( ERRNRM / ANORM ) / ULP ELSE IF( ANORM.LT.ONE ) THEN RESULT( 1 ) = ( MIN( ERRNRM, ANORM ) / ANORM ) / ULP ELSE RESULT( 1 ) = MIN( ERRNRM / ANORM, ONE ) / ULP END IF END IF * * Compute RESULT(2) : the normalization error in E. * RESULT( 2 ) = MAX( ABS( ENRMAX-ONE ), ABS( ENRMIN-ONE ) ) / $ ( REAL( N )*ULP ) * RETURN * * End of PCGET22 * END scalapack-1.8.0/TESTING/EIG/psmatgen.f0000640000175000017500000004311010363532303017021 0ustar muammarmuammar SUBROUTINE PSMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PSMATGEN : Parallel Real Single precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) REAL, pointer into the local memory * to an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSRAND EXTERNAL ICEIL, NUMROC, LSAME, PSRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = ONE - TWO*PSRAND(0) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = ONE - TWO*PSRAND(0) 30 CONTINUE A(IK,JK) = ONE - TWO*PSRAND(0) DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = ONE - TWO*PSRAND(0) A(IK,JK+J) = A(IK+J,JK) 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 A(IK,JK) = ONE - TWO*PSRAND(0) JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = NQNB JUMP3 = N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 A(IK,JK) = ONE - TWO*PSRAND(0) JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = NPMB JUMP3 = M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = ONE - TWO*PSRAND(0) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 A(IK,JK+J) = ABS(A(IK,JK+J)) + MAXMN IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PSMATGEN * END scalapack-1.8.0/TESTING/EIG/pztrddriver.f0000640000175000017500000004720410363532303017572 0ustar muammarmuammar PROGRAM PZTRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * Purpose * ======== * * PZTRDDRIVER is the main test program for the COMPLEX*16 * SCALAPACK TRD (symmetric tridiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK TRD computation input file' * 'PVM machine' * 'TRD.out' output file name * 6 device out * 'L' define Lower or Upper * 3 number of problems sizes * 5 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, TOTMEM, ZPLXSZ, MEMSIZ, NTESTS COMPLEX*16 PADVAL PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LWORK, MYCOL, $ MYROW, N, NB, NDIAG, NGRIDS, NMAT, NNB, NOFFD, $ NOUT, NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ, $ WORKTRD REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZHETDRV, PZHETRD, PZLAFCHK, $ PZMATGEN, PZTRDINFO, PZTTRDTESTER, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PZLANHE EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PZLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )STOP * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZTRDINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF NDIAG = ICEIL( DBLESZ*NDIAG, ZPLXSZ ) NOFFD = ICEIL( DBLESZ*NOFFD, ZPLXSZ ) * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = MAX( NB*( NP+1 ), 3*NB ) WORKTRD = LWORK + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ICEIL( DBLESZ*ITEMP, ZPLXSZ ), $ 2*( NB+NP )*NB ) WORKSIZ = MAX( LWORK, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PZMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHE', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PZHETRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZHETRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETRD', WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PZHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) CALL PZLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZHETDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZHETDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0D+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) * NOPS = ( 4.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', UPLO, N, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', UPLO, N, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * CALL PZTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, NMAT, $ MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB P Q TRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ---- ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, A4, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PZTRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pznepdriver.f0000640000175000017500000005026710363532303017566 0ustar muammarmuammar PROGRAM PZNEPDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * Purpose * ======= * * PZNEPDRIVER is the main test program for the COMPLEX*16 * SCALAPACK NEP routines. This test driver performs a Schur * decomposition followed by residual check of a Hessenberg matrix. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * ZPLXSZ INTEGER, default = 16 bytes. * ZPLXSZ indicate the length in bytes on the given platform * for a double precision complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * Further Details * =============== * * Contributed by Mark Fahey, March 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ZPLXSZ, TOTMEM, MEMSIZ, NTESTS PARAMETER ( ZPLXSZ = 16, TOTMEM = 200000000, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20 ) COMPLEX*16 PADVAL, ZERO, ONE PARAMETER ( PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPREPAD, IPW, IPWR, IPZ, J, K, KFAIL, $ KPASS, KSKIP, KTESTS, LDA, LDWORK, LDZ, LWORK, $ MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, $ NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, QRESID, TMFLOPS, ZNORM * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IDUM( 1 ), $ IERR( 2 ), NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZFILLPAD, PZGEMM, PZLAHQR, PZLASET, PZMATGEN, $ PZNEPFCHK, PZNEPINFO, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE, PZLANHS EXTERNAL ILCM, NUMROC, PDLAMCH, PZLANGE, PZLANHS * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZNEPINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) LDWORK = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPWR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX( 2*N, ( 8*ILCM( NPROW, NPCOL )+2 )** $ 2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZNEPFCHK and PZLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate matrix Z = In * CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PZLASET( 'Lower', MAX( 0, N-2 ), MAX( 0, N-2 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 3 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform NEP factorization * CALL PZLAHQR( .TRUE., .TRUE., N, 1, N, MEM( IPA ), DESCA, $ MEM( IPWR ), 1, N, MEM( IPZ ), DESCZ, $ MEM( IPW ), LWORK, IDUM, 0, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PZLAHQR INFO=', INFO KFAIL = KFAIL + 1 GO TO 10 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PZCHEKPAD( ICTXT, 'PZLAHQR (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLAHQR (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLAHQR (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLAHQR (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || Z * H * Z**T - H0 || / ( N*|| H0 ||*EPS ) * CALL PZNEPFCHK( N, MEM( IPA ), 1, 1, DESCA, IASEED, $ MEM( IPZ ), 1, 1, DESCZ, ANORM, $ FRESID, MEM( IPW ) ) * CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (WORK)', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || (Z**T)*Z - In ||_1 * CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPA ), 1, $ 1, DESCA ) CALL PZGEMM( 'Cong Tran', 'No transpose', N, N, N, $ -ONE, MEM( IPZ ), 1, 1, DESCZ, $ MEM( IPZ ), 1, 1, DESCZ, ONE, MEM( IPA ), $ 1, 1, DESCA ) ZNORM = PZLANGE( '1', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) QRESID = ZNORM / ( DBLE( N )*PDLAMCH( ICTXT, 'P' ) ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0D+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 18 N^3 flops for PxLAHQR * NOPS = 18.0D+0*DBLE( N )**3 * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H - Q*S*Q^T|| / (||H|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Q^T*Q - I|| / ( N * eps ) ', G25.7 ) * STOP * * End of PZNEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pslatms.f0000640000175000017500000003241410363532303016673 0ustar muammarmuammar* * SUBROUTINE PSLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, IA, JA, DESCA, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER IA, INFO, JA, KL, KU, LWORK, M, MODE, N, ORDER REAL COND, DMAX * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL A( * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * PSLATMS generates random symmetric matrices with specified * eigenvalues for testing SCALAPACK programs. * * PSLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * ### bandwidth reduction NOT SUPPORTED ### * * Arguments * ========= * * M - (global input) INTEGER * The number of rows of A. Not modified. * * N - (global input) INTEGER * The number of columns of A. Not modified. * ### M .ne. N unsupported * * DIST - (global input) CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - (global input) INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to SLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - (global input) CHARACTER*1 * If SYM='S' or 'H', the generated matrix is symmetric, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is symmetric, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * ### SYM = 'N' NOT SUPPORTED ### * Not modified. * * D - (local input/output) REAL array, * dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - (global input) INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - (global input) REAL * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - (global input) REAL * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - (global input) INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * ### 1 <= KL < N-1 is NOT SUPPORTED ### * * KU - (global input) INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * ### 1 <= KU < N-1 is NOT SUPPORTED ### * * PACK - (global input) CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * ### PACK must be 'N' all other options NOT SUPPORTED ### * * A - (local output) REAL array * Global dimension (M, N), local dimension (MP, NQ) * On exit A is the desired test matrix. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ORDER - (input) INTEGER * The number of reflectors used to define the orthogonal * matrix Q. A = Q * D * Q' * Higher ORDER requires more computation and communication. * * WORK - (local input/output) REAL array, * dimension (LWORK) * * LWORK - (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PSLASIZESEP * * INFO - (global output) INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -16 => DESCA is inconsistent * -17 => ORDER not in the range 0 to N inclusive * 1 => Error return from SLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from PSLAGSY * *----------------------------------------------------------------------- * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IDIST, IINFO, IPACK, IRSIGN, ISYM, LLB, $ MNMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL ALPHA, TEMP * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLAGSY, $ PXERBLA, SLASET, SLATM1, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * 1) Decode and Test the input parameters. * Initialize flags & seed. * * INFO = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( ( MYROW.GE.NPROW .OR. MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL .OR. MYCOL.LT.0 ) )RETURN * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IPACK = 1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) * IF( ORDER.EQ.0 ) $ ORDER = N * * Set INFO if an error * IF( NPROW.EQ.-1 ) THEN INFO = -( 1600+CTXT_ ) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, INFO ) IF( INFO.EQ.0 ) THEN IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT. $ ONE ) THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( ( ORDER.LT.0 ) .OR. ( ORDER.GT.N ) ) THEN INFO = -17 END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, 0, IDUM1, IDUM2, $ INFO ) END IF * * Check for unsupported features * IF( ISYM.NE.2 ) THEN INFO = -5 ELSE IF( IPACK.NE.0 ) THEN INFO = -12 ELSE IF( KL.GT.0 .AND. KL.LT.M-1 ) THEN INFO = -10 ELSE IF( KU.GT.0 .AND. KU.LT.N-1 ) THEN INFO = -11 ELSE IF( LLB.NE.0 .AND. LLB.NE.M-1 ) THEN INFO = -10 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL SLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) * IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL SSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL SLASET( 'A', NP, NQ, ZERO, ZERO, A, DESCA( LLD_ ) ) * * symmetric -- A = U D U' * CALL PSLAGSY( M, LLB, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, IINFO ) * RETURN * * End of PSLATMS * END scalapack-1.8.0/TESTING/EIG/pdgebdrv.f0000640000175000017500000004663010363532303017012 0ustar muammarmuammar SUBROUTINE PDGEBDRV( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PDGEBDRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from sub( A ), * Q, P returned by PDGEBRD: * * sub( A ) := Q * B * P'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of sub( A ) * as returned by PDGEBRD. On exit, the original distribu- * ted matrix sub( A ) is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local input) DOUBLE PRECISION array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local input) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= 2*NB*( MP + NQ + NB ) * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MP = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION EIGHT, ONE, ZERO PARAMETER ( EIGHT = 8.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IIA, IL, IPTP, IPTQ, $ IPV, IPW, IPWK, IOFF, IV, J, JB, JJA, JL, JV, $ K, MN, MP, MYCOL, MYROW, NB, NPCOL, NPROW, NQ DOUBLE PRECISION ADDBND, D1, D2, E1, E2 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, IGSUM2D, INFOG2L, $ PDLACPY, PDLARFB, PDLARFT, PDLASET, $ PDELGET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXG2P, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) IOFF = MOD( IA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) IPV = 1 IPW = IPV + MP*NB IPTP = IPW + NQ*NB IPTQ = IPTP + NB*NB IPWK = IPTQ + NB*NB * IV = 1 JV = 1 MN = MIN( M, N ) IL = MAX( ( (IA+MN-2) / NB )*NB + 1, IA ) JL = MAX( ( (JA+MN-2) / NB )*NB + 1, JA ) IAROW = INDXG2P( IL, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, IA+M-IL, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCW, NB, JA+N-JL, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * ADDBND = EIGHT * PDLAMCH( ICTXT, 'eps' ) * * When A is an upper bidiagonal form * IF( M.GE.N ) THEN * CALL DESCSET( DESCD, 1, JA+MN-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * DO 10 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PDELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, IA+J, 1, DESCE ) CALL PDELGET( 'Rowwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * DO 20 J = JL, JA+NB-IOFF, -NB JB = MIN( JA+N-J, NB ) I = IA + J - JA K = I - IA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PDLARFT( 'Forward', 'Columnwise', M-K+1, JB, A, I, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Lower', M-K+1, JB, A, I, J, DESCA, $ WORK( IPV ), IV, JV, DESCV ) CALL PDLASET( 'Upper', M-K+1, JB, ZERO, ONE, WORK( IPV ), $ IV, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', M-K, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PDLARFT( 'Forward', 'Rowwise', N-K, JB, A, I, J+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Upper', JB, N-K, A, I, J+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PDLASET( 'Lower', JB, N-K, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PDLASET( 'Upper', JB, N-K-1, ZERO, ZERO, A, I, J+2, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PDLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K+1, N-K+1, JB, WORK( IPV ), $ IV, JV, DESCV, WORK( IPTQ ), A, I, J, DESCA, $ WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-K+1, N-K, JB, WORK( IPW ), IV, JV+1, DESCW, $ WORK( IPTP ), A, I, J+1, DESCA, WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 20 CONTINUE * * Handle first block separately * JB = MIN( N, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PDLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IV, JV, DESCV ) CALL PDLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPV ), IV, JV, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PDLARFT( 'Forward', 'Rowwise', N-1, JB, A, IA, JA+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Upper', JB, N-1, A, IA, JA+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PDLASET( 'Lower', JB, N-1, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PDLASET( 'Upper', JB, N-2, ZERO, ZERO, A, IA, JA+2, $ DESCA ) * * Apply block Householder transformation from left. * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N, JB, WORK( IPV ), IV, JV, DESCV, $ WORK( IPTQ ), A, IA, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right. * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, $ N-1, JB, WORK( IPW ), IV, JV+1, DESCW, $ WORK( IPTP ), A, IA, JA+1, DESCA, WORK( IPWK ) ) * ELSE * CALL DESCSET( DESCD, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MN-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PDELGET( ' ', ' ', D2, D, IA+J, 1, DESCD ) CALL PDELGET( 'Rowwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PDELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * DO 40 I = IL, IA+NB-IOFF, -NB JB = MIN( IA+M-I, NB ) J = JA + I - IA K = J - JA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PDLARFT( 'Forward', 'Columnwise', M-K, JB, A, I+1, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Lower', M-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PDLASET( 'Upper', M-K, JB, ZERO, ONE, WORK( IPV ), $ IV+1, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', M-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PDLARFT( 'Forward', 'Rowwise', N-K+1, JB, A, I, J, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Upper', JB, N-K+1, A, I, J, DESCA, $ WORK( IPW ), IV, JV, DESCW ) CALL PDLASET( 'Lower', JB, N-K+1, ZERO, ONE, WORK( IPW ), $ IV, JV, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PDLASET( 'Upper', JB, N-K, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PDLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K, N-K+1, JB, WORK( IPV ), $ IV+1, JV, DESCV, WORK( IPTQ ), A, I+1, J, $ DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-K+1, N-K+1, JB, WORK( IPW ), IV, JV, DESCW, $ WORK( IPTP ), A, I, J, DESCA, WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 40 CONTINUE * * Handle first block separately * JB = MIN( M, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PDLARFT( 'Forward', 'Columnwise', M-1, JB, A, IA+1, JA, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Lower', M-1, JB, A, IA+1, JA, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PDLASET( 'Upper', M-1, JB, ZERO, ONE, WORK( IPV ), IV+1, $ JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', M-2, JB, ZERO, ZERO, A, IA+2, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PDLARFT( 'Forward', 'Rowwise', N, JB, A, IA, JA, DESCA, $ TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPW ), $ IV, JV, DESCW ) CALL PDLASET( 'Lower', JB, N, ZERO, ONE, WORK( IPW ), IV, JV, $ DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PDLASET( 'Upper', JB, N-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Apply block Householder transformation from left * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-1, N, JB, WORK( IPV ), IV+1, JV, DESCV, $ WORK( IPTQ ), A, IA+1, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, N, $ JB, WORK( IPW ), IV, JV, DESCW, WORK( IPTP ), $ A, IA, JA, DESCA, WORK( IPWK ) ) END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PDGEBDRV * END scalapack-1.8.0/TESTING/EIG/pzhrddriver.f0000640000175000017500000004544510363532303017563 0ustar muammarmuammar PROGRAM PZHRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PZHRDDRIVER is the main test program for the COMPLEX*16 * ScaLAPACK HRD (Hessenberg Reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 14 lines: * 'ScaLAPACK HRD input file' * 'PVM machine' * 'HRD.out' output file name (if any) * 6 device out * 2 number of problems sizes * 100 101 values of N * 2 1 values of ILO * 99 101 values of IHI * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER MEMSIZ, NTESTS, TOTMEM, ZPLXSZ COMPLEX*16 PADVAL PARAMETER ( TOTMEM = 2000000, ZPLXSZ = 16, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IHI, IHIP, IHLP, IHLQ, $ ILCOL, ILO, ILROW, INFO, INLQ, IMIDPAD, IPA, $ IPT, IPW, IPOSTPAD, IPREPAD, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LCMQ, LOFF, $ LWORK, MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, $ NPROCS, NOUT, NP, NPCOL, NPROW, NQ, WORKHRD, $ WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), NVHI( NTESTS ), NVLO( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINIT, BLACS_GRIDINFO, $ DESCINIT, IGSUM2D, BLACS_PINFO, PZFILLPAD, $ PZLAFCHK, PZGEHDRV, PZGEHRD, $ PZHRDINFO, PZMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC DOUBLE PRECISION PZLANGE EXTERNAL ILCM, INDXG2P, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZHRDINFO( OUTFILE, NOUT, NMAT, NVAL, NVLO, NVHI, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GOTO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) ILO = NVLO( J ) IHI = NVHI( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPT = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * IHIP = NUMROC( IHI, NB, MYROW, DESCA( RSRC_ ), NPROW ) LOFF = MOD( ILO-1, NB ) ILROW = INDXG2P( ILO, NB, MYROW, DESCA( RSRC_ ), NPROW ) ILCOL = INDXG2P( ILO, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) IHLP = NUMROC( IHI-ILO+LOFF+1, NB, MYROW, ILROW, NPROW ) INLQ = NUMROC( N-ILO+LOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWORK = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) WORKHRD = LWORK + IPOSTPAD WORKSIZ = WORKHRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IHLQ = NUMROC( IHI-ILO+LOFF+1, NB, MYCOL, ILCOL, $ NPCOL ) ITEMP = NB*MAX( IHLP+INLQ, IHLQ+MAX( IHIP, $ IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ) ) ) WORKSIZ = MAX( NB*NB + NB*IHLP + ITEMP, NB * NP ) + $ IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Hessenberg reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate A * CALL PZMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, 0, NP, 0, NQ, MYROW, MYCOL, $ NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), $ NQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKHRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKHRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce Hessenberg form * CALL PZGEHRD( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGEHRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEHRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEHRD', WORKHRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKHRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps) * CALL PZGEHDRV( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ) ) CALL PZLAFCHK( 'No', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGEHDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEHDRV', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEHDRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0D+0 ) $ THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * HRD requires 40/3 * N^3 floating point ops. (flops) * more precisely, * HRD requires 16/3*(IHI-ILO)^3+8*IHI*(IHI-ILO)^2 flops * NOPS = DBLE( IHI-ILO ) NOPS = NOPS * NOPS * $ ( 8.0D0*DBLE( IHI ) + (16.0D0/3.0D0)*NOPS ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, ILO, IHI, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, $ PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, ILO, IHI, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, $ PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PZHRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pdgsepsubtst.f0000640000175000017500000007077010363532303017746 0ustar muammarmuammar* * SUBROUTINE PDGSEPSUBTST( WKNOWN, IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, IA, JA, DESCA, WIN, WNEW, $ IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, LWORK1, IWORK, LIWORK, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IBTYPE, IL, IPOSTPAD, IPREPAD, IU, JA, $ LIWORK, LWORK, LWORK1, N, NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) DOUBLE PRECISION A( * ), B( * ), COPYA( * ), COPYB( * ), $ GAP( * ), WIN( * ), WNEW( * ), WORK( * ), $ Z( * ) * .. * * Purpose * ======= * * PDGSEPSUBTST calls PDSYGVX and then tests the output of * PDSYGVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < THRESH * |QT * Q - I| / eps * norm(A) < THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PDSYGVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PDGSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PDGSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PDSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) DOUBLE PRECISION array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PDSYGVX for a description of block cyclic layout. * The test matrix, which is then modified by PDSYGVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) DOUBLE PRECISION array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * B (local workspace) DOUBLE PRECISION array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PDSYGVX * * COPYB (local input) DOUBLE PRECISION array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PDGSEPCHK and PDSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as copmuted by this call to PDSYGVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PDSYGVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PDSYGVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / |A|*N*EPS * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0 ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEVX, SIZETMS, $ SIZETST, VALSIZE, VECSIZE DOUBLE PRECISION EPS, ERROR, MAXERROR, MAXVU, MINERROR, MINVL, $ NORMWIN, OLDVL, OLDVU, ORFAC, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGAMN2D, DGAMX2D, $ DLACPY, IGAMN2D, IGAMX2D, PDCHEKPAD, PDELSET, $ PDFILLPAD, PDGSEPCHK, PDLASIZEGSEP, $ PDLASIZESYEVX, PDSYGVX, PICHEKPAD, PIFILLPAD, $ SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PDLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 30 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 40 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 40 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * * * DSEED is not used in this call to PDLASIZESYEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PDLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL DLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL DLACPY( 'A', NP, NQ, COPYB, DESCA( LLD_ ), B( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PDFILLPAD( DESCA( CTXT_ ), NP, NQ, B, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0D+2 ) * CALL PDFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * * Make sure that PDSYGVX does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, MAXEIGS, 1 CALL PDELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0D+0 ) 50 CONTINUE 60 CONTINUE * ORFAC = -1.0D+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, $ JA, DESCA, B( 1+IPREPAD ), IA, JA, DESCA, VL, VU, $ IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), ORFAC, $ Z( 1+IPREPAD ), IA, JA, DESCA, WORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, $ IFAIL( 1+IPREPAD ), ICLUSTR( 1+IPREPAD ), $ GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-B', NP, NQ, B, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0D+2 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PDCHEKPAD( DESCZ( CTXT_ ), 'PDSYGVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PDSYGVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 70 I = 1, M WORK( I ) = WNEW( I+IPREPAD ) WORK( I+M ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, WORK, M, 1, $ 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ WORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 80 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( WORK( I )-WORK( M+ $ I ) ).GT.FIVE*EPS*ABS( WORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 80 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 90 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 100 NCLUSTERS = NCLUSTERS + 1 90 CONTINUE 100 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 110 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 110 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 120 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 120 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 150 * * Note that a couple key variables get redefined in PDGSEPCHK * as described by this table: * * PDGSEPTST name PDGSEPCHK name * ------------- ------------- * COPYA A * Z Q * B B * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the residual check * CALL PDFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * CALL PDGSEPCHK( IBTYPE, N, NZ, COPYA, IA, JA, DESCA, COPYB, $ IA, JA, DESCA, THRESH, Z( 1+IPREPAD ), IA, $ JA, DESCZ, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDGSEPCHK-WORK', SIZECHK, $ 1, WORK, SIZECHK, IPREPAD, IPOSTPAD, $ 4.3D+0 ) * IF( RES.NE.0 ) $ RESULT = 1 END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 140 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 130 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 130 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 140 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxSYGVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PDSYGVX returned INFO=', I7 ) 9998 FORMAT( 'PDSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PDGSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PDSYGVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PDSYGVX' ) 9981 FORMAT( 'NZ altered by PDSYGVX with JOBZ=N' ) * * End of PDGSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pdhrdinfo.f0000640000175000017500000003237410363532303017172 0ustar muammarmuammar SUBROUTINE PDHRDINFO( SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ NVHI( LDNVAL ), NVLO( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDHRDINFO get the needed startup information for the Hessenberg * reduction tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for * N, IHI & ILO. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of rows & columns in matrix). * * NVLO (global output) INTEGER array, dimension (LDNVAL) * The values of ILO. * * NVHI (global output) INTEGER array, dimension (LDNVAL) * The values of IHI. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, ILO and IHI. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * 3*LDNVAL+LDNBVAL+2*LDPVAL. Used to pack all input arrays * in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'HRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = * ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'N', LDNVAL GO TO 20 END IF * * Get values of N, ILO, IHI * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVLO( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVHI( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVLO, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVHI, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS -1 CALL IGEBS2D( ICTXT, 'All', ' ', 1, I, WORK, 1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction routine to Hessenberg form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision Hessenberg ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q H Q''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'HRD time : Time in seconds to compute HRD ' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for HRD ' // $ 'reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'ILO ', ( NVLO( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVLO( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'IHI ', ( NVHI( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVHI( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 3*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) * I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVLO, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVHI, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' ILLEGAL INPUT IN FILE ', 40A, '. ABORTING RUN.' ) 9997 FORMAT( ' NUMBER OF VALUES OF ', 5A, $ ' IS LESS THAN 1 OR GREATER ', 'THAN ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G14.7 ) * * End of PDHRDINFO * END scalapack-1.8.0/TESTING/EIG/pzrptseptst.f0000640000175000017500000000524310363532303017632 0ustar muammarmuammar* * PROGRAM PZRPTSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Repeat parallel Hermitian eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS, UPLO INTEGER CONTEXT, IAM, INFO, IPOSTPAD, IPREPAD, LDA, $ MATTYPE, N, NB, NPCOL, NPROCS, NPROW DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) DOUBLE PRECISION GAP( MAXN ), RWORK( LWORK ), WIN( MAXN ), $ WNEW( MAXN ) COMPLEX*16 A( MAXN*MAXN ), COPYA( MAXN*MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PZSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * These lines should be replaced by the output from pxSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351D-37 THRESH = .350000D+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PZSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, N, $ ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, WORK, $ LWORK-IPREPAD-IPOSTPAD, RWORK, $ LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PZRPTSEPTST * END scalapack-1.8.0/TESTING/EIG/pdgsepreq.f0000640000175000017500000002437010602576752017220 0ustar muammarmuammar* * SUBROUTINE PDGSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION MEM( MEMSIZE ) * .. * * Purpose * ======= * * PDGSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PDGSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) DOUBLE PRECISION ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION FIVE PARAMETER ( FIVE = 5.0D+0 ) INTEGER DBLESZ, INTGSZ PARAMETER ( DBLESZ = 8, INTGSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IBTYPE, IMIDPAD, INITCON, $ IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, LDA, LLWORK, MATSIZE, MATTYPE, MYCOL, $ MYROW, N, NB, NIBTYPES, NMATSIZES, NMATTYPES, $ NNODES, NP, NPCOL, NPCONFIGS, NPROW, NQ, $ NUPLOS, ORDER, PCONFIG, PTRA, PTRB, PTRCOPYA, $ PTRCOPYB, PTRGAP, PTRICLUS, PTRIFAIL, PTRIWRK, $ PTRW, PTRW2, PTRWORK, PTRZ, RES, SIZECHK, $ SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, $ SIZESUBTST, SIZESYEVX, SIZETMS, SIZETST, UPLO DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PDGSEPTST, PDLASIZEGSEP, PDSEPINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PDSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * Thresholds must be bigger for the generalized problem. * THRESH = THRESH*FIVE * DO 50 MATSIZE = 1, NMATSIZES * DO 40 PCONFIG = 1, NPCONFIGS * DO 30 MATTYPE = 1, NMATTYPES * DO 20 UPLO = 1, NUPLOS IF( LSAME( SUBTESTS, 'Y' ) ) THEN NIBTYPES = 3 ELSE NIBTYPES = 1 END IF DO 10 IBTYPE = 1, NIBTYPES * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, $ NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, $ MYROW, MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PDLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, $ ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, $ ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYB = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRB = PTRCOPYB + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRB + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRGAP = PTRW2 + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + NPROW*NPCOL + IPREPAD + $ IPOSTPAD PTRICLUS = PTRIFAIL + $ ICEIL( N+IPREPAD+IPOSTPAD, $ DBLESZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, DBLESZ / INTGSZ ) PTRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, DBLESZ / INTGSZ ) LLWORK = MEMSIZE - PTRWORK - IPOSTPAD - $ IPREPAD + 1 NTESTS = NTESTS + 1 IF( LLWORK.LT.SIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PDGSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), $ IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, $ MEM( PTRA ), $ MEM( PTRCOPYA ), $ MEM( PTRB ), $ MEM( PTRCOPYB ), $ MEM( PTRZ ), LDA, $ MEM( PTRW ), MEM( PTRW2 ), $ MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ LLWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * ) $ ' pDGSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE 50 CONTINUE END IF * * RETURN * * End of PDDGSEPREQ * END scalapack-1.8.0/TESTING/EIG/pcbrdinfo.f0000640000175000017500000003217610363532303017163 0ustar muammarmuammar SUBROUTINE PCBRDINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBVAL, LDNVAL, LDPVAL, $ LDQVAL, NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBVAL( LDNBVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBRDINFO get needed startup information for the bidiagonal * reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M & N. * * MVAL (global output) INTEGER array, dimension (LDMVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M. LDMVAL >= NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDMVAL+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, IGEBR2D, IGEBS2D, $ SCOPY * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'BRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get values of M, N * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Bidiagonal reduction' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex single precision bidiagonal ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q B P''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ ' than THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'BRD time : Time in seconds to reduce the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'the bidiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9999 ) ' ' WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 2*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9997 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G12.5 ) * * End of PCBRDINFO * END scalapack-1.8.0/TESTING/EIG/pdsqpsubtst.f0000640000175000017500000004076310363532303017612 0ustar muammarmuammar* * SUBROUTINE PDSQPSUBTST( WKNOWN, JOBZ, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, IA, JA, DESCA, WIN, WNEW, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LWORK, LWORK1, N, $ NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), COPYA( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSQPSUBTST calls PDSYEV and then tests the output of * PDSYEV * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PDSYEV when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PDSQPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. * * A (local workspace) DOUBLE PRECISION array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PDSYEV for a description of block cyclic layout. * The test matrix, which is then modified by PDSYEV * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) DOUBLE PRECISION array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PDSEPCHK and PDSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as computed by this call to PDSYEV. * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PDSYEV * * RESULT (global output) INTEGER * The result of this call to PDSYEV * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION FIVE, NEGONE, PADVAL, ZERO PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, EIGS, MINSIZE, MQ, MYCOL, MYROW, $ NP, NPCOL, NPROW, NQ, RESAQ, RESQTQ, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST,SIZESYEVD, ISIZESYEVD DOUBLE PRECISION EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, $ NORMWIN, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), ITMP( 2 ), $ IWORK( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, NUMROC, PDLAMCH, PDLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGAMN2D, DGAMX2D, $ DLACPY, IGAMN2D, IGAMX2D, PDCHEKPAD, PDELSET, $ PDFILLPAD, PDLASIZESQP, PDSEPCHK, PDSEPQTQ, $ PDSYEV, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PDLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 30 CONTINUE * DO 40 I = 1, 2 IWORK( I ) = 0 40 CONTINUE * IF( LSAME( JOBZ, 'N' ) ) THEN EIGS = 0 ELSE EIGS = N END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( EIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Find the amount of workspace needed with or without eigenvectors. * CALL PDLASIZESYEV( JOBZ, N, DESCA, MINSIZE ) * CALL DLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PDFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * * Make sure that PDSYEV does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, EIGS, 1 CALL PDELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0D+0 ) 50 CONTINUE 60 CONTINUE * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PDSYEV( JOBZ, UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEV-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PDCHEKPAD( DESCZ( CTXT_ ), 'PDSYEV-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEV-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSYEV-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )INFO IF( INFO.EQ.(N+1) ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 END IF ELSE IF( INFO.EQ.14 .AND. LWORK1.GE.MINSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * IF( RESULT.EQ.0 .OR. INFO.GT.N ) THEN * * Make sure that different processes return the same eigenvalues. * This is a more exhaustive check that provided by PDSYEV. * DO 70 I = 1, N WORK( I ) = WNEW( I+IPREPAD ) WORK( I+N ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', N, 1, WORK, N, 1, $ 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', N, 1, $ WORK( 1+N ), N, 1, 1, -1, -1, 0 ) * DO 80 I = 1, N * IF( ABS( WORK( I )-WORK( N+I ) ).GT.ZERO ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 GO TO 90 END IF 80 CONTINUE 90 CONTINUE END IF * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PDLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PDSEPCHK * as described by this table: * * PDSEPTST name PDSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the |AQ - QE| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * RESAQ = 0 * CALL PDSEPCHK( N, N, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RESAQ ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RESAQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9993 ) END IF * * Perform the |QTQ - I| test * CALL PDFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * RESQTQ = 0 * CALL PDSEPQTQ( N, N, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1 ), IWORK( 1 ), WORK( 1 ), $ WORK( IPREPAD+1 ), SIZEQTQ, QTQNRM, INFO, $ RESQTQ ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RESQTQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9992 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0 * DO 140 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PDSYEV returned INFO=', I7 ) 9998 FORMAT( 'PDSEPQTQ in PDSQPSUBTST returned INFO=', I7 ) 9997 FORMAT( 'PDSQPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PDSYEV returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'Different processes return different eigenvalues' ) 9994 FORMAT( 'Heterogeneity detected by PDSYEV' ) 9993 FORMAT( 'PDSYEV failed the |AQ -QE| test' ) 9992 FORMAT( 'PDSYEV failed the |QTQ -I| test' ) * * End of PDSQPSUBTST * END scalapack-1.8.0/TESTING/EIG/pcnepfchk.f0000640000175000017500000002616710363532303017161 0ustar muammarmuammar SUBROUTINE PCNEPFCHK( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, $ DESCZ, ANORM, FRESID, WORK ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. INTEGER IA, IASEED, IZ, JA, JZ, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) COMPLEX A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCNEPFCHK computes the residual * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1) * and ||.|| stands for the infinity norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The order of sub( A ) and sub( Z ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * Z (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this * array contains the local pieces of the N-by-N distributed * matrix sub( Z ). * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX array, dimension (LWORK). * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where * * IROFFA = MOD( IA-1, MB_A ), * ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * WORK is used to store a block of rows and a block of columns * of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * Further Details * =============== * * Contributed by Mark Fahey, March, 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF, $ IW, J, JB, JJA, JN, LDA, LDW, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL EPS * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCGEMM, $ PCLACPY, PCLASET, PCMATGEN, CMATADD * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PCLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF LDW = MAX( 1, NP ) * * First compute H <- H * Z**T * CALL DESCSET( DESCW, DESCA( MB_ ), N, DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, DESCA( MB_ ) ) * DO 10 I = IA, IA + N - 1, DESCA( MB_ ) IB = MIN( IA+N-I, DESCA( MB_ ) ) * CALL PCLACPY( 'All', IB, N, A, I, JA, DESCA, WORK, 1, 1, $ DESCW ) CALL PCGEMM( 'No transpose', 'Cong Tran', IB, N, N, ONE, WORK, $ 1, 1, DESCW, Z, IZ, JZ, DESCZ, ZERO, A, I, JA, $ DESCA ) * DESCW( RSRC_ ) = MOD( DESCW( RSRC_ )+1, NPROW ) * 10 CONTINUE * * Then compute H <- Z * H = Z * H0 * Z**T * CALL DESCSET( DESCW, N, DESCA( NB_ ), DESCA( MB_ ), DESCA( NB_ ), $ IAROW, IACOL, ICTXT, LDW ) * DO 20 J = JA, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * CALL PCLACPY( 'All', N, JB, A, IA, J, DESCA, WORK, 1, 1, $ DESCW ) CALL PCGEMM( 'No transpose', 'No transpose', N, JB, N, ONE, Z, $ IZ, JZ, DESCZ, WORK, 1, 1, DESCW, ZERO, A, IA, J, $ DESCA ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Compute H - H0 * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA-1 )*LDA IW = 1 JB = JN - JA + 1 DESCW( CSRC_ ) = IACOL * * Handle first block of columns separately * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PCMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ NP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL PCLASET( 'Lower', MAX( 0, N-2 ), JB, ZERO, ZERO, WORK, $ MIN( IW+2, N ), 1, DESCW ) CALL CMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * DO 30 J = JN + 1, JA + N - 1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN CALL PCMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PCLASET( 'Lower', MAX( 0, N-IW-1 ), JB, ZERO, ZERO, $ WORK, MIN( N, IW+2 ), 1, DESCW ) CALL CMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IW = IW + DESCA( MB_ ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) 30 CONTINUE * * Calculate factor residual * FRESID = PCLANGE( 'I', N, N, A, IA, JA, DESCA, WORK ) / $ ( N*EPS*ANORM ) * RETURN * * End PCNEPFCHK * END scalapack-1.8.0/TESTING/EIG/pznepinfo.f0000640000175000017500000002776010363532303017230 0ustar muammarmuammar SUBROUTINE PZNEPINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZNEPINFO gets needed startup information for PZHSEQR drivers * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Further Details * =============== * * Implemented by: M. Fahey, June 2000 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a complex * single precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'NEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK QSQ^H by Schur Decomposition.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'complex double precision Schur decomposition.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = ||H-QSQ^H|| / ' // $ '(||H|| * eps * N )' WRITE( NOUT, FMT = 9999 ) $ ' Orthogonality residual = ||I - Q^HQ|| / ' // '( eps * N )' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PZNEPINFO * END scalapack-1.8.0/TESTING/EIG/psgsepdriver.f0000640000175000017500000002334510363532303017730 0ustar muammarmuammar* * PROGRAM PSGSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel REAL symmetric eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and REALSZ * indicate the length in bytes on the given platform for an integer * and a single precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * REAL is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * REAL words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PSSYGVX, the expert driver for the parallel * symmetric eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, REALSZ, NIN PARAMETER ( TOTMEM = 2000000, REALSZ = 4, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / REALSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) REAL MEM( MEMSIZ ) * .. * .. External Functions .. REAL SLAMCH EXTERNAL SLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PSGSEPREQ, PSLACHKIEEE, PSLASNBT * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PSLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PSLACHKIEEE( ISIEEE, SLAMCH( 'O' ), SLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK symmetric Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'generalized ' // 'symmetric eigenvalue routine: PSSYGVX.' WRITE( NOUT, FMT = 9999 )'A scaled residual check, ' // $ 'will be computed' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pSGSEPtst.f).' WRITE( NOUT, FMT = 9999 ) $ 'IBTYPE : Generalized eigenproblem type' // $ ' (see pSSYGVx.f)' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pSGSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : The scaled residual' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PSGSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP IBTYPE SUB WALL CPU ', $ ' CHK CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- ------ --- -------- --------', $ ' --------- -----' ) * * End of PSGSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pzgsepchk.f0000640000175000017500000003103010363532303017177 0ustar muammarmuammar* * SUBROUTINE PZGSEPCHK( IBTYPE, MS, NV, A, IA, JA, DESCA, B, IB, JB, $ DESCB, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, W, WORK, LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, IBTYPE, IC, IQ, JA, JB, JC, JQ, LWORK, $ MS, NV, RESULT DOUBLE PRECISION THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCC( * ), DESCQ( * ) DOUBLE PRECISION W( * ), WORK( * ) COMPLEX*16 A( * ), B( * ), C( * ), Q( * ) * .. * * * Purpose * ======= * * PZGSEPCHK checks a decomposition of the form * * A Q = B Q D or * A B Q = Q D or * B A Q = Q D * * where A is a symmetric matrix, B is * symmetric positive definite, Q is orthogonal, and D is diagonal. * * One of the following test ratios is computed: * * IBTYPE = 1: TSTNRM = | A Q - B Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 2: TSTNRM = | A B Q - Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 3: TSTNRM = | B A Q - Q D | / ( |A| |Q| n ulp ) * * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, B and Q * MQ = number of local columns in A * NQ = number of local columns in B and Q * * IBTYPE (input) INTEGER * The form of the symmetric generalized eigenproblem. * = 1: A*Q = (lambda)*B*Q * = 2: A*B*Q = (lambda)*Q * = 3: B*A*Q = (lambda)*Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, B, C and Q * Also, the number of columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) REAL pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer to an * array in local memory of dimension (LLD_B, LOCc(JB+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix B * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix B. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), NQ) * * Contains the eigenvectors as computed by PSSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * * RESULT (global output) INTEGER * 0 if the test passes * 1 if the test fails * * .. Local Scalars .. * INTEGER I, INFO, MYCOL, MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION ANORM, ULP * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) COMPLEX*16 CONE, CNEGONE, CZERO PARAMETER ( CONE = 1.0D+0, CNEGONE = -1.0D+0, $ CZERO = 0.0D+0 ) * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION DLAMCH, PZLANGE EXTERNAL NUMROC, DLAMCH, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZDSCAL, $ PZGEMM * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( MS, 1, MS, 2, IB, JB, DESCB, 11, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 16, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IB, JB, DESCB, 20, INFO ) * IF( INFO.EQ.0 ) THEN * NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -14 ELSE IF( JQ.NE.1 ) THEN INFO = -15 ELSE IF( IA.NE.1 ) THEN INFO = -5 ELSE IF( JA.NE.1 ) THEN INFO = -6 ELSE IF( IB.NE.1 ) THEN INFO = -9 ELSE IF( JB.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.NQ ) THEN INFO = -23 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZGSEPCHK', -INFO ) RETURN END IF * RESULT = 0 ULP = DLAMCH( 'Epsilon' ) * * Compute product of Max-norms of A and Q. * ANORM = PZLANGE( 'M', MS, MS, A, IA, JA, DESCA, WORK )* $ PZLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) IF( ANORM.EQ.ZERO ) $ ANORM = ONE * IF( IBTYPE.EQ.1 ) THEN * * Norm of AQ - BQD * * C = AQ * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 10 I = 1, NV CALL PZDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 10 CONTINUE * * C = C - BQ (i.e. AQ-BQD) * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CNEGONE, C, IC, JC, DESCC ) * TSTNRM = ( PZLANGE( 'M', MS, NV, C, IC, JC, DESCC, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * * ELSE IF( IBTYPE.EQ.2 ) THEN * * Norm of ABQ - QD * * * C = BQ * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 20 I = 1, NV CALL PZDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 20 CONTINUE * * Q = AC - Q * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PZLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * Norm of BAQ - QD * * * C = AQ * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 30 I = 1, NV CALL PZDSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 30 CONTINUE * * Q = BC - Q * CALL PZGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PZLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * END IF * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0D0 ) ) THEN RESULT = 1 END IF RETURN * * End of PZGSEPCHK * END scalapack-1.8.0/TESTING/EIG/pstrddriver.f0000640000175000017500000004664610363532303017574 0ustar muammarmuammar PROGRAM PSTRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * Purpose * ======== * * PSTRDDRIVER is the main test program for the REAL * SCALAPACK TRD (symmetric tridiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK TRD computation input file' * 'PVM machine' * 'TRD.out' output file name * 6 device out * 'L' define Lower or Upper * 3 number of problems sizes * 5 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER REALSZ, TOTMEM, MEMSIZ, NTESTS REAL PADVAL PARAMETER ( REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LWORK, MYCOL, $ MYROW, N, NB, NDIAG, NGRIDS, NMAT, NNB, NOFFD, $ NOUT, NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ, $ WORKTRD REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSLAFCHK, PSMATGEN, PSSYTDRV, $ PSSYTRD, PSTRDINFO, PSTTRDTESTER, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PSLANSY EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PSLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )STOP * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSTRDINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = MAX( NB*( NP+1 ), 3*NB ) WORKTRD = LWORK + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ITEMP, 2*( NB+NP )*NB ) WORKSIZ = MAX( LWORK, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PSMATGEN( ICTXT, 'Symm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANSY', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PSSYTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSSYTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTRD', WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PSSYTDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) CALL PSLAFCHK( 'Symm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSSYTDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSSYTDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0E+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 4/3 N^3 floating point operations * NOPS = DBLE( N ) * NOPS = ( 4.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', UPLO, N, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', UPLO, N, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * CALL PSTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, NMAT, $ MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB P Q TRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ---- ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, A4, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PSTRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pzsepreq.f0000640000175000017500000002326210363532303017062 0ustar muammarmuammar* * SUBROUTINE PZSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX*16 MEM( MEMSIZE ) * .. * * Purpose * ======= * * PZSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PZSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) COMPLEX*16 ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ZPLXSZ, INTGSZ PARAMETER ( ZPLXSZ = 16, INTGSZ = 4 ) INTEGER DBLESZ PARAMETER ( DBLESZ = 8 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IMIDPAD, INITCON, IPOSTPAD, $ IPREPAD, ISIZEHEEVX, ISIZESUBTST, ISIZETST, $ LDA, LLRWORK, MATSIZE, MATTYPE, MYCOL, MYROW, $ N, NB, NMATSIZES, NMATTYPES, NNODES, NP, NPCOL, $ NPCONFIGS, NPROW, NQ, NUPLOS, ORDER, PCONFIG, $ PTRA, PTRCOPYA, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRRWORK, PTRW, PTRW2, PTRWORK, PTRZ, $ RES, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, UPLO, SIZEHEEVD, RSIZEHEEVD, $ ISIZEHEEVD DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PDSEPINFO, PZLASIZESEP, PZSEPTST * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PDSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * DO 40 MATSIZE = 1, NMATSIZES * DO 30 PCONFIG = 1, NPCONFIGS * DO 20 MATTYPE = 1, NMATTYPES * DO 10 UPLO = 1, NUPLOS * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, $ MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, $ RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, $ ISIZEHEEVD, $ SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, ZPLXSZ / DBLESZ ) PTRWORK = PTRW2 + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, ZPLXSZ / DBLESZ ) PTRGAP = PTRWORK + SIZETST + IPREPAD + IPOSTPAD PTRIFAIL = PTRGAP + ICEIL( NPROW*NPCOL+IPREPAD+ $ IPOSTPAD, ZPLXSZ / DBLESZ ) PTRICLUS = PTRIFAIL + ICEIL( N+IPREPAD+IPOSTPAD, $ ZPLXSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, ZPLXSZ / INTGSZ ) PTRRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, ZPLXSZ / INTGSZ ) LLRWORK = ( MEMSIZE-PTRRWORK+1 )*ZPLXSZ / DBLESZ C LLRWORK = ( MEMSIZE-PTRRWORK-IPREPAD-IPOSTPAD+1) C $ *ZPLXSZ / DBLESZ NTESTS = NTESTS + 1 IF( LLRWORK.LT.RSIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PZSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), SUBTESTS, $ THRESH, N, ABSTOL, ISEED, $ MEM( PTRA ), MEM( PTRCOPYA ), $ MEM( PTRZ ), LDA, MEM( PTRW ), $ MEM( PTRW2 ), MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ SIZETST, MEM( PTRRWORK ), $ LLRWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT=* )' PZSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE END IF * * RETURN * * End of PZDSEPREQ * END scalapack-1.8.0/TESTING/EIG/pdbrdinfo.f0000640000175000017500000003217310363532303017161 0ustar muammarmuammar SUBROUTINE PDBRDINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBVAL, LDNVAL, LDPVAL, $ LDQVAL, NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBVAL( LDNBVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBRDINFO get needed startup information for the bidiagonal * reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M & N. * * MVAL (global output) INTEGER array, dimension (LDMVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M. LDMVAL >= NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDMVAL+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, IGEBR2D, IGEBS2D, $ SCOPY * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'BRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get values of M, N * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Bidiagonal reduction' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real double precision bidiagonal ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q B P''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ ' than THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'BRD time : Time in seconds to reduce the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'the bidiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9999 ) ' ' WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 2*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9997 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G12.5 ) * * End of PDBRDINFO * END scalapack-1.8.0/TESTING/EIG/pssepinfo.f0000640000175000017500000003051510363532303017216 0ustar muammarmuammar* * SUBROUTINE PSSEPINFO( CONTEXT, IAM, NIN, NOUT, MAXSETSIZE, $ NMATSIZES, MATSIZES, NUPLOS, UPLOS, $ NPCONFIGS, NPROWS, NPCOLS, NBS, NMATTYPES, $ MATTYPES, MAXTYPE, SUBTESTS, THRESH, ORDER, $ ABSTOL, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PSSEPINFO reads the input test data file (INFILE), copies the * information therein to all processes and returns this information * in the corresponding parameters. * * Arguments * ========= * * CONTEXT (global input) INTEGER * BLACS Context * * IAM (local input) INTEGER * process number. * IAM.EQ.0 on the proceesor that performs I/O * * NIN (global input) INTEGER * The unit number of the input file. * * NOUT (global output) INTEGER * The unit number for output file. * if NOUT = 6, ouput to screen, * if NOUT = 0, output to stderr * Only defined for process 0. * * MAXSETSIZE (global output) INTEGER * Maximum set size. Size of the following arrays: * MATSIZES, MATTYPES, NBS, NPCOLS, NPROWS * * NMATSIZES (global output) INTEGER * Number of matrix sizes to test * * MATSIZES (global output) INTEGER array dimension MAXSETSIZE * Matrix sizes to test * * NUPLOS (global output) INTEGER * Number of UPLO values to test * * UPLOS (global output) CHARACTER*1 array dimension 2 * Values of UPLO to test * * NPCONFIGS (global output) INTEGER * Number of process configuratins (NPROW, NPCOL, NB) * * NPROWS (global output) INTEGER array dimension MAXSETSIZE * Values of NPROW to test * * NPCOLS (global output) INTEGER array dimension MAXSETSIZE * Values of NPCOL to test * * NBS (global output) INTEGER array dimension MAXSETSIZE * Values of NB to test * * NMATTYPES (global output) INTEGER * Number of matrix types to test * * MATTYPES (global output) INTEGER array dimension MAXSETSIZE * Matrix types to test * Refer to PSSEPTST for a complete description of the * supported matrix types. * * MAXTYPE (global input) INTEGER * Maximum allowed matrix type * * SUBTESTS (global output) CHARACTER * 'N' = Do not perform subtests * 'Y' = Perfrom subtests * * * THRESH (global output) @(tupc) * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * ( THRESH is set to 1/10 of the value defined in the .dat * file when NOUT = 13. THRESH is set to 1/20 of the value * defined in the .dat file when NOUT = 14. This allows us * to specify more stringent criteria for our internal testing ) * * ORDER (global output) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global output) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PSSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * If ( ABSTOL .EQ. 0 in SEP.dat, it is set to * 2.0 * PSLAMCH( 'u' ) in this routine. * * INFO (global output) INTEGER * 0 = normal return * -1 = end of file * -2 = incorrrect data specification * * .. Scalar Arguments .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, INFO, MAXSETSIZE, MAXTYPE, NIN, $ NMATSIZES, NMATTYPES, NOUT, NPCONFIGS, NUPLOS, $ ORDER REAL ABSTOL, THRESH * .. * .. Array Arguments .. CHARACTER UPLOS( 2 ) INTEGER MATSIZES( MAXSETSIZE ), MATTYPES( MAXSETSIZE ), $ NBS( MAXSETSIZE ), NPCOLS( MAXSETSIZE ), $ NPROWS( MAXSETSIZE ) * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL TWO, TEN, TWENTY PARAMETER ( TWO = 2.0E0, TEN = 10.0E0, TWENTY = 20.0E0 ) * .. * .. Local Scalars .. CHARACTER*80 TESTSUMMRY INTEGER I, ISUBTESTS * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * * .. External Subroutines .. EXTERNAL IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * * .. Local Arrays .. INTEGER IUPLOS( 2 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = 9997 )TESTSUMMRY TESTSUMMRY = ' ' READ( NIN, FMT = 9997 )TESTSUMMRY WRITE( NOUT, FMT = 9997 )TESTSUMMRY END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATSIZES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATSIZES, 1, 0, 0 ) END IF IF( NMATSIZES.EQ.-1 ) THEN INFO = -1 GO TO 70 END IF IF( NMATSIZES.LT.1 .OR. NMATSIZES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Matrix size', NMATSIZES, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MATSIZES( I ), I = 1, NMATSIZES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MATSIZES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATSIZES, MATSIZES, 1, $ 0, 0 ) END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NUPLOS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NUPLOS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NUPLOS, 1, 0, 0 ) END IF IF( NUPLOS.LT.1 .OR. NUPLOS.GT.2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# of UPLOs', NUPLOS, 1, 2 END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( UPLOS( I ), I = 1, NUPLOS ) DO 10 I = 1, NUPLOS IF( LSAME( UPLOS( I ), 'L' ) ) THEN IUPLOS( I ) = 1 ELSE IUPLOS( I ) = 2 END IF 10 CONTINUE CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NUPLOS, IUPLOS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NUPLOS, IUPLOS, 1, 0, 0 ) END IF DO 20 I = 1, NUPLOS IF( IUPLOS( I ).EQ.1 ) THEN UPLOS( I ) = 'L' ELSE UPLOS( I ) = 'U' END IF 20 CONTINUE * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NPCONFIGS CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NPCONFIGS, 1, 0, 0 ) END IF IF( NPCONFIGS.LT.1 .OR. NPCONFIGS.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'# proc configs', NPCONFIGS, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPROWS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPROWS, 1, 0, $ 0 ) END IF DO 30 I = 1, NPCONFIGS IF( NPROWS( I ).LE.0 ) $ INFO = -2 30 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NPROW' END IF GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NPCOLS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NPCOLS, 1, 0, $ 0 ) END IF DO 40 I = 1, NPCONFIGS IF( NPCOLS( I ).LE.0 ) $ INFO = -2 40 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NPCOL' END IF GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( NBS( I ), I = 1, NPCONFIGS ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NPCONFIGS, NBS, 1, 0, 0 ) END IF DO 50 I = 1, NPCONFIGS IF( NBS( I ).LE.0 ) $ INFO = -2 50 CONTINUE IF( INFO.EQ.-2 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9996 )' NB' END IF GO TO 70 END IF * * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )NMATTYPES CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, NMATTYPES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, NMATTYPES, 1, 0, 0 ) END IF IF( NMATTYPES.LT.1 .OR. NMATTYPES.GT.MAXSETSIZE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'matrix types', NMATTYPES, 1, $ MAXSETSIZE END IF INFO = -2 GO TO 70 END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )( MATTYPES( I ), I = 1, NMATTYPES ) CALL IGEBS2D( CONTEXT, 'All', ' ', 1, NMATTYPES, MATTYPES, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, NMATTYPES, MATTYPES, 1, $ 0, 0 ) END IF * DO 60 I = 1, NMATTYPES IF( MATTYPES( I ).LT.1 .OR. MATTYPES( I ).GT.MAXTYPE ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'matrix type', MATTYPES( I ), $ 1, MAXTYPE END IF MATTYPES( I ) = 1 END IF 60 CONTINUE * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )SUBTESTS IF( LSAME( SUBTESTS, 'Y' ) ) THEN ISUBTESTS = 2 ELSE ISUBTESTS = 1 END IF CALL IGEBS2D( CONTEXT, 'All', ' ', 1, 1, ISUBTESTS, 1 ) ELSE CALL IGEBR2D( CONTEXT, 'All', ' ', 1, 1, ISUBTESTS, 1, 0, 0 ) END IF IF( ISUBTESTS.EQ.2 ) THEN SUBTESTS = 'Y' ELSE SUBTESTS = 'N' END IF * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )THRESH IF( NOUT.EQ.13 ) $ THRESH = THRESH / TEN IF( NOUT.EQ.14 ) $ THRESH = THRESH / TWENTY CALL SGEBS2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1 ) ELSE CALL SGEBR2D( CONTEXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) END IF * ORDER = 0 * IF( IAM.EQ.0 ) THEN READ( NIN, FMT = * )ABSTOL CALL SGEBS2D( CONTEXT, 'All', ' ', 1, 1, ABSTOL, 1 ) ELSE CALL SGEBR2D( CONTEXT, 'All', ' ', 1, 1, ABSTOL, 1, 0, 0 ) END IF IF( ABSTOL.LT.0 ) $ ABSTOL = TWO*PSLAMCH( CONTEXT, 'U' ) * INFO = 0 * 70 CONTINUE RETURN * 9999 FORMAT( A20, ' is:', I5, ' must be between:', I5, ' and', I5 ) 9998 FORMAT( A20, ' is:', I5, ' must be:', I5, ' or', I5 ) 9997 FORMAT( A ) 9996 FORMAT( A20, ' must be positive' ) * * End of PSSEPINFO * END scalapack-1.8.0/TESTING/EIG/pdbrddriver.f0000640000175000017500000004743110363532303017524 0ustar muammarmuammar PROGRAM PDBRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PDBRDDRIVER is the main test program for the DOUBLE PRECISION * ScaLAPACK BRD (bidiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK BRD computation input file' * 'PVM machine' * 'BRD.out' output file name * 6 device out * 3 number of problems sizes * 16 20 18 values of M * 16 18 20 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPTP, IPTQ, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LWORK, M, MAXMN, $ MINMN, MNP, MNQ, MP, MYCOL, MYROW, N, NB, $ NDIAG, NGRIDS, NMAT, NNB, NOFFD, NOUT, NPCOL, $ NPROCS, NPROW, NQ, WORKBRD, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ MVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDBRDINFO, PDFILLPAD, PDLAFCHK, $ PDMATGEN, PDGEBDRV, PDGEBRD, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLANGE EXTERNAL ICEIL, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDBRDINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * * Go to bottom of loop if this case doesn't use my process * DO 20 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), NB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IF( M.GE.N ) THEN NDIAG = MNQ NOFFD = MNP ELSE NDIAG = MNP NOFFD = NUMROC( MIN( M, N )-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPTQ = IPE + NOFFD + IPOSTPAD + IPREPAD IPTP = IPTQ + MNQ + IPOSTPAD + IPREPAD IPW = IPTP + MNP + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = NB*( MP+NQ+1 ) + NQ WORKBRD = LWORK + IPOSTPAD WORKSIZ = WORKBRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN WORKSIZ = MAX( LWORK, 2*NB*( MP+NQ+NB ) ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Bidiagonal reduction', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PDMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, MNQ, 1, MEM( IPTQ-IPREPAD ), $ MNQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, MNP, 1, MEM( IPTP-IPREPAD ), $ MNP, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKBRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKBRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to bidiagonal form * CALL PDGEBRD( M, N, MEM( IPA ), 1, 1, DESCA, MEM( IPD ), $ MEM( IPE ), MEM( IPTQ ), MEM( IPTP ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGEBRD', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBRD', MNQ, 1, $ MEM( IPTQ-IPREPAD ), MNQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBRD', MNP, 1, $ MEM( IPTP-IPREPAD ), MNP, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBRD', WORKBRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKBRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A-Q*B*P|| / (||A|| * N * eps) * CALL PDGEBDRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPTQ ), $ MEM( IPTP ), MEM( IPW ), IERR( 1 ) ) CALL PDLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGEBDRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEBDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0D+0 $ .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID * KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * ) $ 'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * BRD requires 8/3 N^3 floating point operations * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) NOPS = 4.0D+0 * DBLE( MINMN ) * DBLE( MINMN ) * $ ( DBLE( MAXMN ) - DBLE( MINMN ) / 3.0D+0 ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB P Q BRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*B*P|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PDBRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pchetdrv.f0000640000175000017500000004147010363532303017031 0ustar muammarmuammar SUBROUTINE PCHETDRV( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCHETDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from Q, the * Hermitian tridiagonal matrix T (or D and E), and TAU, which were * computed by PCHETRD: sub( A ) := Q * T * Q'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of sub( A ). On entry, * if UPLO='U', the diagonal and first superdiagonal of sub( A ) * have the corresponding elements of the tridiagonal matrix T, * and the elements above the first superdiagonal, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO='L', the diagonal and * first subdiagonal of sub( A ) have the corresponding elements * of the tridiagonal matrix T, and the elements below the first * subdiagonal, with the array TAU, represent the unitary * matrix Q as a product of elementary reflectors, and the * strictly upper triangular part of sub( A ) is not referenced. * On exit, if UPLO = 'U', the upper triangular part of the * distributed Hermitian matrix sub( A ) is recovered. * If UPLO='L', the lower triangular part of the distributed * Hermitian matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local input) COMPLEX, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= 2 * NB *( NB + NP ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL REIGHT, RONE, RZERO PARAMETER ( REIGHT = 8.0E+0, RONE = 1.0E+0, $ RZERO = 0.0E+0 ) COMPLEX HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER I, IACOL, IAROW, ICTXT, II, IPT, IPV, IPX, $ IPY, J, JB, JJ, JL, K, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW REAL ADDBND, D2, E2 COMPLEX D1, E1 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCT( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, IGSUM2D, $ PCELGET, PCGEMM, PCHEMM, $ PCHER2K, PCLACPY, PCLARFT, $ PCLASET, PCTRMM, PSELGET * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, MAX, MIN, MOD * .. * .. Executable statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) UPPER = LSAME( UPLO, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = NB * NB + IPT IPX = NB * NP + IPV IPY = NB * NP + IPX * CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * ADDBND = REIGHT * PSLAMCH( ICTXT, 'eps' ) * IF( UPPER ) THEN * CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 10 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PCELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J+1, DESCE ) CALL PCELGET( 'Columnwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * * Compute the upper triangle of sub( A ). * CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * DO 20 K = 0, N-1, NB JB = MIN( NB, N-K ) I = IA + K J = JA + K * * Compute the lower triangular matrix T. * CALL PCLARFT( 'Backward', 'Columnwise', K+JB-1, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PCLACPY( 'All', K+JB-1, JB, A, IA, J, DESCA, $ WORK( IPV ), 1, 1, DESCV ) * IF( K.GT.0 ) THEN CALL PCLASET( 'Lower', JB+1, JB, ZERO, ONE, WORK( IPV ), $ K, 1, DESCV ) ELSE CALL PCLASET( 'Lower', JB, JB-1, ZERO, ONE, WORK( IPV ), $ 1, 2, DESCV ) CALL PCLASET( 'Ge', JB, 1, ZERO, ZERO, WORK( IPV ), 1, $ 1, DESCV ) END IF * * Zero out the strict upper triangular part of A. * IF( K.GT.0 ) THEN CALL PCLASET( 'Ge', K-1, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PCLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, A, I-1, $ J+1, DESCA ) ELSE IF( JB.GT.1 ) THEN CALL PCLASET( 'Upper', JB-2, JB-2, ZERO, ZERO, A, IA, $ J+2, DESCA ) END IF * * (1) X := A * V * T' * CALL PCHEMM( 'Left', 'Upper', K+JB, JB, ONE, A, IA, JA, $ DESCA, WORK( IPV ), 1, 1, DESCV, ZERO, $ WORK( IPX ), 1, 1, DESCV ) CALL PCTRMM( 'Right', 'Lower', 'Conjugate transpose', $ 'Non-Unit', K+JB, JB, ONE, WORK( IPT ), 1, 1, $ DESCT, WORK( IPX ), 1, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PCGEMM( 'Conjugate transpose', 'No transpose', JB, JB, $ K+JB, ONE, WORK( IPV ), 1, 1, DESCV, $ WORK( IPX ), 1, 1, DESCV, ZERO, WORK( IPY ), $ 1, 1, DESCT ) CALL PCTRMM( 'Left', 'Lower', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PCGEMM( 'No tranpose', 'No transpose', K+JB, JB, JB, $ -HALF, WORK( IPV ), 1, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), 1, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PCHER2K( 'Upper', 'No transpose', K+JB, JB, -ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, RONE, A, IA, JA, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * ELSE * CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PCELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PCELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * * Compute the lower triangle of sub( A ). * JL = MAX( ( ( JA+N-2 ) / NB ) * NB + 1, JA ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, INDXG2P( IA+JL-JA+1, NB, $ MYROW, DESCA( RSRC_ ), NPROW ), IACOL, ICTXT, $ NB ) * DO 40 J = JL, JA, -NB K = J - JA + 1 I = IA + K - 1 JB = MIN( N-K+1, NB ) * * Compute upper triangular matrix T from TAU. * CALL PCLARFT( 'Forward', 'Columnwise', N-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PCLACPY( 'Lower', N-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), K+1, 1, DESCV ) CALL PCLASET( 'Upper', N-K, JB, ZERO, ONE, WORK( IPV ), $ K+1, 1, DESCV ) CALL PCLASET( 'Ge', 1, JB, ZERO, ZERO, WORK( IPV ), K, 1, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', N-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * (1) X := A * V * T' * CALL PCHEMM( 'Left', 'Lower', N-K+1, JB, ONE, A, I, J, $ DESCA, WORK( IPV ), K, 1, DESCV, ZERO, $ WORK( IPX ), K, 1, DESCV ) CALL PCTRMM( 'Right', 'Upper', 'Conjugate transpose', $ 'Non-Unit', N-K+1, JB, ONE, WORK( IPT ), 1, 1, $ DESCT, WORK( IPX ), K, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PCGEMM( 'Conjugate transpose', 'No transpose', JB, JB, $ N-K+1, ONE, WORK( IPV ), K, 1, DESCV, $ WORK( IPX ), K, 1, DESCV, ZERO, WORK( IPY ), $ 1, 1, DESCT ) CALL PCTRMM( 'Left', 'Upper', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PCGEMM( 'No transpose', 'No transpose', N-K+1, JB, JB, $ -HALF, WORK( IPV ), K, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), K, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PCHER2K( 'Lower', 'No tranpose', N-K+1, JB, -ONE, $ WORK( IPV ), K, 1, DESCV, WORK( IPX ), K, 1, $ DESCV, RONE, A, I, J, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCT( RSRC_ ) = MOD( DESCT( RSRC_ ) + NPROW - 1, NPROW ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + NPCOL - 1, NPCOL ) * 40 CONTINUE * END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PCHETDRV * END scalapack-1.8.0/TESTING/EIG/pdsytdrv.f0000640000175000017500000004076210363532303017074 0ustar muammarmuammar SUBROUTINE PDSYTDRV( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDSYTDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from Q, the * symmetric tridiagonal matrix T (or D and E), and TAU, which were * computed by PDSYTRD: sub( A ) := Q * T * Q'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of sub( A ). On entry, * if UPLO='U', the diagonal and first superdiagonal of sub( A ) * have the corresponding elements of the tridiagonal matrix T, * and the elements above the first superdiagonal, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO='L', the diagonal and * first subdiagonal of sub( A ) have the corresponding elements * of the tridiagonal matrix T, and the elements below the first * subdiagonal, with the array TAU, represent the orthogonal * matrix Q as a product of elementary reflectors, and the * strictly upper triangular part of sub( A ) is not referenced. * On exit, if UPLO = 'U', the upper triangular part of the * distributed symmetric matrix sub( A ) is recovered. * If UPLO='L', the lower triangular part of the distributed * symmetric matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local input) DOUBLE PRECISION, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= 2 * NB *( NB + NP ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION EIGHT, HALF, ONE, ZERO PARAMETER ( EIGHT = 8.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER I, IACOL, IAROW, ICTXT, II, IPT, IPV, IPX, $ IPY, J, JB, JJ, JL, K, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW DOUBLE PRECISION ADDBND, D1, D2, E1, E2 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCT( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, IGSUM2D, $ PDELGET, PDGEMM, PDLACPY, $ PDLARFT, PDLASET, PDSYMM, $ PDSYR2K, PDTRMM * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) UPPER = LSAME( UPLO, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = NB * NB + IPT IPX = NB * NP + IPV IPY = NB * NP + IPX * CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * ADDBND = EIGHT * PDLAMCH( ICTXT, 'eps' ) * IF( UPPER ) THEN * CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 10 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PDELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J+1, DESCE ) CALL PDELGET( 'Columnwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * * Compute the upper triangle of sub( A ). * CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * DO 20 K = 0, N-1, NB JB = MIN( NB, N-K ) I = IA + K J = JA + K * * Compute the lower triangular matrix T. * CALL PDLARFT( 'Backward', 'Columnwise', K+JB-1, JB, A, IA, $ J, DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PDLACPY( 'All', K+JB-1, JB, A, IA, J, DESCA, $ WORK( IPV ), 1, 1, DESCV ) * IF( K.GT.0 ) THEN CALL PDLASET( 'Lower', JB+1, JB, ZERO, ONE, WORK( IPV ), $ K, 1, DESCV ) ELSE CALL PDLASET( 'Lower', JB, JB-1, ZERO, ONE, WORK( IPV ), $ 1, 2, DESCV ) CALL PDLASET( 'Ge', JB, 1, ZERO, ZERO, WORK( IPV ), 1, $ 1, DESCV ) END IF * * Zero out the strict upper triangular part of A. * IF( K.GT.0 ) THEN CALL PDLASET( 'Ge', K-1, JB, ZERO, ZERO, A, IA, J, $ DESCA ) CALL PDLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, A, I-1, $ J+1, DESCA ) ELSE IF( JB.GT.1 ) THEN CALL PDLASET( 'Upper', JB-2, JB-2, ZERO, ZERO, A, IA, $ J+2, DESCA ) END IF * * (1) X := A * V * T' * CALL PDSYMM( 'Left', 'Upper', K+JB, JB, ONE, A, IA, JA, $ DESCA, WORK( IPV ), 1, 1, DESCV, ZERO, $ WORK( IPX ), 1, 1, DESCV ) CALL PDTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', $ K+JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPX ), 1, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PDGEMM( 'Transpose', 'No transpose', JB, JB, K+JB, ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, ZERO, WORK( IPY ), 1, 1, DESCT ) CALL PDTRMM( 'Left', 'Lower', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PDGEMM( 'No tranpose', 'No transpose', K+JB, JB, JB, $ -HALF, WORK( IPV ), 1, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), 1, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PDSYR2K( 'Upper', 'No transpose', K+JB, JB, -ONE, $ WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1, $ DESCV, ONE, A, IA, JA, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * ELSE * CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, N-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PDELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PDELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(N-1) ) THEN CALL PDELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PDELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * * Compute the lower triangle of sub( A ). * JL = MAX( ( ( JA+N-2 ) / NB ) * NB + 1, JA ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCT, NB, NB, NB, NB, INDXG2P( IA+JL-JA+1, NB, $ MYROW, DESCA( RSRC_ ), NPROW ), IACOL, ICTXT, $ NB ) * DO 40 J = JL, JA, -NB K = J - JA + 1 I = IA + K - 1 JB = MIN( N-K+1, NB ) * * Compute upper triangular matrix T from TAU. * CALL PDLARFT( 'Forward', 'Columnwise', N-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPV ) ) * * Copy Householder vectors into WORK( IPV ). * CALL PDLACPY( 'Lower', N-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), K+1, 1, DESCV ) CALL PDLASET( 'Upper', N-K, JB, ZERO, ONE, WORK( IPV ), $ K+1, 1, DESCV ) CALL PDLASET( 'Ge', 1, JB, ZERO, ZERO, WORK( IPV ), K, 1, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', N-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * (1) X := A * V * T' * CALL PDSYMM( 'Left', 'Lower', N-K+1, JB, ONE, A, I, J, $ DESCA, WORK( IPV ), K, 1, DESCV, ZERO, $ WORK( IPX ), K, 1, DESCV ) CALL PDTRMM( 'Right', 'Upper', 'Transpose', 'Non-Unit', $ N-K+1, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPX ), K, 1, DESCV ) * * (2) X := X - 1/2 * V * (T * V' * X) * CALL PDGEMM( 'Transpose', 'No transpose', JB, JB, N-K+1, $ ONE, WORK( IPV ), K, 1, DESCV, WORK( IPX ), $ K, 1, DESCV, ZERO, WORK( IPY ), 1, 1, DESCT ) CALL PDTRMM( 'Left', 'Upper', 'No transpose', 'Non-Unit', $ JB, JB, ONE, WORK( IPT ), 1, 1, DESCT, $ WORK( IPY ), 1, 1, DESCT ) CALL PDGEMM( 'No transpose', 'No transpose', N-K+1, JB, JB, $ -HALF, WORK( IPV ), K, 1, DESCV, WORK( IPY ), $ 1, 1, DESCT, ONE, WORK( IPX ), K, 1, DESCV ) * * (3) A := A - X * V' - V * X' * CALL PDSYR2K( 'Lower', 'No tranpose', N-K+1, JB, -ONE, $ WORK( IPV ), K, 1, DESCV, WORK( IPX ), K, 1, $ DESCV, ONE, A, I, J, DESCA ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCT( RSRC_ ) = MOD( DESCT( RSRC_ ) + NPROW - 1, NPROW ) DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + NPCOL - 1, NPCOL ) * 40 CONTINUE * END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PDSYTDRV * END scalapack-1.8.0/TESTING/EIG/pdlasizesyev.f0000640000175000017500000001027210363532303017730 0ustar muammarmuammar* * SUBROUTINE PDLASIZESYEV( JOBZ, N, DESCA, MINSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER JOBZ INTEGER MINSIZE, N * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PDLASIZESYEV computes the amount of memory needed by PDSYEV * to calculate: * 1) Eigenvectors and eigenvalues if JOBZ = 'V' * 2) Eigenvalues only if JOBZ = 'N' * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * MINSIZE (global output) INTEGER * Workspace required for PDSYEV to: * 1) Eigenvectors and eigenvalues if JOBZ = 'V' * 2) Eigenvalues only if JOBZ = 'N' * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL WANTZ INTEGER CONTEXTC, CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, $ LCM, LCMQ, LDC, MQ0, MYCOL, MYPCOLC, MYPROWC, $ MYROW, NB, NN, NP, NP0, NPCOL, NPCOLC, NPROCS, $ NPROW, NPROWC, NQ, NRC, QRMEM, RSRC_A, $ SIZEMQRLEFT, SIZEMQRRIGHT * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC, SL_GRIDRESHAPE EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, SL_GRIDRESHAPE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_GRIDEXIT * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) NB = DESCA( MB_ ) N = DESCA( M_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( NN, NB, 0, 0, NPCOL ) NPROCS = NPROW*NPCOL WANTZ = LSAME( JOBZ, 'V' ) LDC = 0 * * Create the new context that is used in PDSYEV * IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) LDC = MAX( 1, NRC ) CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compute the total amount of space needed * IF( WANTZ ) THEN QRMEM = 5*N + MAX( 2*NP0 +MQ0 + NB*NN, 2*NN-2 ) + N*LDC MINSIZE = MAX ( SIZEMQRLEFT, SIZEMQRRIGHT, QRMEM ) ELSE MINSIZE = 5*N + 2*NP0 +MQ0 + NB*NN END IF * RETURN * * End of PDLASIZESYEV * END scalapack-1.8.0/TESTING/EIG/pzlatms.f0000640000175000017500000003265510363532303016711 0ustar muammarmuammar* * SUBROUTINE PZLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, IA, JA, DESCA, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER IA, INFO, JA, KL, KU, LWORK, M, MODE, N, ORDER DOUBLE PRECISION COND, DMAX * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLATMS generates random Hermitian matrices with specified * eigenvalues for testing SCALAPACK programs. * * PZLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * ### bandwidth reduction NOT SUPPORTED ### * * Arguments * ========= * * M - (global input) INTEGER * The number of rows of A. Not modified. * * N - (global input) INTEGER * The number of columns of A. Not modified. * ### M .ne. N unsupported * * DIST - (global input) CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - (global input) INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to ZLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - (global input) CHARACTER*1 * If SYM='S' or 'H', the generated matrix is Hermitian, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is Hermitian, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * ### SYM = 'N' NOT SUPPORTED ### * Not modified. * * D - (local input/output) DOUBLE PRECISION array, * dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - (global input) INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - (global input) DOUBLE PRECISION * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - (global input) DOUBLE PRECISION * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - (global input) INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is Hermitian. * Not modified. * ### 1 <= KL < N-1 is NOT SUPPORTED ### * * KU - (global input) INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is Hermitian. * Not modified. * ### 1 <= KU < N-1 is NOT SUPPORTED ### * * PACK - (global input) CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * ### PACK must be 'N' all other options NOT SUPPORTED ### * * A - (local output) COMPLEX*16 array * Global dimension (M, N), local dimension (MP, NQ) * On exit A is the desired test matrix. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ORDER - (input) INTEGER * The number of reflectors used to define the orthogonal * matrix Q. A = Q * D * Q' * Higher ORDER requires more computation and communication. * * WORK - (local input/output) COMPLEX*16 array, * dimension (LWORK) * * LWORK - (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PZLASIZESEP * * INFO - (global output) INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -16 => DESCA is inconsistent * -17 => ORDER not in the range 0 to N inclusive * 1 => Error return from DLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from PZLAGHE * *----------------------------------------------------------------------- * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 ZZERO PARAMETER ( ZZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IDIST, IINFO, IPACK, IRSIGN, ISYM, LLB, $ MNMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION ALPHA, TEMP * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DLATM1, DSCAL, $ PCHK1MAT, PXERBLA, PZLAGHE, ZLASET * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * 1) Decode and Test the input parameters. * Initialize flags & seed. * * INFO = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( ( MYROW.GE.NPROW .OR. MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL .OR. MYCOL.LT.0 ) )RETURN * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IPACK = 1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) * IF( ORDER.EQ.0 ) $ ORDER = N * * Set INFO if an error * IF( NPROW.EQ.-1 ) THEN INFO = -( 1600+CTXT_ ) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, INFO ) IF( INFO.EQ.0 ) THEN IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT. $ ONE ) THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( ( ORDER.LT.0 ) .OR. ( ORDER.GT.N ) ) THEN INFO = -17 END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, 0, IDUM1, IDUM2, $ INFO ) END IF * * Check for unsupported features * IF( ISYM.NE.2 ) THEN INFO = -5 ELSE IF( IPACK.NE.0 ) THEN INFO = -12 ELSE IF( KL.GT.0 .AND. KL.LT.M-1 ) THEN INFO = -10 ELSE IF( KU.GT.0 .AND. KU.LT.N-1 ) THEN INFO = -11 ELSE IF( LLB.NE.0 .AND. LLB.NE.M-1 ) THEN INFO = -10 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL DLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) * IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL DSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL ZLASET( 'A', NP, NQ, ZZERO, ZZERO, A, DESCA( LLD_ ) ) * * Hermitian -- A = U D U' * CALL PZLAGHE( M, LLB, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, IINFO ) * RETURN * * End of PZLATMS * END scalapack-1.8.0/TESTING/EIG/pssdpsubtst.f0000640000175000017500000004044610363532303017612 0ustar muammarmuammar SUBROUTINE PSSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, $ COPYA, Z, IA, JA, DESCA, WIN, WNEW, $ IPREPAD, IPOSTPAD, WORK, LWORK, LWORK1, $ IWORK, LIWORK, $ RESULT, TSTNRM, QTQNRM, NOUT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 16, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LWORK, LWORK1, N, $ NOUT, RESULT, LIWORK REAL ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) REAL A( * ), COPYA( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSDPSUBTST calls PSSYEVD and then tests the output of * PSSYEVD * The following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PSSYEVD when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. * * A (local workspace) REAL array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PSSYEVD for a description of block cyclic layout. * The test matrix, which is then modified by PSSYEVD * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) REAL array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) REAL array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PSSEPCHK and PSSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as computed by this call to PSSYEVD. * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) REAL array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PSSYEVD * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PSSYEVD * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DT_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL FIVE, NEGONE, PADVAL, ZERO PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, J, MINSIZE, MQ, MYCOL, MYROW, $ NP, NPCOL, NPROW, NQ, RESAQ, RESQTQ, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST, SIZESYEVD, ISIZESYEVD, $ TRILWMIN REAL EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, $ NORMWIN, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, PSLANSY EXTERNAL LSAME, NUMROC, PSLAMCH, PSLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, IGAMN2D, IGAMX2D, $ PSCHEKPAD, PSELSET, PSFILLPAD, PSLASIZESQP, $ PSSEPCHK, PSSEPQTQ, PSSYEVD, SGAMN2D, SGAMX2D, $ SLACPY, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DT_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PSLASIZESQP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * DO 10 I = 1, LWORK1, 1 WORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE * DO 30 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 30 CONTINUE * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 150 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Find the amount of workspace needed with or without eigenvectors. * TRILWMIN = 3*N + MAX( DESCA( NB_ )*( NP+1 ), 3*DESCA( NB_ ) ) MINSIZE = MAX( 1 + 6*N + 2*NP*NQ, TRILWMIN ) + 2*N * CALL SLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PSFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, PADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, WORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * * Make sure that PSSYEVD does not cheat (i.e. use answers * already computed.) * DO 60 I = 1, N, 1 DO 50 J = 1, N, 1 CALL PSELSET( Z( 1+IPREPAD ), I, J, DESCA, 13.0E+0 ) 50 CONTINUE 60 CONTINUE * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PSSYEVD( 'V', UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), LWORK1, IWORK( 1+IPREPAD ), $ LIWORK, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVD-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, PADVAL ) * CALL PSCHEKPAD( DESCZ( CTXT_ ), 'PSSYEVD-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVD-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSYEVD-WORK', LWORK1, 1, $ WORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )INFO IF( INFO.EQ.(N+1) ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 END IF ELSE IF( INFO.EQ.14 .AND. LWORK1.GE.MINSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * IF( RESULT.EQ.0 .OR. INFO.GT.N ) THEN * * Make sure that different processes return the same eigenvalues. * This is a more exhaustive check that provided by PSSYEVD. * DO 70 I = 1, N WORK( I ) = WNEW( I+IPREPAD ) WORK( I+N ) = WNEW( I+IPREPAD ) 70 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', N, 1, WORK, N, 1, $ 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', N, 1, $ WORK( 1+N ), N, 1, 1, -1, -1, 0 ) * DO 80 I = 1, N * IF( ABS( WORK( I )-WORK( N+I ) ).GT.ZERO ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 GO TO 90 END IF 80 CONTINUE 90 CONTINUE END IF * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PSLANSY( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ WORK )*EPS END IF * * Note that a couple key variables get redefined in PSSEPCHK * as described by this table: * * PSSEPTST name PSSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * * * Perform the |AQ - QE| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZECHK, 1, WORK, SIZECHK, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * RESAQ = 0 * CALL PSSEPCHK( N, N, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), WORK( 1+IPREPAD ), $ SIZECHK, TSTNRM, RESAQ ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPCHK-WORK', SIZECHK, 1, $ WORK, SIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RESAQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9993 ) END IF * * Perform the |QTQ - I| test * CALL PSFILLPAD( DESCA( CTXT_ ), SIZEQTQ, 1, WORK, SIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3E+0 ) * RESQTQ = 0 * * DO 40 I = 1, 2 IWORK( IPREPAD + I ) = 0 40 CONTINUE CALL PSSEPQTQ( N, N, THRESH, Z( 1+IPREPAD ), IA, JA, DESCZ, $ A( 1+IPREPAD ), IA, JA, DESCA, $ IWORK( 1 ), IWORK( 1 ), WORK( 1 ), $ WORK( IPREPAD+1 ), SIZEQTQ, QTQNRM, INFO, $ RESQTQ ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PSSEPQTQ-WORK', SIZEQTQ, 1, $ WORK, SIZEQTQ, IPREPAD, IPOSTPAD, 4.3E+0 ) * IF( RESQTQ.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9992 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF ENDIF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0 * cc CALL SLASRT( 'I', N,WNEW( IPREPAD +1 ), INFO ) c DO 140 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 150 CONTINUE * * RETURN * 9999 FORMAT( 'PSSYEVD returned INFO=', I7 ) 9998 FORMAT( 'PSSEPQTQ in PSSDPSUBTST returned INFO=', I7 ) 9997 FORMAT( 'PSSDPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PSSYEVD returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'Different processes return different eigenvalues' ) 9994 FORMAT( 'Heterogeneity detected by PSSYEVD' ) 9993 FORMAT( 'PSSYEVD failed the |AQ -QE| test' ) 9992 FORMAT( 'PSSYEVD failed the |QTQ -I| test' ) * * End of PSSDPSUBTST * END scalapack-1.8.0/TESTING/EIG/pzbrddriver.f0000640000175000017500000005007410363532303017547 0ustar muammarmuammar PROGRAM PZBRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PZBRDDRIVER is the main test program for the COMPLEX*16 * ScaLAPACK BRD (bidiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK BRD computation input file' * 'PVM machine' * 'BRD.out' output file name * 6 device out * 3 number of problems sizes * 16 20 18 values of M * 16 18 20 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * ZPLXSZ INTEGER, default = 16 bytes. * INTGSZ and ZPLXSZ indicate the length in bytes on the * given platform for an integer and a double precision * complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER MEMSIZ, NTESTS, TOTMEM, ZPLXSZ, DBLESZ COMPLEX*16 PADVAL PARAMETER ( TOTMEM = 2000000, ZPLXSZ = 16, DBLESZ = 8, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPTP, IPTQ, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LWORK, M, MAXMN, $ MINMN, MNP, MNQ, MP, MYCOL, MYROW, N, NB, $ NDIAG, NGRIDS, NMAT, NNB, NOFFD, NOUT, NPCOL, $ NPROCS, NPROW, NQ, WORKBRD, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ MVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD, $ PZBRDINFO, PZFILLPAD, PZLAFCHK, $ PZMATGEN, PZGEBDRV, PZGEBRD, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PZLANGE EXTERNAL ICEIL, NUMROC, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZBRDINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * * Go to bottom of loop if this case doesn't use my process * DO 20 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), NB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IF( M.GE.N ) THEN NDIAG = MNQ NOFFD = MNP NDIAG = ICEIL( DBLESZ*NDIAG, ZPLXSZ ) NOFFD = ICEIL( DBLESZ*NOFFD, ZPLXSZ ) ELSE NDIAG = MNP NOFFD = NUMROC( MIN( M, N )-1, NB, MYCOL, 0, NPCOL ) NDIAG = ICEIL( DBLESZ*NDIAG, ZPLXSZ ) NOFFD = ICEIL( DBLESZ*NOFFD, ZPLXSZ ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPTQ = IPE + NOFFD + IPOSTPAD + IPREPAD IPTP = IPTQ + MNQ + IPOSTPAD + IPREPAD IPW = IPTP + MNP + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = NB*( MP+NQ+1 ) + NQ WORKBRD = LWORK + IPOSTPAD WORKSIZ = WORKBRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN WORKSIZ = MAX( LWORK, 2*NB*( MP+NQ+NB ) ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Bidiagonal reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PZMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, MNQ, 1, MEM( IPTQ-IPREPAD ), $ MNQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, MNP, 1, MEM( IPTP-IPREPAD ), $ MNP, IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKBRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKBRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to bidiagonal form * CALL PZGEBRD( M, N, MEM( IPA ), 1, 1, DESCA, MEM( IPD ), $ MEM( IPE ), MEM( IPTQ ), MEM( IPTP ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGEBRD', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBRD', MNQ, 1, $ MEM( IPTQ-IPREPAD ), MNQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBRD', MNP, 1, $ MEM( IPTP-IPREPAD ), MNP, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBRD', WORKBRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKBRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A-Q*B*P|| / (||A|| * N * eps) * CALL PZGEBDRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPTQ ), $ MEM( IPTP ), MEM( IPW ), IERR( 1 ) ) CALL PZLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PZCHEKPAD( ICTXT, 'PZGEBDRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGEBDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0D+0 $ .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID * KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * ) $ 'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * BRD requires 32/3 N^3 floating point operations * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) NOPS = 16.0D+0 * DBLE( MINMN ) * DBLE( MINMN ) * $ ( DBLE( MAXMN ) - DBLE( MINMN ) / 3.0D+0 ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB P Q BRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*B*P|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PZBRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pcevcinfo.f0000640000175000017500000003003410363532303017160 0ustar muammarmuammar SUBROUTINE PCEVCINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB, $ NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, THRESH, WORK, IAM, NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS, $ NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PCEVCINFO gets needed startup information for PCTREVC driver * and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for N. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (the order of the matrix) to run the code * with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, LDNVAL > = NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * < 0 : Perform no error checking * > 0 : report all residuals greater than THRESH * * WORK (local workspace) INTEGER array of dimension >= * MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all * input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Further Details * =============== * * Implemented by: M. Fahey, June 2000 * * ====================================================================== * * Note: For packing the information we assumed that the length in bytes * ===== of an integer is equal to the length in bytes of a real single * precision. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE = 'EVC.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices and their dimensions * READ( NIN, FMT = * )NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9994 )'N', LDNVAL GO TO 30 END IF READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * )NNB IF( NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL GO TO 30 END IF READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB ) * DO 10 I = 1, NNB IF( NBVAL( I ).LT.6 ) THEN WRITE( NOUT, FMT = 9992 )NBVAL( I ) GO TO 30 END IF 10 CONTINUE * * Get number of grids * READ( NIN, FMT = * )NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL GO TO 30 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL GO TO 30 END IF * * Get values of P and Q * READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * )THRESH * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 20 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 20 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK nonsymmetric eigenvector calculation.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' // $ 'complex single precision eigenvector calculation.' WRITE( NOUT, FMT = 9999 )'The following scaled residual ' // $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' Residual = max( ||T*R-R*D||/(||H||*eps*N)' // $ ' , ||T^H*L-L*D^H||/(||H||*eps*N) )' WRITE( NOUT, FMT = 9999 ) $ ' Normalization residual = max(max_j(max|R(j)|-1),' // $ ' max_j(max|L(j)|-1))/(eps*N)' WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' // $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )'An explanation of the input/output ' $ // 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or ' // $ 'CPU time was used.' * WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns in the ' // 'matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the' // $ ' matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less than' // $ ' THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'NEP time : Time in seconds to decompose the ' // ' matrix' WRITE( NOUT, FMT = 9999 )'MFLOPS : Rate of execution ' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9996 )'N ', $ ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9996 )'NB ', $ ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9996 )'P ', $ ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9996 )'Q ', $ ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 )EPS WRITE( NOUT, FMT = 9998 )THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 30 CONTINUE WRITE( NOUT, FMT = 9993 ) CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 'Routines pass computational tests if scaled residual ', $ 'is less than ', G12.5 ) 9997 FORMAT( ' ', 10I6 ) 9996 FORMAT( 2X, A5, ' : ', 10I6 ) 9995 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9994 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9993 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' ) * * End of PCEVCINFO * END scalapack-1.8.0/TESTING/EIG/psnepdriver.f0000640000175000017500000005055210363532303017554 0ustar muammarmuammar PROGRAM PSNEPDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * PSNEPDRIVER is the main test program for the REAL * SCALAPACK NEP routines. This test driver performs a Schur * decomposition followed by residual check of a Hessenberg matrix. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 18 lines: * 'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * REALSZ INTEGER, default = 4 bytes. * REALSZ indicate the length in bytes on the given platform * for a real element. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER REALSZ, TOTMEM, MEMSIZ, NTESTS REAL PADVAL, ZERO, ONE PARAMETER ( REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0, ZERO = 0.0E+0, $ ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA, $ IPOSTPAD, IPREPAD, IPW, IPWI, IPWR, IPZ, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LDA, LDZ, LWORK, $ MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, $ NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH REAL ANORM, FRESID, QRESID, ZNORM DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ), $ IDUM( 1 ), NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM ( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSFILLPAD, PSGEMM, PSLAHQR, PSLASET, PSMATGEN, $ PSNEPFCHK, PSNEPINFO, SLBOOT, SLCOMBINE, $ SLTIMER * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH, PSLANGE, PSLANHS EXTERNAL ILCM, NUMROC, PSLAMCH, PSLANGE, PSLANHS * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, REAL * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSNEPINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWI = IPWR + N + IPOSTPAD + IPREPAD IPW = IPWI + N + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX(2*N, (8*ILCM(NPROW,NPCOL)+2)**2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PSNEPFCHK and PSLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate matrix Z = In * CALL PSLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PSMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PSLASET( 'Lower', MAX( 0, N-2 ), MAX( 0, N-2 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 3 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PSFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, N, 1, MEM( IPWI-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * CALL SLBOOT( ) CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform NEP factorization * CALL PSLAHQR( .TRUE., .TRUE., N, 1, N, MEM( IPA ), DESCA, $ MEM( IPWR ), MEM( IPWI ), 1, N, MEM( IPZ ), $ DESCZ, MEM( IPW ), LWORK, IDUM, 0, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PSLAHQR INFO=', INFO KFAIL = KFAIL + 1 GO TO 10 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PSCHEKPAD( ICTXT, 'PSLAHQR (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLAHQR (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLAHQR (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLAHQR (WI)', N, 1, $ MEM( IPWI-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLAHQR (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || Z * H * Z**T - H0 || / ( N*|| H0 ||*EPS ) * CALL PSNEPFCHK( N, MEM( IPA ), 1, 1, DESCA, IASEED, $ MEM( IPZ ), 1, 1, DESCZ, ANORM, $ FRESID, MEM( IPW ) ) * CALL PSCHEKPAD( ICTXT, 'PSNEPFCHK (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSNEPFCHK (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSNEPFCHK (WORK)', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || (Z**T)*Z - In ||_1 * CALL PSLASET( 'All', N, N, ZERO, ONE, MEM( IPA ), 1, $ 1, DESCA ) CALL PSGEMM( 'Transpose', 'No transpose', N, N, N, $ -ONE, MEM( IPZ ), 1, 1, DESCZ, $ MEM( IPZ ), 1, 1, DESCZ, ONE, MEM( IPA ), $ 1, 1, DESCA ) ZNORM = PSLANGE( '1', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) QRESID = ZNORM / ( REAL( N )*PSLAMCH( ICTXT, 'P' ) ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0E+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 18 N^3 flops for PxLAHQR * NOPS = 18.0D+0*DBLE( N )**3 * * Calculate total megaflops -- factorization only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 10 CONTINUE * 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H - Q*S*Q^T|| / (||H|| * N * eps) = ', G25.7 ) 9985 FORMAT( '||Q^T*Q - I|| / ( N * eps ) ', G25.7 ) * STOP * * End of PSNEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pcgsepsubtst.f0000640000175000017500000007250610363532303017744 0ustar muammarmuammar* * SUBROUTINE PCGSEPSUBTST( WKNOWN, IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, IA, JA, DESCA, WIN, WNEW, $ IFAIL, ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK, LWORK, RWORK, LRWORK, LWORK1, $ IWORK, LIWORK, RESULT, TSTNRM, QTQNRM, $ NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IBTYPE, IL, IPOSTPAD, IPREPAD, IU, JA, $ LIWORK, LRWORK, LWORK, LWORK1, N, NOUT, RESULT REAL ABSTOL, QTQNRM, THRESH, TSTNRM, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ IWORK( * ) REAL GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX A( * ), B( * ), COPYA( * ), COPYB( * ), $ WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCGSEPSUBTST calls PCHEGVX and then tests the output of * PCHEGVX * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < THRESH * |QT * Q - I| / eps * norm(A) < THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PCHEGVX when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * Must be 'V' on first call to PCGSEPSUBTST * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * Must be 'A' on first call to PCGSEPSUBTST * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PCSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PCHEGVX for a description of block cyclic layout. * The test matrix, which is then modified by PCHEGVX * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * B (local workspace) COMPLEX array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PCHEGVX * * COPYB (local input) COMPLEX array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) COMPLEX array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PCGSEPCHK and PCSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) REAL array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) REAL array, dimension (N) * The eigenvalues as copmuted by this call to PCHEGVX * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If INFO > 0 on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * IFAIL has already been padded front and back, * use IFAIL(1+IPREPAD) * * ICLUSTR (global workspace) integer array, dimension (2*NPROW*NPCOL) * * GAP (global workspace) REAL array, * dimension (NPROW*NPCOL) * * WORK (local workspace) COMPLEX array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) REAL array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of real workspace to pass to PCHEGVX * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PCHEGVX * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) REAL * |AQ- QL| / |A|*N*EPS * * QTQNRM (global output) REAL * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285E+0, FIVE = 5.0E+0, $ NEGONE = -1.0E+0 ) COMPLEX CPADVAL PARAMETER ( CPADVAL = ( 13.989E+0, 1.93E+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) * .. * .. Local Scalars .. LOGICAL MISSLARGEST, MISSSMALLEST INTEGER I, IAM, INDIWRK, INFO, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, J, M, MAXEIGS, MAXIL, MAXIU, MAXSIZE, $ MINIL, MQ, MYCOL, MYIL, MYROW, NCLUSTERS, NP, $ NPCOL, NPROW, NQ, NZ, OLDIL, OLDIU, OLDNZ, RES, $ RSIZECHK, RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, $ RSIZETST, SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZESUBTST, SIZETMS, SIZETST, VALSIZE, $ VECSIZE REAL EPS, ERROR, MAXERROR, MAXVU, MINERROR, MINVL, $ NORMWIN, OLDVL, OLDVU, ORFAC, SAFMIN * .. * .. Local Arrays .. INTEGER DESCZ( DLEN_ ), DSEED( 4 ), ITMP( 2 ) * .. * .. External Functions .. * LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH EXTERNAL LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLACPY, DESCINIT, IGAMN2D, $ IGAMX2D, PCCHEKPAD, PCELSET, PCFILLPAD, $ PCGSEPCHK, PCHEGVX, PCLASIZEGSEP, $ PCLASIZEHEEVX, PICHEKPAD, PIFILLPAD, PSCHEKPAD, $ PSFILLPAD, SGAMN2D, SGAMX2D, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL PCLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PSLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), NORMWIN ) * * Make sure that we aren't using information from previous calls * NZ = -13 OLDNZ = NZ OLDIL = IL OLDIU = IU OLDVL = VL OLDVU = VU * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3E+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I+IPREPAD ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63E+0, 1.1E+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159E+0 40 CONTINUE * ICLUSTR( 1+IPREPAD ) = 139 * IF( LSAME( JOBZ, 'N' ) ) THEN MAXEIGS = 0 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MAXEIGS = N ELSE IF( LSAME( RANGE, 'I' ) ) THEN MAXEIGS = IU - IL + 1 ELSE MINVL = VL - NORMWIN*FIVE*EPS - ABSTOL MAXVU = VU + NORMWIN*FIVE*EPS + ABSTOL MINIL = 1 MAXIU = 0 DO 50 I = 1, N IF( WIN( I ).LT.MINVL ) $ MINIL = MINIL + 1 IF( WIN( I ).LE.MAXVU ) $ MAXIU = MAXIU + 1 50 CONTINUE * MAXEIGS = MAXIU - MINIL + 1 END IF END IF * * CALL DESCINIT( DESCZ, DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ DESCA( CTXT_ ), DESCA( LLD_ ), INFO ) * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INDIWRK = 1 + IPREPAD + NPROW*NPCOL + 1 * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 160 RESULT = 0 * * * DSEED is not used in this call to PCLASIZEHEEVX, the * following line just makes ftnchek happy. * DSEED( 1 ) = 1 * CALL PCLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) MQ = NUMROC( MAXEIGS, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL CLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL CLACPY( 'A', NP, NQ, COPYB, DESCA( LLD_ ), B( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PCFILLPAD( DESCA( CTXT_ ), NP, NQ, B, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL+1.0E+2 ) * CALL PCFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL ) * CALL PCFILLPAD( DESCZ( CTXT_ ), NP, MQ, Z, DESCZ( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL+1.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), NPROW*NPCOL, 1, GAP, NPROW*NPCOL, $ IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PSFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0E+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), N, 1, IFAIL, N, IPREPAD, IPOSTPAD, $ IPADVAL ) * CALL PIFILLPAD( DESCA( CTXT_ ), 2*NPROW*NPCOL, 1, ICLUSTR, $ 2*NPROW*NPCOL, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PCFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, CPADVAL+4.1E+0 ) * * Make sure that PCHEGVX does not cheat (i.e. use answers * already computed.) * DO 70 I = 1, N, 1 DO 60 J = 1, MAXEIGS, 1 CALL PCELSET( Z( 1+IPREPAD ), I, J, DESCA, $ ( 13.0E+0, 1.34E+0 ) ) 60 CONTINUE 70 CONTINUE * ORFAC = -1.0E+0 * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) CALL PCHEGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A( 1+IPREPAD ), IA, $ JA, DESCA, B( 1+IPREPAD ), IA, JA, DESCA, VL, VU, $ IL, IU, ABSTOL, M, NZ, WNEW( 1+IPREPAD ), ORFAC, $ Z( 1+IPREPAD ), IA, JA, DESCA, WORK( 1+IPREPAD ), $ SIZEHEEVX, RWORK( 1+IPREPAD ), LWORK1, $ IWORK( 1+IPREPAD ), LIWORK, IFAIL( 1+IPREPAD ), $ ICLUSTR( 1+IPREPAD ), GAP( 1+IPREPAD ), INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-B', NP, NQ, B, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ CPADVAL+1.0E+2 ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, CPADVAL ) * CALL PCCHEKPAD( DESCZ( CTXT_ ), 'PCHEGVX-Z', NP, MQ, Z, $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ CPADVAL+1.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-GAP', NPROW*NPCOL, 1, $ GAP, NPROW*NPCOL, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, CPADVAL+4.1E+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-IFAIL', N, 1, IFAIL, $ N, IPREPAD, IPOSTPAD, IPADVAL ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PCHEGVX-ICLUSTR', $ 2*NPROW*NPCOL, 1, ICLUSTR, 2*NPROW*NPCOL, $ IPREPAD, IPOSTPAD, IPADVAL ) * * * Since we now know the spectrum, we can potentially reduce MAXSIZE. * IF( LSAME( RANGE, 'A' ) ) THEN CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ DSEED, WNEW( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) END IF * * * Check INFO * * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( MOD( INFO, 2 ).EQ.1 .OR. INFO.GT.7 .OR. INFO.LT.0 ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )INFO RESULT = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 .AND. LWORK1.GE.MAXSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 .AND. LWORK1.GE.VECSIZE ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * IF( LSAME( JOBZ, 'V' ) .AND. ( ICLUSTR( 1+IPREPAD ).NE. $ 0 ) .AND. ( MOD( INFO / 2, 2 ).NE.1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) RESULT = 1 END IF * * Check M * IF( ( M.LT.0 ) .OR. ( M.GT.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'A' ) .AND. ( M.NE.N ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) RESULT = 1 ELSE IF( LSAME( RANGE, 'I' ) .AND. ( M.NE.IU-IL+1 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9992 ) RESULT = 1 ELSE IF( LSAME( JOBZ, 'V' ) .AND. $ ( .NOT.( LSAME( RANGE, 'V' ) ) ) .AND. ( M.NE.NZ ) ) $ THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) RESULT = 1 END IF * * Check NZ * IF( LSAME( JOBZ, 'V' ) ) THEN IF( LSAME( RANGE, 'V' ) ) THEN IF( NZ.GT.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) RESULT = 1 END IF IF( NZ.LT.M .AND. MOD( INFO / 4, 2 ).NE.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9989 ) RESULT = 1 END IF ELSE IF( NZ.NE.M ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) RESULT = 1 END IF END IF END IF IF( RESULT.EQ.0 ) THEN * * Make sure that all processes return the same # of eigenvalues * ITMP( 1 ) = M ITMP( 2 ) = M * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) RESULT = 1 ELSE * * Make sure that different processes return the same eigenvalues * DO 80 I = 1, M RWORK( I ) = WNEW( I+IPREPAD ) RWORK( I+M ) = WNEW( I+IPREPAD ) 80 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', M, 1, RWORK, M, $ 1, 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', M, 1, $ RWORK( 1+M ), M, 1, 1, -1, -1, 0 ) * DO 90 I = 1, M * IF( RESULT.EQ.0 .AND. ( ABS( RWORK( I )-RWORK( M+ $ I ) ).GT.FIVE*EPS*ABS( RWORK( I ) ) ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) RESULT = 1 END IF 90 CONTINUE END IF END IF * * Make sure that all processes return the same # of clusters * IF( LSAME( JOBZ, 'V' ) ) THEN NCLUSTERS = 0 DO 100 I = 0, NPROW*NPCOL - 1 IF( ICLUSTR( 1+IPREPAD+2*I ).EQ.0 ) $ GO TO 110 NCLUSTERS = NCLUSTERS + 1 100 CONTINUE 110 CONTINUE ITMP( 1 ) = NCLUSTERS ITMP( 2 ) = NCLUSTERS * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, $ 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) RESULT = 1 ELSE * * Make sure that different processes return the same clusters * DO 120 I = 1, NCLUSTERS IWORK( INDIWRK+I ) = ICLUSTR( I+IPREPAD ) IWORK( INDIWRK+I+NCLUSTERS ) = ICLUSTR( I+IPREPAD ) 120 CONTINUE CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1 ), NCLUSTERS*2+1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', NCLUSTERS*2+1, 1, $ IWORK( INDIWRK+1+NCLUSTERS ), $ NCLUSTERS*2+1, 1, 1, -1, -1, 0 ) * * DO 130 I = 1, NCLUSTERS IF( RESULT.EQ.0 .AND. IWORK( INDIWRK+I ).NE. $ IWORK( INDIWRK+NCLUSTERS+I ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) RESULT = 1 END IF 130 CONTINUE * IF( ICLUSTR( 1+IPREPAD+NCLUSTERS*2 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) RESULT = 1 END IF END IF END IF * * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, $ -1, -1, 0 ) IF( RESULT.NE.0 ) $ GO TO 160 * * Note that a couple key variables get redefined in PCGSEPCHK * as described by this table: * * PCGSEPTST name PCGSEPCHK name * ------------- ------------- * COPYA A * Z Q * B B * A C * * IF( LSAME( JOBZ, 'V' ) ) THEN * * Perform the residual check * CALL PSFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, $ RSIZECHK, IPREPAD, IPOSTPAD, 4.3E+0 ) * CALL PCGSEPCHK( IBTYPE, N, NZ, COPYA, IA, JA, DESCA, COPYB, $ IA, JA, DESCA, THRESH, Z( 1+IPREPAD ), IA, $ JA, DESCZ, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), RWORK( 1+IPREPAD ), $ RSIZECHK, TSTNRM, RES ) * CALL PSCHEKPAD( DESCA( CTXT_ ), 'PCGSEPCHK-rWORK', RSIZECHK, $ 1, RWORK, RSIZECHK, IPREPAD, IPOSTPAD, $ 4.3E+0 ) * IF( RES.NE.0 ) $ RESULT = 1 END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN ) THEN * * Set up MYIL if necessary * MYIL = IL * IF( LSAME( RANGE, 'V' ) ) THEN MYIL = 1 MINIL = 1 MAXIL = N - M + 1 ELSE IF( LSAME( RANGE, 'A' ) ) THEN MYIL = 1 END IF MINIL = MYIL MAXIL = MYIL END IF * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN * DO 150 MYIL = MINIL, MAXIL MAXERROR = 0 * * Make sure that we aren't skipping any important eigenvalues * MISSSMALLEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.1 ) ) $ MISSSMALLEST = .FALSE. IF( MISSSMALLEST .AND. ( WIN( MYIL-1 ).LT.VL+NORMWIN* $ FIVE*THRESH*EPS ) )MISSSMALLEST = .FALSE. MISSLARGEST = .TRUE. IF( .NOT.LSAME( RANGE, 'V' ) .OR. ( MYIL.EQ.MAXIL ) ) $ MISSLARGEST = .FALSE. IF( MISSLARGEST .AND. ( WIN( MYIL+M ).GT.VU-NORMWIN*FIVE* $ THRESH*EPS ) )MISSLARGEST = .FALSE. IF( .NOT.MISSSMALLEST ) THEN IF( .NOT.MISSLARGEST ) THEN * * Make sure that the eigenvalues that we report are OK * DO 140 I = 1, M ERROR = ABS( WIN( I+MYIL-1 )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 140 CONTINUE * MINERROR = MIN( MAXERROR, MINERROR ) END IF END IF 150 CONTINUE * * * If JOBZ = 'V' and RANGE='A', we might be comparing * against our estimate of what the eigenvalues ought to * be, rather than comparing against what PxHEGVX computed * last time around, so we have to be more generous. * IF( LSAME( JOBZ, 'V' ) .AND. LSAME( RANGE, 'A' ) ) THEN IF( MINERROR.GT.NORMWIN*FIVE*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF ELSE IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF END IF * * * Make sure that the IL, IU, VL and VU were not altered * IF( IL.NE.OLDIL .OR. IU.NE.OLDIU .OR. VL.NE.OLDVL .OR. VU.NE. $ OLDVU ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) RESULT = 1 END IF * IF( LSAME( JOBZ, 'N' ) .AND. ( NZ.NE.OLDNZ ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9981 ) RESULT = 1 END IF * END IF * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 160 CONTINUE * * RETURN * 9999 FORMAT( 'PCHEGVX returned INFO=', I7 ) 9998 FORMAT( 'PCSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PCGSEPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PCHEGVX returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'ICLUSTR(1).NE.0 but mod(INFO/2,2).NE.1' ) 9994 FORMAT( 'M not in the range 0 to N' ) 9993 FORMAT( 'M not equal to N' ) 9992 FORMAT( 'M not equal to IU-IL+1' ) 9991 FORMAT( 'M not equal to NZ' ) 9990 FORMAT( 'NZ > M' ) 9989 FORMAT( 'NZ < M' ) 9988 FORMAT( 'NZ not equal to M' ) 9987 FORMAT( 'Different processes return different values for M' ) 9986 FORMAT( 'Different processes return different eigenvalues' ) 9985 FORMAT( 'Different processes return ', $ 'different numbers of clusters' ) 9984 FORMAT( 'Different processes return different clusters' ) 9983 FORMAT( 'ICLUSTR not zero terminated' ) 9982 FORMAT( 'IL, IU, VL or VU altered by PCHEGVX' ) 9981 FORMAT( 'NZ altered by PCHEGVX with JOBZ=N' ) * * End of PCGSEPSUBTST * END scalapack-1.8.0/TESTING/EIG/pdlatms.f0000640000175000017500000003251010363532303016651 0ustar muammarmuammar* * SUBROUTINE PDLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, IA, JA, DESCA, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER IA, INFO, JA, KL, KU, LWORK, M, MODE, N, ORDER DOUBLE PRECISION COND, DMAX * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION A( * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * PDLATMS generates random symmetric matrices with specified * eigenvalues for testing SCALAPACK programs. * * PDLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * ### bandwidth reduction NOT SUPPORTED ### * * Arguments * ========= * * M - (global input) INTEGER * The number of rows of A. Not modified. * * N - (global input) INTEGER * The number of columns of A. Not modified. * ### M .ne. N unsupported * * DIST - (global input) CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - (global input) INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to DLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - (global input) CHARACTER*1 * If SYM='S' or 'H', the generated matrix is symmetric, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is symmetric, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * ### SYM = 'N' NOT SUPPORTED ### * Not modified. * * D - (local input/output) DOUBLE PRECISION array, * dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - (global input) INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - (global input) DOUBLE PRECISION * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - (global input) DOUBLE PRECISION * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - (global input) INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * ### 1 <= KL < N-1 is NOT SUPPORTED ### * * KU - (global input) INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * ### 1 <= KU < N-1 is NOT SUPPORTED ### * * PACK - (global input) CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * ### PACK must be 'N' all other options NOT SUPPORTED ### * * A - (local output) DOUBLE PRECISION array * Global dimension (M, N), local dimension (MP, NQ) * On exit A is the desired test matrix. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ORDER - (input) INTEGER * The number of reflectors used to define the orthogonal * matrix Q. A = Q * D * Q' * Higher ORDER requires more computation and communication. * * WORK - (local input/output) DOUBLE PRECISION array, * dimension (LWORK) * * LWORK - (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PDLASIZESEP * * INFO - (global output) INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -16 => DESCA is inconsistent * -17 => ORDER not in the range 0 to N inclusive * 1 => Error return from DLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from PDLAGSY * *----------------------------------------------------------------------- * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IDIST, IINFO, IPACK, IRSIGN, ISYM, LLB, $ MNMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION ALPHA, TEMP * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DLASET, DLATM1, DSCAL, $ PCHK1MAT, PDLAGSY, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * 1) Decode and Test the input parameters. * Initialize flags & seed. * * INFO = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( ( MYROW.GE.NPROW .OR. MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL .OR. MYCOL.LT.0 ) )RETURN * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IPACK = 1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) * IF( ORDER.EQ.0 ) $ ORDER = N * * Set INFO if an error * IF( NPROW.EQ.-1 ) THEN INFO = -( 1600+CTXT_ ) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, INFO ) IF( INFO.EQ.0 ) THEN IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT. $ ONE ) THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( ( ORDER.LT.0 ) .OR. ( ORDER.GT.N ) ) THEN INFO = -17 END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, 0, IDUM1, IDUM2, $ INFO ) END IF * * Check for unsupported features * IF( ISYM.NE.2 ) THEN INFO = -5 ELSE IF( IPACK.NE.0 ) THEN INFO = -12 ELSE IF( KL.GT.0 .AND. KL.LT.M-1 ) THEN INFO = -10 ELSE IF( KU.GT.0 .AND. KU.LT.N-1 ) THEN INFO = -11 ELSE IF( LLB.NE.0 .AND. LLB.NE.M-1 ) THEN INFO = -10 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL DLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) * IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL DSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL DLASET( 'A', NP, NQ, ZERO, ZERO, A, DESCA( LLD_ ) ) * * symmetric -- A = U D U' * CALL PDLAGSY( M, LLB, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, IINFO ) * RETURN * * End of PDLATMS * END scalapack-1.8.0/TESTING/EIG/pcgebdrv.f0000640000175000017500000004721510363532303017011 0ustar muammarmuammar SUBROUTINE PCGEBDRV( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEBDRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from sub( A ), * Q, P returned by PCGEBRD: * * sub( A ) := Q * B * P'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of sub( A ) * as returned by PCGEBRD. On exit, the original distribu- * ted matrix sub( A ) is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local input) COMPLEX array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local input) COMPLEX array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= 2*NB*( MP + NQ + NB ) * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MP = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL REIGHT, RZERO PARAMETER ( REIGHT = 8.0E+0, RZERO = 0.0E+0 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IIA, IL, IPTP, IPTQ, $ IPV, IPW, IPWK, IOFF, IV, J, JB, JJA, JL, JV, $ K, MN, MP, MYCOL, MYROW, NB, NPCOL, NPROW, NQ REAL ADDBND, D2, E2 COMPLEX D1, E1 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, IGSUM2D, INFOG2L, $ PCLACPY, PCLARFB, PCLARFT, PCLASET, $ PCELGET, PSELGET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC REAL PSLAMCH EXTERNAL INDXG2P, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) IOFF = MOD( IA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) IPV = 1 IPW = IPV + MP*NB IPTP = IPW + NQ*NB IPTQ = IPTP + NB*NB IPWK = IPTQ + NB*NB * IV = 1 JV = 1 MN = MIN( M, N ) IL = MAX( ( (IA+MN-2) / NB )*NB + 1, IA ) JL = MAX( ( (JA+MN-2) / NB )*NB + 1, JA ) IAROW = INDXG2P( IL, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, IA+M-IL, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCW, NB, JA+N-JL, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * ADDBND = REIGHT * PSLAMCH( ICTXT, 'eps' ) * * When A is an upper bidiagonal form * IF( M.GE.N ) THEN * CALL DESCSET( DESCD, 1, JA+MN-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * DO 10 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PCELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, IA+J, 1, DESCE ) CALL PCELGET( 'Rowwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * DO 20 J = JL, JA+NB-IOFF, -NB JB = MIN( JA+N-J, NB ) I = IA + J - JA K = I - IA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PCLARFT( 'Forward', 'Columnwise', M-K+1, JB, A, I, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Lower', M-K+1, JB, A, I, J, DESCA, $ WORK( IPV ), IV, JV, DESCV ) CALL PCLASET( 'Upper', M-K+1, JB, ZERO, ONE, WORK( IPV ), $ IV, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', M-K, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PCLARFT( 'Forward', 'Rowwise', N-K, JB, A, I, J+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Upper', JB, N-K, A, I, J+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PCLASET( 'Lower', JB, N-K, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PCLASET( 'Upper', JB, N-K-1, ZERO, ZERO, A, I, J+2, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PCLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K+1, N-K+1, JB, WORK( IPV ), $ IV, JV, DESCV, WORK( IPTQ ), A, I, J, DESCA, $ WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-K+1, N-K, JB, WORK( IPW ), IV, $ JV+1, DESCW, WORK( IPTP ), A, I, J+1, DESCA, $ WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 20 CONTINUE * * Handle first block separately * JB = MIN( N, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PCLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IV, JV, DESCV ) CALL PCLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPV ), IV, JV, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PCLARFT( 'Forward', 'Rowwise', N-1, JB, A, IA, JA+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Upper', JB, N-1, A, IA, JA+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PCLASET( 'Lower', JB, N-1, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PCLASET( 'Upper', JB, N-2, ZERO, ZERO, A, IA, JA+2, $ DESCA ) * * Apply block Householder transformation from left. * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N, JB, WORK( IPV ), IV, JV, DESCV, $ WORK( IPTQ ), A, IA, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right. * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N-1, JB, WORK( IPW ), IV, JV+1, $ DESCW, WORK( IPTP ), A, IA, JA+1, DESCA, $ WORK( IPWK ) ) * ELSE * CALL DESCSET( DESCD, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MN-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = RZERO E2 = RZERO CALL PSELGET( ' ', ' ', D2, D, IA+J, 1, DESCD ) CALL PCELGET( 'Rowwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PCELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR. $ ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * DO 40 I = IL, IA+NB-IOFF, -NB JB = MIN( IA+M-I, NB ) J = JA + I - IA K = J - JA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PCLARFT( 'Forward', 'Columnwise', M-K, JB, A, I+1, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Lower', M-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PCLASET( 'Upper', M-K, JB, ZERO, ONE, WORK( IPV ), $ IV+1, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', M-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PCLARFT( 'Forward', 'Rowwise', N-K+1, JB, A, I, J, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Upper', JB, N-K+1, A, I, J, DESCA, $ WORK( IPW ), IV, JV, DESCW ) CALL PCLASET( 'Lower', JB, N-K+1, ZERO, ONE, WORK( IPW ), $ IV, JV, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PCLASET( 'Upper', JB, N-K, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PCLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K, N-K+1, JB, WORK( IPV ), $ IV+1, JV, DESCV, WORK( IPTQ ), A, I+1, J, $ DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-K+1, N-K+1, JB, WORK( IPW ), IV, $ JV, DESCW, WORK( IPTP ), A, I, J, DESCA, $ WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 40 CONTINUE * * Handle first block separately * JB = MIN( M, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PCLARFT( 'Forward', 'Columnwise', M-1, JB, A, IA+1, JA, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Lower', M-1, JB, A, IA+1, JA, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PCLASET( 'Upper', M-1, JB, ZERO, ONE, WORK( IPV ), IV+1, $ JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', M-2, JB, ZERO, ZERO, A, IA+2, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PCLARFT( 'Forward', 'Rowwise', N, JB, A, IA, JA, DESCA, $ TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPW ), $ IV, JV, DESCW ) CALL PCLASET( 'Lower', JB, N, ZERO, ONE, WORK( IPW ), IV, JV, $ DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PCLASET( 'Upper', JB, N-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Apply block Householder transformation from left * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-1, N, JB, WORK( IPV ), IV+1, JV, DESCV, $ WORK( IPTQ ), A, IA+1, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M, N, JB, WORK( IPW ), IV, JV, DESCW, $ WORK( IPTP ), A, IA, JA, DESCA, WORK( IPWK ) ) END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PCGEBDRV * END scalapack-1.8.0/TESTING/EIG/pssvdtst.f0000640000175000017500000005610210363532303017102 0ustar muammarmuammar SUBROUTINE PSSVDTST( M, N, NPROW, NPCOL, NB, ISEED, THRESH, WORK, $ RESULT, LWORK, NOUT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LWORK, M, N, NB, NOUT, NPCOL, NPROW REAL THRESH * .. * .. Array Arguments .. INTEGER ISEED( 4 ), RESULT( 9 ) REAL WORK( * ) * .. * * Purpose * ======= * * PSSVDTST checks the singular value decomposition (SVD) routine * PSGESVD. PSGESVD factors A = U diag(S) VT, where U and VT are * orthogonal and diag(S) is diagonal with the entries of the array * S on its diagonal. The entries of S are the singular values, stored * in decreasing order. U and VT can be optionally not computed, * computed and overwritten on A, or computed partially. * * A is M by N. Let SIZE = min( M, N ). S has dimension SIZE by SIZE. * U is M by SIZE and VT is SIZE by N. PDGESVD optionally calculates * U and VT, depending on the values of its parameters JOBU and JOBVT. * There are four possible combinations of "job" parameters for a call * to PDGESVD, that correspond to four values of internal index JOBTYPE. * The table below shows the mapping between "job" parameters of * PDGESVD and respective values of the index JOBTYPE together * with matrices computed for each type of the job. * * * | JOBU = 'V' | JOBU = 'N' * ---------- ------------------------------------------- * JOBVT = 'V'| JOBTYPE = 1 | JOBTYPE = 3 * | U1, S1, VT1 | S3, VT3 * ---------- ------------------------------------------ * JOBVT = 'N'| JOBTYPE = 2 | JOBTYPE = 4 * | U2, S2 | S4 * * * When PSSVDTST is called, a number of matrix "types" are specified. * For each type of matrix, and for the minimal workspace as well as * for larger than minimal workspace an M x N matrix "A" with known * singular values is generated and used to test the SVD routines. * For each matrix, A will be factored as A = U diag(S) VT and the * following 9 tests computed: * * (1) | A - U1 diag(S1) VT1 | / ( |A| max(M,N) ulp ) * * (2) | I - U1'U1 | / ( M ulp ) * * (3) | I - VT1 VT1' | / ( N ulp ), * * (4) S1 contains SIZE nonnegative values in decreasing order. * (Return 0 if true, 1/ULP if false.) * * (5) | S1 - S2 | / ( SIZE ulp |S| ) * * (6) | U1 - U2 | / ( M ulp ) * * (7) | S1 - S3 | / ( SIZE ulp |S| ) * * (8) | VT1 - VT3 | / ( N ulp ) * * (9) | S1 - S4 | / ( SIZE ulp |S| ) * * Currently, the list of possible matrix types is: * * (1) The zero matrix. * * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP. * (ULP = (first number larger than 1) - 1 ) * * (4) A matrix of the form U D VT, where U, VT are orthogonal and * D has evenly spaced entries 1, ..., ULP. * * (5) Same as (4), but multiplied by SQRT( overflow threshold ) * * (6) Same as (4), but multiplied by SQRT( underflow threshold ) * * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========== * * M (global input) INTEGER dimension * The value of the matrix row dimension. * * N (global input) INTEGER dimension * The value of the matrix column dimension. * * NPROW (global input) INTEGER * Number of process rows * * NPCOL (global input) INTEGER * Number of process columns * * NB (global input) INTEGER * The block size of the matrix A. NB >=1. * * ISEED (global input/local output) INTEGER array, dimension (4) * On entry, the seed of the random number generator. The array * elements should be between 0 and 4095; if not they will be * reduced mod 4096. Also, ISEED(4) must be odd. * On exit, ISEED is changed and can be used in the next call to * SDRVBD to continue the same random number sequence. * * THRESH (global input) REAL * The threshold value for the test ratios. A result is * included in the output file if RESULT >= THRESH. The test * ratios are scaled to be O(1), so THRESH should be a small * multiple of 1, e.g., 10 or 100. To have every test ratio * printed, use THRESH = 0. * * RESULT (global input/output) INTEGER array of dimension 9. Initially * RESULT( I ) = 0. On the output, RESULT ( I ) = 1 if test I * ( see above ) wasn't passed. * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * Dimension of the array WORK. It is defined as follows * LWORK = 1 + 2*LDA*NQ + 3*SIZE + * MAX(WPSLAGGE, LDU*SIZEQ + LDVT*NQ + MAX(LDU*SIZEQ, LDVT*NQ) * + WPSGESVD + MAX( WPSSVDCHK, WPSSVDCMP)), * where WPSLAGGE, WPSGESVD, WPSSVDCHK, WPSSVDCMP are amounts * of workspace required respectively by PSLAGGE, PSGESVD, * PSSVDCHK, PSSVDCMP. * Here * LDA = NUMROC( M, NB, MYROW, 0, NPROW ), LDU = LDA, * LDVT = NUMROC( SIZE, NB, MYROW, 0, NPROW ), * NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ), * SIZEQ = NUMROC( SIZE, NB, MYCOL, 0, NPCOL ). * Values of the variables WPSLAGGE, WPSGESVD, WPSSVDCHK, * WPSSVDCMP are found by "dummy" calls to * the respective routines. In every "dummy" call, variable * LWORK is set to -1, thus causing respective routine * immediately return required workspace in WORK(1) without * executing any calculations * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_, NTYPES PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9, NTYPES = 6 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. CHARACTER HETERO, JOBU, JOBVT INTEGER CONTEXT, DINFO, I, IA, IAM, INFO, ITYPE, IU, $ IVT, JA, JOBTYPE, JU, JVT, LDA, LDU, LDVT, $ LLWORK, LWMIN, MYCOL, MYROW, NNODES, NQ, PASS, $ PTRA, PTRAC, PTRD, PTRS, PTRSC, PTRU, PTRUC, $ PTRVT, PTRVTC, PTRWORK, SETHET, SIZE, SIZEQ, $ WPSGESVD, WPSLAGGE, WPSSVDCHK, WPSSVDCMP REAL CHK, DELTA, H, MTM, OVFL, RTOVFL, RTUNFL, ULP, $ UNFL * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ BLACS_SET, $ DESCINIT, SGAMN2D, SGAMX2D, SLABAD, SSCAL, $ IGAMN2D, IGAMX2D, IGEBR2D, IGEBS2D, PSELSET, $ PSGESVD, PSLACPY, PSLAGGE, PSLASET, PSSVDCHK, $ PSSVDCMP, PXERBLA, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER NUMROC REAL PSLAMCH EXTERNAL NUMROC, PSLAMCH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCU( DLEN_ ), $ DESCVT( DLEN_ ), ITMP( 2 ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*DTYPE_*LLD_*MB_*M_*NB_*N_*RSRC_.LT.0 ) $ RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * If this process is not a part of the contex, bail out now. * IF( ( MYROW.GE.NPROW ) .OR. ( MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL ) .OR. ( MYCOL.LT.0 ) )GO TO 110 CALL BLACS_SET( CONTEXT, 15, 1 ) INFO = 0 * * Check input parameters. * IF( M.LE.0 ) THEN INFO = -1 ELSE IF( N.LE.0 ) THEN INFO = -2 ELSE IF( NPROW.LE.0 ) THEN INFO = -3 ELSE IF( NPCOL.LE.0 ) THEN INFO = -4 ELSE IF( NB.LE.0 ) THEN INFO = -5 ELSE IF( THRESH.LE.0 ) THEN INFO = -7 END IF * SIZE = MIN( M, N ) * * Initialize matrix descriptors. * IA = 1 JA = 1 IU = 1 JU = 1 IVT = 1 JVT = 1 * LDA = NUMROC( M, NB, MYROW, 0, NPROW ) LDA = MAX( 1, LDA ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) LDU = LDA SIZEQ = NUMROC( SIZE, NB, MYCOL, 0, NPCOL ) LDVT = NUMROC( SIZE, NB, MYROW, 0, NPROW ) LDVT = MAX( 1, LDVT ) CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, CONTEXT, LDA, DINFO ) CALL DESCINIT( DESCU, M, SIZE, NB, NB, 0, 0, CONTEXT, LDU, DINFO ) CALL DESCINIT( DESCVT, SIZE, N, NB, NB, 0, 0, CONTEXT, LDVT, $ DINFO ) * * Set some pointers to work array in order to do "dummy" calls. * PTRA = 2 PTRAC = PTRA + LDA*NQ PTRD = PTRAC + LDA*NQ PTRS = PTRD + SIZE PTRSC = PTRS + SIZE PTRWORK = PTRSC + SIZE * PTRU = PTRWORK PTRVT = PTRWORK PTRUC = PTRWORK PTRVTC = PTRWORK * * "Dummy" calls -- return required workspace in work(1) without * any calculation. * CALL PSLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, DESCA, $ ISEED, SIZE, WORK( PTRWORK ), -1, DINFO ) WPSLAGGE = INT( WORK( PTRWORK ) ) * CALL PSGESVD( 'V', 'V', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRS ), WORK( PTRU ), IU, JU, DESCU, $ WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRWORK ), -1, DINFO ) WPSGESVD = INT( WORK( PTRWORK ) ) * CALL PSSVDCHK( M, N, WORK( PTRAC ), IA, JA, DESCA, WORK( PTRUC ), $ IU, JU, DESCU, WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRS ), THRESH, WORK( PTRWORK ), -1, $ RESULT, CHK, MTM ) WPSSVDCHK = INT( WORK( PTRWORK ) ) * CALL PSSVDCMP( M, N, 1, WORK( PTRS ), WORK( PTRSC ), WORK( PTRU ), $ WORK( PTRUC ), IU, JU, DESCU, WORK( PTRVT ), $ WORK( PTRVTC ), IVT, JVT, DESCVT, THRESH, $ RESULT, DELTA, WORK( PTRWORK ), -1 ) WPSSVDCMP = INT( WORK( PTRWORK ) ) * * Calculation of workspace at last. * LWMIN = 1 + 2*LDA*NQ + 3*SIZE + $ MAX( WPSLAGGE, LDU*SIZEQ+LDVT*NQ+MAX( LDU*SIZEQ, $ LDVT*NQ )+WPSGESVD+MAX( WPSSVDCHK, WPSSVDCMP ) ) WORK( 1 ) = LWMIN * * If this is a "dummy" call, return. * IF( LWORK.EQ.-1 ) $ GO TO 120 IF( INFO.EQ.0 ) THEN IF( LWORK.LT.LWMIN ) THEN INFO = -10 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSSVDTST', -INFO ) RETURN END IF * ULP = PSLAMCH( CONTEXT, 'P' ) UNFL = PSLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL SLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF * * Loop over matrix types. * DO 100 ITYPE = 1, NTYPES * PASS = 0 SETHET = 0 PTRWORK = PTRSC + SIZE LLWORK = LWORK - PTRWORK + 1 * * Compute A. * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix. * DO 10 I = 1, SIZE WORK( PTRD+I-1 ) = ZERO 10 CONTINUE * CALL PSLASET( 'All', M, N, ZERO, ZERO, WORK( PTRA ), $ IA, JA, DESCA ) * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix. * DO 20 I = 1, SIZE WORK( PTRD+I-1 ) = ONE 20 CONTINUE * CALL PSLASET( 'All', M, N, ZERO, ONE, WORK( PTRA ), $ IA, JA, DESCA ) * ELSE IF( ITYPE.GT.2 ) THEN * * Preset Singular Values. * IF( SIZE.NE.1 ) THEN H = ( ULP-1 ) / ( SIZE-1 ) DO 30 I = 1, SIZE WORK( PTRD+I-1 ) = 1 + H*( I-1 ) 30 CONTINUE ELSE WORK( PTRD ) = 1 END IF * IF( ITYPE.EQ.3 ) THEN * * Diagonal Matrix with specified singular values. * CALL PSLASET( 'All', M, N, ZERO, ZERO, WORK( PTRA ), $ IA, JA, DESCA ) * DO 40 I = 1, SIZE CALL PSELSET( WORK( PTRA ), I, I, DESCA, $ WORK( PTRD+I-1 ) ) 40 CONTINUE * ELSE IF( ITYPE.EQ.4 ) THEN * * General matrix with specified singular values. * CALL PSLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Singular values scaled by overflow. * CALL SSCAL( SIZE, RTOVFL, WORK( PTRD ), 1 ) * CALL PSLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * ELSE IF( ITYPE.EQ.6 ) THEN * * Singular values scaled by underflow. * CALL SSCAL( SIZE, RTUNFL, WORK( PTRD ), 1 ) CALL PSLAGGE( M, N, WORK( PTRD ), WORK( PTRA ), IA, JA, $ DESCA, ISEED, SIZE, WORK( PTRWORK ), $ LLWORK, INFO ) * END IF * END IF * * Set mapping between JOBTYPE and calling parameters of * PSGESVD, reset pointers to WORK array to save space. * DO 80 JOBTYPE = 1, 4 * IF( JOBTYPE.EQ.1 ) THEN JOBU = 'V' JOBVT = 'V' PTRVT = PTRU + LDU*SIZEQ PTRUC = PTRVT + LDVT*NQ PTRWORK = PTRUC + LDU*SIZEQ LLWORK = LWORK - PTRWORK + 1 ELSE IF( JOBTYPE.EQ.2 ) THEN JOBU = 'V' JOBVT = 'N' ELSE IF( JOBTYPE.EQ.3 ) THEN JOBU = 'N' JOBVT = 'V' PTRVTC = PTRUC PTRWORK = PTRVTC + LDVT*NQ LLWORK = LWORK - PTRWORK + 1 ELSE IF( JOBTYPE.EQ.4 ) THEN JOBU = 'N' JOBVT = 'N' PTRWORK = PTRUC LLWORK = LWORK - PTRWORK + 1 END IF * * Duplicate matrix A. * CALL PSLACPY( 'A', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRAC ), IA, JA, DESCA ) * * Test SVD calculation with minimum amount of workspace * calculated earlier. * IF( JOBTYPE.EQ.1 ) THEN * * Run SVD. CALL SLBOOT CALL BLACS_BARRIER( CONTEXT, 'All' ) CALL SLTIMER( 1 ) * CALL PSGESVD( JOBU, JOBVT, M, N, WORK( PTRAC ), IA, JA, $ DESCA, WORK( PTRS ), WORK( PTRU ), IU, JU, $ DESCU, WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRWORK ), WPSGESVD, INFO ) * CALL SLTIMER( 1 ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 1, 1, CTIME ) * * Check INFO. Different INFO for different processes mean * something went wrong. * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, $ 1, -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), $ 1, 1, 1, -1, -1, 0 ) * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' GO TO 120 END IF END IF * * If INFO is negative PXERBLA tells you. So the only thing * is to check for positive INFO -- detected heterogeneous * system. * IF( INFO.EQ.( SIZE+1 ) ) THEN HETERO = 'P' SETHET = 1 END IF * * If INFO was fine do more exhaustive check. * IF( INFO.EQ.ZERO ) THEN * DO 50 I = 1, SIZE WORK( I+PTRWORK ) = WORK( I+PTRS-1 ) WORK( I+SIZE+PTRWORK ) = WORK( I+PTRS-1 ) 50 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', SIZE, 1, $ WORK( 1+PTRWORK ), SIZE, 1, 1, -1, -1, $ 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', SIZE, 1, $ WORK( 1+SIZE+PTRWORK ), SIZE, 1, 1, -1, $ -1, 0 ) * DO 60 I = 1, SIZE IF( ABS( WORK( I+PTRWORK )-WORK( SIZE+I+ $ PTRWORK ) ).GT.ZERO ) THEN WRITE( NOUT, FMT = * )'I= ', I, ' MIN=', $ WORK( I+PTRWORK ), ' MAX=', $ WORK( SIZE+I+PTRWORK ) HETERO = 'T' SETHET = 1 GO TO 70 END IF * 60 CONTINUE 70 CONTINUE * END IF * IF( SETHET.NE.1 ) $ HETERO = 'N' * * After PSGESVD AC got screwed up -- need to copy again. * CALL PSLACPY( 'A', M, N, WORK( PTRA ), IA, JA, DESCA, $ WORK( PTRAC ), IA, JA, DESCA ) * * PSSVDCHK screws up U. So before the call to PSSVDCHK * U is copied to UC and a pointer to UC is passed to * PSSVDCHK. * CALL PSLACPY( 'A', M, SIZE, WORK( PTRU ), IU, JU, DESCU, $ WORK( PTRUC ), IU, JU, DESCU ) * * Run tests 1 - 4. * CALL PSSVDCHK( M, N, WORK( PTRAC ), IA, JA, DESCA, $ WORK( PTRUC ), IU, JU, DESCU, $ WORK( PTRVT ), IVT, JVT, DESCVT, $ WORK( PTRS ), THRESH, WORK( PTRWORK ), $ LLWORK, RESULT, CHK, MTM ) * ELSE * * Once again test PSGESVD with min workspace. * CALL PSGESVD( JOBU, JOBVT, M, N, WORK( PTRAC ), IA, JA, $ DESCA, WORK( PTRSC ), WORK( PTRUC ), IU, $ JU, DESCU, WORK( PTRVTC ), IVT, JVT, $ DESCVT, WORK( PTRWORK ), WPSGESVD, INFO ) * CALL PSSVDCMP( M, N, JOBTYPE, WORK( PTRS ), $ WORK( PTRSC ), WORK( PTRU ), $ WORK( PTRUC ), IU, JU, DESCU, $ WORK( PTRVT ), WORK( PTRVTC ), IVT, JVT, $ DESCVT, THRESH, RESULT, DELTA, $ WORK( PTRWORK ), LLWORK ) * END IF * 80 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN DO 90 I = 1, 9 IF( RESULT( I ).EQ.1 ) THEN PASS = 1 WRITE( NOUT, FMT = * )'Test I = ', I, 'has failed' WRITE( NOUT, FMT = * )' ' END IF 90 CONTINUE IF( PASS.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 )'Passed', WTIME( 1 ), $ CTIME( 1 ), M, N, NPROW, NPCOL, NB, ITYPE, CHK, MTM, $ DELTA, HETERO END IF END IF 100 CONTINUE CALL BLACS_GRIDEXIT( CONTEXT ) 110 CONTINUE * 9999 FORMAT( A6, 2E10.4, 2I6, 2I4, I5, I6, 3F6.2, 4X, A1 ) 120 CONTINUE * * End of PSSVDTST * RETURN END scalapack-1.8.0/TESTING/EIG/psgehdrv.f0000640000175000017500000001763210602576752017053 0ustar muammarmuammar SUBROUTINE PSGEHDRV( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEHDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from the * orthogonal matrix Q, the Hessenberg matrix, and the array TAU * returned by PSGEHRD: * sub( A ) := Q * H * Q' * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows and columns 1:ILO-1 and IHI+1:N. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) reduced to Hessenberg * form by PSGEHRD. The upper triangle and the first sub- * diagonal of sub( A ) contain the upper Hessenberg matrix H, * and the elements below the first subdiagonal, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors. On exit, the original distributed * N-by-N matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors returned by * PSGEHRD. TAU is tied to the distributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK). * LWORK >= NB*NB + NB*IHLP + MAX[ NB*( IHLP+INLQ ), * NB*( IHLQ + MAX[ IHIP, * IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, * NPCOL ), NB, 0, 0, LCMQ ) ] ) ] * * where NB = MB_A = NB_A, * LCM is the least common multiple of NPROW and NPCOL, * LCM = ILCM( NPROW, NPCOL ), LCMQ = LCM / NPCOL, * * IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * IHLP = NUMROC( IHI-ILO+IROFFA+1, NB, MYROW, ILROW, NPROW ), * IHLQ = NUMROC( IHI-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ), * INLQ = NUMROC( N-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ). * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IHLP, II, IOFF, IPT, $ IPV, IPW, IV, J, JB, JJ, JL, K, MYCOL, MYROW, $ NB, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSLARFB, $ PSLARFT, PSLACPY, PSLASET * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * NB = DESCA( MB_ ) IOFF = MOD( IA+ILO-2, NB ) CALL INFOG2L( IA+ILO-1, JA+ILO-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = IPT + NB * NB IPW = IPV + IHLP * NB JL = MAX( ( ( JA+IHI-2 ) / NB ) * NB + 1, JA + ILO - 1 ) CALL DESCSET( DESCV, IHI-ILO+IOFF+1, NB, NB, NB, IAROW, $ INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, IHLP ) ) * DO 10 J = JL, ILO+JA+NB-IOFF-1, -NB JB = MIN( JA+IHI-J-1, NB ) I = IA + J - JA K = I - IA + 1 IV = K - ILO + IOFF + 1 * * Compute upper triangular matrix T from TAU. * CALL PSLARFT( 'Forward', 'Columnwise', IHI-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'All', IHI-K, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', IHI-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-K, N-K+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Columnwise', $ IHI, IHI-K, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, IA, J+1, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the first block separately * IV = IOFF + 1 I = IA + ILO - 1 J = JA + ILO - 1 JB = MIN( NB-IOFF, JA+IHI-J-1 ) * * Compute upper triangular matrix T from TAU. * CALL PSLARFT( 'Forward', 'Columnwise', IHI-ILO, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'All', IHI-ILO, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * IF( IHI-ILO.GT.0 ) $ CALL PSLASET( 'Lower', IHI-ILO-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-ILO, N-ILO+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Columnwise', IHI, $ IHI-ILO, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, IA, J+1, DESCA, WORK( IPW ) ) * RETURN * * End of PSGEHDRV * END scalapack-1.8.0/TESTING/EIG/pclasizegsep.f0000640000175000017500000001324110363532303017676 0ustar muammarmuammar SUBROUTINE PCLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, RSIZETST, $ ISIZETST ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * PCLASIZEGSEP computes the amount of memory needed by * ======= * * PCLASIZEGSEP computes the amount of memory needed by * various GSEP test routines, as well as HEGVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PCHEGVX * * SIZEMQRLEFT LWORK for the 1st PCUNMQR call in PCLAGHE * * SIZEMQRRIGHT LWORK for the 2nd PCUNMQR call in PCLAGHE * * SIZEQRF LWORK for PCGEQRF in PCLAGHE * * SIZETMS LWORK for PCLATMS * * RSIZEQTQ LWORK for PCSEPQTQ (nexer complex) * * RSIZECHK LWORK for PCGSEPCHK * * SIZEHEEVX LWORK for PCHEGVX * * RSIZEHEEVX LRWORK for PCHEGVX * * ISIZEHEEVX LIWORK for PCHEGVX * * SIZESUBTST LWORK for PCSUBTST * * RSIZESUBTST LRWORK for PCSUBTST * * ISIZESUBTST LIWORK for PCSUBTST * * SIZETST LWORK for PCTST * * RSIZETST LRWORK for PCTST * * ISIZETST LIWORK for PCTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ANB, CSRC_A, IACOL, IAROW, ICOFFA, ICTXT, $ IROFFA, LCM, LCMQ, LDA, MQ0, MYCOL, MYROW, N, $ NB, NEIG, NHEGST_LWOPT, NHETRD_LWOPT, NN, NNP, $ NP, NP0, NPCOL, NPROW, NPS, NQ, NQ0, RSRC_A, $ SIZECHK, SIZEQTQ, SQNPC * .. * .. External Functions .. * INTEGER ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, SQRT * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = 0 RSIZEQTQ = 0 RSIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZEHEEVX = N + ( NP0+MQ0+NB )*NB RSIZEHEEVX = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZEHEEVX = 6*NNP * ICTXT = DESCA( CTXT_ ) ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS * NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB SIZEHEEVX = MAX( SIZEHEEVX, N+NHETRD_LWOPT, NHEGST_LWOPT ) * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZEHEEVX ) + $ IPREPAD + IPOSTPAD RSIZESUBTST = MAX( RSIZEHEEVX, RSIZEQTQ, RSIZECHK ) + IPREPAD + $ IPOSTPAD ISIZESUBTST = ISIZEHEEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Room for DIAG, WIN, WNEW, GAP and RWORK * RSIZETST = 4*( N+IPREPAD+IPOSTPAD ) + RSIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PCHEGVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/psbrdinfo.f0000640000175000017500000003217310363532303017200 0ustar muammarmuammar SUBROUTINE PSBRDINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBVAL, LDNVAL, LDPVAL, $ LDQVAL, NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBVAL( LDNBVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBRDINFO get needed startup information for the bidiagonal * reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M & N. * * MVAL (global output) INTEGER array, dimension (LDMVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M. LDMVAL >= NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDMVAL+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, IGEBR2D, IGEBS2D, $ SCOPY * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'BRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get values of M, N * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Bidiagonal reduction' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'real single precision bidiagonal ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q B P''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ ' than THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'BRD time : Time in seconds to reduce the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'the bidiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9999 ) ' ' WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 2*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9997 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G12.5 ) * * End of PSBRDINFO * END scalapack-1.8.0/TESTING/EIG/pssepreq.f0000640000175000017500000002223710363532303017054 0ustar muammarmuammar* * SUBROUTINE PSSEPREQ( HETERO, NIN, MEM, MEMSIZE, NOUT, ISEED, $ NTESTS, NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER HETERO INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL MEM( MEMSIZE ) * .. * * Purpose * ======= * * PSSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PSSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) REAL ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER REALSZ, INTGSZ PARAMETER ( REALSZ = 4, INTGSZ = 4 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IMIDPAD, INITCON, IPOSTPAD, $ IPREPAD, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ LDA, LLWORK, MATSIZE, MATTYPE, MYCOL, MYROW, N, $ NB, NMATSIZES, NMATTYPES, NNODES, NP, NPCOL, $ NPCONFIGS, NPROW, NQ, NUPLOS, ORDER, PCONFIG, $ PTRA, PTRCOPYA, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRW, PTRW2, PTRWORK, PTRZ, RES, $ SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZEQTQ, SIZESUBTST, SIZESYEV, SIZESYEVX, $ SIZETMS, SIZETST, UPLO, SIZESYEVD, ISIZESYEVD REAL ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PSLASIZESQP, PSSEPINFO, PSSEPTST * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PSSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * DO 40 MATSIZE = 1, NMATSIZES * DO 30 PCONFIG = 1, NPCONFIGS * DO 20 MATTYPE = 1, NMATTYPES * DO 10 UPLO = 1, NUPLOS * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, $ MYCOL ) * IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PSLASIZESQP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, $ ISIZESYEVX, SIZESYEV, $ SIZESYEVD, ISIZESYEVD, $ SIZESUBTST, ISIZESUBTST, $ SIZETST, ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + MAX( N, 1 ) + IPREPAD + IPOSTPAD PTRGAP = PTRW2 + MAX( N, 1 ) + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + NPROW*NPCOL + IPREPAD + $ IPOSTPAD PTRICLUS = PTRIFAIL + ICEIL( N+IPREPAD+IPOSTPAD, $ REALSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, REALSZ / INTGSZ ) PTRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, REALSZ / INTGSZ ) LLWORK = MEMSIZE - PTRWORK + 1 C LLWORK = MEMSIZE - PTRWORK - IPREPAD - C $ IPOSTPAD + 1 NTESTS = NTESTS + 1 IF( LLWORK.LT.SIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PSSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), SUBTESTS, $ THRESH, N, ABSTOL, ISEED, $ MEM( PTRA ), MEM( PTRCOPYA ), $ MEM( PTRZ ), LDA, MEM( PTRW ), $ MEM( PTRW2 ), MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ LLWORK, MEM( PTRIWRK ), $ ISIZETST, HETERO, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * )' PSSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE END IF * * RETURN * * End of PSSEPREQ * END scalapack-1.8.0/TESTING/EIG/pzget22.f0000640000175000017500000002251410363532303016505 0ustar muammarmuammar SUBROUTINE PZGET22( TRANSA, TRANSE, TRANSW, N, A, DESCA, E, DESCE, $ W, WORK, DESCW, RWORK, RESULT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER TRANSA, TRANSE, TRANSW INTEGER N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCE( * ), DESCW( * ) DOUBLE PRECISION RESULT( 2 ), RWORK( * ) COMPLEX*16 A( * ), E( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PZGET22 does an eigenvector check. * * The basic test is: * * RESULT(1) = | A E - E W | / ( |A| |E| ulp ) * * using the 1-norm. It also tests the normalization of E: * * RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp ) * j * * where E(j) is the j-th eigenvector, and m-norm is the max-norm of a * vector. The max-norm of a complex n-vector x in this case is the * maximum of |re(x(i)| + |im(x(i)| over i = 1, ..., n. * * Arguments * ========== * * TRANSA (input) CHARACTER*1 * Specifies whether or not A is transposed. * = 'N': No transpose * = 'T': Transpose * = 'C': Conjugate transpose * * TRANSE (input) CHARACTER*1 * Specifies whether or not E is transposed. * = 'N': No transpose, eigenvectors are in columns of E * = 'T': Transpose, eigenvectors are in rows of E * = 'C': Conjugate transpose, eigenvectors are in rows of E * * TRANSW (input) CHARACTER*1 * Specifies whether or not W is transposed. * = 'N': No transpose * = 'T': Transpose, same as TRANSW = 'N' * = 'C': Conjugate transpose, use -WI(j) instead of WI(j) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * A (input) COMPLEX*16 array, dimension (*) * The matrix whose eigenvectors are in E. * * DESCA (input) INTEGER array, dimension(*) * * E (input) COMPLEX*16 array, dimension (*) * The matrix of eigenvectors. If TRANSE = 'N', the eigenvectors * are stored in the columns of E, if TRANSE = 'T' or 'C', the * eigenvectors are stored in the rows of E. * * DESCE (input) INTEGER array, dimension(*) * * W (input) COMPLEX*16 array, dimension (N) * The eigenvalues of A. * * WORK (workspace) COMPLEX*16 array, dimension (*) * DESCW (input) INTEGER array, dimension(*) * * RWORK (workspace) DOUBLE PRECISION array, dimension (N) * * RESULT (output) DOUBLE PRECISION array, dimension (2) * RESULT(1) = | A E - E W | / ( |A| |E| ulp ) * RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp ) * j * Further Details * =============== * * Contributed by Mark Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER NORMA, NORME INTEGER ICOL, II, IROW, ITRNSE, ITRNSW, J, JCOL, JJ, $ JROW, JVEC, LDA, LDE, LDW, MB, MYCOL, MYROW, $ NB, NPCOL, NPROW, CONTXT, CA, CSRC, RA, RSRC DOUBLE PRECISION ANORM, ENORM, ENRMAX, ENRMIN, ERRNRM, TEMP1, $ ULP, UNFL COMPLEX*16 CDUM, WTEMP * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL LSAME, PDLAMCH, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMN2D, DGAMX2D, INFOG2L, $ PZAXPY, PZGEMM, PZLASET * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * * Initialize RESULT (in case N=0) * RESULT( 1 ) = ZERO RESULT( 2 ) = ZERO IF( N.LE.0 ) $ RETURN * CONTXT = DESCA( CTXT_ ) RSRC = DESCA( RSRC_ ) CSRC = DESCA( CSRC_ ) NB = DESCA( NB_ ) MB = DESCA( MB_ ) LDA = DESCA( LLD_ ) LDE = DESCE( LLD_ ) LDW = DESCW( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * UNFL = PDLAMCH( CONTXT, 'Safe minimum' ) ULP = PDLAMCH( CONTXT, 'Precision' ) * ITRNSE = 0 ITRNSW = 0 NORMA = 'O' NORME = 'O' * IF( LSAME( TRANSA, 'T' ) .OR. LSAME( TRANSA, 'C' ) ) THEN NORMA = 'I' END IF * IF( LSAME( TRANSE, 'T' ) ) THEN ITRNSE = 1 NORME = 'I' ELSE IF( LSAME( TRANSE, 'C' ) ) THEN ITRNSE = 2 NORME = 'I' END IF * IF( LSAME( TRANSW, 'C' ) ) THEN ITRNSW = 1 END IF * * Normalization of E: * ENRMIN = ONE / ULP ENRMAX = ZERO IF( ITRNSE.EQ.0 ) THEN DO 20 JVEC = 1, N TEMP1 = ZERO DO 10 J = 1, N CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN TEMP1 = MAX( TEMP1, CABS1( E( ( ICOL-1 )*LDE+ $ IROW ) ) ) END IF 10 CONTINUE IF( MYCOL.EQ.JJ ) THEN CALL DGAMX2D( CONTXT, 'Col', ' ', 1, 1, TEMP1, 1, RA, CA, $ -1, -1, -1 ) ENRMIN = MIN( ENRMIN, TEMP1 ) ENRMAX = MAX( ENRMAX, TEMP1 ) END IF 20 CONTINUE CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA, -1, $ -1, -1 ) CALL DGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA, -1, $ -1, -1 ) ELSE DO 40 J = 1, N TEMP1 = ZERO DO 30 JVEC = 1, N CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN TEMP1 = MAX( TEMP1, CABS1( E( ( ICOL-1 )*LDE+ $ IROW ) ) ) END IF 30 CONTINUE IF( MYROW.EQ.II ) THEN CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, TEMP1, 1, RA, CA, $ -1, -1, -1 ) ENRMIN = MIN( ENRMIN, TEMP1 ) ENRMAX = MAX( ENRMAX, TEMP1 ) END IF 40 CONTINUE CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA, -1, $ -1, -1 ) CALL DGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA, -1, $ -1, -1 ) END IF * * Norm of A: * ANORM = MAX( PZLANGE( NORMA, N, N, A, 1, 1, DESCA, RWORK ), UNFL ) * * Norm of E: * ENORM = MAX( PZLANGE( NORME, N, N, E, 1, 1, DESCE, RWORK ), ULP ) * * Norm of error: * * Error = AE - EW * CALL PZLASET( 'Full', N, N, CZERO, CZERO, WORK, 1, 1, DESCW ) * DO 60 JCOL = 1, N IF( ITRNSW.EQ.0 ) THEN WTEMP = W( JCOL ) ELSE WTEMP = DCONJG( W( JCOL ) ) END IF * IF( ITRNSE.EQ.0 ) THEN CALL PZAXPY( N, WTEMP, E, 1, JCOL, DESCE, 1, WORK, 1, JCOL, $ DESCW, 1 ) ELSE IF( ITRNSE.EQ.1 ) THEN CALL PZAXPY( N, WTEMP, E, JCOL, 1, DESCE, N, WORK, 1, JCOL, $ DESCW, 1 ) ELSE CALL PZAXPY( N, DCONJG( WTEMP ), E, JCOL, 1, DESCE, N, WORK, $ 1, JCOL, DESCW, 1 ) DO 50 JROW = 1, N CALL INFOG2L( JROW, JCOL, DESCW, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN WORK( ( JCOL-1 )*LDW+JROW ) $ = DCONJG( WORK( ( JCOL-1 )*LDW+JROW ) ) END IF 50 CONTINUE END IF 60 CONTINUE * CALL PZGEMM( TRANSA, TRANSE, N, N, N, CONE, A, 1, 1, DESCA, E, 1, $ 1, DESCE, -CONE, WORK, 1, 1, DESCW ) * ERRNRM = PZLANGE( 'One', N, N, WORK, 1, 1, DESCW, RWORK ) / ENORM * * Compute RESULT(1) (avoiding under/overflow) * IF( ANORM.GT.ERRNRM ) THEN RESULT( 1 ) = ( ERRNRM / ANORM ) / ULP ELSE IF( ANORM.LT.ONE ) THEN RESULT( 1 ) = ( MIN( ERRNRM, ANORM ) / ANORM ) / ULP ELSE RESULT( 1 ) = MIN( ERRNRM / ANORM, ONE ) / ULP END IF END IF * * Compute RESULT(2) : the normalization error in E. * RESULT( 2 ) = MAX( ABS( ENRMAX-ONE ), ABS( ENRMIN-ONE ) ) / $ ( DBLE( N )*ULP ) * RETURN * * End of PZGET22 * END scalapack-1.8.0/TESTING/EIG/xpjlaenv.f0000640000175000017500000003435610363532303017046 0ustar muammarmuammar INTEGER FUNCTION PJLAENV( ICTXT, ISPEC, NAME, OPTS, N1, $ N2, N3, N4 ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 2, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS INTEGER ICTXT, ISPEC, N1, N2, N3, N4 * .. * * xpjlaenv.f versus pjlaenv.f * =========================== * * xpjlaenv.f is used during testing to allow the timer/tester to * control pjlaenv's return values by setting common variables. * xpjlaenv.f guarantees that the return value is the same as the * corresponding value in common. xpjlaenv.f either reads values * from common and uses them as return values or it writes the * return value to common. Either way, xpjlaenv.f's return * value and the correpsonding value in common will always match. * * When the common variable "TIMING" is set, the other common * variables are set to the values returned by xpjlaenv.f, else * xpjlaenv.f returns the values as set in common. * * Purpose * * ======= * * PJLAENV is called from the ScaLAPACK symmetric and Hermitian * tailored eigen-routines to choose * problem-dependent parameters for the local environment. See ISPEC * for a description of the parameters. * * This version provides a set of parameters which should give good, * but not optimal, performance on many of the currently available * computers. Users are encouraged to modify this subroutine to set * the tuning parameters for their particular machine using the option * and problem size information in the arguments. * * This routine will not function correctly if it is converted to all * lower case. Converting it to all upper case is allowed. * * Arguments * ========= * * ISPEC (global input) INTEGER * Specifies the parameter to be returned as the value of * PJLAENV. * = 1: the data layout blocksize; * = 2: the panel blocking factor; * = 3: the algorithmic blocking factor; * = 4: execution path control; * = 5: maximum size for direct call to the LAPACK routine * * NAME (global input) CHARACTER*(*) * The name of the calling subroutine, in either upper case or * lower case. * * OPTS (global input) CHARACTER*(*) * The character options to the subroutine NAME, concatenated * into a single character string. For example, UPLO = 'U', * TRANS = 'T', and DIAG = 'N' for a triangular routine would * be specified as OPTS = 'UTN'. * * N1 (global input) INTEGER * N2 (global input) INTEGER * N3 (global input) INTEGER * N4 (global input) INTEGER * Problem dimensions for the subroutine NAME; these may not all * be required. * * At present, only N1 is used, and it (N1) is used only for * 'TTRD' * * (PJLAENV) (global or local output) INTEGER * >= 0: the value of the parameter specified by ISPEC * < 0: if PJLAENV = -k, the k-th argument had an illegal * value. * * Most parameters set via a call to PJLAENV must be identical * on all processors and hence PJLAENV will return the same * value to all procesors (i.e. global output). However some, * in particular, the panel blocking factor can be different * on each processor and hence PJLAENV can return different * values on different processors (i.e. local output). * * Further Details * =============== * * The following conventions have been used when calling PJLAENV from * the ScaLAPACK routines: * 1) OPTS is a concatenation of all of the character options to * subroutine NAME, in the same order that they appear in the * argument list for NAME, even if they are not used in determining * the value of the parameter specified by ISPEC. * 2) The problem dimensions N1, N2, N3, N4 are specified in the order * that they appear in the argument list for NAME. N1 is used * first, N2 second, and so on, and unused problem dimensions are * passed a value of -1. * 3) The parameter value returned by PJLAENV is checked for validity * in the calling subroutine. For example, PJLAENV is used to * retrieve the optimal blocksize for STRTRI as follows: * * NB = PJLAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) * IF( NB.LE.1 ) NB = MAX( 1, N ) * * PJLAENV is patterned after ILAENV and keeps the same interface in * anticipation of future needs, even though PJLAENV is only sparsely * used at present in ScaLAPACK. Most ScaLAPACK codes use the input * data layout blocking factor as the algorithmic blocking factor - * hence there is no need or opportunity to set the algorithmic or * data decomposition blocking factor. * * pXYYtevx.f and pXYYtgvx.f and pXYYttrd.f are the only codes which * call PJLAENV in this release. pXYYtevx.f and pXYYtgvx.f redistribute * the data to the best data layout for each transformation. pXYYttrd.f * uses a data layout blocking factor of 1 and a * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL CNAME, GLOBAL, SNAME, TIME CHARACTER C1 CHARACTER*2 C2, C4 CHARACTER*3 C3 CHARACTER*8 SUBNAM INTEGER I, IC, IDUMM, IZ, MSZ, NB * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR * .. * * * .. Scalars in Common .. INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE, $ LLTBLOCK, MINSZ, PNB, TIMING, TRSBLOCK, $ TWOGEMMS * .. * .. External Subroutines .. EXTERNAL IGAMX2D * .. * .. Common blocks .. COMMON / BLOCKSIZES / GSTBLOCK, LLTBLOCK, BCKBLOCK, $ TRSBLOCK COMMON / MINSIZE / MINSZ COMMON / PJLAENVTIMING / TIMING COMMON / TAILOREDOPTS / PNB, ANB, INTERLEAVE, $ BALANCED, TWOGEMMS * .. * .. Executable Statements .. * TIME = ( TIMING.EQ.1 ) * * GO TO ( 10, 10, 10, 10, 10 )ISPEC * * Invalid value for ISPEC * PJLAENV = -1 RETURN * 10 CONTINUE * * Convert NAME to upper case if the first character is lower case. * PJLAENV = 1 SUBNAM = NAME IC = ICHAR( SUBNAM( 1: 1 ) ) IZ = ICHAR( 'Z' ) IF( IZ.EQ.100 .OR. IZ.EQ.122 ) THEN * * ASCII character set * IF( IC.GE.97 .AND. IC.LE.122 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 20 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.97 .AND. IC.LE.122 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 20 CONTINUE END IF * ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN * * EBCDIC character set * IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN SUBNAM( 1: 1 ) = CHAR( IC+64 ) DO 30 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: $ I ) = CHAR( IC+64 ) 30 CONTINUE END IF * ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN * * Prime machines: ASCII+128 * IF( IC.GE.225 .AND. IC.LE.250 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 40 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.225 .AND. IC.LE.250 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 40 CONTINUE END IF END IF * C1 = SUBNAM( 2: 2 ) SNAME = C1.EQ.'S' .OR. C1.EQ.'D' CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' IF( .NOT.( CNAME .OR. SNAME ) ) $ RETURN C2 = SUBNAM( 3: 4 ) C3 = SUBNAM( 5: 7 ) C4 = C3( 2: 3 ) * * This is to keep ftnchek happy * IF( ( N2+N3+N4 )*0.NE.0 ) THEN C4 = OPTS C3 = C4 END IF * GO TO ( 50, 60, 70, 80, 90 )ISPEC * 50 CONTINUE * * ISPEC = 1: data layout block size * (global - all processes must use the same value) * * In these examples, separate code is provided for setting NB for * real and complex. We assume that NB will take the same value in * single or double precision. * NB = 1 * IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'LLT' ) THEN IF( SNAME ) THEN NB = 64 ELSE NB = 64 END IF IF( TIME ) THEN LLTBLOCK = NB ELSE NB = LLTBLOCK IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable LLTBLOCK', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF ELSE IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 1 ELSE NB = 1 END IF ELSE IF( C3.EQ.'GST' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF IF( TIME ) THEN GSTBLOCK = NB ELSE NB = GSTBLOCK IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable GSTBLOCK', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF ELSE IF( C3.EQ.'BCK' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF IF( TIME ) THEN BCKBLOCK = NB ELSE NB = BCKBLOCK IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable BCKBLOCK', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF ELSE IF( C3.EQ.'TRS' ) THEN IF( SNAME ) THEN NB = 64 ELSE NB = 64 END IF IF( TIME ) THEN TRSBLOCK = NB ELSE NB = TRSBLOCK IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable TRSBLOCK', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF END IF END IF * * PJLAENV = NB GLOBAL = .TRUE. GO TO 100 * 60 CONTINUE * * ISPEC = 2: panel blocking factor (Used only in PxyyTTRD) * (local - different processes may use different values) * NB = 16 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF END IF END IF IF( TIME ) THEN PNB = NB ELSE NB = PNB IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable PNB', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF PJLAENV = NB GLOBAL = .FALSE. GO TO 100 * * 70 CONTINUE * * ISPEC = 3: algorithmic blocking factor (Used only in PxyyTTRD) * (global - all processes must use the same value) * NB = 16 NB = 1 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 16 ELSE NB = 16 END IF END IF END IF IF( TIME ) THEN ANB = NB ELSE NB = ANB IF( NB.LE.0 ) THEN PRINT *, 'xpjlaenv.f ERROR common variable ANB', $ ' may be unitialized' c CALL EXIT( 13 ) STOP END IF END IF PJLAENV = NB GLOBAL = .TRUE. GO TO 100 * 80 CONTINUE * * ISPEC = 4: Execution path options (Used only in PxyyTTRD) * (global - all processes must use the same value) * PJLAENV = -4 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN * V and H interleaved (default is not interleaved) IF( N1.EQ.1 ) THEN PJLAENV = 1 IF( TIME ) THEN INTERLEAVE = PJLAENV ELSE PJLAENV = INTERLEAVE END IF END IF * * Two ZGEMMs (default is one ZGEMM) IF( N1.EQ.2 ) THEN PJLAENV = 0 IF( TIME ) THEN TWOGEMMS = PJLAENV ELSE PJLAENV = TWOGEMMS END IF END IF * Balanced Update (default is minimum communication update) IF( N1.EQ.3 ) THEN PJLAENV = 0 IF( TIME ) THEN BALANCED = PJLAENV ELSE PJLAENV = BALANCED END IF END IF END IF END IF GLOBAL = .TRUE. GO TO 100 * 90 CONTINUE * * ISPEC = 5: Minimum size to justify call to parallel code * (global - all processes must use the same value) * MSZ = 0 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN MSZ = 100 ELSE MSZ = 100 END IF END IF END IF IF( TIME ) THEN MINSZ = MSZ ELSE MSZ = MINSZ END IF PJLAENV = MSZ GLOBAL = .TRUE. GO TO 100 * 100 CONTINUE * IF( GLOBAL ) THEN CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, PJLAENV, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * * * RETURN * * End of PJLAENV * END scalapack-1.8.0/TESTING/EIG/pdhrddriver.f0000640000175000017500000004537010363532303017532 0ustar muammarmuammar PROGRAM PDHRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PDHRDDRIVER is the main test program for the DOUBLE PRECISION * ScaLAPACK HRD (Hessenberg Reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 14 lines: * 'ScaLAPACK HRD input file' * 'PVM machine' * 'HRD.out' output file name (if any) * 6 device out * 2 number of problems sizes * 100 101 values of N * 2 1 values of ILO * 99 101 values of IHI * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 3.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, MEMSIZ, NTESTS, TOTMEM DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IHI, IHIP, IHLP, IHLQ, $ ILCOL, ILO, ILROW, INFO, INLQ, IMIDPAD, IPA, $ IPT, IPW, IPOSTPAD, IPREPAD, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LCMQ, LOFF, $ LWORK, MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, $ NPROCS, NOUT, NP, NPCOL, NPROW, NQ, WORKHRD, $ WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), NVHI( NTESTS ), NVLO( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINIT, BLACS_GRIDINFO, $ DESCINIT, IGSUM2D, BLACS_PINFO, PDFILLPAD, $ PDLAFCHK, PDGEHDRV, PDGEHRD, $ PDHRDINFO, PDMATGEN, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC DOUBLE PRECISION PDLANGE EXTERNAL ILCM, INDXG2P, NUMROC, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDHRDINFO( OUTFILE, NOUT, NMAT, NVAL, NVLO, NVHI, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GOTO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) ILO = NVLO( J ) IHI = NVHI( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * DO 10 K = 1, NNB NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP ) + IMIDPAD, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPT = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * IHIP = NUMROC( IHI, NB, MYROW, DESCA( RSRC_ ), NPROW ) LOFF = MOD( ILO-1, NB ) ILROW = INDXG2P( ILO, NB, MYROW, DESCA( RSRC_ ), NPROW ) ILCOL = INDXG2P( ILO, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) IHLP = NUMROC( IHI-ILO+LOFF+1, NB, MYROW, ILROW, NPROW ) INLQ = NUMROC( N-ILO+LOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWORK = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) WORKHRD = LWORK + IPOSTPAD WORKSIZ = WORKHRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IHLQ = NUMROC( IHI-ILO+LOFF+1, NB, MYCOL, ILCOL, $ NPCOL ) ITEMP = NB*MAX( IHLP+INLQ, IHLQ+MAX( IHIP, $ IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ) ) ) WORKSIZ = MAX( NB*NB + NB*IHLP + ITEMP, NB * NP ) + $ IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Hessenberg reduction', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate A * CALL PDMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), $ IASEED, 0, NP, 0, NQ, MYROW, MYCOL, $ NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), $ NQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANGE', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKHRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKHRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce Hessenberg form * CALL PDGEHRD( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ), LWORK, INFO ) CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGEHRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEHRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEHRD', WORKHRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKHRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - Q H Q'|| / (||A||*N*eps) * CALL PDGEHDRV( N, ILO, IHI, MEM( IPA ), 1, 1, DESCA, $ MEM( IPT ), MEM( IPW ) ) CALL PDLAFCHK( 'No', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDGEHDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEHDRV', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDGEHDRV', $ WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0D+0 ) $ THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather max. of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * HRD requires 10/3 * N^3 floating point ops. (flops) * more precisely, * HRD requires 4/3*(IHI-ILO)^3 + 2*IHI*(IHI-ILO)^2 flops * NOPS = DBLE( IHI-ILO ) NOPS = NOPS * NOPS * $ ( 2.0D0*DBLE( IHI ) + (4.0D0/3.0D0)*NOPS ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', N, ILO, IHI, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, $ PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', N, ILO, IHI, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, $ PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N ILO IHI NB P Q HRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, $ F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*H*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PDHRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pdrptseptst.f0000640000175000017500000000526110363532303017604 0ustar muammarmuammar* * PROGRAM PDRPTSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Repeat parallel symmetric eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER HETERO, SUBTESTS, UPLO INTEGER CONTEXT, IAM, INFO, IPOSTPAD, IPREPAD, LDA, $ MATTYPE, N, NB, NPCOL, NPROCS, NPROW DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) DOUBLE PRECISION A( MAXN*MAXN ), COPYA( MAXN*MAXN ), $ GAP( MAXN ), WIN( MAXN ), WNEW( MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PDSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * Set HETERO to 'Y' if you want to turn off the PxSYEV tests * HETERO = 'N' * * These lines should be replaced by the output from pxSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351D-37 THRESH = .350000D+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PDSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, N, $ ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, WORK, $ LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, HETERO, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PDRPTSEPTST * END scalapack-1.8.0/TESTING/EIG/pzsepchk.f0000640000175000017500000002427410363532303017044 0ustar muammarmuammar* * SUBROUTINE PZSEPCHK( MS, NV, A, IA, JA, DESCA, EPSNORMA, THRESH, $ Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, W, WORK, $ LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, IQ, JA, JC, JQ, LWORK, MS, NV, RESULT DOUBLE PRECISION EPSNORMA, THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCC( * ), DESCQ( * ) DOUBLE PRECISION W( * ), WORK( * ) COMPLEX*16 A( * ), C( * ), Q( * ) * .. * * Purpose * ======= * * Compute |AQ- QL| / (EPSNORMA * N) * where EPSNORMA = (abstol + eps)*norm(A) when called by pdsqpsubtst. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, C and Q * MQ = number of local columns in A * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, C and Q * Also, the number of global columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) COMPLEX*16 pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the MS-by-MS * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * EPSNORMA (input) DOUBLE PRECISION * abstol + eps * inf.norm(A) * Abstol is absolute tolerence for the eigenvalues and is set * in the calling routines, pdsepsubtst and pdsqpsubtst. * * THRESH (input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) COMPLEX*16 array * global dimension (MS, NV), local dimension (DESCA(DLEN_), NQ) * * Contains the eigenvectors as computed by PZHEEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) COMPLEX*16 array, * global dimension (NV, NV), local dimension (DESCA(DLEN_), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (global input) DOUBLE PRECISION array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) DOUBLE PRECISION array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / ( EPSNROMA * MS ) * * RESULT (global output) INTEGER * 0 if the test passes i.e. * |AQ -QL| / (abstol + eps * norm(A) ) <= n* THRESH * 1 if the test fails i.e. * |AQ -QL| / (abstol + eps * norm(A) ) > n * THRESH * * .. Local Scalars .. * INTEGER INFO, J, LOCALCOL, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ, PCOL DOUBLE PRECISION NORM * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, NEGONE PARAMETER ( ONE = 1.0D+0, NEGONE = -1.0D+0 ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC DOUBLE PRECISION PZLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, PZGEMM, $ ZDSCAL, ZLACPY * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 12, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IC, JC, DESCC, 16, INFO ) * IF( INFO.EQ.0 ) THEN * MP = NUMROC( MS, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -10 ELSE IF( JQ.NE.1 ) THEN INFO = -11 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( IC.NE.1 ) THEN INFO = -14 ELSE IF( JC.NE.1 ) THEN INFO = -15 ELSE IF( LWORK.LT.NQ ) THEN INFO = -19 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZSEPCHK', -INFO ) RETURN END IF * * C = Q * W * CALL ZLACPY( 'A', MP, NQ, Q, DESCQ( LLD_ ), C, DESCC( LLD_ ) ) * * DO 10 J = 1, NV PCOL = INDXG2P( J, DESCC( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( J, DESCC( NB_ ), 0, 0, NPCOL ) * IF( MYCOL.EQ.PCOL ) THEN CALL ZDSCAL( MP, W( J ), C( ( LOCALCOL-1 )*DESCC( LLD_ )+ $ 1 ), 1 ) END IF 10 CONTINUE * * * C = C - A * Q * CALL PZGEMM( 'N', 'N', MS, NV, MS, NEGONE, A, 1, 1, DESCA, Q, 1, $ 1, DESCQ, ONE, C, 1, 1, DESCC ) * * Compute the norm of C * * NORM = PZLANGE( 'M', MS, NV, C, 1, 1, DESCC, WORK ) * * TSTNRM = NORM / EPSNORMA / MAX( MS, 1 ) * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0D0 ) ) THEN RESULT = 1 END IF * * RETURN * * End of PZSEPCHK * END scalapack-1.8.0/TESTING/EIG/pssepqtq.f0000640000175000017500000002521110363532303017065 0ustar muammarmuammar* * SUBROUTINE PSSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, $ QTQNRM, INFO, RES ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES REAL QTQNRM, THRESH * .. * .. Array Arguments .. * INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ), $ PROCDIST( * ) REAL C( * ), GAP( * ), Q( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * Compute |I - QT * Q| / (ulp * n) * * Arguments * ========= * * NP = number of local rows in C * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in Q * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array, * global dimension (MS, NV), local dimension (LDQ, NQ) * * Contains the eigenvectors as computed by PSSTEIN * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ) * * Accumulator for computing I - QT * Q * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (input) REAL array, dimension (NV) * All procesors have an identical copy of W() * * Contains the computed eigenvalues * * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1) * Identifies which eigenvectors are the last to be computed * by a given process * * ICLUSTR (global input) INTEGER array dimension (2*P) * This input array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace. * This should be the output of PSSTEIN. * * GAP (global input) REAL array, dimension (P) * This input array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * Where: * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * * QTQNRM (global output) REAL * |QTQ -I| / EPS * * RES (global output) INTEGER * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, NEGONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, $ NEGONE = -1.0E+0 ) * .. * .. Intrinsic Functions .. * INTRINSIC MAX, REAL * .. * .. Local Scalars .. INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN, $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW REAL NORM, QTQNRM2, ULP * .. * .. External Functions .. INTEGER NUMROC REAL PSLAMCH, PSLANGE EXTERNAL NUMROC, PSLAMCH, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PSGEMM, PSLASET, $ PSMATADD, PXERBLA * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * RES = 0 ULP = PSLAMCH( DESCC( CTXT_ ), 'P' ) * CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO ) CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO ) * IF( INFO.EQ.0 ) THEN NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * IF( IQ.NE.1 ) THEN INFO = -5 ELSE IF( JQ.NE.1 ) THEN INFO = -6 ELSE IF( IC.NE.1 ) THEN INFO = -9 ELSE IF( JC.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -16 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCC( CTXT_ ), 'PSSEPQTQ', -INFO ) RETURN END IF * * C = Identity matrix * CALL PSLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC ) * * C = C - QT * Q * IF( NV*MS.GT.0 ) THEN CALL PSGEMM( 'Transpose', 'N', NV, NV, MS, NEGONE, Q, 1, 1, $ DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC ) END IF * * Allow for poorly orthogonalized eigenvectors for large clusters * NORM = PSLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) QTQNRM = NORM / ( REAL( MAX( MS, 1 ) )*ULP ) * CLUSTER = 1 10 CONTINUE DO 20 FIRSTP = 1, NPROW*NPCOL IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE * IMIN = ICLUSTR( 2*CLUSTER-1 ) JMAX = ICLUSTR( 2*CLUSTER ) * * IF( IMIN.EQ.0 ) $ GO TO 60 * DO 40 NEXTP = FIRSTP, NPROW*NPCOL IMAX = PROCDIST( NEXTP ) JMIN = IMAX + 1 * * CALL PSMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN, $ DESCC, GAP( CLUSTER ) / 0.01E+0, C, IMIN, JMIN, $ DESCC ) CALL PSMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN, $ DESCC, GAP( CLUSTER ) / 0.01E+0, C, JMIN, IMIN, $ DESCC ) IMIN = IMAX * IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) ) $ GO TO 50 40 CONTINUE 50 CONTINUE * CLUSTER = CLUSTER + 1 GO TO 10 60 CONTINUE * * Compute the norm of C * NORM = PSLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) * QTQNRM2 = NORM / ( REAL( MAX( MS, 1 ) )*ULP ) * IF( QTQNRM2.GT.THRESH ) THEN RES = 1 QTQNRM = QTQNRM2 END IF RETURN * * End of PSSEPQTQ * END scalapack-1.8.0/TESTING/EIG/pslagsy.f0000640000175000017500000002526010363532303016673 0ustar muammarmuammar* * SUBROUTINE PSLAGSY( N, K, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL A( * ), D( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * PSLAGSY generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. * * This is just a quick implementation which will be replaced in the * future. The random orthogonal matrix is computed by creating a * random matrix and running QR on it. This requires vastly more * computation than necessary, but not significantly more communication * than is used in the rest of this rouinte, and hence is not that much * slower than an efficient solution. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix A. N >= 0. * * K (global input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * ### K must be 0 or N-1, 0 < K < N-1 is not supported yet. * * D (global input) REAL array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) REAL array * Global dimension (N, N), local dimension (NP, NQ) * The generated n by n symmetric matrix A (the full matrix is * stored). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated and will remain identical on * all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PSLASIZESEP * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, II, IIROW, $ INDAA, INDTAU, INDWORK, IPOSTPAD, IPREPAD, $ IROFFA, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ JJCOL, LDAA, LII, LIII, LJJ, LJJJ, LWMIN, MAXI, $ MB_A, MYCOL, MYROW, NB_A, NP, NPCOL, NPROW, NQ, $ RSRC_A, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PSGEQRF, PSLASIZESEP, $ PSMATGEN, PSORMQR, PXERBLA, SLASET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. * INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Initialize grid information * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) END IF * LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) NP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) IPREPAD = 0 IPOSTPAD = 0 CALL PSLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) LWMIN = SIZETMS * * Test the input arguments * IF( INFO.EQ.0 ) THEN IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( N.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSLAGSY', -INFO ) RETURN END IF * INDAA = 1 INDTAU = INDAA + LDAA*MAX( 1, NQ ) INDWORK = INDTAU + MAX( 1, NQ ) * IF( K.NE.0 ) THEN CALL SLASET( 'A', LDAA, NQ, ZERO, ZERO, WORK( INDAA ), LDAA ) * * * Build a random matrix * * CALL PSMATGEN( DESCA( CTXT_ ), 'N', 'N', N, ORDER, $ DESCA( MB_ ), DESCA( NB_ ), WORK( INDAA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ ISEED( 1 ), 0, NP, 0, NQ, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PSGEQRF( N, ORDER, WORK( INDAA ), IA, JA, DESCA, $ WORK( INDTAU ), WORK( INDWORK ), SIZEQRF, INFO ) * END IF * * Build a diagonal matrix A with the eigenvalues specified in D * CALL SLASET( 'A', NP, NQ, ZERO, ZERO, A, DESCA( LLD_ ) ) * IIROW = 0 JJCOL = 0 LII = 1 LJJ = 1 * DO 20 II = 1, N, DESCA( MB_ ) MAXI = MIN( N, II+DESCA( MB_ )-1 ) IF( ( MYROW.EQ.IIROW ) .AND. ( MYCOL.EQ.JJCOL ) ) THEN LIII = LII LJJJ = LJJ DO 10 I = II, MAXI A( LIII+( LJJJ-1 )*DESCA( LLD_ ) ) = D( I ) LIII = LIII + 1 LJJJ = LJJJ + 1 10 CONTINUE END IF IF( MYROW.EQ.IIROW ) $ LII = LII + DESCA( MB_ ) IF( MYCOL.EQ.JJCOL ) $ LJJ = LJJ + DESCA( MB_ ) IIROW = MOD( IIROW+1, NPROW ) JJCOL = MOD( JJCOL+1, NPCOL ) 20 CONTINUE * * A = Q * A * IF( K.NE.0 ) THEN * CALL PSORMQR( 'L', 'Transpose', N, N, ORDER, WORK( INDAA ), IA, $ JA, DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRLEFT, INFO ) * * * A = A * Q' * * CALL PSORMQR( 'R', 'N', N, N, ORDER, WORK( INDAA ), IA, JA, $ DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRRIGHT, INFO ) * END IF * * End of PSLAGSY * END scalapack-1.8.0/TESTING/EIG/pslafchk.f0000640000175000017500000002220010363532303016773 0ustar muammarmuammar SUBROUTINE PSLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N REAL ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) REAL * The Infinity norm of sub( A ). * * FRESID (global output) REAL * The maximum (worst) factorizational error. * * WORK (local workspace) REAL array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, IROFF, $ JB, JJ, JJA, JN, LDA, LDW, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ REAL EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSMATGEN, SMATADD * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL SMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 10 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 ) * LDA CALL PSMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1, $ MP, JJ-1, JB, MYROW, MYCOL, NPROW, NPCOL ) CALL SMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA ) 10 CONTINUE * * Calculate factor residual * FRESID = PSLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PSLAFCHK * END scalapack-1.8.0/TESTING/EIG/pctrddriver.f0000640000175000017500000004717410363532303017551 0ustar muammarmuammar PROGRAM PCTRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * Purpose * ======== * * PCTRDDRIVER is the main test program for the COMPLEX * SCALAPACK TRD (symmetric tridiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK TRD computation input file' * 'PVM machine' * 'TRD.out' output file name * 6 device out * 'L' define Lower or Upper * 3 number of problems sizes * 5 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * CPLXSZ INTEGER, default = 8 bytes. * INTGSZ and CPLXSZ indicate the length in bytes on the * given platform for an integer and a single precision * complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, REALSZ, TOTMEM, MEMSIZ, NTESTS COMPLEX PADVAL PARAMETER ( CPLXSZ = 8, REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LWORK, MYCOL, $ MYROW, N, NB, NDIAG, NGRIDS, NMAT, NNB, NOFFD, $ NOUT, NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ, $ WORKTRD REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) COMPLEX MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCHETDRV, PCHETRD, PCLAFCHK, $ PCMATGEN, PCTRDINFO, PCTTRDTESTER, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC REAL PCLANHE EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PCLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )STOP * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCTRDINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF NDIAG = ICEIL( REALSZ*NDIAG, CPLXSZ ) NOFFD = ICEIL( REALSZ*NOFFD, CPLXSZ ) * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = MAX( NB*( NP+1 ), 3*NB ) WORKTRD = LWORK + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ICEIL( REALSZ*ITEMP, CPLXSZ ), $ 2*( NB+NP )*NB ) WORKSIZ = MAX( LWORK, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PCMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PCHETRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCHETRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETRD', WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PCHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) CALL PCLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCHETDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0E+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) * NOPS = ( 4.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', UPLO, N, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', UPLO, N, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * CALL PCTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, NMAT, $ MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB P Q TRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ---- ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, A4, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PCTRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pslagge.f0000640000175000017500000003076310363532303016637 0ustar muammarmuammar SUBROUTINE PSLAGGE( M, N, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL A( * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAGGE generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal * matrices: A = U*D*VT. * * This is just a quick implementation which will be replaced in the * future. The random matrix A1(m,n) is generated and random left * orthogonal matrix U(m,m) is obtained by running QR on A1: * A1(m,n) = U(m,m)*R, * where U(m,m) is a product of min(m,n) Householder rotations. * Afterwards the space of A1 is reused for a second random matrix * A2(m,n), which is used to obtain the right orthogonal matrix VT(n,n) * by running LQ on A2: * A2(m,n) = L*VT(n,n). * This requires vastly more computation than necessary, but not * significantly more communication than is used in the rest of this * routine, and hence is not that much slower than an efficient * solution. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * Number of rows of the matrix A. M >= 0. * * N (global input) INTEGER * Number of columns of matrix A. N >= 0. * * D (local input) REAL array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) REAL array * Global dimension (M, N), local dimension (MP, NQ) * * IA (global input) INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. On exit, the seed is updated and will remain identical * on all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= MAX( QR_WORK, LQ_WORK ) * QR_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU1 ) + * MAX( SIZEMQRLEFT, SIZEQRF) * LQ_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU2) + * MAX( SIZEMLQRIGHT, SIZEQRF ) * Where: * LDAA = DESCA( LLD_ ) * MB_A = DESCA( MB_ ) * NB_A = DESCA( NB_ ) * RSRC_A = DESCA( RSRC_ ) * CSRC_A = DESCA( CSRC_ ) * LCM = ILCM( NPROW, NPCOL ) * LCMQ = LCM / NPCOL * IROFFA = MOD( IA-1, MB_A ) * ICOFFA = MOD( JA-1, NB_A ) * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * MP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) * NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) * DTAU1 = NUMROC( JA + SIZE- 1, NB_A, MYCOL, IACOL, NPROW ) * DTAU2 = NUMROC( IA + SIZE- 1, MB_A, MYROW, IAROW, NPROW ) * SIZEMQRLEFT = MAX( (MB_A*(MB_A-1))/2, ( MP + NQ ) * MB_A ) * + ( MP + NB_A ) * NB_A * SIZEMLQRIGHT = MAX( (MB_A*(MB_A-1))/2, (MP + NQ)*MB_A ) + * MB_A * MB_A * SIZEQRF = NB_A*NP + MB_A*NQ + NB_A*NB_A * * INFO (local output) INTEGER * * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, DTAU1, DTAU2, I, IACOL, IAROW, ICOFFA, $ IROFFA, LCM, LCMQ, LDAA, LQ_WORK, LWMIN, MB_A, $ MP, MYCOL, MYROW, NB_A, NPCOL, NPROW, NQ, $ PTR2AA, PTR2TAU, PTR2WORK, QR_WORK, RSRC_A, $ SIZE, SIZELQF, SIZEMLQRIGHT, SIZEMQRLEFT, $ SIZEQRF * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PSELSET, PSGELQF, $ PSGEQRF, PSLASET, PSMATGEN, PSORMLQ, PSORMQR, $ PXERBLA * .. * .. External Functions .. INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*DLEN_*DTYPE_*M_*N_.LT.0 )RETURN * * Initialize grid information. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK. * INFO = 0 SIZE = MIN( M, N ) IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 8, INFO ) END IF * Calculation of a minimum workspace. LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) DTAU1 = NUMROC( JA+SIZE-1, NB_A, MYCOL, IACOL, NPCOL ) DTAU2 = NUMROC( IA+SIZE-1, MB_A, MYROW, IAROW, NPROW ) MP = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) * SIZEMQRLEFT = MAX( ( MB_A*( MB_A-1 ) ) / 2, ( MP+NQ )*MB_A ) + $ ( MP+NB_A )*NB_A SIZEMLQRIGHT = MAX( ( MB_A*( MB_A-1 ) ) / 2, ( MP+NQ )*MB_A ) + $ MB_A*MB_A SIZEQRF = NB_A*MP + MB_A*NQ + NB_A*NB_A + 100 SIZELQF = NB_A*( MP+NQ+NB_A ) + 100 * QR_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU1 ) + $ MAX( SIZEMQRLEFT, SIZEQRF ) LQ_WORK = LDAA*MAX( 1, NQ ) + 200 + MAX( 1, DTAU2 ) + $ MAX( SIZEMLQRIGHT, SIZELQF ) LWMIN = MAX( QR_WORK, LQ_WORK ) WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 20 * * Test the input arguments. * IF( INFO.EQ.0 ) THEN IF( SIZE.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSLAGGE', -INFO ) RETURN END IF * * Build a diagonal matrix A with the eigenvalues specified in D. * CALL PSLASET( 'Full', M, N, ZERO, ZERO, A, IA, JA, DESCA ) DO 10 I = 1, SIZE CALL PSELSET( A, I, I, DESCA, D( I ) ) 10 CONTINUE * * Local dimension of array TAU in tis case is LOCc(JA+MIN(M,N)-1). * PTR2AA = 2 PTR2TAU = PTR2AA + LDAA*MAX( 1, NQ ) + 100 PTR2WORK = PTR2TAU + MAX( 1, DTAU1 ) + 100 * CALL PSLASET( 'All', M, N, ZERO, ZERO, WORK( PTR2AA ), IA, JA, $ DESCA ) * * Build a random matrix AA1. * CALL PSMATGEN( DESCA( CTXT_ ), 'N', 'N', M, N, DESCA( MB_ ), $ DESCA( NB_ ), WORK( PTR2AA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), 0, MP, $ 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Produce QR decomposition AA1 -> U*R. * CALL PSGEQRF( M, N, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), WORK( PTR2WORK ), SIZEQRF, INFO ) * * A = U*A. * CALL PSORMQR( 'L', 'N', M, N, SIZE, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), A, IA, JA, DESCA, WORK( PTR2WORK ), $ SIZEMQRLEFT, INFO ) * * Reinitialize pointer to WORK array. Dimension of array TAU in * this case is LOCr(IA+MIN(M,N)-1). * PTR2WORK = PTR2TAU + MAX( 1, DTAU2 ) + 100 * * Use the same workspace to generate a random matrix AA2. * CALL PSMATGEN( DESCA( CTXT_ ), 'N', 'N', M, N, DESCA( MB_ ), $ DESCA( NB_ ), WORK( PTR2AA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 2 ), 0, MP, $ 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) * * Produce LQ decomposition of random matrix AA2 -> L*VT. * CALL PSGELQF( M, N, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), WORK( PTR2WORK ), SIZELQF, INFO ) * * Calculate A = A*VT. * CALL PSORMLQ( 'R', 'N', M, N, SIZE, WORK( PTR2AA ), IA, JA, DESCA, $ WORK( PTR2TAU ), A, IA, JA, DESCA, WORK( PTR2WORK ), $ SIZEMLQRIGHT, INFO ) * * End of PSLAGGE * 20 CONTINUE RETURN END scalapack-1.8.0/TESTING/EIG/pdtrddriver.f0000640000175000017500000004664610363532303017555 0ustar muammarmuammar PROGRAM PDTRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * Purpose * ======== * * PDTRDDRIVER is the main test program for the DOUBLE PRECISION * SCALAPACK TRD (symmetric tridiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK TRD computation input file' * 'PVM machine' * 'TRD.out' output file name * 6 device out * 'L' define Lower or Upper * 3 number of problems sizes * 5 31 201 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * DBLESZ INTEGER, default = 8 bytes. * INTGSZ and DBLESZ indicate the length in bytes on the * given platform for an integer and a double precision real. * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ, TOTMEM, MEMSIZ, NTESTS DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER UPLO CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LCM, LWORK, MYCOL, $ MYROW, N, NB, NDIAG, NGRIDS, NMAT, NNB, NOFFD, $ NOUT, NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ, $ WORKTRD REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDLAFCHK, PDMATGEN, PDSYTDRV, $ PDSYTRD, PDTRDINFO, PDTTRDTESTER, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC DOUBLE PRECISION PDLANSY EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PDLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )STOP * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PDTRDINFO( OUTFILE, NOUT, UPLO, NMAT, NVAL, NTESTS, NNB, $ NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = MAX( NB*( NP+1 ), 3*NB ) WORKTRD = LWORK + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ITEMP, 2*( NB+NP )*NB ) WORKSIZ = MAX( LWORK, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PDMATGEN( ICTXT, 'Symm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PDSYTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDSYTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTRD', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTRD', WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PDSYTDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) CALL PDLAFCHK( 'Symm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDSYTDRV', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0D+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 4/3 N^3 floating point operations * NOPS = DBLE( N ) * NOPS = ( 4.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', UPLO, N, NB, $ NPROW, NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', UPLO, N, NB, $ NPROW, NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * CALL PDTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, NMAT, $ MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME UPLO N NB P Q TRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ---- ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, A4, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PDTRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pmatgeninc.f0000640000175000017500000002003110363532303017325 0ustar muammarmuammar* ===================================================================== * SUBROUTINE LADD * ===================================================================== * SUBROUTINE LADD( J, K, I ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER I(2), J(2), K(2) * .. * * ===================================================================== * * .. Parameters .. INTEGER IPOW16, IPOW15 PARAMETER ( IPOW16=2**16, IPOW15=2**15 ) * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * I(1) = MOD( K(1)+J(1), IPOW16 ) I(2) = MOD( (K(1)+J(1)) / IPOW16+K(2)+J(2), IPOW15 ) * RETURN * * End of LADD * END * * ===================================================================== * SUBROUTINE LMUL * ===================================================================== * SUBROUTINE LMUL( K, J, I ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER I(2), J(2), K(2) * .. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16, IPOW30 PARAMETER ( IPOW15=2**15, IPOW16=2**16, IPOW30=2**30 ) * .. * .. Local Scalars .. INTEGER KT, LT * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * KT = K(1)*J(1) IF( KT.LT.0 ) KT = (KT+IPOW30) + IPOW30 I(1) = MOD(KT,IPOW16) LT = K(1)*J(2) + K(2)*J(1) IF( LT.LT.0 ) LT = (LT+IPOW30) + IPOW30 KT = KT/IPOW16 + LT IF( KT.LT.0 ) KT = (KT+IPOW30) + IPOW30 I(2) = MOD( KT, IPOW15 ) * RETURN * * End of LMUL * END * * ===================================================================== * SUBROUTINE XJUMPM * ===================================================================== * SUBROUTINE XJUMPM( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER JUMPM * .. * .. Array Arguments .. INTEGER IADD(2), IAM(2), ICM(2), IRANM(2), IRANN(2) INTEGER MULT(2) * .. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Executable Statements .. * IF( JUMPM.GT.0 ) THEN DO 10 I = 1, 2 IAM(I) = MULT(I) ICM(I) = IADD(I) 10 CONTINUE DO 20 I = 1, JUMPM-1 CALL LMUL( IAM, MULT, J ) IAM(1) = J(1) IAM(2) = J(2) CALL LMUL( ICM, MULT, J ) CALL LADD( IADD, J, ICM ) 20 CONTINUE CALL LMUL( IRANN, IAM, J ) CALL LADD( J, ICM, IRANM ) ELSE IRANM(1) = IRANN(1) IRANM(2) = IRANN(2) END IF * RETURN * * End of XJUMPM * END * * ===================================================================== * SUBROUTINE SETRAN * ===================================================================== * SUBROUTINE SETRAN( IRAN, IA, IC ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER IA(2), IC(2), IRAN(2) * .. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER IAS(2), ICS(2), IRAND(2) * .. * .. Common Blocks .. COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * DO 10 I = 1, 2 IRAND(I) = IRAN(I) IAS(I) = IA(I) ICS(I) = IC(I) 10 CONTINUE * RETURN * * End of SETRAN * END * * ===================================================================== * SUBROUTINE JUMPIT * ===================================================================== * SUBROUTINE JUMPIT( MULT, IADD, IRANN, IRANM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. INTEGER IADD(2), IRANM(2), IRANN(2), MULT(2) * .. * * ===================================================================== * * .. Local Arrays .. INTEGER IAS(2), ICS(2), IRAND(2), J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Common Blocks .. COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * CALL LMUL( IRANN, MULT, J ) CALL LADD( J, IADD, IRANM ) * IRAND(1) = IRANM(1) IRAND(2) = IRANM(2) * RETURN * * End of JUMPIT * END * * ===================================================================== * REAL FUNCTION PSRAND * ===================================================================== * REAL FUNCTION PSRAND( IDUMM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC=2.147483648E+9, POW16=6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IAS(2), ICS(2), IRAND(2) COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * PSRAND = ( REAL(IRAND(1)) + POW16 * REAL(IRAND(2)) ) / DIVFAC * CALL LMUL( IRAND, IAS, J ) CALL LADD( J, ICS, IRAND ) * RETURN * * End of PSRAND * END * * ===================================================================== * DOUBLE PRECISION FUNCTION PDRAND * ===================================================================== * DOUBLE PRECISION FUNCTION PDRAND( IDUMM ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC=2.147483648D+9, POW16=6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J(2) * .. * .. External Subroutines .. EXTERNAL LADD, LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IAS(2), ICS(2), IRAND(2) COMMON /RANCOM/ IRAND, IAS, ICS SAVE /RANCOM/ * .. * .. Executable Statements .. * PDRAND = ( DBLE(IRAND(1)) + POW16 * DBLE(IRAND(2)) ) / DIVFAC * CALL LMUL( IRAND, IAS, J ) CALL LADD( J, ICS, IRAND ) * RETURN * * End of PDRAND * END scalapack-1.8.0/TESTING/EIG/pzsdpsubtst.f0000640000175000017500000004031310363532303017612 0ustar muammarmuammar SUBROUTINE PZSDPSUBTST( WKNOWN, UPLO, N, THRESH, ABSTOL, A, COPYA, $ Z, IA, JA, DESCA, WIN, WNEW, IPREPAD, $ IPOSTPAD, WORK, LWORK, RWORK, LRWORK, $ LWORK1, IWORK, LIWORK, RESULT, TSTNRM, $ QTQNRM, NOUT ) * * -- ScaLAPACK testing routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 28, 2000 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER UPLO INTEGER IA, IPOSTPAD, IPREPAD, JA, LIWORK, LRWORK, $ LWORK, LWORK1, N, NOUT, RESULT DOUBLE PRECISION ABSTOL, QTQNRM, THRESH, TSTNRM * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) DOUBLE PRECISION RWORK( * ), WIN( * ), WNEW( * ) COMPLEX*16 A( * ), COPYA( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZSDPSUBTST calls PZHEEVD and then tests the output of * PZHEEVD * If JOBZ = 'V' then the following two tests are performed: * |AQ -QL| / (abstol + eps * norm(A) ) < N*THRESH * |QT * Q - I| / eps * norm(A) < N*THRESH * If WKNOWN then * we check to make sure that the eigenvalues match expectations * i.e. |WIN - WNEW(1+IPREPAD)| / (eps * |WIN|) < THRESH * where WIN is the array of eigenvalues as computed by * PZHEEVD when eigenvectors are requested * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PZSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * A (local workspace) COMPLEX*16 array * global dimension (N, N), local dimension (DESCA(DLEN_), NQ) * A is distributed in a block cyclic manner over both rows * and columns. * See PZHEEVD for a description of block cyclic layout. * The test matrix, which is then modified by PZHEEVD * A has already been padded front and back, use A(1+IPREPAD) * * COPYA (local input) COMPLEX*16 array, dimension(N*N) * COPYA holds a copy of the original matrix A * identical in both form and content to A * * Z (local workspace) COMPLEX*16 array, dim (N*N) * Z is distributed in the same manner as A * Z contains the eigenvector matrix * Z is used as workspace by the test routines * PZSEPCHK and PZSEPQTQ. * Z has already been padded front and back, use Z(1+IPREPAD) * * IA (global input) INTEGER * On entry, IA specifies the global row index of the submatrix * of the global matrix A, COPYA and Z to operate on. * * JA (global input) INTEGER * On entry, IA specifies the global column index of the submat * of the global matrix A, COPYA and Z to operate on. * * DESCA (global/local input) INTEGER array of dimension 8 * The array descriptor for the matrix A, COPYA and Z. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If .not. WKNOWN, WIN is ignored on input * Otherwise, WIN() is taken as the standard by which the * eigenvalues are to be compared against. * * WNEW (global workspace) DOUBLE PRECISION array, dimension (N) * The eigenvalues as copmuted by this call to PZHEEVD * If JOBZ <> 'V' or RANGE <> 'A' these eigenvalues are * compared against those in WIN(). * WNEW has already been padded front and back, * use WNEW(1+IPREPAD) * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * WORK has already been padded front and back, * use WORK(1+IPREPAD) * * LWORK (local input) INTEGER * The actual length of the array WORK after padding. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * RWORK has already been padded front and back, * use RWORK(1+IPREPAD) * * LRWORK (local input) INTEGER * The actual length of the array RWORK after padding. * * LWORK1 (local input) INTEGER * The amount of DOUBLE PRECISION workspace to pass to PZHEEVD * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * IWORK has already been padded front and back, * use IWORK(1+IPREPAD) * * LIWORK (local input) INTEGER * The length of the array IWORK after padding. * * RESULT (global output) INTEGER * The result of this call to PZHEEVD * RESULT = -3 => This process did not participate * RESULT = 0 => All tests passed * RESULT = 1 => ONe or more tests failed * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / (ABSTOL+EPS*|A|)*N * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / N*EPS * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION PADVAL, FIVE, NEGONE PARAMETER ( PADVAL = 13.5285D+0, FIVE = 5.0D+0, $ NEGONE = -1.0D+0 ) COMPLEX*16 CPADVAL PARAMETER ( CPADVAL = ( 13.989D+0, 1.93D+0 ) ) INTEGER IPADVAL PARAMETER ( IPADVAL = 927 ) COMPLEX*16 CZERO, CONE, CNEGONE PARAMETER ( CZERO = 0.0D+0, CONE = 1.0D+0, $ CNEGONE = -1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAM, INFO, ISIZEHEEVD, ISIZEHEEVX, $ ISIZESUBTST, ISIZETST, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ, NZ, RES, RSIZECHK, RSIZEHEEVD, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVD, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST DOUBLE PRECISION EPS, EPSNORMA, ERROR, MAXERROR, MINERROR, NORM, $ NORMWIN, SAFMIN, ULP * .. * .. Local Arrays .. INTEGER ITMP( 2 ) * .. * .. External Functions .. * INTEGER NUMROC DOUBLE PRECISION PZLANGE, PZLANHE, PDLAMCH EXTERNAL NUMROC, PZLANGE, PZLANHE, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZLACPY, IGAMN2D, IGAMX2D, $ PZCHEKPAD, PZFILLPAD, PZGEMM, PZHEEVD, PZLASET, $ PZLASIZESEP, PZSEPCHK, PICHEKPAD, PIFILLPAD, $ PDCHEKPAD, PDFILLPAD, SLBOOT, SLTIMER * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, DBLE * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL PZLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZEHEEVD, RSIZEHEEVD, ISIZEHEEVD, SIZESUBTST, $ RSIZESUBTST, ISIZESUBTST, SIZETST, RSIZETST, $ ISIZETST ) * TSTNRM = NEGONE QTQNRM = NEGONE EPS = PDLAMCH( DESCA( CTXT_ ), 'Eps' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe min' ) * NORMWIN = SAFMIN / EPS IF( N.GE.1 ) $ NORMWIN = MAX( ABS( WIN( 1+IPREPAD ) ), $ ABS( WIN( N+IPREPAD ) ), NORMWIN ) * DO 10 I = 1, LWORK1, 1 RWORK( I+IPREPAD ) = 14.3D+0 10 CONTINUE DO 20 I = 1, LIWORK, 1 IWORK( I ) = 14 20 CONTINUE DO 30 I = 1, LWORK, 1 WORK( I+IPREPAD ) = ( 15.63D+0, 1.1D+0 ) 30 CONTINUE * DO 40 I = 1, N WNEW( I+IPREPAD ) = 3.14159D+0 40 CONTINUE * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IAM = 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ IAM = 0 * * If this process is not involved in this test, bail out now * RESULT = -3 IF( MYROW.GE.NPROW .OR. MYROW.LT.0 ) $ GO TO 60 RESULT = 0 * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * CALL ZLACPY( 'A', NP, NQ, COPYA, DESCA( LLD_ ), A( 1+IPREPAD ), $ DESCA( LLD_ ) ) * CALL PZFILLPAD( DESCA( CTXT_ ), NP, NQ, A, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL ) * CALL PZFILLPAD( DESCA( CTXT_ ), NP, NQ, Z, DESCA( LLD_ ), IPREPAD, $ IPOSTPAD, CPADVAL+1.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), N, 1, WNEW, N, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * CALL PDFILLPAD( DESCA( CTXT_ ), LWORK1, 1, RWORK, LWORK1, IPREPAD, $ IPOSTPAD, PADVAL+4.0D+0 ) * CALL PIFILLPAD( DESCA( CTXT_ ), LIWORK, 1, IWORK, LIWORK, IPREPAD, $ IPOSTPAD, IPADVAL ) * CALL PZFILLPAD( DESCA( CTXT_ ), LWORK, 1, WORK, LWORK, IPREPAD, $ IPOSTPAD, CPADVAL+4.1D+0 ) * CALL SLBOOT CALL SLTIMER( 1 ) CALL SLTIMER( 6 ) * CALL PZHEEVD( 'V', UPLO, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WNEW( 1+IPREPAD ), Z( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ), SIZEHEEVD, RWORK( 1+IPREPAD ), $ LWORK1, IWORK( 1+IPREPAD ), LIWORK, INFO ) CALL SLTIMER( 6 ) CALL SLTIMER( 1 ) * IF( THRESH.LE.0 ) THEN RESULT = 0 ELSE CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-A', NP, NQ, A, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, CPADVAL ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-Z', NP, NQ, Z, $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ CPADVAL+1.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-WNEW', N, 1, WNEW, N, $ IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-rWORK', LWORK1, 1, $ RWORK, LWORK1, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * CALL PZCHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-WORK', LWORK, 1, WORK, $ LWORK, IPREPAD, IPOSTPAD, CPADVAL+4.1D+0 ) * CALL PICHEKPAD( DESCA( CTXT_ ), 'PZHEEVD-IWORK', LIWORK, 1, $ IWORK, LIWORK, IPREPAD, IPOSTPAD, IPADVAL ) * * Check INFO * * Make sure that all processes return the same value of INFO * ITMP( 1 ) = INFO ITMP( 2 ) = INFO * CALL IGAMN2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP, 1, 1, 1, $ -1, -1, 0 ) CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, ITMP( 2 ), 1, 1, $ 1, -1, -1, 0 ) * * IF( ITMP( 1 ).NE.ITMP( 2 ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * ) $ 'Different processes return different INFO' RESULT = 1 ELSE IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )INFO RESULT = 1 END IF * * Compute eps * norm(A) * IF( N.EQ.0 ) THEN EPSNORMA = EPS ELSE EPSNORMA = PZLANHE( 'I', UPLO, N, COPYA, IA, JA, DESCA, $ RWORK )*EPS END IF * * Note that a couple key variables get redefined in PZSEPCHK * as described by this table: * * PZSEPTST name PZSEPCHK name * ------------- ------------- * COPYA A * Z Q * A C * * Perform the |AQ - QE| test * CALL PDFILLPAD( DESCA( CTXT_ ), RSIZECHK, 1, RWORK, RSIZECHK, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * CALL PZSEPCHK( N, NZ, COPYA, IA, JA, DESCA, $ MAX( ABSTOL+EPSNORMA, SAFMIN ), THRESH, $ Z( 1+IPREPAD ), IA, JA, DESCA, A( 1+IPREPAD ), $ IA, JA, DESCA, WNEW( 1+IPREPAD ), $ RWORK( 1+IPREPAD ), RSIZECHK, TSTNRM, RES ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZSDPCHK-rWORK', RSIZECHK, 1, $ RWORK, RSIZECHK, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RES.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9995 ) END IF * * Perform the |QTQ - I| test * CALL PDFILLPAD( DESCA( CTXT_ ), RSIZEQTQ, 1, RWORK, RSIZEQTQ, $ IPREPAD, IPOSTPAD, 4.3D+0 ) * * RES = 0 ULP = PDLAMCH( DESCA( CTXT_ ), 'P' ) CALL PZLASET( 'A', N, N, CZERO, CONE, A( 1+IPREPAD ), IA, JA, $ DESCA ) CALL PZGEMM( 'Conjugate transpose', 'N', N, N, N, CNEGONE, $ Z( 1+IPREPAD ), IA, JA, DESCA, Z( 1+IPREPAD ), IA, $ JA, DESCA, CONE, A( 1+IPREPAD ), IA, JA, DESCA ) NORM = PZLANGE( '1', N, N, A( 1+IPREPAD ), IA, JA, DESCA, $ WORK( 1+IPREPAD ) ) QTQNRM = NORM / ( DBLE( MAX( N, 1 ) )*ULP ) IF( QTQNRM.GT.THRESH ) THEN RES = 1 END IF CALL PDCHEKPAD( DESCA( CTXT_ ), 'PZSEPQTQ-rWORK', RSIZEQTQ, 1, $ RWORK, RSIZEQTQ, IPREPAD, IPOSTPAD, 4.3D+0 ) * IF( RES.NE.0 ) THEN RESULT = 1 WRITE( NOUT, FMT = 9994 ) END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )INFO RESULT = 1 END IF END IF * * Check to make sure that we have the right eigenvalues * IF( WKNOWN .AND. N.GT.0 ) THEN * * Find the largest difference between the computed * and expected eigenvalues * MINERROR = NORMWIN MAXERROR = 0.0D+00 * DO 50 I = 1, N ERROR = ABS( WIN( I+IPREPAD )-WNEW( I+IPREPAD ) ) MAXERROR = MAX( MAXERROR, ERROR ) 50 CONTINUE MINERROR = MIN( MAXERROR, MINERROR ) * IF( MINERROR.GT.NORMWIN*FIVE*THRESH*EPS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )MINERROR, NORMWIN RESULT = 1 END IF END IF * * * All processes should report the same result * CALL IGAMX2D( DESCA( CTXT_ ), 'a', ' ', 1, 1, RESULT, 1, 1, 1, -1, $ -1, 0 ) * 60 CONTINUE * RETURN * 9999 FORMAT( 'PZHEEVD returned INFO=', I7 ) 9998 FORMAT( 'PZSEPQTQ returned INFO=', I7 ) 9997 FORMAT( 'PZSDPSUBTST minerror =', D11.2, ' normwin=', D11.2 ) 9996 FORMAT( 'PZHEEVD returned INFO=', I7, $ ' despite adequate workspace' ) 9995 FORMAT( 'PZHEEVD failed the |AQ -QE| test' ) 9994 FORMAT( 'PZHEEVD failed the |QTQ -I| test' ) * * End of PZSDPSUBTST * END scalapack-1.8.0/TESTING/EIG/pclatran.f0000640000175000017500000001567510363532303017026 0ustar muammarmuammar SUBROUTINE PCLATRAN( N, NB, A, IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, JA, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * * ======= * * PCLATRAN transpose a lower triangular matrix on to the upper * triangular portion of the same matrix. * * This is an auxiliary routine called by PCHETRD. * * Notes * ===== * * IA must equal 1 * JA must equal 1 * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( RSRC_ ) must equal 1 * DESCA( CSRC_ ) must equal 1 * * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * NB (global input) INTEGER * The number of rows and columns to be transposed with each * message sent. NB has no impact on the result, it is striclty * a performance tuning parameter. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). On entry, the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. On exit, the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is undefined (and may have been modified). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * Must be equal to 1. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * Must be equal to 1. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * DESCA( MB_ ) must equal 1 * DESCA( NB_ ) must equal 1 * DESCA( ICTXT_ ) must point to a square process grid * i.e. one where NPROW is equal to NPCOL * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * Where: * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IRECV, ISEND, J, JJ, JRECV, JSEND, $ LDA, MAXIRECV, MAXISEND, MAXJRECV, MAXJSEND, $ MINIRECV, MINISEND, MINJRECV, MINJSEND, MYCOL, $ MYROW, NP, NPCOL, NPROW, NQ, RECVNB, SENDNB, $ STARTCOL, STARTROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CTRRV2D, CTRSD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Further details * * Because the processor grid is square each process needs only send * data to its transpose process. (Likewsie it need only receive * data from its transpose process.) Because the data decomposition * is cyclic, the local portion of the array is triangular. * * This routine requires that the data be buffered (i.e. copied) * on the sending process (because of the triangular shape) and * unbuffered on the receiving process. Hence, two local memory to * memory copies are performed within the communications routines * followed by a memory to memory copy outside of the communications * routines. It would be nice to avoid having back to back memory * to memory copies (as we do presently on the receiving processor). * This could be done by packaging the data ourselves in the sender * and then unpacking it directly into the matrix. However, this * code seems cleaner and so since this routine is not a significant * performance bottleneck we have left it this way. * * * * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * * IF( MYROW.EQ.MYCOL ) THEN * DO 20 J = 1, NP DO 10 I = J + 1, NQ A( J+( I-1 )*LDA ) = CONJG( A( I+( J-1 )*LDA ) ) 10 CONTINUE 20 CONTINUE * ELSE IF( MYROW.GT.MYCOL ) THEN STARTROW = 1 STARTCOL = 2 ELSE IF( MYROW.EQ.MYCOL ) THEN STARTROW = 2 STARTCOL = 2 ELSE STARTROW = 2 STARTCOL = 1 END IF END IF * DO 50 JJ = 1, MAX( NP, NQ ), NB MINJSEND = STARTCOL + JJ - 1 MINJRECV = STARTROW + JJ - 1 MAXJSEND = MIN( MINJSEND+NB-1, NQ ) MAXJRECV = MIN( MINJRECV+NB-1, NP ) * SENDNB = MAXJSEND - MINJSEND + 1 RECVNB = MAXJRECV - MINJRECV + 1 * MINISEND = 1 MINIRECV = 1 MAXISEND = MIN( NP, JJ+SENDNB-1 ) MAXIRECV = MIN( NQ, JJ+RECVNB-1 ) * ISEND = MAXISEND - MINISEND + 1 IRECV = MAXIRECV - MINIRECV + 1 JSEND = MAXJSEND - MINJSEND + 1 JRECV = MAXJRECV - MINJRECV + 1 * * * DO 40 J = MINJRECV, MAXJRECV DO 30 I = MINIRECV, MAXIRECV + J - MAXJRECV WORK( I+( J-MINJRECV )*IRECV ) $ = CONJG( A( J+( I-1 )*LDA ) ) 30 CONTINUE 40 CONTINUE * IF( IRECV.GT.0 .AND. JRECV.GT.0 ) $ CALL CTRSD2D( ICTXT, 'U', 'N', IRECV, JRECV, WORK, IRECV, $ MYCOL, MYROW ) * IF( ISEND.GT.0 .AND. JSEND.GT.0 ) $ CALL CTRRV2D( ICTXT, 'U', 'N', ISEND, JSEND, $ A( MINISEND+( MINJSEND-1 )*LDA ), LDA, $ MYCOL, MYROW ) * * 50 CONTINUE * END IF * RETURN * * End of PCLATRD * END scalapack-1.8.0/TESTING/EIG/pcsepchk.f0000640000175000017500000002416710363532303017016 0ustar muammarmuammar* * SUBROUTINE PCSEPCHK( MS, NV, A, IA, JA, DESCA, EPSNORMA, THRESH, $ Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, W, WORK, $ LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, IQ, JA, JC, JQ, LWORK, MS, NV, RESULT REAL EPSNORMA, THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCC( * ), DESCQ( * ) REAL W( * ), WORK( * ) COMPLEX A( * ), C( * ), Q( * ) * .. * * Purpose * ======= * * Compute |AQ- QL| / (EPSNORMA * N) * where EPSNORMA = (abstol + eps)*norm(A) when called by pdsqpsubtst. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, C and Q * MQ = number of local columns in A * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, C and Q * Also, the number of global columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) COMPLEX pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the MS-by-MS * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * EPSNORMA (input) REAL * abstol + eps * inf.norm(A) * Abstol is absolute tolerence for the eigenvalues and is set * in the calling routines, pdsepsubtst and pdsqpsubtst. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) COMPLEX array * global dimension (MS, NV), local dimension (DESCA(DLEN_), NQ) * * Contains the eigenvectors as computed by PCHEEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) COMPLEX array, * global dimension (NV, NV), local dimension (DESCA(DLEN_), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * |AQ- QL| / ( EPSNROMA * MS ) * * RESULT (global output) INTEGER * 0 if the test passes i.e. * |AQ -QL| / (abstol + eps * norm(A) ) <= n* THRESH * 1 if the test fails i.e. * |AQ -QL| / (abstol + eps * norm(A) ) > n * THRESH * * .. Local Scalars .. * INTEGER INFO, J, LOCALCOL, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ, PCOL REAL NORM * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, NEGONE PARAMETER ( ONE = 1.0E+0, NEGONE = -1.0E+0 ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC REAL PCLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PCLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, CLACPY, CSSCAL, $ PCGEMM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 12, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IC, JC, DESCC, 16, INFO ) * IF( INFO.EQ.0 ) THEN * MP = NUMROC( MS, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -10 ELSE IF( JQ.NE.1 ) THEN INFO = -11 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( IC.NE.1 ) THEN INFO = -14 ELSE IF( JC.NE.1 ) THEN INFO = -15 ELSE IF( LWORK.LT.NQ ) THEN INFO = -19 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCSEPCHK', -INFO ) RETURN END IF * * C = Q * W * CALL CLACPY( 'A', MP, NQ, Q, DESCQ( LLD_ ), C, DESCC( LLD_ ) ) * * DO 10 J = 1, NV PCOL = INDXG2P( J, DESCC( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( J, DESCC( NB_ ), 0, 0, NPCOL ) * IF( MYCOL.EQ.PCOL ) THEN CALL CSSCAL( MP, W( J ), C( ( LOCALCOL-1 )*DESCC( LLD_ )+ $ 1 ), 1 ) END IF 10 CONTINUE * * * C = C - A * Q * CALL PCGEMM( 'N', 'N', MS, NV, MS, NEGONE, A, 1, 1, DESCA, Q, 1, $ 1, DESCQ, ONE, C, 1, 1, DESCC ) * * Compute the norm of C * * NORM = PCLANGE( 'M', MS, NV, C, 1, 1, DESCC, WORK ) * * TSTNRM = NORM / EPSNORMA / MAX( MS, 1 ) * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0E0 ) ) THEN RESULT = 1 END IF * * RETURN * * End of PCSEPCHK * END scalapack-1.8.0/TESTING/EIG/psgebdrv.f0000640000175000017500000004652010363532303017027 0ustar muammarmuammar SUBROUTINE PSGEBDRV( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PSGEBDRV computes sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from sub( A ), * Q, P returned by PSGEBRD: * * sub( A ) := Q * B * P'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of sub( A ) * as returned by PSGEBRD. On exit, the original distribu- * ted matrix sub( A ) is restored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local input) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local input) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local input) REAL array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local input) REAL array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= 2*NB*( MP + NQ + NB ) * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MP = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * On exit, if INFO <> 0, a discrepancy has been found between * the diagonal and off-diagonal elements of A and the copies * contained in the arrays D and E. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL EIGHT, ONE, ZERO PARAMETER ( EIGHT = 8.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IIA, IL, IPTP, IPTQ, $ IPV, IPW, IPWK, IOFF, IV, J, JB, JJA, JL, JV, $ K, MN, MP, MYCOL, MYROW, NB, NPCOL, NPROW, NQ REAL ADDBND, D1, D2, E1, E2 * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ), $ DESCW( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, IGSUM2D, INFOG2L, $ PSLACPY, PSLARFB, PSLARFT, PSLASET, $ PSELGET * .. * .. External Functions .. INTEGER INDXG2P, NUMROC REAL PSLAMCH EXTERNAL INDXG2P, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 NB = DESCA( MB_ ) IOFF = MOD( IA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) IPV = 1 IPW = IPV + MP*NB IPTP = IPW + NQ*NB IPTQ = IPTP + NB*NB IPWK = IPTQ + NB*NB * IV = 1 JV = 1 MN = MIN( M, N ) IL = MAX( ( (IA+MN-2) / NB )*NB + 1, IA ) JL = MAX( ( (JA+MN-2) / NB )*NB + 1, JA ) IAROW = INDXG2P( IL, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCV, IA+M-IL, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCW, NB, JA+N-JL, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * ADDBND = EIGHT * PSLAMCH( ICTXT, 'eps' ) * * When A is an upper bidiagonal form * IF( M.GE.N ) THEN * CALL DESCSET( DESCD, 1, JA+MN-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * DO 10 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD ) CALL PSELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, IA+J, 1, DESCE ) CALL PSELGET( 'Rowwise', ' ', E1, A, IA+J, JA+J+1, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 10 CONTINUE * DO 20 J = JL, JA+NB-IOFF, -NB JB = MIN( JA+N-J, NB ) I = IA + J - JA K = I - IA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PSLARFT( 'Forward', 'Columnwise', M-K+1, JB, A, I, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Lower', M-K+1, JB, A, I, J, DESCA, $ WORK( IPV ), IV, JV, DESCV ) CALL PSLASET( 'Upper', M-K+1, JB, ZERO, ONE, WORK( IPV ), $ IV, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', M-K, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PSLARFT( 'Forward', 'Rowwise', N-K, JB, A, I, J+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Upper', JB, N-K, A, I, J+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PSLASET( 'Lower', JB, N-K, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PSLASET( 'Upper', JB, N-K-1, ZERO, ZERO, A, I, J+2, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PSLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K+1, N-K+1, JB, WORK( IPV ), $ IV, JV, DESCV, WORK( IPTQ ), A, I, J, DESCA, $ WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-K+1, N-K, JB, WORK( IPW ), IV, JV+1, DESCW, $ WORK( IPTP ), A, I, J+1, DESCA, WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 20 CONTINUE * * Handle first block separately * JB = MIN( N, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PSLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Lower', M, JB, A, IA, JA, DESCA, WORK( IPV ), $ IV, JV, DESCV ) CALL PSLASET( 'Upper', M, JB, ZERO, ONE, WORK( IPV ), IV, JV, $ DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', M-1, JB, ZERO, ZERO, A, IA+1, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PSLARFT( 'Forward', 'Rowwise', N-1, JB, A, IA, JA+1, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Upper', JB, N-1, A, IA, JA+1, DESCA, $ WORK( IPW ), IV, JV+1, DESCW ) CALL PSLASET( 'Lower', JB, N-1, ZERO, ONE, WORK( IPW ), IV, $ JV+1, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PSLASET( 'Upper', JB, N-2, ZERO, ZERO, A, IA, JA+2, $ DESCA ) * * Apply block Householder transformation from left. * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N, JB, WORK( IPV ), IV, JV, DESCV, $ WORK( IPTQ ), A, IA, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right. * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, $ N-1, JB, WORK( IPW ), IV, JV+1, DESCW, $ WORK( IPTP ), A, IA, JA+1, DESCA, WORK( IPWK ) ) * ELSE * CALL DESCSET( DESCD, IA+MN-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MN-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * DO 30 J = 0, MN-1 D1 = ZERO E1 = ZERO D2 = ZERO E2 = ZERO CALL PSELGET( ' ', ' ', D2, D, IA+J, 1, DESCD ) CALL PSELGET( 'Rowwise', ' ', D1, A, IA+J, JA+J, DESCA ) IF( J.LT.(MN-1) ) THEN CALL PSELGET( ' ', ' ', E2, E, 1, JA+J, DESCE ) CALL PSELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J, $ DESCA ) END IF * IF( ( ABS( D1 - D2 ).GT.( ABS( D2 ) * ADDBND ) ) .OR. $ ( ABS( E1 - E2 ).GT.( ABS( E2 ) * ADDBND ) ) ) $ INFO = INFO + 1 30 CONTINUE * DO 40 I = IL, IA+NB-IOFF, -NB JB = MIN( IA+M-I, NB ) J = JA + I - IA K = J - JA + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PSLARFT( 'Forward', 'Columnwise', M-K, JB, A, I+1, J, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Lower', M-K, JB, A, I+1, J, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PSLASET( 'Upper', M-K, JB, ZERO, ONE, WORK( IPV ), $ IV+1, JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', M-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PSLARFT( 'Forward', 'Rowwise', N-K+1, JB, A, I, J, $ DESCA, TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Upper', JB, N-K+1, A, I, J, DESCA, $ WORK( IPW ), IV, JV, DESCW ) CALL PSLASET( 'Lower', JB, N-K+1, ZERO, ONE, WORK( IPW ), $ IV, JV, DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PSLASET( 'Upper', JB, N-K, ZERO, ZERO, A, I, J+1, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PSLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-K, N-K+1, JB, WORK( IPV ), $ IV+1, JV, DESCV, WORK( IPTQ ), A, I+1, J, $ DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from Right. * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-K+1, N-K+1, JB, WORK( IPW ), IV, JV, DESCW, $ WORK( IPTP ), A, I, J, DESCA, WORK( IPWK ) ) * DESCV( M_ ) = DESCV( M_ ) + NB DESCV( RSRC_ ) = MOD( DESCV( RSRC_ ) + NPROW - 1, NPROW ) DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) DESCW( N_ ) = DESCW( N_ ) + NB DESCW( RSRC_ ) = DESCV( RSRC_ ) DESCW( CSRC_ ) = DESCV( CSRC_ ) * 40 CONTINUE * * Handle first block separately * JB = MIN( M, NB - IOFF ) IV = IOFF + 1 JV = IOFF + 1 * * Compute upper triangular matrix TQ from TAUQ. * CALL PSLARFT( 'Forward', 'Columnwise', M-1, JB, A, IA+1, JA, $ DESCA, TAUQ, WORK( IPTQ ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Lower', M-1, JB, A, IA+1, JA, DESCA, $ WORK( IPV ), IV+1, JV, DESCV ) CALL PSLASET( 'Upper', M-1, JB, ZERO, ONE, WORK( IPV ), IV+1, $ JV, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PSLASET( 'Lower', M-2, JB, ZERO, ZERO, A, IA+2, JA, $ DESCA ) * * Compute upper triangular matrix TP from TAUP. * CALL PSLARFT( 'Forward', 'Rowwise', N, JB, A, IA, JA, DESCA, $ TAUP, WORK( IPTP ), WORK( IPWK ) ) * * Copy Householder vectors into workspace. * CALL PSLACPY( 'Upper', JB, N, A, IA, JA, DESCA, WORK( IPW ), $ IV, JV, DESCW ) CALL PSLASET( 'Lower', JB, N, ZERO, ONE, WORK( IPW ), IV, JV, $ DESCW ) * * Zero out the strict+1 upper triangular part of A. * CALL PSLASET( 'Upper', JB, N-1, ZERO, ZERO, A, IA, JA+1, $ DESCA ) * * Apply block Householder transformation from left * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M-1, N, JB, WORK( IPV ), IV+1, JV, DESCV, $ WORK( IPTQ ), A, IA+1, JA, DESCA, WORK( IPWK ) ) * * Apply block Householder transformation from right * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M, N, $ JB, WORK( IPW ), IV, JV, DESCW, WORK( IPTP ), $ A, IA, JA, DESCA, WORK( IPWK ) ) END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * RETURN * * End of PSGEBDRV * END scalapack-1.8.0/TESTING/EIG/pdttrdtester.f0000640000175000017500000006162210363532303017743 0ustar muammarmuammar SUBROUTINE PDTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 24, 2000 * * .. Scalar Arguments .. LOGICAL CHECK INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS, $ TOTMEM REAL THRESH * .. * .. Array Arguments .. INTEGER NVAL( * ) DOUBLE PRECISION MEM( * ) * .. * * Purpose * ======= * * PDTTRDTESTER tests PDSYTTRD * * Arguments * ========= * * IAM (local input) INTEGER * The local process number * * NPROCS (global input) INTEGER * The number of processors * * CHECK (global input) LOGICAL * Specifies whether the user wants to check the answer * * NOUT (local input) INTEGER * File descriptor * * THRESH (global input) DOUBLE PRECISION * Acceptable error threshold * * NVAL (global input) INTEGER array dimension NMAT * The matrix sizes to test * * NMAT (global input) INTEGER * The number of matrix sizes to test * * MEM (local input) DOUBLE PRECISION array dimension MEMSIZ * Where: * MEMSIZ = TOTMEM / DBLESZ * * TOTMEM (global input) INTEGER * Number of bytes in MEM * * KPASS (local input/output) INTEGER * The number of tests which passed. Only relevant on * processor 0. * * KFAIL (local input/output) INTEGER * The number of tests which failed. Only relevant on * processor 0. * * KSKIP (local input/output) INTEGER * The number of tests which were skipped. Only relevant on * processor 0. * * ================================================================ * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DBLESZ DOUBLE PRECISION PADVAL PARAMETER ( DBLESZ = 8, PADVAL = -9923.0D+0 ) INTEGER TIMETESTS PARAMETER ( TIMETESTS = 11 ) INTEGER TESTS PARAMETER ( TESTS = 8 ) INTEGER MINTIMEN PARAMETER ( MINTIMEN = 8 ) * .. * .. Local Scalars .. LOGICAL TIME CHARACTER UPLO CHARACTER*6 PASSED INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ LCM, LWMIN, MAXTESTS, MEMSIZ, MYCOL, MYROW, N, $ NB, NDIAG, NGRIDS, NN, NOFFD, NP, NPCOL, NPROW, $ NPS, NQ, SPLITSTIMED, WORKSIZ, WORKTRD DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ), $ BALTEST( TESTS ), BALTIME( TIMETESTS ), $ DESCA( DLEN_ ), DESCD( DLEN_ ), IERR( 1 ), $ INTERTEST( TESTS ), INTERTIME( TIMETESTS ), $ PNBTEST( TESTS ), PNBTIME( TIMETESTS ), $ TWOGEMMTEST( TESTS ), TWOGEMMTIME( TIMETESTS ) DOUBLE PRECISION CTIME( 100 ), WTIME( 100 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGEBR2D, IGEBS2D, IGSUM2D, PDCHEKPAD, $ PDFILLPAD, PDLAFCHK, PDLATRAN, PDMATGEN, $ PDSYTDRV, PDSYTTRD, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC, PJLAENV DOUBLE PRECISION PDLANSY EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PJLAENV, PDLANSY * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, SQRT * .. * * .. Scalars in Common .. INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE, $ LLTBLOCK, MINSZ, PNB, TIMEINTERNALS, TIMING, $ TRSBLOCK, TWOGEMMS * .. * .. Common blocks .. COMMON / BLOCKSIZES / GSTBLOCK, LLTBLOCK, BCKBLOCK, $ TRSBLOCK COMMON / MINSIZE / MINSZ COMMON / PJLAENVTIMING / TIMING COMMON / TAILOREDOPTS / PNB, ANB, INTERLEAVE, $ BALANCED, TWOGEMMS COMMON / TIMECONTROL / TIMEINTERNALS * .. * .. Data statements .. DATA BALTIME / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 / DATA INTERTIME / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 / DATA TWOGEMMTIME / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 / DATA ANBTIME / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16, $ 16 / DATA PNBTIME / 32, 32, 32, 32, 32, 32, 32, 32, 32, $ 16, 64 / DATA BALTEST / 0, 0, 0, 0, 1, 1, 1, 1 / DATA INTERTEST / 0, 0, 1, 1, 0, 0, 1, 1 / DATA TWOGEMMTEST / 0, 1, 0, 1, 0, 1, 0, 1 / DATA ANBTEST / 1, 2, 3, 16, 1, 2, 3, 16 / DATA PNBTEST / 1, 16, 8, 1, 16, 8, 1, 16 / * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * IASEED = 0 SPLITSTIMED = 0 NB = 1 UPLO = 'L' MEMSIZ = TOTMEM / DBLESZ * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * NGRIDS = INT( SQRT( DBLE( NPROCS ) ) ) * DO 30 NN = 1, NGRIDS * NPROW = NN NPCOL = NN IERR( 1 ) = 0 * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * IF( N.GT.MINTIMEN ) THEN * * For timing tests, we perform one or two extra tests. * Both of these extra tests are performed with the * default values for the performance tuning parameters. * The second extra test (which is only performed if * split times are non-zero) is performed with timeinternals * set to 1 (which forces barrier syncs between many * phases of the computation). * TIME = .TRUE. MAXTESTS = TIMETESTS + 2 ELSE TIME = .FALSE. MAXTESTS = TESTS END IF * * DO 10 K = 1, MAXTESTS TIMEINTERNALS = 0 IF( TIME ) THEN IF( K.GE.MAXTESTS-1 ) THEN * * For the last two timings, we let pjlaenv set * the execution path values. These dummy * initializations aren't really necessary, * but they illustrate the fact that these values are * set in xpjlaenv. The dummy call to pjlaenv * has the side effect of setting ANB. * MINSZ = -13 BALANCED = -13 INTERLEAVE = -13 TWOGEMMS = -13 ANB = -13 PNB = -13 TIMING = 1 DUMMY = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, $ 0, 0 ) IF( K.EQ.MAXTESTS ) $ TIMEINTERNALS = 1 ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTIME( K ) INTERLEAVE = INTERTIME( K ) TWOGEMMS = TWOGEMMTIME( K ) ANB = ANBTIME( K ) PNB = PNBTIME( K ) END IF ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTEST( K ) INTERLEAVE = INTERTEST( K ) TWOGEMMS = TWOGEMMTEST( K ) ANB = ANBTEST( K ) PNB = PNBTEST( K ) END IF * * Skip the last test (with timeinternals = 1) if * PDSYTTRD is not collecting the split times. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, 1, $ 0, 0 ) END IF * * IF( SPLITSTIMED.EQ.0 .AND. K.EQ.MAXTESTS ) $ GO TO 10 * * The following hack tests to make sure that PNB need not * be the same on all processes. (Provided that PNB is set * to 1 in the TRD.dat file.) * IF( PNB.EQ.1 ) $ PNB = 1 + IAM * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1, $ INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORKTRD = LWMIN + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ITEMP, 2*( NB+NP )*NB ) WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * * * Generate the matrix A * CALL PDMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PDLANSY( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDLANSY', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PDSYTTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWMIN, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDSYTTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PDCHEKPAD( ICTXT, 'PDSYTTRDc', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTTRDd', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTTRDe', WORKTRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PDSYTDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) * * TTRD does not preserve the upper triangular part of A. * The following call to PDLATRAN means that we only * check the lower triangular part of A - QTQ' * CALL PDLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PDLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PDCHEKPAD( ICTXT, 'PDSYTDRVf', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRVg', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRVh', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PDCHEKPAD( ICTXT, 'PDSYTDRVi', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0D+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9991 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' * * END IF * * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 50, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 50, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'WALL', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'CPU ', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ CTIME( 1 ), TMFLOPS, FRESID, PASSED * * * If split times were collected (in PDSYttrd.f), print * them out. * IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 ).GT.0.0D+0 .OR. $ CTIME( 13 )+CTIME( 15 )+CTIME( 16 ).GT.0.0D+0 ) $ THEN SPLITSTIMED = 1 END IF IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ), $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ), $ WTIME( 15 ) WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ), $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ), $ WTIME( 21 ) * WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ), $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ), $ CTIME( 15 ) WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ), $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ), $ CTIME( 21 ) WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB, $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS END IF END IF 10 CONTINUE 20 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) END IF END IF * * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE RETURN * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) * 9995 FORMAT( 'PDSYTTRD, tailored reduction to tridiagonal form, test.' $ ) 9994 FORMAT( 'TIME N int 2gm bal anb pnb prcs TRD Time ', $ ' MFLOPS Residual CHECK' ) 9993 FORMAT( '---- ---- --- --- --- --- --- ---- -------- ', $ '----------- -------- ------' ) 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9991 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) 9990 FORMAT( 'wsplit1=[wsplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9989 FORMAT( 'wsplit2=[wsplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9988 FORMAT( 'csplit1=[csplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9987 FORMAT( 'csplit2=[csplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9986 FORMAT( 'size_opts=[size_opts;', I4, 1X, I4, 1X, I4, 1X, I4, 1X, $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];' ) 9985 FORMAT( 'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;', $ ' TWOGEMMS=7; TIMEINTERNALS=8;' ) 9984 FORMAT( 'S1_OVERHEAD = 1; % Should be mainly cost of barrier' ) 9983 FORMAT( 'S1_BARRIER = 2; % Cost of barrier' ) 9982 FORMAT( 'S1_UPDCURCOL = 3; % Update the current column' ) 9981 FORMAT( 'S1_HOUSE = 4; % Compute the householder vector' ) 9980 FORMAT( 'S1_SPREAD = 5; % Spread across' ) 9979 FORMAT( 'S1_TRANSPOSE = 6; % Transpose' ) 9978 FORMAT( 'S2_UPDCURBLK = 1; % Update the current block column' ) 9977 FORMAT( 'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' ) 9976 FORMAT( 'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' ) 9975 FORMAT( 'S2_TRANS_SUM = 4; % v = v + vt'' ' ) 9974 FORMAT( 'S2_DOT = 5; % c = v'' * h ' ) 9973 FORMAT( 'S2_R2K = 6; % A = A - v * h'' - h * v'' ' ) * * * End of PDTTRDTESTER * END scalapack-1.8.0/TESTING/EIG/pzgsepdriver.f0000640000175000017500000002336410363532303017740 0ustar muammarmuammar* * PROGRAM PZGSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel COMPLEX*16 Hermitian eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and DBLSIZ * indicate the length in bytes on the given platform for an integer * and a double precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * DOUBLE is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * COMPLEX*16 words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PZHEGVX, the expert driver for the parallel * Hermitian eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, ZPLXSZ, NIN PARAMETER ( TOTMEM = 2000000, ZPLXSZ = 16, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / ZPLXSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Functions .. DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PDLACHKIEEE, PDLASNBT, PZGSEPREQ * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PDLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PDLACHKIEEE( ISIEEE, DLAMCH( 'O' ), DLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Hermitian Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'generalized ' // 'Hermitian eigenvalue routine: PZHEGVX.' WRITE( NOUT, FMT = 9999 )'A scaled residual check, ' // $ 'will be computed' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pZGSEPtst.f).' WRITE( NOUT, FMT = 9999 ) $ 'IBTYPE : Generalized eigenproblem type' // $ ' (see pZHEGVx.f)' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pZGSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : The scaled residual' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PZGSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP IBTYPE SUB WALL CPU ', $ ' CHK CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- ------ --- -------- --------', $ ' --------- -----' ) * * End of PZGSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/psrptseptst.f0000640000175000017500000000526110363532303017623 0ustar muammarmuammar* * PROGRAM PSRPTSEPTST * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * Repeat parallel symmetric eigenproblem test * .. Parameters .. * INTEGER DLEN_ PARAMETER ( DLEN_ = 9 ) INTEGER MAXN, LWORK, LIWORK PARAMETER ( MAXN = 200, LWORK = 500000, $ LIWORK = 6*MAXN+4 ) * .. * .. Local Scalars .. CHARACTER HETERO, SUBTESTS, UPLO INTEGER CONTEXT, IAM, INFO, IPOSTPAD, IPREPAD, LDA, $ MATTYPE, N, NB, NPCOL, NPROCS, NPROW REAL ABSTOL, THRESH * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), ICLUSTR( MAXN ), IFAIL( MAXN ), $ ISEED( 4 ), IWORK( LIWORK ) REAL A( MAXN*MAXN ), COPYA( MAXN*MAXN ), $ GAP( MAXN ), WIN( MAXN ), WNEW( MAXN ), $ WORK( LWORK ), Z( MAXN*MAXN ) * .. * * * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDINIT, $ BLACS_PINFO, BLACS_SETUP, DESCINIT, PSSEPTST * .. * .. Executable Statements .. * IPREPAD = 3 IPOSTPAD = 3 LDA = MAXN * * Set HETERO to 'Y' if you want to turn off the PxSYEV tests * HETERO = 'N' * * These lines should be replaced by the output from pxSEPdriver * * ISEED( 1 ) = 2312 ISEED( 2 ) = 3709 ISEED( 3 ) = 666 ISEED( 4 ) = 3371 UPLO = 'U' SUBTESTS = 'Y' N = 33 NPROW = 2 NPCOL = 2 NB = 4 MATTYPE = 9 * note: the printout often makes a mess of ABSTOL ABSTOL = 0.1175494351E-37 THRESH = .350000E+01 * CALL BLACS_PINFO( IAM, NPROCS ) IF( NPROCS.LT.1 ) THEN * NPROCS = NPROW*NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, NPCOL ) * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, CONTEXT, LDA, INFO ) * CALL PSSEPTST( DESCA, UPLO, N, MATTYPE, SUBTESTS, THRESH, N, $ ABSTOL, ISEED, A, COPYA, Z, LDA, WIN, WNEW, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, WORK, $ LWORK-IPREPAD-IPOSTPAD, IWORK, $ LIWORK-IPREPAD-IPOSTPAD, HETERO, 6, INFO ) * * * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * * CALL BLACS_EXIT( 0 ) STOP * * * * End of PSRPTSEPTST * END scalapack-1.8.0/TESTING/EIG/pcgsepchk.f0000640000175000017500000003103010363532303017150 0ustar muammarmuammar* * SUBROUTINE PCGSEPCHK( IBTYPE, MS, NV, A, IA, JA, DESCA, B, IB, JB, $ DESCB, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, W, WORK, LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, IBTYPE, IC, IQ, JA, JB, JC, JQ, LWORK, $ MS, NV, RESULT REAL THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCC( * ), DESCQ( * ) REAL W( * ), WORK( * ) COMPLEX A( * ), B( * ), C( * ), Q( * ) * .. * * * Purpose * ======= * * PCGSEPCHK checks a decomposition of the form * * A Q = B Q D or * A B Q = Q D or * B A Q = Q D * * where A is a symmetric matrix, B is * symmetric positive definite, Q is orthogonal, and D is diagonal. * * One of the following test ratios is computed: * * IBTYPE = 1: TSTNRM = | A Q - B Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 2: TSTNRM = | A B Q - Q D | / ( |A| |Q| n ulp ) * * IBTYPE = 3: TSTNRM = | B A Q - Q D | / ( |A| |Q| n ulp ) * * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, B and Q * MQ = number of local columns in A * NQ = number of local columns in B and Q * * IBTYPE (input) INTEGER * The form of the symmetric generalized eigenproblem. * = 1: A*Q = (lambda)*B*Q * = 2: A*B*Q = (lambda)*Q * = 3: B*A*Q = (lambda)*Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, B, C and Q * Also, the number of columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) REAL pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer to an * array in local memory of dimension (LLD_B, LOCc(JB+N-1)). * This array contains the local pieces of the M-by-N * distributed test matrix B * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix B. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), NQ) * * Contains the eigenvectors as computed by PSSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (MS, NV), * local dimension (DESCA( DLEN_ ), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * * RESULT (global output) INTEGER * 0 if the test passes * 1 if the test fails * * .. Local Scalars .. * INTEGER I, INFO, MYCOL, MYROW, NPCOL, NPROW, NQ REAL ANORM, ULP * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) COMPLEX CONE, CNEGONE, CZERO PARAMETER ( CONE = 1.0E+0, CNEGONE = -1.0E+0, $ CZERO = 0.0E+0 ) * .. * .. External Functions .. INTEGER NUMROC REAL PCLANGE, SLAMCH EXTERNAL NUMROC, PCLANGE, SLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCGEMM, PCSSCAL, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( MS, 1, MS, 2, IB, JB, DESCB, 11, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 16, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IB, JB, DESCB, 20, INFO ) * IF( INFO.EQ.0 ) THEN * NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -14 ELSE IF( JQ.NE.1 ) THEN INFO = -15 ELSE IF( IA.NE.1 ) THEN INFO = -5 ELSE IF( JA.NE.1 ) THEN INFO = -6 ELSE IF( IB.NE.1 ) THEN INFO = -9 ELSE IF( JB.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.NQ ) THEN INFO = -23 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCGSEPCHK', -INFO ) RETURN END IF * RESULT = 0 ULP = SLAMCH( 'Epsilon' ) * * Compute product of Max-norms of A and Q. * ANORM = PCLANGE( 'M', MS, MS, A, IA, JA, DESCA, WORK )* $ PCLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) IF( ANORM.EQ.ZERO ) $ ANORM = ONE * IF( IBTYPE.EQ.1 ) THEN * * Norm of AQ - BQD * * C = AQ * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 10 I = 1, NV CALL PCSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 10 CONTINUE * * C = C - BQ (i.e. AQ-BQD) * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CNEGONE, C, IC, JC, DESCC ) * TSTNRM = ( PCLANGE( 'M', MS, NV, C, IC, JC, DESCC, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * * ELSE IF( IBTYPE.EQ.2 ) THEN * * Norm of ABQ - QD * * * C = BQ * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 20 I = 1, NV CALL PCSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 20 CONTINUE * * Q = AC - Q * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PCLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * Norm of BAQ - QD * * * C = AQ * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, A, IA, JA, DESCA, Q, $ IQ, JQ, DESCQ, CZERO, C, IC, JC, DESCC ) * * Q = QD * DO 30 I = 1, NV CALL PCSSCAL( MS, W( I ), Q, IQ, JQ+I-1, DESCQ, 1 ) 30 CONTINUE * * Q = BC - Q * CALL PCGEMM( 'N', 'N', MS, NV, MS, CONE, B, IB, JB, DESCB, C, $ IC, JC, DESCC, CNEGONE, Q, IQ, JQ, DESCQ ) * TSTNRM = ( PCLANGE( 'M', MS, NV, Q, IQ, JQ, DESCQ, WORK ) / $ ANORM ) / ( MAX( MS, 1 )*ULP ) * END IF * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0E0 ) ) THEN RESULT = 1 END IF RETURN * * End of PCGSEPCHK * END scalapack-1.8.0/TESTING/EIG/pdlasizesyevx.f0000640000175000017500000001725010363532303020123 0ustar muammarmuammar* * SUBROUTINE PDLASIZESYEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER RANGE INTEGER IL, IU, MAXSIZE, N, VALSIZE, VECSIZE DOUBLE PRECISION VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION WIN( * ) * .. * * Purpose * ======= * * PDLASIZESYEVX computes the amount of memory needed by PDSYEVX * to ensure: * 1) Orthogonal Eigenvectors * 2) Eigenvectors * 3) Eigenvalues * * Arguments * ========= * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * VL (global input/output ) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VL is set * to a random value near an entry in WIN * * VU (global input/output ) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VU is set * to a random value near an entry in WIN * * IL (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * If IL < 0, RANGE='I' and WKNOWN = .TRUE., IL is set * to a random value from 1 to N * * IU (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * If IU < 0, RANGE='I' and WKNOWN = .TRUE., IU is set * to a random value from IL to N * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * ISEED is not touched unless IL, IU, VL or VU are modified. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If WKNOWN=1, WIN contains the eigenvalues of the matrix. * * MAXSIZE (global output) INTEGER * Workspace required to guarantee that PDSYEVX will return * orthogonal eigenvectors. IF WKNOWN=0, MAXSIZE is set to a * a value which guarantees orthogonality no matter what the * spectrum is. If WKNOWN=1, MAXSIZE is set to a value which * guarantees orthogonality on a matrix with eigenvalues given * by WIN. * * VECSIZE (global output) INTEGER * Workspace required to guarantee that PDSYEVX * will compute eigenvectors. * * VALSIZE (global output) INTEGER * Workspace required to guarantee that PDSYEVX * will compute eigenvalues. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION TWENTY PARAMETER ( TWENTY = 20.0D0 ) * .. * .. Local Scalars .. * INTEGER CLUSTERSIZE, I, ILMIN, IUMAX, MAXCLUSTERSIZE, $ MQ0, MYCOL, MYIL, MYIU, MYROW, NB, NEIG, NN, $ NP0, NPCOL, NPROW DOUBLE PRECISION ANORM, EPS, ORFAC, SAFMIN, VLMIN, VUMAX * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL LSAME, ICEIL, NUMROC, DLARAN, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ORFAC = 1.0D-3 * * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe Minimum' ) NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * VALSIZE = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) * IF( WKNOWN ) THEN ANORM = SAFMIN / EPS IF( N.GE.1 ) $ ANORM = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), ANORM ) * IF( LSAME( RANGE, 'I' ) ) THEN IF( IL.LT.0 ) $ IL = INT( DLARAN( ISEED )*DBLE( N ) ) + 1 IF( IU.LT.0 ) $ IU = INT( DLARAN( ISEED )*DBLE( N-IL ) ) + IL IF( N.EQ.0 ) $ IU = 0 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IF( VL.GT.VU ) THEN MYIL = INT( DLARAN( ISEED )*DBLE( N ) ) + 1 MYIU = INT( DLARAN( ISEED )*DBLE( N-MYIL ) ) + MYIL VL = WIN( MYIL ) + TWENTY*EPS*ABS( WIN( MYIL ) ) VU = WIN( MYIU ) + TWENTY*EPS*ABS( WIN( MYIU ) ) VU = MAX( VU, VL+EPS*TWENTY*ABS( VL )+SAFMIN ) END IF END IF * END IF IF( LSAME( RANGE, 'V' ) ) THEN * * Compute ILMIN, IUMAX (based on VL, VU and WIN) * IF( WKNOWN ) THEN VLMIN = VL - TWENTY*EPS*ANORM VUMAX = VU + TWENTY*EPS*ANORM ILMIN = 1 IUMAX = 0 DO 10 I = 1, N IF( WIN( I ).LT.VLMIN ) $ ILMIN = ILMIN + 1 IF( WIN( I ).LT.VUMAX ) $ IUMAX = IUMAX + 1 10 CONTINUE ELSE ILMIN = 1 IUMAX = N END IF ELSE IF( LSAME( RANGE, 'I' ) ) THEN ILMIN = IL IUMAX = IU ELSE IF( LSAME( RANGE, 'A' ) ) THEN ILMIN = 1 IUMAX = N END IF * NEIG = IUMAX - ILMIN + 1 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) VECSIZE = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN * IF( WKNOWN ) THEN CLUSTERSIZE = 1 MAXCLUSTERSIZE = 1 DO 20 I = ILMIN + 1, IUMAX IF( ( WIN( I )-WIN( I-1 ) ).LT.ORFAC*2*ANORM ) THEN CLUSTERSIZE = CLUSTERSIZE + 1 IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE CLUSTERSIZE = 1 END IF 20 CONTINUE IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE MAXCLUSTERSIZE = N END IF * MAXSIZE = VECSIZE + MAX( ( MAXCLUSTERSIZE-1 ), 0 )*N * * RETURN * * End of PDLASIZESYEVX * END scalapack-1.8.0/TESTING/EIG/pcevcdriver.f0000640000175000017500000005371510602576752017547 0ustar muammarmuammar PROGRAM PCEVCDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * June, 2000 * * Purpose * ======= * * PCEVCDRIVER is the main test program for the COMPLEX * SCALAPACK PCTREVC routine. This test driver performs a right and * left eigenvector calculation of a triangular matrix followed by * a residual checks of the calcuated eigenvectors. * * The program must be driven by a short data file and uses the same * input file as the PCNEPDRIVER. An annotated example of a data file * can be obtained by deleting the first 3 characters from the following * 18 lines: * 'SCALAPACK, Version 1.8, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * CPLXSZ INTEGER, default = 8 bytes. * CPLXSZ indicate the length in bytes on the given platform * for a single precision complex. * MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * Further Details * ============== * * Contributed by Mark Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER CPLXSZ, TOTMEM, MEMSIZ, NTESTS PARAMETER ( CPLXSZ = 8, TOTMEM = 200000000, $ MEMSIZ = TOTMEM / CPLXSZ, NTESTS = 20 ) COMPLEX PADVAL, ZERO, ONE PARAMETER ( PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICOL, ICTXT, II, III, IMIDPAD, $ INFO, IPA, IPOSTPAD, IPREPAD, IPVL, IPVR, IPW, $ IPWR, IPC, IROW, J, JJ, JJJ, K, KFAIL, KPASS, $ KSKIP, KTESTS, LDA, LDZ, LWORK, M, MYCOL, $ MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH REAL ANORM, FRESID, QRESID DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. LOGICAL SELECT( 1 ) INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ), $ NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL RESULT( 2 ), RWORK( 5000 ) DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, INFOG2L, $ PCCHEKPAD, PCEVCINFO, PCFILLPAD, PCGET22, $ PCLASET, PCMATGEN, PCTREVC, SLBOOT, SLCOMBINE, $ SLTIMER, CGSUM2D * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PCLANHS EXTERNAL ILCM, NUMROC, PCLANHS * .. * .. Intrinsic Functions .. INTRINSIC REAL, MAX, MIN * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PCEVCINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 40 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 40 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 40 * DO 30 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 30 END IF * DO 20 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 20 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 20 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAE+1 ) * IPA = IPREPAD + 1 IPC = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPC + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPVL = IPWR + N + IPOSTPAD + IPREPAD IPVR = IPVL + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPVR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX( 2*N, ( 8*ILCM( NPROW, NPCOL )+2 )** $ 2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PCEVCFCHK and PCLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 20 END IF * * Generate matrix Z = In * CALL PCLASET( 'All', N, N, ZERO, ONE, MEM( IPC ), 1, 1, $ DESCZ ) CALL PCLASET( 'All', N, N, ZERO, ZERO, MEM( IPVR ), 1, 1, $ DESCZ ) CALL PCLASET( 'All', N, N, ZERO, ZERO, MEM( IPVL ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PCMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PCLASET( 'Lower', MAX( 0, N-1 ), MAX( 0, N-1 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 2 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPVR-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPVL-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPC-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * * Set eigenvalues from diagonal * DO 10 JJJ = 1, N CALL INFOG2L( JJJ, JJJ, DESCZ, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, II, JJ ) IF( MYROW.EQ.II .AND. MYCOL.EQ.JJ ) THEN MEM( IPWR-1+JJJ ) = MEM( IPA-1+( ICOL-1 )*LDA+ $ IROW ) ELSE MEM( IPWR-1+JJJ ) = ZERO END IF 10 CONTINUE CALL CGSUM2D( ICTXT, 'All', ' ', N, 1, MEM( IPWR ), N, $ -1, -1 ) * SELECT( 1 ) = .TRUE. CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform eigenvector calculation * CALL PCTREVC( 'B', 'A', SELECT, N, MEM( IPA ), DESCA, $ MEM( IPVL ), DESCZ, MEM( IPVR ), DESCZ, N, $ M, MEM( IPW ), RWORK, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PCTREVC INFO=', INFO KFAIL = KFAIL + 1 GO TO 20 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PCCHEKPAD( ICTXT, 'PCTREVC (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCTREVC (VR)', NP, NQ, $ MEM( IPVR-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCTREVC (VL)', NP, NQ, $ MEM( IPVL-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCTREVC (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCTREVC (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || T * Z - Z * D || / ( N*|| T ||*EPS ) * FRESID = 0.0E+0 QRESID = 0.0E+0 CALL PCGET22( 'N', 'N', 'N', N, MEM( IPA ), DESCA, $ MEM( IPVR ), DESCZ, MEM( IPWR ), $ MEM( IPC ), DESCZ, RWORK, RESULT ) FRESID = RESULT( 1 ) QRESID = RESULT( 2 ) * * Compute || T^H * L - L * D^H || / ( N*|| T ||*EPS ) * CALL PCGET22( 'C', 'N', 'C', N, MEM( IPA ), DESCA, $ MEM( IPVL ), DESCZ, MEM( IPWR ), $ MEM( IPC ), DESCZ, RWORK, RESULT ) FRESID = MAX( FRESID, RESULT( 1 ) ) QRESID = MAX( QRESID, RESULT( 2 ) ) * CALL PCCHEKPAD( ICTXT, 'PCGET22 (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGET22 (VR)', NP, NQ, $ MEM( IPVR-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGET22 (VL)', NP, NQ, $ MEM( IPVL-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCGET22 (Z)', NP, NQ, $ MEM( IPC-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0E+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0E+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 2 N^2 flops for PxTREVC * NOPS = 2.0D+0*DBLE( N )**2 * * Calculate total megaflops -- eigenvector calc only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 20 CONTINUE * 30 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 40 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H*Z - Z*D|| / (||T|| * N * eps) = ', G25.7 ) 9985 FORMAT( 'max_j(max|Z(j)| - 1) / ( N * eps ) ', G25.7 ) * STOP * * End of PCEVCDRIVER * END scalapack-1.8.0/TESTING/EIG/pcgseptst.f0000640000175000017500000012256210363532303017230 0ustar muammarmuammar* * SUBROUTINE PCGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, $ THRESH, ORDER, ABSTOL, ISEED, A, COPYA, B, $ COPYB, Z, LDA, WIN, WNEW, IFAIL, ICLUSTR, $ GAP, IPREPAD, IPOSTPAD, WORK, LWORK, RWORK, $ LRWORK, IWORK, LIWORK, NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER IBTYPE, INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, $ LRWORK, LWORK, MATTYPE, N, NOUT, ORDER REAL ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) REAL GAP( * ), RWORK( * ), WIN( * ), WNEW( * ) COMPLEX A( LDA, * ), B( LDA, * ), COPYA( LDA, * ), $ COPYB( LDA, * ), WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PCGSEPTST builds a random matrix A, and a well conditioned * matrix B, runs PCHEGVX() to compute the eigenvalues * and eigenvectors and then calls PCHEGVCHK to compute * the residual. * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) Hermitian matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PCSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) COMPLEX array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PCHEGVX * * COPYA (local workspace) COMPLEX array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * B (local workspace) COMPLEX array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PCHEGVX * * COPYB (local workspace) COMPLEX array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) COMPLEX array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PCGSEPCHK * * W (local workspace) REAL array, dimension (N) * On normal exit from PCHEGVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) COMPLEX array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PCLASIZEGSEP * * RWORK (local workspace) COMPLEX array, dimension (LWORK) * * LRWORK (local input) INTEGER * The length of the array WORK. LRWORK >= RSIZETST as * returned by PCLASIZEGSEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PCLASIZEGSEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0, $ HALF = 0.5E+0 ) COMPLEX PADVAL PARAMETER ( PADVAL = ( 19.25E+0, 1.1E+1 ) ) COMPLEX CZERO PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDRWORK, INDWORK, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, ITYPE, IU, J, LHEEVXSIZE, LLRWORK, $ LLWORK, MAXSIZE, MYCOL, MYROW, NB, NGEN, NLOC, $ NNODES, NP, NPCOL, NPROW, NQ, RES, RSIZECHK, $ RSIZEHEEVX, RSIZEQTQ, RSIZESUBTST, RSIZETST, $ SIZEHEEVX, SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF, $ SIZESUBTST, SIZETMS, SIZETST, VALSIZE, VECSIZE REAL ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, CLATMS, IGAMX2D, $ IGEBR2D, IGEBS2D, PCCHEKPAD, PCELSET, $ PCFILLPAD, PCGSEPSUBTST, PCLASET, PCLASIZEGSEP, $ PCLASIZEHEEVX, PCLATMS, PCMATGEN, SLABAD, $ SLASRT, SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, MOD, REAL, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED ' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PCLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, ISIZEHEEVX, $ SIZESUBTST, RSIZESUBTST, ISIZESUBTST, SIZETST, $ RSIZETST, ISIZETST ) * IF( LRWORK.LT.RSIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDRWORK = INDD + N INDWORK = 1 LLWORK = LWORK - INDWORK + 1 LLRWORK = LRWORK - INDRWORK + 1 * ULP = PSLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PSLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL SLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / REAL( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log Hermitian, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random Hermitian * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N RWORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PCLASET( 'All', N, N, CZERO, CZERO, COPYA, 1, 1, $ DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N RWORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PCLASET( 'All', N, N, CZERO, CONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0E+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * Hermitian, eigenvalues specified * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0E+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * Hermitian, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PCMATGEN( DESCA( CTXT_ ), 'H', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0E+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PCLASET( 'All', N, N, CZERO, CZERO, COPYA, 1, 1, $ DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( SLARAN( ISEED )*REAL( NLOC ) ), N-NGEN ) * CALL CLATMS( IN, IN, 'S', ISEED, 'P', RWORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PCELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PCELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PCELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PCELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 RWORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0E+0 ) * CALL PCLATMS( N, N, 'S', ISEED, 'S', RWORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0E+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL SLASRT( 'I', N, RWORK( INDD ), IINFO ) * * Create the B matrix * CALL PCFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.3E+0 ) * ANORM = ONE * * Update ISEED so that {CLAGSY creates a different Q * ISEED( 4 ) = MOD( ISEED( 4 )+257, 4096 ) ISEED( 3 ) = MOD( ISEED( 3 )+192, 4096 ) ISEED( 2 ) = MOD( ISEED( 2 )+35, 4096 ) ISEED( 1 ) = MOD( ISEED( 1 )+128, 4096 ) CALL PCLATMS( N, N, 'S', ISEED, 'P', RWORK( INDD ), 3, TEN, $ ANORM, N, N, 'N', COPYB, 1, 1, DESCA, ORDER, $ WORK( INDWORK+IPREPAD ), SIZETMS, IINFO ) * CALL PCCHEKPAD( DESCA( CTXT_ ), 'PCLATMS5-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.3E+0 ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PCLASIZEHEEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, RWORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LHEEVXSIZE = MIN( MAXSIZE, LRWORK ) WKNOWN = .FALSE. * CALL PCGSEPSUBTST( WKNOWN, IBTYPE, 'v', 'a', UPLO, N, VL, VU, $ IL, IU, THRESH, ABSTOL, A, COPYA, B, COPYB, $ Z, 1, 1, DESCA, RWORK( INDD ), WIN, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, RWORK( INDRWORK ), $ LLRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PCLASIZEHEEVX to choose IL and IU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LHEEVXSIZE = VECSIZE + INT( SLARAN( ISEED )* $ REAL( MAXSIZE-VECSIZE ) ) * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = MAXSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VECSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LHEEVXSIZE = VALSIZE * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PCLASIZEHEEVX to choose VL and VU for us. * CALL PCLASIZEHEEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * * CALL PCGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, RWORK, $ LRWORK, LHEEVXSIZE, IWORK, ISIZEHEEVX, $ RES, TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9984 )IBTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ), MAXTSTNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS END IF END IF * 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 11X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 11X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 22X, 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PCGSEPDRIVER' ) * * End of PCGSEPTST * END scalapack-1.8.0/TESTING/EIG/pzlasizegsep.f0000640000175000017500000001324110363532303017725 0ustar muammarmuammar SUBROUTINE PZLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, RSIZEQTQ, $ RSIZECHK, SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZESUBTST, RSIZESUBTST, $ ISIZESUBTST, SIZETST, RSIZETST, $ ISIZETST ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * PZLASIZEGSEP computes the amount of memory needed by * ======= * * PZLASIZEGSEP computes the amount of memory needed by * various GSEP test routines, as well as HEGVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PZHEGVX * * SIZEMQRLEFT LWORK for the 1st PZUNMQR call in PZLAGHE * * SIZEMQRRIGHT LWORK for the 2nd PZUNMQR call in PZLAGHE * * SIZEQRF LWORK for PZGEQRF in PZLAGHE * * SIZETMS LWORK for PZLATMS * * RSIZEQTQ LWORK for PZSEPQTQ (nexer complex) * * RSIZECHK LWORK for PZGSEPCHK * * SIZEHEEVX LWORK for PZHEGVX * * RSIZEHEEVX LRWORK for PZHEGVX * * ISIZEHEEVX LIWORK for PZHEGVX * * SIZESUBTST LWORK for PZSUBTST * * RSIZESUBTST LRWORK for PZSUBTST * * ISIZESUBTST LIWORK for PZSUBTST * * SIZETST LWORK for PZTST * * RSIZETST LRWORK for PZTST * * ISIZETST LIWORK for PZTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ANB, CSRC_A, IACOL, IAROW, ICOFFA, ICTXT, $ IROFFA, LCM, LCMQ, LDA, MQ0, MYCOL, MYROW, N, $ NB, NEIG, NHEGST_LWOPT, NHETRD_LWOPT, NN, NNP, $ NP, NP0, NPCOL, NPROW, NPS, NQ, NQ0, RSRC_A, $ SIZECHK, SIZEQTQ, SQNPC * .. * .. External Functions .. * INTEGER ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, SQRT * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = 0 RSIZEQTQ = 0 RSIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZEHEEVX = N + ( NP0+MQ0+NB )*NB RSIZEHEEVX = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZEHEEVX = 6*NNP * ICTXT = DESCA( CTXT_ ) ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS * NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB SIZEHEEVX = MAX( SIZEHEEVX, N+NHETRD_LWOPT, NHEGST_LWOPT ) * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZEHEEVX ) + $ IPREPAD + IPOSTPAD RSIZESUBTST = MAX( RSIZEHEEVX, RSIZEQTQ, RSIZECHK ) + IPREPAD + $ IPOSTPAD ISIZESUBTST = ISIZEHEEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and WORK * SIZETST = 3*( LDA*NP+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Room for DIAG, WIN, WNEW, GAP and RWORK * RSIZETST = 4*( N+IPREPAD+IPOSTPAD ) + RSIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PZHEGVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pzevcdriver.f0000640000175000017500000005367710602576752017605 0ustar muammarmuammar PROGRAM PZEVCDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * June, 2000 * * Purpose * ======= * * PZEVCDRIVER is the main test program for the COMPLEX*16 * SCALAPACK PZTREVC routine. This test driver performs a right and * left eigenvector calculation of a triangular matrix followed by * a residual checks of the calcuated eigenvectors. * * The program must be driven by a short data file and uses the same * input file as the PZNEPDRIVER. An annotated example of a data file * can be obtained by deleting the first 3 characters from the following * 18 lines: * 'SCALAPACK, Version 1.8, NEP (Nonsymmetric EigenProblem) input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'NEP.out' output file name (if any) * 6 device out * 8 number of problems sizes * 1 2 3 4 6 10 100 200 vales of N * 3 number of NB's * 6 20 40 values of NB * 4 number of process grids (ordered pairs of P & Q) * 1 2 1 4 values of P * 1 2 4 1 values of Q * 20.0 threshold * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * ZPLXSZ INTEGER, default = 16 bytes. * ZPLXSZ indicate the length in bytes on the given platform * for a double precision complex. * MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * Further Details * ============== * * Contributed by Mark Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ZPLXSZ, TOTMEM, MEMSIZ, NTESTS PARAMETER ( ZPLXSZ = 16, TOTMEM = 200000000, $ MEMSIZ = TOTMEM / ZPLXSZ, NTESTS = 20 ) COMPLEX*16 PADVAL, ZERO, ONE PARAMETER ( PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICOL, ICTXT, II, III, IMIDPAD, $ INFO, IPA, IPOSTPAD, IPREPAD, IPVL, IPVR, IPW, $ IPWR, IPZ, IROW, J, JJ, JJJ, K, KFAIL, KPASS, $ KSKIP, KTESTS, LDA, LDZ, LWORK, M, MYCOL, $ MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT, NP, $ NPCOL, NPROCS, NPROW, NQ, WORKSIZ REAL THRESH DOUBLE PRECISION ANORM, FRESID, NOPS, QRESID, TMFLOPS * .. * .. Local Arrays .. LOGICAL SELECT( 1 ) INTEGER DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ), $ NBVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) DOUBLE PRECISION CTIME( 2 ), RESULT( 2 ), RWORK( 5000 ), $ WTIME( 2 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, INFOG2L, $ PZCHEKPAD, PZEVCINFO, PZFILLPAD, PZGET22, $ PZLASET, PZMATGEN, PZTREVC, SLBOOT, SLCOMBINE, $ SLTIMER, ZGSUM2D * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PZLANHS EXTERNAL ILCM, NUMROC, PZLANHS * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KFAIL, KPASS, KSKIP, KTESTS / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PZEVCINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL, $ NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS, $ THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 40 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'grid' KSKIP = KSKIP + 1 GO TO 40 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 40 * DO 30 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 30 END IF * DO 20 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.6 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'NB' KSKIP = KSKIP + 1 GO TO 20 END IF * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) IPREPAD = IPREPAD + 1000 IMIDPAD = IMIDPAD + 1000 IPOSTPAD = IPOSTPAD + 1000 ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * * Initialize the array descriptor for the matrix Z * CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 2 ) ) * LDA = DESCA( LLD_ ) LDZ = DESCZ( LLD_ ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 ) * IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 20 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IPA = IPREPAD + 1 IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPVL = IPWR + N + IPOSTPAD + IPREPAD IPVR = IPVL + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD IPW = IPVR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD III = N / NB IF( III*NB.LT.N ) $ III = III + 1 III = 7*III / ILCM( NPROW, NPCOL ) * * LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III ) LWORK = LWORK + MAX( 2*N, ( 8*ILCM( NPROW, NPCOL )+2 )** $ 2 ) * IF( CHECK ) THEN * * Figure the amount of workspace required by the * checking routines PZEVCFCHK and PZLANHS * WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ), $ DESCA( MB_ )*NQ ) + IPOSTPAD * ELSE * WORKSIZ = LWORK + IPOSTPAD * END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Schur reduction', $ ( IPW+WORKSIZ )*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 20 END IF * * Generate matrix Z = In * CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1, $ DESCZ ) CALL PZLASET( 'All', N, N, ZERO, ZERO, MEM( IPVR ), 1, 1, $ DESCZ ) CALL PZLASET( 'All', N, N, ZERO, ZERO, MEM( IPVL ), 1, 1, $ DESCZ ) * * Generate matrix A upper Hessenberg * CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose', $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ), $ DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0, $ NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) CALL PZLASET( 'Lower', MAX( 0, N-1 ), MAX( 0, N-1 ), $ ZERO, ZERO, MEM( IPA ), MIN( N, 2 ), 1, $ DESCA ) * * Calculate inf-norm of A for residual error-checking * IF( CHECK ) THEN CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPVR-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPVL-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ), $ DESCZ( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PZLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PZCHEKPAD( ICTXT, 'PZLANHS', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZLANHS', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ), $ LWORK, IPREPAD, IPOSTPAD, PADVAL ) * END IF * * Set eigenvalues from diagonal * DO 10 JJJ = 1, N CALL INFOG2L( JJJ, JJJ, DESCZ, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, II, JJ ) IF( MYROW.EQ.II .AND. MYCOL.EQ.JJ ) THEN MEM( IPWR-1+JJJ ) = MEM( IPA-1+( ICOL-1 )*LDA+ $ IROW ) ELSE MEM( IPWR-1+JJJ ) = ZERO END IF 10 CONTINUE CALL ZGSUM2D( ICTXT, 'All', ' ', N, 1, MEM( IPWR ), N, $ -1, -1 ) * SELECT( 1 ) = .TRUE. CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Perform eigenvector calculation * CALL PZTREVC( 'B', 'A', SELECT, N, MEM( IPA ), DESCA, $ MEM( IPVL ), DESCZ, MEM( IPVR ), DESCZ, N, $ M, MEM( IPW ), RWORK, INFO ) * CALL SLTIMER( 1 ) * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = * )'PZTREVC INFO=', INFO KFAIL = KFAIL + 1 GO TO 20 END IF * IF( CHECK ) THEN * * Check for memory overwrite in NEP factorization * CALL PZCHEKPAD( ICTXT, 'PZTREVC (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZTREVC (VR)', NP, NQ, $ MEM( IPVR-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZTREVC (VL)', NP, NQ, $ MEM( IPVL-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZTREVC (WR)', N, 1, $ MEM( IPWR-IPREPAD ), N, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZTREVC (WORK)', LWORK, 1, $ MEM( IPW-IPREPAD ), LWORK, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute || T * Z - Z * D || / ( N*|| T ||*EPS ) * FRESID = 0.0D+0 QRESID = 0.0D+0 CALL PZGET22( 'N', 'N', 'N', N, MEM( IPA ), DESCA, $ MEM( IPVR ), DESCZ, MEM( IPWR ), $ MEM( IPZ ), DESCZ, RWORK, RESULT ) FRESID = RESULT( 1 ) QRESID = RESULT( 2 ) * * Compute || T^H * L - L * D^H || / ( N*|| T ||*EPS ) * CALL PZGET22( 'C', 'N', 'C', N, MEM( IPA ), DESCA, $ MEM( IPVL ), DESCZ, MEM( IPWR ), $ MEM( IPZ ), DESCZ, RWORK, RESULT ) FRESID = MAX( FRESID, RESULT( 1 ) ) QRESID = MAX( QRESID, RESULT( 2 ) ) * CALL PZCHEKPAD( ICTXT, 'PZGET22 (A)', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGET22 (VR)', NP, NQ, $ MEM( IPVR-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGET22 (VL)', NP, NQ, $ MEM( IPVL-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PZCHEKPAD( ICTXT, 'PZGET22 (Z)', NP, NQ, $ MEM( IPZ-IPREPAD ), DESCZ( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( ( FRESID.LE.THRESH ) .AND. $ ( ( FRESID-FRESID ).EQ.0.0D+0 ) .AND. $ ( QRESID.LE.THRESH ) .AND. $ ( ( QRESID-QRESID ).EQ.0.0D+0 ) ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE KFAIL = KFAIL + 1 PASSED = 'FAILED' IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 )FRESID WRITE( NOUT, FMT = 9985 )QRESID END IF END IF * ELSE * * Don't perform the checking, only timing * KPASS = KPASS + 1 FRESID = FRESID - FRESID QRESID = QRESID - QRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * 2 N^2 flops for PxTREVC * NOPS = 2.0D+0*DBLE( N )**2 * * Calculate total megaflops -- eigenvector calc only, * -- for WALL and CPU time, and print output * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, PASSED * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 ) ELSE TMFLOPS = 0.0D+0 END IF * IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, PASSED END IF * 20 CONTINUE * 30 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 40 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 )KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 )KPASS WRITE( NOUT, FMT = 9989 )KFAIL ELSE WRITE( NOUT, FMT = 9990 )KPASS END IF WRITE( NOUT, FMT = 9988 )KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME N NB P Q NEP Time MFLOPS CHECK' ) 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' ) 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2, $ 1X, A6 ) 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||H*Z - Z*D|| / (||T|| * N * eps) = ', G25.7 ) 9985 FORMAT( 'max_j(max|Z(j)| - 1) / ( N * eps ) ', G25.7 ) * STOP * * End of PZEVCDRIVER * END scalapack-1.8.0/TESTING/EIG/pssvdcmp.f0000640000175000017500000002674310363532303017057 0ustar muammarmuammar SUBROUTINE PSSVDCMP( M, N, JOBTYPE, S, SC, U, UC, IU, JU, DESCU, $ VT, VTC, IVT, JVT, DESCVT, THRESH, RESULT, $ DELTA, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IU, IVT, JOBTYPE, JU, JVT, LWORK, M, N REAL DELTA, THRESH * .. * .. Array Arguments .. INTEGER DESCU( * ), DESCVT( * ), RESULT( * ) REAL S( * ), SC( * ), U( * ), UC( * ), VT( * ), $ VTC( * ), WORK( * ) * .. * * Purpose * ======== * Testing how accurately "full" and "partial" decomposition options * provided by PSGESVD correspond to each other. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========== * * M (global input) INTEGER * Number of rows of the distributed matrix, for which * SVD was calculated * * N (global input) INTEGER * Number of columns of the distributed matrix, for which * SVD was calculated * * JOBTYPE (global input) INTEGER * Depending on the value of this parameter, * the following comparisons are performed: * * JOBTYPE | COMPARISON * ------------------------------------------- * 2 | | U - UC | / ( M ulp ) > THRESH, * 3 | | VT - VTC | / ( N ulp ) > THRESH * * In addition, for JOBTYPE = 2:4 comparison * | S1 - S2 | / ( SIZE ulp |S| ) > THRESH * is performed. Positive result of any of the comparisons * typically indicates erroneous computations and sets * to one corresponding element of array RESULT * * S (global input) REAL array of singular values * calculated for JOBTYPE equal to 1 * * SC (global input) REAL array of singular values * calculated for JOBTYPE nonequal to 1 * * U (local input) REAL array of left singular * vectors calculated for JOBTYPE equal to 1, local * dimension (MP, SIZEQ), global dimension (M, SIZE) * * UC (local input) REAL array of left singular * vectors calculated for JOBTYPE non equal to 1, local * dimension (MP, SIZEQ), global dimension (M, SIZE) * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U and UC * * V (local input) REAL array of right singular * vectors calculated for JOBTYPE equal to 1, local * dimension (SIZEP, NQ), global dimension (SIZE, N) * * VC (local input) REAL array of right singular * vectors calculated for JOBTYPE non equal to 1, local * dimension (SIZEP, NQ), global dimension (SIZE, N) * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT and * VTC * * THRESH (global input) REAL * The threshold value for the test ratios. A result is * included in the output file if RESULT >= THRESH. The test * ratios are scaled to be O(1), so THRESH should be a small * multiple of 1, e.g., 10 or 100. To have every test ratio * printed, use THRESH = 0. * * RESULT (global input/output) INTEGER array. * Every nonzero entry corresponds to erroneous computation. * * DELTA (global output) REAL * maximum of the available of the following three values * | U - UC | / ( M ulp THRESH ), * | VT - VT | / ( N ulp THRESH ), * | S1 - S2 | / ( SIZE ulp |S| THRESH ) * * WORK (local workspace/output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * ====================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COLPTR, I, INFO, J, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW, NQ, RESULTS, SIZE, SIZEPOS, SIZEQ REAL ACCUR, CMP, NORMDIFS, NORMDIFU, NORMDIFV, $ NORMS, ULP * .. * .. External Functions .. INTEGER NUMROC REAL SLANGE, PSLAMCH, PSLANGE EXTERNAL NUMROC, SLANGE, PSLAMCH, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*DLEN_*DTYPE_*MB_*M_*N_*RSRC_.LT.0 ) $ RETURN * RESULTS = 0 NORMDIFS = 0 NORMDIFU = 0 NORMDIFV = 0 SIZE = MIN( M, N ) * * Sizepos is a number of parameters to pdsvdcmp plus one. It's used * for the error reporting. * SIZEPOS = 17 INFO = 0 CALL BLACS_GRIDINFO( DESCU( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( NPROW.EQ.-1 ) THEN INFO = -607 ELSE CALL CHK1MAT( M, 1, SIZE, SIZEPOS, 1, 1, DESCU, 8, INFO ) CALL CHK1MAT( SIZE, SIZEPOS, N, 2, 1, 1, DESCVT, 11, INFO ) END IF * IF( INFO.EQ.0 ) THEN * * Calculate workspace. * SIZEQ = NUMROC( SIZE, DESCU( NB_ ), MYCOL, 0, NPCOL ) NQ = NUMROC( N, DESCVT( NB_ ), MYCOL, 0, NPCOL ) LWMIN = MAX( SIZEQ, NQ ) + 4 WORK( 1 ) = LWMIN IF( LWORK.EQ.-1 ) $ GO TO 60 IF( LWORK.LT.LWMIN ) THEN INFO = -16 ELSE IF( THRESH.LE.0 ) THEN INFO = -12 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCU( CTXT_ ), 'PSSVDCMP', -INFO ) RETURN END IF * ULP = PSLAMCH( DESCU( CTXT_ ), 'P' ) * * Make comparison of singular values. * NORMS = SLANGE( '1', SIZE, 1, S, SIZE, WORK ) DO 10 I = 1, SIZE SC( I ) = S( I ) - SC( I ) 10 CONTINUE * NORMDIFS = SLANGE( '1', SIZE, 1, SC, SIZE, WORK ) ACCUR = ULP*SIZE*NORMS*THRESH * IF( NORMDIFS.GT.ACCUR ) $ RESULTS = 1 IF( NORMDIFS.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFS = 0 ELSE NORMDIFS = NORMDIFS / ACCUR END IF * IF( JOBTYPE.EQ.2 ) THEN * RESULT( 5 ) = RESULTS ACCUR = ULP*M*THRESH DO 30 J = 1, SIZEQ COLPTR = DESCU( LLD_ )*( J-1 ) DO 20 I = 1, DESCU( LLD_ ) UC( I+COLPTR ) = U( I+COLPTR ) - UC( I+COLPTR ) 20 CONTINUE 30 CONTINUE * NORMDIFU = PSLANGE( '1', M, SIZE, UC, IU, JU, DESCU, WORK ) * IF( NORMDIFU.GE.ACCUR ) $ RESULT( 6 ) = 1 IF( NORMDIFU.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFU = 0 ELSE NORMDIFU = NORMDIFU / ACCUR END IF * ELSE IF( JOBTYPE.EQ.3 ) THEN * RESULT( 7 ) = RESULTS ACCUR = ULP*N*THRESH DO 50 J = 1, NQ COLPTR = DESCVT( LLD_ )*( J-1 ) DO 40 I = 1, DESCVT( LLD_ ) VTC( I+COLPTR ) = VT( I+COLPTR ) - VTC( I+COLPTR ) 40 CONTINUE 50 CONTINUE * NORMDIFV = PSLANGE( '1', SIZE, N, VTC, IVT, JVT, DESCVT, WORK ) * IF( NORMDIFV.GE.ACCUR ) $ RESULT( 8 ) = 1 * IF( NORMDIFV.EQ.0 .AND. ACCUR.EQ.0 ) THEN NORMDIFV = 0 ELSE NORMDIFV = NORMDIFV / ACCUR END IF * ELSE IF( JOBTYPE.EQ.4 ) THEN * RESULT( 9 ) = RESULTS * END IF * CMP = MAX( NORMDIFV, NORMDIFU ) DELTA = MAX( CMP, NORMDIFS ) * 60 CONTINUE * * End of PSSVDCMP * RETURN END scalapack-1.8.0/TESTING/EIG/pdlagsy.f0000640000175000017500000002532410363532303016655 0ustar muammarmuammar* * SUBROUTINE PDLAGSY( N, K, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, INFO ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, N, ORDER * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION A( * ), D( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * PDLAGSY generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. * * This is just a quick implementation which will be replaced in the * future. The random orthogonal matrix is computed by creating a * random matrix and running QR on it. This requires vastly more * computation than necessary, but not significantly more communication * than is used in the rest of this rouinte, and hence is not that much * slower than an efficient solution. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix A. N >= 0. * * K (global input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * ### K must be 0 or N-1, 0 < K < N-1 is not supported yet. * * D (global input) DOUBLE PRECISION array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (local output) DOUBLE PRECISION array * Global dimension (N, N), local dimension (NP, NQ) * The generated n by n symmetric matrix A (the full matrix is * stored). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated and will remain identical on * all processes in the context. * * ORDER (global input) INTEGER * Number of reflectors in the matrix Q * At present, ORDER .NE. N is not supported * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PDLASIZESEP * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, II, IIROW, $ INDAA, INDTAU, INDWORK, IPOSTPAD, IPREPAD, $ IROFFA, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ JJCOL, LDAA, LII, LIII, LJJ, LJJJ, LWMIN, MAXI, $ MB_A, MYCOL, MYROW, NB_A, NP, NPCOL, NPROW, NQ, $ RSRC_A, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DLASET, PDGEQRF, $ PDLASIZESEP, PDMATGEN, PDORMQR, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. * INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Initialize grid information * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check LWORK * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) END IF * LDAA = DESCA( LLD_ ) MB_A = DESCA( MB_ ) NB_A = DESCA( NB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) NP = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ) IPREPAD = 0 IPOSTPAD = 0 CALL PDLASIZESEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) LWMIN = SIZETMS * * Test the input arguments * IF( INFO.EQ.0 ) THEN IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( N.NE.ORDER ) THEN INFO = -9 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( INFO.LT.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDLAGSY', -INFO ) RETURN END IF * INDAA = 1 INDTAU = INDAA + LDAA*MAX( 1, NQ ) INDWORK = INDTAU + MAX( 1, NQ ) * IF( K.NE.0 ) THEN CALL DLASET( 'A', LDAA, NQ, ZERO, ZERO, WORK( INDAA ), LDAA ) * * * Build a random matrix * * CALL PDMATGEN( DESCA( CTXT_ ), 'N', 'N', N, ORDER, $ DESCA( MB_ ), DESCA( NB_ ), WORK( INDAA ), $ DESCA( LLD_ ), DESCA( RSRC_ ), DESCA( CSRC_ ), $ ISEED( 1 ), 0, NP, 0, NQ, MYROW, MYCOL, NPROW, $ NPCOL ) CALL PDGEQRF( N, ORDER, WORK( INDAA ), IA, JA, DESCA, $ WORK( INDTAU ), WORK( INDWORK ), SIZEQRF, INFO ) * END IF * * Build a diagonal matrix A with the eigenvalues specified in D * CALL DLASET( 'A', NP, NQ, ZERO, ZERO, A, DESCA( LLD_ ) ) * IIROW = 0 JJCOL = 0 LII = 1 LJJ = 1 * DO 20 II = 1, N, DESCA( MB_ ) MAXI = MIN( N, II+DESCA( MB_ )-1 ) IF( ( MYROW.EQ.IIROW ) .AND. ( MYCOL.EQ.JJCOL ) ) THEN LIII = LII LJJJ = LJJ DO 10 I = II, MAXI A( LIII+( LJJJ-1 )*DESCA( LLD_ ) ) = D( I ) LIII = LIII + 1 LJJJ = LJJJ + 1 10 CONTINUE END IF IF( MYROW.EQ.IIROW ) $ LII = LII + DESCA( MB_ ) IF( MYCOL.EQ.JJCOL ) $ LJJ = LJJ + DESCA( MB_ ) IIROW = MOD( IIROW+1, NPROW ) JJCOL = MOD( JJCOL+1, NPCOL ) 20 CONTINUE * * A = Q * A * IF( K.NE.0 ) THEN * CALL PDORMQR( 'L', 'Transpose', N, N, ORDER, WORK( INDAA ), IA, $ JA, DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRLEFT, INFO ) * * * A = A * Q' * * CALL PDORMQR( 'R', 'N', N, N, ORDER, WORK( INDAA ), IA, JA, $ DESCA, WORK( INDTAU ), A, IA, JA, DESCA, $ WORK( INDWORK ), SIZEMQRRIGHT, INFO ) * END IF * * End of PDLAGSY * END scalapack-1.8.0/TESTING/EIG/pdgehdrv.f0000640000175000017500000001767610602576752017044 0ustar muammarmuammar SUBROUTINE PDGEHDRV( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 28, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEHDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from the * orthogonal matrix Q, the Hessenberg matrix, and the array TAU * returned by PDGEHRD: * sub( A ) := Q * H * Q' * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows and columns 1:ILO-1 and IHI+1:N. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) reduced to Hessenberg * form by PDGEHRD. The upper triangle and the first sub- * diagonal of sub( A ) contain the upper Hessenberg matrix H, * and the elements below the first subdiagonal, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors. On exit, the original distributed * N-by-N matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors returned by * PDGEHRD. TAU is tied to the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK). * LWORK >= NB*NB + NB*IHLP + MAX[ NB*( IHLP+INLQ ), * NB*( IHLQ + MAX[ IHIP, * IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, * NPCOL ), NB, 0, 0, LCMQ ) ] ) ] * * where NB = MB_A = NB_A, * LCM is the least common multiple of NPROW and NPCOL, * LCM = ILCM( NPROW, NPCOL ), LCMQ = LCM / NPCOL, * * IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * IHLP = NUMROC( IHI-ILO+IROFFA+1, NB, MYROW, ILROW, NPROW ), * IHLQ = NUMROC( IHI-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ), * INLQ = NUMROC( N-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ). * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IHLP, II, IOFF, IPT, $ IPV, IPW, IV, J, JB, JJ, JL, K, MYCOL, MYROW, $ NB, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDLARFB, $ PDLARFT, PDLACPY, PDLASET * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * NB = DESCA( MB_ ) IOFF = MOD( IA+ILO-2, NB ) CALL INFOG2L( IA+ILO-1, JA+ILO-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = IPT + NB * NB IPW = IPV + IHLP * NB JL = MAX( ( ( JA+IHI-2 ) / NB ) * NB + 1, JA + ILO - 1 ) CALL DESCSET( DESCV, IHI-ILO+IOFF+1, NB, NB, NB, IAROW, $ INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, IHLP ) ) * DO 10 J = JL, ILO+JA+NB-IOFF-1, -NB JB = MIN( JA+IHI-J-1, NB ) I = IA + J - JA K = I - IA + 1 IV = K - ILO + IOFF + 1 * * Compute upper triangular matrix T from TAU. * CALL PDLARFT( 'Forward', 'Columnwise', IHI-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'All', IHI-K, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PDLASET( 'Lower', IHI-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-K, N-K+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Columnwise', $ IHI, IHI-K, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, IA, J+1, DESCA, WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the first block separately * IV = IOFF + 1 I = IA + ILO - 1 J = JA + ILO - 1 JB = MIN( NB-IOFF, JA+IHI-J-1 ) * * Compute upper triangular matrix T from TAU. * CALL PDLARFT( 'Forward', 'Columnwise', IHI-ILO, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PDLACPY( 'All', IHI-ILO, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * IF( IHI-ILO.GT.0 ) $ CALL PDLASET( 'Lower', IHI-ILO-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-ILO, N-ILO+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Columnwise', IHI, $ IHI-ILO, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, IA, J+1, DESCA, WORK( IPW ) ) * RETURN * * End of PDGEHDRV * END scalapack-1.8.0/TESTING/EIG/psbrddriver.f0000640000175000017500000004743110363532303017543 0ustar muammarmuammar PROGRAM PSBRDDRIVER * * -- ScaLAPACK testing driver (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * Purpose * ======= * * PSBRDDRIVER is the main test program for the REAL * ScaLAPACK BRD (bidiagonal reduction) routines. * * The program must be driven by a short data file. An annotated * example of a data file can be obtained by deleting the first 3 * characters from the following 13 lines: * 'ScaLAPACK BRD computation input file' * 'PVM machine' * 'BRD.out' output file name * 6 device out * 3 number of problems sizes * 16 20 18 values of M * 16 18 20 values of N * 3 number of NB's * 2 3 5 values of NB * 7 number of process grids (ordered pairs of P & Q) * 1 2 1 4 2 3 8 values of P * 1 2 4 1 3 2 1 values of Q * 1.0 threshold * * * Internal Parameters * =================== * * TOTMEM INTEGER, default = 2000000 * TOTMEM is a machine-specific parameter indicating the * maximum amount of available memory in bytes. * The user should customize TOTMEM to his platform. Remember * to leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. * * INTGSZ INTEGER, default = 4 bytes. * REALSZ INTEGER, default = 4 bytes. * INTGSZ and REALSZ indicate the length in bytes on the * given platform for an integer and a single precision real. * MEM REAL array, dimension ( TOTMEM / REALSZ ) * * All arrays used by SCALAPACK routines are allocated from * this array and referenced by pointers. The integer IPA, * for example, is a pointer to the starting element of MEM for * the matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER MEMSIZ, NTESTS, REALSZ, TOTMEM REAL PADVAL PARAMETER ( REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, NTESTS = 20, $ PADVAL = -9923.0E+0 ) * .. * .. Local Scalars .. LOGICAL CHECK CHARACTER*6 PASSED CHARACTER*80 OUTFILE INTEGER I, IAM, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPTP, IPTQ, IPW, J, K, $ KFAIL, KPASS, KSKIP, KTESTS, LWORK, M, MAXMN, $ MINMN, MNP, MNQ, MP, MYCOL, MYROW, N, NB, $ NDIAG, NGRIDS, NMAT, NNB, NOFFD, NOUT, NPCOL, $ NPROCS, NPROW, NQ, WORKBRD, WORKSIZ REAL ANORM, FRESID, THRESH DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), IERR( 1 ), NBVAL( NTESTS ), $ MVAL( NTESTS ), NVAL( NTESTS ), $ PVAL( NTESTS ), QVAL( NTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, DESCINIT, IGSUM2D, PSCHEKPAD, $ PSBRDINFO, PSFILLPAD, PSLAFCHK, $ PSMATGEN, PSGEBDRV, PSGEBRD, SLBOOT, $ SLCOMBINE, SLTIMER * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLANGE EXTERNAL ICEIL, NUMROC, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data statements .. DATA KTESTS, KPASS, KFAIL, KSKIP / 4*0 / * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) IASEED = 100 CALL PSBRDINFO( OUTFILE, NOUT, NMAT, MVAL, NTESTS, NVAL, NTESTS, $ NNB, NBVAL, NTESTS, NGRIDS, PVAL, NTESTS, QVAL, $ NTESTS, THRESH, MEM, IAM, NPROCS ) CHECK = ( THRESH.GE.0.0E+0 ) * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * DO 30 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'nprow', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID', 'npcol', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'grid' KSKIP = KSKIP + 1 GO TO 30 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * * Go to bottom of loop if this case doesn't use my process * DO 20 J = 1, NMAT * M = MVAL( J ) N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( M.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'M', M IERR( 1 ) = 1 ELSE IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * DO 10 K = 1, NNB * NB = NBVAL( K ) * * Make sure nb is legal * IERR( 1 ) = 0 IF( NB.LT.1 ) THEN IERR( 1 ) = 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'NB', 'NB', NB END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'NB' KSKIP = KSKIP + 1 GO TO 10 END IF * * Padding constants * MP = NUMROC( M, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) MNP = NUMROC( MIN( M, N ), NB, MYROW, 0, NPROW ) MNQ = NUMROC( MIN( M, N ), NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, MP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, MP )+IMIDPAD, IERR( 1 ) ) * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * IF( M.GE.N ) THEN NDIAG = MNQ NOFFD = MNP ELSE NDIAG = MNP NOFFD = NUMROC( MIN( M, N )-1, NB, MYCOL, 0, NPCOL ) END IF * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPTQ = IPE + NOFFD + IPOSTPAD + IPREPAD IPTP = IPTQ + MNQ + IPOSTPAD + IPREPAD IPW = IPTP + MNP + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * LWORK = NB*( MP+NQ+1 ) + NQ WORKBRD = LWORK + IPOSTPAD WORKSIZ = WORKBRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN WORKSIZ = MAX( LWORK, 2*NB*( MP+NQ+NB ) ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) 'Bidiagonal reduction', $ ( IPW+WORKSIZ )*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * Generate the matrix A * CALL PSMATGEN( ICTXT, 'No', 'No', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, MNQ, 1, MEM( IPTQ-IPREPAD ), $ MNQ, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, MNP, 1, MEM( IPTP-IPREPAD ), $ MNP, IPREPAD, IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PSLANGE( 'I', M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSLANGE', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PSFILLPAD( ICTXT, WORKBRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKBRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to bidiagonal form * CALL PSGEBRD( M, N, MEM( IPA ), 1, 1, DESCA, MEM( IPD ), $ MEM( IPE ), MEM( IPTQ ), MEM( IPTP ), $ MEM( IPW ), LWORK, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGEBRD', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBRD', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBRD', MNQ, 1, $ MEM( IPTQ-IPREPAD ), MNQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBRD', MNP, 1, $ MEM( IPTP-IPREPAD ), MNP, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBRD', WORKBRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKBRD-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A-Q*B*P|| / (||A|| * N * eps) * CALL PSGEBDRV( M, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPTQ ), $ MEM( IPTP ), MEM( IPW ), IERR( 1 ) ) CALL PSLAFCHK( 'No', 'No', M, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PSCHEKPAD( ICTXT, 'PSGEBDRV', MP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBDRV', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBDRV', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PSCHEKPAD( ICTXT, 'PSGEBDRV', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, $ IPOSTPAD, PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ.0.0E+0 $ .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) FRESID * KFAIL = KFAIL + 1 PASSED = 'FAILED' END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * ) $ 'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' * END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * BRD requires 8/3 N^3 floating point operations * MAXMN = MAX( M, N ) MINMN = MIN( M, N ) NOPS = 4.0D+0 * DBLE( MINMN ) * DBLE( MINMN ) * $ ( DBLE( MAXMN ) - DBLE( MINMN ) / 3.0D+0 ) NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'WALL', M, N, NB, NPROW, $ NPCOL, WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9993 ) 'CPU ', M, N, NB, NPROW, $ NPCOL, CTIME( 1 ), TMFLOPS, FRESID, PASSED END IF 10 CONTINUE 20 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN KTESTS = KPASS + KFAIL + KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) KTESTS IF( CHECK ) THEN WRITE( NOUT, FMT = 9991 ) KPASS WRITE( NOUT, FMT = 9989 ) KFAIL ELSE WRITE( NOUT, FMT = 9990 ) KPASS END IF WRITE( NOUT, FMT = 9988 ) KSKIP WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9995 FORMAT( 'TIME M N NB P Q BRD Time ', $ ' MFLOPS Residual CHECK' ) 9994 FORMAT( '---- ------ ------ --- ----- ----- --------- ', $ '----------- -------- ------' ) 9993 FORMAT( A4, 1X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X, F9.2, 1X, $ F11.2, 1X, F8.2, 1X, A6 ) 9992 FORMAT( 'Finished', I4, ' tests, with the following results:' ) 9991 FORMAT( I5, ' tests completed and passed residual checks.' ) 9990 FORMAT( I5, ' tests completed without checking.' ) 9989 FORMAT( I5, ' tests completed and failed residual checks.' ) 9988 FORMAT( I5, ' tests skipped because of illegal input values.' ) 9987 FORMAT( 'END OF TESTS.' ) 9986 FORMAT( '||A - Q*B*P|| / (||A|| * N * eps) = ', G25.7 ) * STOP * * End of PSBRDDRIVER * END scalapack-1.8.0/TESTING/EIG/pzlafchk.f0000640000175000017500000002701310363532303017011 0ustar muammarmuammar SUBROUTINE PZLAFCHK( AFORM, DIAG, M, N, A, IA, JA, DESCA, IASEED, $ ANORM, FRESID, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER AFORM, DIAG INTEGER IA, IASEED, JA, M, N DOUBLE PRECISION ANORM, FRESID * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLAFCHK computes the residual * || sub( A ) - sub( Ao ) || / (|| sub( Ao ) ||*eps*MAX(M,N)), * where Ao will be regenerated by the parallel random matrix generator, * sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ) and ||.|| stands for the infini- * ty norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * AFORM (global input) CHARACTER * sub( A ) is overwritten with: * - a symmetric matrix, if AFORM = 'S'; * - a Hermitian matrix, if AFORM = 'H'; * - the transpose of what would normally be generated, * if AFORM = 'T'; * - the conjugate transpose of what would normally be * generated, if AFORM = 'C'; * - otherwise a random matrix. * * DIAG (global input) CHARACTER * if DIAG = 'D' : sub( A ) is diagonally dominant. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be checked. On exit, this * array contains the local pieces of the difference * sub( A ) - sub( Ao ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IASEED (global input) INTEGER * The seed number to generate the original matrix Ao. * * ANORM (global input) DOUBLE PRECISION * The Infinity norm of sub( A ). * * FRESID (global output) DOUBLE PRECISION * The maximum (worst) factorizational error. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK). * LWORK >= MpA0 * NB_A, where * * IROFFA = MOD( IA-1, MB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * WORK is used to store a block of columns of sub( A ). * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = (1.0D+0, 0.0D+0) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ II, IIA, IOFFA, IROFF, JB, JJ, JJA, JN, KK, $ LDA, LDW, LDWP1, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PZMATGEN, ZMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( ICTXT, 'eps' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * * Compute sub( A ) := sub( A ) - sub( Ao ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDW = MAX( 1, MP ) LDWP1 = LDW + 1 LDA = DESCA( LLD_ ) IOFFA = IIA + ( JJA - 1 )*LDA * IF( LSAME( AFORM, 'H' ) ) THEN * * Handle first block of columns separately * II = 1 ICURROW = IAROW ICURCOL = IACOL JB = JN - JA + 1 * IF( MYCOL.EQ.ICURCOL ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 10, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = DBLE( WORK( II+KK*LDWP1 ) ) 10 CONTINUE END IF CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30, JJ = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-JJ, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ WORK, LDW, DESCA( RSRC_ ), DESCA( CSRC_ ), $ IASEED, IIA-1, MP, JJA-1, JB, MYROW, $ MYCOL, NPROW, NPCOL ) IF( MYROW.EQ.ICURROW ) THEN DO 20, KK = 0, JB-1 WORK( II+KK*LDWP1 ) = DBLE( WORK( II+KK*LDWP1 ) ) 20 CONTINUE END IF CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB IOFFA = IOFFA + JB*LDA END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * ELSE * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN JB = JN-JA+1 CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJA-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) JJA = JJA + JB NQ = NQ - JB IOFFA = IOFFA + JB * LDA END IF * * Handle the remaning blocks of columns * DO 40 JJ = JJA, JJA+NQ-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JJA+NQ-JJ ) IOFFA = IIA + ( JJ - 1 )*LDA CALL PZMATGEN( ICTXT, AFORM, DIAG, DESCA( M_ ), DESCA( N_ ), $ DESCA( MB_ ), DESCA( NB_ ), WORK, LDW, $ DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, $ IIA-1, MP, JJ-1, JB, MYROW, MYCOL, NPROW, $ NPCOL ) CALL ZMATADD( MP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), $ LDA ) 40 CONTINUE * END IF * * Calculate factor residual * FRESID = PZLANGE( 'I', M, N, A, IA, JA, DESCA, WORK ) / $ ( MAX( M, N ) * EPS * ANORM ) * RETURN * * End PZLAFCHK * END scalapack-1.8.0/TESTING/EIG/pzlasizeheevx.f0000640000175000017500000001721310363532303020111 0ustar muammarmuammar* * SUBROUTINE PZLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER RANGE INTEGER IL, IU, MAXSIZE, N, VALSIZE, VECSIZE DOUBLE PRECISION VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) DOUBLE PRECISION WIN( * ) * .. * * Purpose * ======= * * PZLASIZEHEEVX computes the amount of memory needed by PZHEEVX * to ensure: * 1) Orthogonal Eigenvectors * 2) Eigenvectors * 3) Eigenvalues * * Arguments * ========= * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * VL (global input/output ) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VL is set * to a random value near an entry in WIN * * VU (global input/output ) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VU is set * to a random value near an entry in WIN * * IL (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * If IL < 0, RANGE='I' and WKNOWN = .TRUE., IL is set * to a random value from 1 to N * * IU (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * If IU < 0, RANGE='I' and WKNOWN = .TRUE., IU is set * to a random value from IL to N * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * ISEED is not touched unless IL, IU, VL or VU are modified. * * WIN (global input) DOUBLE PRECISION array, dimension (N) * If WKNOWN=1, WIN contains the eigenvalues of the matrix. * * MAXSIZE (global output) INTEGER * Workspace required to guarantee that PZHEEVX will return * orthogonal eigenvectors. IF WKNOWN=0, MAXSIZE is set to a * a value which guarantees orthogonality no matter what the * spectrum is. If WKNOWN=1, MAXSIZE is set to a value which * guarantees orthogonality on a matrix with eigenvalues given * by WIN. * * VECSIZE (global output) INTEGER * Workspace required to guarantee that PZHEEVX * will compute eigenvectors. * * VALSIZE (global output) INTEGER * Workspace required to guarantee that PZHEEVX * will compute eigenvalues. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION TWENTY PARAMETER ( TWENTY = 20.0D0 ) * .. * .. Local Scalars .. * INTEGER CLUSTERSIZE, I, ILMIN, IUMAX, MAXCLUSTERSIZE, $ MQ0, MYCOL, MYIL, MYIU, MYROW, NB, NEIG, NN, $ NP0, NPCOL, NPROW DOUBLE PRECISION ANORM, EPS, ORFAC, SAFMIN, VLMIN, VUMAX * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL LSAME, ICEIL, NUMROC, DLARAN, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ORFAC = 1.0D-3 * * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe Minimum' ) NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * VALSIZE = 5*NN + 4*N * IF( WKNOWN ) THEN ANORM = SAFMIN / EPS IF( N.GE.1 ) $ ANORM = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), ANORM ) * IF( LSAME( RANGE, 'I' ) ) THEN IF( IL.LT.0 ) $ IL = INT( DLARAN( ISEED )*DBLE( N ) ) + 1 IF( IU.LT.0 ) $ IU = INT( DLARAN( ISEED )*DBLE( N-IL ) ) + IL IF( N.EQ.0 ) $ IU = 0 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IF( VL.GT.VU ) THEN MYIL = INT( DLARAN( ISEED )*DBLE( N ) ) + 1 MYIU = INT( DLARAN( ISEED )*DBLE( N-MYIL ) ) + MYIL VL = WIN( MYIL ) + TWENTY*EPS*ABS( WIN( MYIL ) ) VU = WIN( MYIU ) + TWENTY*EPS*ABS( WIN( MYIU ) ) VU = MAX( VU, VL+EPS*TWENTY*ABS( VL )+SAFMIN ) END IF END IF * END IF IF( LSAME( RANGE, 'V' ) ) THEN * * Compute ILMIN, IUMAX (based on VL, VU and WIN) * IF( WKNOWN ) THEN VLMIN = VL - TWENTY*EPS*ANORM VUMAX = VU + TWENTY*EPS*ANORM ILMIN = 1 IUMAX = 0 DO 10 I = 1, N IF( WIN( I ).LT.VLMIN ) $ ILMIN = ILMIN + 1 IF( WIN( I ).LT.VUMAX ) $ IUMAX = IUMAX + 1 10 CONTINUE ELSE ILMIN = 1 IUMAX = N END IF ELSE IF( LSAME( RANGE, 'I' ) ) THEN ILMIN = IL IUMAX = IU ELSE IF( LSAME( RANGE, 'A' ) ) THEN ILMIN = 1 IUMAX = N END IF * NEIG = IUMAX - ILMIN + 1 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) VECSIZE = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN * IF( WKNOWN ) THEN CLUSTERSIZE = 1 MAXCLUSTERSIZE = 1 DO 20 I = ILMIN + 1, IUMAX IF( ( WIN( I )-WIN( I-1 ) ).LT.ORFAC*2*ANORM ) THEN CLUSTERSIZE = CLUSTERSIZE + 1 IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE CLUSTERSIZE = 1 END IF 20 CONTINUE IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE MAXCLUSTERSIZE = N END IF * MAXSIZE = VECSIZE + MAX( ( MAXCLUSTERSIZE-1 ), 0 )*N * * RETURN * * End of PZLASIZEHEEVX * END scalapack-1.8.0/TESTING/EIG/pzgsepreq.f0000640000175000017500000002554410602576752017252 0ustar muammarmuammar* * SUBROUTINE PZGSEPREQ( NIN, MEM, MEMSIZE, NOUT, ISEED, NTESTS, $ NSKIPPED, NNOCHECK, NPASSED, INFO ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER INFO, MEMSIZE, NIN, NNOCHECK, NOUT, NPASSED, $ NSKIPPED, NTESTS * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX*16 MEM( MEMSIZE ) * .. * * Purpose * ======= * * PZGSEPREQ performs one request from the input file 'SEP.dat' * A request is the cross product of the specifications in the * input file. PZGSEPREQ prints one line per test. * * Arguments * ========= * * NIN (local input) INTEGER * The unit number for the input file 'SEP.dat' * * MEM (local input) COMPLEX*16 ARRAY, dimension MEMSIZE * Array encompassing the available single precision memory * * MEMSIZE (local input) INTEGER * Size of MEM array * * NOUT (local input) INTEGER * The unit number for output file. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10 * NOUT = 14, output to file, divide thresh by 20 * Only used on node 0. * NOUT = 13, 14 allow the threshold to be tighter for our * internal testing which means that when a user reports * a threshold error, it is more likely to be significant. * * ISEED (global input/output) INTEGER array, dimension 4 * Random number generator seed * * NTESTS (global input/output) INTEGER * NTESTS = NTESTS + tests requested * * NSKIPPED (global input/output) INTEGER * NSKIPPED = NSKIPPED + tests skipped * * NNOCHECK (global input/output) INTEGER * NNOCHECK = NNOCHECK + tests completed but not checked * * NPASSED (global input/output) INTEGER * NPASSED = NPASSED + tests which passed all checks * * * * INFO (global output) INTEGER * 0 = test request ran * -1 = end of file * -2 = incorrect .dat file * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION FIVE PARAMETER ( FIVE = 5.0D+0 ) INTEGER ZPLXSZ, INTGSZ PARAMETER ( ZPLXSZ = 16, INTGSZ = 4 ) INTEGER DBLESZ PARAMETER ( DBLESZ = 8 ) INTEGER MAXSETSIZE PARAMETER ( MAXSETSIZE = 50 ) * .. * .. Local Scalars .. CHARACTER SUBTESTS INTEGER CONTEXT, IAM, IBTYPE, IMIDPAD, INITCON, $ IPOSTPAD, IPREPAD, ISIZEHEEVX, ISIZESUBTST, $ ISIZETST, LDA, LLRWORK, MATSIZE, MATTYPE, $ MYCOL, MYROW, N, NB, NIBTYPES, NMATSIZES, $ NMATTYPES, NNODES, NP, NPCOL, NPCONFIGS, NPROW, $ NQ, NUPLOS, ORDER, PCONFIG, PTRA, PTRB, $ PTRCOPYA, PTRCOPYB, PTRGAP, PTRICLUS, PTRIFAIL, $ PTRIWRK, PTRRWORK, PTRW, PTRW2, PTRWORK, PTRZ, $ RES, RSIZECHK, RSIZEHEEVX, RSIZEQTQ, $ RSIZESUBTST, RSIZETST, SIZEHEEVX, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZESUBTST, SIZETMS, $ SIZETST, UPLO DOUBLE PRECISION ABSTOL, THRESH * .. * .. Local Arrays .. CHARACTER UPLOS( 2 ) INTEGER DESCA( DLEN_ ), MATSIZES( MAXSETSIZE ), $ MATTYPES( MAXSETSIZE ), NBS( MAXSETSIZE ), $ NPCOLS( MAXSETSIZE ), NPROWS( MAXSETSIZE ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, PDSEPINFO, PZGSEPTST, PZLASIZEGSEP * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GET( -1, 0, INITCON ) CALL BLACS_GRIDINIT( INITCON, 'R', 1, NNODES ) * CALL PDSEPINFO( INITCON, IAM, NIN, NOUT, MAXSETSIZE, NMATSIZES, $ MATSIZES, NUPLOS, UPLOS, NPCONFIGS, NPROWS, $ NPCOLS, NBS, NMATTYPES, MATTYPES, 22, SUBTESTS, $ THRESH, ORDER, ABSTOL, INFO ) * CALL BLACS_GRIDEXIT( INITCON ) * IF( INFO.EQ.0 ) THEN * * Thresholds must be bigger for the generalized problem. * THRESH = THRESH*FIVE * DO 50 MATSIZE = 1, NMATSIZES * DO 40 PCONFIG = 1, NPCONFIGS * DO 30 MATTYPE = 1, NMATTYPES * DO 20 UPLO = 1, NUPLOS IF( LSAME( SUBTESTS, 'Y' ) ) THEN NIBTYPES = 3 ELSE NIBTYPES = 1 END IF DO 10 IBTYPE = 1, NIBTYPES * N = MATSIZES( MATSIZE ) ORDER = N * NPROW = NPROWS( PCONFIG ) NPCOL = NPCOLS( PCONFIG ) NB = NBS( PCONFIG ) * NP = NUMROC( N, NB, 0, 0, NPROW ) NQ = NUMROC( N, NB, 0, 0, NPCOL ) IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) * LDA = MAX( NP, 1 ) + IMIDPAD * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', NPROW, $ NPCOL ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, $ MYROW, MYCOL ) IF( MYROW.GE.0 ) THEN CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, $ CONTEXT, LDA, INFO ) CALL PZLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, $ SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZETMS, $ RSIZEQTQ, RSIZECHK, $ SIZEHEEVX, RSIZEHEEVX, $ ISIZEHEEVX, SIZESUBTST, $ RSIZESUBTST, ISIZESUBTST, $ SIZETST, RSIZETST, $ ISIZETST ) * PTRA = 1 PTRZ = PTRA + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYB = PTRZ + LDA*NQ + IPREPAD + IPOSTPAD PTRB = PTRCOPYB + LDA*NQ + IPREPAD + IPOSTPAD PTRCOPYA = PTRB + LDA*NQ + IPREPAD + IPOSTPAD PTRW = PTRCOPYA + LDA*NQ + IPREPAD + IPOSTPAD PTRW2 = PTRW + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, ZPLXSZ / DBLESZ ) PTRWORK = PTRW2 + ICEIL( MAX( N, 1 )+IPREPAD+ $ IPOSTPAD, ZPLXSZ / DBLESZ ) PTRGAP = PTRWORK + SIZETST + IPREPAD + $ IPOSTPAD PTRIFAIL = PTRGAP + ICEIL( NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, $ ZPLXSZ / DBLESZ ) PTRICLUS = PTRIFAIL + $ ICEIL( N+IPREPAD+IPOSTPAD, $ ZPLXSZ / INTGSZ ) PTRIWRK = PTRICLUS + ICEIL( 2*NPROW*NPCOL+ $ IPREPAD+IPOSTPAD, ZPLXSZ / INTGSZ ) PTRRWORK = PTRIWRK + ICEIL( ISIZETST+IPREPAD+ $ IPOSTPAD, ZPLXSZ / INTGSZ ) LLRWORK = ( MEMSIZE - PTRRWORK - IPOSTPAD - $ IPREPAD + 1 )* ( ZPLXSZ / DBLESZ ) NTESTS = NTESTS + 1 IF( LLRWORK.LT.RSIZETST ) THEN NSKIPPED = NSKIPPED + 1 ELSE CALL PZGSEPTST( DESCA, UPLOS( UPLO ), N, $ MATTYPES( MATTYPE ), $ IBTYPE, SUBTESTS, THRESH, $ N, ABSTOL, ISEED, $ MEM( PTRA ), $ MEM( PTRCOPYA ), $ MEM( PTRB ), $ MEM( PTRCOPYB ), $ MEM( PTRZ ), LDA, $ MEM( PTRW ), MEM( PTRW2 ), $ MEM( PTRIFAIL ), $ MEM( PTRICLUS ), $ MEM( PTRGAP ), IPREPAD, $ IPOSTPAD, MEM( PTRWORK ), $ SIZETST, MEM( PTRRWORK ), $ LLRWORK, MEM( PTRIWRK ), $ ISIZETST, NOUT, RES ) * IF( RES.EQ.0 ) THEN NPASSED = NPASSED + 1 ELSE IF( RES.EQ.2 ) THEN NNOCHECK = NNOCHECK + 1 ELSE IF( RES.EQ.3 ) THEN NSKIPPED = NSKIPPED + 1 WRITE( NOUT, FMT = * ) $ ' pZGSEPREQ failed' CALL BLACS_ABORT( CONTEXT, -1 ) END IF CALL BLACS_GRIDEXIT( CONTEXT ) END IF END IF 10 CONTINUE 20 CONTINUE 30 CONTINUE 40 CONTINUE 50 CONTINUE END IF * * RETURN * * End of PZDGSEPREQ * END scalapack-1.8.0/TESTING/EIG/pclasizeheevx.f0000640000175000017500000001714710363532303020070 0ustar muammarmuammar* * SUBROUTINE PCLASIZEHEEVX( WKNOWN, RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN, MAXSIZE, VECSIZE, VALSIZE ) * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. LOGICAL WKNOWN CHARACTER RANGE INTEGER IL, IU, MAXSIZE, N, VALSIZE, VECSIZE REAL VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL WIN( * ) * .. * * Purpose * ======= * * PCLASIZEHEEVX computes the amount of memory needed by PCHEEVX * to ensure: * 1) Orthogonal Eigenvectors * 2) Eigenvectors * 3) Eigenvalues * * Arguments * ========= * * WKNOWN (global input) INTEGER * .FALSE.: WIN does not contain the eigenvalues * .TRUE.: WIN does contain the eigenvalues * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] * will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * * VL (global input/output ) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VL is set * to a random value near an entry in WIN * * VU (global input/output ) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * If VL > VU, RANGE='V' and WKNOWN = .TRUE., VU is set * to a random value near an entry in WIN * * IL (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * If IL < 0, RANGE='I' and WKNOWN = .TRUE., IL is set * to a random value from 1 to N * * IU (global input/output ) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * If IU < 0, RANGE='I' and WKNOWN = .TRUE., IU is set * to a random value from IL to N * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * ISEED is not touched unless IL, IU, VL or VU are modified. * * WIN (global input) REAL array, dimension (N) * If WKNOWN=1, WIN contains the eigenvalues of the matrix. * * MAXSIZE (global output) INTEGER * Workspace required to guarantee that PCHEEVX will return * orthogonal eigenvectors. IF WKNOWN=0, MAXSIZE is set to a * a value which guarantees orthogonality no matter what the * spectrum is. If WKNOWN=1, MAXSIZE is set to a value which * guarantees orthogonality on a matrix with eigenvalues given * by WIN. * * VECSIZE (global output) INTEGER * Workspace required to guarantee that PCHEEVX * will compute eigenvectors. * * VALSIZE (global output) INTEGER * Workspace required to guarantee that PCHEEVX * will compute eigenvalues. * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL TWENTY PARAMETER ( TWENTY = 20.0E0 ) * .. * .. Local Scalars .. * INTEGER CLUSTERSIZE, I, ILMIN, IUMAX, MAXCLUSTERSIZE, $ MQ0, MYCOL, MYIL, MYIU, MYROW, NB, NEIG, NN, $ NP0, NPCOL, NPROW REAL ANORM, EPS, ORFAC, SAFMIN, VLMIN, VUMAX * .. * .. External Functions .. * * LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH, SLARAN EXTERNAL LSAME, ICEIL, NUMROC, PSLAMCH, SLARAN * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, REAL * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ORFAC = 1.0E-3 * * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe Minimum' ) NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * VALSIZE = 5*NN + 4*N * IF( WKNOWN ) THEN ANORM = SAFMIN / EPS IF( N.GE.1 ) $ ANORM = MAX( ABS( WIN( 1 ) ), ABS( WIN( N ) ), ANORM ) * IF( LSAME( RANGE, 'I' ) ) THEN IF( IL.LT.0 ) $ IL = INT( SLARAN( ISEED )*REAL( N ) ) + 1 IF( IU.LT.0 ) $ IU = INT( SLARAN( ISEED )*REAL( N-IL ) ) + IL IF( N.EQ.0 ) $ IU = 0 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IF( VL.GT.VU ) THEN MYIL = INT( SLARAN( ISEED )*REAL( N ) ) + 1 MYIU = INT( SLARAN( ISEED )*REAL( N-MYIL ) ) + MYIL VL = WIN( MYIL ) + TWENTY*EPS*ABS( WIN( MYIL ) ) VU = WIN( MYIU ) + TWENTY*EPS*ABS( WIN( MYIU ) ) VU = MAX( VU, VL+EPS*TWENTY*ABS( VL )+SAFMIN ) END IF END IF * END IF IF( LSAME( RANGE, 'V' ) ) THEN * * Compute ILMIN, IUMAX (based on VL, VU and WIN) * IF( WKNOWN ) THEN VLMIN = VL - TWENTY*EPS*ANORM VUMAX = VU + TWENTY*EPS*ANORM ILMIN = 1 IUMAX = 0 DO 10 I = 1, N IF( WIN( I ).LT.VLMIN ) $ ILMIN = ILMIN + 1 IF( WIN( I ).LT.VUMAX ) $ IUMAX = IUMAX + 1 10 CONTINUE ELSE ILMIN = 1 IUMAX = N END IF ELSE IF( LSAME( RANGE, 'I' ) ) THEN ILMIN = IL IUMAX = IU ELSE IF( LSAME( RANGE, 'A' ) ) THEN ILMIN = 1 IUMAX = N END IF * NEIG = IUMAX - ILMIN + 1 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) VECSIZE = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN * IF( WKNOWN ) THEN CLUSTERSIZE = 1 MAXCLUSTERSIZE = 1 DO 20 I = ILMIN + 1, IUMAX IF( ( WIN( I )-WIN( I-1 ) ).LT.ORFAC*2*ANORM ) THEN CLUSTERSIZE = CLUSTERSIZE + 1 IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE CLUSTERSIZE = 1 END IF 20 CONTINUE IF( CLUSTERSIZE.GT.MAXCLUSTERSIZE ) $ MAXCLUSTERSIZE = CLUSTERSIZE ELSE MAXCLUSTERSIZE = N END IF * MAXSIZE = VECSIZE + MAX( ( MAXCLUSTERSIZE-1 ), 0 )*N * * RETURN * * End of PCLASIZEHEEVX * END scalapack-1.8.0/TESTING/EIG/pclatms.f0000640000175000017500000003260310363532303016653 0ustar muammarmuammar* * SUBROUTINE PCLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, IA, JA, DESCA, ORDER, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER IA, INFO, JA, KL, KU, LWORK, M, MODE, N, ORDER REAL COND, DMAX * .. * .. Array Arguments .. INTEGER DESCA( * ), ISEED( 4 ) REAL D( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLATMS generates random Hermitian matrices with specified * eigenvalues for testing SCALAPACK programs. * * PCLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * ### bandwidth reduction NOT SUPPORTED ### * * Arguments * ========= * * M - (global input) INTEGER * The number of rows of A. Not modified. * * N - (global input) INTEGER * The number of columns of A. Not modified. * ### M .ne. N unsupported * * DIST - (global input) CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - (global input) INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to CLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - (global input) CHARACTER*1 * If SYM='S' or 'H', the generated matrix is Hermitian, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is Hermitian, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * ### SYM = 'N' NOT SUPPORTED ### * Not modified. * * D - (local input/output) REAL array, * dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - (global input) INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - (global input) REAL * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - (global input) REAL * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - (global input) INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is Hermitian. * Not modified. * ### 1 <= KL < N-1 is NOT SUPPORTED ### * * KU - (global input) INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is Hermitian. * Not modified. * ### 1 <= KU < N-1 is NOT SUPPORTED ### * * PACK - (global input) CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * ### PACK must be 'N' all other options NOT SUPPORTED ### * * A - (local output) COMPLEX array * Global dimension (M, N), local dimension (MP, NQ) * On exit A is the desired test matrix. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ORDER - (input) INTEGER * The number of reflectors used to define the orthogonal * matrix Q. A = Q * D * Q' * Higher ORDER requires more computation and communication. * * WORK - (local input/output) COMPLEX array, * dimension (LWORK) * * LWORK - (local input) INTEGER dimension of WORK * LWORK >= SIZETMS as returned by PCLASIZESEP * * INFO - (global output) INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -16 => DESCA is inconsistent * -17 => ORDER not in the range 0 to N inclusive * 1 => Error return from SLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from PCLAGHE * *----------------------------------------------------------------------- * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IDIST, IINFO, IPACK, IRSIGN, ISYM, LLB, $ MNMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL ALPHA, TEMP * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, CLASET, PCHK1MAT, $ PCLAGHE, PXERBLA, SLATM1, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * 1) Decode and Test the input parameters. * Initialize flags & seed. * * INFO = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IF( ( MYROW.GE.NPROW .OR. MYROW.LT.0 ) .OR. $ ( MYCOL.GE.NPCOL .OR. MYCOL.LT.0 ) )RETURN * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IPACK = 1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) * IF( ORDER.EQ.0 ) $ ORDER = N * * Set INFO if an error * IF( NPROW.EQ.-1 ) THEN INFO = -( 1600+CTXT_ ) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, INFO ) IF( INFO.EQ.0 ) THEN IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT. $ ONE ) THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( ( ORDER.LT.0 ) .OR. ( ORDER.GT.N ) ) THEN INFO = -17 END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 16, 0, IDUM1, IDUM2, $ INFO ) END IF * * Check for unsupported features * IF( ISYM.NE.2 ) THEN INFO = -5 ELSE IF( IPACK.NE.0 ) THEN INFO = -12 ELSE IF( KL.GT.0 .AND. KL.LT.M-1 ) THEN INFO = -10 ELSE IF( KU.GT.0 .AND. KU.LT.N-1 ) THEN INFO = -11 ELSE IF( LLB.NE.0 .AND. LLB.NE.M-1 ) THEN INFO = -10 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL SLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) * IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL SSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL CLASET( 'A', NP, NQ, CZERO, CZERO, A, DESCA( LLD_ ) ) * * Hermitian -- A = U D U' * CALL PCLAGHE( M, LLB, D, A, IA, JA, DESCA, ISEED, ORDER, WORK, $ LWORK, IINFO ) * RETURN * * End of PCLATMS * END scalapack-1.8.0/TESTING/EIG/pcgsepdriver.f0000640000175000017500000002254710363532303017713 0ustar muammarmuammar* * PROGRAM PCGSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel COMPLEX Hermitian eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * COMPLEX words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PCHEGVX, the expert driver for the parallel * Hermitian eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, CPLXSZ, NIN PARAMETER ( TOTMEM = 2000000, CPLXSZ = 8, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / CPLXSZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Functions .. REAL SLAMCH EXTERNAL SLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PCGSEPREQ, PSLACHKIEEE, PSLASNBT * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PSLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PSLACHKIEEE( ISIEEE, SLAMCH( 'O' ), SLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Hermitian Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'generalized ' // 'Hermitian eigenvalue routine: PCHEGVX.' WRITE( NOUT, FMT = 9999 )'A scaled residual check, ' // $ 'will be computed' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pCGSEPtst.f).' WRITE( NOUT, FMT = 9999 ) $ 'IBTYPE : Generalized eigenproblem type' // $ ' (see pCHEGVx.f)' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pCGSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : The scaled residual' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PCGSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP IBTYPE SUB WALL CPU ', $ ' CHK CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- ------ --- -------- --------', $ ' --------- -----' ) * * End of PCGSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pzhrdinfo.f0000640000175000017500000003240210363532303017210 0ustar muammarmuammar SUBROUTINE PZHRDINFO( SUMMRY, NOUT, NMAT, NVAL, NVLO, NVHI, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 27, 2000 * * .. Scalar Arguments .. INTEGER IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, $ NGRIDS, NMAT, NNB, NOUT, NPROCS REAL THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY INTEGER NBVAL( LDNBVAL ), NVAL( LDNVAL ), $ NVHI( LDNVAL ), NVLO( LDNVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZHRDINFO get the needed startup information for the Hessenberg * reduction tests and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for * N, IHI & ILO. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of rows & columns in matrix). * * NVLO (global output) INTEGER array, dimension (LDNVAL) * The values of ILO. * * NVHI (global output) INTEGER array, dimension (LDNVAL) * The values of IHI. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N, ILO and IHI. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * 3*LDNVAL+LDNBVAL+2*LDPVAL. Used to pack all input arrays * in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'HRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = * ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of matrices * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDNVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'N', LDNVAL GO TO 20 END IF * * Get values of N, ILO, IHI * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVLO( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVHI( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVLO, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVHI, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS -1 CALL IGEBS2D( ICTXT, 'All', ' ', 1, I, WORK, 1 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'ScaLAPACK Reduction routine to Hessenberg form.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex double precision Hessenberg ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q H Q''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ ' on to the next column of processes.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'HRD time : Time in seconds to compute HRD ' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for HRD ' // $ 'reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'ILO ', ( NVLO( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVLO( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'IHI ', ( NVHI( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVHI( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 3*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', 1, I, WORK, 1, 0, 0 ) * I = 1 CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVLO, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVHI, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' ILLEGAL INPUT IN FILE ', 40A, '. ABORTING RUN.' ) 9997 FORMAT( ' NUMBER OF VALUES OF ', 5A, $ ' IS LESS THAN 1 OR GREATER ', 'THAN ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G14.7 ) * * End of PZHRDINFO * END scalapack-1.8.0/TESTING/EIG/pdsepqtq.f0000640000175000017500000002533510363532303017055 0ustar muammarmuammar* * SUBROUTINE PDSEPQTQ( MS, NV, THRESH, Q, IQ, JQ, DESCQ, C, IC, JC, $ DESCC, PROCDIST, ICLUSTR, GAP, WORK, LWORK, $ QTQNRM, INFO, RES ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IC, INFO, IQ, JC, JQ, LWORK, MS, NV, RES DOUBLE PRECISION QTQNRM, THRESH * .. * .. Array Arguments .. * INTEGER DESCC( * ), DESCQ( * ), ICLUSTR( * ), $ PROCDIST( * ) DOUBLE PRECISION C( * ), GAP( * ), Q( * ), WORK( * ) * .. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Purpose * ======= * * Compute |I - QT * Q| / (ulp * n) * * Arguments * ========= * * NP = number of local rows in C * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in Q * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) DOUBLE PRECISION array, * global dimension (MS, NV), local dimension (LDQ, NQ) * * Contains the eigenvectors as computed by PDSTEIN * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) DOUBLE PRECISION array, * global dimension (NV, NV), local dimension (DESCC(DLEN_), NQ) * * Accumulator for computing I - QT * Q * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (input) DOUBLE PRECISION array, dimension (NV) * All procesors have an identical copy of W() * * Contains the computed eigenvalues * * PROCDIST (global input) INTEGER array dimension (NPROW*NPCOL+1) * Identifies which eigenvectors are the last to be computed * by a given process * * ICLUSTR (global input) INTEGER array dimension (2*P) * This input array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace. * This should be the output of PDSTEIN. * * GAP (global input) DOUBLE PRECISION array, dimension (P) * This input array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * Where: * NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) * MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * * QTQNRM (global output) DOUBLE PRECISION * |QTQ -I| / EPS * * RES (global output) INTEGER * 0 if the test passes i.e. |I - QT * Q| / (ulp * n) <= THRESH * 1 if the test fails i.e. |I - QT * Q| / (ulp * n) > THRESH * * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, NEGONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, $ NEGONE = -1.0D+0 ) * .. * .. Intrinsic Functions .. * INTRINSIC DBLE, MAX * .. * .. Local Scalars .. INTEGER CLUSTER, FIRSTP, IMAX, IMIN, JMAX, JMIN, LWMIN, $ MQ0, MYCOL, MYROW, NEXTP, NP0, NPCOL, NPROW DOUBLE PRECISION NORM, QTQNRM2, ULP * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL NUMROC, PDLAMCH, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PDGEMM, PDLASET, $ PDMATADD, PXERBLA * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * RES = 0 ULP = PDLAMCH( DESCC( CTXT_ ), 'P' ) * CALL BLACS_GRIDINFO( DESCC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IQ, JQ, DESCQ, 7, INFO ) CALL CHK1MAT( NV, 1, MS, 2, IC, JC, DESCC, 11, INFO ) * IF( INFO.EQ.0 ) THEN NP0 = NUMROC( NV, DESCC( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( NV, DESCC( NB_ ), 0, 0, NPCOL ) * LWMIN = 2 + MAX( DESCC( MB_ ), 2 )*( 2*NP0+MQ0 ) * IF( IQ.NE.1 ) THEN INFO = -5 ELSE IF( JQ.NE.1 ) THEN INFO = -6 ELSE IF( IC.NE.1 ) THEN INFO = -9 ELSE IF( JC.NE.1 ) THEN INFO = -10 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -16 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCC( CTXT_ ), 'PDSEPQTQ', -INFO ) RETURN END IF * * C = Identity matrix * CALL PDLASET( 'A', NV, NV, ZERO, ONE, C, IC, JC, DESCC ) * * C = C - QT * Q * IF( NV*MS.GT.0 ) THEN CALL PDGEMM( 'Transpose', 'N', NV, NV, MS, NEGONE, Q, 1, 1, $ DESCQ, Q, 1, 1, DESCQ, ONE, C, 1, 1, DESCC ) END IF * * Allow for poorly orthogonalized eigenvectors for large clusters * NORM = PDLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) QTQNRM = NORM / ( DBLE( MAX( MS, 1 ) )*ULP ) * CLUSTER = 1 10 CONTINUE DO 20 FIRSTP = 1, NPROW*NPCOL IF( PROCDIST( FIRSTP ).GE.ICLUSTR( 2*( CLUSTER-1 )+1 ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE * IMIN = ICLUSTR( 2*CLUSTER-1 ) JMAX = ICLUSTR( 2*CLUSTER ) * * IF( IMIN.EQ.0 ) $ GO TO 60 * DO 40 NEXTP = FIRSTP, NPROW*NPCOL IMAX = PROCDIST( NEXTP ) JMIN = IMAX + 1 * * CALL PDMATADD( IMAX-IMIN+1, JMAX-JMIN+1, ZERO, C, IMIN, JMIN, $ DESCC, GAP( CLUSTER ) / 0.01D+0, C, IMIN, JMIN, $ DESCC ) CALL PDMATADD( JMAX-JMIN+1, IMAX-IMIN+1, ZERO, C, JMIN, IMIN, $ DESCC, GAP( CLUSTER ) / 0.01D+0, C, JMIN, IMIN, $ DESCC ) IMIN = IMAX * IF( ICLUSTR( 2*CLUSTER ).LT.PROCDIST( NEXTP+1 ) ) $ GO TO 50 40 CONTINUE 50 CONTINUE * CLUSTER = CLUSTER + 1 GO TO 10 60 CONTINUE * * Compute the norm of C * NORM = PDLANGE( '1', NV, NV, C, 1, 1, DESCC, WORK ) * QTQNRM2 = NORM / ( DBLE( MAX( MS, 1 ) )*ULP ) * IF( QTQNRM2.GT.THRESH ) THEN RES = 1 QTQNRM = QTQNRM2 END IF RETURN * * End of PDSEPQTQ * END scalapack-1.8.0/TESTING/EIG/pdsepchk.f0000640000175000017500000002426210363532303017013 0ustar muammarmuammar* * SUBROUTINE PDSEPCHK( MS, NV, A, IA, JA, DESCA, EPSNORMA, THRESH, $ Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, W, WORK, $ LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, IQ, JA, JC, JQ, LWORK, MS, NV, RESULT DOUBLE PRECISION EPSNORMA, THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCC( * ), DESCQ( * ) DOUBLE PRECISION A( * ), C( * ), Q( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * Compute |AQ- QL| / (EPSNORMA * N) * where EPSNORMA = (abstol + eps)*norm(A) when called by pdsqpsubtst. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, C and Q * MQ = number of local columns in A * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, C and Q * Also, the number of global columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) DOUBLE PRECISION pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the MS-by-MS * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * EPSNORMA (input) DOUBLE PRECISION * abstol + eps * inf.norm(A) * Abstol is absolute tolerence for the eigenvalues and is set * in the calling routines, pdsepsubtst and pdsqpsubtst. * * THRESH (input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) DOUBLE PRECISION array * global dimension (MS, NV), local dimension (DESCA(DLEN_), NQ) * * Contains the eigenvectors as computed by PDSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) DOUBLE PRECISION array, * global dimension (NV, NV), local dimension (DESCA(DLEN_), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (global input) DOUBLE PRECISION array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) DOUBLE PRECISION array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) DOUBLE PRECISION * |AQ- QL| / ( EPSNROMA * MS ) * * RESULT (global output) INTEGER * 0 if the test passes i.e. * |AQ -QL| / (abstol + eps * norm(A) ) <= n* THRESH * 1 if the test fails i.e. * |AQ -QL| / (abstol + eps * norm(A) ) > n * THRESH * * .. Local Scalars .. * INTEGER INFO, J, LOCALCOL, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ, PCOL DOUBLE PRECISION NORM * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, NEGONE PARAMETER ( ONE = 1.0D+0, NEGONE = -1.0D+0 ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC DOUBLE PRECISION PDLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DLACPY, DSCAL, PDGEMM, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 12, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IC, JC, DESCC, 16, INFO ) * IF( INFO.EQ.0 ) THEN * MP = NUMROC( MS, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -10 ELSE IF( JQ.NE.1 ) THEN INFO = -11 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( IC.NE.1 ) THEN INFO = -14 ELSE IF( JC.NE.1 ) THEN INFO = -15 ELSE IF( LWORK.LT.NQ ) THEN INFO = -19 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSEPCHK', -INFO ) RETURN END IF * * C = Q * W * CALL DLACPY( 'A', MP, NQ, Q, DESCQ( LLD_ ), C, DESCC( LLD_ ) ) * * DO 10 J = 1, NV PCOL = INDXG2P( J, DESCC( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( J, DESCC( NB_ ), 0, 0, NPCOL ) * IF( MYCOL.EQ.PCOL ) THEN CALL DSCAL( MP, W( J ), C( ( LOCALCOL-1 )*DESCC( LLD_ )+1 ), $ 1 ) END IF 10 CONTINUE * * * C = C - A * Q * CALL PDGEMM( 'N', 'N', MS, NV, MS, NEGONE, A, 1, 1, DESCA, Q, 1, $ 1, DESCQ, ONE, C, 1, 1, DESCC ) * * Compute the norm of C * * NORM = PDLANGE( 'M', MS, NV, C, 1, 1, DESCC, WORK ) * * TSTNRM = NORM / EPSNORMA / MAX( MS, 1 ) * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0D0 ) ) THEN RESULT = 1 END IF * * RETURN * * End of PDSEPCHK * END scalapack-1.8.0/TESTING/EIG/pdgsepdriver.f0000640000175000017500000002337710363532303017716 0ustar muammarmuammar* * PROGRAM PDGSEPDRIVER * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Parallel DOUBLE PRECISION symmetric eigenproblem test driver * * The user should modify TOTMEM to indicate the maximum amount of * memory in bytes her system has. Remember to leave room in memory * for operating system, the BLACS buffer, etc. INTSIZ and DBLSIZ * indicate the length in bytes on the given platform for an integer * and a double precision real. * For example, on our system with 8 MB of memory, TOTMEM=6500000 * (leaves 1.5 MB for OS, code, BLACS buffer, etc), the length of a * DOUBLE is 8, and an integer takes up 4 bytes. Some playing around * to discover what the maximum value you can set MEMSIZ to may be * required. * All arrays used by factorization and solve are allocated out of * big array called MEM. * * The full tester requires approximately (5 n + 5 n^2/p + slop) * DOUBLE PRECISION words and 6*n integer words. * So, TOTMEM should be set to at least 1.1 * 8 * (5n + 5n^2/p) * * WHAT WE TEST * ============ * * This routine tests PDSYGVX, the expert driver for the parallel * symmetric eigenvalue problem. We would like to cover all * possible combinations of: matrix size, process configuration * (nprow and npcol), block size (nb), matrix type (??), range * of eigenvalue (all, by value, by position), sorting options, * and upper vs. lower storage. * * We intend to provide two types of test input files, an * installation test and a thorough test. * * We also intend that the reports be meaningful. Our input file * will allow multiple requests where each request is a cross product * of the following sets: * matrix sizes: n * process configuration triples: nprow, npcol, nb * matrix types: * eigenvalue requests: all, by value, by position * storage (upper vs. lower): uplo * * TERMS: * Request - means a set of tests, which is the cross product of * a set of specifications from the input file. * Test - one element in the cross product, i.e. a specific input * size and type, process configuration, etc. * * .. Parameters .. * INTEGER TOTMEM, DBLESZ, NIN PARAMETER ( TOTMEM = 2000000, DBLESZ = 8, NIN = 11 ) INTEGER MEMSIZ PARAMETER ( MEMSIZ = TOTMEM / DBLESZ ) * .. * .. Local Scalars .. CHARACTER HETERO CHARACTER*80 SUMMRY, USRINFO INTEGER CONTEXT, IAM, INFO, ISIEEE, MAXNODES, NNOCHECK, $ NOUT, NPASSED, NPROCS, NSKIPPED, NTESTS * .. * .. Local Arrays .. * INTEGER ISEED( 4 ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Functions .. DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH * .. * .. External Subroutines .. * EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_PINFO, BLACS_SETUP, $ IGAMN2D, PDGSEPREQ, PDLACHKIEEE, PDLASNBT * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'SEP.dat', STATUS = 'OLD' ) READ( NIN, FMT = * )SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 )USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) READ( NIN, FMT = * )MAXNODES READ( NIN, FMT = * )HETERO END IF * IF( NPROCS.LT.1 ) THEN CALL BLACS_SETUP( IAM, MAXNODES ) NPROCS = MAXNODES END IF * CALL BLACS_GET( -1, 0, CONTEXT ) CALL BLACS_GRIDINIT( CONTEXT, 'R', 1, NPROCS ) * CALL PDLASNBT( ISIEEE ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ( ISIEEE.NE.0 ) ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = 9996 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9991 ) WRITE( NOUT, FMT = 9990 ) END IF * CALL PDLACHKIEEE( ISIEEE, DLAMCH( 'O' ), DLAMCH( 'U' ) ) * CALL IGAMN2D( CONTEXT, 'a', ' ', 1, 1, ISIEEE, 1, 1, 1, -1, -1, $ 0 ) * IF( ISIEEE.EQ.0 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9987 ) END IF GO TO 20 END IF * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9986 ) END IF * END IF IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK symmetric Eigendecomposition routines.' WRITE( NOUT, FMT = 9999 )USRINFO WRITE( NOUT, FMT = 9999 )' ' WRITE( NOUT, FMT = 9999 )'Running tests of the parallel ' // $ 'generalized ' // 'symmetric eigenvalue routine: PDSYGVX.' WRITE( NOUT, FMT = 9999 )'A scaled residual check, ' // $ 'will be computed' WRITE( NOUT, FMT = 9999 ) WRITE( NOUT, FMT = 9999 )'An explanation of the ' // $ 'input/output parameters follows:' WRITE( NOUT, FMT = 9999 )'RESULT : passed; or ' // $ 'an indication of which eigen request test failed' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of rows and columns ' // $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks' // $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less ' // $ 'than THRESH, RESULT is flagged as PASSED.' WRITE( NOUT, FMT = 9999 ) $ ' : the QTQ norm is allowed to exceed THRESH' // $ ' for those eigenvectors' WRITE( NOUT, FMT = 9999 )' : which could not be ' // $ 'reorthogonalized for lack of workspace.' WRITE( NOUT, FMT = 9999 ) $ 'TYP : matrix type (see pDGSEPtst.f).' WRITE( NOUT, FMT = 9999 ) $ 'IBTYPE : Generalized eigenproblem type' // $ ' (see pDSYGVx.f)' WRITE( NOUT, FMT = 9999 )'SUB : Subtests ' // $ '(see pDGSEPtst).f' WRITE( NOUT, FMT = 9999 )'CHK : The scaled residual' WRITE( NOUT, FMT = 9999 )' ' END IF * NTESTS = 0 NPASSED = 0 NSKIPPED = 0 NNOCHECK = 0 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) END IF * 10 CONTINUE * ISEED( 1 ) = 139 ISEED( 2 ) = 1139 ISEED( 3 ) = 2139 ISEED( 4 ) = 3139 * CALL PDGSEPREQ( NIN, MEM, MEMSIZ, NOUT, ISEED, NTESTS, NSKIPPED, $ NNOCHECK, NPASSED, INFO ) IF( INFO.EQ.0 ) $ GO TO 10 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9985 )NTESTS WRITE( NOUT, FMT = 9984 )NPASSED WRITE( NOUT, FMT = 9983 )NNOCHECK WRITE( NOUT, FMT = 9982 )NSKIPPED WRITE( NOUT, FMT = 9981 )NTESTS - NPASSED - NSKIPPED - $ NNOCHECK WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF * * Uncomment this line on SUN systems to avoid the useless print out * * CALL IEEE_FLAGS( 'clear', 'exception', 'underflow', '') * * * 20 CONTINUE IF( IAM.EQ.0 ) THEN CLOSE ( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_GRIDEXIT( CONTEXT ) * CALL BLACS_EXIT( 0 ) STOP * * 9999 FORMAT( A ) 9998 FORMAT( ' I am about to check to make sure that overflow' ) 9997 FORMAT( ' is handled in the ieee default manner. If this' ) 9996 FORMAT( ' is the last output you see, you should assume' ) 9995 FORMAT( ' that overflow caused a floating point exception.' ) 9994 FORMAT( ' In that case, we recommend that you add -DNO_IEEE' ) 9993 FORMAT( ' to the CDEFS line in SLmake.inc.' ) 9992 FORMAT( ' Alternatively, you could set CDEFS in SLmake.inc ' ) 9991 FORMAT( ' to enable the default ieee behaviour, However, this' ) 9990 FORMAT( ' may result in good or very bad performance.' ) 9989 FORMAT( ' Either signed zeroes or signed infinities ' ) 9988 FORMAT( ' work incorrectly or your system. Change your' ) 9987 FORMAT( ' SLmake.inc as suggested above.' ) * 9986 FORMAT( ' Your system appears to handle ieee overflow.' ) * 9985 FORMAT( 'Finished ', I6, ' tests, with the following results:' ) 9984 FORMAT( I5, ' tests completed and passed residual checks.' ) 9983 FORMAT( I5, ' tests completed without checking.' ) 9982 FORMAT( I5, ' tests skipped for lack of memory.' ) 9981 FORMAT( I5, ' tests completed and failed.' ) 9980 FORMAT( 'END OF TESTS.' ) 9979 FORMAT( ' N NB P Q TYP IBTYPE SUB WALL CPU ', $ ' CHK CHECK' ) 9978 FORMAT( ' ----- --- --- --- --- ------ --- -------- --------', $ ' --------- -----' ) * * End of PDGSEPDRIVER * END scalapack-1.8.0/TESTING/EIG/pcttrdtester.f0000640000175000017500000006216610363532303017746 0ustar muammarmuammar SUBROUTINE PCTTRDTESTER( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL, $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 24, 2000 * * .. Scalar Arguments .. LOGICAL CHECK INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS, $ TOTMEM REAL THRESH * .. * .. Array Arguments .. INTEGER NVAL( * ) COMPLEX MEM( * ) * .. * * Purpose * ======= * * PCTTRDTESTER tests PCHETTRD * * Arguments * ========= * * IAM (local input) INTEGER * The local process number * * NPROCS (global input) INTEGER * The number of processors * * CHECK (global input) LOGICAL * Specifies whether the user wants to check the answer * * NOUT (local input) INTEGER * File descriptor * * THRESH (global input) REAL * Acceptable error threshold * * NVAL (global input) INTEGER array dimension NMAT * The matrix sizes to test * * NMAT (global input) INTEGER * The number of matrix sizes to test * * MEM (local input) COMPLEX array dimension MEMSIZ * Where: * MEMSIZ = TOTMEM / CPLXSZ * * TOTMEM (global input) INTEGER * Number of bytes in MEM * * KPASS (local input/output) INTEGER * The number of tests which passed. Only relevant on * processor 0. * * KFAIL (local input/output) INTEGER * The number of tests which failed. Only relevant on * processor 0. * * KSKIP (local input/output) INTEGER * The number of tests which were skipped. Only relevant on * processor 0. * * ================================================================ * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER REALSZ, CPLXSZ COMPLEX PADVAL PARAMETER ( REALSZ = 4, CPLXSZ = 8, $ PADVAL = ( -9923.0E+0, -9924.0E+0 ) ) INTEGER TIMETESTS PARAMETER ( TIMETESTS = 11 ) INTEGER TESTS PARAMETER ( TESTS = 8 ) INTEGER MINTIMEN PARAMETER ( MINTIMEN = 8 ) * .. * .. Local Scalars .. LOGICAL TIME CHARACTER UPLO CHARACTER*6 PASSED INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD, $ IPE, IPOSTPAD, IPREPAD, IPT, IPW, ITEMP, J, K, $ LCM, LWMIN, MAXTESTS, MEMSIZ, MYCOL, MYROW, N, $ NB, NDIAG, NGRIDS, NN, NOFFD, NP, NPCOL, NPROW, $ NPS, NQ, SPLITSTIMED, WORKSIZ, WORKTRD REAL ANORM, FRESID DOUBLE PRECISION NOPS, TMFLOPS * .. * .. Local Arrays .. INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ), $ BALTEST( TESTS ), BALTIME( TIMETESTS ), $ DESCA( DLEN_ ), DESCD( DLEN_ ), IERR( 1 ), $ INTERTEST( TESTS ), INTERTIME( TIMETESTS ), $ PNBTEST( TESTS ), PNBTIME( TIMETESTS ), $ TWOGEMMTEST( TESTS ), TWOGEMMTIME( TIMETESTS ) DOUBLE PRECISION CTIME( 100 ), WTIME( 100 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, DESCINIT, $ IGEBR2D, IGEBS2D, IGSUM2D, PCCHEKPAD, $ PCFILLPAD, PCHETDRV, PCHETTRD, PCLAFCHK, $ PCLATRAN, PCMATGEN, SLBOOT, SLCOMBINE, SLTIMER * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, NUMROC, PJLAENV REAL PCLANHE EXTERNAL LSAME, ICEIL, ILCM, NUMROC, PJLAENV, PCLANHE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, REAL, SQRT * .. * * .. Scalars in Common .. INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE, $ LLTBLOCK, MINSZ, PNB, TIMEINTERNALS, TIMING, $ TRSBLOCK, TWOGEMMS * .. * .. Common blocks .. COMMON / BLOCKSIZES / GSTBLOCK, LLTBLOCK, BCKBLOCK, $ TRSBLOCK COMMON / MINSIZE / MINSZ COMMON / PJLAENVTIMING / TIMING COMMON / TAILOREDOPTS / PNB, ANB, INTERLEAVE, $ BALANCED, TWOGEMMS COMMON / TIMECONTROL / TIMEINTERNALS * .. * .. Data statements .. DATA BALTIME / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 / DATA INTERTIME / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 / DATA TWOGEMMTIME / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 / DATA ANBTIME / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16, $ 16 / DATA PNBTIME / 32, 32, 32, 32, 32, 32, 32, 32, 32, $ 16, 64 / DATA BALTEST / 0, 0, 0, 0, 1, 1, 1, 1 / DATA INTERTEST / 0, 0, 1, 1, 0, 0, 1, 1 / DATA TWOGEMMTEST / 0, 1, 0, 1, 0, 1, 0, 1 / DATA ANBTEST / 1, 2, 3, 16, 1, 2, 3, 16 / DATA PNBTEST / 1, 16, 8, 1, 16, 8, 1, 16 / * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * IASEED = 100 SPLITSTIMED = 0 NB = 1 UPLO = 'L' MEMSIZ = TOTMEM / CPLXSZ * * Print headings * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = * ) END IF * * Loop over different process grids * NGRIDS = INT( SQRT( REAL( NPROCS ) ) ) * DO 30 NN = 1, NGRIDS * NPROW = NN NPCOL = NN IERR( 1 ) = 0 * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of loop if this case doesn't use my process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 30 * DO 20 J = 1, NMAT * N = NVAL( J ) * * Make sure matrix information is correct * IERR( 1 ) = 0 IF( N.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N IERR( 1 ) = 1 END IF * * Make sure no one had error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'matrix' KSKIP = KSKIP + 1 GO TO 20 END IF * * Loop over different blocking sizes * IF( N.GT.MINTIMEN ) THEN * * For timing tests, we perform one or two extra tests. * Both of these extra tests are performed with the * default values for the performance tuning parameters. * The second extra test (which is only performed if * split times are non-zero) is performed with timeinternals * set to 1 (which forces barrier syncs between many * phases of the computation). * TIME = .TRUE. MAXTESTS = TIMETESTS + 2 ELSE TIME = .FALSE. MAXTESTS = TESTS END IF * * DO 10 K = 1, MAXTESTS TIMEINTERNALS = 0 IF( TIME ) THEN IF( K.GE.MAXTESTS-1 ) THEN * * For the last two timings, we let pjlaenv set * the execution path values. These dummy * initializations aren't really necessary, * but they illustrate the fact that these values are * set in xpjlaenv. The dummy call to pjlaenv * has the side effect of setting ANB. * MINSZ = -13 BALANCED = -13 INTERLEAVE = -13 TWOGEMMS = -13 ANB = -13 PNB = -13 TIMING = 1 DUMMY = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, $ 0, 0 ) IF( K.EQ.MAXTESTS ) $ TIMEINTERNALS = 1 ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTIME( K ) INTERLEAVE = INTERTIME( K ) TWOGEMMS = TWOGEMMTIME( K ) ANB = ANBTIME( K ) PNB = PNBTIME( K ) END IF ELSE TIMING = 0 MINSZ = 1 BALANCED = BALTEST( K ) INTERLEAVE = INTERTEST( K ) TWOGEMMS = TWOGEMMTEST( K ) ANB = ANBTEST( K ) PNB = PNBTEST( K ) END IF * * Skip the last test (with timeinternals = 1) if * PCHETTRD is not collecting the split times. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 1, 1, SPLITSTIMED, 1, $ 0, 0 ) END IF * * IF( SPLITSTIMED.EQ.0 .AND. K.EQ.MAXTESTS ) $ GO TO 10 * * The following hack tests to make sure that PNB need not * be the same on all processes. (Provided that PNB is set * to 1 in the TRD.dat file.) * IF( PNB.EQ.1 ) $ PNB = 1 + IAM * * Padding constants * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) IF( CHECK ) THEN IPREPAD = MAX( NB, NP ) IMIDPAD = NB IPOSTPAD = MAX( NB, NQ ) ELSE IPREPAD = 0 IMIDPAD = 0 IPOSTPAD = 0 END IF * * Initialize the array descriptor for the matrix A * * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) ) * CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1, $ INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).LT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'descriptor' KSKIP = KSKIP + 1 GO TO 10 END IF * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( IPREPAD+1 ) * NDIAG = NQ IF( LSAME( UPLO, 'U' ) ) THEN NOFFD = NQ ELSE NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL ) END IF NDIAG = ICEIL( REALSZ*NDIAG, CPLXSZ ) NOFFD = ICEIL( REALSZ*NOFFD, CPLXSZ ) * IPA = IPREPAD + 1 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD IPE = IPD + NDIAG + IPOSTPAD + IPREPAD IPT = IPE + NOFFD + IPOSTPAD + IPREPAD IPW = IPT + NQ + IPOSTPAD + IPREPAD * * Calculate the amount of workspace required for the * reduction * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORKTRD = LWMIN + IPOSTPAD WORKSIZ = WORKTRD * * Figure the amount of workspace required by the check * IF( CHECK ) THEN ITEMP = 2*NQ + NP IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) + $ ITEMP END IF ITEMP = MAX( ICEIL( REALSZ*ITEMP, CPLXSZ ), $ 2*( NB+NP )*NB ) WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD END IF * * Check for adequate memory for problem size * IERR( 1 ) = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9996 )'Tridiagonal reduction', $ ( IPW+WORKSIZ )*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 )'MEMORY' KSKIP = KSKIP + 1 GO TO 10 END IF * * * * Generate the matrix A * CALL PCMATGEN( ICTXT, 'Hemm', 'N', DESCA( M_ ), $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ), $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ), $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ, $ MYROW, MYCOL, NPROW, NPCOL ) * * * Need Infinity-norm of A for checking * IF( CHECK ) THEN CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ), $ DESCA( LLD_ ), IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ), $ NDIAG, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ), $ NOFFD, IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) ANORM = PCLANHE( 'I', UPLO, N, MEM( IPA ), 1, 1, $ DESCA, MEM( IPW ) ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCLANHE', WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) END IF * CALL SLBOOT CALL BLACS_BARRIER( ICTXT, 'All' ) CALL SLTIMER( 1 ) * * Reduce to symmetric tridiagonal form * CALL PCHETTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), LWMIN, INFO ) * CALL SLTIMER( 1 ) * IF( CHECK ) THEN * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCHETTRD', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETTRD', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) * CALL PCCHEKPAD( ICTXT, 'PCHETTRDc', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETTRDd', NQ, 1, $ MEM( IPT-IPREPAD ), NQ, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETTRDe', WORKTRD-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) CALL PCFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1, $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD, $ IPREPAD, IPOSTPAD, PADVAL ) * * Compute fctres = ||A - QTQ'|| / (||A|| * N * eps) * CALL PCHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPD ), MEM( IPE ), MEM( IPT ), $ MEM( IPW ), IERR( 1 ) ) * * TTRD does not preserve the upper triangular part of A. * The following call to PCLATRAN means that we only * check the lower triangular part of A - QTQ' * CALL PCLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA, $ MEM( IPW ) ) CALL PCLAFCHK( 'Hemm', 'No', N, N, MEM( IPA ), 1, 1, $ DESCA, IASEED, ANORM, FRESID, $ MEM( IPW ) ) * * Check for memory overwrite * CALL PCCHEKPAD( ICTXT, 'PCHETDRVf', NP, NQ, $ MEM( IPA-IPREPAD ), DESCA( LLD_ ), $ IPREPAD, IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRVg', NDIAG, 1, $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRVh', NOFFD, 1, $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD, $ IPOSTPAD, PADVAL ) CALL PCCHEKPAD( ICTXT, 'PCHETDRVi', WORKSIZ-IPOSTPAD, $ 1, MEM( IPW-IPREPAD ), $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD, $ PADVAL ) * * Test residual and detect NaN result * IF( FRESID.LE.THRESH .AND. FRESID-FRESID.EQ. $ 0.0E+0 .AND. IERR( 1 ).EQ.0 ) THEN KPASS = KPASS + 1 PASSED = 'PASSED' ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9991 )FRESID KFAIL = KFAIL + 1 PASSED = 'FAILED' * * END IF * * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 .AND. IERR( 1 ).NE.0 ) $ WRITE( NOUT, FMT = * )'D or E copies incorrect ...' ELSE * * Don't perform the checking, only the timing operation * KPASS = KPASS + 1 FRESID = FRESID - FRESID PASSED = 'BYPASS' END IF * * Gather maximum of all CPU and WALL clock timings * CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 50, 1, WTIME ) CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 50, 1, CTIME ) * * Print results * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * * TRD requires 16/3 N^3 floating point operations * NOPS = DBLE( N ) NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3 NOPS = NOPS / 1.0D+6 * * Print WALL time * IF( WTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / WTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( WTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'WALL', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ WTIME( 1 ), TMFLOPS, FRESID, PASSED * * Print CPU time * IF( CTIME( 1 ).GT.0.0D+0 ) THEN TMFLOPS = NOPS / CTIME( 1 ) ELSE TMFLOPS = 0.0D+0 END IF IF( CTIME( 1 ).GE.0.0D+0 ) $ WRITE( NOUT, FMT = 9992 )'CPU ', N, INTERLEAVE, $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL, $ CTIME( 1 ), TMFLOPS, FRESID, PASSED * * * If split times were collected (in PCHEttrd.f), print * them out. * IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 ).GT.0.0D+0 .OR. $ CTIME( 13 )+CTIME( 15 )+CTIME( 16 ).GT.0.0D+0 ) $ THEN SPLITSTIMED = 1 END IF IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ), $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ), $ WTIME( 15 ) WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ), $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ), $ WTIME( 21 ) * WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ), $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ), $ CTIME( 15 ) WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ), $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ), $ CTIME( 21 ) WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB, $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS END IF END IF 10 CONTINUE 20 CONTINUE * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN IF( SPLITSTIMED.EQ.1 ) THEN WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = 9974 ) WRITE( NOUT, FMT = 9973 ) END IF END IF * * CALL BLACS_GRIDEXIT( ICTXT ) 30 CONTINUE RETURN * 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3, $ '; It should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most', $ I4 ) 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) * 9995 FORMAT( 'PCHETTRD, tailored reduction to tridiagonal form, test.' $ ) 9994 FORMAT( 'TIME N int 2gm bal anb pnb prcs TRD Time ', $ ' MFLOPS Residual CHECK' ) 9993 FORMAT( '---- ---- --- --- --- --- --- ---- -------- ', $ '----------- -------- ------' ) 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 ) 9991 FORMAT( '||A - Q*T*Q''|| / (||A|| * N * eps) = ', G25.7 ) 9990 FORMAT( 'wsplit1=[wsplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9989 FORMAT( 'wsplit2=[wsplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9988 FORMAT( 'csplit1=[csplit1;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9987 FORMAT( 'csplit2=[csplit2;', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2, $ 1X, F9.2, 1X, F9.2, ' ];' ) 9986 FORMAT( 'size_opts=[size_opts;', I4, 1X, I4, 1X, I4, 1X, I4, 1X, $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];' ) 9985 FORMAT( 'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;', $ ' TWOGEMMS=7; TIMEINTERNALS=8;' ) 9984 FORMAT( 'S1_OVERHEAD = 1; % Should be mainly cost of barrier' ) 9983 FORMAT( 'S1_BARRIER = 2; % Cost of barrier' ) 9982 FORMAT( 'S1_UPDCURCOL = 3; % Update the current column' ) 9981 FORMAT( 'S1_HOUSE = 4; % Compute the householder vector' ) 9980 FORMAT( 'S1_SPREAD = 5; % Spread across' ) 9979 FORMAT( 'S1_TRANSPOSE = 6; % Transpose' ) 9978 FORMAT( 'S2_UPDCURBLK = 1; % Update the current block column' ) 9977 FORMAT( 'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' ) 9976 FORMAT( 'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' ) 9975 FORMAT( 'S2_TRANS_SUM = 4; % v = v + vt'' ' ) 9974 FORMAT( 'S2_DOT = 5; % c = v'' * h ' ) 9973 FORMAT( 'S2_R2K = 6; % A = A - v * h'' - h * v'' ' ) * * * End of PCTTRDTESTER * END scalapack-1.8.0/TESTING/EIG/pzbrdinfo.f0000640000175000017500000003220210363532303017200 0ustar muammarmuammar SUBROUTINE PZBRDINFO( SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, $ LDNVAL, NNB, NBVAL, LDNBVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, $ NPROCS ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 27, 2000 * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, LDMVAL, LDNBVAL, LDNVAL, LDPVAL, $ LDQVAL, NGRIDS, NMAT, NNB, NPROCS, NOUT REAL THRESH * .. * .. Array Arguments .. INTEGER MVAL( LDMVAL ), NBVAL( LDNBVAL ), $ NVAL( LDNVAL ), PVAL( LDPVAL ), $ QVAL( LDQVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBRDINFO get needed startup information for the bidiagonal * reduction and transmits it to all processes. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * Name of output (summary) file (if any). Only defined for * process 0. * * NOUT (global output) INTEGER * The unit number for output file. NOUT = 6, output to screen, * NOUT = 0, output to stderr. Only defined for process 0. * * NMAT (global output) INTEGER * The number of different values that can be used for M & N. * * MVAL (global output) INTEGER array, dimension (LDMVAL) * The values of M (number of rows in matrix) to run the code * with. * * LDMVAL (global input) INTEGER * The maximum number of different values that can be used for * M. LDMVAL >= NMAT. * * NVAL (global output) INTEGER array, dimension (LDNVAL) * The values of N (number of columns in matrix) to run the * code with. * * LDNVAL (global input) INTEGER * The maximum number of different values that can be used for * N. LDNVAL >= NMAT. * * NNB (global output) INTEGER * The number of different values that can be used for NB. * * NBVAL (global output) INTEGER array, dimension (LDNBVAL) * The values of NB (blocksize) to run the code with. * * LDNBVAL (global input) INTEGER * The maximum number of different values that can be used for * NB, LDNBVAL >= NNB. * * NGRIDS (global output) INTEGER * The number of different values that can be used for P & Q. * * PVAL (global output) INTEGER array, dimension (LDPVAL) * The values of P (number of process rows) to run the code * with. * * LDPVAL (global input) INTEGER * The maximum number of different values that can be used for * P, LDPVAL >= NGRIDS. * * QVAL (global output) INTEGER array, dimension (LDQVAL) * The values of Q (number of process columns) to run the code * with. * * LDQVAL (global input) INTEGER * The maximum number of different values that can be used for * Q, LDQVAL >= NGRIDS. * * THRESH (global output) REAL * Indicates what error checks shall be run and printed out: * = 0 : Perform no error checking * > 0 : report all residuals greater than THRESH. * * WORK (local workspace) INTEGER array, dimension >= * MAX( 5, LDMVAL+LDNVAL+LDNBVAL+LDPVAL+LDQVAL ). Used to pack * all input arrays in order to send info in one message. * * IAM (local input) INTEGER * My process number. * * NPROCS (global input) INTEGER * The total number of processes. * * Note * ==== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER I, ICTXT DOUBLE PRECISION EPS * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, IGEBR2D, IGEBS2D, $ SCOPY * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( UNIT = NIN, FILE = 'BRD.dat', STATUS = 'OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( UNIT = NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get values of M, N * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1. .OR. NMAT.GT.LDMVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'M', LDMVAL GO TO 20 END IF READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) * * Get values of NB * READ( NIN, FMT = * ) NNB IF( NNB.LT.1 .OR. NNB.GT.LDNBVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'NB', LDNBVAL GO TO 20 END IF READ( NIN, FMT = * ) ( NBVAL( I ), I = 1, NNB ) * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDPVAL GO TO 20 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9997 ) 'Grids', LDQVAL GO TO 20 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Get level of checking * READ( NIN, FMT = * ) THRESH * * Close input file * CLOSE( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 10 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 10 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) * WORK( 1 ) = NMAT WORK( 2 ) = NNB WORK( 3 ) = NGRIDS CALL IGEBS2D( ICTXT, 'All', ' ', 1, 3, WORK, 1 ) * I = 1 CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 ) I = I + NNB CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK Bidiagonal reduction' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the parallel '// $ 'complex double precision bidiagonal ' WRITE( NOUT, FMT = 9999 ) 'reduction routines.' WRITE( NOUT, FMT = 9999 ) $ 'The following scaled residual '// $ 'checks will be computed:' WRITE( NOUT, FMT = 9999 ) $ ' ||A - Q B P''|| / (||A|| * eps * N)' WRITE( NOUT, FMT = 9999 ) $ 'The matrix A is randomly '// $ 'generated for each test.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' WRITE( NOUT, FMT = 9999 ) $ 'TIME : Indicates whether WALL or '// $ 'CPU time was used.' WRITE( NOUT, FMT = 9999 ) $ 'M : The number of rows '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'N : The number of columns '// $ 'of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks'// $ ' the matrix A is split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = 9999 ) $ 'THRESH : If a residual value is less'// $ ' than THRESH, CHECK is flagged as PASSED' WRITE( NOUT, FMT = 9999 ) $ 'BRD time : Time in seconds to reduce the'// $ ' matrix' WRITE( NOUT, FMT = 9999 ) $ 'MFLOPS : Rate of execution for '// $ 'the bidiagonal reduction.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9995 ) $ 'M ', ( MVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( MVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'N ', ( NVAL( I ), I = 1, MIN( NMAT, 10 ) ) IF( NMAT.GT.10 ) $ WRITE( NOUT, FMT = 9994 ) ( NVAL( I ), I = 11, NMAT ) WRITE( NOUT, FMT = 9995 ) $ 'NB ', ( NBVAL( I ), I = 1, MIN( NNB, 10 ) ) IF( NNB.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( NBVAL( I ), I = 11, NNB ) WRITE( NOUT, FMT = 9995 ) $ 'P ', ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( PVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9995 ) $ 'Q ', ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9994 )( QVAL( I ), I = 11, NGRIDS ) WRITE( NOUT, FMT = 9999 ) ' ' WRITE( NOUT, FMT = 9996 ) EPS WRITE( NOUT, FMT = 9993 ) THRESH * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * all processes have needed startup information * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL IGEBR2D( ICTXT, 'All', ' ', 1, 3, WORK, 1, 0, 0 ) NMAT = WORK( 1 ) NNB = WORK( 2 ) NGRIDS = WORK( 3 ) * I = 2*NMAT + NNB + 2*NGRIDS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) I = 1 CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 ) I = I + NNB CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 CONTINUE WRITE( NOUT, FMT = 9998 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Illegal input in file ', 40A, '. Aborting run.' ) 9997 FORMAT( ' Number of values of ', 5A, $ ' is less than 1 or greater ', 'than ', I2 ) 9996 FORMAT( 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9995 FORMAT( 2X, A5, ': ', 10I6 ) 9994 FORMAT( ' ', 10I6 ) 9993 FORMAT( 'Routines pass computational tests if scaled residual is', $ ' less than ', G12.5 ) * * End of PZBRDINFO * END scalapack-1.8.0/TESTING/EIG/pdlasizegsep.f0000640000175000017500000001111310363532303017673 0ustar muammarmuammar* * SUBROUTINE PDLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, $ SIZESUBTST, ISIZESUBTST, SIZETST, $ ISIZETST ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPOSTPAD, IPREPAD, ISIZESUBTST, ISIZESYEVX, $ ISIZETST, SIZECHK, SIZEMQRLEFT, SIZEMQRRIGHT, $ SIZEQRF, SIZEQTQ, SIZESUBTST, SIZESYEVX, $ SIZETMS, SIZETST * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PDLASIZEGSEP computes the amount of memory needed by * various GSEP test routines, as well as SYGVX itself * * Arguments * ========= * * DESCA (global input) INTEGER array dimension ( DLEN_ ) * Array descriptor as passed to PDSYGVX * * SIZEMQRLEFT LWORK for the 1st PDORMQR call in PDLAGSY * * SIZEMQRRIGHT LWORK for the 2nd PDORMQR call in PDLAGSY * * SIZEQRF LWORK for PDGEQRF in PDLAGSY * * SIZETMS LWORK for PDLATMS * * SIZEQTQ LWORK for PDSEPQTQ (nexer complex) * * SIZECHK LWORK for PDGSEPCHK * * SIZESYEVX LWORK for PDSYGVX * * ISIZESYEVX LIWORK for PDSYGVX * * SIZESUBTST LWORK for PDSUBTST * * ISIZESUBTST LIWORK for PDSUBTST * * SIZETST LWORK for PDTST * * ISIZETST LIWORK for PDTST * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC_A, IACOL, IAROW, ICOFFA, IROFFA, LCM, $ LCMQ, LDA, MQ0, MYCOL, MYROW, N, NB, NEIG, NN, $ NNP, NP, NP0, NPCOL, NPROW, NQ, RSRC_A * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. ** .. Executable Statements .. * This is just to keep ftnchek happy * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * N = DESCA( M_ ) NB = DESCA( MB_ ) RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) * LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL IROFFA = 0 ICOFFA = 0 IAROW = INDXG2P( 1, NB, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, NB, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) SIZEMQRLEFT = MAX( ( NB*( NB-1 ) ) / 2, ( NP+NQ )*NB ) + NB*NB SIZEMQRRIGHT = MAX( ( NB*( NB-1 ) ) / 2, $ ( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB, 0, 0, $ NPCOL ), NB, 0, 0, LCMQ ), NP ) )*NB ) + NB*NB SIZEQRF = NB*NP + NB*NQ + NB*NB SIZETMS = ( LDA+1 )*MAX( 1, NQ ) + $ MAX( SIZEMQRLEFT, SIZEMQRRIGHT, SIZEQRF ) * NP0 = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) MQ0 = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) SIZEQTQ = 0 SIZECHK = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) * NEIG = N NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) SIZESYEVX = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN NNP = MAX( N, NPROW*NPCOL+1, 4 ) ISIZESYEVX = 6*NNP * SIZESUBTST = MAX( SIZETMS, SIZEQTQ, SIZECHK, SIZESYEVX ) + $ IPREPAD + IPOSTPAD ISIZESUBTST = ISIZESYEVX + IPREPAD + IPOSTPAD * * * Allow room for A, COPYA and Z and DIAG, WIN, WNEW, GAP, WORK * SIZETST = 5*( LDA*NP+IPREPAD+IPOSTPAD ) + $ 4*( N+IPREPAD+IPOSTPAD ) + SIZESUBTST * * Allow room for IFAIL, ICLUSTR, and IWORK (all in PDSYGVX) * ISIZETST = N + 2*NPROW*NPCOL + 2*( IPREPAD+IPOSTPAD ) + $ ISIZESUBTST * RETURN END scalapack-1.8.0/TESTING/EIG/pcgehdrv.f0000640000175000017500000001774710602576752017042 0ustar muammarmuammar SUBROUTINE PCGEHDRV( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEHDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from the * unitary matrix Q, the Hessenberg matrix, and the array TAU returned * by PCGEHRD: * sub( A ) := Q * H * Q' * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows and columns 1:ILO-1 and IHI+1:N. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) reduced to Hessenberg * form by PCGEHRD. The upper triangle and the first sub- * diagonal of sub( A ) contain the upper Hessenberg matrix H, * and the elements below the first subdiagonal, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors. On exit, the original distributed * N-by-N matrix sub( A ) is recovered. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors returned by * PCGEHRD. TAU is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK). * LWORK >= NB*NB + NB*IHLP + MAX[ NB*( IHLP+INLQ ), * NB*( IHLQ + MAX[ IHIP, * IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0, * NPCOL ), NB, 0, 0, LCMQ ) ] ) ] * * where NB = MB_A = NB_A, * LCM is the least common multiple of NPROW and NPCOL, * LCM = ILCM( NPROW, NPCOL ), LCMQ = LCM / NPCOL, * * IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * IHLP = NUMROC( IHI-ILO+IROFFA+1, NB, MYROW, ILROW, NPROW ), * IHLQ = NUMROC( IHI-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ), * INLQ = NUMROC( N-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ). * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, IHLP, II, IOFF, IPT, $ IPV, IPW, IV, J, JB, JJ, JL, K, MYCOL, MYROW, $ NB, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCLARFB, $ PCLARFT, PCLACPY, PCLASET * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * NB = DESCA( MB_ ) IOFF = MOD( IA+ILO-2, NB ) CALL INFOG2L( IA+ILO-1, JA+ILO-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, IAROW, NPROW ) * IPT = 1 IPV = IPT + NB * NB IPW = IPV + IHLP * NB JL = MAX( ( ( JA+IHI-2 ) / NB ) * NB + 1, JA + ILO - 1 ) CALL DESCSET( DESCV, IHI-ILO+IOFF+1, NB, NB, NB, IAROW, $ INDXG2P( JL, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, IHLP ) ) * DO 10 J = JL, ILO+JA+NB-IOFF-1, -NB JB = MIN( JA+IHI-J-1, NB ) I = IA + J - JA K = I - IA + 1 IV = K - ILO + IOFF + 1 * * Compute upper triangular matrix T from TAU. * CALL PCLARFT( 'Forward', 'Columnwise', IHI-K, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'All', IHI-K, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * CALL PCLASET( 'Lower', IHI-K-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-K, N-K+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI, IHI-K, JB, WORK( IPV ), IV+1, $ 1, DESCV, WORK( IPT ), A, IA, J+1, DESCA, $ WORK( IPW ) ) * DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the first block separately * IV = IOFF + 1 I = IA + ILO - 1 J = JA + ILO - 1 JB = MIN( NB-IOFF, JA+IHI-J-1 ) * * Compute upper triangular matrix T from TAU. * CALL PCLARFT( 'Forward', 'Columnwise', IHI-ILO, JB, A, I+1, J, $ DESCA, TAU, WORK( IPT ), WORK( IPW ) ) * * Copy Householder vectors into workspace. * CALL PCLACPY( 'All', IHI-ILO, JB, A, I+1, J, DESCA, WORK( IPV ), $ IV+1, 1, DESCV ) * * Zero out the strict lower triangular part of A. * IF( IHI-ILO.GT.0 ) $ CALL PCLASET( 'Lower', IHI-ILO-1, JB, ZERO, ZERO, A, I+2, J, $ DESCA ) * * Apply block Householder transformation from Left. * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ IHI-ILO, N-ILO+1, JB, WORK( IPV ), IV+1, 1, DESCV, $ WORK( IPT ), A, I+1, J, DESCA, WORK( IPW ) ) * * Apply block Householder transformation from Right. * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI, IHI-ILO, JB, WORK( IPV ), IV+1, $ 1, DESCV, WORK( IPT ), A, IA, J+1, DESCA, $ WORK( IPW ) ) * RETURN * * End of PCGEHDRV * END scalapack-1.8.0/TESTING/EIG/pdgseptst.f0000640000175000017500000012136110363532303017225 0ustar muammarmuammar* * SUBROUTINE PDGSEPTST( DESCA, UPLO, N, MATTYPE, IBTYPE, SUBTESTS, $ THRESH, ORDER, ABSTOL, ISEED, A, COPYA, B, $ COPYB, Z, LDA, WIN, WNEW, IFAIL, ICLUSTR, $ GAP, IPREPAD, IPOSTPAD, WORK, LWORK, IWORK, $ LIWORK, NOUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER SUBTESTS, UPLO INTEGER IBTYPE, INFO, IPOSTPAD, IPREPAD, LDA, LIWORK, $ LWORK, MATTYPE, N, NOUT, ORDER DOUBLE PRECISION ABSTOL, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), ICLUSTR( * ), IFAIL( * ), $ ISEED( 4 ), IWORK( * ) DOUBLE PRECISION A( LDA, * ), B( LDA, * ), COPYA( LDA, * ), $ COPYB( LDA, * ), GAP( * ), WIN( * ), WNEW( * ), $ WORK( * ), Z( LDA, * ) * .. * * Purpose * ======= * * PDGSEPTST builds a random matrix A, and a well conditioned * matrix B, runs PDSYGVX() to compute the eigenvalues * and eigenvectors and then calls PDSYGVCHK to compute * the residual. * * The random matrix built depends upon the following parameters: * N, NB, ISEED, ORDER * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * DESCA (global and local input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrices * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * Size of the matrix to be tested. (global size) * * MATTYPE (global input) INTEGER * Matrix type * Currently, the list of possible types is: * * (1) The zero matrix. * (2) The identity matrix. * * (3) A diagonal matrix with evenly spaced entries * 1, ..., ULP and random signs. * (ULP = (first number larger than 1) - 1 ) * (4) A diagonal matrix with geometrically spaced entries * 1, ..., ULP and random signs. * (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP * and random signs. * * (6) Same as (4), but multiplied by SQRT( overflow threshold ) * (7) Same as (4), but multiplied by SQRT( underflow threshold ) * * (8) A matrix of the form U' D U, where U is orthogonal and * D has evenly spaced entries 1, ..., ULP with random signs * on the diagonal. * * (9) A matrix of the form U' D U, where U is orthogonal and * D has geometrically spaced entries 1, ..., ULP with random * signs on the diagonal. * * (10) A matrix of the form U' D U, where U is orthogonal and * D has "clustered" entries 1, ULP,..., ULP with random * signs on the diagonal. * * (11) Same as (8), but multiplied by SQRT( overflow threshold ) * (12) Same as (8), but multiplied by SQRT( underflow threshold ) * * (13) symmetric matrix with random entries chosen from (-1,1). * (14) Same as (13), but multiplied by SQRT( overflow threshold ) * (15) Same as (13), but multiplied by SQRT( underflow threshold ) * (16) Same as (8), but diagonal elements are all positive. * (17) Same as (9), but diagonal elements are all positive. * (18) Same as (10), but diagonal elements are all positive. * (19) Same as (16), but multiplied by SQRT( overflow threshold ) * (20) Same as (16), but multiplied by SQRT( underflow threshold ) * (21) A tridiagonal matrix that is a direct sum of smaller diagonally * dominant submatrices. Each unreduced submatrix has geometrically * spaced diagonal entries 1, ..., ULP. * (22) A matrix of the form U' D U, where U is orthogonal and * D has ceil(lgN) "clusters" at 0,1,2,...,ceil(lgN)-1. The * size of the cluster at the value I is 2^I. * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * * SUBTESTS (global input) CHARACTER*1 * 'Y' - Perform subset tests * 'N' - Do not perform subset tests * * THRESH (global input) DOUBLE PRECISION * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * ORDER (global input) INTEGER * Number of reflectors used in test matrix creation. * If ORDER is large, it will * take more time to create the test matrices but they will * be closer to random. * ORDER .lt. N not implemented * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An * eigenvalue is considered to be located if it has * been determined to lie in an interval whose width * is "abstol" or less. If "abstol" is less than or equal * to zero, then ulp*|T| will be used, where |T| is * the 1-norm of the matrix. If eigenvectors are * desired later by inverse iteration ("PDSTEIN"), * "abstol" MUST NOT be bigger than ulp*|T|. * * For the purposes of this test, ABSTOL=0.0 is fine. * THis test does not test for high relative accuracy. * * ISEED (global input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * A (local workspace) DOUBLE PRECISION array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. The actual location of a particular element * in A is controlled by the values of NPROW, NPCOL, and NB. * The test matrix, which is then modified by PDSYGVX * * COPYA (local workspace) DOUBLE PRECISION array, dim (N, N) * COPYA is used to hold an identical copy of the array A * identical in both form and content to A * * B (local workspace) DOUBLE PRECISION array, dim (N*N) * global dimension (N, N), local dimension (LDA, NQ) * A is distributed in a block cyclic manner over both rows * and columns. * The B test matrix, which is then modified by PDSYGVX * * COPYB (local workspace) DOUBLE PRECISION array, dim (N, N) * COPYB is used to hold an identical copy of the array B * identical in both form and content to B * * Z (local workspace) DOUBLE PRECISION array, dim (N*N) * Z is distributed in the same manner as A * Z is used as workspace by the test routines * PDGSEPCHK * * W (local workspace) DOUBLE PRECISION array, dimension (N) * On normal exit from PDSYGVX, the first M entries * contain the selected eigenvalues in ascending order. * * IFAIL (global workspace) INTEGER array, dimension (N) * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. LWORK >= SIZETST as * returned by PDLASIZEGSEP * * IWORK (local workspace) INTEGER array, dimension (LIWORK) * * LIWORK (local input) INTEGER * The length of the array IWORK. LIWORK >= ISIZETST as * returned by PDLASIZEGSEP * * NOUT (local input) INTEGER * The unit number for output file. Only used on node 0. * NOUT = 6, output to screen, * NOUT = 0, output to stderr. * NOUT = 13, output to file, divide thresh by 10.0 * NOUT = 14, output to file, divide thresh by 20.0 * (This hack allows us to test more stringently internally * so that when errors on found on other computers they will * be serious enough to warrant our attention.) * * INFO (global output) INTEGER * -3 This process is not involved * 0 Test succeeded (passed |AQ -QL| and |QT*Q - I| tests) * 1 At least one test failed * 2 Residual test were not performed, thresh <= 0.0 * 3 Test was skipped because of inadequate memory space * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, TEN, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0, $ HALF = 0.5D+0 ) DOUBLE PRECISION PADVAL PARAMETER ( PADVAL = 19.25D+0 ) INTEGER MAXTYP PARAMETER ( MAXTYP = 22 ) * .. * * .. Local Scalars .. LOGICAL WKNOWN CHARACTER JOBZ, RANGE CHARACTER*14 PASSED INTEGER CONTEXT, I, IAM, IINFO, IL, IMODE, IN, INDD, $ INDWORK, ISIZESUBTST, ISIZESYEVX, ISIZETST, $ ITYPE, IU, J, LLWORK, LSYEVXSIZE, MAXSIZE, $ MYCOL, MYROW, NB, NGEN, NLOC, NNODES, NP, $ NPCOL, NPROW, NQ, RES, SIZECHK, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZEQTQ, SIZESUBTST, $ SIZESYEVX, SIZETMS, SIZETST, VALSIZE, VECSIZE DOUBLE PRECISION ANINV, ANORM, COND, MAXQTQNRM, MAXTSTNRM, OVFL, $ QTQNRM, RTOVFL, RTUNFL, TEMP1, TSTNRM, ULP, $ ULPINV, UNFL, VL, VU * .. * .. Local Arrays .. INTEGER ISEEDIN( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), $ KTYPE( MAXTYP ) DOUBLE PRECISION CTIME( 10 ), WTIME( 10 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION DLARAN, PDLAMCH EXTERNAL LSAME, NUMROC, DLARAN, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, DLABAD, DLASRT, $ DLATMS, IGAMX2D, IGEBR2D, IGEBS2D, PDCHEKPAD, $ PDELSET, PDFILLPAD, PDGSEPSUBTST, PDLASET, $ PDLASIZEGSEP, PDLASIZESYEVX, PDLATMS, PDMATGEN, $ SLCOMBINE * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD, SQRT * .. * .. Data statements .. DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, $ 8, 8, 9, 9, 9, 9, 9, 10, 11 / DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, $ 2, 3, 1, 1, 1, 2, 3, 1, 1 / DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, $ 0, 0, 4, 3, 1, 4, 4, 3, 0 / * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 PASSED = 'PASSED ' CONTEXT = DESCA( CTXT_ ) NB = DESCA( NB_ ) * CALL BLACS_PINFO( IAM, NNODES ) CALL BLACS_GRIDINFO( CONTEXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Make sure that we have enough memory * * CALL PDLASIZEGSEP( DESCA, IPREPAD, IPOSTPAD, SIZEMQRLEFT, $ SIZEMQRRIGHT, SIZEQRF, SIZETMS, SIZEQTQ, $ SIZECHK, SIZESYEVX, ISIZESYEVX, SIZESUBTST, $ ISIZESUBTST, SIZETST, ISIZETST ) * IF( LWORK.LT.SIZETST ) THEN INFO = 3 END IF * CALL IGAMX2D( CONTEXT, 'a', ' ', 1, 1, INFO, 1, 1, 1, -1, -1, 0 ) * IF( INFO.EQ.0 ) THEN * INDD = 1 INDWORK = INDD + N LLWORK = LWORK - INDWORK + 1 * ULP = PDLAMCH( CONTEXT, 'P' ) ULPINV = ONE / ULP UNFL = PDLAMCH( CONTEXT, 'Safe min' ) OVFL = ONE / UNFL CALL DLABAD( UNFL, OVFL ) RTUNFL = SQRT( UNFL ) RTOVFL = SQRT( OVFL ) ANINV = ONE / DBLE( MAX( 1, N ) ) * * This ensures that everyone starts out with the same seed. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4 ) ELSE CALL IGEBR2D( CONTEXT, 'a', ' ', 4, 1, ISEED, 4, 0, 0 ) END IF ISEEDIN( 1 ) = ISEED( 1 ) ISEEDIN( 2 ) = ISEED( 2 ) ISEEDIN( 3 ) = ISEED( 3 ) ISEEDIN( 4 ) = ISEED( 4 ) * * Compute the matrix A * * Control parameters: * * KMAGN KMODE KTYPE * =1 O(1) clustered 1 zero * =2 large clustered 2 identity * =3 small exponential (none) * =4 arithmetic diagonal, (w/ eigenvalues) * =5 random log symmetric, w/ eigenvalues * =6 random (none) * =7 random diagonal * =8 random symmetric * =9 positive definite * =10 block diagonal with tridiagonal blocks * =11 Geometrically sized clusters. * ITYPE = KTYPE( MATTYPE ) IMODE = KMODE( MATTYPE ) * * Compute norm * GO TO ( 10, 20, 30 )KMAGN( MATTYPE ) * 10 CONTINUE ANORM = ONE GO TO 40 * 20 CONTINUE ANORM = ( RTOVFL*ULP )*ANINV GO TO 40 * 30 CONTINUE ANORM = RTUNFL*N*ULPINV GO TO 40 * 40 CONTINUE IF( MATTYPE.LE.15 ) THEN COND = ULPINV ELSE COND = ULPINV*ANINV / TEN END IF * * Special Matrices * * Zero * * IF( ITYPE.EQ.1 ) THEN * * Zero Matrix * DO 50 I = 1, N WORK( INDD+I-1 ) = ZERO 50 CONTINUE CALL PDLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.2 ) THEN * * Identity Matrix * DO 60 I = 1, N WORK( INDD+I-1 ) = ONE 60 CONTINUE CALL PDLASET( 'All', N, N, ZERO, ONE, COPYA, 1, 1, DESCA ) WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.4 ) THEN * * Diagonal Matrix, [Eigen]values Specified * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+1.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) WKNOWN = .TRUE. * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS1-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+1.0D+0 ) * ELSE IF( ITYPE.EQ.5 ) THEN * * symmetric, eigenvalues specified * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+2.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS2-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+2.0D+0 ) * WKNOWN = .TRUE. * ELSE IF( ITYPE.EQ.8 ) THEN * * symmetric, random eigenvalues * NP = NUMROC( N, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), MYCOL, 0, NPCOL ) CALL PDMATGEN( DESCA( CTXT_ ), 'S', 'N', N, N, DESCA( MB_ ), $ DESCA( NB_ ), COPYA, DESCA( LLD_ ), $ DESCA( RSRC_ ), DESCA( CSRC_ ), ISEED( 1 ), $ 0, NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL ) INFO = 0 WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.9 ) THEN * * Positive definite, eigenvalues specified. * * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, N, N, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * WKNOWN = .TRUE. * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS3-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.0D+0 ) * ELSE IF( ITYPE.EQ.10 ) THEN * * Block diagonal matrix with each block being a positive * definite tridiagonal submatrix. * CALL PDLASET( 'All', N, N, ZERO, ZERO, COPYA, 1, 1, DESCA ) NP = NUMROC( N, DESCA( MB_ ), 0, 0, NPROW ) NQ = NUMROC( N, DESCA( NB_ ), 0, 0, NPCOL ) NLOC = MIN( NP, NQ ) NGEN = 0 70 CONTINUE * IF( NGEN.LT.N ) THEN IN = MIN( 1+INT( DLARAN( ISEED )*DBLE( NLOC ) ), N-NGEN ) * CALL DLATMS( IN, IN, 'S', ISEED, 'P', WORK( INDD ), $ IMODE, COND, ANORM, 1, 1, 'N', A, LDA, $ WORK( INDWORK ), IINFO ) * DO 80 I = 2, IN TEMP1 = ABS( A( I-1, I ) ) / $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) IF( TEMP1.GT.HALF ) THEN A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, $ I ) ) ) A( I, I-1 ) = A( I-1, I ) END IF 80 CONTINUE CALL PDELSET( COPYA, NGEN+1, NGEN+1, DESCA, A( 1, 1 ) ) DO 90 I = 2, IN CALL PDELSET( COPYA, NGEN+I, NGEN+I, DESCA, $ A( I, I ) ) CALL PDELSET( COPYA, NGEN+I-1, NGEN+I, DESCA, $ A( I-1, I ) ) CALL PDELSET( COPYA, NGEN+I, NGEN+I-1, DESCA, $ A( I, I-1 ) ) 90 CONTINUE NGEN = NGEN + IN GO TO 70 END IF WKNOWN = .FALSE. * ELSE IF( ITYPE.EQ.11 ) THEN * * Geometrically sized clusters. Eigenvalues: 0,1,1,2,2,2,2, ... * NGEN = 0 J = 1 TEMP1 = ZERO 100 CONTINUE IF( NGEN.LT.N ) THEN IN = MIN( J, N-NGEN ) DO 110 I = 0, IN - 1 WORK( INDD+NGEN+I ) = TEMP1 110 CONTINUE TEMP1 = TEMP1 + ONE J = 2*J NGEN = NGEN + IN GO TO 100 END IF * * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+4.0D+0 ) * CALL PDLATMS( N, N, 'S', ISEED, 'S', WORK( INDD ), IMODE, $ COND, ANORM, 0, 0, 'N', COPYA, 1, 1, DESCA, $ ORDER, WORK( INDWORK+IPREPAD ), SIZETMS, $ IINFO ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS4-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+4.0D+0 ) * ELSE IINFO = 1 END IF * IF( WKNOWN ) $ CALL DLASRT( 'I', N, WORK( INDD ), IINFO ) * * Create the B matrix * CALL PDFILLPAD( DESCA( CTXT_ ), SIZETMS, 1, WORK( INDWORK ), $ SIZETMS, IPREPAD, IPOSTPAD, PADVAL+3.3D+0 ) * ANORM = ONE * * Update ISEED so that {DLAGSY creates a different Q * ISEED( 4 ) = MOD( ISEED( 4 )+257, 4096 ) ISEED( 3 ) = MOD( ISEED( 3 )+192, 4096 ) ISEED( 2 ) = MOD( ISEED( 2 )+35, 4096 ) ISEED( 1 ) = MOD( ISEED( 1 )+128, 4096 ) CALL PDLATMS( N, N, 'S', ISEED, 'P', WORK( INDD ), 3, TEN, $ ANORM, N, N, 'N', COPYB, 1, 1, DESCA, ORDER, $ WORK( INDWORK+IPREPAD ), SIZETMS, IINFO ) * CALL PDCHEKPAD( DESCA( CTXT_ ), 'PDLATMS5-WORK', SIZETMS, 1, $ WORK( INDWORK ), SIZETMS, IPREPAD, IPOSTPAD, $ PADVAL+3.3D+0 ) * * * These values aren't actually used, but they make ftncheck happy. * IL = -1 IU = -2 VL = ONE VU = -ONE * CALL PDLASIZESYEVX( WKNOWN, 'A', N, DESCA, VL, VU, IL, IU, $ ISEED, WORK( INDD ), MAXSIZE, VECSIZE, $ VALSIZE ) * LSYEVXSIZE = MIN( MAXSIZE, LWORK ) WKNOWN = .FALSE. * CALL PDGSEPSUBTST( WKNOWN, IBTYPE, 'v', 'a', UPLO, N, VL, VU, $ IL, IU, THRESH, ABSTOL, A, COPYA, B, COPYB, $ Z, 1, 1, DESCA, WORK( INDD ), WIN, IFAIL, $ ICLUSTR, GAP, IPREPAD, IPOSTPAD, $ WORK( INDWORK ), LLWORK, LSYEVXSIZE, IWORK, $ ISIZESYEVX, RES, TSTNRM, QTQNRM, NOUT ) * * * MAXTSTNRM = TSTNRM MAXQTQNRM = QTQNRM * IF( THRESH.LE.ZERO ) THEN PASSED = 'SKIPPED ' INFO = 2 ELSE IF( RES.NE.0 ) THEN PASSED = 'FAILED ' INFO = 1 END IF END IF * IF( THRESH.GT.ZERO .AND. LSAME( SUBTESTS, 'Y' ) ) THEN * * Subtest 1: JOBZ = 'V', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'V' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 1' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 2: JOBZ = 'V', RANGE = 'A', random memory * IF( INFO.EQ.0 ) THEN JOBZ = 'V' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN PASSED = 'FAILED stest 2' MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) INFO = 1 END IF END IF * * Subtest 3: JOBZ = 'N', RANGE = 'A', minimum memory * IF( INFO.EQ.0 ) THEN * JOBZ = 'N' RANGE = 'A' CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 3' INFO = 1 END IF END IF * * Subtest 4: JOBZ = 'N', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'N' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 4' INFO = 1 END IF END IF * * Subtest 5: JOBZ = 'V', RANGE = 'I', maximum memory * IF( INFO.EQ.0 ) THEN * IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 5' INFO = 1 END IF END IF * * Subtest 6: JOBZ = 'V', RANGE = 'I', minimum memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 6' INFO = 1 END IF END IF * * Subtest 7: JOBZ = 'V', RANGE = 'I', random memory * IF( INFO.EQ.0 ) THEN IL = -1 IU = -1 JOBZ = 'V' RANGE = 'I' * * We use PDLASIZESYEVX to choose IL and IU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 7' INFO = 1 END IF END IF * * Subtest 8: JOBZ = 'N', RANGE = 'V', minimum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'N' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 8' INFO = 1 END IF END IF * * Subtest 9: JOBZ = 'V', RANGE = 'V', maximum memory * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = MAXSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest 9' INFO = 1 END IF END IF * * Subtest 10: JOBZ = 'V', RANGE = 'V', * minimum memory required for eigenvectors * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest10' INFO = 1 END IF END IF * * Subtest 11: JOBZ = 'V', RANGE = 'V', * random memory (enough for all eigenvectors * but not enough to guarantee orthogonality * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VECSIZE + INT( DLARAN( ISEED )* $ DBLE( MAXSIZE-VECSIZE ) ) * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest11' INFO = 1 END IF END IF * * Subtest 12: JOBZ = 'V', RANGE = 'V', * miniimum memory required for eigenvalues only * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest12' INFO = 1 END IF END IF * * Subtest 13: JOBZ = 'V', RANGE = 'V', * random memory (more than minimum required * for eigenvalues, less than required for vectors) * IF( INFO.EQ.0 ) THEN VL = ONE VU = -ONE JOBZ = 'V' RANGE = 'V' * * We use PDLASIZESYEVX to choose VL and VU for us. * CALL PDLASIZESYEVX( .TRUE., RANGE, N, DESCA, VL, VU, IL, IU, $ ISEED, WIN( 1+IPREPAD ), MAXSIZE, $ VECSIZE, VALSIZE ) * LSYEVXSIZE = VALSIZE + INT( DLARAN( ISEED )* $ DBLE( VECSIZE-VALSIZE ) ) * CALL PDGSEPSUBTST( .TRUE., IBTYPE, JOBZ, RANGE, UPLO, N, VL, $ VU, IL, IU, THRESH, ABSTOL, A, COPYA, B, $ COPYB, Z, 1, 1, DESCA, WIN( 1+IPREPAD ), $ WNEW, IFAIL, ICLUSTR, GAP, IPREPAD, $ IPOSTPAD, WORK( INDWORK ), LLWORK, $ LSYEVXSIZE, IWORK, ISIZESYEVX, RES, $ TSTNRM, QTQNRM, NOUT ) * IF( RES.NE.0 ) THEN MAXTSTNRM = MAX( TSTNRM, MAXTSTNRM ) MAXQTQNRM = MAX( QTQNRM, MAXQTQNRM ) PASSED = 'FAILED stest13' INFO = 1 END IF END IF END IF * * * CALL IGAMX2D( CONTEXT, 'All', ' ', 1, 1, INFO, 1, -1, -1, -1, -1, $ -1 ) * IF( INFO.EQ.1 ) THEN IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9994 )'C ' WRITE( NOUT, FMT = 9993 )ISEEDIN( 1 ) WRITE( NOUT, FMT = 9992 )ISEEDIN( 2 ) WRITE( NOUT, FMT = 9991 )ISEEDIN( 3 ) WRITE( NOUT, FMT = 9990 )ISEEDIN( 4 ) IF( LSAME( UPLO, 'L' ) ) THEN WRITE( NOUT, FMT = 9994 )' UPLO= ''L'' ' ELSE WRITE( NOUT, FMT = 9994 )' UPLO= ''U'' ' END IF IF( LSAME( SUBTESTS, 'Y' ) ) THEN WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''Y'' ' ELSE WRITE( NOUT, FMT = 9994 )' SUBTESTS= ''N'' ' END IF WRITE( NOUT, FMT = 9989 )N WRITE( NOUT, FMT = 9988 )NPROW WRITE( NOUT, FMT = 9987 )NPCOL WRITE( NOUT, FMT = 9986 )NB WRITE( NOUT, FMT = 9985 )MATTYPE WRITE( NOUT, FMT = 9984 )IBTYPE WRITE( NOUT, FMT = 9982 )ABSTOL WRITE( NOUT, FMT = 9981 )THRESH WRITE( NOUT, FMT = 9994 )'C ' END IF END IF * CALL SLCOMBINE( CONTEXT, 'All', '>', 'W', 6, 1, WTIME ) CALL SLCOMBINE( CONTEXT, 'All', '>', 'C', 6, 1, CTIME ) IF( IAM.EQ.0 ) THEN IF( INFO.EQ.0 .OR. INFO.EQ.1 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9999 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ), MAXTSTNRM, $ PASSED ELSE WRITE( NOUT, FMT = 9998 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ), MAXTSTNRM, PASSED END IF ELSE IF( INFO.EQ.2 ) THEN IF( WTIME( 1 ).GE.0.0 ) THEN WRITE( NOUT, FMT = 9997 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, WTIME( 1 ), CTIME( 1 ) ELSE WRITE( NOUT, FMT = 9996 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS, CTIME( 1 ) END IF ELSE IF( INFO.EQ.3 ) THEN WRITE( NOUT, FMT = 9995 )N, NB, NPROW, NPCOL, MATTYPE, $ IBTYPE, SUBTESTS END IF END IF * 120 CONTINUE * RETURN 9999 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 1X, G9.2, 1X, A14 ) 9998 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 1X, G9.2, A14 ) 9997 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, F8.2, 1X, F8.2, 11X, 'Bypassed' ) 9996 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 1X, 8X, 1X, F8.2, 11X, 'Bypassed' ) 9995 FORMAT( 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 3X, I3, 4X, A1, $ 22X, 'Bad MEMORY parameters' ) 9994 FORMAT( A ) 9993 FORMAT( ' ISEED( 1 ) =', I8 ) 9992 FORMAT( ' ISEED( 2 ) =', I8 ) 9991 FORMAT( ' ISEED( 3 ) =', I8 ) 9990 FORMAT( ' ISEED( 4 ) =', I8 ) 9989 FORMAT( ' N=', I8 ) 9988 FORMAT( ' NPROW=', I8 ) 9987 FORMAT( ' NPCOL=', I8 ) 9986 FORMAT( ' NB=', I8 ) 9985 FORMAT( ' MATTYPE=', I8 ) 9984 FORMAT( ' IBTYPE=', I8 ) 9983 FORMAT( ' SUBTESTS=', A1 ) 9982 FORMAT( ' ABSTOL=', D16.6 ) 9981 FORMAT( ' THRESH=', D16.6 ) 9980 FORMAT( ' Increase TOTMEM in PDGSEPDRIVER' ) * * End of PDGSEPTST * END scalapack-1.8.0/TESTING/EIG/pssepchk.f0000640000175000017500000002412210363532303017025 0ustar muammarmuammar* * SUBROUTINE PSSEPCHK( MS, NV, A, IA, JA, DESCA, EPSNORMA, THRESH, $ Q, IQ, JQ, DESCQ, C, IC, JC, DESCC, W, WORK, $ LWORK, TSTNRM, RESULT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, IQ, JA, JC, JQ, LWORK, MS, NV, RESULT REAL EPSNORMA, THRESH, TSTNRM * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCC( * ), DESCQ( * ) REAL A( * ), C( * ), Q( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * Compute |AQ- QL| / (EPSNORMA * N) * where EPSNORMA = (abstol + eps)*norm(A) when called by pdsqpsubtst. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * MP = number of local rows in A, C and Q * MQ = number of local columns in A * NQ = number of local columns in C and Q * * MS (global input) INTEGER * Matrix size. * The number of global rows in A, C and Q * Also, the number of global columns in A * * NV (global input) INTEGER * Number of eigenvectors * The number of global columns in C and Q. * * A (local input) REAL pointer to an * array in local memory of dimension (LLD_A, LOCc(JA+N-1)). * This array contains the local pieces of the MS-by-MS * distributed test matrix A * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * EPSNORMA (input) REAL * abstol + eps * inf.norm(A) * Abstol is absolute tolerence for the eigenvalues and is set * in the calling routines, pdsepsubtst and pdsqpsubtst. * * THRESH (input) REAL * A test will count as "failed" if the "error", computed as * described below, exceeds THRESH. Note that the error * is scaled to be O(1), so THRESH should be a reasonably * small multiple of 1, e.g., 10 or 100. In particular, * it should not depend on the precision (single vs. double) * or the size of the matrix. It must be at least zero. * * Q (local input) REAL array * global dimension (MS, NV), local dimension (DESCA(DLEN_), NQ) * * Contains the eigenvectors as computed by PSSYEVX * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Q. * * C (local workspace) REAL array, * global dimension (NV, NV), local dimension (DESCA(DLEN_), MQ) * * Accumulator for computing AQ -QL * * IC (global input) INTEGER * C's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JC (global input) INTEGER * C's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * W (global input) REAL array, dimension (NV) * * Contains the computed eigenvalues * * WORK (local workspace) REAL array, * dimension (LWORK) * * LWORK (local input) INTEGER * The length of the array WORK. * LWORK >= NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * * TSTNRM (global output) REAL * |AQ- QL| / ( EPSNROMA * MS ) * * RESULT (global output) INTEGER * 0 if the test passes i.e. * |AQ -QL| / (abstol + eps * norm(A) ) <= n* THRESH * 1 if the test fails i.e. * |AQ -QL| / (abstol + eps * norm(A) ) > n * THRESH * * .. Local Scalars .. * INTEGER INFO, J, LOCALCOL, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ, PCOL REAL NORM * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, NEGONE PARAMETER ( ONE = 1.0E+0, NEGONE = -1.0E+0 ) * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC REAL PSLANGE EXTERNAL INDXG2L, INDXG2P, NUMROC, PSLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PSGEMM, PXERBLA, $ SLACPY, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * RESULT = 0 * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 CALL CHK1MAT( MS, 1, MS, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IQ, JQ, DESCQ, 12, INFO ) CALL CHK1MAT( MS, 1, NV, 2, IC, JC, DESCC, 16, INFO ) * IF( INFO.EQ.0 ) THEN * MP = NUMROC( MS, DESCA( MB_ ), MYROW, 0, NPROW ) NQ = NUMROC( NV, DESCA( NB_ ), MYCOL, 0, NPCOL ) * IF( IQ.NE.1 ) THEN INFO = -10 ELSE IF( JQ.NE.1 ) THEN INFO = -11 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( IC.NE.1 ) THEN INFO = -14 ELSE IF( JC.NE.1 ) THEN INFO = -15 ELSE IF( LWORK.LT.NQ ) THEN INFO = -19 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSSEPCHK', -INFO ) RETURN END IF * * C = Q * W * CALL SLACPY( 'A', MP, NQ, Q, DESCQ( LLD_ ), C, DESCC( LLD_ ) ) * * DO 10 J = 1, NV PCOL = INDXG2P( J, DESCC( NB_ ), 0, 0, NPCOL ) LOCALCOL = INDXG2L( J, DESCC( NB_ ), 0, 0, NPCOL ) * IF( MYCOL.EQ.PCOL ) THEN CALL SSCAL( MP, W( J ), C( ( LOCALCOL-1 )*DESCC( LLD_ )+1 ), $ 1 ) END IF 10 CONTINUE * * * C = C - A * Q * CALL PSGEMM( 'N', 'N', MS, NV, MS, NEGONE, A, 1, 1, DESCA, Q, 1, $ 1, DESCQ, ONE, C, 1, 1, DESCC ) * * Compute the norm of C * * NORM = PSLANGE( 'M', MS, NV, C, 1, 1, DESCC, WORK ) * * TSTNRM = NORM / EPSNORMA / MAX( MS, 1 ) * IF( TSTNRM.GT.THRESH .OR. ( TSTNRM-TSTNRM.NE.0.0E0 ) ) THEN RESULT = 1 END IF * * RETURN * * End of PSSEPCHK * END scalapack-1.8.0/TESTING/EIG/pcmatgen.f0000640000175000017500000004632310363532303017012 0ustar muammarmuammar SUBROUTINE PCMATGEN( ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, $ IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, $ ICNUM, MYROW, MYCOL, NPROW, NPCOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 AFORM, DIAG INTEGER IACOL, IAROW, ICNUM, ICOFF, ICTXT, IRNUM, $ IROFF, ISEED, LDA, M, MB, MYCOL, MYROW, N, $ NB, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCMATGEN : Parallel Complex Single precision MATrix GENerator. * Generate (or regenerate) a distributed matrix A (or sub-matrix of A). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * AFORM (global input) CHARACTER*1 * if AFORM = 'S' : A is returned is a symmetric matrix. * if AFORM = 'H' : A is returned is a Hermitian matrix. * if AFORM = 'T' : A is overwritten with the transpose of * what would normally be generated. * if AFORM = 'C' : A is overwritten with the conjugate trans- * pose of what would normally be generated. * otherwise a random matrix is generated. * * DIAG (global input) CHARACTER*1 * if DIAG = 'D' : A is diagonally dominant. * * M (global input) INTEGER * The number of rows in the generated distributed matrix. * * N (global input) INTEGER * The number of columns in the generated distributed * matrix. * * MB (global input) INTEGER * The row blocking factor of the distributed matrix A. * * NB (global input) INTEGER * The column blocking factor of the distributed matrix A. * * A (local output) COMPLEX, pointer into the local memory to * an array of dimension ( LDA, * ) containing the local * pieces of the distributed matrix. * * LDA (local input) INTEGER * The leading dimension of the array containing the local * pieces of the distributed matrix A. * * IAROW (global input) INTEGER * The row processor coordinate which holds the first block * of the distributed matrix A. * * IACOL (global input) INTEGER * The column processor coordinate which holds the first * block of the distributed matrix A. * * ISEED (global input) INTEGER * The seed number to generate the distributed matrix A. * * IROFF (local input) INTEGER * The number of local rows of A that have already been * generated. It should be a multiple of MB. * * IRNUM (local input) INTEGER * The number of local rows to be generated. * * ICOFF (local input) INTEGER * The number of local columns of A that have already been * generated. It should be a multiple of NB. * * ICNUM (local input) INTEGER * The number of local columns to be generated. * * MYROW (local input) INTEGER * The row process coordinate of the calling process. * * MYCOL (local input) INTEGER * The column process coordinate of the calling process. * * NPROW (global input) INTEGER * The number of process rows in the grid. * * NPCOL (global input) INTEGER * The number of process columns in the grid. * * Notes * ===== * * The code is originally developed by David Walker, ORNL, * and modified by Jaeyoung Choi, ORNL. * * Reference: G. Fox et al. * Section 12.3 of "Solving problems on concurrent processors Vol. I" * * ===================================================================== * * .. Parameters .. INTEGER MULT0, MULT1, IADD0, IADD1 PARAMETER ( MULT0=20077, MULT1=16838, IADD0=12345, $ IADD1=0 ) REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL SYMM, HERM, TRAN INTEGER I, IC, IK, INFO, IOFFC, IOFFR, IR, J, JK, $ JUMP1, JUMP2, JUMP3, JUMP4, JUMP5, JUMP6, $ JUMP7, MAXMN, MEND, MOFF, MP, MRCOL, MRROW, $ NEND, NOFF, NPMB, NQ, NQNB REAL DUMMY * .. * .. Local Arrays .. INTEGER IADD(2), IA1(2), IA2(2), IA3(2), IA4(2), $ IA5(2), IB1(2), IB2(2), IB3(2), IC1(2), IC2(2), $ IC3(2), IC4(2), IC5(2), IRAN1(2), IRAN2(2), $ IRAN3(2), IRAN4(2), ITMP1(2), ITMP2(2), $ ITMP3(2), JSEED(2), MULT(2) * .. * .. External Subroutines .. EXTERNAL JUMPIT, PXERBLA, SETRAN, XJUMPM * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MOD, REAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSRAND EXTERNAL ICEIL, NUMROC, LSAME, PSRAND * .. * .. Executable Statements .. * * Test the input arguments * MP = NUMROC( M, MB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) TRAN = LSAME( AFORM, 'T' ) * INFO = 0 IF( .NOT.LSAME( DIAG, 'D' ) .AND. $ .NOT.LSAME( DIAG, 'N' ) ) THEN INFO = 3 ELSE IF( SYMM.OR.HERM ) THEN IF( M.NE.N ) THEN INFO = 5 ELSE IF( MB.NE.NB ) THEN INFO = 7 END IF ELSE IF( M.LT.0 ) THEN INFO = 4 ELSE IF( N.LT.0 ) THEN INFO = 5 ELSE IF( MB.LT.1 ) THEN INFO = 6 ELSE IF( NB.LT.1 ) THEN INFO = 7 ELSE IF( LDA.LT.0 ) THEN INFO = 9 ELSE IF( ( IAROW.LT.0 ).OR.( IAROW.GE.NPROW ) ) THEN INFO = 10 ELSE IF( ( IACOL.LT.0 ).OR.( IACOL.GE.NPCOL ) ) THEN INFO = 11 ELSE IF( MOD(IROFF,MB).GT.0 ) THEN INFO = 13 ELSE IF( IRNUM.GT.(MP-IROFF) ) THEN INFO = 14 ELSE IF( MOD(ICOFF,NB).GT.0 ) THEN INFO = 15 ELSE IF( ICNUM.GT.(NQ-ICOFF) ) THEN INFO = 16 ELSE IF( ( MYROW.LT.0 ).OR.( MYROW.GE.NPROW ) ) THEN INFO = 17 ELSE IF( ( MYCOL.LT.0 ).OR.( MYCOL.GE.NPCOL ) ) THEN INFO = 18 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCMATGEN', INFO ) RETURN END IF * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) NPMB = NPROW * MB NQNB = NPCOL * NB MOFF = IROFF / MB NOFF = ICOFF / NB MEND = ICEIL(IRNUM, MB) + MOFF NEND = ICEIL(ICNUM, NB) + NOFF * MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * * Symmetric or Hermitian matrix will be generated. * IF( SYMM.OR.HERM ) THEN * * First, generate the lower triangular part (with diagonal block) * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 10 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 10 CONTINUE * JK = 1 DO 80 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 70 I = 1, NB IF( JK .GT. ICNUM ) GO TO 90 * IK = 1 DO 50 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB * IF( IOFFR .GT. IOFFC ) THEN DO 20 J = 1, MB IF( IK .GT. IRNUM ) GO TO 60 A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IK = IK + 1 20 CONTINUE * ELSE IF( IOFFC .EQ. IOFFR ) THEN IK = IK + I - 1 IF( IK .GT. IRNUM ) GO TO 60 DO 30 J = 1, I-1 A(IK,JK) = CMPLX( PSRAND(0), PSRAND(0) ) 30 CONTINUE IF( SYMM ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), ZERO ) DUMMY = PSRAND(0) END IF DO 40 J = 1, MB-I IF( IK+J .GT. IRNUM ) GO TO 60 A(IK+J,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IF( HERM ) THEN A(IK,JK+J) = CONJG( A(IK+J,JK) ) ELSE A(IK,JK+J) = A(IK+J,JK) END IF 40 CONTINUE IK = IK + MB - I + 1 ELSE IK = IK + MB END IF * CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 50 CONTINUE * 60 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 70 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 80 CONTINUE * * Next, generate the upper triangular part. * 90 CONTINUE MULT(1) = MULT0 MULT(2) = MULT1 IADD(1) = IADD0 IADD(2) = IADD1 JSEED(1) = ISEED JSEED(2) = 0 * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 100 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 100 CONTINUE * IK = 1 DO 150 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 140 J = 1, MB IF( IK .GT. IRNUM ) GO TO 160 JK = 1 DO 120 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IF( IOFFC .GT. IOFFR ) THEN DO 110 I = 1, NB IF( JK .GT. ICNUM ) GO TO 130 IF( SYMM ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ TWO*PSRAND(0) - ONE ) END IF JK = JK + 1 110 CONTINUE ELSE JK = JK + NB END IF CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 120 CONTINUE * 130 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 140 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 150 CONTINUE 160 CONTINUE * * (Conjugate) Transposed matrix A will be generated. * ELSE IF( TRAN .OR. LSAME( AFORM, 'C' ) ) THEN * JUMP1 = 1 JUMP2 = 2*NQNB JUMP3 = 2*N JUMP4 = NPMB JUMP5 = MB JUMP6 = MRROW JUMP7 = 2*NB*MRCOL * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( MOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( NOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 170 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 170 CONTINUE * IK = 1 DO 220 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 210 J = 1, MB IF( IK .GT. IRNUM ) GO TO 230 JK = 1 DO 190 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 180 I = 1, NB IF( JK .GT. ICNUM ) GO TO 200 IF( TRAN ) THEN A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) ELSE A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ TWO*PSRAND(0) - ONE ) END IF JK = JK + 1 180 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 190 CONTINUE * 200 CONTINUE IK = IK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 210 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 220 CONTINUE 230 CONTINUE * * A random matrix is generated. * ELSE * JUMP1 = 1 JUMP2 = 2*NPMB JUMP3 = 2*M JUMP4 = NQNB JUMP5 = NB JUMP6 = MRCOL JUMP7 = 2*MB*MRROW * CALL XJUMPM( JUMP1, MULT, IADD, JSEED, IRAN1, IA1, IC1 ) CALL XJUMPM( JUMP2, MULT, IADD, IRAN1, ITMP1, IA2, IC2 ) CALL XJUMPM( JUMP3, MULT, IADD, IRAN1, ITMP1, IA3, IC3 ) CALL XJUMPM( JUMP4, IA3, IC3, IRAN1, ITMP1, IA4, IC4 ) CALL XJUMPM( JUMP5, IA3, IC3, IRAN1, ITMP1, IA5, IC5 ) CALL XJUMPM( JUMP6, IA5, IC5, IRAN1, ITMP3, ITMP1, ITMP2 ) CALL XJUMPM( JUMP7, MULT, IADD, ITMP3, IRAN1, ITMP1, ITMP2 ) CALL XJUMPM( NOFF, IA4, IC4, IRAN1, ITMP1, ITMP2, ITMP3 ) CALL XJUMPM( MOFF, IA2, IC2, ITMP1, IRAN1, ITMP2, ITMP3 ) CALL SETRAN( IRAN1, IA1, IC1 ) * DO 240 I = 1, 2 IB1(I) = IRAN1(I) IB2(I) = IRAN1(I) IB3(I) = IRAN1(I) 240 CONTINUE * JK = 1 DO 290 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB DO 280 I = 1, NB IF( JK .GT. ICNUM ) GO TO 300 IK = 1 DO 260 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB DO 250 J = 1, MB IF( IK .GT. IRNUM ) GO TO 270 A(IK,JK) = CMPLX( ONE - TWO*PSRAND(0), $ ONE - TWO*PSRAND(0) ) IK = IK + 1 250 CONTINUE CALL JUMPIT( IA2, IC2, IB1, IRAN2 ) IB1(1) = IRAN2(1) IB1(2) = IRAN2(2) 260 CONTINUE * 270 CONTINUE JK = JK + 1 CALL JUMPIT( IA3, IC3, IB2, IRAN3 ) IB1(1) = IRAN3(1) IB1(2) = IRAN3(2) IB2(1) = IRAN3(1) IB2(2) = IRAN3(2) 280 CONTINUE * CALL JUMPIT( IA4, IC4, IB3, IRAN4 ) IB1(1) = IRAN4(1) IB1(2) = IRAN4(2) IB2(1) = IRAN4(1) IB2(2) = IRAN4(2) IB3(1) = IRAN4(1) IB3(2) = IRAN4(2) 290 CONTINUE 300 CONTINUE END IF * * Diagonally dominant matrix will be generated. * IF( LSAME( DIAG, 'D' ) ) THEN IF( MB.NE.NB ) THEN WRITE(*,*) 'Diagonally dominant matrices with rowNB not'// $ ' equal colNB is not supported!' RETURN END IF * MAXMN = MAX(M, N) JK = 1 DO 340 IC = NOFF+1, NEND IOFFC = ((IC-1)*NPCOL+MRCOL) * NB IK = 1 DO 320 IR = MOFF+1, MEND IOFFR = ((IR-1)*NPROW+MRROW) * MB IF( IOFFC.EQ.IOFFR ) THEN DO 310 J = 0, MB-1 IF( IK .GT. IRNUM ) GO TO 330 IF( HERM ) THEN A(IK,JK+J) = CMPLX( $ ABS(REAL(A(IK,JK+J)))+2*MAXMN, ZERO ) ELSE A(IK,JK+J) = CMPLX( ABS(REAL(A(IK,JK+J)))+MAXMN, $ ABS(AIMAG(A(IK,JK+J)))+ MAXMN ) END IF IK = IK + 1 310 CONTINUE ELSE IK = IK + MB END IF 320 CONTINUE 330 CONTINUE JK = JK + NB 340 CONTINUE END IF * RETURN * * End of PCMATGEN * END scalapack-1.8.0/TESTING/LS.dat0000640000175000017500000000066710363532303015452 0ustar muammarmuammar'ScaLAPACK LS solve input file' 'MPI machine' 'LS.out' output file name (if any) 6 device out 3 number of problems sizes 15 7 31 values of M 5 21 31 values of N 2 number of NB's 2 3 5 values of NB 2 number of NRHS's 2 3 5 values of NRHS 2 number of NBRHS's 1 2 values of NBRHS 4 number of process grids (ordered pairs P & Q) 1 1 4 2 2 3 8 values of P 1 4 1 2 3 2 1 values of Q 4.0 threshold scalapack-1.8.0/TESTING/HRD.dat0000640000175000017500000000060010363532303015534 0ustar muammarmuammar'ScaLAPACK HRD input file' 'MPI machine' 'HRD.out' output file name (if any) 6 device out 4 number of problems sizes 50 50 50 50 values of N 1 2 3 5 values of ILO 50 48 45 49 values of IHI 3 number of NB's 2 3 4 values of NB 4 number of processor grids (ordered pairs of P & Q) 1 2 1 4 2 3 8 values of P 1 2 4 1 3 2 1 values of Q 3.0 threshold scalapack-1.8.0/PBLAS/0000750000175000017500000000000010605234725014162 5ustar muammarmuammarscalapack-1.8.0/PBLAS/TIMING/0000750000175000017500000000000010605234725015151 5ustar muammarmuammarscalapack-1.8.0/PBLAS/TIMING/PDBLAS1TIM.dat0000640000175000017500000000232710363532303017242 0ustar muammarmuammar'Level 1 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS1TIM.SUMM' output file name (if any) 6 device out 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 2 number of tests problems 1000 1000 values of N 1000 1 values of M_X 1 1500 values of N_X 32 32 values of IMB_X 32 32 values of INB_X 32 32 values of MB_X 32 32 values of NB_X 0 0 values of RSRC_X 0 0 values of CSRC_X 1 1 values of IX 1 1 values of JX 1 1 values of INCX 1 1 values of M_Y 1000 1500 values of N_Y 32 32 values of IMB_Y 32 32 values of INB_Y 32 32 values of MB_Y 32 32 values of NB_Y 0 0 values of RSRC_Y 0 0 values of CSRC_Y 1 1 values of IY 1 1 values of JY 1 1 values of INCY PDSWAP T put F for no test in the same column PDSCAL T put F for no test in the same column PDCOPY T put F for no test in the same column PDAXPY T put F for no test in the same column PDDOT T put F for no test in the same column PDNRM2 T put F for no test in the same column PDASUM T put F for no test in the same column PDAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/Makefile0000640000175000017500000001265510363532303016615 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0) # # Module: Makefile # # Purpose: Timing Makefile # # Creation date: April 1, 1998 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc sPBLAS1exe = xspblas1tim dPBLAS1exe = xdpblas1tim cPBLAS1exe = xcpblas1tim zPBLAS1exe = xzpblas1tim sPBLAS2exe = xspblas2tim dPBLAS2exe = xdpblas2tim cPBLAS2exe = xcpblas2tim zPBLAS2exe = xzpblas2tim sPBLAS3exe = xspblas3tim dPBLAS3exe = xdpblas3tim cPBLAS3exe = xcpblas3tim zPBLAS3exe = xzpblas3tim spb1tim = $(PBLASTSTdir)/$(sPBLAS1exe) dpb1tim = $(PBLASTSTdir)/$(dPBLAS1exe) cpb1tim = $(PBLASTSTdir)/$(cPBLAS1exe) zpb1tim = $(PBLASTSTdir)/$(zPBLAS1exe) spb2tim = $(PBLASTSTdir)/$(sPBLAS2exe) dpb2tim = $(PBLASTSTdir)/$(dPBLAS2exe) cpb2tim = $(PBLASTSTdir)/$(cPBLAS2exe) zpb2tim = $(PBLASTSTdir)/$(zPBLAS2exe) spb3tim = $(PBLASTSTdir)/$(sPBLAS3exe) dpb3tim = $(PBLASTSTdir)/$(dPBLAS3exe) cpb3tim = $(PBLASTSTdir)/$(cPBLAS3exe) zpb3tim = $(PBLASTSTdir)/$(zPBLAS3exe) pbtcom = pblastim.o PB_Cwarn.o PB_Cabort.o spb1t = psblas1tim.o psblastim.o $(pbtcom) dpb1t = pdblas1tim.o pdblastim.o $(pbtcom) cpb1t = pcblas1tim.o pcblastim.o $(pbtcom) zpb1t = pzblas1tim.o pzblastim.o $(pbtcom) spb2t = psblas2tim.o psblastim.o $(pbtcom) dpb2t = pdblas2tim.o pdblastim.o $(pbtcom) cpb2t = pcblas2tim.o pcblastim.o $(pbtcom) zpb2t = pzblas2tim.o pzblastim.o $(pbtcom) spb3t = psblas3tim.o psblastim.o $(pbtcom) dpb3t = pdblas3tim.o pdblastim.o $(pbtcom) cpb3t = pcblas3tim.o pcblastim.o $(pbtcom) zpb3t = pzblas3tim.o pzblastim.o $(pbtcom) all : single double complex complex16 single: PblasErrorHandler $(spb1tim) $(spb2tim) $(spb3tim) double: PblasErrorHandler $(dpb1tim) $(dpb2tim) $(dpb3tim) complex: PblasErrorHandler $(cpb1tim) $(cpb2tim) $(cpb3tim) complex16: PblasErrorHandler $(zpb1tim) $(zpb2tim) $(zpb3tim) PblasErrorHandler: rm -f PB_Cwarn.c PB_Cwarn.o ln -s $(PBLASdir)/SRC/PTOOLS/PB_Cwarn.c PB_Cwarn.c $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas PB_Cwarn.c rm -f PB_Cabort.c PB_Cabort.o ln -s $(PBLASdir)/SRC/PTOOLS/PB_Cabort.c PB_Cabort.c $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas PB_Cabort.c $(PBLASTSTdir)/PSBLAS1TIM.dat: PSBLAS1TIM.dat cp PSBLAS1TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS1TIM.dat: PDBLAS1TIM.dat cp PDBLAS1TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS1TIM.dat: PCBLAS1TIM.dat cp PCBLAS1TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS1TIM.dat: PZBLAS1TIM.dat cp PZBLAS1TIM.dat $(PBLASTSTdir) $(spb1tim) : $(SCALAPACKLIB) $(spb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb1tim) $(spb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS1TIM.dat $(dpb1tim) : $(SCALAPACKLIB) $(dpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb1tim) $(dpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS1TIM.dat $(cpb1tim) : $(SCALAPACKLIB) $(cpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb1tim) $(cpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS1TIM.dat $(zpb1tim) : $(SCALAPACKLIB) $(zpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb1tim) $(zpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS1TIM.dat $(PBLASTSTdir)/PSBLAS2TIM.dat: PSBLAS2TIM.dat cp PSBLAS2TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS2TIM.dat: PDBLAS2TIM.dat cp PDBLAS2TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS2TIM.dat: PCBLAS2TIM.dat cp PCBLAS2TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS2TIM.dat: PZBLAS2TIM.dat cp PZBLAS2TIM.dat $(PBLASTSTdir) $(spb2tim) : $(SCALAPACKLIB) $(spb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb2tim) $(spb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS2TIM.dat $(dpb2tim) : $(SCALAPACKLIB) $(dpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb2tim) $(dpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS2TIM.dat $(cpb2tim) : $(SCALAPACKLIB) $(cpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb2tim) $(cpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS2TIM.dat $(zpb2tim) : $(SCALAPACKLIB) $(zpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb2tim) $(zpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS2TIM.dat $(PBLASTSTdir)/PSBLAS3TIM.dat: PSBLAS3TIM.dat cp PSBLAS3TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS3TIM.dat: PDBLAS3TIM.dat cp PDBLAS3TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS3TIM.dat: PCBLAS3TIM.dat cp PCBLAS3TIM.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS3TIM.dat: PZBLAS3TIM.dat cp PZBLAS3TIM.dat $(PBLASTSTdir) $(spb3tim) : $(SCALAPACKLIB) $(spb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb3tim) $(spb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS3TIM.dat $(dpb3tim) : $(SCALAPACKLIB) $(dpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb3tim) $(dpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS3TIM.dat $(cpb3tim) : $(SCALAPACKLIB) $(cpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb3tim) $(cpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS3TIM.dat $(zpb3tim) : $(SCALAPACKLIB) $(zpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb3tim) $(zpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS3TIM.dat $(spb1t): $(FRC) $(dpb1t): $(FRC) $(cpb1t): $(FRC) $(zpb1t): $(FRC) $(spb2t): $(FRC) $(dpb2t): $(FRC) $(cpb2t): $(FRC) $(zpb2t): $(FRC) $(spb3t): $(FRC) $(dpb3t): $(FRC) $(cpb3t): $(FRC) $(zpb3t): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f # .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas $*.c scalapack-1.8.0/PBLAS/TIMING/psblas3tim.f0000640000175000017500000016573010363532303017410 0ustar muammarmuammar PROGRAM PSBLA3TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA3TIM is the main timing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 56 lines: * 'Level 3 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS3TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 1.0E0 value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PSGEMM T put F for no test in the same column * PSSYMM T put F for no test in the same column * PSSYRK T put F for no test in the same column * PSSYR2K T put F for no test in the same column * PSTRMM T put F for no test in the same column * PSTRSM T put F for no test in the same column * PSGEADD T put F for no test in the same column * PSTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, REALSZ, TOTMEM, MEMSIZ, $ NSUBS REAL ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, REALSZ = 4, $ ONE = 1.0E+0, TOTMEM = 2000000, NSUBS = 8, $ MEMSIZ = TOTMEM / REALSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IMBA, IMBB, IMBC, $ IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, IPA, $ IPB, IPC, IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, $ IPREC, J, JA, JB, JC, K, L, M, MA, MB, MBA, $ MBB, MBC, MC, MEMREQD, MPA, MPB, MPC, MYCOL, $ MYROW, N, NA, NB, NBA, NBB, NBC, NC, NCOLA, $ NCOLB, NCOLC, NGRIDS, NOUT, NPCOL, NPROCS, $ NPROW, NQA, NQB, NQC, NROWA, NROWB, NROWC, $ NTESTS, OFFDA, OFFDC, RSRCA, RSRCB, RSRCC REAL ALPHA, BETA, SCALE DOUBLE PRECISION CFLOPS, NOPS, WFLOPS * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), BCHECK( NSUBS ), $ CCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCB( DLEN_ ), DESCC( DLEN_ ), $ IAVAL( MAXTESTS ), IBVAL( MAXTESTS ), $ ICVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBBVAL( MAXTESTS ), $ IMBCVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBBVAL( MAXTESTS ), INBCVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JBVAL( MAXTESTS ), $ JCVAL( MAXTESTS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PMDESCCHK, PMDIMCHK, PSBLA3TIMINFO, $ PSGEADD, PSGEMM, PSLAGEN, PSLASCAL, PSSYMM, $ PSSYR2K, PSSYRK, PSTRADD, PSTRMM, PSTRSM * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL3 EXTERNAL LSAME, PDOPBL3 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSGEMM ', 'PSSYMM ', 'PSSYRK ', $ 'PSSYR2K', 'PSTRMM ', 'PSTRSM ', $ 'PSGEADD', 'PSTRADD'/ DATA BCHECK/.TRUE., .TRUE., .FALSE., .TRUE., .TRUE., $ .TRUE., .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA3TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, 0, 0, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA IPC = IPB + DESCB( LLD_ )*NQB * * Check if sufficient memory. * MEMREQD = IPC + DESCC( LLD_ )*NQC - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PSGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF ELSE IF( L.EQ.2 ) THEN * * PSSYMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF ELSE IF( L.EQ.3 ) THEN * * PSSYRK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 ELSE IF( L.EQ.4 ) THEN * * PSSYR2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN * * PSTRMM, PSTRSM * NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 ELSE IF( L.EQ.7 .OR. L.EQ.8 ) THEN * * PSGEADD, PSTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PSSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 .OR. L.EQ.4 ) THEN * * PSSYRK, PSSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( ( L.EQ.6 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PSTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PSLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) IF( ( L.EQ.6 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PSLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PSLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF * END IF * IF( BCHECK( L ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PSLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 3 PBLAS routine * IF( L.EQ.1 ) THEN * * Test PSGEMM * NOPS = PDOPBL3( SNAMES( L ), M, N, K ) * CALL PB_TIMER( 1 ) CALL PSGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.2 ) THEN * * Test PSSYMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PSSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.3 ) THEN * * Test PSSYRK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PSSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.4 ) THEN * * Test PSSYR2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PSSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.5 ) THEN * * Test PSTRMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PSTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.6 ) THEN * * Test PSTRSM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PSTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.7 ) THEN * * Test PSGEADD * NOPS = PDOPBL3( SNAMES( L ), M, N, M ) * CALL PB_TIMER( 1 ) CALL PSGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.8 ) THEN * * Test PSTRADD * IF( LSAME( UPLO, 'U' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PSTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( L ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9985 FORMAT( 2X, 'End of Tests.' ) 9984 FORMAT( 2X, 'Tests started.' ) 9983 FORMAT( 5X, A, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PSBLA3TIM * END SUBROUTINE PSBLA3TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, $ IAM, NPROCS, ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS REAL ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA3TIMINFO get the needed startup information for timing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) REAL * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS3TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 3 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real single precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Beta : ', G16.6 ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PSBLA3TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/pdblastim.f0000640000175000017500000023015410363532303017277 0ustar muammarmuammar SUBROUTINE PDLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_DLASCAL, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_DLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_DLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_DLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_DLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_DLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PDLASCAL * END SUBROUTINE PDLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP DOUBLE PRECISION ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_DLAGEN, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PDLADOM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_DLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_DLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) ALPHA = DBLE( MAXMN ) * IF( IOFFDA.GE.0 ) THEN CALL PDLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PDLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PDLAGEN * END SUBROUTINE PDLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP DOUBLE PRECISION ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PDLADOM * END SUBROUTINE PB_DLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PB_DLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (input/output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_DLASCAL * END SUBROUTINE PB_DLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PB_DLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP DOUBLE PRECISION DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PB_DRAND EXTERNAL LSAME, PB_DRAND * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) .OR. LSAME( AFORM, 'C' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( ( LSAME( AFORM, 'S' ) ).OR.( LSAME( AFORM, 'H' ) ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = PB_DRAND( 0 ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = PB_DRAND( 0 ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = PB_DRAND( 0 ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = PB_DRAND( 0 ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * END IF * RETURN * * End of PB_DLAGEN * END DOUBLE PRECISION FUNCTION PB_DRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION PB_DRAN EXTERNAL PB_DRAN * .. * .. Executable Statements .. * PB_DRAND = ONE - TWO * PB_DRAN( IDUMM ) * RETURN * * End of PB_DRAND * END DOUBLE PRECISION FUNCTION PB_DRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648D+9, $ POW16 = 6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_DRAN = ( DBLE( IRAND( 1 ) ) + POW16 * DBLE( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_DRAN * END scalapack-1.8.0/PBLAS/TIMING/PCBLAS1TIM.dat0000640000175000017500000000247610363532303017246 0ustar muammarmuammar'Level 1 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS1TIM.SUMM' output file name (if any) 6 device out 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0E0, -3.0E0) value of ALPHA 2 number of tests problems 1000 1000 values of N 1000 1 values of M_X 1 1500 values of N_X 32 32 values of IMB_X 32 32 values of INB_X 32 32 values of MB_X 32 32 values of NB_X 0 0 values of RSRC_X 0 0 values of CSRC_X 1 1 values of IX 1 1 values of JX 1 1 values of INCX 1 1 values of M_Y 1000 1500 values of N_Y 32 32 values of IMB_Y 32 32 values of INB_Y 32 32 values of MB_Y 32 32 values of NB_Y 0 0 values of RSRC_Y 0 0 values of CSRC_Y 1 1 values of IY 1 1 values of JY 1 1 values of INCY PCSWAP T put F for no test in the same column PCSCAL T put F for no test in the same column PCSSCAL T put F for no test in the same column PCCOPY T put F for no test in the same column PCAXPY T put F for no test in the same column PCDOTU T put F for no test in the same column PCDOTC T put F for no test in the same column PSCNRM2 T put F for no test in the same column PSCASUM T put F for no test in the same column PCAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pzblastim.f0000640000175000017500000026352610363532303017336 0ustar muammarmuammar SUBROUTINE PZLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_INFOG2L, PB_ZLASCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_ZLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_ZLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_ZLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_ZLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_ZLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PZLASCAL * END SUBROUTINE PZLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP COMPLEX*16 ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PB_ZLAGEN, PXERBLA, $ PZLADOM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_ZLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_ZLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) IF( HERM ) THEN ALPHA = DCMPLX( DBLE( 2 * MAXMN ), ZERO ) ELSE ALPHA = DCMPLX( DBLE( NVIR ), DBLE( MAXMN ) ) END IF * IF( IOFFDA.GE.0 ) THEN CALL PZLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PZLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PZLAGEN * END SUBROUTINE PZLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP COMPLEX*16 ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PZLADOM * END SUBROUTINE PB_ZLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PB_ZLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_ZLASCAL * END SUBROUTINE PB_ZLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PB_ZLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP COMPLEX*16 DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PB_DRAND EXTERNAL LSAME, PB_DRAND * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( LSAME( AFORM, 'S' ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * ELSE IF( LSAME( AFORM, 'C' ) ) THEN * * Generate the conjugate transpose of the matrix that would be * normally generated. * II = 1 * DO 290 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 280 IK = II, II + IB - 1 * JJ = 1 * DO 270 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 260 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 260 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 270 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 280 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 290 CONTINUE * ELSE IF( LSAME( AFORM, 'H' ) ) THEN * * Generate a Hermitian matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 370 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 360 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 350 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 300 IK = II, II + IB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 300 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 310 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 310 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 320 IK = II, ITMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 320 CONTINUE * IF( ITMP.LE.( II + IB - 1 ) ) THEN DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) A( ITMP, JK ) = DCMPLX( DBLE( DUMMY ), $ ZERO ) END IF * DO 330 IK = ITMP + 1, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 330 CONTINUE * END IF * ELSE * DO 340 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 340 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 350 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 360 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 370 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 450 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 440 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 430 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 380 JK = JJ, JJ + JB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 380 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 390 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 390 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 400 JK = JJ, JTMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 400 CONTINUE * IF( JTMP.LE.( JJ + JB - 1 ) ) THEN DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) A( IK, JTMP ) = DCMPLX( DBLE( DUMMY ), $ ZERO ) END IF * DO 410 JK = JTMP + 1, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 410 CONTINUE * END IF * ELSE * DO 420 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 420 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 430 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 440 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 450 CONTINUE * END IF * END IF * RETURN * * End of PB_ZLAGEN * END DOUBLE PRECISION FUNCTION PB_DRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION PB_DRAN EXTERNAL PB_DRAN * .. * .. Executable Statements .. * PB_DRAND = ONE - TWO * PB_DRAN( IDUMM ) * RETURN * * End of PB_DRAND * END DOUBLE PRECISION FUNCTION PB_DRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648D+9, $ POW16 = 6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_DRAN = ( DBLE( IRAND( 1 ) ) + POW16 * DBLE( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_DRAN * END scalapack-1.8.0/PBLAS/TIMING/PSBLAS2TIM.dat0000640000175000017500000000577610363532303017275 0ustar muammarmuammar'Level 2 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS2TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 3.0E0 value of BETA 16 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of N 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_X 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_X 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCX 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_Y 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_Y 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCY PSGEMV T put F for no test in the same column PSSYMV T put F for no test in the same column PSTRMV T put F for no test in the same column PSTRSV T put F for no test in the same column PSGER T put F for no test in the same column PSSYR T put F for no test in the same column PSSYR2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/psblas1tim.f0000640000175000017500000012223610363532303017400 0ustar muammarmuammar PROGRAM PSBLA1TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA1TIM is the main timing program for the Level 1 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 40 lines: * 'Level 1 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS1TIM.SUMM' output file name (if any) * 6 device out * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PSSWAP T put F for no test in the same column * PSSCAL T put F for no test in the same column * PSCOPY T put F for no test in the same column * PSAXPY T put F for no test in the same column * PSDOT T put F for no test in the same column * PSNRM2 T put F for no test in the same column * PSASUM T put F for no test in the same column * PSAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, REALSZ, TOTMEM, MEMSIZ, $ NSUBS PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, REALSZ = 4, $ TOTMEM = 2000000, NSUBS = 8, $ MEMSIZ = TOTMEM / REALSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IMBX, IMBY, IMIDX, $ IMIDY, INBX, INBY, INCX, INCY, IPOSTX, IPOSTY, $ IPREX, IPREY, IPX, IPY, IX, IXSEED, IY, IYSEED, $ J, JX, JY, K, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY REAL ALPHA, PSCLR, PUSCLR DOUBLE PRECISION ADDS, CFLOPS, MULTS, NOPS, WFLOPS * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), IERR( 2 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ MBXVAL( MAXTESTS ), MBYVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCXVAL( MAXTESTS ), $ RSCYVAL( MAXTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PSAMAX, PSASUM, PSAXPY, $ PSBLA1TIMINFO, PSCOPY, PSDOT, PSLAGEN, PSNRM2, $ PSSCAL, PSSWAP, PVDESCCHK, PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSSWAP ', 'PSSCAL ', 'PSCOPY ', $ 'PSAXPY ', 'PSDOT ', 'PSNRM2 ', $ 'PSASUM ', 'PSAMAX '/ DATA YCHECK/.TRUE., .FALSE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE., .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA1TIMINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, IAM, NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9983 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) $ GO TO 40 * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( 1 ). * IPX = 1 IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) $ GO TO 30 * * Generate distributed matrices X and Y * CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PSSWAP * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PSSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PSSCAL * ADDS = 0.0D+0 MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PSSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PSCOPY * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PSCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PSAXPY * ADDS = DBLE( N ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PSAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PSDOT * ADDS = DBLE( N-1 ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PSDOT( N, PSCLR, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PSNRM2 * ADDS = DBLE( N-1 ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PSNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PSASUM * ADDS = DBLE( N - 1 ) MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PSASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PSAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = ADDS + MULTS * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9984 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9988 ) J END IF * 50 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9988 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9987 FORMAT( 2X, 'End of Tests.' ) 9986 FORMAT( 2X, 'Tests started.' ) 9985 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9984 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9983 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PSBLA1TIM * END SUBROUTINE PSBLA1TIMINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, $ RSCXVAL, CSCXVAL, IXVAL, JXVAL, $ INCXVAL, MYVAL, NYVAL, IMBYVAL, MBYVAL, $ INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, LDVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, LTEST, IAM, $ NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NGRIDS, NMAT, NOUT, $ NPROCS REAL ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA1TIMINFO get the needed startup information for timing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS1TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 1 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Timing of the real single precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9989 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9988 FORMAT( 2X, ' ', A, A8 ) * * End of PSBLA1TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/pcblas3tim.f0000640000175000017500000017435110363532303017367 0ustar muammarmuammar PROGRAM PCBLA3TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA3TIM is the main timing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 59 lines: * 'Level 3 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS3TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * (1.0E0, 0.0E0) value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PCGEMM T put F for no test in the same column * PCSYMM T put F for no test in the same column * PCHEMM T put F for no test in the same column * PCSYRK T put F for no test in the same column * PCHERK T put F for no test in the same column * PCSYR2K T put F for no test in the same column * PCHER2K T put F for no test in the same column * PCTRMM T put F for no test in the same column * PCTRSM T put F for no test in the same column * PCGEADD T put F for no test in the same column * PCTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, CPLXSZ, TOTMEM, MEMSIZ, $ NSUBS COMPLEX ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, CPLXSZ = 8, $ ONE = ( 1.0E+0, 0.0E+0 ), TOTMEM = 2000000, $ NSUBS = 11, MEMSIZ = TOTMEM / CPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IMBA, IMBB, IMBC, $ IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, IPA, $ IPB, IPC, IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, $ IPREC, J, JA, JB, JC, K, L, M, MA, MB, MBA, $ MBB, MBC, MC, MEMREQD, MPA, MPB, MPC, MYCOL, $ MYROW, N, NA, NB, NBA, NBB, NBC, NC, NCOLA, $ NCOLB, NCOLC, NGRIDS, NOUT, NPCOL, NPROCS, $ NPROW, NQA, NQB, NQC, NROWA, NROWB, NROWC, $ NTESTS, OFFDA, OFFDC, RSRCA, RSRCB, RSRCC DOUBLE PRECISION CFLOPS, NOPS, WFLOPS COMPLEX ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), BCHECK( NSUBS ), $ CCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCB( DLEN_ ), DESCC( DLEN_ ), $ IAVAL( MAXTESTS ), IBVAL( MAXTESTS ), $ ICVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBBVAL( MAXTESTS ), $ IMBCVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBBVAL( MAXTESTS ), INBCVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JBVAL( MAXTESTS ), $ JCVAL( MAXTESTS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PCBLA3TIMINFO, PCGEADD, PCGEMM, $ PCHEMM, PCHER2K, PCHERK, PCLAGEN, PCLASCAL, $ PCSYMM, PCSYR2K, PCSYRK, PCTRADD, PCTRMM, $ PCTRSM, PMDESCCHK, PMDIMCHK * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL3 EXTERNAL LSAME, PDOPBL3 * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, DBLE, MAX, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCGEMM ', 'PCSYMM ', 'PCHEMM ', $ 'PCSYRK ', 'PCHERK ', 'PCSYR2K', $ 'PCHER2K', 'PCTRMM ', 'PCTRSM ', $ 'PCGEADD', 'PCTRADD'/ DATA BCHECK/.TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .TRUE., $ .TRUE., .TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA3TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, 0, 0, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA IPC = IPB + DESCB( LLD_ )*NQB * * Check if sufficient memory. * MEMREQD = IPC + DESCC( LLD_ )*NQC - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PCGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF ELSE IF( L.EQ.2 .OR. L.EQ.3 ) THEN * * PCSYMM, PCHEMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF ELSE IF( L.EQ.4 .OR. L.EQ.5 ) THEN * * PCSYRK, PCHERK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 ELSE IF( L.EQ.6 .OR. L.EQ.7 ) THEN * * PCSYR2K, PCHER2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN * * PCTRMM, PCTRSM * NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 ELSE IF( L.EQ.10 .OR. L.EQ.11 ) THEN * * PCGEADD, PCTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Check special values of TRANSA for symmetric and * hermitian rank-k and rank-2k updates. * IF( L.EQ.4 .OR. L.EQ.6 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( L ), 'TRANSA' GO TO 30 END IF ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( L ), 'TRANSA' GO TO 30 END IF END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PCSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 ) THEN * * PCHEMM * AFORM = 'H' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.4 .OR. L.EQ.6 ) THEN * * PCSYRK, PCSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN * * PCHERK, PCHER2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'H' OFFDC = IC - JC * ELSE IF( ( L.EQ.9 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PCTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PCLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) IF( ( L.EQ.9 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PCLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PCLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF * END IF * IF( BCHECK( L ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PCLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 3 PBLAS routine * IF( L.EQ.1 ) THEN * * Test PCGEMM * NOPS = PDOPBL3( SNAMES( L ), M, N, K ) * CALL PB_TIMER( 1 ) CALL PCGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.2 ) THEN * * Test PCSYMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PCSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.3 ) THEN * * Test PCHEMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PCHEMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.4 ) THEN * * Test PCSYRK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PCSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.5 ) THEN * * Test PCHERK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PCHERK( UPLO, TRANSA, N, K, REAL( ALPHA ), $ MEM( IPA ), IA, JA, DESCA, REAL( BETA ), $ MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.6 ) THEN * * Test PCSYR2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PCSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.7 ) THEN * * Test PCHER2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PCHER2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, REAL( BETA ), MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.8 ) THEN * * Test PCTRMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PCTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.9 ) THEN * * Test PCTRSM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PCTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.10 ) THEN * * Test PCGEADD * NOPS = PDOPBL3( SNAMES( L ), M, N, M ) * CALL PB_TIMER( 1 ) CALL PCGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.11 ) THEN * * Test PCTRADD * IF( LSAME( UPLO, 'U' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PCTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( L ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9985 FORMAT( 2X, 'End of Tests.' ) 9984 FORMAT( 2X, 'Tests started.' ) 9983 FORMAT( 5X, A, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PCBLA3TIM * END SUBROUTINE PCBLA3TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, $ IAM, NPROCS, ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA3TIMINFO get the needed startup information for timing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eleven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS ) with NSUBS = 11. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS3TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 3 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex single precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PCBLA3TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/pcblastim.f0000640000175000017500000026313510363532303017303 0ustar muammarmuammar SUBROUTINE PCLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CLASCAL, PB_DESCTRANS, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_CLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_CLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_CLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_CLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_CLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PCLASCAL * END SUBROUTINE PCLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP COMPLEX ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_CLAGEN, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PCLADOM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_CLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_CLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) IF( HERM ) THEN ALPHA = CMPLX( REAL( 2 * MAXMN ), ZERO ) ELSE ALPHA = CMPLX( REAL( MAXMN ), REAL( MAXMN ) ) END IF * IF( IOFFDA.GE.0 ) THEN CALL PCLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PCLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PCLAGEN * END SUBROUTINE PCLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP COMPLEX ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PCLADOM * END SUBROUTINE PB_CLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PB_CLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_CLASCAL * END SUBROUTINE PB_CLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PB_CLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP COMPLEX DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME REAL PB_SRAND EXTERNAL LSAME, PB_SRAND * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( LSAME( AFORM, 'S' ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * ELSE IF( LSAME( AFORM, 'C' ) ) THEN * * Generate the conjugate transpose of the matrix that would be * normally generated. * II = 1 * DO 290 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 280 IK = II, II + IB - 1 * JJ = 1 * DO 270 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 260 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 260 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 270 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 280 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 290 CONTINUE * ELSE IF( LSAME( AFORM, 'H' ) ) THEN * * Generate a Hermitian matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 370 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 360 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 350 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 300 IK = II, II + IB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 300 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 310 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 310 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 320 IK = II, ITMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 320 CONTINUE * IF( ITMP.LE.( II + IB - 1 ) ) THEN DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) A( ITMP, JK ) = CMPLX( REAL( DUMMY ), $ ZERO ) END IF * DO 330 IK = ITMP + 1, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 330 CONTINUE * END IF * ELSE * DO 340 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 340 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 350 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 360 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 370 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 450 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 440 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 430 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 380 JK = JJ, JJ + JB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 380 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 390 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 390 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 400 JK = JJ, JTMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 400 CONTINUE * IF( JTMP.LE.( JJ + JB - 1 ) ) THEN DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) A( IK, JTMP ) = CMPLX( REAL( DUMMY ), $ ZERO ) END IF * DO 410 JK = JTMP + 1, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 410 CONTINUE * END IF * ELSE * DO 420 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 420 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 430 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 440 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 450 CONTINUE * END IF * END IF * RETURN * * End of PB_CLAGEN * END REAL FUNCTION PB_SRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. External Functions .. REAL PB_SRAN EXTERNAL PB_SRAN * .. * .. Executable Statements .. * PB_SRAND = ONE - TWO * PB_SRAN( IDUMM ) * RETURN * * End of PB_SRAND * END REAL FUNCTION PB_SRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648E+9, $ POW16 = 6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_SRAN = ( REAL( IRAND( 1 ) ) + POW16 * REAL( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_SRAN * END scalapack-1.8.0/PBLAS/TIMING/pcblas2tim.f0000640000175000017500000015462710363532303017372 0ustar muammarmuammar PROGRAM PCBLA2TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA2TIM is the main timing program for the Level 2 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 56 lines: * 'Level 2 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS2TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * (1.0E0, 0.0E0) value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PCGEMV T put F for no test in the same column * PCHEMV T put F for no test in the same column * PCTRMV T put F for no test in the same column * PCTRSV T put F for no test in the same column * PCGERU T put F for no test in the same column * PCGERC T put F for no test in the same column * PCHER T put F for no test in the same column * PCHER2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, CPLXSZ, TOTMEM, MEMSIZ, $ NSUBS COMPLEX ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, CPLXSZ = 8, $ ONE = ( 1.0E+0, 0.0E+0 ), TOTMEM = 2000000, $ NSUBS = 8, MEMSIZ = TOTMEM / CPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, INBA, $ INBX, INBY, INCX, INCY, IPA, IPOSTA, IPOSTX, $ IPOSTY, IPREA, IPREX, IPREY, IPX, IPY, IX, $ IXSEED, IY, IYSEED, J, JA, JX, JY, K, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY DOUBLE PRECISION CFLOPS, NOPS, WFLOPS COMPLEX ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), $ IAVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PCBLA2TIMINFO, PCGEMV, PCGERC, $ PCGERU, PCHEMV, PCHER, PCHER2, PCLAGEN, $ PCLASCAL, PCTRMV, PCTRSV, PMDESCCHK, PMDIMCHK, $ PVDESCCHK, PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL2 EXTERNAL LSAME, PDOPBL2 * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, DBLE, MAX, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCGEMV ', 'PCHEMV ', 'PCTRMV ', $ 'PCTRSV ', 'PCGERU ', 'PCGERC ', $ 'PCHER ', 'PCHER2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .TRUE. * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA2TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, MAXTESTS, $ NGRIDS, PVAL, MAXGRIDS, QVAL, MAXGRIDS, $ NBLOG, LTEST, IAM, NPROCS, ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = 1 IPX = IPA + DESCA( LLD_ ) * NQA IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 .OR. K.EQ.6 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ) THEN AFORM = 'H' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PCLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, $ NY, 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * IF( ( K.EQ.4 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PCLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PCLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF END IF * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 2 PBLAS routine * IF( K.EQ.1 ) THEN * * Test PCGEMV * CALL PB_TIMER( 1 ) CALL PCGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PCHEMV * CALL PB_TIMER( 1 ) CALL PCHEMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PCTRMV * CALL PB_TIMER( 1 ) CALL PCTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PCTRSV * CALL PB_TIMER( 1 ) CALL PCTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PCGERU * CALL PB_TIMER( 1 ) CALL PCGERU( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PCGERC * CALL PB_TIMER( 1 ) CALL PCGERC( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PCHER * CALL PB_TIMER( 1 ) CALL PCHER( UPLO, N, REAL( ALPHA ), MEM( IPX ), IX, $ JX, DESCX, INCX, MEM( IPA ), IA, JA, $ DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * * Test PCHER2 * CALL PB_TIMER( 1 ) CALL PCHER2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = PDOPBL2( SNAMES( K ), NROWA, NCOLA, 0, 0 ) * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9984 FORMAT( 2X, 'End of Tests.' ) 9983 FORMAT( 2X, 'Tests started.' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PCBLA2TIM * END SUBROUTINE PCBLA2TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA2TIMINFO get the needed startup information for timing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS2TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 2 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex single precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 1, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PCBLA2TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/psblas2tim.f0000640000175000017500000015332310363532303017402 0ustar muammarmuammar PROGRAM PSBLA2TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA2TIM is the main timing program for the Level 2 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 55 lines: * 'Level 2 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS2TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 1.0E0 value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PSGEMV T put F for no test in the same column * PSSYMV T put F for no test in the same column * PSTRMV T put F for no test in the same column * PSTRSV T put F for no test in the same column * PSGER T put F for no test in the same column * PSSYR T put F for no test in the same column * PSSYR2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, REALSZ, TOTMEM, MEMSIZ, $ NSUBS REAL ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, REALSZ = 4, $ ONE = 1.0E+0, TOTMEM = 2000000, NSUBS = 7, $ MEMSIZ = TOTMEM / REALSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, INBA, $ INBX, INBY, INCX, INCY, IPA, IPOSTA, IPOSTX, $ IPOSTY, IPREA, IPREX, IPREY, IPX, IPY, IX, $ IXSEED, IY, IYSEED, J, JA, JX, JY, K, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY REAL ALPHA, BETA, SCALE DOUBLE PRECISION CFLOPS, NOPS, WFLOPS * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), $ IAVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) REAL MEM( MEMSIZ ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PMDESCCHK, PMDIMCHK, PSBLA2TIMINFO, $ PSGEMV, PSGER, PSLAGEN, PSLASCAL, PSSYMV, $ PSSYR, PSSYR2, PSTRMV, PSTRSV, PVDESCCHK, $ PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL2 EXTERNAL LSAME, PDOPBL2 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSGEMV ', 'PSSYMV ', 'PSTRMV ', $ 'PSTRSV ', 'PSGER ', 'PSSYR ', $ 'PSSYR2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .TRUE. * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA2TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, MAXTESTS, $ NGRIDS, PVAL, MAXGRIDS, QVAL, MAXGRIDS, $ NBLOG, LTEST, IAM, NPROCS, ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = 1 IPX = IPA + DESCA( LLD_ ) * NQA IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ) THEN AFORM = 'S' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PSLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, $ NY, 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * IF( ( K.EQ.4 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PSLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PSLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF END IF * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 2 PBLAS routine * IF( K.EQ.1 ) THEN * * Test PSGEMV * CALL PB_TIMER( 1 ) CALL PSGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PSSYMV * CALL PB_TIMER( 1 ) CALL PSSYMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PSTRMV * CALL PB_TIMER( 1 ) CALL PSTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PSTRSV * CALL PB_TIMER( 1 ) CALL PSTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PSGER * CALL PB_TIMER( 1 ) CALL PSGER( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PSSYR * CALL PB_TIMER( 1 ) CALL PSSYR( UPLO, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PSSYR2 * CALL PB_TIMER( 1 ) CALL PSSYR2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = PDOPBL2( SNAMES( K ), NROWA, NCOLA, 0, 0 ) * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9984 FORMAT( 2X, 'End of Tests.' ) 9983 FORMAT( 2X, 'Tests started.' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PSBLA2TIM * END SUBROUTINE PSBLA2TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS REAL ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA2TIMINFO get the needed startup information for timing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least seven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) REAL * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS ) with NSUBS = 7. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS2TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 2 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real single precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 1, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Beta : ', G16.6 ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PSBLA2TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/PZBLAS2TIM.dat0000640000175000017500000000607710363532303017277 0ustar muammarmuammar'Level 2 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS2TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0D0, -4.0D0) value of ALPHA (3.0D0, -2.0D0) value of BETA 16 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of N 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_X 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_X 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCX 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_Y 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_Y 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCY PZGEMV T put F for no test in the same column PZHEMV T put F for no test in the same column PZTRMV T put F for no test in the same column PZTRSV T put F for no test in the same column PZGERU T put F for no test in the same column PZGERC T put F for no test in the same column PZHER T put F for no test in the same column PZHER2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pcblas1tim.f0000640000175000017500000012452410363532303017362 0ustar muammarmuammar PROGRAM PCBLA1TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA1TIM is the main timing program for the Level 1 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 42 lines: * 'Level 1 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS1TIM.SUMM' output file name (if any) * 6 device out * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PCSWAP T put F for no test in the same column * PCSCAL T put F for no test in the same column * PCSSCAL T put F for no test in the same column * PCCOPY T put F for no test in the same column * PCAXPY T put F for no test in the same column * PCDOTU T put F for no test in the same column * PCDOTC T put F for no test in the same column * PSCNRM2 T put F for no test in the same column * PSCASUM T put F for no test in the same column * PCAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, CPLXSZ, TOTMEM, MEMSIZ, $ NSUBS PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, CPLXSZ = 8, $ TOTMEM = 2000000, NSUBS = 10, $ MEMSIZ = TOTMEM / CPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IMBX, IMBY, IMIDX, $ IMIDY, INBX, INBY, INCX, INCY, IPOSTX, IPOSTY, $ IPREX, IPREY, IPX, IPY, IX, IXSEED, IY, IYSEED, $ J, JX, JY, K, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY REAL PUSCLR DOUBLE PRECISION ADDS, CFLOPS, MULTS, NOPS, WFLOPS COMPLEX ALPHA, PSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), IERR( 2 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ MBXVAL( MAXTESTS ), MBYVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCXVAL( MAXTESTS ), $ RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PCAMAX, PCAXPY, PCBLA1TIMINFO, $ PCCOPY, PCDOTC, PCDOTU, PCLAGEN, PCSCAL, $ PCSSCAL, PCSWAP, PSCASUM, PSCNRM2, PVDESCCHK, $ PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC DBLE, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCSWAP ', 'PCSCAL ', $ 'PCSSCAL ', 'PCCOPY', 'PCAXPY ', $ 'PCDOTU ', 'PCDOTC' , 'PSCNRM2', $ 'PSCASUM', 'PCAMAX '/ DATA YCHECK/.TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE., .TRUE., .TRUE., .FALSE., .FALSE., $ .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA1TIMINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, IAM, NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9983 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) $ GO TO 40 * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( 1 ). * IPX = 1 IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) $ GO TO 30 * * Generate distributed matrices X and Y * CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PCSWAP * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PCSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PCSCAL * ADDS = 0.0D+0 MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PCSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PCSSCAL * ADDS = 0.0D+0 MULTS = DBLE( 2*N ) CALL PB_TIMER( 1 ) CALL PCSSCAL( N, REAL( ALPHA ), MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PCCOPY * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PCCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PCAXPY * ADDS = DBLE( 2*N ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PCAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PCDOTU * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PCDOTU( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PCDOTC * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PCDOTC( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * * Test PSCNRM2 * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PSCNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.9 ) THEN * * Test PSCASUM * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PSCASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.10 ) THEN * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PCAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = ADDS + MULTS * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9984 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9988 ) J END IF * 50 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9988 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9987 FORMAT( 2X, 'End of Tests.' ) 9986 FORMAT( 2X, 'Tests started.' ) 9985 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9984 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9983 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PCBLA1TIM * END SUBROUTINE PCBLA1TIMINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, $ RSCXVAL, CSCXVAL, IXVAL, JXVAL, $ INCXVAL, MYVAL, NYVAL, IMBYVAL, MBYVAL, $ INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, LDVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, LTEST, IAM, $ NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NGRIDS, NMAT, NOUT, $ NPROCS COMPLEX ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA1TIMINFO get the needed startup information for timing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least ten. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS ) with NSUBS = 10. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS1TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 1 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Timing of the complex single precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9989 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9988 FORMAT( 2X, ' ', A, A8 ) * * End of PCBLA1TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/PZBLAS1TIM.dat0000640000175000017500000000247610363532303017275 0ustar muammarmuammar'Level 1 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS1TIM.SUMM' output file name (if any) 6 device out 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0D0, -3.0D0) value of ALPHA 2 number of tests problems 1000 1000 values of N 1000 1 values of M_X 1 1500 values of N_X 32 32 values of IMB_X 32 32 values of INB_X 32 32 values of MB_X 32 32 values of NB_X 0 0 values of RSRC_X 0 0 values of CSRC_X 1 1 values of IX 1 1 values of JX 1 1 values of INCX 1 1 values of M_Y 1000 1500 values of N_Y 32 32 values of IMB_Y 32 32 values of INB_Y 32 32 values of MB_Y 32 32 values of NB_Y 0 0 values of RSRC_Y 0 0 values of CSRC_Y 1 1 values of IY 1 1 values of JY 1 1 values of INCY PZSWAP T put F for no test in the same column PZSCAL T put F for no test in the same column PZDSCAL T put F for no test in the same column PZCOPY T put F for no test in the same column PZAXPY T put F for no test in the same column PZDOTU T put F for no test in the same column PZDOTC T put F for no test in the same column PDZNRM2 T put F for no test in the same column PDZASUM T put F for no test in the same column PZAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pdblas1tim.f0000640000175000017500000012226210363532303017360 0ustar muammarmuammar PROGRAM PDBLA1TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA1TIM is the main timing program for the Level 1 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 40 lines: * 'Level 1 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS1TIM.SUMM' output file name (if any) * 6 device out * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PDSWAP T put F for no test in the same column * PDSCAL T put F for no test in the same column * PDCOPY T put F for no test in the same column * PDAXPY T put F for no test in the same column * PDDOT T put F for no test in the same column * PDNRM2 T put F for no test in the same column * PDASUM T put F for no test in the same column * PDAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, DBLESZ, TOTMEM, MEMSIZ, $ NSUBS PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, DBLESZ = 8, $ TOTMEM = 2000000, NSUBS = 8, $ MEMSIZ = TOTMEM / DBLESZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IMBX, IMBY, IMIDX, $ IMIDY, INBX, INBY, INCX, INCY, IPOSTX, IPOSTY, $ IPREX, IPREY, IPX, IPY, IX, IXSEED, IY, IYSEED, $ J, JX, JY, K, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY DOUBLE PRECISION ADDS, ALPHA, CFLOPS, MULTS, NOPS, PSCLR, $ PUSCLR, WFLOPS * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), IERR( 2 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ MBXVAL( MAXTESTS ), MBYVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCXVAL( MAXTESTS ), $ RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PDAMAX, PDASUM, PDAXPY, $ PDBLA1TIMINFO, PDCOPY, PDDOT, PDLAGEN, PDNRM2, $ PDSCAL, PDSWAP, PVDESCCHK, PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDSWAP ', 'PDSCAL ', 'PDCOPY ', $ 'PDAXPY ', 'PDDOT ', 'PDNRM2 ', $ 'PDASUM ', 'PDAMAX '/ DATA YCHECK/.TRUE., .FALSE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE., .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA1TIMINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, IAM, NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9983 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) $ GO TO 40 * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( 1 ). * IPX = 1 IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) $ GO TO 30 * * Generate distributed matrices X and Y * CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PDSWAP * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PDSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PDSCAL * ADDS = 0.0D+0 MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PDSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PDCOPY * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PDCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PDAXPY * ADDS = DBLE( N ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PDAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PDDOT * ADDS = DBLE( N-1 ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PDDOT( N, PSCLR, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PDNRM2 * ADDS = DBLE( N-1 ) MULTS = DBLE( N ) CALL PB_TIMER( 1 ) CALL PDNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PDASUM * ADDS = DBLE( N - 1 ) MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PDASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PDAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = ADDS + MULTS * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9984 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9988 ) J END IF * 50 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9988 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9987 FORMAT( 2X, 'End of Tests.' ) 9986 FORMAT( 2X, 'Tests started.' ) 9985 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9984 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9983 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PDBLA1TIM * END SUBROUTINE PDBLA1TIMINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, $ RSCXVAL, CSCXVAL, IXVAL, JXVAL, $ INCXVAL, MYVAL, NYVAL, IMBYVAL, MBYVAL, $ INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, LDVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, LTEST, IAM, $ NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NGRIDS, NMAT, NOUT, $ NPROCS DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA1TIMINFO get the needed startup information for timing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS1TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 1 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Timing of the real double precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9989 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9988 FORMAT( 2X, ' ', A, A8 ) * * End of PDBLA1TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/pzblas3tim.f0000640000175000017500000017436110363532303017417 0ustar muammarmuammar PROGRAM PZBLA3TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA3TIM is the main timing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 59 lines: * 'Level 3 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS3TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * (1.0D0, 0.0D0) value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PZGEMM T put F for no test in the same column * PZSYMM T put F for no test in the same column * PZHEMM T put F for no test in the same column * PZSYRK T put F for no test in the same column * PZHERK T put F for no test in the same column * PZSYR2K T put F for no test in the same column * PZHER2K T put F for no test in the same column * PZTRMM T put F for no test in the same column * PZTRSM T put F for no test in the same column * PZGEADD T put F for no test in the same column * PZTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, ZPLXSZ, TOTMEM, MEMSIZ, $ NSUBS COMPLEX*16 ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, ZPLXSZ = 16, $ ONE = ( 1.0D+0, 0.0D+0 ), TOTMEM = 2000000, $ NSUBS = 11, MEMSIZ = TOTMEM / ZPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IMBA, IMBB, IMBC, $ IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, IPA, $ IPB, IPC, IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, $ IPREC, J, JA, JB, JC, K, L, M, MA, MB, MBA, $ MBB, MBC, MC, MEMREQD, MPA, MPB, MPC, MYCOL, $ MYROW, N, NA, NB, NBA, NBB, NBC, NC, NCOLA, $ NCOLB, NCOLC, NGRIDS, NOUT, NPCOL, NPROCS, $ NPROW, NQA, NQB, NQC, NROWA, NROWB, NROWC, $ NTESTS, OFFDA, OFFDC, RSRCA, RSRCB, RSRCC DOUBLE PRECISION CFLOPS, NOPS, WFLOPS COMPLEX*16 ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), BCHECK( NSUBS ), $ CCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCB( DLEN_ ), DESCC( DLEN_ ), $ IAVAL( MAXTESTS ), IBVAL( MAXTESTS ), $ ICVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBBVAL( MAXTESTS ), $ IMBCVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBBVAL( MAXTESTS ), INBCVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JBVAL( MAXTESTS ), $ JCVAL( MAXTESTS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PMDESCCHK, PMDIMCHK, PZBLA3TIMINFO, $ PZGEADD, PZGEMM, PZHEMM, PZHER2K, PZHERK, $ PZLAGEN, PZLASCAL, PZSYMM, PZSYR2K, PZSYRK, $ PZTRADD, PZTRMM, PZTRSM * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL3 EXTERNAL LSAME, PDOPBL3 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZGEMM ', 'PZSYMM ', 'PZHEMM ', $ 'PZSYRK ', 'PZHERK ', 'PZSYR2K', $ 'PZHER2K', 'PZTRMM ', 'PZTRSM ', $ 'PZGEADD', 'PZTRADD'/ DATA BCHECK/.TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .TRUE., $ .TRUE., .TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA3TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, 0, 0, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA IPC = IPB + DESCB( LLD_ )*NQB * * Check if sufficient memory. * MEMREQD = IPC + DESCC( LLD_ )*NQC - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PZGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF ELSE IF( L.EQ.2 .OR. L.EQ.3 ) THEN * * PZSYMM, PZHEMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF ELSE IF( L.EQ.4 .OR. L.EQ.5 ) THEN * * PZSYRK, PZHERK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 ELSE IF( L.EQ.6 .OR. L.EQ.7 ) THEN * * PZSYR2K, PZHER2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN * * PZTRMM, PZTRSM * NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 ELSE IF( L.EQ.10 .OR. L.EQ.11 ) THEN * * PZGEADD, PZTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Check special values of TRANSA for symmetric and * hermitian rank-k and rank-2k updates. * IF( L.EQ.4 .OR. L.EQ.6 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( L ), 'TRANSA' GO TO 30 END IF ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( L ), 'TRANSA' GO TO 30 END IF END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PZSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 ) THEN * * PZHEMM * AFORM = 'H' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.4 .OR. L.EQ.6 ) THEN * * PZSYRK, PZSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN * * PZHERK, PZHER2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'H' OFFDC = IC - JC * ELSE IF( ( L.EQ.9 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PZTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PZLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) IF( ( L.EQ.9 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PZLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PZLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF * END IF * IF( BCHECK( L ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PZLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 3 PBLAS routine * IF( L.EQ.1 ) THEN * * Test PZGEMM * NOPS = PDOPBL3( SNAMES( L ), M, N, K ) * CALL PB_TIMER( 1 ) CALL PZGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.2 ) THEN * * Test PZSYMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PZSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.3 ) THEN * * Test PZHEMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PZHEMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.4 ) THEN * * Test PZSYRK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PZSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.5 ) THEN * * Test PZHERK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PZHERK( UPLO, TRANSA, N, K, DBLE( ALPHA ), $ MEM( IPA ), IA, JA, DESCA, DBLE( BETA ), $ MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.6 ) THEN * * Test PZSYR2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PZSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.7 ) THEN * * Test PZHER2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PZHER2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, DBLE( BETA ), MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.8 ) THEN * * Test PZTRMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PZTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.9 ) THEN * * Test PZTRSM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PZTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.10 ) THEN * * Test PZGEADD * NOPS = PDOPBL3( SNAMES( L ), M, N, M ) * CALL PB_TIMER( 1 ) CALL PZGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.11 ) THEN * * Test PZTRADD * IF( LSAME( UPLO, 'U' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PZTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( L ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9985 FORMAT( 2X, 'End of Tests.' ) 9984 FORMAT( 2X, 'Tests started.' ) 9983 FORMAT( 5X, A, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PZBLA3TIM * END SUBROUTINE PZBLA3TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, $ IAM, NPROCS, ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA3TIMINFO get the needed startup information for timing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eleven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX*16 * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS ) with NSUBS = 11. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS3TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 3 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex double precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PZBLA3TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/PB_Cabort.c0000640000175000017500000001024710363532303017107 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cabort( int ICTXT, char * ROUT, int INFO ) #else void PB_Cabort( ICTXT, ROUT, INFO ) /* * .. Scalar Arguments .. */ int ICTXT, INFO; /* * .. Array Arguments .. */ char * ROUT; #endif { /* * Purpose * ======= * * PB_Cabort is an error handler for the PBLAS routines. This routine * displays an error message on stderr by calling PB_Cwarn, and halts * execution by calling Cblacs_abort(). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * INFO (local input) INTEGER * The error code computed by the calling PBLAS routine. * = 0: no error found * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int mycol, myrow, npcol, nprow; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &INFO ) ) return; #endif if( INFO < 0 ) { /* * Display an error message */ if( INFO < DESCMULT ) PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d had an illegal value", -INFO ); else PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d, entry number %d had an illegal value", (-INFO) / DESCMULT, (-INFO) % DESCMULT ); } else { /* * Error code is incorrect, it should be negative */ PB_Cwarn( ICTXT, -1, ROUT, "Positive error code %d returned by %s!!!", INFO ); } Cblacs_abort( ICTXT, INFO ); /* * End of PB_Cabort */ } scalapack-1.8.0/PBLAS/TIMING/PB_Cwarn.c0000640000175000017500000001125110363532303016743 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cwarn( int ICTXT, int LINE, char * ROUT, char * FORM, ... ) #else void PB_Cwarn( va_alist ) va_dcl #endif { /* * Purpose * ======= * * PB_Cwarn is an error handler for the PBLAS routines. This routine * displays an error message on stderr. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * LINE (local input) INTEGER * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a valid line number, * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * FORM (local input) pointer to CHAR * On entry, FORM is a control string specifying the format * conversion of its following arguments. * * ... (local input) * On entry, FORM is a control string specifying the format * On entry, the expressions that are to be evaluated and con- * verted according to the formats in the control string FORM * and then placed in the output stream. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ va_list argptr; int iam, mycol, myrow, npcol, nprow; char cline[100]; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif #ifdef __STDC__ va_start( argptr, FORM ); #else char * ROUT, * FORM; int ICTXT, LINE; /* .. * .. Executable Statements .. * */ va_start( argptr ); ICTXT = va_arg( argptr, int ); LINE = va_arg( argptr, int ); ROUT = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &ICTXT ) ) return; #endif vsprintf( cline, FORM, argptr ); va_end( argptr ); Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); if( nprow != -1 ) iam = Cblacs_pnum( ICTXT, myrow, mycol ); else iam = -1; /* * Display an error message */ if( LINE <= 0 ) (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", in routine ", ROUT ); else (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", on line ", LINE, " of routine ", ROUT ); /* * End of PB_Cwarn */ } scalapack-1.8.0/PBLAS/TIMING/PDBLAS3TIM.dat0000640000175000017500000000432410363532303017243 0ustar muammarmuammar'Level 3 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS3TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 3.0D0 value of BETA 8 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 values of N 3 4 3 4 3 4 3 4 values of K 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 values of M_B 6 10 6 10 6 10 6 10 values of N_B 2 5 2 5 2 5 2 5 values of IMB_B 2 5 2 5 2 5 2 5 values of INB_B 2 5 2 5 2 5 2 5 values of MB_B 2 5 2 5 2 5 2 5 values of NB_B 0 1 0 1 0 1 0 1 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 1 1 1 1 1 1 1 1 values of IB 1 1 1 1 1 1 1 1 values of JB 6 10 6 10 6 10 6 10 values of M_C 6 10 6 10 6 10 6 10 values of N_C 2 5 2 5 2 5 2 5 values of IMB_C 2 5 2 5 2 5 2 5 values of INB_C 2 5 2 5 2 5 2 5 values of MB_C 2 5 2 5 2 5 2 5 values of NB_C 0 1 0 1 0 1 0 1 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 1 1 1 1 1 1 1 1 values of IC 1 1 1 1 1 1 1 1 values of JC PDGEMM T put F for no test in the same column PDSYMM T put F for no test in the same column PDSYRK T put F for no test in the same column PDSYR2K T put F for no test in the same column PDTRMM T put F for no test in the same column PDTRSM T put F for no test in the same column PDGEADD T put F for no test in the same column PDTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pdblas3tim.f0000640000175000017500000016573110363532303017372 0ustar muammarmuammar PROGRAM PDBLA3TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA3TIM is the main timing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 56 lines: * 'Level 3 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS3TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 1.0D0 value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PDGEMM T put F for no test in the same column * PDSYMM T put F for no test in the same column * PDSYRK T put F for no test in the same column * PDSYR2K T put F for no test in the same column * PDTRMM T put F for no test in the same column * PDTRSM T put F for no test in the same column * PDGEADD T put F for no test in the same column * PDTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, DBLESZ, TOTMEM, MEMSIZ, $ NSUBS DOUBLE PRECISION ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, DBLESZ = 8, $ ONE = 1.0D+0, TOTMEM = 2000000, NSUBS = 8, $ MEMSIZ = TOTMEM / DBLESZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IMBA, IMBB, IMBC, $ IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, IPA, $ IPB, IPC, IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, $ IPREC, J, JA, JB, JC, K, L, M, MA, MB, MBA, $ MBB, MBC, MC, MEMREQD, MPA, MPB, MPC, MYCOL, $ MYROW, N, NA, NB, NBA, NBB, NBC, NC, NCOLA, $ NCOLB, NCOLC, NGRIDS, NOUT, NPCOL, NPROCS, $ NPROW, NQA, NQB, NQC, NROWA, NROWB, NROWC, $ NTESTS, OFFDA, OFFDC, RSRCA, RSRCB, RSRCC DOUBLE PRECISION ALPHA, BETA, CFLOPS, NOPS, SCALE, WFLOPS * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), BCHECK( NSUBS ), $ CCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCB( DLEN_ ), DESCC( DLEN_ ), $ IAVAL( MAXTESTS ), IBVAL( MAXTESTS ), $ ICVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBBVAL( MAXTESTS ), $ IMBCVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBBVAL( MAXTESTS ), INBCVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JBVAL( MAXTESTS ), $ JCVAL( MAXTESTS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PDBLA3TIMINFO, PDGEADD, PDGEMM, $ PDLAGEN, PDLASCAL, PDSYMM, PDSYR2K, PDSYRK, $ PDTRADD, PDTRMM, PDTRSM, PMDESCCHK, PMDIMCHK * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL3 EXTERNAL LSAME, PDOPBL3 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDGEMM ', 'PDSYMM ', 'PDSYRK ', $ 'PDSYR2K', 'PDTRMM ', 'PDTRSM ', $ 'PDGEADD', 'PDTRADD'/ DATA BCHECK/.TRUE., .TRUE., .FALSE., .TRUE., .TRUE., $ .TRUE., .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA3TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, 0, 0, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA IPC = IPB + DESCB( LLD_ )*NQB * * Check if sufficient memory. * MEMREQD = IPC + DESCC( LLD_ )*NQC - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PDGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF ELSE IF( L.EQ.2 ) THEN * * PDSYMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF ELSE IF( L.EQ.3 ) THEN * * PDSYRK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 ELSE IF( L.EQ.4 ) THEN * * PDSYR2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN * * PDTRMM, PDTRSM * NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 ELSE IF( L.EQ.7 .OR. L.EQ.8 ) THEN * * PDGEADD, PDTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PDSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 .OR. L.EQ.4 ) THEN * * PDSYRK, PDSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( ( L.EQ.6 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PDTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PDLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) IF( ( L.EQ.6 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PDLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PDLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF * END IF * IF( BCHECK( L ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PDLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 3 PBLAS routine * IF( L.EQ.1 ) THEN * * Test PDGEMM * NOPS = PDOPBL3( SNAMES( L ), M, N, K ) * CALL PB_TIMER( 1 ) CALL PDGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.2 ) THEN * * Test PDSYMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PDSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.3 ) THEN * * Test PDSYRK * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PDSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.4 ) THEN * * Test PDSYR2K * NOPS = PDOPBL3( SNAMES( L ), N, N, K ) * CALL PB_TIMER( 1 ) CALL PDSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.5 ) THEN * * Test PDTRMM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PDTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.6 ) THEN * * Test PDTRSM * IF( LSAME( SIDE, 'L' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PDTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.7 ) THEN * * Test PDGEADD * NOPS = PDOPBL3( SNAMES( L ), M, N, M ) * CALL PB_TIMER( 1 ) CALL PDGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) CALL PB_TIMER( 1 ) * ELSE IF( L.EQ.8 ) THEN * * Test PDTRADD * IF( LSAME( UPLO, 'U' ) ) THEN NOPS = PDOPBL3( SNAMES( L ), M, N, 0 ) ELSE NOPS = PDOPBL3( SNAMES( L ), M, N, 1 ) END IF * CALL PB_TIMER( 1 ) CALL PDTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( L ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9985 FORMAT( 2X, 'End of Tests.' ) 9984 FORMAT( 2X, 'Tests started.' ) 9983 FORMAT( 5X, A, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PDBLA3TIM * END SUBROUTINE PDBLA3TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, $ IAM, NPROCS, ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA3TIMINFO get the needed startup information for timing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) DOUBLE PRECISION * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS3TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 3 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real double precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Beta : ', G16.6 ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PDBLA3TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/PCBLAS2TIM.dat0000640000175000017500000000607710363532303017250 0ustar muammarmuammar'Level 2 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS2TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0E0, -4.0E0) value of ALPHA (3.0E0, -2.0E0) value of BETA 16 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of N 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_X 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_X 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCX 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_Y 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_Y 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCY PCGEMV T put F for no test in the same column PCHEMV T put F for no test in the same column PCTRMV T put F for no test in the same column PCTRSV T put F for no test in the same column PCGERU T put F for no test in the same column PCGERC T put F for no test in the same column PCHER T put F for no test in the same column PCHER2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pzblas1tim.f0000640000175000017500000012447710363532303017420 0ustar muammarmuammar PROGRAM PZBLA1TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA1TIM is the main timing program for the Level 1 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 42 lines: * 'Level 1 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS1TIM.SUMM' output file name (if any) * 6 device out * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PZSWAP T put F for no test in the same column * PZSCAL T put F for no test in the same column * PZDSCAL T put F for no test in the same column * PZCOPY T put F for no test in the same column * PZAXPY T put F for no test in the same column * PZDOTU T put F for no test in the same column * PZDOTC T put F for no test in the same column * PDZNRM2 T put F for no test in the same column * PDZASUM T put F for no test in the same column * PZAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, ZPLXSZ, TOTMEM, MEMSIZ, $ NSUBS PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, ZPLXSZ = 16, $ TOTMEM = 2000000, NSUBS = 10, $ MEMSIZ = TOTMEM / ZPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IMBX, IMBY, IMIDX, $ IMIDY, INBX, INBY, INCX, INCY, IPOSTX, IPOSTY, $ IPREX, IPREY, IPX, IPY, IX, IXSEED, IY, IYSEED, $ J, JX, JY, K, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY DOUBLE PRECISION ADDS, CFLOPS, MULTS, NOPS, PUSCLR, WFLOPS COMPLEX*16 ALPHA, PSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), IERR( 2 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ MBXVAL( MAXTESTS ), MBYVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCXVAL( MAXTESTS ), $ RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PDZASUM, PDZNRM2, PVDESCCHK, $ PVDIMCHK, PZAMAX, PZAXPY, PZBLA1TIMINFO, $ PZCOPY, PZDOTC, PZDOTU, PZDSCAL, PZLAGEN, $ PZSCAL, PZSWAP * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZSWAP ', 'PZSCAL ', $ 'PZDSCAL ', 'PZCOPY', 'PZAXPY ', $ 'PZDOTU ', 'PZDOTC' , 'PDZNRM2', $ 'PDZASUM', 'PZAMAX '/ DATA YCHECK/.TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE., .TRUE., .TRUE., .FALSE., .FALSE., $ .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA1TIMINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, IAM, NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9983 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) $ GO TO 40 * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( 1 ). * IPX = 1 IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) $ GO TO 30 * * Generate distributed matrices X and Y * CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PZSWAP * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PZSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PZSCAL * ADDS = 0.0D+0 MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PZSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PZDSCAL * ADDS = 0.0D+0 MULTS = DBLE( 2*N ) CALL PB_TIMER( 1 ) CALL PZDSCAL( N, DBLE( ALPHA ), MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PZCOPY * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PZCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PZAXPY * ADDS = DBLE( 2*N ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PZAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PZDOTU * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PZDOTU( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PZDOTC * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PZDOTC( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * * Test PDZNRM2 * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = DBLE( 6*N ) CALL PB_TIMER( 1 ) CALL PDZNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.9 ) THEN * * Test PDZASUM * ADDS = DBLE( 2 * ( N - 1 ) ) MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PDZASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.10 ) THEN * ADDS = 0.0D+0 MULTS = 0.0D+0 CALL PB_TIMER( 1 ) CALL PZAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = ADDS + MULTS * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9984 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9988 ) J END IF * 50 CONTINUE * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9987 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9988 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9987 FORMAT( 2X, 'End of Tests.' ) 9986 FORMAT( 2X, 'Tests started.' ) 9985 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9984 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9983 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PZBLA1TIM * END SUBROUTINE PZBLA1TIMINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, $ RSCXVAL, CSCXVAL, IXVAL, JXVAL, $ INCXVAL, MYVAL, NYVAL, IMBYVAL, MBYVAL, $ INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, LDVAL, NGRIDS, $ PVAL, LDPVAL, QVAL, LDQVAL, LTEST, IAM, $ NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NGRIDS, NMAT, NOUT, $ NPROCS COMPLEX*16 ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA1TIMINFO get the needed startup information for timing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least ten. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS ) with NSUBS = 10. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS1TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 1 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Timing of the complex double precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9989 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9989 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9988 FORMAT( 2X, ' ', A, A8 ) * * End of PZBLA1TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/PSBLAS3TIM.dat0000640000175000017500000000432410363532303017262 0ustar muammarmuammar'Level 3 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS3TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 3.0E0 value of BETA 8 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 values of N 3 4 3 4 3 4 3 4 values of K 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 values of M_B 6 10 6 10 6 10 6 10 values of N_B 2 5 2 5 2 5 2 5 values of IMB_B 2 5 2 5 2 5 2 5 values of INB_B 2 5 2 5 2 5 2 5 values of MB_B 2 5 2 5 2 5 2 5 values of NB_B 0 1 0 1 0 1 0 1 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 1 1 1 1 1 1 1 1 values of IB 1 1 1 1 1 1 1 1 values of JB 6 10 6 10 6 10 6 10 values of M_C 6 10 6 10 6 10 6 10 values of N_C 2 5 2 5 2 5 2 5 values of IMB_C 2 5 2 5 2 5 2 5 values of INB_C 2 5 2 5 2 5 2 5 values of MB_C 2 5 2 5 2 5 2 5 values of NB_C 0 1 0 1 0 1 0 1 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 1 1 1 1 1 1 1 1 values of IC 1 1 1 1 1 1 1 1 values of JC PSGEMM T put F for no test in the same column PSSYMM T put F for no test in the same column PSSYRK T put F for no test in the same column PSSYR2K T put F for no test in the same column PSTRMM T put F for no test in the same column PSTRSM T put F for no test in the same column PSGEADD T put F for no test in the same column PSTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/PCBLAS3TIM.dat0000640000175000017500000000456310363532303017247 0ustar muammarmuammar'Level 3 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS3TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0E0, -4.0E0) value of ALPHA (3.0E0, -2.0E0) value of BETA 8 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 values of N 3 4 3 4 3 4 3 4 values of K 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 values of M_B 6 10 6 10 6 10 6 10 values of N_B 2 5 2 5 2 5 2 5 values of IMB_B 2 5 2 5 2 5 2 5 values of INB_B 2 5 2 5 2 5 2 5 values of MB_B 2 5 2 5 2 5 2 5 values of NB_B 0 1 0 1 0 1 0 1 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 1 1 1 1 1 1 1 1 values of IB 1 1 1 1 1 1 1 1 values of JB 6 10 6 10 6 10 6 10 values of M_C 6 10 6 10 6 10 6 10 values of N_C 2 5 2 5 2 5 2 5 values of IMB_C 2 5 2 5 2 5 2 5 values of INB_C 2 5 2 5 2 5 2 5 values of MB_C 2 5 2 5 2 5 2 5 values of NB_C 0 1 0 1 0 1 0 1 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 1 1 1 1 1 1 1 1 values of IC 1 1 1 1 1 1 1 1 values of JC PCGEMM T put F for no test in the same column PCSYMM T put F for no test in the same column PCHEMM T put F for no test in the same column PCSYRK T put F for no test in the same column PCHERK T put F for no test in the same column PCSYR2K T put F for no test in the same column PCHER2K T put F for no test in the same column PCTRMM T put F for no test in the same column PCTRSM T put F for no test in the same column PCGEADD T put F for no test in the same column PCTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/PSBLAS1TIM.dat0000640000175000017500000000232710363532303017261 0ustar muammarmuammar'Level 1 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS1TIM.SUMM' output file name (if any) 6 device out 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 2 number of tests problems 1000 1000 values of N 1000 1 values of M_X 1 1500 values of N_X 32 32 values of IMB_X 32 32 values of INB_X 32 32 values of MB_X 32 32 values of NB_X 0 0 values of RSRC_X 0 0 values of CSRC_X 1 1 values of IX 1 1 values of JX 1 1 values of INCX 1 1 values of M_Y 1000 1500 values of N_Y 32 32 values of IMB_Y 32 32 values of INB_Y 32 32 values of MB_Y 32 32 values of NB_Y 0 0 values of RSRC_Y 0 0 values of CSRC_Y 1 1 values of IY 1 1 values of JY 1 1 values of INCY PSSWAP T put F for no test in the same column PSSCAL T put F for no test in the same column PSCOPY T put F for no test in the same column PSAXPY T put F for no test in the same column PSDOT T put F for no test in the same column PSNRM2 T put F for no test in the same column PSASUM T put F for no test in the same column PSAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/PDBLAS2TIM.dat0000640000175000017500000000577610363532303017256 0ustar muammarmuammar'Level 2 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS2TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 3.0D0 value of BETA 16 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 values of N 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_X 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_X 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_X 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCX 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of M_Y 6 10 6 10 6 10 6 10 6 10 6 10 6 10 6 10 values of N_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of IMB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of INB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of MB_Y 2 5 2 5 2 5 2 5 2 5 2 5 2 5 2 5 values of NB_Y 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 values of INCY PDGEMV T put F for no test in the same column PDSYMV T put F for no test in the same column PDTRMV T put F for no test in the same column PDTRSV T put F for no test in the same column PDGER T put F for no test in the same column PDSYR T put F for no test in the same column PDSYR2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/PZBLAS3TIM.dat0000640000175000017500000000456310363532303017276 0ustar muammarmuammar'Level 3 PBLAS, Timing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS3TIM.SUMM' output file name (if any) 6 device out 10 value of the logical computational blocksize NB 1 number of process grids (ordered pairs of P & Q) 2 2 1 4 2 3 8 values of P 2 2 4 1 3 2 1 values of Q (2.0D0, -4.0D0) value of ALPHA (3.0D0, -2.0D0) value of BETA 8 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 3 4 3 4 3 4 3 4 values of M 3 4 3 4 3 4 3 4 values of N 3 4 3 4 3 4 3 4 values of K 6 10 6 10 6 10 6 10 values of M_A 6 10 6 10 6 10 6 10 values of N_A 2 5 2 5 2 5 2 5 values of IMB_A 2 5 2 5 2 5 2 5 values of INB_A 2 5 2 5 2 5 2 5 values of MB_A 2 5 2 5 2 5 2 5 values of NB_A 0 1 0 1 0 1 0 1 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 1 1 1 1 1 1 1 1 values of IA 1 1 1 1 1 1 1 1 values of JA 6 10 6 10 6 10 6 10 values of M_B 6 10 6 10 6 10 6 10 values of N_B 2 5 2 5 2 5 2 5 values of IMB_B 2 5 2 5 2 5 2 5 values of INB_B 2 5 2 5 2 5 2 5 values of MB_B 2 5 2 5 2 5 2 5 values of NB_B 0 1 0 1 0 1 0 1 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 1 1 1 1 1 1 1 1 values of IB 1 1 1 1 1 1 1 1 values of JB 6 10 6 10 6 10 6 10 values of M_C 6 10 6 10 6 10 6 10 values of N_C 2 5 2 5 2 5 2 5 values of IMB_C 2 5 2 5 2 5 2 5 values of INB_C 2 5 2 5 2 5 2 5 values of MB_C 2 5 2 5 2 5 2 5 values of NB_C 0 1 0 1 0 1 0 1 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 1 1 1 1 1 1 1 1 values of IC 1 1 1 1 1 1 1 1 values of JC PZGEMM T put F for no test in the same column PZSYMM T put F for no test in the same column PZHEMM T put F for no test in the same column PZSYRK T put F for no test in the same column PZHERK T put F for no test in the same column PZSYR2K T put F for no test in the same column PZHER2K T put F for no test in the same column PZTRMM T put F for no test in the same column PZTRSM T put F for no test in the same column PZGEADD T put F for no test in the same column PZTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TIMING/pzblas2tim.f0000640000175000017500000015463710363532303017422 0ustar muammarmuammar PROGRAM PZBLA2TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA2TIM is the main timing program for the Level 2 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 56 lines: * 'Level 2 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS2TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * (1.0D0, 0.0D0) value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PZGEMV T put F for no test in the same column * PZHEMV T put F for no test in the same column * PZTRMV T put F for no test in the same column * PZTRSV T put F for no test in the same column * PZGERU T put F for no test in the same column * PZGERC T put F for no test in the same column * PZHER T put F for no test in the same column * PZHER2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, ZPLXSZ, TOTMEM, MEMSIZ, $ NSUBS COMPLEX*16 ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, ZPLXSZ = 16, $ ONE = ( 1.0D+0, 0.0D+0 ), TOTMEM = 2000000, $ NSUBS = 8, MEMSIZ = TOTMEM / ZPLXSZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, INBA, $ INBX, INBY, INCX, INCY, IPA, IPOSTA, IPOSTX, $ IPOSTY, IPREA, IPREX, IPREY, IPX, IPY, IX, $ IXSEED, IY, IYSEED, J, JA, JX, JY, K, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY DOUBLE PRECISION CFLOPS, NOPS, WFLOPS COMPLEX*16 ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), $ IAVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), WTIME( 1 ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PMDESCCHK, PMDIMCHK, PVDESCCHK, $ PVDIMCHK, PZBLA2TIMINFO, PZGEMV, PZGERC, $ PZGERU, PZHEMV, PZHER, PZHER2, PZLAGEN, $ PZLASCAL, PZTRMV, PZTRSV * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL2 EXTERNAL LSAME, PDOPBL2 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZGEMV ', 'PZHEMV ', 'PZTRMV ', $ 'PZTRSV ', 'PZGERU ', 'PZGERC ', $ 'PZHER ', 'PZHER2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .TRUE. * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA2TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, MAXTESTS, $ NGRIDS, PVAL, MAXGRIDS, QVAL, MAXGRIDS, $ NBLOG, LTEST, IAM, NPROCS, ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = 1 IPX = IPA + DESCA( LLD_ ) * NQA IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 .OR. K.EQ.6 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ) THEN AFORM = 'H' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PZLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, $ NY, 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * IF( ( K.EQ.4 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PZLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PZLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF END IF * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 2 PBLAS routine * IF( K.EQ.1 ) THEN * * Test PZGEMV * CALL PB_TIMER( 1 ) CALL PZGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PZHEMV * CALL PB_TIMER( 1 ) CALL PZHEMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PZTRMV * CALL PB_TIMER( 1 ) CALL PZTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PZTRSV * CALL PB_TIMER( 1 ) CALL PZTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PZGERU * CALL PB_TIMER( 1 ) CALL PZGERU( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PZGERC * CALL PB_TIMER( 1 ) CALL PZGERC( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PZHER * CALL PB_TIMER( 1 ) CALL PZHER( UPLO, N, DBLE( ALPHA ), MEM( IPX ), IX, $ JX, DESCX, INCX, MEM( IPA ), IA, JA, $ DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.8 ) THEN * * Test PZHER2 * CALL PB_TIMER( 1 ) CALL PZHER2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = PDOPBL2( SNAMES( K ), NROWA, NCOLA, 0, 0 ) * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9984 FORMAT( 2X, 'End of Tests.' ) 9983 FORMAT( 2X, 'Tests started.' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PZBLA2TIM * END SUBROUTINE PZBLA2TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA2TIMINFO get the needed startup information for timing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX*16 * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS ) with NSUBS = 8. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS2TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 2 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex double precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 1, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9993 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PZBLA2TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/pdblas2tim.f0000640000175000017500000015332410363532303017364 0ustar muammarmuammar PROGRAM PDBLA2TIM * * -- PBLAS timing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA2TIM is the main timing program for the Level 2 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 55 lines: * 'Level 2 PBLAS, Timing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS2TIM.SUMM' output file name (if any) * 6 device out * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 1.0D0 value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PDGEMV T put F for no test in the same column * PDSYMV T put F for no test in the same column * PDTRMV T put F for no test in the same column * PDTRSV T put F for no test in the same column * PDGER T put F for no test in the same column * PDSYR T put F for no test in the same column * PDSYR2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, DBLESZ, TOTMEM, MEMSIZ, $ NSUBS DOUBLE PRECISION ONE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, DBLESZ = 8, $ ONE = 1.0D+0, TOTMEM = 2000000, NSUBS = 7, $ MEMSIZ = TOTMEM / DBLESZ ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, INBA, $ INBX, INBY, INCX, INCY, IPA, IPOSTA, IPOSTX, $ IPOSTY, IPREA, IPREX, IPREY, IPX, IPY, IX, $ IXSEED, IY, IYSEED, J, JA, JX, JY, K, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY DOUBLE PRECISION ALPHA, BETA, CFLOPS, NOPS, SCALE, WFLOPS * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ), $ IAVAL( MAXTESTS ), IERR( 3 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) DOUBLE PRECISION CTIME( 1 ), MEM( MEMSIZ ), WTIME( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, $ BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT, $ BLACS_PINFO, IGSUM2D, PB_BOOT, PB_COMBINE, $ PB_TIMER, PDBLA2TIMINFO, PDGEMV, PDGER, $ PDLAGEN, PDLASCAL, PDSYMV, PDSYR, PDSYR2, $ PDTRMV, PDTRSV, PMDESCCHK, PMDIMCHK, PVDESCCHK, $ PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDOPBL2 EXTERNAL LSAME, PDOPBL2 * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDGEMV ', 'PDSYMV ', 'PDTRMV ', $ 'PDTRSV ', 'PDGER ', 'PDSYR ', $ 'PDSYR2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .TRUE. * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA2TIMINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, MAXTESTS, $ NGRIDS, PVAL, MAXGRIDS, QVAL, MAXGRIDS, $ NBLOG, LTEST, IAM, NPROCS, ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) J, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9980 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, 0, 0, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, 0, 0, IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, 0, 0, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN GO TO 40 END IF * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = 1 IPX = IPA + DESCA( LLD_ ) * NQA IPY = IPX + DESCX( LLD_ ) * NQX * * Check if sufficient memory. * MEMREQD = IPY + DESCY( LLD_ ) * NQY - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ) THEN AFORM = 'S' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PDLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, $ NY, 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * IF( ( K.EQ.4 ).AND.( .NOT.( LSAME( DIAG, 'N' ) ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) IF( LSAME( UPLO, 'L' ) ) THEN CALL PDLASCAL( 'Lower', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA+1, JA, DESCA ) ELSE CALL PDLASCAL( 'Upper', NROWA-1, NCOLA-1, SCALE, $ MEM( IPA ), IA, JA+1, DESCA ) END IF END IF * INFO = 0 CALL PB_BOOT() CALL BLACS_BARRIER( ICTXT, 'All' ) * * Call the Level 2 PBLAS routine * IF( K.EQ.1 ) THEN * * Test PDGEMV * CALL PB_TIMER( 1 ) CALL PDGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.2 ) THEN * * Test PDSYMV * CALL PB_TIMER( 1 ) CALL PDSYMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.3 ) THEN * * Test PDTRMV * CALL PB_TIMER( 1 ) CALL PDTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.4 ) THEN * * Test PDTRSV * CALL PB_TIMER( 1 ) CALL PDTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.5 ) THEN * * Test PDGER * CALL PB_TIMER( 1 ) CALL PDGER( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.6 ) THEN * * Test PDSYR * CALL PB_TIMER( 1 ) CALL PDSYR( UPLO, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * ELSE IF( K.EQ.7 ) THEN * * Test PDSYR2 * CALL PB_TIMER( 1 ) CALL PDSYR2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) CALL PB_TIMER( 1 ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) INFO GO TO 30 END IF * CALL PB_COMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME ) CALL PB_COMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME ) * * Only node 0 prints timing test result * IF( IAM.EQ.0 ) THEN * * Calculate total flops * NOPS = PDOPBL2( SNAMES( K ), NROWA, NCOLA, 0, 0 ) * * Print WALL time if machine supports it * IF( WTIME( 1 ).GT.0.0D+0 ) THEN WFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 ) ELSE WFLOPS = 0.0D+0 END IF * * Print CPU time if machine supports it * IF( CTIME( 1 ).GT.0.0D+0 ) THEN CFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 ) ELSE CFLOPS = 0.0D+0 END IF * WRITE( NOUT, FMT = 9981 ) SNAMES( K ), WTIME( 1 ), $ WFLOPS, CTIME( 1 ), CFLOPS * END IF * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) WRITE( NOUT, FMT = * ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I2 , ' started on a ', I4, ' x ', $ I4, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, 'Test number ', I2, ' completed.' ) 9984 FORMAT( 2X, 'End of Tests.' ) 9983 FORMAT( 2X, 'Tests started.' ) 9982 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) 9981 FORMAT( 2X, '| ', A, 2X, F13.3, 2X, F13.3, 2X, F13.3, 2X, F13.3 ) 9980 FORMAT( 2X, ' WALL time (s) WALL Mflops ', $ ' CPU time (s) CPU Mflops' ) * STOP * * End of PDBLA2TIM * END SUBROUTINE PDBLA2TIMINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, IAM, NPROCS, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IAM, LDPVAL, LDQVAL, LDVAL, NBLOG, NGRIDS, $ NMAT, NOUT, NPROCS DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA2TIMINFO get the needed startup information for timing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least seven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) DOUBLE PRECISION * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS ) with NSUBS = 7. This array * is used to pack all output arrays in order to send info in * one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS2TIM.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'Level 2 PBLAS timing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real double precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9992 ) NMAT WRITE( NOUT, FMT = 9986 ) NBLOG WRITE( NOUT, FMT = 9991 ) NGRIDS WRITE( NOUT, FMT = 9989 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9989 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9990 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9994 ) ALPHA WRITE( NOUT, FMT = 9993 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9988 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 1, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9987 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Alpha : ', G16.6 ) 9993 FORMAT( 2X, 'Beta : ', G16.6 ) 9992 FORMAT( 2X, 'Number of Tests : ', I6 ) 9991 FORMAT( 2X, 'Number of process grids : ', I6 ) 9990 FORMAT( 2X, ' : ', 5I6 ) 9989 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9987 FORMAT( 2X, ' ', A, A8 ) 9986 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PDBLA2TIMINFO * END scalapack-1.8.0/PBLAS/TIMING/psblastim.f0000640000175000017500000023004510363532303017315 0ustar muammarmuammar SUBROUTINE PSLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N REAL ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_INFOG2L, PB_SLASCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_SLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_SLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_SLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_SLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_SLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PSLASCAL * END SUBROUTINE PSLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( LDA, * ) * .. * * Purpose * ======= * * PSLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP REAL ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PB_SLAGEN, PSLADOM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, REAL * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_SLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_SLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) ALPHA = REAL( MAXMN ) * IF( IOFFDA.GE.0 ) THEN CALL PSLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PSLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PSLAGEN * END SUBROUTINE PSLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N REAL ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP REAL ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PSLADOM * END SUBROUTINE PB_SLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PB_SLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (input/output) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_SLASCAL * END SUBROUTINE PB_SLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) REAL A( LDA, * ) * .. * * Purpose * ======= * * PB_SLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) REAL array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP REAL DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME REAL PB_SRAND EXTERNAL LSAME, PB_SRAND * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) .OR. LSAME( AFORM, 'C' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( ( LSAME( AFORM, 'S' ) ).OR.( LSAME( AFORM, 'H' ) ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = PB_SRAND( 0 ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = PB_SRAND( 0 ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = PB_SRAND( 0 ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = PB_SRAND( 0 ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * END IF * RETURN * * End of PB_SLAGEN * END REAL FUNCTION PB_SRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. External Functions .. REAL PB_SRAN EXTERNAL PB_SRAN * .. * .. Executable Statements .. * PB_SRAND = ONE - TWO * PB_SRAN( IDUMM ) * RETURN * * End of PB_SRAND * END REAL FUNCTION PB_SRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648E+9, $ POW16 = 6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_SRAN = ( REAL( IRAND( 1 ) ) + POW16 * REAL( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_SRAN * END scalapack-1.8.0/PBLAS/TIMING/pblastim.f0000640000175000017500000057167410363532303017152 0ustar muammarmuammar SUBROUTINE PVDIMCHK( ICTXT, NOUT, N, MATRIX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER ICTXT, INCX, INFO, IX, JX, N, NOUT * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PVDIMCHK checks the validity of the input test dimensions. In case of * an invalid parameter or discrepancy between the parameters, this rou- * tine displays error messages and returns an non-zero error code in * INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.LT.0 ) THEN INFO = 1 ELSE IF( N.EQ.0 ) THEN IF( DESCX( M_ ).LT.0 ) $ INFO = 1 IF( DESCX( N_ ).LT.0 ) $ INFO = 1 ELSE IF( INCX.EQ.DESCX( M_ ) .AND. $ DESCX( N_ ).LT.( JX+N-1 ) ) THEN INFO = 1 ELSE IF( INCX.EQ.1 .AND. INCX.NE.DESCX( M_ ) .AND. $ DESCX( M_ ).LT.( IX+N-1 ) ) THEN INFO = 1 ELSE IF( IX.GT.DESCX( M_ ) ) THEN INFO = 1 ELSE IF( JX.GT.DESCX( N_ ) ) THEN INFO = 1 END IF END IF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) MATRIX WRITE( NOUT, FMT = 9998 ) N, MATRIX, IX, MATRIX, JX, MATRIX, $ INCX WRITE( NOUT, FMT = 9997 ) MATRIX, DESCX( M_ ), MATRIX, $ DESCX( N_ ) WRITE( NOUT, FMT = * ) END IF END IF * 9999 FORMAT( 'Incompatible arguments for matrix ', A1, ':' ) 9998 FORMAT( 'N = ', I6, ', I', A1, ' = ', I6, ', J', A1, ' = ', $ I6, ',INC', A1, ' = ', I6 ) 9997 FORMAT( 'DESC', A1, '( M_ ) = ', I6, ', DESC', A1, '( N_ ) = ', $ I6, '.' ) * RETURN * * End of PVDIMCHK * END SUBROUTINE PMDIMCHK( ICTXT, NOUT, M, N, MATRIX, IA, JA, DESCA, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER ICTXT, INFO, IA, JA, M, N, NOUT * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PMDIMCHK checks the validity of the input test dimensions. In case of * an invalid parameter or discrepancy between the parameters, this rou- * tine displays error messages and returns an non-zero error code in * INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( ( M.LT.0 ).OR.( N.LT.0 ) ) THEN INFO = 1 ELSE IF( ( M.EQ.0 ).OR.( N.EQ.0 ) )THEN IF( DESCA( M_ ).LT.0 ) $ INFO = 1 IF( DESCA( N_ ).LT.0 ) $ INFO = 1 ELSE IF( DESCA( M_ ).LT.( IA+M-1 ) ) $ INFO = 1 IF( DESCA( N_ ).LT.( JA+N-1 ) ) $ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) MATRIX WRITE( NOUT, FMT = 9998 ) M, N, MATRIX, IA, MATRIX, JA WRITE( NOUT, FMT = 9997 ) MATRIX, DESCA( M_ ), MATRIX, $ DESCA( N_ ) WRITE( NOUT, FMT = * ) END IF END IF * 9999 FORMAT( 'Incompatible arguments for matrix ', A1, ':' ) 9998 FORMAT( 'M = ', I6, ', N = ', I6, ', I', A1, ' = ', I6, $ ', J', A1, ' = ', I6 ) 9997 FORMAT( 'DESC', A1, '( M_ ) = ', I6, ', DESC', A1, '( N_ ) = ', $ I6, '.' ) * RETURN * * End of PMDIMCHK * END SUBROUTINE PVDESCCHK( ICTXT, NOUT, MATRIX, DESCX, DTX, MX, NX, $ IMBX, INBX, MBX, NBX, RSRCX, CSRCX, INCX, $ MPX, NQX, IPREX, IMIDX, IPOSTX, IGAP, $ GAPMUL, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER CSRCX, DTX, GAPMUL, ICTXT, IGAP, IMBX, IMIDX, $ INBX, INCX, INFO, IPOSTX, IPREX, MBX, MPX, MX, $ NBX, NOUT, NQX, NX, RSRCX * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PVDESCCHK checks the validity of the input test parameters and ini- * tializes the descriptor DESCX and the scalar variables MPX, NQX. In * case of an invalid parameter, this routine displays error messages * and return an non-zero error code in INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * DESCX (global output) INTEGER array * On entry, DESCX is an array of dimension DLEN_. DESCX is the * array descriptor to be set. * * DTYPEX (global input) INTEGER * On entry, DTYPEX specifies the descriptor type. In this ver- * sion, DTYPEX must be BLOCK_CYCLIC_INB_2D. * * MX (global input) INTEGER * On entry, MX specifies the number of rows in the matrix. MX * must be at least zero. * * NX (global input) INTEGER * On entry, NX specifies the number of columns in the matrix. * NX must be at least zero. * * IMBX (global input) INTEGER * On entry, IMBX specifies the row blocking factor used to dis- * tribute the first IMBX rows of the matrix. IMBX must be at * least one. * * INBX (global input) INTEGER * On entry, INBX specifies the column blocking factor used to * distribute the first INBX columns of the matrix. INBX must * be at least one. * * MBX (global input) INTEGER * On entry, MBX specifies the row blocking factor used to dis- * tribute the rows of the matrix. MBX must be at least one. * * NBX (global input) INTEGER * On entry, NBX specifies the column blocking factor used to * distribute the columns of the matrix. NBX must be at least * one. * * RSRCX (global input) INTEGER * On entry, RSRCX specifies the process row in which the first * row of the matrix resides. When RSRCX is -1, the matrix is * row replicated, otherwise RSCRX must be at least zero and * strictly less than NPROW. * * CSRCX (global input) INTEGER * On entry, CSRCX specifies the process column in which the * first column of the matrix resides. When CSRCX is -1, the * matrix is column replicated, otherwise CSCRX must be at least * zero and strictly less than NPCOL. * * INCX (global input) INTEGER * On entry, INCX specifies the global vector increment. INCX * must be one or MX. * * MPX (local output) INTEGER * On exit, MPX is Lr( 1, MX ). * * NQX (local output) INTEGER * On exit, NQX is Lc( 1, NX ). * * IPREX (local output) INTEGER * On exit, IPREX specifies the size of the guard zone to put * before the start of the local padded array. * * IMIDX (local output) INTEGER * On exit, IMIDX specifies the ldx-gap of the guard zone to * put after each column of the local padded array. * * IPOSTX (local output) INTEGER * On exit, IPOSTX specifies the size of the guard zone to put * after the local padded array. * * IGAP (global input) INTEGER * On entry, IGAP specifies the size of the ldx-gap. * * GAPMUL (global input) INTEGER * On entry, GAPMUL is a constant factor controlling the size * of the pre- and post guardzone. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDX, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_DESCINIT2 * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Verify descriptor type DTYPE_ * IF( DTX.NE.BLOCK_CYCLIC_2D_INB ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) MATRIX, 'DTYPE', MATRIX, DTX, $ BLOCK_CYCLIC_2D_INB INFO = 1 END IF * * Verify global matrix dimensions (M_,N_) are correct * IF( MX.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) MATRIX, 'M', MATRIX, MX INFO = 1 ELSE IF( NX.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) MATRIX, 'N', MATRIX, NX INFO = 1 END IF * * Verify if blocking factors (IMB_, INB_) are correct * IF( IMBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) MATRIX, 'IMB', MATRIX, IMBX INFO = 1 ELSE IF( INBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) MATRIX, 'INB', MATRIX, INBX INFO = 1 END IF * * Verify if blocking factors (MB_, NB_) are correct * IF( MBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) MATRIX, 'MB', MATRIX, MBX INFO = 1 ELSE IF( NBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) MATRIX, 'NB', MATRIX, NBX INFO = 1 END IF * * Verify if origin process coordinates (RSRC_, CSRC_) are valid * IF( RSRCX.LT.-1 .OR. RSRCX.GE.NPROW ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'RSRC', MATRIX, RSRCX, NPROW END IF INFO = 1 ELSE IF( CSRCX.LT.-1 .OR. CSRCX.GE.NPCOL ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9991 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'CSRC', MATRIX, CSRCX, NPCOL END IF INFO = 1 END IF * * Check input increment value * IF( INCX.NE.1 .AND. INCX.NE.MX ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = 9988 ) 'INC', MATRIX, INCX, MATRIX, MX END IF INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9987 ) MATRIX WRITE( NOUT, FMT = * ) END IF * ELSE * * Compute local testing leading dimension * MPX = PB_NUMROC( MX, 1, IMBX, MBX, MYROW, RSRCX, NPROW ) NQX = PB_NUMROC( NX, 1, INBX, NBX, MYCOL, CSRCX, NPCOL ) IPREX = MAX( GAPMUL*NBX, MPX ) IMIDX = IGAP IPOSTX = MAX( GAPMUL*NBX, NQX ) LLDX = MAX( 1, MPX ) + IMIDX * CALL PB_DESCINIT2( DESCX, MX, NX, IMBX, INBX, MBX, NBX, RSRCX, $ CSRCX, ICTXT, LLDX, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9987 ) MATRIX WRITE( NOUT, FMT = * ) END IF END IF * END IF * 9999 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor type ', A5, A1, $ ': ', I6, ' should be ', I3, '.' ) 9998 FORMAT( 2X, '>> Invalid matrix ', A1, ' row dimension ', A1, A1, $ ': ', I6, ' should be at least 1.' ) 9997 FORMAT( 2X, '>> Invalid matrix ', A1, ' column dimension ', A1, $ A1, ': ', I6, ' should be at least 1.' ) 9996 FORMAT( 2X, '>> Invalid matrix ', A1, ' first row block size ', $ A3, A1, ': ', I6, ' should be at least 1.' ) 9995 FORMAT( 2X, '>> Invalid matrix ', A1, ' first column block size ', $ A3, A1,': ', I6, ' should be at least 1.' ) 9994 FORMAT( 2X, '>> Invalid matrix ', A1, ' row block size ', A2, A1, $ ': ', I6, ' should be at least 1.' ) 9993 FORMAT( 2X, '>> Invalid matrix ', A1, ' column block size ', A2, $ A1,': ', I6, ' should be at least 1.' ) 9992 FORMAT( 2X, '>> Invalid matrix ', A1, ' row process source:' ) 9991 FORMAT( 2X, '>> Invalid matrix ', A1, ' column process source:' ) 9990 FORMAT( 2X, '>> ', A4, A1, '= ', I6, ' should be >= -1 and < ', $ I6, '.' ) 9989 FORMAT( 2X, '>> Invalid vector ', A1, ' increment:' ) 9988 FORMAT( 2X, '>> ', A3, A1, '= ', I6, ' should be 1 or M', A1, $ ' = ', I6, '.' ) 9987 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor: going on to ', $ 'next test case.' ) * RETURN * * End of PVDESCCHK * END SUBROUTINE PMDESCCHK( ICTXT, NOUT, MATRIX, DESCA, DTA, MA, NA, $ IMBA, INBA, MBA, NBA, RSRCA, CSRCA, MPA, $ NQA, IPREA, IMIDA, IPOSTA, IGAP, GAPMUL, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER CSRCA, DTA, GAPMUL, ICTXT, IGAP, IMBA, IMIDA, $ INBA, INFO, IPOSTA, IPREA, MA, MBA, MPA, NA, $ NBA, NOUT, NQA, RSRCA * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PMDESCCHK checks the validity of the input test parameters and ini- * tializes the descriptor DESCA and the scalar variables MPA, NQA. In * case of an invalid parameter, this routine displays error messages * and return an non-zero error code in INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * DESCA (global output) INTEGER array * On entry, DESCA is an array of dimension DLEN_. DESCA is the * array descriptor to be set. * * DTYPEA (global input) INTEGER * On entry, DTYPEA specifies the descriptor type. In this ver- * sion, DTYPEA must be BLOCK_CYCLIC_INB_2D. * * MA (global input) INTEGER * On entry, MA specifies the number of rows in the matrix. MA * must be at least zero. * * NA (global input) INTEGER * On entry, NA specifies the number of columns in the matrix. * NA must be at least zero. * * IMBA (global input) INTEGER * On entry, IMBA specifies the row blocking factor used to dis- * tribute the first IMBA rows of the matrix. IMBA must be at * least one. * * INBA (global input) INTEGER * On entry, INBA specifies the column blocking factor used to * distribute the first INBA columns of the matrix. INBA must * be at least one. * * MBA (global input) INTEGER * On entry, MBA specifies the row blocking factor used to dis- * tribute the rows of the matrix. MBA must be at least one. * * NBA (global input) INTEGER * On entry, NBA specifies the column blocking factor used to * distribute the columns of the matrix. NBA must be at least * one. * * RSRCA (global input) INTEGER * On entry, RSRCA specifies the process row in which the first * row of the matrix resides. When RSRCA is -1, the matrix is * row replicated, otherwise RSCRA must be at least zero and * strictly less than NPROW. * * CSRCA (global input) INTEGER * On entry, CSRCA specifies the process column in which the * first column of the matrix resides. When CSRCA is -1, the * matrix is column replicated, otherwise CSCRA must be at least * zero and strictly less than NPCOL. * * MPA (local output) INTEGER * On exit, MPA is Lr( 1, MA ). * * NQA (local output) INTEGER * On exit, NQA is Lc( 1, NA ). * * IPREA (local output) INTEGER * On exit, IPREA specifies the size of the guard zone to put * before the start of the local padded array. * * IMIDA (local output) INTEGER * On exit, IMIDA specifies the lda-gap of the guard zone to * put after each column of the local padded array. * * IPOSTA (local output) INTEGER * On exit, IPOSTA specifies the size of the guard zone to put * after the local padded array. * * IGAP (global input) INTEGER * On entry, IGAP specifies the size of the lda-gap. * * GAPMUL (global input) INTEGER * On entry, GAPMUL is a constant factor controlling the size * of the pre- and post guardzone. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_DESCINIT2 * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Verify descriptor type DTYPE_ * IF( DTA.NE.BLOCK_CYCLIC_2D_INB ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) MATRIX, 'DTYPE', MATRIX, DTA, $ BLOCK_CYCLIC_2D_INB INFO = 1 END IF * * Verify global matrix dimensions (M_,N_) are correct * IF( MA.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) MATRIX, 'M', MATRIX, MA INFO = 1 ELSE IF( NA.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) MATRIX, 'N', MATRIX, NA INFO = 1 END IF * * Verify if blocking factors (IMB_, INB_) are correct * IF( IMBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) MATRIX, 'IMB', MATRIX, IMBA INFO = 1 ELSE IF( INBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) MATRIX, 'INB', MATRIX, INBA INFO = 1 END IF * * Verify if blocking factors (MB_, NB_) are correct * IF( MBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) MATRIX, 'MB', MATRIX, MBA INFO = 1 ELSE IF( NBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) MATRIX, 'NB', MATRIX, NBA INFO = 1 END IF * * Verify if origin process coordinates (RSRC_, CSRC_) are valid * IF( RSRCA.LT.-1 .OR. RSRCA.GE.NPROW ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'RSRC', MATRIX, RSRCA, NPROW END IF INFO = 1 ELSE IF( CSRCA.LT.-1 .OR. CSRCA.GE.NPCOL ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9991 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'CSRC', MATRIX, CSRCA, NPCOL END IF INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = * ) END IF * ELSE * * Compute local testing leading dimension * MPA = PB_NUMROC( MA, 1, IMBA, MBA, MYROW, RSRCA, NPROW ) NQA = PB_NUMROC( NA, 1, INBA, NBA, MYCOL, CSRCA, NPCOL ) IPREA = MAX( GAPMUL*NBA, MPA ) IMIDA = IGAP IPOSTA = MAX( GAPMUL*NBA, NQA ) LLDA = MAX( 1, MPA ) + IMIDA * CALL PB_DESCINIT2( DESCA, MA, NA, IMBA, INBA, MBA, NBA, RSRCA, $ CSRCA, ICTXT, LLDA, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = * ) END IF END IF * END IF * 9999 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor type ', A5, A1, $ ': ', I6, ' should be ', I3, '.' ) 9998 FORMAT( 2X, '>> Invalid matrix ', A1, ' row dimension ', A1, A1, $ ': ', I6, ' should be at least 1.' ) 9997 FORMAT( 2X, '>> Invalid matrix ', A1, ' column dimension ', A1, $ A1, ': ', I6, ' should be at least 1.' ) 9996 FORMAT( 2X, '>> Invalid matrix ', A1, ' first row block size ', $ A3, A1, ': ', I6, ' should be at least 1.' ) 9995 FORMAT( 2X, '>> Invalid matrix ', A1, ' first column block size ', $ A3, A1,': ', I6, ' should be at least 1.' ) 9994 FORMAT( 2X, '>> Invalid matrix ', A1, ' row block size ', A2, A1, $ ': ', I6, ' should be at least 1.' ) 9993 FORMAT( 2X, '>> Invalid matrix ', A1, ' column block size ', A2, $ A1,': ', I6, ' should be at least 1.' ) 9992 FORMAT( 2X, '>> Invalid matrix ', A1, ' row process source:' ) 9991 FORMAT( 2X, '>> Invalid matrix ', A1, ' column process source:' ) 9990 FORMAT( 2X, '>> ', A4, A1, '= ', I6, ' should be >= -1 and < ', $ I6, '.' ) 9989 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor: going on to ', $ 'next test case.' ) * RETURN * * End of PMDESCCHK * END DOUBLE PRECISION FUNCTION PDOPBL2( SUBNAM, M, N, KKL, KKU ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*7 SUBNAM INTEGER KKL, KKU, M, N * .. * * Purpose * ======= * * PDOPBL2 computes an approximation of the number of floating point * operations performed by a subroutine SUBNAM with the given values of * the parameters M, N, KL, and KU. * * This version counts operations for the Level 2 PBLAS. * * Arguments * ========= * * SUBNAM (input) CHARACTER*7 * On entry, SUBNAM specifies the name of the subroutine. * * M (input) INTEGER * On entry, M specifies the number of rows of the coefficient * matrix. M must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the coeffi- * cient matrix. If the matrix is square (such as in a solve * routine) then N is the number of right hand sides. N must be * at least zero. * * KKL (input) INTEGER * On entry, KKL specifies the lower band width of the coeffi- * cient matrix. KL is set to max( 0, min( M-1, KKL ) ). * * KKU (input) INTEGER * On entry, KKU specifies the upper band width of the coeffi- * cient matrix. KU is set to max( 0, min( N-1, KKU ) ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, SIX, TWO, ZERO PARAMETER ( ONE = 1.0D+0, SIX = 6.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER*1 C1 CHARACTER*2 C2 CHARACTER*3 C3 DOUBLE PRECISION ADDS, EK, EM, EN, KL, KU, MULTS * .. * .. External Functions .. LOGICAL LSAME, LSAMEN EXTERNAL LSAME, LSAMEN * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. .NOT.( LSAMEN( 2, SUBNAM, 'PS' ) .OR. $ LSAMEN( 2, SUBNAM, 'PD' ) .OR. $ LSAMEN( 2, SUBNAM, 'PC' ) .OR. LSAMEN( 2, SUBNAM, 'PZ' ) ) ) $ THEN PDOPBL2 = ZERO RETURN END IF * C1 = SUBNAM( 2: 2 ) C2 = SUBNAM( 3: 4 ) C3 = SUBNAM( 5: 7 ) MULTS = ZERO ADDS = ZERO KL = MAX( 0, MIN( M-1, KKL ) ) KU = MAX( 0, MIN( N-1, KKU ) ) EM = DBLE( M ) EN = DBLE( N ) EK = DBLE( KL ) * * ------------------------------- * Matrix-vector multiply routines * ------------------------------- * IF( LSAMEN( 3, C3, 'MV ' ) ) THEN * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * MULTS = EM * ( EN + ONE ) ADDS = EM * EN * * Assume M <= N + KL and KL < M * N <= M + KU and KU < N * so that the zero sections are triangles. * ELSE IF( LSAMEN( 2, C2, 'GB' ) ) THEN * MULTS = EM * ( EN + ONE ) - $ ( EM - ONE - KL ) * ( EM - KL ) / TWO - $ ( EN - ONE - KU ) * ( EN - KU ) / TWO ADDS = EM * ( EN + ONE ) - $ ( EM - ONE - KL ) * ( EM - KL ) / TWO - $ ( EN - ONE - KU ) * ( EN - KU ) / TWO * ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR. $ LSAMEN( 2, C2, 'HE' ) .OR. LSAMEN( 2, C2, 'HP' ) ) $ THEN * MULTS = EM * ( EM + ONE ) ADDS = EM * EM * ELSE IF( LSAMEN( 2, C2, 'SB' ) .OR. $ LSAMEN( 2, C2, 'HB' ) ) THEN * MULTS = EM * ( EM + ONE ) - ( EM - ONE - EK ) * ( EM - EK ) ADDS = EM * EM - ( EM - ONE - EK ) * ( EM - EK ) * ELSE IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) $ THEN * MULTS = EM * ( EM + ONE ) / TWO ADDS = ( EM - ONE ) * EM / TWO * ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN * MULTS = EM * ( EM + ONE ) / TWO - $ ( EM - EK - ONE ) * ( EM - EK ) / TWO ADDS = ( EM - ONE ) * EM / TWO - $ ( EM - EK - ONE ) * ( EM - EK ) / TWO * END IF * * --------------------- * Matrix solve routines * --------------------- * ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN * IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) THEN * MULTS = EM * ( EM + ONE ) / TWO ADDS = ( EM - ONE ) * EM / TWO * ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN * MULTS = EM * ( EM + ONE ) / TWO - $ ( EM - EK - ONE ) * ( EM - EK ) / TWO ADDS = ( EM - ONE ) * EM / TWO - $ ( EM - EK - ONE ) * ( EM - EK ) / TWO * END IF * * ---------------- * Rank-one updates * ---------------- * ELSE IF( LSAMEN( 3, C3, 'R ' ) ) THEN * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * MULTS = EM * EN + MIN( EM, EN ) ADDS = EM * EN * ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR. $ LSAMEN( 2, C2, 'HE' ) .OR. LSAMEN( 2, C2, 'HP' ) ) $ THEN * MULTS = EM * ( EM + ONE ) / TWO + EM ADDS = EM * ( EM + ONE ) / TWO * END IF * ELSE IF( LSAMEN( 3, C3, 'RC ' ) .OR. LSAMEN( 3, C3, 'RU ' ) ) THEN * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * MULTS = EM * EN + MIN( EM, EN ) ADDS = EM * EN * END IF * * ---------------- * Rank-two updates * ---------------- * ELSE IF( LSAMEN( 3, C3, 'R2 ' ) ) THEN IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR. $ LSAMEN( 2, C2, 'HE' ) .OR. LSAMEN( 2, C2, 'HP' ) ) THEN * MULTS = EM * ( EM + ONE ) + TWO * EM ADDS = EM * ( EM + ONE ) * END IF END IF * * ------------------------------------------------ * Compute the total number of operations. * For real and double precision routines, count * 1 for each multiply and 1 for each add. * For complex and complex*16 routines, count * 6 for each multiply and 2 for each add. * ------------------------------------------------ * IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN * PDOPBL2 = MULTS + ADDS * ELSE * PDOPBL2 = SIX * MULTS + TWO * ADDS * END IF * RETURN * * End of PDOPBL2 * END DOUBLE PRECISION FUNCTION PDOPBL3( SUBNAM, M, N, K ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*7 SUBNAM INTEGER K, M, N * .. * * Purpose * ======= * * PDOPBL3 computes an approximation of the number of floating point * operations performed by a subroutine SUBNAM with the given values of * the parameters M, N and K. * * This version counts operations for the Level 3 PBLAS. * * Arguments * ========= * * SUBNAM (input) CHARACTER*7 * On entry, SUBNAM specifies the name of the subroutine. * * M (input) INTEGER * N (input) INTEGER * K (input) INTEGER * On entry, M, N, and K contain parameter values used by the * Level 3 PBLAS. The output matrix is always M x N or N x N if * symmetric, but K has different uses in different contexts. * For example, in the matrix-matrix multiply routine, we have * C = A * B where C is M x N, A is M x K, and B is K x N. In * PxSYMM, PxHEMM, PxTRMM, and PxTRSM, K indicates whether the * matrix A is applied on the left or right. If K <= 0, the ma- * trix is applied on the left, and if K > 0, on the right. In * PxTRADD, K indicates whether the matrix C is upper or lower * triangular. If K <= 0, the matrix C is upper triangular, and * lower triangular otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, SIX, TWO, ZERO PARAMETER ( ONE = 1.0D+0, SIX = 6.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER*1 C1 CHARACTER*2 C2 CHARACTER*3 C3 DOUBLE PRECISION ADDS, EK, EM, EN, MULTS * .. * .. External Functions .. LOGICAL LSAME, LSAMEN EXTERNAL LSAME, LSAMEN * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. .NOT.( LSAMEN( 2, SUBNAM, 'PS' ) .OR. $ LSAMEN( 2, SUBNAM, 'PD' ) .OR. LSAMEN( 2, SUBNAM, 'PC' ) $ .OR. LSAMEN( 2, SUBNAM, 'PZ' ) ) ) $ THEN PDOPBL3 = ZERO RETURN END IF * C1 = SUBNAM( 2: 2 ) C2 = SUBNAM( 3: 4 ) C3 = SUBNAM( 5: 7 ) MULTS = ZERO ADDS = ZERO EM = DBLE( M ) EN = DBLE( N ) EK = DBLE( K ) * * ---------------------- * Matrix-matrix products * assume beta = 1 * ---------------------- * IF( LSAMEN( 3, C3, 'MM ' ) ) THEN * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * MULTS = EM * EK * EN ADDS = EM * EK * EN * ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. $ LSAMEN( 2, C2, 'HE' ) ) THEN * * IF K <= 0, assume A multiplies B on the left. * IF( K.LE.0 ) THEN MULTS = EM * EM * EN ADDS = EM * EM * EN ELSE MULTS = EM * EN * EN ADDS = EM * EN * EN END IF * ELSE IF( LSAMEN( 2, C2, 'TR' ) ) THEN * * IF K <= 0, assume A multiplies B on the left. * IF( K.LE.0 ) THEN MULTS = EN * EM * ( EM + ONE ) / TWO ADDS = EN * EM * ( EM - ONE ) / TWO ELSE MULTS = EM * EN * ( EN + ONE ) / TWO ADDS = EM * EN * ( EN - ONE ) / TWO END IF * END IF * * ------------------------------------------------ * Rank-K update of a symmetric or Hermitian matrix * ------------------------------------------------ * ELSE IF( LSAMEN( 3, C3, 'RK ' ) ) THEN * IF( LSAMEN( 2, C2, 'SY' ) .OR. $ LSAMEN( 2, C2, 'HE' ) ) THEN * MULTS = EK * EM *( EM + ONE ) / TWO ADDS = EK * EM *( EM + ONE ) / TWO END IF * * ------------------------------------------------- * Rank-2K update of a symmetric or Hermitian matrix * ------------------------------------------------- * ELSE IF( LSAMEN( 3, C3, 'R2K' ) ) THEN * IF( LSAMEN( 2, C2, 'SY' ) .OR. $ LSAMEN( 3, C2, 'HE' ) ) THEN * MULTS = EK * EM * EM ADDS = EK * EM * EM + EM END IF * * ----------------------------------------- * Solving system with many right hand sides * ----------------------------------------- * ELSE IF( LSAMEN( 4, SUBNAM( 3:6 ), 'TRSM' ) ) THEN * IF( K.LE.0 ) THEN MULTS = EN * EM * ( EM + ONE ) / TWO ADDS = EN * EM * ( EM - ONE ) / TWO ELSE MULTS = EM * EN * ( EN + ONE ) / TWO ADDS = EM * EN * ( EN - ONE ) / TWO END IF * * -------------------------- * Matrix (tranpose) Addition * -------------------------- * ELSE IF( LSAMEN( 3, C3, 'ADD' ) ) THEN * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * MULTS = 2 * EM * EN ADDS = EM * EN * ELSE IF( LSAMEN( 2, C2, 'TR' ) ) THEN * * IF K <= 0, assume C is upper triangular. * IF( K.LE.0 ) THEN IF( M.LE.N ) THEN MULTS = EM * ( TWO * EN - EM + ONE ) ADDS = EM * ( EM + ONE ) / TWO + EM * ( EN - EM ) ELSE MULTS = EN * ( EN + ONE ) ADDS = EN * ( EN + ONE ) / TWO END IF ELSE IF( M.GE.N ) THEN MULTS = EN * ( TWO * EM - EN + ONE ) ADDS = EN * ( EN + ONE ) / TWO + EN * ( EM - EN ) ELSE MULTS = EM * ( EM + ONE ) ADDS = EM * ( EM + ONE ) / TWO END IF END IF * END IF * END IF * * ------------------------------------------------ * Compute the total number of operations. * For real and double precision routines, count * 1 for each multiply and 1 for each add. * For complex and complex*16 routines, count * 6 for each multiply and 2 for each add. * ------------------------------------------------ * IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN * PDOPBL3 = MULTS + ADDS * ELSE * PDOPBL3 = SIX * MULTS + TWO * ADDS * END IF * RETURN * * End of PDOPBL3 * END SUBROUTINE PXERBLA( ICTXT, SRNAME, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INFO * .. * .. Array Arguments .. CHARACTER*(*) SRNAME * .. * * Purpose * ======= * * PXERBLA is an error handler for the ScaLAPACK routines. It is called * by a ScaLAPACK routine if an input parameter has an invalid value. A * message is printed. Installers may consider modifying this routine in * order to call system-specific exception-handling facilities. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * SRNAME (global input) CHARACTER*(*) * On entry, SRNAME specifies the name of the routine which cal- * ling PXERBLA. * * INFO (global input) INTEGER * On entry, INFO specifies the position of the invalid parame- * ter in the parameter list of the calling routine. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * WRITE( *, FMT = 9999 ) MYROW, MYCOL, SRNAME, INFO * 9999 FORMAT( '{', I5, ',', I5, '}: On entry to ', A, $ ' parameter number ', I4, ' had an illegal value' ) * RETURN * * End of PXERBLA * END LOGICAL FUNCTION LSAME( CA, CB ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER CA, CB * .. * * Purpose * ======= * * LSAME returns .TRUE. if CA is the same letter as CB regardless of * case. * * Arguments * ========= * * CA (input) CHARACTER*1 * CB (input) CHARACTER*1 * CA and CB specify the single characters to be compared. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ICHAR * .. * .. Local Scalars .. INTEGER INTA, INTB, ZCODE * .. * .. Executable Statements .. * * Test if the characters are equal * LSAME = CA.EQ.CB IF( LSAME ) $ RETURN * * Now test for equivalence if both characters are alphabetic. * ZCODE = ICHAR( 'Z' ) * * Use 'Z' rather than 'A' so that ASCII can be detected on Prime * machines, on which ICHAR returns a value with bit 8 set. * ICHAR('A') on Prime machines returns 193 which is the same as * ICHAR('A') on an EBCDIC machine. * INTA = ICHAR( CA ) INTB = ICHAR( CB ) * IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN * * ASCII is assumed - ZCODE is the ASCII code of either lower or * upper case 'Z'. * IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 * ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN * * EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or * upper case 'Z'. * IF( INTA.GE.129 .AND. INTA.LE.137 .OR. $ INTA.GE.145 .AND. INTA.LE.153 .OR. $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 IF( INTB.GE.129 .AND. INTB.LE.137 .OR. $ INTB.GE.145 .AND. INTB.LE.153 .OR. $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 * ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN * * ASCII is assumed, on Prime machines - ZCODE is the ASCII code * plus 128 of either lower or upper case 'Z'. * IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 END IF LSAME = INTA.EQ.INTB * * RETURN * * End of LSAME * END LOGICAL FUNCTION LSAMEN( N, CA, CB ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER*( * ) CA, CB INTEGER N * .. * * Purpose * ======= * * LSAMEN tests if the first N letters of CA are the same as the * first N letters of CB, regardless of case. * LSAMEN returns .TRUE. if CA and CB are equivalent except for case * and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) * or LEN( CB ) is less than N. * * Arguments * ========= * * N (input) INTEGER * The number of characters in CA and CB to be compared. * * CA (input) CHARACTER*(*) * CB (input) CHARACTER*(*) * CA and CB specify two character strings of length at least N. * Only the first N characters of each string will be accessed. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC LEN * .. * .. Executable Statements .. * LSAMEN = .FALSE. IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) $ GO TO 20 * * Do for each character in the two strings. * DO 10 I = 1, N * * Test if the characters are equal using LSAME. * IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) $ GO TO 20 * 10 CONTINUE LSAMEN = .TRUE. * 20 CONTINUE RETURN * * End of LSAMEN * END SUBROUTINE ICOPY( N, SX, INCX, SY, INCY ) * * -- LAPACK auxiliary test routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * February 29, 1992 * * .. Scalar Arguments .. INTEGER INCX, INCY, N * .. * .. Array Arguments .. INTEGER SX( * ), SY( * ) * .. * * Purpose * ======= * * ICOPY copies an integer vector x to an integer vector y. * Uses unrolled loops for increments equal to 1. * * Arguments * ========= * * N (input) INTEGER * The length of the vectors SX and SY. * * SX (input) INTEGER array, dimension (1+(N-1)*abs(INCX)) * The vector X. * * INCX (input) INTEGER * The spacing between consecutive elements of SX. * * SY (output) INTEGER array, dimension (1+(N-1)*abs(INCY)) * The vector Y. * * INCY (input) INTEGER * The spacing between consecutive elements of SY. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IX, IY, M, MP1 * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * IF( N.LE.0 ) $ RETURN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) $ GO TO 20 * * Code for unequal increments or equal increments not equal to 1 * IX = 1 IY = 1 IF( INCX.LT.0 ) $ IX = ( -N+1 )*INCX + 1 IF( INCY.LT.0 ) $ IY = ( -N+1 )*INCY + 1 DO 10 I = 1, N SY( IY ) = SX( IX ) IX = IX + INCX IY = IY + INCY 10 CONTINUE RETURN * * Code for both increments equal to 1 * * Clean-up loop * 20 CONTINUE M = MOD( N, 7 ) IF( M.EQ.0 ) $ GO TO 40 DO 30 I = 1, M SY( I ) = SX( I ) 30 CONTINUE IF( N.LT.7 ) $ RETURN 40 CONTINUE MP1 = M + 1 DO 50 I = MP1, N, 7 SY( I ) = SX( I ) SY( I+1 ) = SX( I+1 ) SY( I+2 ) = SX( I+2 ) SY( I+3 ) = SX( I+3 ) SY( I+4 ) = SX( I+4 ) SY( I+5 ) = SX( I+5 ) SY( I+6 ) = SX( I+6 ) 50 CONTINUE RETURN * * End of ICOPY * END INTEGER FUNCTION PB_NOABORT( CINFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CINFO * .. * * Purpose * ======= * * PB_NOABORT transmits the info parameter of a PBLAS routine to the * tester and tells the PBLAS error handler to avoid aborting on erro- * neous input arguments. * * Notes * ===== * * This routine is necessary because of the CRAY C fortran interface * and the fact that the usual PBLAS error handler routine has been * initially written in C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER INFO, NBLOG, NOUT LOGICAL ABRTFLG COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Executable Statements .. * INFO = CINFO IF( ABRTFLG ) THEN PB_NOABORT = 0 ELSE PB_NOABORT = 1 END IF * RETURN * * End of PB_NOABORT * END SUBROUTINE PB_INFOG2L( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, PROW, PCOL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, II, J, JJ, MYCOL, MYROW, NPCOL, NPROW, PCOL, $ PROW * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_INFOG2L computes the starting local index II, JJ corresponding to * the submatrix starting globally at the entry pointed by I, J. This * routine returns the coordinates in the grid of the process owning the * matrix entry of global indexes I, J, namely PROW and PCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least one. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least one. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least one. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of the * submatrix. On exit, JJ is at least one. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero * and strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least * zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRC, I1, ILOCBLK, IMB, INB, J1, MB, MYDIST, $ NB, NBLOCKS, RSRC * .. * .. Local Arrays .. INTEGER DESC2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL PB_DESCTRANS * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESC, DESC2 ) * IMB = DESC2( IMB_ ) PROW = DESC2( RSRC_ ) * * Has every process row I ? * IF( ( PROW.EQ.-1 ).OR.( NPROW.EQ.1 ) ) THEN * II = I * ELSE IF( I.LE.IMB ) THEN * * I is in range of first block * IF( MYROW.EQ.PROW ) THEN II = I ELSE II = 1 END IF * ELSE * * I is not in first block of matrix, figure out who has it. * RSRC = PROW MB = DESC2( MB_ ) * IF( MYROW.EQ.RSRC ) THEN * NBLOCKS = ( I - IMB - 1 ) / MB + 1 PROW = PROW + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPROW ).GE.NBLOCKS ) THEN IF( MYROW.EQ.PROW ) THEN II = I + ( ILOCBLK - NBLOCKS ) * MB ELSE II = IMB + ( ILOCBLK - 1 ) * MB + 1 END IF ELSE II = IMB + ILOCBLK * MB + 1 END IF ELSE II = IMB + 1 END IF * ELSE * I1 = I - IMB NBLOCKS = ( I1 - 1 ) / MB + 1 PROW = PROW + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * MYDIST = MYROW - RSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN II = MB + ILOCBLK * MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( ILOCBLK - NBLOCKS + 1 ) * MB ELSE II = ILOCBLK * MB + 1 END IF END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN II = MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( 1 - NBLOCKS ) * MB ELSE II = 1 END IF END IF END IF * END IF * INB = DESC2( INB_ ) PCOL = DESC2( CSRC_ ) * * Has every process column J ? * IF( ( PCOL.EQ.-1 ).OR.( NPCOL.EQ.1 ) ) THEN * JJ = J * ELSE IF( J.LE.INB ) THEN * * J is in range of first block * IF( MYCOL.EQ.PCOL ) THEN JJ = J ELSE JJ = 1 END IF * ELSE * * J is not in first block of matrix, figure out who has it. * CSRC = PCOL NB = DESC2( NB_ ) * IF( MYCOL.EQ.CSRC ) THEN * NBLOCKS = ( J - INB - 1 ) / NB + 1 PCOL = PCOL + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPCOL ).GE.NBLOCKS ) THEN IF( MYCOL.EQ.PCOL ) THEN JJ = J + ( ILOCBLK - NBLOCKS ) * NB ELSE JJ = INB + ( ILOCBLK - 1 ) * NB + 1 END IF ELSE JJ = INB + ILOCBLK * NB + 1 END IF ELSE JJ = INB + 1 END IF * ELSE * J1 = J - INB NBLOCKS = ( J1 - 1 ) / NB + 1 PCOL = PCOL + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * MYDIST = MYCOL - CSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN JJ = NB + ILOCBLK * NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( ILOCBLK - NBLOCKS + 1 ) * NB ELSE JJ = ILOCBLK * NB + 1 END IF END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN JJ = NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( 1 - NBLOCKS ) * NB ELSE JJ = 1 END IF END IF END IF * END IF * RETURN * * End of PB_INFOG2L * END SUBROUTINE PB_AINFOG2L( M, N, I, J, DESC, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, $ PCOL, RPROW, RPCOL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, II, IMB1, INB1, J, JJ, M, MP, MYCOL, MYROW, $ N, NPCOL, NPROW, NQ, PCOL, PROW, RPCOL, RPROW * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_AINFOG2L computes the starting local row and column indexes II, * JJ corresponding to the submatrix starting globally at the entry * pointed by I, J. This routine returns the coordinates in the grid of * the process owning the matrix entry of global indexes I, J, namely * PROW and PCOL. In addition, this routine computes the quantities MP * and NQ, which are respectively the local number of rows and columns * owned by the process of coordinate MYROW, MYCOL corresponding to the * global submatrix A(I:I+M-1,J:J+N-1). Finally, the size of the first * partial block and the relative process coordinates are also returned * respectively in IMB, INB and RPROW, RPCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the global number of rows of the subma- * trix. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of columns of the * submatrix. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least one. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least one. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * IMB1 (global output) INTEGER * On exit, IMB1 specifies the number of rows of the upper left * block of the submatrix. On exit, IMB1 is less or equal than * M and greater or equal than MIN( 1, M ). * * INB1 (global output) INTEGER * On exit, INB1 specifies the number of columns of the upper * left block of the submatrix. On exit, INB1 is less or equal * than N and greater or equal than MIN( 1, N ). * * MP (local output) INTEGER * On exit, MP specifies the local number of rows of the subma- * trix, that the processes of row coordinate MYROW own. MP is * at least zero. * * NQ (local output) INTEGER * On exit, NQ specifies the local number of columns of the * submatrix, that the processes of column coordinate MYCOL * own. NQ is at least zero. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least one. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of * the submatrix. On exit, II is at least one. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC(RSRC_) is -1 on input, and, at least zero and * strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC(CSRC_) is -1 on input, and, at least zero * and strictly less than NPCOL otherwise. * * RPROW (global output) INTEGER * On exit, RPROW specifies the relative row coordinate of the * process that possesses the first row I of the submatrix. On * exit, RPROW is -1 if DESC(RSRC_) is -1 on input, and, at * least zero and strictly less than NPROW otherwise. * * RPCOL (global output) INTEGER * On exit, RPCOL specifies the relative column coordinate of * the process that possesses the first column J of the subma- * trix. On exit, RPCOL is -1 if DESC(CSRC_) is -1 on input, * and, at least zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRC, I1, ILOCBLK, J1, M1, MB, MYDIST, N1, NB, $ NBLOCKS, RSRC * .. * .. Local Arrays .. INTEGER DESC2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESC, DESC2 ) * MB = DESC2( MB_ ) IMB1 = DESC2( IMB_ ) RSRC = DESC2( RSRC_ ) * IF( ( RSRC.EQ.-1 ).OR.( NPROW.EQ.1 ) ) THEN * II = I IMB1 = IMB1 - I + 1 IF( IMB1.LE.0 ) $ IMB1 = ( ( -IMB1 ) / MB + 1 ) * MB + IMB1 IMB1 = MIN( IMB1, M ) MP = M PROW = RSRC RPROW = 0 * ELSE * * Figure out PROW, II and IMB1 first * IF( I.LE.IMB1 ) THEN * PROW = RSRC * IF( MYROW.EQ.PROW ) THEN II = I ELSE II = 1 END IF * IMB1 = IMB1 - I + 1 * ELSE * I1 = I - IMB1 - 1 NBLOCKS = I1 / MB + 1 PROW = RSRC + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * IF( MYROW.EQ.RSRC ) THEN * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPROW ).GE.NBLOCKS ) THEN IF( MYROW.EQ.PROW ) THEN II = I + ( ILOCBLK - NBLOCKS ) * MB ELSE II = IMB1 + ( ILOCBLK - 1 ) * MB + 1 END IF ELSE II = IMB1 + ILOCBLK * MB + 1 END IF ELSE II = IMB1 + 1 END IF * ELSE * MYDIST = MYROW - RSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN II = ( ILOCBLK + 1 ) * MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( ILOCBLK - NBLOCKS + 1 ) * MB + 1 ELSE II = ILOCBLK * MB + 1 END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN II = MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( 1 - NBLOCKS ) * MB + 1 ELSE II = 1 END IF END IF END IF * IMB1 = NBLOCKS * MB - I1 * END IF * * Figure out MP * IF( M.LE.IMB1 ) THEN * IF( MYROW.EQ.PROW ) THEN MP = M ELSE MP = 0 END IF * ELSE * M1 = M - IMB1 NBLOCKS = M1 / MB + 1 * IF( MYROW.EQ.PROW ) THEN ILOCBLK = NBLOCKS / NPROW IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPROW ).GT.0 ) THEN MP = IMB1 + ILOCBLK * MB ELSE MP = M + MB * ( ILOCBLK - NBLOCKS ) END IF ELSE MP = IMB1 END IF ELSE MYDIST = MYROW - PROW IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW ILOCBLK = NBLOCKS / NPROW IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN MP = ( ILOCBLK + 1 ) * MB ELSE IF( MYDIST.GT.0 ) THEN MP = ILOCBLK * MB ELSE MP = M1 + MB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN MP = MB ELSE IF( MYDIST.GT.0 ) THEN MP = 0 ELSE MP = M1 + MB * ( 1 - NBLOCKS ) END IF END IF END IF * END IF * IMB1 = MIN( IMB1, M ) RPROW = MYROW - PROW IF( RPROW.LT.0 ) $ RPROW = RPROW + NPROW * END IF * NB = DESC2( NB_ ) INB1 = DESC2( INB_ ) CSRC = DESC2( CSRC_ ) * IF( ( CSRC.EQ.-1 ).OR.( NPCOL.EQ.1 ) ) THEN * JJ = J INB1 = INB1 - I + 1 IF( INB1.LE.0 ) $ INB1 = ( ( -INB1 ) / NB + 1 ) * NB + INB1 INB1 = MIN( INB1, N ) NQ = N PCOL = CSRC RPCOL = 0 * ELSE * * Figure out PCOL, JJ and INB1 first * IF( J.LE.INB1 ) THEN * PCOL = CSRC * IF( MYCOL.EQ.PCOL ) THEN JJ = J ELSE JJ = 1 END IF * INB1 = INB1 - J + 1 * ELSE * J1 = J - INB1 - 1 NBLOCKS = J1 / NB + 1 PCOL = CSRC + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * IF( MYCOL.EQ.CSRC ) THEN * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPCOL ).GE.NBLOCKS ) THEN IF( MYCOL.EQ.PCOL ) THEN JJ = J + ( ILOCBLK - NBLOCKS ) * NB ELSE JJ = INB1 + ( ILOCBLK - 1 ) * NB + 1 END IF ELSE JJ = INB1 + ILOCBLK * NB + 1 END IF ELSE JJ = INB1 + 1 END IF * ELSE * MYDIST = MYCOL - CSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN JJ = ( ILOCBLK + 1 ) * NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( ILOCBLK - NBLOCKS + 1 ) * NB + 1 ELSE JJ = ILOCBLK * NB + 1 END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN JJ = NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( 1 - NBLOCKS ) * NB + 1 ELSE JJ = 1 END IF END IF END IF * INB1 = NBLOCKS * NB - J1 * END IF * * Figure out NQ * IF( N.LE.INB1 ) THEN * IF( MYCOL.EQ.PCOL ) THEN NQ = N ELSE NQ = 0 END IF * ELSE * N1 = N - INB1 NBLOCKS = N1 / NB + 1 * IF( MYCOL.EQ.PCOL ) THEN ILOCBLK = NBLOCKS / NPCOL IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPCOL ).GT.0 ) THEN NQ = INB1 + ILOCBLK * NB ELSE NQ = N + NB * ( ILOCBLK - NBLOCKS ) END IF ELSE NQ = INB1 END IF ELSE MYDIST = MYCOL - PCOL IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL ILOCBLK = NBLOCKS / NPCOL IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN NQ = ( ILOCBLK + 1 ) * NB ELSE IF( MYDIST.GT.0 ) THEN NQ = ILOCBLK * NB ELSE NQ = N1 + NB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN NQ = NB ELSE IF( MYDIST.GT.0 ) THEN NQ = 0 ELSE NQ = N1 + NB * ( 1 - NBLOCKS ) END IF END IF END IF * END IF * INB1 = MIN( INB1, N ) RPCOL = MYCOL - PCOL IF( RPCOL.LT.0 ) $ RPCOL = RPCOL + NPCOL * END IF * RETURN * * End of PB_AINFOG2L * END INTEGER FUNCTION PB_NUMROC( N, I, INB, NB, PROC, SRCPROC, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, INB, N, NB, NPROCS, PROC, SRCPROC * .. * * Purpose * ======= * * PB_NUMROC returns the local number of matrix rows/columns process * PROC will get if we give out N rows/columns starting from global in- * dex I. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local input) INTEGER * On entry, PROC specifies the coordinate of the process whose * local portion is determined. PROC must be at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I1, ILOCBLK, INB1, MYDIST, N1, NBLOCKS, $ SRCPROC1 * .. * .. Executable Statements .. * IF( ( SRCPROC.EQ.-1 ).OR.( NPROCS.EQ.1 ) ) THEN PB_NUMROC = N RETURN END IF * * Compute coordinate of process owning I and corresponding INB * IF( I.LE.INB ) THEN * * I is in range of first block, i.e SRCPROC owns I. * SRCPROC1 = SRCPROC INB1 = INB - I + 1 * ELSE * * I is not in first block of matrix, figure out who has it * I1 = I - 1 - INB NBLOCKS = I1 / NB + 1 SRCPROC1 = SRCPROC + NBLOCKS SRCPROC1 = SRCPROC1 - ( SRCPROC1 / NPROCS ) * NPROCS INB1 = NBLOCKS*NB - I1 * END IF * * Now everything is just like I=1. Search now who has N-1, Is N-1 * in the first block ? * IF( N.LE.INB1 ) THEN IF( PROC.EQ.SRCPROC1 ) THEN PB_NUMROC = N ELSE PB_NUMROC = 0 END IF RETURN END IF * N1 = N - INB1 NBLOCKS = N1 / NB + 1 * IF( PROC.EQ.SRCPROC1 ) THEN ILOCBLK = NBLOCKS / NPROCS IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPROCS ).GT.0 ) THEN PB_NUMROC = INB1 + ILOCBLK * NB ELSE PB_NUMROC = N + NB * ( ILOCBLK - NBLOCKS ) END IF ELSE PB_NUMROC = INB1 END IF ELSE MYDIST = PROC - SRCPROC1 IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROCS ILOCBLK = NBLOCKS / NPROCS IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROCS IF( MYDIST.LT.0 ) THEN PB_NUMROC = ( ILOCBLK + 1 ) * NB ELSE IF( MYDIST.GT.0 ) THEN PB_NUMROC = ILOCBLK * NB ELSE PB_NUMROC = N1 + NB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN PB_NUMROC = NB ELSE IF( MYDIST.GT.0 ) THEN PB_NUMROC = 0 ELSE PB_NUMROC = N1 + NB * ( 1 - NBLOCKS ) END IF END IF END IF * RETURN * * End of PB_NUMROC * END SUBROUTINE PB_BOOT() * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * * Purpose * ======= * * PB_BOOT (re)sets all timers to 0, and enables PB_TIMER. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION STARTFLAG, ZERO PARAMETER ( STARTFLAG = -5.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .FALSE. DO 10 I = 1, NTIMER CPUSEC( I ) = ZERO WALLSEC( I ) = ZERO CPUSTART( I ) = STARTFLAG WALLSTART( I ) = STARTFLAG 10 CONTINUE * RETURN * * End of PB_BOOT * END * SUBROUTINE PB_TIMER( I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I * .. * * Purpose * ======= * * PB_TIMER provides a "stopwatch" functionality cpu/wall timer in se- * conds. Up to 64 separate timers can be functioning at once. The first * call starts the timer, and the second stops it. This routine can be * disenabled, so that calls to the timer are ignored. This feature can * be used to make sure certain sections of code do not affect timings, * even if they call routines which have PB_TIMER calls in them. * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the timer to stop/start. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION STARTFLAG PARAMETER ( STARTFLAG = -5.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00 * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * * If timing disabled, return * IF( DISABLED ) $ RETURN * IF( WALLSTART( I ).EQ.STARTFLAG ) THEN * * If timer has not been started, start it * WALLSTART( I ) = DWALLTIME00() CPUSTART( I ) = DCPUTIME00() * ELSE * * Stop timer and add interval to count * CPUSEC( I ) = CPUSEC( I ) + DCPUTIME00() - CPUSTART( I ) WALLSEC( I ) = WALLSEC( I ) + DWALLTIME00() - WALLSTART( I ) WALLSTART( I ) = STARTFLAG * END IF * RETURN * * End of PB_TIMER * END * SUBROUTINE PB_ENABLE() * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * * Purpose * ======= * * PB_ENABLE sets it so calls to PB_TIMER are not ignored. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .FALSE. * RETURN * * End of PB_ENABLE * END * SUBROUTINE PB_DISABLE() * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PB_DISABLE sets it so calls to PB_TIMER are ignored. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .TRUE. * RETURN * * End of PB_DISABLE * END * DOUBLE PRECISION FUNCTION PB_INQUIRE( TMTYPE, I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TMTYPE INTEGER I * .. * * Purpose * ======= * * PB_INQUIRE returns wall or cpu time that has accumulated in timer I. * * Arguments * ========= * * TMTYPE (global input) CHARACTER * On entry, TMTYPE specifies what time will be returned as fol- * lows * = 'W': wall clock time is returned, * = 'C': CPU time is returned (default). * * I (global input) INTEGER * On entry, I specifies the timer to return. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION ERRFLAG PARAMETER ( ERRFLAG = -1.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION TIME * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00, LSAME * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * IF( LSAME( TMTYPE, 'W' ) ) THEN * * If walltime not available on this machine, return -1 flag * IF( DWALLTIME00().EQ.ERRFLAG ) THEN TIME = ERRFLAG ELSE TIME = WALLSEC( I ) END IF ELSE IF( DCPUTIME00().EQ.ERRFLAG ) THEN TIME = ERRFLAG ELSE TIME = CPUSEC( I ) END IF END IF * PB_INQUIRE = TIME * RETURN * * End of PB_INQUIRE * END * SUBROUTINE PB_COMBINE( ICTXT, SCOPE, OP, TMTYPE, N, IBEG, $ TIMES ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 OP, SCOPE, TMTYPE INTEGER IBEG, ICTXT, N * .. * .. Array Arguments .. DOUBLE PRECISION TIMES( N ) * .. * * Purpose * ======= * * PB_COMBINE returns wall or cpu time that has accumulated in timer I. * * Arguments * ========= * * TMTYPE (global input) CHARACTER * On entry, TMTYPE specifies what time will be returned as fol- * lows * = 'W': wall clock time is returned, * = 'C': CPU time is returned (default). * * I (global input) INTEGER * On entry, I specifies the timer to return. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION ERRFLAG PARAMETER ( ERRFLAG = -1.0D+0 ) * .. * .. Local Scalars .. CHARACTER*1 TOP LOGICAL TMPDIS INTEGER I * .. * .. External Subroutines .. EXTERNAL DGAMX2D, DGAMN2D, DGSUM2D, PB_TOPGET * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00, LSAME * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * * Disable timer for combine operation * TMPDIS = DISABLED DISABLED = .TRUE. * * Copy timer information into user's times array * IF( LSAME( TMTYPE, 'W' ) ) THEN * * If walltime not available on this machine, fill in times * with -1 flag, and return * IF( DWALLTIME00().EQ.ERRFLAG ) THEN DO 10 I = 1, N TIMES( I ) = ERRFLAG 10 CONTINUE RETURN ELSE DO 20 I = 1, N TIMES( I ) = WALLSEC( IBEG + I - 1 ) 20 CONTINUE END IF ELSE IF( DCPUTIME00().EQ.ERRFLAG ) THEN DO 30 I = 1, N TIMES( I ) = ERRFLAG 30 CONTINUE RETURN ELSE DO 40 I = 1, N TIMES( I ) = CPUSEC( IBEG + I - 1 ) 40 CONTINUE END IF ENDIF * * Combine all nodes' information, restore disabled, and return * IF( OP.EQ.'>' ) THEN CALL PB_TOPGET( ICTXT, 'Combine', SCOPE, TOP ) CALL DGAMX2D( ICTXT, SCOPE, TOP, N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) ELSE IF( OP.EQ.'<' ) THEN CALL PB_TOPGET( ICTXT, 'Combine', SCOPE, TOP ) CALL DGAMN2D( ICTXT, SCOPE, TOP, N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) ELSE IF( OP.EQ.'+' ) THEN CALL PB_TOPGET( ICTXT, 'Combine', SCOPE, TOP ) CALL DGSUM2D( ICTXT, SCOPE, TOP, N, 1, TIMES, N, -1, 0 ) ELSE CALL PB_TOPGET( ICTXT, 'Combine', SCOPE, TOP ) CALL DGAMX2D( ICTXT, SCOPE, TOP, N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) END IF * DISABLED = TMPDIS * RETURN * * End of PB_COMBINE * END SUBROUTINE PB_CHKMAT( ICTXT, M, MPOS0, N, NPOS0, IA, JA, DESCA, $ DPOS0, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER DPOS0, IA, ICTXT, INFO, JA, M, MPOS0, N, NPOS0 * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PB_CHKMAT checks the validity of a descriptor vector DESCA, the re- * lated global indexes IA, JA from a local view point. If an inconsis- * tency is found among its parameters IA, JA and DESCA, the routine re- * turns an error code in INFO. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (global input) INTEGER * On entry, M specifies the number of rows the submatrix * sub( A ). * * MPOS0 (global input) INTEGER * On entry, MPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter M appears. * * N (global input) INTEGER * On entry, N specifies the number of columns the submatrix * sub( A ). * * NPOS0 (global input) INTEGER * On entry, NPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter N appears. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * DPOS0 (global input) INTEGER * On entry, DPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter DESCA ap- * pears. Note that it is assumed that IA and JA are respecti- * vely 2 and 1 entries behind DESCA. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had an * illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) * .. * .. Local Scalars .. INTEGER DPOS, IAPOS, JAPOS, MP, MPOS, MYCOL, MYROW, $ NPCOL, NPOS, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the des- * criptor multiplier * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Figure where in parameter list each parameter was, factoring in * descriptor multiplier * MPOS = MPOS0 * DESCMULT NPOS = NPOS0 * DESCMULT IAPOS = ( DPOS0 - 2 ) * DESCMULT JAPOS = ( DPOS0 - 1 ) * DESCMULT DPOS = DPOS0 * DESCMULT * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check that matrix values make sense from local viewpoint * IF( M.LT.0 ) $ INFO = MIN( INFO, MPOS ) IF( N.LT.0 ) $ INFO = MIN( INFO, NPOS ) IF( IA.LT.1 ) $ INFO = MIN( INFO, IAPOS ) IF( JA.LT.1 ) $ INFO = MIN( INFO, JAPOS ) IF( DESCA2( DTYPE_ ).NE.BLOCK_CYCLIC_2D_INB ) $ INFO = MIN( INFO, DPOS + DTYPE_ ) IF( DESCA2( IMB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + IMB_ ) IF( DESCA2( INB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + INB_ ) IF( DESCA2( MB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + MB_ ) IF( DESCA2( NB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + NB_ ) IF( DESCA2( RSRC_ ).LT.-1 .OR. DESCA2( RSRC_ ).GE.NPROW ) $ INFO = MIN( INFO, DPOS + RSRC_ ) IF( DESCA2( CSRC_ ).LT.-1 .OR. DESCA2( CSRC_ ).GE.NPCOL ) $ INFO = MIN( INFO, DPOS + CSRC_ ) IF( DESCA2( CTXT_ ).NE.ICTXT ) $ INFO = MIN( INFO, DPOS + CTXT_ ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN * * NULL matrix, relax some checks * IF( DESCA2( M_ ).LT.0 ) $ INFO = MIN( INFO, DPOS + M_ ) IF( DESCA2( N_ ).LT.0 ) $ INFO = MIN( INFO, DPOS + N_ ) IF( DESCA2( LLD_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + LLD_ ) * ELSE * * more rigorous checks for non-degenerate matrices * MP = PB_NUMROC( DESCA2( M_ ), 1, DESCA2( IMB_ ), DESCA2( MB_ ), $ MYROW, DESCA2( RSRC_ ), NPROW ) * IF( DESCA2( M_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + M_ ) IF( DESCA2( N_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + N_ ) IF( IA.GT.DESCA2( M_ ) ) $ INFO = MIN( INFO, IAPOS ) IF( JA.GT.DESCA2( N_ ) ) $ INFO = MIN( INFO, JAPOS ) IF( IA+M-1.GT.DESCA2( M_ ) ) $ INFO = MIN( INFO, MPOS ) IF( JA+N-1.GT.DESCA2( N_ ) ) $ INFO = MIN( INFO, NPOS ) * IF( DESCA2( LLD_ ).LT.MAX( 1, MP ) ) THEN NQ = PB_NUMROC( DESCA2( N_ ), 1, DESCA2( INB_ ), $ DESCA2( NB_ ), MYCOL, DESCA2( CSRC_ ), $ NPCOL ) IF( DESCA2( LLD_ ).LT.1 ) THEN INFO = MIN( INFO, DPOS + LLD_ ) ELSE IF( NQ.GT.0 ) THEN INFO = MIN( INFO, DPOS + LLD_ ) END IF END IF * END IF * * Prepare output: set info = 0 if no error, and divide by * DESCMULT if error is not in a descriptor entry * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -( INFO / DESCMULT ) ELSE INFO = -INFO END IF * RETURN * * End of PB_CHKMAT * END SUBROUTINE PB_DESCTRANS( DESCIN, DESCOUT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER DESCIN( * ), DESCOUT( * ) * .. * * Purpose * ======= * * PB_DESCTRANS converts a descriptor DESCIN of type BLOCK_CYCLIC_2D * or BLOCK_CYCLIC_INB_2D into a descriptor DESCOUT of type * BLOCK_CYCLIC_INB_2D. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ----------------------------------- * DTYPE_A (global) DESCA( DTYPE1_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT1_ ) The BLACS context handle indicating * the NPROW x NPCOL BLACS process * grid A is distributed over. The * context itself is global, but the * handle (the integer value) may * vary. * M_A (global) DESCA( M1_ ) The number of rows in the distri- * buted matrix A, M_A >= 0. * N_A (global) DESCA( N1_ ) The number of columns in the dis- * tributed matrix A, N_A >= 0. * MB_A (global) DESCA( MB1_ ) The blocking factor used to distri- * bute the rows of A, MB_A > 0. * NB_A (global) DESCA( NB1_ ) The blocking factor used to distri- * bute the columns of A, NB_A > 0. * RSRC_A (global) DESCA( RSRC1_ ) The process row over which the * first row of the matrix A is dis- * tributed, NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC1_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD1_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, MB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, NB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESCIN (global and local input) INTEGER array * On entry, DESCIN is an array of dimension DLEN1_ or DLEN_ as * specified by its first entry DESCIN( DTYPE_ ). DESCIN is the * source array descriptor of type BLOCK_CYCLIC_2D or of type * BLOCK_CYCLIC_2D_INB. * * DESCOUT (global and local output) INTEGER array * On entry, DESCOUT is an array of dimension DLEN_. DESCOUT is * the target array descriptor of type BLOCK_CYCLIC_2D_INB. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC1_, CTXT1_, DLEN1_, $ DTYPE1_, LLD1_, M1_, MB1_, N1_, NB1_, RSRC1_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN1_ = 9, DTYPE1_ = 1, $ CTXT1_ = 2, M1_ = 3, N1_ = 4, MB1_ = 5, $ NB1_ = 6, RSRC1_ = 7, CSRC1_ = 8, LLD1_ = 9 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I * .. * .. Executable Statements .. * IF( DESCIN( DTYPE_ ).EQ.BLOCK_CYCLIC_2D ) THEN DESCOUT( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESCOUT( CTXT_ ) = DESCIN( CTXT1_ ) DESCOUT( M_ ) = DESCIN( M1_ ) DESCOUT( N_ ) = DESCIN( N1_ ) DESCOUT( IMB_ ) = DESCIN( MB1_ ) DESCOUT( INB_ ) = DESCIN( NB1_ ) DESCOUT( MB_ ) = DESCIN( MB1_ ) DESCOUT( NB_ ) = DESCIN( NB1_ ) DESCOUT( RSRC_ ) = DESCIN( RSRC1_ ) DESCOUT( CSRC_ ) = DESCIN( CSRC1_ ) DESCOUT( LLD_ ) = DESCIN( LLD1_ ) ELSE IF( DESCIN( DTYPE_ ).EQ.BLOCK_CYCLIC_2D_INB ) THEN DO 10 I = 1, DLEN_ DESCOUT( I ) = DESCIN( I ) 10 CONTINUE ELSE DESCOUT( DTYPE_ ) = DESCIN( 1 ) DESCOUT( CTXT_ ) = DESCIN( 2 ) DESCOUT( M_ ) = 0 DESCOUT( N_ ) = 0 DESCOUT( IMB_ ) = 1 DESCOUT( INB_ ) = 1 DESCOUT( MB_ ) = 1 DESCOUT( NB_ ) = 1 DESCOUT( RSRC_ ) = 0 DESCOUT( CSRC_ ) = 0 DESCOUT( LLD_ ) = 1 END IF * RETURN * * End of PB_DESCTRANS * END SUBROUTINE PB_DESCSET2( DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, $ CTXT, LLD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CSRC, CTXT, IMB, INB, LLD, M, MB, N, NB, RSRC * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_DESCSET2 uses its 10 input arguments M, N, IMB, INB, MB, NB, * RSRC, CSRC, CTXT and LLD to initialize a descriptor vector of type * BLOCK_CYCLIC_2D_INB. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ----------------------------------- * DTYPE_A (global) DESCA( DTYPE1_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT1_ ) The BLACS context handle indicating * the NPROW x NPCOL BLACS process * grid A is distributed over. The * context itself is global, but the * handle (the integer value) may * vary. * M_A (global) DESCA( M1_ ) The number of rows in the distri- * buted matrix A, M_A >= 0. * N_A (global) DESCA( N1_ ) The number of columns in the dis- * tributed matrix A, N_A >= 0. * MB_A (global) DESCA( MB1_ ) The blocking factor used to distri- * bute the rows of A, MB_A > 0. * NB_A (global) DESCA( NB1_ ) The blocking factor used to distri- * bute the columns of A, NB_A > 0. * RSRC_A (global) DESCA( RSRC1_ ) The process row over which the * first row of the matrix A is dis- * tributed, NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC1_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD1_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, MB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, NB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESC (global and local output) INTEGER array * On entry, DESC is an array of dimension DLEN_. DESC is the * array descriptor to be set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix. * N must be at least zero. * * IMB (global input) INTEGER * On entry, IMB specifies the row size of the first block of * the global matrix distribution. IMB must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the column size of the first block * of the global matrix distribution. INB must be at least one. * * MB (global input) INTEGER * On entry, MB specifies the row size of the blocks used to * partition the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the column size of the blocks used to * partition the matrix. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the data is not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When * CSRC = -1, the data is not distributed but replicated, other- * wise CSRC must be at least zero and strictly less than NPCOL. * * CTXT (local input) INTEGER * On entry, CTXT specifies the BLACS context handle, indicating * the global communication context. The value of the context * itself is local. * * LLD (local input) INTEGER * On entry, LLD specifies the leading dimension of the local * array storing the local entries of the matrix. LLD must be at * least MAX( 1, Lr(1,M) ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Executable Statements .. * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESC( CTXT_ ) = CTXT DESC( M_ ) = M DESC( N_ ) = N DESC( IMB_ ) = IMB DESC( INB_ ) = INB DESC( MB_ ) = MB DESC( NB_ ) = NB DESC( RSRC_ ) = RSRC DESC( CSRC_ ) = CSRC DESC( LLD_ ) = LLD * RETURN * * End of PB_DESCSET2 * END SUBROUTINE PB_DESCINIT2( DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, $ CTXT, LLD, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CSRC, CTXT, IMB, INB, INFO, LLD, M, MB, N, NB, $ RSRC * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_DESCINIT2 uses its 10 input arguments M, N, IMB, INB, MB, NB, * RSRC, CSRC, CTXT and LLD to initialize a descriptor vector of type * BLOCK_CYCLIC_2D_INB. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESC (global and local output) INTEGER array * On entry, DESC is an array of dimension DLEN_. DESC is the * array descriptor to be set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix. * N must be at least zero. * * IMB (global input) INTEGER * On entry, IMB specifies the row size of the first block of * the global matrix distribution. IMB must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the column size of the first block * of the global matrix distribution. INB must be at least one. * * MB (global input) INTEGER * On entry, MB specifies the row size of the blocks used to * partition the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the column size of the blocks used to * partition the matrix. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the data is not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When * CSRC = -1, the data is not distributed but replicated, other- * wise CSRC must be at least zero and strictly less than NPCOL. * * CTXT (local input) INTEGER * On entry, CTXT specifies the BLACS context handle, indicating * the global communication context. The value of the context * itself is local. * * LLD (local input) INTEGER * On entry, LLD specifies the leading dimension of the local * array storing the local entries of the matrix. LLD must be at * least MAX( 1, Lr(1,M) ). * * INFO (local output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value. * * Notes * ===== * * If the routine can recover from an erroneous input argument, it will * return an acceptable descriptor vector. For example, if LLD = 0 on * input, DESC( LLD_ ) will contain the smallest leading dimension re- * quired to store the specified m by n matrix, INFO will however be set * to -11 on exit in that case. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDMIN, MP, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( CTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IMB.LT.1 ) THEN INFO = -4 ELSE IF( INB.LT.1 ) THEN INFO = -5 ELSE IF( MB.LT.1 ) THEN INFO = -6 ELSE IF( NB.LT.1 ) THEN INFO = -7 ELSE IF( RSRC.LT.-1 .OR. RSRC.GE.NPROW ) THEN INFO = -8 ELSE IF( CSRC.LT.-1 .OR. CSRC.GE.NPCOL ) THEN INFO = -9 ELSE IF( NPROW.EQ.-1 ) THEN INFO = -10 END IF * * Compute minimum LLD if safe (to avoid division by 0) * IF( INFO.EQ.0 ) THEN MP = PB_NUMROC( M, 1, IMB, MB, MYROW, RSRC, NPROW ) IF( PB_NUMROC( N, 1, INB, NB, MYCOL, CSRC, NPCOL ).GT.0 ) THEN LLDMIN = MAX( 1, MP ) ELSE LLDMIN = 1 END IF IF( LLD.LT.LLDMIN ) $ INFO = -11 END IF * IF( INFO.NE.0 ) $ CALL PXERBLA( CTXT, 'PB_DESCINIT2', -INFO ) * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESC( CTXT_ ) = CTXT DESC( M_ ) = MAX( 0, M ) DESC( N_ ) = MAX( 0, N ) DESC( IMB_ ) = MAX( 1, IMB ) DESC( INB_ ) = MAX( 1, INB ) DESC( MB_ ) = MAX( 1, MB ) DESC( NB_ ) = MAX( 1, NB ) DESC( RSRC_ ) = MAX( -1, MIN( RSRC, NPROW-1 ) ) DESC( CSRC_ ) = MAX( -1, MIN( CSRC, NPCOL-1 ) ) DESC( LLD_ ) = MAX( LLD, LLDMIN ) * RETURN * * End of PB_DESCINIT2 * END SUBROUTINE PB_BINFO( OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ILOW, IMB1, IMBLOC, INB1, INBLOC, IUPP, LCMT00, $ LMBLOC, LNBLOC, LOW, M, MB, MBLKS, MRCOL, $ MRROW, N, NB, NBLKS, OFFD, UPP * .. * * Purpose * ======= * * PB_BINFO initializes the local information of an m by n local array * owned by the process of relative coordinates ( MRROW, MRCOL ). Note * that if m or n is less or equal than zero, there is no data, in which * case this process does not need the local information computed by * this routine to proceed. * * Arguments * ========= * * OFFD (global input) INTEGER * On entry, OFFD specifies the off-diagonal of the underlying * matrix of interest as follows: * OFFD = 0 specifies the main diagonal, * OFFD > 0 specifies lower subdiagonals, and * OFFD < 0 specifies upper superdiagonals. * * M (local input) INTEGER * On entry, M specifies the local number of rows of the under- * lying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the local number of columns of the un- * derlying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). N must be at least zero. * * IMB1 (global input) INTEGER * On input, IMB1 specifies the global true size of the first * block of rows of the underlying global submatrix. IMB1 must * be at least MIN( 1, M ). * * INB1 (global input) INTEGER * On input, INB1 specifies the global true size of the first * block of columns of the underlying global submatrix. INB1 * must be at least MIN( 1, N ). * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * MRROW (local input) INTEGER * On entry, MRROW specifies the relative row coordinate of the * process that possesses these M rows. MRROW must be least zero * and strictly less than NPROW. * * MRCOL (local input) INTEGER * On entry, MRCOL specifies the relative column coordinate of * the process that possesses these N columns. MRCOL must be * least zero and strictly less than NPCOL. * * LCMT00 (local output) INTEGER * On exit, LCMT00 is the LCM value of the left upper block of * this m by n local block owned by the process of relative co- * ordinates ( MRROW, MRCOL ). * * MBLKS (local output) INTEGER * On exit, MBLKS specifies the local number of blocks of rows * corresponding to M. MBLKS must be at least zero. * * NBLKS (local output) INTEGER * On exit, NBLKS specifies the local number of blocks of co- * lumns corresponding to N. NBLKS must be at least zero. * * IMBLOC (local output) INTEGER * On exit, IMBLOC specifies the number of rows (size) of the * uppest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). IMBLOC is at least * MIN( 1, M ). * * INBLOC (local output) INTEGER * On exit, INBLOC specifies the number of columns (size) of * the leftmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). INBLOC is * at least MIN( 1, N ). * * LMBLOC (local output) INTEGER * On exit, LMBLOC specifies the number of rows (size) of the * lowest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). LMBLOC is at least * MIN( 1, M ). * * LNBLOC (local output) INTEGER * On exit, LNBLOC specifies the number of columns (size) of the * rightmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). LNBLOC is * at least MIN( 1, N ). * * ILOW (local output) INTEGER * On exit, ILOW is the lower bound characterizing the first co- * lumn block owning offdiagonals of this m by n array. ILOW * must be less or equal than zero. * * LOW (global output) INTEGER * On exit, LOW is the lower bound characterizing the column * blocks with te exception of the first one (see ILOW) owning * offdiagonals of this m by n array. LOW must be less or equal * than zero. * * IUPP (local output) INTEGER * On exit, IUPP is the upper bound characterizing the first row * block owning offdiagonals of this m by n array. IUPP must be * greater or equal than zero. * * UPP (global output) INTEGER * On exit, UPP is the upper bound characterizing the row * blocks with te exception of the first one (see IUPP) owning * offdiagonals of this m by n array. UPP must be greater or * equal than zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER TMP1 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Initialize LOW, ILOW, UPP, IUPP, LMBLOC, LNBLOC, IMBLOC, INBLOC, * MBLKS, NBLKS and LCMT00. * LOW = 1 - NB UPP = MB - 1 * LCMT00 = OFFD * IF( M.LE.0 .OR. N.LE.0 ) THEN * IF( MRROW.GT.0 ) THEN IUPP = MB - 1 ELSE IUPP = MAX( 0, IMB1 - 1 ) END IF IMBLOC = 0 MBLKS = 0 LMBLOC = 0 * IF( MRCOL.GT.0 ) THEN ILOW = 1 - NB ELSE ILOW = MIN( 0, 1 - INB1 ) END IF INBLOC = 0 NBLKS = 0 LNBLOC = 0 * LCMT00 = LCMT00 + ( LOW - ILOW + MRCOL * NB ) - $ ( IUPP - UPP + MRROW * MB ) * RETURN * END IF * IF( MRROW.GT.0 ) THEN * IMBLOC = MIN( M, MB ) IUPP = MB - 1 LCMT00 = LCMT00 - ( IMB1 - MB + MRROW * MB ) MBLKS = ( M - 1 ) / MB + 1 LMBLOC = M - ( M / MB ) * MB IF( LMBLOC.EQ.0 ) $ LMBLOC = MB * IF( MRCOL.GT.0 ) THEN * INBLOC = MIN( N, NB ) ILOW = 1 - NB LCMT00 = LCMT00 + INB1 - NB + MRCOL * NB NBLKS = ( N - 1 ) / NB + 1 LNBLOC = N - ( N / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * INBLOC = INB1 ILOW = 1 - INB1 TMP1 = N - INB1 IF( TMP1.GT.0 ) THEN * * more than one block * NBLKS = ( TMP1 - 1 ) / NB + 2 LNBLOC = TMP1 - ( TMP1 / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * NBLKS = 1 LNBLOC = INB1 * END IF * END IF * ELSE * IMBLOC = IMB1 IUPP = IMB1 - 1 TMP1 = M - IMB1 IF( TMP1.GT.0 ) THEN * * more than one block * MBLKS = ( TMP1 - 1 ) / MB + 2 LMBLOC = TMP1 - ( TMP1 / MB ) * MB IF( LMBLOC.EQ.0 ) $ LMBLOC = MB * ELSE * MBLKS = 1 LMBLOC = IMB1 * END IF * IF( MRCOL.GT.0 ) THEN * INBLOC = MIN( N, NB ) ILOW = 1 - NB LCMT00 = LCMT00 + INB1 - NB + MRCOL * NB NBLKS = ( N - 1 ) / NB + 1 LNBLOC = N - ( N / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * INBLOC = INB1 ILOW = 1 - INB1 TMP1 = N - INB1 IF( TMP1.GT.0 ) THEN * * more than one block * NBLKS = ( TMP1 - 1 ) / NB + 2 LNBLOC = TMP1 - ( TMP1 / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * NBLKS = 1 LNBLOC = INB1 * END IF * END IF * END IF * RETURN * * End of PB_BINFO * END INTEGER FUNCTION PILAENV( ICTXT, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT CHARACTER*1 PREC * .. * * Purpose * ======= * * PILAENV returns the logical computational block size to be used by * the PBLAS routines during testing and timing. This is a special ver- * sion to be used only as part of the testing or timing PBLAS programs * for testing different values of logical computational block sizes for * the PBLAS routines. It is called by the PBLAS routines to retrieve a * logical computational block size value. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * PREC (dummy input) CHARACTER*1 * On entry, PREC is a dummy argument. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER INFO, NBLOG COMMON /INFOC/INFO, NBLOG * .. * .. Executable Statements .. * PILAENV = NBLOG * RETURN * * End of PILAENV * END SUBROUTINE PB_LOCINFO( I, INB, NB, MYROC, SRCPROC, NPROCS, $ ILOCBLK, ILOCOFF, MYDIST ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, ILOCBLK, ILOCOFF, INB, MYDIST, MYROC, NB, $ NPROCS, SRCPROC * .. * * Purpose * ======= * * PB_LOCINFO computes local information about the beginning of a sub- * matrix starting at the global index I. * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the global starting index in the ma- * trix. I must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of rows * or columns of the matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks of rows or co- * lumns of the matrix is partitioned into. NB must be at least * one. * * MYROC (local input) INTEGER * On entry, MYROC is the coordinate of the process whose local * information is determined. MYROC is at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the submatrix is distributed. NPROCS * must be at least one. * * ILOCBLK (local output) INTEGER * On exit, ILOCBLK specifies the local row or column block * coordinate corresponding to the row or column I of the ma- * trix. ILOCBLK must be at least zero. * * ILOCOFF (local output) INTEGER * On exit, ILOCOFF specifies the local row offset in the block * of local coordinate ILOCBLK corresponding to the row or co- * lumn I of the matrix. ILOCOFF must at least zero. * * MYDIST (local output) INTEGER * On exit, MYDIST specifies the relative process coordinate of * the process specified by MYROC to the process owning the row * or column I. MYDIST is at least zero and strictly less than * NPROCS. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER ITMP, NBLOCKS, PROC * .. * .. Executable Statements .. * ILOCOFF = 0 * IF( SRCPROC.LT.0 ) THEN * MYDIST = 0 * IF( I.LE.INB ) THEN * ILOCBLK = 0 ILOCOFF = I - 1 * ELSE * ITMP = I - INB NBLOCKS = ( ITMP - 1 ) / NB + 1 ILOCBLK = NBLOCKS ILOCOFF = ITMP - 1 - ( NBLOCKS - 1 ) * NB * END IF * ELSE * PROC = SRCPROC MYDIST = MYROC - PROC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROCS * IF( I.LE.INB ) THEN * ILOCBLK = 0 IF( MYROC.EQ.PROC ) $ ILOCOFF = I - 1 * ELSE * ITMP = I - INB NBLOCKS = ( ITMP - 1 ) / NB + 1 PROC = PROC + NBLOCKS PROC = PROC - ( PROC / NPROCS ) * NPROCS ILOCBLK = NBLOCKS / NPROCS * IF( ( ILOCBLK*NPROCS ).LT.( MYDIST-NBLOCKS ) ) $ ILOCBLK = ILOCBLK + 1 * IF( MYROC.EQ.PROC ) $ ILOCOFF = ITMP - 1 - ( NBLOCKS - 1 ) * NB * END IF * END IF * RETURN * * End of PB_LOCINFO * END SUBROUTINE PB_INITJMP( COLMAJ, NVIR, IMBVIR, INBVIR, IMBLOC, $ INBLOC, MB, NB, RSRC, CSRC, NPROW, NPCOL, $ STRIDE, JMP ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL COLMAJ INTEGER CSRC, IMBLOC, IMBVIR, INBLOC, INBVIR, MB, NB, $ NPCOL, NPROW, NVIR, RSRC, STRIDE * .. * .. Array Arguments .. INTEGER JMP( * ) * .. * * Purpose * ======= * * PB_INITJMP initializes the jump values JMP used by the random matrix * generator. * * Arguments * ========= * * COLMAJ (global input) LOGICAL * On entry, COLMAJ specifies the ordering of the random sequen- * ce. When COLMAJ is .TRUE., the random sequence will be used * for a column major ordering, and otherwise a row-major orde- * ring. This impacts on the computation of the jump values. * * NVIR (global input) INTEGER * On entry, NVIR specifies the size of the underlying virtual * matrix. NVIR must be at least zero. * * IMBVIR (local input) INTEGER * On entry, IMBVIR specifies the number of virtual rows of the * upper left block of the underlying virtual submatrix. IMBVIR * must be at least IMBLOC. * * INBVIR (local input) INTEGER * On entry, INBVIR specifies the number of virtual columns of * the upper left block of the underlying virtual submatrix. * INBVIR must be at least INBLOC. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the size of the blocks used to parti- * tion the matrix rows. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix columns. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the rows are not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When CSRC * is equal to -1, the columns are not distributed but replica- * ted, otherwise CSRC must be at least zero and strictly less * than NPCOL. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process co- * lumns over which the matrix is distributed. NPCOL must be at * least one. * * STRIDE (global input) INTEGER * On entry, STRIDE specifies the number of random numbers to be * generated to compute one matrix entry. In the real case, * STRIDE is usually 1, where as in the complex case STRIDE is * usually 2 in order to generate the real and imaginary parts. * * JMP (local output) INTEGER array * On entry, JMP is an array of dimension JMP_LEN. On exit, this * array contains the different jump values used by the random * matrix generator. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER NPMB, NQNB * .. * .. Executable Statements .. * IF( RSRC.LT.0 ) THEN NPMB = MB ELSE NPMB = NPROW * MB END IF IF( CSRC.LT.0 ) THEN NQNB = NB ELSE NQNB = NPCOL * NB END IF * JMP( JMP_1 ) = 1 * JMP( JMP_MB ) = MB JMP( JMP_IMBV ) = IMBVIR JMP( JMP_NPMB ) = NPMB JMP( JMP_NPIMBLOC ) = IMBLOC + NPMB - MB * JMP( JMP_NB ) = NB JMP( JMP_INBV ) = INBVIR JMP( JMP_NQNB ) = NQNB JMP( JMP_NQINBLOC ) = INBLOC + NQNB - NB * IF( COLMAJ ) THEN JMP( JMP_ROW ) = STRIDE JMP( JMP_COL ) = STRIDE * NVIR ELSE JMP( JMP_ROW ) = STRIDE * NVIR JMP( JMP_COL ) = STRIDE END IF * RETURN * * End of PB_INITJMP * END SUBROUTINE PB_INITMULADD( MULADD0, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IMULADD( 4, * ), JMP( * ), MULADD0( * ) * .. * * Purpose * ======= * * PB_INITMULADD initializes the constants a's and c's corresponding to * the jump values (JMP) used by the matrix generator. * * Arguments * ========= * * MULADD0 (local input) INTEGER array * On entry, MULADD0 is an array of dimension 4 containing the * encoded initial constants a and c to jump from X( n ) to * X( n+1 ) = a*X( n ) + c in the random sequence. MULADD0(1:2) * contains respectively the 16-lower and 16-higher bits of the * constant a, and MULADD0(3:4) contains the 16-lower and * 16-higher bits of the constant c. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the matrix generator. * * IMULADD (local output) INTEGER array * On entry, IMULADD is an array of dimension ( 4, JMP_LEN ). On * exit, the jth column of this array contains the encoded ini- * tial constants a_j and c_j to jump from X( n ) to X(n+JMP(j)) * (= a_j*X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * * .. Local Arrays .. INTEGER ITMP1( 2 ), ITMP2( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMP * .. * .. Executable Statements .. * ITMP2( 1 ) = 100 ITMP2( 2 ) = 0 * * Compute IMULADD for all JMP values * CALL PB_JUMP( JMP( JMP_1 ), MULADD0, ITMP2, ITMP1, $ IMULADD( 1, JMP_1 ) ) * CALL PB_JUMP( JMP( JMP_ROW ), MULADD0, ITMP1, ITMP2, $ IMULADD( 1, JMP_ROW ) ) CALL PB_JUMP( JMP( JMP_COL ), MULADD0, ITMP1, ITMP2, $ IMULADD( 1, JMP_COL ) ) * * Compute constants a and c to jump JMP( * ) numbers in the * sequence for column- or row-major ordering of the sequence. * CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_IMBV ) ) CALL PB_JUMP( JMP( JMP_MB ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_MB ) ) CALL PB_JUMP( JMP( JMP_NPMB ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NPMB ) ) CALL PB_JUMP( JMP( JMP_NPIMBLOC ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NPIMBLOC ) ) * CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_INBV ) ) CALL PB_JUMP( JMP( JMP_NB ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NB ) ) CALL PB_JUMP( JMP( JMP_NQNB ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NQNB ) ) CALL PB_JUMP( JMP( JMP_NQINBLOC ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NQINBLOC ) ) * RETURN * * End of PB_INITMULADD * END SUBROUTINE PB_SETLOCRAN( SEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ILOCBLK, ILOCOFF, JLOCBLK, JLOCOFF, MYCDIST, $ MYRDIST, NPCOL, NPROW, SEED * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) * .. * * Purpose * ======= * * PB_SETLOCRAN locally initializes the random number generator. * * Arguments * ========= * * SEED (global input) INTEGER * On entry, SEED specifies a positive integer used to initiali- * ze the first number in the random sequence used by the matrix * generator. SEED must be at least zero. * * ILOCBLK (local input) INTEGER * On entry, ILOCBLK specifies the local row block coordinate * corresponding to the first row of the submatrix of interest. * ILOCBLK must be at least zero. * * ILOCOFF (local input) INTEGER * On entry, ILOCOFF specifies the local row offset in the block * of local coordinate ILOCBLK corresponding to the first row of * the submatrix of interest. ILOCOFF must at least zero. * * JLOCBLK (local input) INTEGER * On entry, JLOCBLK specifies the local column block coordinate * corresponding to the first column of the submatrix of inte- * rest. JLOCBLK must be at least zero. * * JLOCOFF (local input) INTEGER * On entry, JLOCOFF specifies the local column offset in the * block of local coordinate JLOCBLK corresponding to the first * column of the submatrix of interest. JLOCOFF must be at least * zero. * * MYRDIST (local input) INTEGER * On entry, MYRDIST specifies the relative row process coordi- * nate to the process owning the first row of the submatrix of * interest. MYRDIST must be at least zero and stricly less than * NPROW (see the subroutine PB_LOCINFO). * * MYCDIST (local input) INTEGER * On entry, MYCDIST specifies the relative column process coor- * dinate to the process owning the first column of the subma- * trix of interest. MYCDIST must be at least zero and stricly * less than NPCOL (see the subroutine PB_LOCINFO). * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process co- * lumns over which the matrix is distributed. NPCOL must be at * least one. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * IRAN (local output) INTEGER array * On entry, IRAN is an array of dimension 2. On exit, IRAN con- * tains respectively the 16-lower and 32-higher bits of the en- * coding of the entry of the random sequence corresponding lo- * cally to the first local array entry to generate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Arrays .. INTEGER IMULADDTMP( 4 ), ITMP( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMP, PB_SETRAN * .. * .. Executable Statements .. * * Compute and set the value of IRAN corresponding to A( IA, JA ) * ITMP( 1 ) = SEED ITMP( 2 ) = 0 * CALL PB_JUMP( JMP( JMP_1 ), IMULADD( 1, JMP_1 ), ITMP, IRAN, $ IMULADDTMP ) * * Jump ILOCBLK blocks of rows + ILOCOFF rows * CALL PB_JUMP( ILOCOFF, IMULADD( 1, JMP_ROW ), IRAN, ITMP, $ IMULADDTMP ) IF( MYRDIST.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( MYRDIST - 1, IMULADD( 1, JMP_MB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( ILOCBLK, IMULADD( 1, JMP_NPMB ), ITMP, $ IRAN, IMULADDTMP ) ELSE IF( ILOCBLK.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( NPROW - 1, IMULADD( 1, JMP_MB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( ILOCBLK - 1, IMULADD( 1, JMP_NPMB ), ITMP, $ IRAN, IMULADDTMP ) ELSE CALL PB_JUMP( 0, IMULADD( 1, JMP_1 ), ITMP, $ IRAN, IMULADDTMP ) END IF END IF * * Jump JLOCBLK blocks of columns + JLOCOFF columns * CALL PB_JUMP( JLOCOFF, IMULADD( 1, JMP_COL ), IRAN, ITMP, $ IMULADDTMP ) IF( MYCDIST.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( MYCDIST - 1, IMULADD( 1, JMP_NB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( JLOCBLK, IMULADD( 1, JMP_NQNB ), ITMP, $ IRAN, IMULADDTMP ) ELSE IF( JLOCBLK.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( NPCOL - 1, IMULADD( 1, JMP_NB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( JLOCBLK - 1, IMULADD( 1, JMP_NQNB ), ITMP, $ IRAN, IMULADDTMP ) ELSE CALL PB_JUMP( 0, IMULADD( 1, JMP_1 ), ITMP, $ IRAN, IMULADDTMP ) END IF END IF * CALL PB_SETRAN( IRAN, IMULADD( 1, JMP_1 ) ) * RETURN * * End of PB_SETLOCRAN * END SUBROUTINE PB_LADD( J, K, I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER I( 2 ), J( 2 ), K( 2 ) * .. * * Purpose * ======= * * PB_LADD adds without carry two long positive integers K and J and put * the result into I. The long integers I, J, K are encoded on 31 bits * using an array of 2 integers. The 16-lower bits are stored in the * first entry of each array, the 15-higher bits in the second entry. * For efficiency purposes, the intrisic modulo function is inlined. * * Arguments * ========= * * J (local input) INTEGER array * On entry, J is an array of dimension 2 containing the encoded * long integer J. * * K (local input) INTEGER array * On entry, K is an array of dimension 2 containing the encoded * long integer K. * * I (local output) INTEGER array * On entry, I is an array of dimension 2. On exit, this array * contains the encoded long integer I. * * Further Details * =============== * * K( 2 ) K( 1 ) * 0XXXXXXX XXXXXXXX K I( 1 ) = MOD( K( 1 ) + J( 1 ), 2**16 ) * + carry = ( K( 1 ) + J( 1 ) ) / 2**16 * J( 2 ) J( 1 ) * 0XXXXXXX XXXXXXXX J I( 2 ) = K( 2 ) + J( 2 ) + carry * ---------------------- I( 2 ) = MOD( I( 2 ), 2**15 ) * I( 2 ) I( 1 ) * 0XXXXXXX XXXXXXXX I * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16 PARAMETER ( IPOW15 = 2**15, IPOW16 = 2**16 ) * .. * .. Local Scalars .. INTEGER ITMP1, ITMP2 * .. * .. Executable Statements .. * * I( 1 ) = MOD( K( 1 ) + J( 1 ), IPOW16 ) * ITMP1 = K( 1 ) + J( 1 ) ITMP2 = ITMP1 / IPOW16 I( 1 ) = ITMP1 - ITMP2 * IPOW16 * * I( 2 ) = MOD( ( K( 1 ) + J( 1 ) ) / IPOW16 + K( 2 ) + J( 2 ), * IPOW15 ) * ITMP1 = ITMP2 + K( 2 ) + J( 2 ) ITMP2 = ITMP1 / IPOW15 I( 2 ) = ITMP1 - ITMP2 * IPOW15 * RETURN * * End of PB_LADD * END SUBROUTINE PB_LMUL( K, J, I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER I( 2 ), J( 2 ), K( 2 ) * .. * * Purpose * ======= * * PB_LMUL multiplies without carry two long positive integers K and J * and put the result into I. The long integers I, J, K are encoded on * 31 bits using an array of 2 integers. The 16-lower bits are stored in * the first entry of each array, the 15-higher bits in the second entry * of each array. For efficiency purposes, the intrisic modulo function * is inlined. * * Arguments * ========= * * K (local input) INTEGER array * On entry, K is an array of dimension 2 containing the encoded * long integer K. * * J (local input) INTEGER array * On entry, J is an array of dimension 2 containing the encoded * long integer J. * * I (local output) INTEGER array * On entry, I is an array of dimension 2. On exit, this array * contains the encoded long integer I. * * Further Details * =============== * * K( 2 ) K( 1 ) * 0XXXXXXX XXXXXXXX K I( 1 ) = MOD( K( 1 ) + J( 1 ), 2**16 ) * * carry = ( K( 1 ) + J( 1 ) ) / 2**16 * J( 2 ) J( 1 ) * 0XXXXXXX XXXXXXXX J I( 2 ) = K( 2 ) + J( 2 ) + carry * ---------------------- I( 2 ) = MOD( I( 2 ), 2**15 ) * I( 2 ) I( 1 ) * 0XXXXXXX XXXXXXXX I * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16, IPOW30 PARAMETER ( IPOW15 = 2**15, IPOW16 = 2**16, $ IPOW30 = 2**30 ) * .. * .. Local Scalars .. INTEGER ITMP1, ITMP2 * .. * .. Executable Statements .. * ITMP1 = K( 1 ) * J( 1 ) IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * * I( 1 ) = MOD( ITMP1, IPOW16 ) * ITMP2 = ITMP1 / IPOW16 I( 1 ) = ITMP1 - ITMP2 * IPOW16 * ITMP1 = K( 1 ) * J( 2 ) + K( 2 ) * J( 1 ) IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * ITMP1 = ITMP2 + ITMP1 IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * * I( 2 ) = MOD( ITMP1, IPOW15 ) * I( 2 ) = ITMP1 - ( ITMP1 / IPOW15 ) * IPOW15 * RETURN * * End of PB_LMUL * END SUBROUTINE PB_JUMP( K, MULADD, IRANN, IRANM, IMA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER K * .. * .. Array Arguments .. INTEGER IMA( 4 ), IRANM( 2 ), IRANN( 2 ), MULADD( 4 ) * .. * * Purpose * ======= * * PB_JUMP computes the constants A and C to jump K numbers in the ran- * dom sequence: * * X( n+K ) = A * X( n ) + C. * * The constants encoded in MULADD specify how to jump from entry in the * sequence to the next. * * Arguments * ========= * * K (local input) INTEGER * On entry, K specifies the number of entries of the sequence * to jump over. When K is less or equal than zero, A and C are * not computed, and IRANM is set to IRANN corresponding to a * jump of size zero. * * MULADD (local input) INTEGER array * On entry, MULADD is an array of dimension 4 containing the * encoded constants a and c to jump from X( n ) to X( n+1 ) * ( = a*X( n )+c) in the random sequence. MULADD(1:2) contains * respectively the 16-lower and 16-higher bits of the constant * a, and MULADD(3:4) contains the 16-lower and 16-higher bits * of the constant c. * * IRANN (local input) INTEGER array * On entry, IRANN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( n ). * * IRANM (local output) INTEGER array * On entry, IRANM is an array of dimension 2. On exit, this * array contains respectively the 16-lower and 16-higher bits * of the encoding of X( n+K ). * * IMA (local output) INTEGER array * On entry, IMA is an array of dimension 4. On exit, when K is * greater than zero, this array contains the encoded constants * A and C to jump from X( n ) to X( n+K ) in the random se- * quence. IMA(1:2) contains respectively the 16-lower and * 16-higher bits of the constant A, and IMA(3:4) contains the * 16-lower and 16-higher bits of the constant C. When K is * less or equal than zero, this array is not referenced. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Executable Statements .. * IF( K.GT.0 ) THEN * IMA( 1 ) = MULADD( 1 ) IMA( 2 ) = MULADD( 2 ) IMA( 3 ) = MULADD( 3 ) IMA( 4 ) = MULADD( 4 ) * DO 10 I = 1, K - 1 * CALL PB_LMUL( IMA, MULADD, J ) * IMA( 1 ) = J( 1 ) IMA( 2 ) = J( 2 ) * CALL PB_LMUL( IMA( 3 ), MULADD, J ) CALL PB_LADD( MULADD( 3 ), J, IMA( 3 ) ) * 10 CONTINUE * CALL PB_LMUL( IRANN, IMA, J ) CALL PB_LADD( J, IMA( 3 ), IRANM ) * ELSE * IRANM( 1 ) = IRANN( 1 ) IRANM( 2 ) = IRANN( 2 ) * END IF * RETURN * * End of PB_JUMP * END SUBROUTINE PB_SETRAN( IRAN, IAC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IAC( 4 ), IRAN( 2 ) * .. * * Purpose * ======= * * PB_SETRAN initializes the random generator with the encoding of the * first number X( 1 ) in the sequence, and the constants a and c used * to compute the next element in the sequence: * * X( n+1 ) = a * X( n ) + c. * * X( 1 ), a and c are stored in the common block RANCOM for later use * (see the routines PB_SRAN or PB_DRAN). * * Arguments * ========= * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( 1 ). * * IAC (local input) INTEGER array * On entry, IAC is an array of dimension 4. IAC(1:2) contain * respectively the 16-lower and 16-higher bits of the constant * a, and IAC(3:4) contain the 16-lower and 16-higher bits of * the constant c. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * IRAND( 1 ) = IRAN( 1 ) IRAND( 2 ) = IRAN( 2 ) IACS( 1 ) = IAC( 1 ) IACS( 2 ) = IAC( 2 ) IACS( 3 ) = IAC( 3 ) IACS( 4 ) = IAC( 4 ) * RETURN * * End of PB_SETRAN * END SUBROUTINE PB_JUMPIT( MULADD, IRANN, IRANM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IRANM( 2 ), IRANN( 2 ), MULADD( 4 ) * .. * * Purpose * ======= * * PB_JUMPIT jumps in the random sequence from the number X( n ) enco- * ded in IRANN to the number X( m ) encoded in IRANM using the cons- * tants A and C encoded in MULADD: * * X( m ) = A * X( n ) + C. * * The constants A and C obviously depend on m and n, see the subroutine * PB_JUMP in order to set them up. * * Arguments * ========= * * MULADD (local input) INTEGER array * On netry, MULADD is an array of dimension 4. MULADD(1:2) con- * tains respectively the 16-lower and 16-higher bits of the * constant A, and MULADD(3:4) contains the 16-lower and * 16-higher bits of the constant C. * * IRANN (local input) INTEGER array * On entry, IRANN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( n ). * * IRANM (local output) INTEGER array * On entry, IRANM is an array of dimension 2. On exit, this * array contains respectively the 16-lower and 16-higher bits * of the encoding of X( m ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * CALL PB_LMUL( IRANN, MULADD, J ) CALL PB_LADD( J, MULADD( 3 ), IRANM ) * IRAND( 1 ) = IRANM( 1 ) IRAND( 2 ) = IRANM( 2 ) * RETURN * * End of PB_JUMPIT * END scalapack-1.8.0/PBLAS/TESTING/0000750000175000017500000000000010605234726015300 5ustar muammarmuammarscalapack-1.8.0/PBLAS/TESTING/PDBLAS2TST.dat0000640000175000017500000000635410363532303017416 0ustar muammarmuammar'Level 2 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS2TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 3.0D0 value of BETA 4 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 values of M 8 10 12 14 16 18 20 22 24 9 11 13 15 17 19 21 values of N 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of M_A 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of N_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of M_X 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of N_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 30 1 30 1 30 1 30 1 30 1 30 1 30 1 30 1 values of INCX 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of M_Y 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of N_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 40 1 40 1 40 1 40 1 40 1 40 1 40 1 40 1 values of INCY PDGEMV T put F for no test in the same column PDSYMV T put F for no test in the same column PDTRMV T put F for no test in the same column PDTRSV T put F for no test in the same column PDGER T put F for no test in the same column PDSYR T put F for no test in the same column PDSYR2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/PDBLAS1TST.dat0000640000175000017500000000273710363532303017416 0ustar muammarmuammar'Level 1 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS1TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 4 number of tests problems 14 44 28 7 values of N 36 9 39 53 values of M_X 24 67 3 12 values of N_X 2 5 2 5 values of IMB_X 2 5 2 5 values of INB_X 2 5 2 5 values of MB_X 2 5 2 5 values of NB_X 0 0 0 0 values of RSRC_X 0 0 0 0 values of CSRC_X 5 3 1 1 values of IX 2 6 1 1 values of JX 1 9 1 53 values of INCX 2 6 35 14 values of M_Y 27 55 43 12 values of N_Y 2 5 2 5 values of IMB_Y 2 5 2 5 values of INB_Y 2 5 2 5 values of MB_Y 2 5 2 5 values of NB_Y 0 0 0 0 values of RSRC_Y 0 0 0 0 values of CSRC_Y 1 4 1 1 values of IY 7 6 1 1 values of JY 2 6 35 1 values of INCY PDSWAP T put F for no test in the same column PDSCAL T put F for no test in the same column PDCOPY T put F for no test in the same column PDAXPY T put F for no test in the same column PDDOT T put F for no test in the same column PDNRM2 T put F for no test in the same column PDASUM T put F for no test in the same column PDAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/Makefile0000640000175000017500000001306710363532303016741 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0) # # Module: Makefile # # Purpose: Testing Makefile # # Creation date: April 1, 1998 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc sPBLAS1exe = xspblas1tst dPBLAS1exe = xdpblas1tst cPBLAS1exe = xcpblas1tst zPBLAS1exe = xzpblas1tst sPBLAS2exe = xspblas2tst dPBLAS2exe = xdpblas2tst cPBLAS2exe = xcpblas2tst zPBLAS2exe = xzpblas2tst sPBLAS3exe = xspblas3tst dPBLAS3exe = xdpblas3tst cPBLAS3exe = xcpblas3tst zPBLAS3exe = xzpblas3tst spb1tst = $(PBLASTSTdir)/$(sPBLAS1exe) dpb1tst = $(PBLASTSTdir)/$(dPBLAS1exe) cpb1tst = $(PBLASTSTdir)/$(cPBLAS1exe) zpb1tst = $(PBLASTSTdir)/$(zPBLAS1exe) spb2tst = $(PBLASTSTdir)/$(sPBLAS2exe) dpb2tst = $(PBLASTSTdir)/$(dPBLAS2exe) cpb2tst = $(PBLASTSTdir)/$(cPBLAS2exe) zpb2tst = $(PBLASTSTdir)/$(zPBLAS2exe) spb3tst = $(PBLASTSTdir)/$(sPBLAS3exe) dpb3tst = $(PBLASTSTdir)/$(dPBLAS3exe) cpb3tst = $(PBLASTSTdir)/$(cPBLAS3exe) zpb3tst = $(PBLASTSTdir)/$(zPBLAS3exe) pbtcom = pblastst.o PB_Cwarn.o PB_Cabort.o spbtcom = psblastst.o slamch.o $(pbtcom) dpbtcom = pdblastst.o dlamch.o $(pbtcom) cpbtcom = pcblastst.o slamch.o $(pbtcom) zpbtcom = pzblastst.o dlamch.o $(pbtcom) spb1t = psblas1tst.o $(spbtcom) dpb1t = pdblas1tst.o $(dpbtcom) cpb1t = pcblas1tst.o $(cpbtcom) zpb1t = pzblas1tst.o $(zpbtcom) spb2t = psblas2tst.o $(spbtcom) dpb2t = pdblas2tst.o $(dpbtcom) cpb2t = pcblas2tst.o $(cpbtcom) zpb2t = pzblas2tst.o $(zpbtcom) spb3t = psblas3tst.o $(spbtcom) dpb3t = pdblas3tst.o $(dpbtcom) cpb3t = pcblas3tst.o $(cpbtcom) zpb3t = pzblas3tst.o $(zpbtcom) all : single double complex complex16 single: PblasErrorHandler $(spb1tst) $(spb2tst) $(spb3tst) double: PblasErrorHandler $(dpb1tst) $(dpb2tst) $(dpb3tst) complex: PblasErrorHandler $(cpb1tst) $(cpb2tst) $(cpb3tst) complex16: PblasErrorHandler $(zpb1tst) $(zpb2tst) $(zpb3tst) PblasErrorHandler: rm -f PB_Cwarn.c PB_Cwarn.o ln -s $(PBLASdir)/SRC/PTOOLS/PB_Cwarn.c PB_Cwarn.c $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas PB_Cwarn.c rm -f PB_Cabort.c PB_Cabort.o ln -s $(PBLASdir)/SRC/PTOOLS/PB_Cabort.c PB_Cabort.c $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas PB_Cabort.c $(PBLASTSTdir)/PSBLAS1TST.dat: PSBLAS1TST.dat cp PSBLAS1TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS1TST.dat: PDBLAS1TST.dat cp PDBLAS1TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS1TST.dat: PCBLAS1TST.dat cp PCBLAS1TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS1TST.dat: PZBLAS1TST.dat cp PZBLAS1TST.dat $(PBLASTSTdir) $(spb1tst) : $(SCALAPACKLIB) $(spb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb1tst) $(spb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS1TST.dat $(dpb1tst) : $(SCALAPACKLIB) $(dpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb1tst) $(dpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS1TST.dat $(cpb1tst) : $(SCALAPACKLIB) $(cpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb1tst) $(cpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS1TST.dat $(zpb1tst) : $(SCALAPACKLIB) $(zpb1t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb1tst) $(zpb1t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS1TST.dat $(PBLASTSTdir)/PSBLAS2TST.dat: PSBLAS2TST.dat cp PSBLAS2TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS2TST.dat: PDBLAS2TST.dat cp PDBLAS2TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS2TST.dat: PCBLAS2TST.dat cp PCBLAS2TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS2TST.dat: PZBLAS2TST.dat cp PZBLAS2TST.dat $(PBLASTSTdir) $(spb2tst) : $(SCALAPACKLIB) $(spb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb2tst) $(spb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS2TST.dat $(dpb2tst) : $(SCALAPACKLIB) $(dpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb2tst) $(dpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS2TST.dat $(cpb2tst) : $(SCALAPACKLIB) $(cpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb2tst) $(cpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS2TST.dat $(zpb2tst) : $(SCALAPACKLIB) $(zpb2t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb2tst) $(zpb2t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS2TST.dat $(PBLASTSTdir)/PSBLAS3TST.dat: PSBLAS3TST.dat cp PSBLAS3TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PDBLAS3TST.dat: PDBLAS3TST.dat cp PDBLAS3TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PCBLAS3TST.dat: PCBLAS3TST.dat cp PCBLAS3TST.dat $(PBLASTSTdir) $(PBLASTSTdir)/PZBLAS3TST.dat: PZBLAS3TST.dat cp PZBLAS3TST.dat $(PBLASTSTdir) $(spb3tst) : $(SCALAPACKLIB) $(spb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(spb3tst) $(spb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PSBLAS3TST.dat $(dpb3tst) : $(SCALAPACKLIB) $(dpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(dpb3tst) $(dpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PDBLAS3TST.dat $(cpb3tst) : $(SCALAPACKLIB) $(cpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(cpb3tst) $(cpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PCBLAS3TST.dat $(zpb3tst) : $(SCALAPACKLIB) $(zpb3t) $(F77LOADER) $(F77LOADFLAGS) -o $(zpb3tst) $(zpb3t) $(LIBS) $(MAKE) $(PBLASTSTdir)/PZBLAS3TST.dat $(spb1t): $(FRC) $(dpb1t): $(FRC) $(cpb1t): $(FRC) $(zpb1t): $(FRC) $(spb2t): $(FRC) $(dpb2t): $(FRC) $(cpb2t): $(FRC) $(zpb2t): $(FRC) $(spb3t): $(FRC) $(dpb3t): $(FRC) $(cpb3t): $(FRC) $(zpb3t): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o slamch.o: $(F77) -c $(NOOPT) slamch.f dlamch.o: $(F77) -c $(NOOPT) dlamch.f .f.o : ; $(F77) -c $(F77FLAGS) $*.f # .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) -DTestingPblas $*.c scalapack-1.8.0/PBLAS/TESTING/pblastst.f0000640000175000017500000053317110363532303017307 0ustar muammarmuammar SUBROUTINE PVDIMCHK( ICTXT, NOUT, N, MATRIX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER ICTXT, INCX, INFO, IX, JX, N, NOUT * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PVDIMCHK checks the validity of the input test dimensions. In case of * an invalid parameter or discrepancy between the parameters, this rou- * tine displays error messages and returns an non-zero error code in * INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.LT.0 ) THEN INFO = 1 ELSE IF( N.EQ.0 ) THEN IF( DESCX( M_ ).LT.0 ) $ INFO = 1 IF( DESCX( N_ ).LT.0 ) $ INFO = 1 ELSE IF( INCX.EQ.DESCX( M_ ) .AND. $ DESCX( N_ ).LT.( JX+N-1 ) ) THEN INFO = 1 ELSE IF( INCX.EQ.1 .AND. INCX.NE.DESCX( M_ ) .AND. $ DESCX( M_ ).LT.( IX+N-1 ) ) THEN INFO = 1 ELSE IF( IX.GT.DESCX( M_ ) ) THEN INFO = 1 ELSE IF( JX.GT.DESCX( N_ ) ) THEN INFO = 1 END IF END IF END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) MATRIX WRITE( NOUT, FMT = 9998 ) N, MATRIX, IX, MATRIX, JX, MATRIX, $ INCX WRITE( NOUT, FMT = 9997 ) MATRIX, DESCX( M_ ), MATRIX, $ DESCX( N_ ) WRITE( NOUT, FMT = * ) END IF END IF * 9999 FORMAT( 'Incompatible arguments for matrix ', A1, ':' ) 9998 FORMAT( 'N = ', I6, ', I', A1, ' = ', I6, ', J', A1, ' = ', $ I6, ',INC', A1, ' = ', I6 ) 9997 FORMAT( 'DESC', A1, '( M_ ) = ', I6, ', DESC', A1, '( N_ ) = ', $ I6, '.' ) * RETURN * * End of PVDIMCHK * END SUBROUTINE PMDIMCHK( ICTXT, NOUT, M, N, MATRIX, IA, JA, DESCA, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER ICTXT, INFO, IA, JA, M, N, NOUT * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PMDIMCHK checks the validity of the input test dimensions. In case of * an invalid parameter or discrepancy between the parameters, this rou- * tine displays error messages and returns an non-zero error code in * INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( ( M.LT.0 ).OR.( N.LT.0 ) ) THEN INFO = 1 ELSE IF( ( M.EQ.0 ).OR.( N.EQ.0 ) )THEN IF( DESCA( M_ ).LT.0 ) $ INFO = 1 IF( DESCA( N_ ).LT.0 ) $ INFO = 1 ELSE IF( DESCA( M_ ).LT.( IA+M-1 ) ) $ INFO = 1 IF( DESCA( N_ ).LT.( JA+N-1 ) ) $ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9999 ) MATRIX WRITE( NOUT, FMT = 9998 ) M, N, MATRIX, IA, MATRIX, JA WRITE( NOUT, FMT = 9997 ) MATRIX, DESCA( M_ ), MATRIX, $ DESCA( N_ ) WRITE( NOUT, FMT = * ) END IF END IF * 9999 FORMAT( 'Incompatible arguments for matrix ', A1, ':' ) 9998 FORMAT( 'M = ', I6, ', N = ', I6, ', I', A1, ' = ', I6, $ ', J', A1, ' = ', I6 ) 9997 FORMAT( 'DESC', A1, '( M_ ) = ', I6, ', DESC', A1, '( N_ ) = ', $ I6, '.' ) * RETURN * * End of PMDIMCHK * END SUBROUTINE PVDESCCHK( ICTXT, NOUT, MATRIX, DESCX, DTX, MX, NX, $ IMBX, INBX, MBX, NBX, RSRCX, CSRCX, INCX, $ MPX, NQX, IPREX, IMIDX, IPOSTX, IGAP, $ GAPMUL, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER CSRCX, DTX, GAPMUL, ICTXT, IGAP, IMBX, IMIDX, $ INBX, INCX, INFO, IPOSTX, IPREX, MBX, MPX, MX, $ NBX, NOUT, NQX, NX, RSRCX * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PVDESCCHK checks the validity of the input test parameters and ini- * tializes the descriptor DESCX and the scalar variables MPX, NQX. In * case of an invalid parameter, this routine displays error messages * and return an non-zero error code in INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * DESCX (global output) INTEGER array * On entry, DESCX is an array of dimension DLEN_. DESCX is the * array descriptor to be set. * * DTYPEX (global input) INTEGER * On entry, DTYPEX specifies the descriptor type. In this ver- * sion, DTYPEX must be BLOCK_CYCLIC_INB_2D. * * MX (global input) INTEGER * On entry, MX specifies the number of rows in the matrix. MX * must be at least zero. * * NX (global input) INTEGER * On entry, NX specifies the number of columns in the matrix. * NX must be at least zero. * * IMBX (global input) INTEGER * On entry, IMBX specifies the row blocking factor used to dis- * tribute the first IMBX rows of the matrix. IMBX must be at * least one. * * INBX (global input) INTEGER * On entry, INBX specifies the column blocking factor used to * distribute the first INBX columns of the matrix. INBX must * be at least one. * * MBX (global input) INTEGER * On entry, MBX specifies the row blocking factor used to dis- * tribute the rows of the matrix. MBX must be at least one. * * NBX (global input) INTEGER * On entry, NBX specifies the column blocking factor used to * distribute the columns of the matrix. NBX must be at least * one. * * RSRCX (global input) INTEGER * On entry, RSRCX specifies the process row in which the first * row of the matrix resides. When RSRCX is -1, the matrix is * row replicated, otherwise RSCRX must be at least zero and * strictly less than NPROW. * * CSRCX (global input) INTEGER * On entry, CSRCX specifies the process column in which the * first column of the matrix resides. When CSRCX is -1, the * matrix is column replicated, otherwise CSCRX must be at least * zero and strictly less than NPCOL. * * INCX (global input) INTEGER * On entry, INCX specifies the global vector increment. INCX * must be one or MX. * * MPX (local output) INTEGER * On exit, MPX is Lr( 1, MX ). * * NQX (local output) INTEGER * On exit, NQX is Lc( 1, NX ). * * IPREX (local output) INTEGER * On exit, IPREX specifies the size of the guard zone to put * before the start of the local padded array. * * IMIDX (local output) INTEGER * On exit, IMIDX specifies the ldx-gap of the guard zone to * put after each column of the local padded array. * * IPOSTX (local output) INTEGER * On exit, IPOSTX specifies the size of the guard zone to put * after the local padded array. * * IGAP (global input) INTEGER * On entry, IGAP specifies the size of the ldx-gap. * * GAPMUL (global input) INTEGER * On entry, GAPMUL is a constant factor controlling the size * of the pre- and post guardzone. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDX, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_DESCINIT2 * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Verify descriptor type DTYPE_ * IF( DTX.NE.BLOCK_CYCLIC_2D_INB ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) MATRIX, 'DTYPE', MATRIX, DTX, $ BLOCK_CYCLIC_2D_INB INFO = 1 END IF * * Verify global matrix dimensions (M_,N_) are correct * IF( MX.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) MATRIX, 'M', MATRIX, MX INFO = 1 ELSE IF( NX.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) MATRIX, 'N', MATRIX, NX INFO = 1 END IF * * Verify if blocking factors (IMB_, INB_) are correct * IF( IMBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) MATRIX, 'IMB', MATRIX, IMBX INFO = 1 ELSE IF( INBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) MATRIX, 'INB', MATRIX, INBX INFO = 1 END IF * * Verify if blocking factors (MB_, NB_) are correct * IF( MBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) MATRIX, 'MB', MATRIX, MBX INFO = 1 ELSE IF( NBX.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) MATRIX, 'NB', MATRIX, NBX INFO = 1 END IF * * Verify if origin process coordinates (RSRC_, CSRC_) are valid * IF( RSRCX.LT.-1 .OR. RSRCX.GE.NPROW ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'RSRC', MATRIX, RSRCX, NPROW END IF INFO = 1 ELSE IF( CSRCX.LT.-1 .OR. CSRCX.GE.NPCOL ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9991 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'CSRC', MATRIX, CSRCX, NPCOL END IF INFO = 1 END IF * * Check input increment value * IF( INCX.NE.1 .AND. INCX.NE.MX ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = 9988 ) 'INC', MATRIX, INCX, MATRIX, MX END IF INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9987 ) MATRIX WRITE( NOUT, FMT = * ) END IF * ELSE * * Compute local testing leading dimension * MPX = PB_NUMROC( MX, 1, IMBX, MBX, MYROW, RSRCX, NPROW ) NQX = PB_NUMROC( NX, 1, INBX, NBX, MYCOL, CSRCX, NPCOL ) IPREX = MAX( GAPMUL*NBX, MPX ) IMIDX = IGAP IPOSTX = MAX( GAPMUL*NBX, NQX ) LLDX = MAX( 1, MPX ) + IMIDX * CALL PB_DESCINIT2( DESCX, MX, NX, IMBX, INBX, MBX, NBX, RSRCX, $ CSRCX, ICTXT, LLDX, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9987 ) MATRIX WRITE( NOUT, FMT = * ) END IF END IF * END IF * 9999 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor type ', A5, A1, $ ': ', I6, ' should be ', I3, '.' ) 9998 FORMAT( 2X, '>> Invalid matrix ', A1, ' row dimension ', A1, A1, $ ': ', I6, ' should be at least 1.' ) 9997 FORMAT( 2X, '>> Invalid matrix ', A1, ' column dimension ', A1, $ A1, ': ', I6, ' should be at least 1.' ) 9996 FORMAT( 2X, '>> Invalid matrix ', A1, ' first row block size ', $ A3, A1, ': ', I6, ' should be at least 1.' ) 9995 FORMAT( 2X, '>> Invalid matrix ', A1, ' first column block size ', $ A3, A1,': ', I6, ' should be at least 1.' ) 9994 FORMAT( 2X, '>> Invalid matrix ', A1, ' row block size ', A2, A1, $ ': ', I6, ' should be at least 1.' ) 9993 FORMAT( 2X, '>> Invalid matrix ', A1, ' column block size ', A2, $ A1,': ', I6, ' should be at least 1.' ) 9992 FORMAT( 2X, '>> Invalid matrix ', A1, ' row process source:' ) 9991 FORMAT( 2X, '>> Invalid matrix ', A1, ' column process source:' ) 9990 FORMAT( 2X, '>> ', A4, A1, '= ', I6, ' should be >= -1 and < ', $ I6, '.' ) 9989 FORMAT( 2X, '>> Invalid vector ', A1, ' increment:' ) 9988 FORMAT( 2X, '>> ', A3, A1, '= ', I6, ' should be 1 or M', A1, $ ' = ', I6, '.' ) 9987 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor: going on to ', $ 'next test case.' ) * RETURN * * End of PVDESCCHK * END SUBROUTINE PMDESCCHK( ICTXT, NOUT, MATRIX, DESCA, DTA, MA, NA, $ IMBA, INBA, MBA, NBA, RSRCA, CSRCA, MPA, $ NQA, IPREA, IMIDA, IPOSTA, IGAP, GAPMUL, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 MATRIX INTEGER CSRCA, DTA, GAPMUL, ICTXT, IGAP, IMBA, IMIDA, $ INBA, INFO, IPOSTA, IPREA, MA, MBA, MPA, NA, $ NBA, NOUT, NQA, RSRCA * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PMDESCCHK checks the validity of the input test parameters and ini- * tializes the descriptor DESCA and the scalar variables MPA, NQA. In * case of an invalid parameter, this routine displays error messages * and return an non-zero error code in INFO. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * MATRIX (global input) CHARACTER*1 * On entry, MATRIX specifies the one character matrix identi- * fier. * * DESCA (global output) INTEGER array * On entry, DESCA is an array of dimension DLEN_. DESCA is the * array descriptor to be set. * * DTYPEA (global input) INTEGER * On entry, DTYPEA specifies the descriptor type. In this ver- * sion, DTYPEA must be BLOCK_CYCLIC_INB_2D. * * MA (global input) INTEGER * On entry, MA specifies the number of rows in the matrix. MA * must be at least zero. * * NA (global input) INTEGER * On entry, NA specifies the number of columns in the matrix. * NA must be at least zero. * * IMBA (global input) INTEGER * On entry, IMBA specifies the row blocking factor used to dis- * tribute the first IMBA rows of the matrix. IMBA must be at * least one. * * INBA (global input) INTEGER * On entry, INBA specifies the column blocking factor used to * distribute the first INBA columns of the matrix. INBA must * be at least one. * * MBA (global input) INTEGER * On entry, MBA specifies the row blocking factor used to dis- * tribute the rows of the matrix. MBA must be at least one. * * NBA (global input) INTEGER * On entry, NBA specifies the column blocking factor used to * distribute the columns of the matrix. NBA must be at least * one. * * RSRCA (global input) INTEGER * On entry, RSRCA specifies the process row in which the first * row of the matrix resides. When RSRCA is -1, the matrix is * row replicated, otherwise RSCRA must be at least zero and * strictly less than NPROW. * * CSRCA (global input) INTEGER * On entry, CSRCA specifies the process column in which the * first column of the matrix resides. When CSRCA is -1, the * matrix is column replicated, otherwise CSCRA must be at least * zero and strictly less than NPCOL. * * MPA (local output) INTEGER * On exit, MPA is Lr( 1, MA ). * * NQA (local output) INTEGER * On exit, NQA is Lc( 1, NA ). * * IPREA (local output) INTEGER * On exit, IPREA specifies the size of the guard zone to put * before the start of the local padded array. * * IMIDA (local output) INTEGER * On exit, IMIDA specifies the lda-gap of the guard zone to * put after each column of the local padded array. * * IPOSTA (local output) INTEGER * On exit, IPOSTA specifies the size of the guard zone to put * after the local padded array. * * IGAP (global input) INTEGER * On entry, IGAP specifies the size of the lda-gap. * * GAPMUL (global input) INTEGER * On entry, GAPMUL is a constant factor controlling the size * of the pre- and post guardzone. * * INFO (global output) INTEGER * On exit, when INFO is zero, no error has been detected, * otherwise an error has been detected. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_DESCINIT2 * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * INFO = 0 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Verify descriptor type DTYPE_ * IF( DTA.NE.BLOCK_CYCLIC_2D_INB ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) MATRIX, 'DTYPE', MATRIX, DTA, $ BLOCK_CYCLIC_2D_INB INFO = 1 END IF * * Verify global matrix dimensions (M_,N_) are correct * IF( MA.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) MATRIX, 'M', MATRIX, MA INFO = 1 ELSE IF( NA.LT.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) MATRIX, 'N', MATRIX, NA INFO = 1 END IF * * Verify if blocking factors (IMB_, INB_) are correct * IF( IMBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) MATRIX, 'IMB', MATRIX, IMBA INFO = 1 ELSE IF( INBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9995 ) MATRIX, 'INB', MATRIX, INBA INFO = 1 END IF * * Verify if blocking factors (MB_, NB_) are correct * IF( MBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9994 ) MATRIX, 'MB', MATRIX, MBA INFO = 1 ELSE IF( NBA.LT.1 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9993 ) MATRIX, 'NB', MATRIX, NBA INFO = 1 END IF * * Verify if origin process coordinates (RSRC_, CSRC_) are valid * IF( RSRCA.LT.-1 .OR. RSRCA.GE.NPROW ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9992 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'RSRC', MATRIX, RSRCA, NPROW END IF INFO = 1 ELSE IF( CSRCA.LT.-1 .OR. CSRCA.GE.NPCOL ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9991 ) MATRIX WRITE( NOUT, FMT = 9990 ) 'CSRC', MATRIX, CSRCA, NPCOL END IF INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = * ) END IF * ELSE * * Compute local testing leading dimension * MPA = PB_NUMROC( MA, 1, IMBA, MBA, MYROW, RSRCA, NPROW ) NQA = PB_NUMROC( NA, 1, INBA, NBA, MYCOL, CSRCA, NPCOL ) IPREA = MAX( GAPMUL*NBA, MPA ) IMIDA = IGAP IPOSTA = MAX( GAPMUL*NBA, NQA ) LLDA = MAX( 1, MPA ) + IMIDA * CALL PB_DESCINIT2( DESCA, MA, NA, IMBA, INBA, MBA, NBA, RSRCA, $ CSRCA, ICTXT, LLDA, INFO ) * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( INFO.NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9989 ) MATRIX WRITE( NOUT, FMT = * ) END IF END IF * END IF * 9999 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor type ', A5, A1, $ ': ', I6, ' should be ', I3, '.' ) 9998 FORMAT( 2X, '>> Invalid matrix ', A1, ' row dimension ', A1, A1, $ ': ', I6, ' should be at least 1.' ) 9997 FORMAT( 2X, '>> Invalid matrix ', A1, ' column dimension ', A1, $ A1, ': ', I6, ' should be at least 1.' ) 9996 FORMAT( 2X, '>> Invalid matrix ', A1, ' first row block size ', $ A3, A1, ': ', I6, ' should be at least 1.' ) 9995 FORMAT( 2X, '>> Invalid matrix ', A1, ' first column block size ', $ A3, A1,': ', I6, ' should be at least 1.' ) 9994 FORMAT( 2X, '>> Invalid matrix ', A1, ' row block size ', A2, A1, $ ': ', I6, ' should be at least 1.' ) 9993 FORMAT( 2X, '>> Invalid matrix ', A1, ' column block size ', A2, $ A1,': ', I6, ' should be at least 1.' ) 9992 FORMAT( 2X, '>> Invalid matrix ', A1, ' row process source:' ) 9991 FORMAT( 2X, '>> Invalid matrix ', A1, ' column process source:' ) 9990 FORMAT( 2X, '>> ', A4, A1, '= ', I6, ' should be >= -1 and < ', $ I6, '.' ) 9989 FORMAT( 2X, '>> Invalid matrix ', A1, ' descriptor: going on to ', $ 'next test case.' ) * RETURN * * End of PMDESCCHK * END SUBROUTINE PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INFOT, NOUT CHARACTER*(*) SNAME * .. * * Purpose * ======= * * PCHKPBE tests whether a PBLAS routine has detected an error when it * should. This routine does a global operation to ensure all processes * have detected this error. If an error has been detected an error * message is displayed. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * INFOT (global input) INTEGER * On entry, INFOT specifies the position of the wrong argument. * If the PBLAS error handler is called, INFO will be set to * -INFOT. This routine verifies if the error was reported by * all processes by doing a global sum, and assert the result to * be NPROW * NPCOL. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER GERR, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Common Blocks .. INTEGER INFO, NBLOG COMMON /INFOC/INFO, NBLOG * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * GERR = 0 IF( INFO.NE.-INFOT ) $ GERR = 1 * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, GERR, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN IF( GERR.EQ.( NPROW * NPCOL ) ) THEN WRITE( NOUT, FMT = 9999 ) SNAME, INFO, -INFOT END IF END IF * 9999 FORMAT( 1X, A7, ': *** ERROR *** ERROR CODE RETURNED = ', I6, $ ' SHOULD HAVE BEEN ', I6 ) * RETURN * * End of PCHKPBE * END REAL FUNCTION PSDIFF( X, Y ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL X, Y * .. * * Purpose * ======= * * PSDIFF returns the scalar difference X - Y. Similarly to the * BLAS tester, this routine allows for the possibility of computing a * more accurate difference if necessary. * * Arguments * ========= * * X (input) REAL * The real scalar X. * * Y (input) REAL * The real scalar Y. * * ===================================================================== * * .. Executable Statements .. * PSDIFF = X - Y * RETURN * * End of PSDIFF * END * DOUBLE PRECISION FUNCTION PDDIFF( X, Y ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. * * Purpose * ======= * * PDDIFF returns the scalar difference X - Y. Similarly to the * BLAS tester, this routine allows for the possibility of computing a * more accurate difference if necessary. * * Arguments * ========= * * X (input) DOUBLE PRECISION * The real scalar X. * * Y (input) DOUBLE PRECISION * The real scalar Y. * * ===================================================================== * * .. Executable Statements .. * PDDIFF = X - Y * RETURN * * End of PDDIFF * END SUBROUTINE PXERBLA( ICTXT, SRNAME, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INFO * .. * .. Array Arguments .. CHARACTER*(*) SRNAME * .. * * Purpose * ======= * * PXERBLA is an error handler for the ScaLAPACK routines. It is called * by a ScaLAPACK routine if an input parameter has an invalid value. A * message is printed. Installers may consider modifying this routine in * order to call system-specific exception-handling facilities. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * SRNAME (global input) CHARACTER*(*) * On entry, SRNAME specifies the name of the routine which cal- * ling PXERBLA. * * INFO (global input) INTEGER * On entry, INFO specifies the position of the invalid parame- * ter in the parameter list of the calling routine. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * WRITE( *, FMT = 9999 ) MYROW, MYCOL, SRNAME, INFO * 9999 FORMAT( '{', I5, ',', I5, '}: On entry to ', A, $ ' parameter number ', I4, ' had an illegal value' ) * RETURN * * End of PXERBLA * END LOGICAL FUNCTION LSAME( CA, CB ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER CA, CB * .. * * Purpose * ======= * * LSAME returns .TRUE. if CA is the same letter as CB regardless of * case. * * Arguments * ========= * * CA (input) CHARACTER*1 * CB (input) CHARACTER*1 * CA and CB specify the single characters to be compared. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ICHAR * .. * .. Local Scalars .. INTEGER INTA, INTB, ZCODE * .. * .. Executable Statements .. * * Test if the characters are equal * LSAME = CA.EQ.CB IF( LSAME ) $ RETURN * * Now test for equivalence if both characters are alphabetic. * ZCODE = ICHAR( 'Z' ) * * Use 'Z' rather than 'A' so that ASCII can be detected on Prime * machines, on which ICHAR returns a value with bit 8 set. * ICHAR('A') on Prime machines returns 193 which is the same as * ICHAR('A') on an EBCDIC machine. * INTA = ICHAR( CA ) INTB = ICHAR( CB ) * IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN * * ASCII is assumed - ZCODE is the ASCII code of either lower or * upper case 'Z'. * IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 * ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN * * EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or * upper case 'Z'. * IF( INTA.GE.129 .AND. INTA.LE.137 .OR. $ INTA.GE.145 .AND. INTA.LE.153 .OR. $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 IF( INTB.GE.129 .AND. INTB.LE.137 .OR. $ INTB.GE.145 .AND. INTB.LE.153 .OR. $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 * ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN * * ASCII is assumed, on Prime machines - ZCODE is the ASCII code * plus 128 of either lower or upper case 'Z'. * IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 END IF LSAME = INTA.EQ.INTB * * RETURN * * End of LSAME * END LOGICAL FUNCTION LSAMEN( N, CA, CB ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER*( * ) CA, CB INTEGER N * .. * * Purpose * ======= * * LSAMEN tests if the first N letters of CA are the same as the * first N letters of CB, regardless of case. * LSAMEN returns .TRUE. if CA and CB are equivalent except for case * and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) * or LEN( CB ) is less than N. * * Arguments * ========= * * N (input) INTEGER * The number of characters in CA and CB to be compared. * * CA (input) CHARACTER*(*) * CB (input) CHARACTER*(*) * CA and CB specify two character strings of length at least N. * Only the first N characters of each string will be accessed. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC LEN * .. * .. Executable Statements .. * LSAMEN = .FALSE. IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) $ GO TO 20 * * Do for each character in the two strings. * DO 10 I = 1, N * * Test if the characters are equal using LSAME. * IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) $ GO TO 20 * 10 CONTINUE LSAMEN = .TRUE. * 20 CONTINUE RETURN * * End of LSAMEN * END SUBROUTINE ICOPY( N, SX, INCX, SY, INCY ) * * -- LAPACK auxiliary test routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * February 29, 1992 * * .. Scalar Arguments .. INTEGER INCX, INCY, N * .. * .. Array Arguments .. INTEGER SX( * ), SY( * ) * .. * * Purpose * ======= * * ICOPY copies an integer vector x to an integer vector y. * Uses unrolled loops for increments equal to 1. * * Arguments * ========= * * N (input) INTEGER * The length of the vectors SX and SY. * * SX (input) INTEGER array, dimension (1+(N-1)*abs(INCX)) * The vector X. * * INCX (input) INTEGER * The spacing between consecutive elements of SX. * * SY (output) INTEGER array, dimension (1+(N-1)*abs(INCY)) * The vector Y. * * INCY (input) INTEGER * The spacing between consecutive elements of SY. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IX, IY, M, MP1 * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * IF( N.LE.0 ) $ RETURN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) $ GO TO 20 * * Code for unequal increments or equal increments not equal to 1 * IX = 1 IY = 1 IF( INCX.LT.0 ) $ IX = ( -N+1 )*INCX + 1 IF( INCY.LT.0 ) $ IY = ( -N+1 )*INCY + 1 DO 10 I = 1, N SY( IY ) = SX( IX ) IX = IX + INCX IY = IY + INCY 10 CONTINUE RETURN * * Code for both increments equal to 1 * * Clean-up loop * 20 CONTINUE M = MOD( N, 7 ) IF( M.EQ.0 ) $ GO TO 40 DO 30 I = 1, M SY( I ) = SX( I ) 30 CONTINUE IF( N.LT.7 ) $ RETURN 40 CONTINUE MP1 = M + 1 DO 50 I = MP1, N, 7 SY( I ) = SX( I ) SY( I+1 ) = SX( I+1 ) SY( I+2 ) = SX( I+2 ) SY( I+3 ) = SX( I+3 ) SY( I+4 ) = SX( I+4 ) SY( I+5 ) = SX( I+5 ) SY( I+6 ) = SX( I+6 ) 50 CONTINUE RETURN * * End of ICOPY * END INTEGER FUNCTION PB_NOABORT( CINFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CINFO * .. * * Purpose * ======= * * PB_NOABORT transmits the info parameter of a PBLAS routine to the * tester and tells the PBLAS error handler to avoid aborting on erro- * neous input arguments. * * Notes * ===== * * This routine is necessary because of the CRAY C fortran interface * and the fact that the usual PBLAS error handler routine has been * initially written in C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER INFO, NBLOG, NOUT LOGICAL ABRTFLG COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Executable Statements .. * INFO = CINFO IF( ABRTFLG ) THEN PB_NOABORT = 0 ELSE PB_NOABORT = 1 END IF * RETURN * * End of PB_NOABORT * END SUBROUTINE PB_INFOG2L( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, PROW, PCOL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, II, J, JJ, MYCOL, MYROW, NPCOL, NPROW, PCOL, $ PROW * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_INFOG2L computes the starting local index II, JJ corresponding to * the submatrix starting globally at the entry pointed by I, J. This * routine returns the coordinates in the grid of the process owning the * matrix entry of global indexes I, J, namely PROW and PCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least one. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least one. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least one. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of the * submatrix. On exit, JJ is at least one. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero * and strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least * zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRC, I1, ILOCBLK, IMB, INB, J1, MB, MYDIST, $ NB, NBLOCKS, RSRC * .. * .. Local Arrays .. INTEGER DESC2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL PB_DESCTRANS * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESC, DESC2 ) * IMB = DESC2( IMB_ ) PROW = DESC2( RSRC_ ) * * Has every process row I ? * IF( ( PROW.EQ.-1 ).OR.( NPROW.EQ.1 ) ) THEN * II = I * ELSE IF( I.LE.IMB ) THEN * * I is in range of first block * IF( MYROW.EQ.PROW ) THEN II = I ELSE II = 1 END IF * ELSE * * I is not in first block of matrix, figure out who has it. * RSRC = PROW MB = DESC2( MB_ ) * IF( MYROW.EQ.RSRC ) THEN * NBLOCKS = ( I - IMB - 1 ) / MB + 1 PROW = PROW + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPROW ).GE.NBLOCKS ) THEN IF( MYROW.EQ.PROW ) THEN II = I + ( ILOCBLK - NBLOCKS ) * MB ELSE II = IMB + ( ILOCBLK - 1 ) * MB + 1 END IF ELSE II = IMB + ILOCBLK * MB + 1 END IF ELSE II = IMB + 1 END IF * ELSE * I1 = I - IMB NBLOCKS = ( I1 - 1 ) / MB + 1 PROW = PROW + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * MYDIST = MYROW - RSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN II = MB + ILOCBLK * MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( ILOCBLK - NBLOCKS + 1 ) * MB ELSE II = ILOCBLK * MB + 1 END IF END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN II = MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( 1 - NBLOCKS ) * MB ELSE II = 1 END IF END IF END IF * END IF * INB = DESC2( INB_ ) PCOL = DESC2( CSRC_ ) * * Has every process column J ? * IF( ( PCOL.EQ.-1 ).OR.( NPCOL.EQ.1 ) ) THEN * JJ = J * ELSE IF( J.LE.INB ) THEN * * J is in range of first block * IF( MYCOL.EQ.PCOL ) THEN JJ = J ELSE JJ = 1 END IF * ELSE * * J is not in first block of matrix, figure out who has it. * CSRC = PCOL NB = DESC2( NB_ ) * IF( MYCOL.EQ.CSRC ) THEN * NBLOCKS = ( J - INB - 1 ) / NB + 1 PCOL = PCOL + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPCOL ).GE.NBLOCKS ) THEN IF( MYCOL.EQ.PCOL ) THEN JJ = J + ( ILOCBLK - NBLOCKS ) * NB ELSE JJ = INB + ( ILOCBLK - 1 ) * NB + 1 END IF ELSE JJ = INB + ILOCBLK * NB + 1 END IF ELSE JJ = INB + 1 END IF * ELSE * J1 = J - INB NBLOCKS = ( J1 - 1 ) / NB + 1 PCOL = PCOL + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * MYDIST = MYCOL - CSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN JJ = NB + ILOCBLK * NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( ILOCBLK - NBLOCKS + 1 ) * NB ELSE JJ = ILOCBLK * NB + 1 END IF END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN JJ = NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( 1 - NBLOCKS ) * NB ELSE JJ = 1 END IF END IF END IF * END IF * RETURN * * End of PB_INFOG2L * END SUBROUTINE PB_AINFOG2L( M, N, I, J, DESC, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, $ PCOL, RPROW, RPCOL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, II, IMB1, INB1, J, JJ, M, MP, MYCOL, MYROW, $ N, NPCOL, NPROW, NQ, PCOL, PROW, RPCOL, RPROW * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_AINFOG2L computes the starting local row and column indexes II, * JJ corresponding to the submatrix starting globally at the entry * pointed by I, J. This routine returns the coordinates in the grid of * the process owning the matrix entry of global indexes I, J, namely * PROW and PCOL. In addition, this routine computes the quantities MP * and NQ, which are respectively the local number of rows and columns * owned by the process of coordinate MYROW, MYCOL corresponding to the * global submatrix A(I:I+M-1,J:J+N-1). Finally, the size of the first * partial block and the relative process coordinates are also returned * respectively in IMB, INB and RPROW, RPCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the global number of rows of the subma- * trix. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of columns of the * submatrix. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least one. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least one. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * IMB1 (global output) INTEGER * On exit, IMB1 specifies the number of rows of the upper left * block of the submatrix. On exit, IMB1 is less or equal than * M and greater or equal than MIN( 1, M ). * * INB1 (global output) INTEGER * On exit, INB1 specifies the number of columns of the upper * left block of the submatrix. On exit, INB1 is less or equal * than N and greater or equal than MIN( 1, N ). * * MP (local output) INTEGER * On exit, MP specifies the local number of rows of the subma- * trix, that the processes of row coordinate MYROW own. MP is * at least zero. * * NQ (local output) INTEGER * On exit, NQ specifies the local number of columns of the * submatrix, that the processes of column coordinate MYCOL * own. NQ is at least zero. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least one. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of * the submatrix. On exit, II is at least one. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC(RSRC_) is -1 on input, and, at least zero and * strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC(CSRC_) is -1 on input, and, at least zero * and strictly less than NPCOL otherwise. * * RPROW (global output) INTEGER * On exit, RPROW specifies the relative row coordinate of the * process that possesses the first row I of the submatrix. On * exit, RPROW is -1 if DESC(RSRC_) is -1 on input, and, at * least zero and strictly less than NPROW otherwise. * * RPCOL (global output) INTEGER * On exit, RPCOL specifies the relative column coordinate of * the process that possesses the first column J of the subma- * trix. On exit, RPCOL is -1 if DESC(CSRC_) is -1 on input, * and, at least zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER CSRC, I1, ILOCBLK, J1, M1, MB, MYDIST, N1, NB, $ NBLOCKS, RSRC * .. * .. Local Arrays .. INTEGER DESC2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESC, DESC2 ) * MB = DESC2( MB_ ) IMB1 = DESC2( IMB_ ) RSRC = DESC2( RSRC_ ) * IF( ( RSRC.EQ.-1 ).OR.( NPROW.EQ.1 ) ) THEN * II = I IMB1 = IMB1 - I + 1 IF( IMB1.LE.0 ) $ IMB1 = ( ( -IMB1 ) / MB + 1 ) * MB + IMB1 IMB1 = MIN( IMB1, M ) MP = M PROW = RSRC RPROW = 0 * ELSE * * Figure out PROW, II and IMB1 first * IF( I.LE.IMB1 ) THEN * PROW = RSRC * IF( MYROW.EQ.PROW ) THEN II = I ELSE II = 1 END IF * IMB1 = IMB1 - I + 1 * ELSE * I1 = I - IMB1 - 1 NBLOCKS = I1 / MB + 1 PROW = RSRC + NBLOCKS PROW = PROW - ( PROW / NPROW ) * NPROW * IF( MYROW.EQ.RSRC ) THEN * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPROW ).GE.NBLOCKS ) THEN IF( MYROW.EQ.PROW ) THEN II = I + ( ILOCBLK - NBLOCKS ) * MB ELSE II = IMB1 + ( ILOCBLK - 1 ) * MB + 1 END IF ELSE II = IMB1 + ILOCBLK * MB + 1 END IF ELSE II = IMB1 + 1 END IF * ELSE * MYDIST = MYROW - RSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW * ILOCBLK = NBLOCKS / NPROW * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN II = ( ILOCBLK + 1 ) * MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( ILOCBLK - NBLOCKS + 1 ) * MB + 1 ELSE II = ILOCBLK * MB + 1 END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN II = MB + 1 ELSE IF( MYROW.EQ.PROW ) THEN II = I1 + ( 1 - NBLOCKS ) * MB + 1 ELSE II = 1 END IF END IF END IF * IMB1 = NBLOCKS * MB - I1 * END IF * * Figure out MP * IF( M.LE.IMB1 ) THEN * IF( MYROW.EQ.PROW ) THEN MP = M ELSE MP = 0 END IF * ELSE * M1 = M - IMB1 NBLOCKS = M1 / MB + 1 * IF( MYROW.EQ.PROW ) THEN ILOCBLK = NBLOCKS / NPROW IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPROW ).GT.0 ) THEN MP = IMB1 + ILOCBLK * MB ELSE MP = M + MB * ( ILOCBLK - NBLOCKS ) END IF ELSE MP = IMB1 END IF ELSE MYDIST = MYROW - PROW IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROW ILOCBLK = NBLOCKS / NPROW IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROW IF( MYDIST.LT.0 ) THEN MP = ( ILOCBLK + 1 ) * MB ELSE IF( MYDIST.GT.0 ) THEN MP = ILOCBLK * MB ELSE MP = M1 + MB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN MP = MB ELSE IF( MYDIST.GT.0 ) THEN MP = 0 ELSE MP = M1 + MB * ( 1 - NBLOCKS ) END IF END IF END IF * END IF * IMB1 = MIN( IMB1, M ) RPROW = MYROW - PROW IF( RPROW.LT.0 ) $ RPROW = RPROW + NPROW * END IF * NB = DESC2( NB_ ) INB1 = DESC2( INB_ ) CSRC = DESC2( CSRC_ ) * IF( ( CSRC.EQ.-1 ).OR.( NPCOL.EQ.1 ) ) THEN * JJ = J INB1 = INB1 - I + 1 IF( INB1.LE.0 ) $ INB1 = ( ( -INB1 ) / NB + 1 ) * NB + INB1 INB1 = MIN( INB1, N ) NQ = N PCOL = CSRC RPCOL = 0 * ELSE * * Figure out PCOL, JJ and INB1 first * IF( J.LE.INB1 ) THEN * PCOL = CSRC * IF( MYCOL.EQ.PCOL ) THEN JJ = J ELSE JJ = 1 END IF * INB1 = INB1 - J + 1 * ELSE * J1 = J - INB1 - 1 NBLOCKS = J1 / NB + 1 PCOL = CSRC + NBLOCKS PCOL = PCOL - ( PCOL / NPCOL ) * NPCOL * IF( MYCOL.EQ.CSRC ) THEN * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN IF( ( ILOCBLK*NPCOL ).GE.NBLOCKS ) THEN IF( MYCOL.EQ.PCOL ) THEN JJ = J + ( ILOCBLK - NBLOCKS ) * NB ELSE JJ = INB1 + ( ILOCBLK - 1 ) * NB + 1 END IF ELSE JJ = INB1 + ILOCBLK * NB + 1 END IF ELSE JJ = INB1 + 1 END IF * ELSE * MYDIST = MYCOL - CSRC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL * ILOCBLK = NBLOCKS / NPCOL * IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN JJ = ( ILOCBLK + 1 ) * NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( ILOCBLK - NBLOCKS + 1 ) * NB + 1 ELSE JJ = ILOCBLK * NB + 1 END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN JJ = NB + 1 ELSE IF( MYCOL.EQ.PCOL ) THEN JJ = J1 + ( 1 - NBLOCKS ) * NB + 1 ELSE JJ = 1 END IF END IF END IF * INB1 = NBLOCKS * NB - J1 * END IF * * Figure out NQ * IF( N.LE.INB1 ) THEN * IF( MYCOL.EQ.PCOL ) THEN NQ = N ELSE NQ = 0 END IF * ELSE * N1 = N - INB1 NBLOCKS = N1 / NB + 1 * IF( MYCOL.EQ.PCOL ) THEN ILOCBLK = NBLOCKS / NPCOL IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPCOL ).GT.0 ) THEN NQ = INB1 + ILOCBLK * NB ELSE NQ = N + NB * ( ILOCBLK - NBLOCKS ) END IF ELSE NQ = INB1 END IF ELSE MYDIST = MYCOL - PCOL IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPCOL ILOCBLK = NBLOCKS / NPCOL IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPCOL IF( MYDIST.LT.0 ) THEN NQ = ( ILOCBLK + 1 ) * NB ELSE IF( MYDIST.GT.0 ) THEN NQ = ILOCBLK * NB ELSE NQ = N1 + NB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN NQ = NB ELSE IF( MYDIST.GT.0 ) THEN NQ = 0 ELSE NQ = N1 + NB * ( 1 - NBLOCKS ) END IF END IF END IF * END IF * INB1 = MIN( INB1, N ) RPCOL = MYCOL - PCOL IF( RPCOL.LT.0 ) $ RPCOL = RPCOL + NPCOL * END IF * RETURN * * End of PB_AINFOG2L * END INTEGER FUNCTION PB_NUMROC( N, I, INB, NB, PROC, SRCPROC, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, INB, N, NB, NPROCS, PROC, SRCPROC * .. * * Purpose * ======= * * PB_NUMROC returns the local number of matrix rows/columns process * PROC will get if we give out N rows/columns starting from global in- * dex I. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local input) INTEGER * On entry, PROC specifies the coordinate of the process whose * local portion is determined. PROC must be at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I1, ILOCBLK, INB1, MYDIST, N1, NBLOCKS, $ SRCPROC1 * .. * .. Executable Statements .. * IF( ( SRCPROC.EQ.-1 ).OR.( NPROCS.EQ.1 ) ) THEN PB_NUMROC = N RETURN END IF * * Compute coordinate of process owning I and corresponding INB * IF( I.LE.INB ) THEN * * I is in range of first block, i.e SRCPROC owns I. * SRCPROC1 = SRCPROC INB1 = INB - I + 1 * ELSE * * I is not in first block of matrix, figure out who has it * I1 = I - 1 - INB NBLOCKS = I1 / NB + 1 SRCPROC1 = SRCPROC + NBLOCKS SRCPROC1 = SRCPROC1 - ( SRCPROC1 / NPROCS ) * NPROCS INB1 = NBLOCKS*NB - I1 * END IF * * Now everything is just like I=1. Search now who has N-1, Is N-1 * in the first block ? * IF( N.LE.INB1 ) THEN IF( PROC.EQ.SRCPROC1 ) THEN PB_NUMROC = N ELSE PB_NUMROC = 0 END IF RETURN END IF * N1 = N - INB1 NBLOCKS = N1 / NB + 1 * IF( PROC.EQ.SRCPROC1 ) THEN ILOCBLK = NBLOCKS / NPROCS IF( ILOCBLK.GT.0 ) THEN IF( ( NBLOCKS - ILOCBLK * NPROCS ).GT.0 ) THEN PB_NUMROC = INB1 + ILOCBLK * NB ELSE PB_NUMROC = N + NB * ( ILOCBLK - NBLOCKS ) END IF ELSE PB_NUMROC = INB1 END IF ELSE MYDIST = PROC - SRCPROC1 IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROCS ILOCBLK = NBLOCKS / NPROCS IF( ILOCBLK.GT.0 ) THEN MYDIST = MYDIST - NBLOCKS + ILOCBLK * NPROCS IF( MYDIST.LT.0 ) THEN PB_NUMROC = ( ILOCBLK + 1 ) * NB ELSE IF( MYDIST.GT.0 ) THEN PB_NUMROC = ILOCBLK * NB ELSE PB_NUMROC = N1 + NB * ( ILOCBLK - NBLOCKS + 1 ) END IF ELSE MYDIST = MYDIST - NBLOCKS IF( MYDIST.LT.0 ) THEN PB_NUMROC = NB ELSE IF( MYDIST.GT.0 ) THEN PB_NUMROC = 0 ELSE PB_NUMROC = N1 + NB * ( 1 - NBLOCKS ) END IF END IF END IF * RETURN * * End of PB_NUMROC * END INTEGER FUNCTION PB_FCEIL( NUM, DENOM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL DENOM, NUM * .. * * Purpose * ======= * * PB_FCEIL returns the ceiling of the division of two integers. The * integer operands are passed as real to avoid integer overflow. * * Arguments * ========= * * NUM (local input) REAL * On entry, NUM specifies the numerator of the fraction to be * evaluated. * * DENOM (local input) REAL * On entry, DENOM specifies the denominator of the fraction to * be evaluated. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC NINT * .. * .. Executable Statements .. * PB_FCEIL = NINT( ( ( NUM + DENOM - 1.0E+0 ) / DENOM ) - 0.5E+0 ) * RETURN * * End of PB_FCEIL * END SUBROUTINE PB_CHKMAT( ICTXT, M, MPOS0, N, NPOS0, IA, JA, DESCA, $ DPOS0, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER DPOS0, IA, ICTXT, INFO, JA, M, MPOS0, N, NPOS0 * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * PB_CHKMAT checks the validity of a descriptor vector DESCA, the re- * lated global indexes IA, JA from a local view point. If an inconsis- * tency is found among its parameters IA, JA and DESCA, the routine re- * turns an error code in INFO. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (global input) INTEGER * On entry, M specifies the number of rows the submatrix * sub( A ). * * MPOS0 (global input) INTEGER * On entry, MPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter M appears. * * N (global input) INTEGER * On entry, N specifies the number of columns the submatrix * sub( A ). * * NPOS0 (global input) INTEGER * On entry, NPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter N appears. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * DPOS0 (global input) INTEGER * On entry, DPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter DESCA ap- * pears. Note that it is assumed that IA and JA are respecti- * vely 2 and 1 entries behind DESCA. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had an * illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) * .. * .. Local Scalars .. INTEGER DPOS, IAPOS, JAPOS, MP, MPOS, MYCOL, MYROW, $ NPCOL, NPOS, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the des- * criptor multiplier * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Figure where in parameter list each parameter was, factoring in * descriptor multiplier * MPOS = MPOS0 * DESCMULT NPOS = NPOS0 * DESCMULT IAPOS = ( DPOS0 - 2 ) * DESCMULT JAPOS = ( DPOS0 - 1 ) * DESCMULT DPOS = DPOS0 * DESCMULT * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check that matrix values make sense from local viewpoint * IF( M.LT.0 ) $ INFO = MIN( INFO, MPOS ) IF( N.LT.0 ) $ INFO = MIN( INFO, NPOS ) IF( IA.LT.1 ) $ INFO = MIN( INFO, IAPOS ) IF( JA.LT.1 ) $ INFO = MIN( INFO, JAPOS ) IF( DESCA2( DTYPE_ ).NE.BLOCK_CYCLIC_2D_INB ) $ INFO = MIN( INFO, DPOS + DTYPE_ ) IF( DESCA2( IMB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + IMB_ ) IF( DESCA2( INB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + INB_ ) IF( DESCA2( MB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + MB_ ) IF( DESCA2( NB_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + NB_ ) IF( DESCA2( RSRC_ ).LT.-1 .OR. DESCA2( RSRC_ ).GE.NPROW ) $ INFO = MIN( INFO, DPOS + RSRC_ ) IF( DESCA2( CSRC_ ).LT.-1 .OR. DESCA2( CSRC_ ).GE.NPCOL ) $ INFO = MIN( INFO, DPOS + CSRC_ ) IF( DESCA2( CTXT_ ).NE.ICTXT ) $ INFO = MIN( INFO, DPOS + CTXT_ ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN * * NULL matrix, relax some checks * IF( DESCA2( M_ ).LT.0 ) $ INFO = MIN( INFO, DPOS + M_ ) IF( DESCA2( N_ ).LT.0 ) $ INFO = MIN( INFO, DPOS + N_ ) IF( DESCA2( LLD_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + LLD_ ) * ELSE * * more rigorous checks for non-degenerate matrices * MP = PB_NUMROC( DESCA2( M_ ), 1, DESCA2( IMB_ ), DESCA2( MB_ ), $ MYROW, DESCA2( RSRC_ ), NPROW ) * IF( DESCA2( M_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + M_ ) IF( DESCA2( N_ ).LT.1 ) $ INFO = MIN( INFO, DPOS + N_ ) IF( IA.GT.DESCA2( M_ ) ) $ INFO = MIN( INFO, IAPOS ) IF( JA.GT.DESCA2( N_ ) ) $ INFO = MIN( INFO, JAPOS ) IF( IA+M-1.GT.DESCA2( M_ ) ) $ INFO = MIN( INFO, MPOS ) IF( JA+N-1.GT.DESCA2( N_ ) ) $ INFO = MIN( INFO, NPOS ) * IF( DESCA2( LLD_ ).LT.MAX( 1, MP ) ) THEN NQ = PB_NUMROC( DESCA2( N_ ), 1, DESCA2( INB_ ), $ DESCA2( NB_ ), MYCOL, DESCA2( CSRC_ ), $ NPCOL ) IF( DESCA2( LLD_ ).LT.1 ) THEN INFO = MIN( INFO, DPOS + LLD_ ) ELSE IF( NQ.GT.0 ) THEN INFO = MIN( INFO, DPOS + LLD_ ) END IF END IF * END IF * * Prepare output: set info = 0 if no error, and divide by * DESCMULT if error is not in a descriptor entry * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -( INFO / DESCMULT ) ELSE INFO = -INFO END IF * RETURN * * End of PB_CHKMAT * END SUBROUTINE PB_DESCTRANS( DESCIN, DESCOUT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER DESCIN( * ), DESCOUT( * ) * .. * * Purpose * ======= * * PB_DESCTRANS converts a descriptor DESCIN of type BLOCK_CYCLIC_2D * or BLOCK_CYCLIC_INB_2D into a descriptor DESCOUT of type * BLOCK_CYCLIC_INB_2D. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ----------------------------------- * DTYPE_A (global) DESCA( DTYPE1_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT1_ ) The BLACS context handle indicating * the NPROW x NPCOL BLACS process * grid A is distributed over. The * context itself is global, but the * handle (the integer value) may * vary. * M_A (global) DESCA( M1_ ) The number of rows in the distri- * buted matrix A, M_A >= 0. * N_A (global) DESCA( N1_ ) The number of columns in the dis- * tributed matrix A, N_A >= 0. * MB_A (global) DESCA( MB1_ ) The blocking factor used to distri- * bute the rows of A, MB_A > 0. * NB_A (global) DESCA( NB1_ ) The blocking factor used to distri- * bute the columns of A, NB_A > 0. * RSRC_A (global) DESCA( RSRC1_ ) The process row over which the * first row of the matrix A is dis- * tributed, NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC1_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD1_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, MB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, NB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESCIN (global and local input) INTEGER array * On entry, DESCIN is an array of dimension DLEN1_ or DLEN_ as * specified by its first entry DESCIN( DTYPE_ ). DESCIN is the * source array descriptor of type BLOCK_CYCLIC_2D or of type * BLOCK_CYCLIC_2D_INB. * * DESCOUT (global and local output) INTEGER array * On entry, DESCOUT is an array of dimension DLEN_. DESCOUT is * the target array descriptor of type BLOCK_CYCLIC_2D_INB. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC1_, CTXT1_, DLEN1_, $ DTYPE1_, LLD1_, M1_, MB1_, N1_, NB1_, RSRC1_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN1_ = 9, DTYPE1_ = 1, $ CTXT1_ = 2, M1_ = 3, N1_ = 4, MB1_ = 5, $ NB1_ = 6, RSRC1_ = 7, CSRC1_ = 8, LLD1_ = 9 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I * .. * .. Executable Statements .. * IF( DESCIN( DTYPE_ ).EQ.BLOCK_CYCLIC_2D ) THEN DESCOUT( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESCOUT( CTXT_ ) = DESCIN( CTXT1_ ) DESCOUT( M_ ) = DESCIN( M1_ ) DESCOUT( N_ ) = DESCIN( N1_ ) DESCOUT( IMB_ ) = DESCIN( MB1_ ) DESCOUT( INB_ ) = DESCIN( NB1_ ) DESCOUT( MB_ ) = DESCIN( MB1_ ) DESCOUT( NB_ ) = DESCIN( NB1_ ) DESCOUT( RSRC_ ) = DESCIN( RSRC1_ ) DESCOUT( CSRC_ ) = DESCIN( CSRC1_ ) DESCOUT( LLD_ ) = DESCIN( LLD1_ ) ELSE IF( DESCIN( DTYPE_ ).EQ.BLOCK_CYCLIC_2D_INB ) THEN DO 10 I = 1, DLEN_ DESCOUT( I ) = DESCIN( I ) 10 CONTINUE ELSE DESCOUT( DTYPE_ ) = DESCIN( 1 ) DESCOUT( CTXT_ ) = DESCIN( 2 ) DESCOUT( M_ ) = 0 DESCOUT( N_ ) = 0 DESCOUT( IMB_ ) = 1 DESCOUT( INB_ ) = 1 DESCOUT( MB_ ) = 1 DESCOUT( NB_ ) = 1 DESCOUT( RSRC_ ) = 0 DESCOUT( CSRC_ ) = 0 DESCOUT( LLD_ ) = 1 END IF * RETURN * * End of PB_DESCTRANS * END SUBROUTINE PB_DESCSET2( DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, $ CTXT, LLD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CSRC, CTXT, IMB, INB, LLD, M, MB, N, NB, RSRC * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_DESCSET2 uses its 10 input arguments M, N, IMB, INB, MB, NB, * RSRC, CSRC, CTXT and LLD to initialize a descriptor vector of type * BLOCK_CYCLIC_2D_INB. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ----------------------------------- * DTYPE_A (global) DESCA( DTYPE1_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT1_ ) The BLACS context handle indicating * the NPROW x NPCOL BLACS process * grid A is distributed over. The * context itself is global, but the * handle (the integer value) may * vary. * M_A (global) DESCA( M1_ ) The number of rows in the distri- * buted matrix A, M_A >= 0. * N_A (global) DESCA( N1_ ) The number of columns in the dis- * tributed matrix A, N_A >= 0. * MB_A (global) DESCA( MB1_ ) The blocking factor used to distri- * bute the rows of A, MB_A > 0. * NB_A (global) DESCA( NB1_ ) The blocking factor used to distri- * bute the columns of A, NB_A > 0. * RSRC_A (global) DESCA( RSRC1_ ) The process row over which the * first row of the matrix A is dis- * tributed, NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC1_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD1_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, MB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, NB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESC (global and local output) INTEGER array * On entry, DESC is an array of dimension DLEN_. DESC is the * array descriptor to be set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix. * N must be at least zero. * * IMB (global input) INTEGER * On entry, IMB specifies the row size of the first block of * the global matrix distribution. IMB must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the column size of the first block * of the global matrix distribution. INB must be at least one. * * MB (global input) INTEGER * On entry, MB specifies the row size of the blocks used to * partition the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the column size of the blocks used to * partition the matrix. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the data is not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When * CSRC = -1, the data is not distributed but replicated, other- * wise CSRC must be at least zero and strictly less than NPCOL. * * CTXT (local input) INTEGER * On entry, CTXT specifies the BLACS context handle, indicating * the global communication context. The value of the context * itself is local. * * LLD (local input) INTEGER * On entry, LLD specifies the leading dimension of the local * array storing the local entries of the matrix. LLD must be at * least MAX( 1, Lr(1,M) ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Executable Statements .. * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESC( CTXT_ ) = CTXT DESC( M_ ) = M DESC( N_ ) = N DESC( IMB_ ) = IMB DESC( INB_ ) = INB DESC( MB_ ) = MB DESC( NB_ ) = NB DESC( RSRC_ ) = RSRC DESC( CSRC_ ) = CSRC DESC( LLD_ ) = LLD * RETURN * * End of PB_DESCSET2 * END SUBROUTINE PB_DESCINIT2( DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, $ CTXT, LLD, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER CSRC, CTXT, IMB, INB, INFO, LLD, M, MB, N, NB, $ RSRC * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * PB_DESCINIT2 uses its 10 input arguments M, N, IMB, INB, MB, NB, * RSRC, CSRC, CTXT and LLD to initialize a descriptor vector of type * BLOCK_CYCLIC_2D_INB. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESC (global and local output) INTEGER array * On entry, DESC is an array of dimension DLEN_. DESC is the * array descriptor to be set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix. * N must be at least zero. * * IMB (global input) INTEGER * On entry, IMB specifies the row size of the first block of * the global matrix distribution. IMB must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the column size of the first block * of the global matrix distribution. INB must be at least one. * * MB (global input) INTEGER * On entry, MB specifies the row size of the blocks used to * partition the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the column size of the blocks used to * partition the matrix. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the data is not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When * CSRC = -1, the data is not distributed but replicated, other- * wise CSRC must be at least zero and strictly less than NPCOL. * * CTXT (local input) INTEGER * On entry, CTXT specifies the BLACS context handle, indicating * the global communication context. The value of the context * itself is local. * * LLD (local input) INTEGER * On entry, LLD specifies the leading dimension of the local * array storing the local entries of the matrix. LLD must be at * least MAX( 1, Lr(1,M) ). * * INFO (local output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value. * * Notes * ===== * * If the routine can recover from an erroneous input argument, it will * return an acceptable descriptor vector. For example, if LLD = 0 on * input, DESC( LLD_ ) will contain the smallest leading dimension re- * quired to store the specified m by n matrix, INFO will however be set * to -11 on exit in that case. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER LLDMIN, MP, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. INTEGER PB_NUMROC EXTERNAL PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( CTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IMB.LT.1 ) THEN INFO = -4 ELSE IF( INB.LT.1 ) THEN INFO = -5 ELSE IF( MB.LT.1 ) THEN INFO = -6 ELSE IF( NB.LT.1 ) THEN INFO = -7 ELSE IF( RSRC.LT.-1 .OR. RSRC.GE.NPROW ) THEN INFO = -8 ELSE IF( CSRC.LT.-1 .OR. CSRC.GE.NPCOL ) THEN INFO = -9 ELSE IF( NPROW.EQ.-1 ) THEN INFO = -10 END IF * * Compute minimum LLD if safe (to avoid division by 0) * IF( INFO.EQ.0 ) THEN MP = PB_NUMROC( M, 1, IMB, MB, MYROW, RSRC, NPROW ) IF( PB_NUMROC( N, 1, INB, NB, MYCOL, CSRC, NPCOL ).GT.0 ) THEN LLDMIN = MAX( 1, MP ) ELSE LLDMIN = 1 END IF IF( LLD.LT.LLDMIN ) $ INFO = -11 END IF * IF( INFO.NE.0 ) $ CALL PXERBLA( CTXT, 'PB_DESCINIT2', -INFO ) * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D_INB DESC( CTXT_ ) = CTXT DESC( M_ ) = MAX( 0, M ) DESC( N_ ) = MAX( 0, N ) DESC( IMB_ ) = MAX( 1, IMB ) DESC( INB_ ) = MAX( 1, INB ) DESC( MB_ ) = MAX( 1, MB ) DESC( NB_ ) = MAX( 1, NB ) DESC( RSRC_ ) = MAX( -1, MIN( RSRC, NPROW-1 ) ) DESC( CSRC_ ) = MAX( -1, MIN( CSRC, NPCOL-1 ) ) DESC( LLD_ ) = MAX( LLD, LLDMIN ) * RETURN * * End of PB_DESCINIT2 * END SUBROUTINE PB_BINFO( OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ILOW, IMB1, IMBLOC, INB1, INBLOC, IUPP, LCMT00, $ LMBLOC, LNBLOC, LOW, M, MB, MBLKS, MRCOL, $ MRROW, N, NB, NBLKS, OFFD, UPP * .. * * Purpose * ======= * * PB_BINFO initializes the local information of an m by n local array * owned by the process of relative coordinates ( MRROW, MRCOL ). Note * that if m or n is less or equal than zero, there is no data, in which * case this process does not need the local information computed by * this routine to proceed. * * Arguments * ========= * * OFFD (global input) INTEGER * On entry, OFFD specifies the off-diagonal of the underlying * matrix of interest as follows: * OFFD = 0 specifies the main diagonal, * OFFD > 0 specifies lower subdiagonals, and * OFFD < 0 specifies upper superdiagonals. * * M (local input) INTEGER * On entry, M specifies the local number of rows of the under- * lying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the local number of columns of the un- * derlying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). N must be at least zero. * * IMB1 (global input) INTEGER * On input, IMB1 specifies the global true size of the first * block of rows of the underlying global submatrix. IMB1 must * be at least MIN( 1, M ). * * INB1 (global input) INTEGER * On input, INB1 specifies the global true size of the first * block of columns of the underlying global submatrix. INB1 * must be at least MIN( 1, N ). * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * MRROW (local input) INTEGER * On entry, MRROW specifies the relative row coordinate of the * process that possesses these M rows. MRROW must be least zero * and strictly less than NPROW. * * MRCOL (local input) INTEGER * On entry, MRCOL specifies the relative column coordinate of * the process that possesses these N columns. MRCOL must be * least zero and strictly less than NPCOL. * * LCMT00 (local output) INTEGER * On exit, LCMT00 is the LCM value of the left upper block of * this m by n local block owned by the process of relative co- * ordinates ( MRROW, MRCOL ). * * MBLKS (local output) INTEGER * On exit, MBLKS specifies the local number of blocks of rows * corresponding to M. MBLKS must be at least zero. * * NBLKS (local output) INTEGER * On exit, NBLKS specifies the local number of blocks of co- * lumns corresponding to N. NBLKS must be at least zero. * * IMBLOC (local output) INTEGER * On exit, IMBLOC specifies the number of rows (size) of the * uppest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). IMBLOC is at least * MIN( 1, M ). * * INBLOC (local output) INTEGER * On exit, INBLOC specifies the number of columns (size) of * the leftmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). INBLOC is * at least MIN( 1, N ). * * LMBLOC (local output) INTEGER * On exit, LMBLOC specifies the number of rows (size) of the * lowest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). LMBLOC is at least * MIN( 1, M ). * * LNBLOC (local output) INTEGER * On exit, LNBLOC specifies the number of columns (size) of the * rightmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). LNBLOC is * at least MIN( 1, N ). * * ILOW (local output) INTEGER * On exit, ILOW is the lower bound characterizing the first co- * lumn block owning offdiagonals of this m by n array. ILOW * must be less or equal than zero. * * LOW (global output) INTEGER * On exit, LOW is the lower bound characterizing the column * blocks with te exception of the first one (see ILOW) owning * offdiagonals of this m by n array. LOW must be less or equal * than zero. * * IUPP (local output) INTEGER * On exit, IUPP is the upper bound characterizing the first row * block owning offdiagonals of this m by n array. IUPP must be * greater or equal than zero. * * UPP (global output) INTEGER * On exit, UPP is the upper bound characterizing the row * blocks with te exception of the first one (see IUPP) owning * offdiagonals of this m by n array. UPP must be greater or * equal than zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER TMP1 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Initialize LOW, ILOW, UPP, IUPP, LMBLOC, LNBLOC, IMBLOC, INBLOC, * MBLKS, NBLKS and LCMT00. * LOW = 1 - NB UPP = MB - 1 * LCMT00 = OFFD * IF( M.LE.0 .OR. N.LE.0 ) THEN * IF( MRROW.GT.0 ) THEN IUPP = MB - 1 ELSE IUPP = MAX( 0, IMB1 - 1 ) END IF IMBLOC = 0 MBLKS = 0 LMBLOC = 0 * IF( MRCOL.GT.0 ) THEN ILOW = 1 - NB ELSE ILOW = MIN( 0, 1 - INB1 ) END IF INBLOC = 0 NBLKS = 0 LNBLOC = 0 * LCMT00 = LCMT00 + ( LOW - ILOW + MRCOL * NB ) - $ ( IUPP - UPP + MRROW * MB ) * RETURN * END IF * IF( MRROW.GT.0 ) THEN * IMBLOC = MIN( M, MB ) IUPP = MB - 1 LCMT00 = LCMT00 - ( IMB1 - MB + MRROW * MB ) MBLKS = ( M - 1 ) / MB + 1 LMBLOC = M - ( M / MB ) * MB IF( LMBLOC.EQ.0 ) $ LMBLOC = MB * IF( MRCOL.GT.0 ) THEN * INBLOC = MIN( N, NB ) ILOW = 1 - NB LCMT00 = LCMT00 + INB1 - NB + MRCOL * NB NBLKS = ( N - 1 ) / NB + 1 LNBLOC = N - ( N / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * INBLOC = INB1 ILOW = 1 - INB1 TMP1 = N - INB1 IF( TMP1.GT.0 ) THEN * * more than one block * NBLKS = ( TMP1 - 1 ) / NB + 2 LNBLOC = TMP1 - ( TMP1 / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * NBLKS = 1 LNBLOC = INB1 * END IF * END IF * ELSE * IMBLOC = IMB1 IUPP = IMB1 - 1 TMP1 = M - IMB1 IF( TMP1.GT.0 ) THEN * * more than one block * MBLKS = ( TMP1 - 1 ) / MB + 2 LMBLOC = TMP1 - ( TMP1 / MB ) * MB IF( LMBLOC.EQ.0 ) $ LMBLOC = MB * ELSE * MBLKS = 1 LMBLOC = IMB1 * END IF * IF( MRCOL.GT.0 ) THEN * INBLOC = MIN( N, NB ) ILOW = 1 - NB LCMT00 = LCMT00 + INB1 - NB + MRCOL * NB NBLKS = ( N - 1 ) / NB + 1 LNBLOC = N - ( N / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * INBLOC = INB1 ILOW = 1 - INB1 TMP1 = N - INB1 IF( TMP1.GT.0 ) THEN * * more than one block * NBLKS = ( TMP1 - 1 ) / NB + 2 LNBLOC = TMP1 - ( TMP1 / NB ) * NB IF( LNBLOC.EQ.0 ) $ LNBLOC = NB * ELSE * NBLKS = 1 LNBLOC = INB1 * END IF * END IF * END IF * RETURN * * End of PB_BINFO * END INTEGER FUNCTION PILAENV( ICTXT, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT CHARACTER*1 PREC * .. * * Purpose * ======= * * PILAENV returns the logical computational block size to be used by * the PBLAS routines during testing and timing. This is a special ver- * sion to be used only as part of the testing or timing PBLAS programs * for testing different values of logical computational block sizes for * the PBLAS routines. It is called by the PBLAS routines to retrieve a * logical computational block size value. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * PREC (dummy input) CHARACTER*1 * On entry, PREC is a dummy argument. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER INFO, NBLOG COMMON /INFOC/INFO, NBLOG * .. * .. Executable Statements .. * PILAENV = NBLOG * RETURN * * End of PILAENV * END SUBROUTINE PB_LOCINFO( I, INB, NB, MYROC, SRCPROC, NPROCS, $ ILOCBLK, ILOCOFF, MYDIST ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER I, ILOCBLK, ILOCOFF, INB, MYDIST, MYROC, NB, $ NPROCS, SRCPROC * .. * * Purpose * ======= * * PB_LOCINFO computes local information about the beginning of a sub- * matrix starting at the global index I. * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the global starting index in the ma- * trix. I must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of rows * or columns of the matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks of rows or co- * lumns of the matrix is partitioned into. NB must be at least * one. * * MYROC (local input) INTEGER * On entry, MYROC is the coordinate of the process whose local * information is determined. MYROC is at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the submatrix is distributed. NPROCS * must be at least one. * * ILOCBLK (local output) INTEGER * On exit, ILOCBLK specifies the local row or column block * coordinate corresponding to the row or column I of the ma- * trix. ILOCBLK must be at least zero. * * ILOCOFF (local output) INTEGER * On exit, ILOCOFF specifies the local row offset in the block * of local coordinate ILOCBLK corresponding to the row or co- * lumn I of the matrix. ILOCOFF must at least zero. * * MYDIST (local output) INTEGER * On exit, MYDIST specifies the relative process coordinate of * the process specified by MYROC to the process owning the row * or column I. MYDIST is at least zero and strictly less than * NPROCS. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER ITMP, NBLOCKS, PROC * .. * .. Executable Statements .. * ILOCOFF = 0 * IF( SRCPROC.LT.0 ) THEN * MYDIST = 0 * IF( I.LE.INB ) THEN * ILOCBLK = 0 ILOCOFF = I - 1 * ELSE * ITMP = I - INB NBLOCKS = ( ITMP - 1 ) / NB + 1 ILOCBLK = NBLOCKS ILOCOFF = ITMP - 1 - ( NBLOCKS - 1 ) * NB * END IF * ELSE * PROC = SRCPROC MYDIST = MYROC - PROC IF( MYDIST.LT.0 ) $ MYDIST = MYDIST + NPROCS * IF( I.LE.INB ) THEN * ILOCBLK = 0 IF( MYROC.EQ.PROC ) $ ILOCOFF = I - 1 * ELSE * ITMP = I - INB NBLOCKS = ( ITMP - 1 ) / NB + 1 PROC = PROC + NBLOCKS PROC = PROC - ( PROC / NPROCS ) * NPROCS ILOCBLK = NBLOCKS / NPROCS * IF( ( ILOCBLK*NPROCS ).LT.( MYDIST-NBLOCKS ) ) $ ILOCBLK = ILOCBLK + 1 * IF( MYROC.EQ.PROC ) $ ILOCOFF = ITMP - 1 - ( NBLOCKS - 1 ) * NB * END IF * END IF * RETURN * * End of PB_LOCINFO * END SUBROUTINE PB_INITJMP( COLMAJ, NVIR, IMBVIR, INBVIR, IMBLOC, $ INBLOC, MB, NB, RSRC, CSRC, NPROW, NPCOL, $ STRIDE, JMP ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL COLMAJ INTEGER CSRC, IMBLOC, IMBVIR, INBLOC, INBVIR, MB, NB, $ NPCOL, NPROW, NVIR, RSRC, STRIDE * .. * .. Array Arguments .. INTEGER JMP( * ) * .. * * Purpose * ======= * * PB_INITJMP initializes the jump values JMP used by the random matrix * generator. * * Arguments * ========= * * COLMAJ (global input) LOGICAL * On entry, COLMAJ specifies the ordering of the random sequen- * ce. When COLMAJ is .TRUE., the random sequence will be used * for a column major ordering, and otherwise a row-major orde- * ring. This impacts on the computation of the jump values. * * NVIR (global input) INTEGER * On entry, NVIR specifies the size of the underlying virtual * matrix. NVIR must be at least zero. * * IMBVIR (local input) INTEGER * On entry, IMBVIR specifies the number of virtual rows of the * upper left block of the underlying virtual submatrix. IMBVIR * must be at least IMBLOC. * * INBVIR (local input) INTEGER * On entry, INBVIR specifies the number of virtual columns of * the upper left block of the underlying virtual submatrix. * INBVIR must be at least INBLOC. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the size of the blocks used to parti- * tion the matrix rows. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix columns. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the rows are not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When CSRC * is equal to -1, the columns are not distributed but replica- * ted, otherwise CSRC must be at least zero and strictly less * than NPCOL. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process co- * lumns over which the matrix is distributed. NPCOL must be at * least one. * * STRIDE (global input) INTEGER * On entry, STRIDE specifies the number of random numbers to be * generated to compute one matrix entry. In the real case, * STRIDE is usually 1, where as in the complex case STRIDE is * usually 2 in order to generate the real and imaginary parts. * * JMP (local output) INTEGER array * On entry, JMP is an array of dimension JMP_LEN. On exit, this * array contains the different jump values used by the random * matrix generator. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER NPMB, NQNB * .. * .. Executable Statements .. * IF( RSRC.LT.0 ) THEN NPMB = MB ELSE NPMB = NPROW * MB END IF IF( CSRC.LT.0 ) THEN NQNB = NB ELSE NQNB = NPCOL * NB END IF * JMP( JMP_1 ) = 1 * JMP( JMP_MB ) = MB JMP( JMP_IMBV ) = IMBVIR JMP( JMP_NPMB ) = NPMB JMP( JMP_NPIMBLOC ) = IMBLOC + NPMB - MB * JMP( JMP_NB ) = NB JMP( JMP_INBV ) = INBVIR JMP( JMP_NQNB ) = NQNB JMP( JMP_NQINBLOC ) = INBLOC + NQNB - NB * IF( COLMAJ ) THEN JMP( JMP_ROW ) = STRIDE JMP( JMP_COL ) = STRIDE * NVIR ELSE JMP( JMP_ROW ) = STRIDE * NVIR JMP( JMP_COL ) = STRIDE END IF * RETURN * * End of PB_INITJMP * END SUBROUTINE PB_INITMULADD( MULADD0, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IMULADD( 4, * ), JMP( * ), MULADD0( * ) * .. * * Purpose * ======= * * PB_INITMULADD initializes the constants a's and c's corresponding to * the jump values (JMP) used by the matrix generator. * * Arguments * ========= * * MULADD0 (local input) INTEGER array * On entry, MULADD0 is an array of dimension 4 containing the * encoded initial constants a and c to jump from X( n ) to * X( n+1 ) = a*X( n ) + c in the random sequence. MULADD0(1:2) * contains respectively the 16-lower and 16-higher bits of the * constant a, and MULADD0(3:4) contains the 16-lower and * 16-higher bits of the constant c. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the matrix generator. * * IMULADD (local output) INTEGER array * On entry, IMULADD is an array of dimension ( 4, JMP_LEN ). On * exit, the jth column of this array contains the encoded ini- * tial constants a_j and c_j to jump from X( n ) to X(n+JMP(j)) * (= a_j*X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * * .. Local Arrays .. INTEGER ITMP1( 2 ), ITMP2( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMP * .. * .. Executable Statements .. * ITMP2( 1 ) = 100 ITMP2( 2 ) = 0 * * Compute IMULADD for all JMP values * CALL PB_JUMP( JMP( JMP_1 ), MULADD0, ITMP2, ITMP1, $ IMULADD( 1, JMP_1 ) ) * CALL PB_JUMP( JMP( JMP_ROW ), MULADD0, ITMP1, ITMP2, $ IMULADD( 1, JMP_ROW ) ) CALL PB_JUMP( JMP( JMP_COL ), MULADD0, ITMP1, ITMP2, $ IMULADD( 1, JMP_COL ) ) * * Compute constants a and c to jump JMP( * ) numbers in the * sequence for column- or row-major ordering of the sequence. * CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_IMBV ) ) CALL PB_JUMP( JMP( JMP_MB ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_MB ) ) CALL PB_JUMP( JMP( JMP_NPMB ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NPMB ) ) CALL PB_JUMP( JMP( JMP_NPIMBLOC ), IMULADD( 1, JMP_ROW ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NPIMBLOC ) ) * CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_INBV ) ) CALL PB_JUMP( JMP( JMP_NB ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NB ) ) CALL PB_JUMP( JMP( JMP_NQNB ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NQNB ) ) CALL PB_JUMP( JMP( JMP_NQINBLOC ), IMULADD( 1, JMP_COL ), ITMP1, $ ITMP2, IMULADD( 1, JMP_NQINBLOC ) ) * RETURN * * End of PB_INITMULADD * END SUBROUTINE PB_SETLOCRAN( SEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ILOCBLK, ILOCOFF, JLOCBLK, JLOCOFF, MYCDIST, $ MYRDIST, NPCOL, NPROW, SEED * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) * .. * * Purpose * ======= * * PB_SETLOCRAN locally initializes the random number generator. * * Arguments * ========= * * SEED (global input) INTEGER * On entry, SEED specifies a positive integer used to initiali- * ze the first number in the random sequence used by the matrix * generator. SEED must be at least zero. * * ILOCBLK (local input) INTEGER * On entry, ILOCBLK specifies the local row block coordinate * corresponding to the first row of the submatrix of interest. * ILOCBLK must be at least zero. * * ILOCOFF (local input) INTEGER * On entry, ILOCOFF specifies the local row offset in the block * of local coordinate ILOCBLK corresponding to the first row of * the submatrix of interest. ILOCOFF must at least zero. * * JLOCBLK (local input) INTEGER * On entry, JLOCBLK specifies the local column block coordinate * corresponding to the first column of the submatrix of inte- * rest. JLOCBLK must be at least zero. * * JLOCOFF (local input) INTEGER * On entry, JLOCOFF specifies the local column offset in the * block of local coordinate JLOCBLK corresponding to the first * column of the submatrix of interest. JLOCOFF must be at least * zero. * * MYRDIST (local input) INTEGER * On entry, MYRDIST specifies the relative row process coordi- * nate to the process owning the first row of the submatrix of * interest. MYRDIST must be at least zero and stricly less than * NPROW (see the subroutine PB_LOCINFO). * * MYCDIST (local input) INTEGER * On entry, MYCDIST specifies the relative column process coor- * dinate to the process owning the first column of the subma- * trix of interest. MYCDIST must be at least zero and stricly * less than NPCOL (see the subroutine PB_LOCINFO). * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process co- * lumns over which the matrix is distributed. NPCOL must be at * least one. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * IRAN (local output) INTEGER array * On entry, IRAN is an array of dimension 2. On exit, IRAN con- * tains respectively the 16-lower and 32-higher bits of the en- * coding of the entry of the random sequence corresponding lo- * cally to the first local array entry to generate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Arrays .. INTEGER IMULADDTMP( 4 ), ITMP( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMP, PB_SETRAN * .. * .. Executable Statements .. * * Compute and set the value of IRAN corresponding to A( IA, JA ) * ITMP( 1 ) = SEED ITMP( 2 ) = 0 * CALL PB_JUMP( JMP( JMP_1 ), IMULADD( 1, JMP_1 ), ITMP, IRAN, $ IMULADDTMP ) * * Jump ILOCBLK blocks of rows + ILOCOFF rows * CALL PB_JUMP( ILOCOFF, IMULADD( 1, JMP_ROW ), IRAN, ITMP, $ IMULADDTMP ) IF( MYRDIST.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( MYRDIST - 1, IMULADD( 1, JMP_MB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( ILOCBLK, IMULADD( 1, JMP_NPMB ), ITMP, $ IRAN, IMULADDTMP ) ELSE IF( ILOCBLK.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_IMBV ), IMULADD( 1, JMP_ROW ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( NPROW - 1, IMULADD( 1, JMP_MB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( ILOCBLK - 1, IMULADD( 1, JMP_NPMB ), ITMP, $ IRAN, IMULADDTMP ) ELSE CALL PB_JUMP( 0, IMULADD( 1, JMP_1 ), ITMP, $ IRAN, IMULADDTMP ) END IF END IF * * Jump JLOCBLK blocks of columns + JLOCOFF columns * CALL PB_JUMP( JLOCOFF, IMULADD( 1, JMP_COL ), IRAN, ITMP, $ IMULADDTMP ) IF( MYCDIST.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( MYCDIST - 1, IMULADD( 1, JMP_NB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( JLOCBLK, IMULADD( 1, JMP_NQNB ), ITMP, $ IRAN, IMULADDTMP ) ELSE IF( JLOCBLK.GT.0 ) THEN CALL PB_JUMP( JMP( JMP_INBV ), IMULADD( 1, JMP_COL ), ITMP, $ IRAN, IMULADDTMP ) CALL PB_JUMP( NPCOL - 1, IMULADD( 1, JMP_NB ), IRAN, $ ITMP, IMULADDTMP ) CALL PB_JUMP( JLOCBLK - 1, IMULADD( 1, JMP_NQNB ), ITMP, $ IRAN, IMULADDTMP ) ELSE CALL PB_JUMP( 0, IMULADD( 1, JMP_1 ), ITMP, $ IRAN, IMULADDTMP ) END IF END IF * CALL PB_SETRAN( IRAN, IMULADD( 1, JMP_1 ) ) * RETURN * * End of PB_SETLOCRAN * END SUBROUTINE PB_LADD( J, K, I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER I( 2 ), J( 2 ), K( 2 ) * .. * * Purpose * ======= * * PB_LADD adds without carry two long positive integers K and J and put * the result into I. The long integers I, J, K are encoded on 31 bits * using an array of 2 integers. The 16-lower bits are stored in the * first entry of each array, the 15-higher bits in the second entry. * For efficiency purposes, the intrisic modulo function is inlined. * * Arguments * ========= * * J (local input) INTEGER array * On entry, J is an array of dimension 2 containing the encoded * long integer J. * * K (local input) INTEGER array * On entry, K is an array of dimension 2 containing the encoded * long integer K. * * I (local output) INTEGER array * On entry, I is an array of dimension 2. On exit, this array * contains the encoded long integer I. * * Further Details * =============== * * K( 2 ) K( 1 ) * 0XXXXXXX XXXXXXXX K I( 1 ) = MOD( K( 1 ) + J( 1 ), 2**16 ) * + carry = ( K( 1 ) + J( 1 ) ) / 2**16 * J( 2 ) J( 1 ) * 0XXXXXXX XXXXXXXX J I( 2 ) = K( 2 ) + J( 2 ) + carry * ---------------------- I( 2 ) = MOD( I( 2 ), 2**15 ) * I( 2 ) I( 1 ) * 0XXXXXXX XXXXXXXX I * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16 PARAMETER ( IPOW15 = 2**15, IPOW16 = 2**16 ) * .. * .. Local Scalars .. INTEGER ITMP1, ITMP2 * .. * .. Executable Statements .. * * I( 1 ) = MOD( K( 1 ) + J( 1 ), IPOW16 ) * ITMP1 = K( 1 ) + J( 1 ) ITMP2 = ITMP1 / IPOW16 I( 1 ) = ITMP1 - ITMP2 * IPOW16 * * I( 2 ) = MOD( ( K( 1 ) + J( 1 ) ) / IPOW16 + K( 2 ) + J( 2 ), * IPOW15 ) * ITMP1 = ITMP2 + K( 2 ) + J( 2 ) ITMP2 = ITMP1 / IPOW15 I( 2 ) = ITMP1 - ITMP2 * IPOW15 * RETURN * * End of PB_LADD * END SUBROUTINE PB_LMUL( K, J, I ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER I( 2 ), J( 2 ), K( 2 ) * .. * * Purpose * ======= * * PB_LMUL multiplies without carry two long positive integers K and J * and put the result into I. The long integers I, J, K are encoded on * 31 bits using an array of 2 integers. The 16-lower bits are stored in * the first entry of each array, the 15-higher bits in the second entry * of each array. For efficiency purposes, the intrisic modulo function * is inlined. * * Arguments * ========= * * K (local input) INTEGER array * On entry, K is an array of dimension 2 containing the encoded * long integer K. * * J (local input) INTEGER array * On entry, J is an array of dimension 2 containing the encoded * long integer J. * * I (local output) INTEGER array * On entry, I is an array of dimension 2. On exit, this array * contains the encoded long integer I. * * Further Details * =============== * * K( 2 ) K( 1 ) * 0XXXXXXX XXXXXXXX K I( 1 ) = MOD( K( 1 ) + J( 1 ), 2**16 ) * * carry = ( K( 1 ) + J( 1 ) ) / 2**16 * J( 2 ) J( 1 ) * 0XXXXXXX XXXXXXXX J I( 2 ) = K( 2 ) + J( 2 ) + carry * ---------------------- I( 2 ) = MOD( I( 2 ), 2**15 ) * I( 2 ) I( 1 ) * 0XXXXXXX XXXXXXXX I * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER IPOW15, IPOW16, IPOW30 PARAMETER ( IPOW15 = 2**15, IPOW16 = 2**16, $ IPOW30 = 2**30 ) * .. * .. Local Scalars .. INTEGER ITMP1, ITMP2 * .. * .. Executable Statements .. * ITMP1 = K( 1 ) * J( 1 ) IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * * I( 1 ) = MOD( ITMP1, IPOW16 ) * ITMP2 = ITMP1 / IPOW16 I( 1 ) = ITMP1 - ITMP2 * IPOW16 * ITMP1 = K( 1 ) * J( 2 ) + K( 2 ) * J( 1 ) IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * ITMP1 = ITMP2 + ITMP1 IF( ITMP1.LT.0 ) $ ITMP1 = ( ITMP1 + IPOW30 ) + IPOW30 * * I( 2 ) = MOD( ITMP1, IPOW15 ) * I( 2 ) = ITMP1 - ( ITMP1 / IPOW15 ) * IPOW15 * RETURN * * End of PB_LMUL * END SUBROUTINE PB_JUMP( K, MULADD, IRANN, IRANM, IMA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER K * .. * .. Array Arguments .. INTEGER IMA( 4 ), IRANM( 2 ), IRANN( 2 ), MULADD( 4 ) * .. * * Purpose * ======= * * PB_JUMP computes the constants A and C to jump K numbers in the ran- * dom sequence: * * X( n+K ) = A * X( n ) + C. * * The constants encoded in MULADD specify how to jump from entry in the * sequence to the next. * * Arguments * ========= * * K (local input) INTEGER * On entry, K specifies the number of entries of the sequence * to jump over. When K is less or equal than zero, A and C are * not computed, and IRANM is set to IRANN corresponding to a * jump of size zero. * * MULADD (local input) INTEGER array * On entry, MULADD is an array of dimension 4 containing the * encoded constants a and c to jump from X( n ) to X( n+1 ) * ( = a*X( n )+c) in the random sequence. MULADD(1:2) contains * respectively the 16-lower and 16-higher bits of the constant * a, and MULADD(3:4) contains the 16-lower and 16-higher bits * of the constant c. * * IRANN (local input) INTEGER array * On entry, IRANN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( n ). * * IRANM (local output) INTEGER array * On entry, IRANM is an array of dimension 2. On exit, this * array contains respectively the 16-lower and 16-higher bits * of the encoding of X( n+K ). * * IMA (local output) INTEGER array * On entry, IMA is an array of dimension 4. On exit, when K is * greater than zero, this array contains the encoded constants * A and C to jump from X( n ) to X( n+K ) in the random se- * quence. IMA(1:2) contains respectively the 16-lower and * 16-higher bits of the constant A, and IMA(3:4) contains the * 16-lower and 16-higher bits of the constant C. When K is * less or equal than zero, this array is not referenced. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Executable Statements .. * IF( K.GT.0 ) THEN * IMA( 1 ) = MULADD( 1 ) IMA( 2 ) = MULADD( 2 ) IMA( 3 ) = MULADD( 3 ) IMA( 4 ) = MULADD( 4 ) * DO 10 I = 1, K - 1 * CALL PB_LMUL( IMA, MULADD, J ) * IMA( 1 ) = J( 1 ) IMA( 2 ) = J( 2 ) * CALL PB_LMUL( IMA( 3 ), MULADD, J ) CALL PB_LADD( MULADD( 3 ), J, IMA( 3 ) ) * 10 CONTINUE * CALL PB_LMUL( IRANN, IMA, J ) CALL PB_LADD( J, IMA( 3 ), IRANM ) * ELSE * IRANM( 1 ) = IRANN( 1 ) IRANM( 2 ) = IRANN( 2 ) * END IF * RETURN * * End of PB_JUMP * END SUBROUTINE PB_SETRAN( IRAN, IAC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IAC( 4 ), IRAN( 2 ) * .. * * Purpose * ======= * * PB_SETRAN initializes the random generator with the encoding of the * first number X( 1 ) in the sequence, and the constants a and c used * to compute the next element in the sequence: * * X( n+1 ) = a * X( n ) + c. * * X( 1 ), a and c are stored in the common block RANCOM for later use * (see the routines PB_SRAN or PB_DRAN). * * Arguments * ========= * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( 1 ). * * IAC (local input) INTEGER array * On entry, IAC is an array of dimension 4. IAC(1:2) contain * respectively the 16-lower and 16-higher bits of the constant * a, and IAC(3:4) contain the 16-lower and 16-higher bits of * the constant c. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * IRAND( 1 ) = IRAN( 1 ) IRAND( 2 ) = IRAN( 2 ) IACS( 1 ) = IAC( 1 ) IACS( 2 ) = IAC( 2 ) IACS( 3 ) = IAC( 3 ) IACS( 4 ) = IAC( 4 ) * RETURN * * End of PB_SETRAN * END SUBROUTINE PB_JUMPIT( MULADD, IRANN, IRANM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Array Arguments .. INTEGER IRANM( 2 ), IRANN( 2 ), MULADD( 4 ) * .. * * Purpose * ======= * * PB_JUMPIT jumps in the random sequence from the number X( n ) enco- * ded in IRANN to the number X( m ) encoded in IRANM using the cons- * tants A and C encoded in MULADD: * * X( m ) = A * X( n ) + C. * * The constants A and C obviously depend on m and n, see the subroutine * PB_JUMP in order to set them up. * * Arguments * ========= * * MULADD (local input) INTEGER array * On netry, MULADD is an array of dimension 4. MULADD(1:2) con- * tains respectively the 16-lower and 16-higher bits of the * constant A, and MULADD(3:4) contains the 16-lower and * 16-higher bits of the constant C. * * IRANN (local input) INTEGER array * On entry, IRANN is an array of dimension 2. This array con- * tains respectively the 16-lower and 16-higher bits of the en- * coding of X( n ). * * IRANM (local output) INTEGER array * On entry, IRANM is an array of dimension 2. On exit, this * array contains respectively the 16-lower and 16-higher bits * of the encoding of X( m ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * CALL PB_LMUL( IRANN, MULADD, J ) CALL PB_LADD( J, MULADD( 3 ), IRANM ) * IRAND( 1 ) = IRANM( 1 ) IRAND( 2 ) = IRANM( 2 ) * RETURN * * End of PB_JUMPIT * END scalapack-1.8.0/PBLAS/TESTING/dlamch.f0000640000175000017500000006050310363532303016675 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. CHARACTER CMACH * .. * * Purpose * ======= * * DLAMCH determines double precision machine parameters. * * Arguments * ========= * * CMACH (input) CHARACTER*1 * Specifies the value to be returned by DLAMCH: * = 'E' or 'e', DLAMCH := eps * = 'S' or 's , DLAMCH := sfmin * = 'B' or 'b', DLAMCH := base * = 'P' or 'p', DLAMCH := eps*base * = 'N' or 'n', DLAMCH := t * = 'R' or 'r', DLAMCH := rnd * = 'M' or 'm', DLAMCH := emin * = 'U' or 'u', DLAMCH := rmin * = 'L' or 'l', DLAMCH := emax * = 'O' or 'o', DLAMCH := rmax * * where * * eps = relative machine precision * sfmin = safe minimum, such that 1/sfmin does not overflow * base = base of the machine * prec = eps*base * t = number of (base) digits in the mantissa * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise * emin = minimum exponent before (gradual) underflow * rmin = underflow threshold - base**(emin-1) * emax = largest exponent before overflow * rmax = overflow threshold - (base**emax)*(1-eps) * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL FIRST, LRND INTEGER BETA, IMAX, IMIN, IT DOUBLE PRECISION BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, $ RND, SFMIN, SMALL, T * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL DLAMC2 * .. * .. Save statement .. SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, $ EMAX, RMAX, PREC * .. * .. Data statements .. DATA FIRST / .TRUE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) BASE = BETA T = IT IF( LRND ) THEN RND = ONE EPS = ( BASE**( 1-IT ) ) / 2 ELSE RND = ZERO EPS = BASE**( 1-IT ) END IF PREC = EPS*BASE EMIN = IMIN EMAX = IMAX SFMIN = RMIN SMALL = ONE / RMAX IF( SMALL.GE.SFMIN ) THEN * * Use SMALL plus a bit, to avoid the possibility of rounding * causing overflow when computing 1/sfmin. * SFMIN = SMALL*( ONE+EPS ) END IF END IF * IF( LSAME( CMACH, 'E' ) ) THEN RMACH = EPS ELSE IF( LSAME( CMACH, 'S' ) ) THEN RMACH = SFMIN ELSE IF( LSAME( CMACH, 'B' ) ) THEN RMACH = BASE ELSE IF( LSAME( CMACH, 'P' ) ) THEN RMACH = PREC ELSE IF( LSAME( CMACH, 'N' ) ) THEN RMACH = T ELSE IF( LSAME( CMACH, 'R' ) ) THEN RMACH = RND ELSE IF( LSAME( CMACH, 'M' ) ) THEN RMACH = EMIN ELSE IF( LSAME( CMACH, 'U' ) ) THEN RMACH = RMIN ELSE IF( LSAME( CMACH, 'L' ) ) THEN RMACH = EMAX ELSE IF( LSAME( CMACH, 'O' ) ) THEN RMACH = RMAX END IF * DLAMCH = RMACH RETURN * * End of DLAMCH * END * ************************************************************************ * SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL IEEE1, RND INTEGER BETA, T * .. * * Purpose * ======= * * DLAMC1 determines the machine parameters given by BETA, T, RND, and * IEEE1. * * Arguments * ========= * * BETA (output) INTEGER * The base of the machine. * * T (output) INTEGER * The number of ( BETA ) digits in the mantissa. * * RND (output) LOGICAL * Specifies whether proper rounding ( RND = .TRUE. ) or * chopping ( RND = .FALSE. ) occurs in addition. This may not * be a reliable guide to the way in which the machine performs * its arithmetic. * * IEEE1 (output) LOGICAL * Specifies whether rounding appears to be done in the IEEE * 'round to nearest' style. * * Further Details * =============== * * The routine is based on the routine ENVRON by Malcolm and * incorporates suggestions by Gentleman and Marovich. See * * Malcolm M. A. (1972) Algorithms to reveal properties of * floating-point arithmetic. Comms. of the ACM, 15, 949-951. * * Gentleman W. M. and Marovich S. B. (1974) More on algorithms * that reveal properties of floating point arithmetic units. * Comms. of the ACM, 17, 276-277. * * ===================================================================== * * .. Local Scalars .. LOGICAL FIRST, LIEEE1, LRND INTEGER LBETA, LT DOUBLE PRECISION A, B, C, F, ONE, QTR, SAVEC, T1, T2 * .. * .. External Functions .. DOUBLE PRECISION DLAMC3 EXTERNAL DLAMC3 * .. * .. Save statement .. SAVE FIRST, LIEEE1, LBETA, LRND, LT * .. * .. Data statements .. DATA FIRST / .TRUE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. ONE = 1 * * LBETA, LIEEE1, LT and LRND are the local values of BETA, * IEEE1, T and RND. * * Throughout this routine we use the function DLAMC3 to ensure * that relevant values are stored and not held in registers, or * are not affected by optimizers. * * Compute a = 2.0**m with the smallest positive integer m such * that * * fl( a + 1.0 ) = a. * A = 1 C = 1 * *+ WHILE( C.EQ.ONE )LOOP 10 CONTINUE IF( C.EQ.ONE ) THEN A = 2*A C = DLAMC3( A, ONE ) C = DLAMC3( C, -A ) GO TO 10 END IF *+ END WHILE * * Now compute b = 2.0**m with the smallest positive integer m * such that * * fl( a + b ) .gt. a. * B = 1 C = DLAMC3( A, B ) * *+ WHILE( C.EQ.A )LOOP 20 CONTINUE IF( C.EQ.A ) THEN B = 2*B C = DLAMC3( A, B ) GO TO 20 END IF *+ END WHILE * * Now compute the base. a and c are neighbouring floating point * numbers in the interval ( beta**t, beta**( t + 1 ) ) and so * their difference is beta. Adding 0.25 to c is to ensure that it * is truncated to beta and not ( beta - 1 ). * QTR = ONE / 4 SAVEC = C C = DLAMC3( C, -A ) LBETA = C + QTR * * Now determine whether rounding or chopping occurs, by adding a * bit less than beta/2 and a bit more than beta/2 to a. * B = LBETA F = DLAMC3( B / 2, -B / 100 ) C = DLAMC3( F, A ) IF( C.EQ.A ) THEN LRND = .TRUE. ELSE LRND = .FALSE. END IF F = DLAMC3( B / 2, B / 100 ) C = DLAMC3( F, A ) IF( ( LRND ) .AND. ( C.EQ.A ) ) $ LRND = .FALSE. * * Try and decide whether rounding is done in the IEEE 'round to * nearest' style. B/2 is half a unit in the last place of the two * numbers A and SAVEC. Furthermore, A is even, i.e. has last bit * zero, and SAVEC is odd. Thus adding B/2 to A should not change * A, but adding B/2 to SAVEC should change SAVEC. * T1 = DLAMC3( B / 2, A ) T2 = DLAMC3( B / 2, SAVEC ) LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND * * Now find the mantissa, t. It should be the integer part of * log to the base beta of a, however it is safer to determine t * by powering. So we find t as the smallest positive integer for * which * * fl( beta**t + 1.0 ) = 1.0. * LT = 0 A = 1 C = 1 * *+ WHILE( C.EQ.ONE )LOOP 30 CONTINUE IF( C.EQ.ONE ) THEN LT = LT + 1 A = A*LBETA C = DLAMC3( A, ONE ) C = DLAMC3( C, -A ) GO TO 30 END IF *+ END WHILE * END IF * BETA = LBETA T = LT RND = LRND IEEE1 = LIEEE1 RETURN * * End of DLAMC1 * END * ************************************************************************ * SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL RND INTEGER BETA, EMAX, EMIN, T DOUBLE PRECISION EPS, RMAX, RMIN * .. * * Purpose * ======= * * DLAMC2 determines the machine parameters specified in its argument * list. * * Arguments * ========= * * BETA (output) INTEGER * The base of the machine. * * T (output) INTEGER * The number of ( BETA ) digits in the mantissa. * * RND (output) LOGICAL * Specifies whether proper rounding ( RND = .TRUE. ) or * chopping ( RND = .FALSE. ) occurs in addition. This may not * be a reliable guide to the way in which the machine performs * its arithmetic. * * EPS (output) DOUBLE PRECISION * The smallest positive number such that * * fl( 1.0 - EPS ) .LT. 1.0, * * where fl denotes the computed value. * * EMIN (output) INTEGER * The minimum exponent before (gradual) underflow occurs. * * RMIN (output) DOUBLE PRECISION * The smallest normalized number for the machine, given by * BASE**( EMIN - 1 ), where BASE is the floating point value * of BETA. * * EMAX (output) INTEGER * The maximum exponent before overflow occurs. * * RMAX (output) DOUBLE PRECISION * The largest positive number for the machine, given by * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point * value of BETA. * * Further Details * =============== * * The computation of EPS is based on a routine PARANOIA by * W. Kahan of the University of California at Berkeley. * * ===================================================================== * * .. Local Scalars .. LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, $ NGNMIN, NGPMIN DOUBLE PRECISION A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, $ SIXTH, SMALL, THIRD, TWO, ZERO * .. * .. External Functions .. DOUBLE PRECISION DLAMC3 EXTERNAL DLAMC3 * .. * .. External Subroutines .. EXTERNAL DLAMC1, DLAMC4, DLAMC5 * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Save statement .. SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, $ LRMIN, LT * .. * .. Data statements .. DATA FIRST / .TRUE. / , IWARN / .FALSE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. ZERO = 0 ONE = 1 TWO = 2 * * LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of * BETA, T, RND, EPS, EMIN and RMIN. * * Throughout this routine we use the function DLAMC3 to ensure * that relevant values are stored and not held in registers, or * are not affected by optimizers. * * DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. * CALL DLAMC1( LBETA, LT, LRND, LIEEE1 ) * * Start to find EPS. * B = LBETA A = B**( -LT ) LEPS = A * * Try some tricks to see whether or not this is the correct EPS. * B = TWO / 3 HALF = ONE / 2 SIXTH = DLAMC3( B, -HALF ) THIRD = DLAMC3( SIXTH, SIXTH ) B = DLAMC3( THIRD, -HALF ) B = DLAMC3( B, SIXTH ) B = ABS( B ) IF( B.LT.LEPS ) $ B = LEPS * LEPS = 1 * *+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP 10 CONTINUE IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN LEPS = B C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) C = DLAMC3( HALF, -C ) B = DLAMC3( HALF, C ) C = DLAMC3( HALF, -B ) B = DLAMC3( HALF, C ) GO TO 10 END IF *+ END WHILE * IF( A.LT.LEPS ) $ LEPS = A * * Computation of EPS complete. * * Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). * Keep dividing A by BETA until (gradual) underflow occurs. This * is detected when we cannot recover the previous A. * RBASE = ONE / LBETA SMALL = ONE DO 20 I = 1, 3 SMALL = DLAMC3( SMALL*RBASE, ZERO ) 20 CONTINUE A = DLAMC3( ONE, SMALL ) CALL DLAMC4( NGPMIN, ONE, LBETA ) CALL DLAMC4( NGNMIN, -ONE, LBETA ) CALL DLAMC4( GPMIN, A, LBETA ) CALL DLAMC4( GNMIN, -A, LBETA ) IEEE = .FALSE. * IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN IF( NGPMIN.EQ.GPMIN ) THEN LEMIN = NGPMIN * ( Non twos-complement machines, no gradual underflow; * e.g., VAX ) ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN LEMIN = NGPMIN - 1 + LT IEEE = .TRUE. * ( Non twos-complement machines, with gradual underflow; * e.g., IEEE standard followers ) ELSE LEMIN = MIN( NGPMIN, GPMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN LEMIN = MAX( NGPMIN, NGNMIN ) * ( Twos-complement machines, no gradual underflow; * e.g., CYBER 205 ) ELSE LEMIN = MIN( NGPMIN, NGNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. $ ( GPMIN.EQ.GNMIN ) ) THEN IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT * ( Twos-complement machines with gradual underflow; * no known machine ) ELSE LEMIN = MIN( NGPMIN, NGNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF *** * Comment out this if block if EMIN is ok IF( IWARN ) THEN FIRST = .TRUE. WRITE( 6, FMT = 9999 )LEMIN END IF *** * * Assume IEEE arithmetic if we found denormalised numbers above, * or if arithmetic seems to round in the IEEE style, determined * in routine DLAMC1. A true IEEE machine should have both things * true; however, faulty machines may have one or the other. * IEEE = IEEE .OR. LIEEE1 * * Compute RMIN by successive division by BETA. We could compute * RMIN as BASE**( EMIN - 1 ), but some machines underflow during * this computation. * LRMIN = 1 DO 30 I = 1, 1 - LEMIN LRMIN = DLAMC3( LRMIN*RBASE, ZERO ) 30 CONTINUE * * Finally, call DLAMC5 to compute EMAX and RMAX. * CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) END IF * BETA = LBETA T = LT RND = LRND EPS = LEPS EMIN = LEMIN RMIN = LRMIN EMAX = LEMAX RMAX = LRMAX * RETURN * 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', $ ' EMIN = ', I8, / $ ' If, after inspection, the value EMIN looks', $ ' acceptable please comment out ', $ / ' the IF block as marked within the code of routine', $ ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / ) * * End of DLAMC2 * END * ************************************************************************ * DOUBLE PRECISION FUNCTION DLAMC3( A, B ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. DOUBLE PRECISION A, B * .. * * Purpose * ======= * * DLAMC3 is intended to force A and B to be stored prior to doing * the addition of A and B , for use in situations where optimizers * might hold one of these in a register. * * Arguments * ========= * * A, B (input) DOUBLE PRECISION * The values A and B. * * ===================================================================== * * .. Executable Statements .. * DLAMC3 = A + B * RETURN * * End of DLAMC3 * END * ************************************************************************ * SUBROUTINE DLAMC4( EMIN, START, BASE ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. INTEGER BASE, EMIN DOUBLE PRECISION START * .. * * Purpose * ======= * * DLAMC4 is a service routine for DLAMC2. * * Arguments * ========= * * EMIN (output) EMIN * The minimum exponent before (gradual) underflow, computed by * setting A = START and dividing by BASE until the previous A * can not be recovered. * * START (input) DOUBLE PRECISION * The starting point for determining EMIN. * * BASE (input) INTEGER * The base of the machine. * * ===================================================================== * * .. Local Scalars .. INTEGER I DOUBLE PRECISION A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO * .. * .. External Functions .. DOUBLE PRECISION DLAMC3 EXTERNAL DLAMC3 * .. * .. Executable Statements .. * A = START ONE = 1 RBASE = ONE / BASE ZERO = 0 EMIN = 1 B1 = DLAMC3( A*RBASE, ZERO ) C1 = A C2 = A D1 = A D2 = A *+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. * $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP 10 CONTINUE IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. $ ( D2.EQ.A ) ) THEN EMIN = EMIN - 1 A = B1 B1 = DLAMC3( A / BASE, ZERO ) C1 = DLAMC3( B1*BASE, ZERO ) D1 = ZERO DO 20 I = 1, BASE D1 = D1 + B1 20 CONTINUE B2 = DLAMC3( A*RBASE, ZERO ) C2 = DLAMC3( B2 / RBASE, ZERO ) D2 = ZERO DO 30 I = 1, BASE D2 = D2 + B2 30 CONTINUE GO TO 10 END IF *+ END WHILE * RETURN * * End of DLAMC4 * END * ************************************************************************ * SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL IEEE INTEGER BETA, EMAX, EMIN, P DOUBLE PRECISION RMAX * .. * * Purpose * ======= * * DLAMC5 attempts to compute RMAX, the largest machine floating-point * number, without overflow. It assumes that EMAX + abs(EMIN) sum * approximately to a power of 2. It will fail on machines where this * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, * EMAX = 28718). It will also fail if the value supplied for EMIN is * too large (i.e. too close to zero), probably with overflow. * * Arguments * ========= * * BETA (input) INTEGER * The base of floating-point arithmetic. * * P (input) INTEGER * The number of base BETA digits in the mantissa of a * floating-point value. * * EMIN (input) INTEGER * The minimum exponent before (gradual) underflow. * * IEEE (input) LOGICAL * A logical flag specifying whether or not the arithmetic * system is thought to comply with the IEEE standard. * * EMAX (output) INTEGER * The largest exponent before overflow * * RMAX (output) DOUBLE PRECISION * The largest machine floating-point number. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP DOUBLE PRECISION OLDY, RECBAS, Y, Z * .. * .. External Functions .. DOUBLE PRECISION DLAMC3 EXTERNAL DLAMC3 * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * First compute LEXP and UEXP, two powers of 2 that bound * abs(EMIN). We then assume that EMAX + abs(EMIN) will sum * approximately to the bound that is closest to abs(EMIN). * (EMAX is the exponent of the required number RMAX). * LEXP = 1 EXBITS = 1 10 CONTINUE TRY = LEXP*2 IF( TRY.LE.( -EMIN ) ) THEN LEXP = TRY EXBITS = EXBITS + 1 GO TO 10 END IF IF( LEXP.EQ.-EMIN ) THEN UEXP = LEXP ELSE UEXP = TRY EXBITS = EXBITS + 1 END IF * * Now -LEXP is less than or equal to EMIN, and -UEXP is greater * than or equal to EMIN. EXBITS is the number of bits needed to * store the exponent. * IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN EXPSUM = 2*LEXP ELSE EXPSUM = 2*UEXP END IF * * EXPSUM is the exponent range, approximately equal to * EMAX - EMIN + 1 . * EMAX = EXPSUM + EMIN - 1 NBITS = 1 + EXBITS + P * * NBITS is the total number of bits needed to store a * floating-point number. * IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN * * Either there are an odd number of bits used to store a * floating-point number, which is unlikely, or some bits are * not used in the representation of numbers, which is possible, * (e.g. Cray machines) or the mantissa has an implicit bit, * (e.g. IEEE machines, Dec Vax machines), which is perhaps the * most likely. We have to assume the last alternative. * If this is true, then we need to reduce EMAX by one because * there must be some way of representing zero in an implicit-bit * system. On machines like Cray, we are reducing EMAX by one * unnecessarily. * EMAX = EMAX - 1 END IF * IF( IEEE ) THEN * * Assume we are on an IEEE machine which reserves one exponent * for infinity and NaN. * EMAX = EMAX - 1 END IF * * Now create RMAX, the largest machine number, which should * be equal to (1.0 - BETA**(-P)) * BETA**EMAX . * * First compute 1.0 - BETA**(-P), being careful that the * result is less than 1.0 . * RECBAS = ONE / BETA Z = BETA - ONE Y = ZERO DO 20 I = 1, P Z = Z*RECBAS IF( Y.LT.ONE ) $ OLDY = Y Y = DLAMC3( Y, Z ) 20 CONTINUE IF( Y.GE.ONE ) $ Y = OLDY * * Now multiply by BETA**EMAX to get RMAX. * DO 30 I = 1, EMAX Y = DLAMC3( Y*BETA, ZERO ) 30 CONTINUE * RMAX = Y RETURN * * End of DLAMC5 * END scalapack-1.8.0/PBLAS/TESTING/pdblastst.f0000640000175000017500000135622010363532303017452 0ustar muammarmuammar SUBROUTINE PDOPTEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDOPTEE tests whether the PBLAS respond correctly to a bad option * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PDCHKOPT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2nd option * APOS = 2 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 3rd option * APOS = 3 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2'nd option * APOS = 2 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 .OR. SCODE.EQ.40 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2'nd option * APOS = 2 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 3rd option * APOS = 3 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 4th option * APOS = 4 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st option * APOS = 1 CALL PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * END IF * RETURN * * End of PDOPTEE * END SUBROUTINE PDCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDCHKOPT tests the option ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the option to be * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PDCALLSUB, PDSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PDSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'D' ) ) THEN * * Generate bad DIAG option * DIAG = '/' * ELSE IF( LSAME( ARGNAM, 'S' ) ) THEN * * Generate bad SIDE option * SIDE = '/' * ELSE IF( LSAME( ARGNAM, 'A' ) ) THEN * * Generate bad TRANSA option * TRANSA = '/' * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Generate bad TRANSB option * TRANSB = '/' * ELSE IF( LSAME( ARGNAM, 'U' ) ) THEN * * Generate bad UPLO option * UPLO = '/' * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PDCHKOPT * END SUBROUTINE PDDIMEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDDIMEE tests whether the PBLAS respond correctly to a bad dimension * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PDCHKDIM * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 .OR. SCODE.EQ.12 .OR. SCODE.EQ.13 .OR. $ SCODE.EQ.14 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) dimension * APOS = 1 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st dimension * APOS = 2 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) dimension * APOS = 2 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) dimension * APOS = 4 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.24 ) THEN * * Check 1st dimension * APOS = 1 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 3rd dimension * APOS = 5 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 ) THEN * * Check 1st dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 2nd dimension * APOS = 4 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st dimension * APOS = 1 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st dimension * APOS = 5 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 6 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st dimension * APOS = 2 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st dimension * APOS = 3 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * END IF * RETURN * * End of PDDIMEE * END SUBROUTINE PDCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDCHKDIM tests the dimension ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the dimension to be * checked. ARGNAM can either be 'M', 'N' or 'K'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PDCALLSUB, PDSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER KDIM, MDIM, NDIM COMMON /PBLASN/KDIM, MDIM, NDIM * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PDSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'M' ) ) THEN * * Generate bad MDIM * MDIM = -1 * ELSE IF( LSAME( ARGNAM, 'N' ) ) THEN * * Generate bad NDIM * NDIM = -1 * ELSE * * Generate bad KDIM * KDIM = -1 * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PDCHKDIM * END SUBROUTINE PDVECEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDVECEE tests whether the PBLAS respond correctly to a bad vector * argument. Each vector is described by: , I, J, * DESC, INC. Out of all these, only I, J, * DESC, and INC can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PDCHKMAT * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * * Check 1st vector * APOS = 2 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 7 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.12 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) vector * APOS = 3 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.13 ) THEN * * Check 1st vector * APOS = 3 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 8 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.14 ) THEN * * Check 1st (and only) vector * APOS = 4 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st vector * APOS = 9 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 15 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st vector * APOS = 8 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 14 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) vector * APOS = 9 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st vector * APOS = 4 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 9 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.26 .OR. SCODE.EQ.27 ) THEN * * Check 1'st (and only) vector * APOS = 4 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * END IF * RETURN * * End of PDVECEE * END SUBROUTINE PDMATEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDMATEE tests whether the PBLAS respond correctly to a bad matrix * argument. Each matrix is described by: , I, J, * and DESC. Out of all these, only I, J and DESC * can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PDCHKMAT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 .OR. SCODE.EQ.23 ) THEN * * Check 1st (and only) matrix * APOS = 5 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st (and only) matrix * APOS = 4 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st (and only) matrix * APOS = 14 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.25 .OR. SCODE.EQ.26 ) THEN * * Check 1st (and only) matrix * APOS = 9 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st matrix * APOS = 7 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 16 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.32 .OR. SCODE.EQ.35 .OR. SCODE.EQ.36 ) THEN * * Check 1st matrix * APOS = 6 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 15 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 ) THEN * * Check 1st matrix * APOS = 6 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st matrix * APOS = 4 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 9 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st matrix * APOS = 8 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 12 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st matrix * APOS = 5 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st matrix * APOS = 6 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * END IF * RETURN * * End of PDMATEE * END SUBROUTINE PDSETPBLAS( ICTXT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT * .. * * Purpose * ======= * * PDSETPBLAS initializes *all* the dummy arguments to correct values. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. External Subroutines .. EXTERNAL PB_DESCSET2 * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM DOUBLE PRECISION USCLR, SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Set default values for options * DIAG = 'N' SIDE = 'L' TRANSA = 'N' TRANSB = 'N' UPLO = 'U' * * Set default values for scalars * KDIM = 1 MDIM = 1 NDIM = 1 ISCLR = 1 SCLR = ONE USCLR = ONE * * Set default values for distributed matrix A * A( 1, 1 ) = ONE A( 2, 1 ) = ONE A( 1, 2 ) = ONE A( 2, 2 ) = ONE IA = 1 JA = 1 CALL PB_DESCSET2( DESCA, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix B * B( 1, 1 ) = ONE B( 2, 1 ) = ONE B( 1, 2 ) = ONE B( 2, 2 ) = ONE IB = 1 JB = 1 CALL PB_DESCSET2( DESCB, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix C * C( 1, 1 ) = ONE C( 2, 1 ) = ONE C( 1, 2 ) = ONE C( 2, 2 ) = ONE IC = 1 JC = 1 CALL PB_DESCSET2( DESCC, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix X * X( 1 ) = ONE X( 2 ) = ONE IX = 1 JX = 1 CALL PB_DESCSET2( DESCX, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCX = 1 * * Set default values for distributed matrix Y * Y( 1 ) = ONE Y( 2 ) = ONE IY = 1 JY = 1 CALL PB_DESCSET2( DESCY, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCY = 1 * RETURN * * End of PDSETPBLAS * END SUBROUTINE PDCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the matrix or vector * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one * wants to check a matrix, and 'X' or 'Y' for a vector. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the first argument * of the matrix (or vector) ARGNAM. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT PARAMETER ( DESCMULT = 100 ) * .. * .. Local Scalars .. INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCHKPBE, PDCALLSUB, PDSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ARGNAM, 'A' ) ) THEN * * Check IA. Set all other OK, bad IA * CALL PDSETPBLAS( ICTXT ) IA = -1 INFOT = ARGPOS + 1 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JA. Set all other OK, bad JA * CALL PDSETPBLAS( ICTXT ) JA = -1 INFOT = ARGPOS + 2 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCA. Set all other OK, bad DESCA * DO 10 I = 1, DLEN_ * * Set I'th entry of DESCA to incorrect value, rest ok. * CALL PDSETPBLAS( ICTXT ) DESCA( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCA, CSRCA, LDA * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PDSETPBLAS( ICTXT ) * * Test RSRCA >= NPROW * IF( I.EQ.RSRC_ ) $ DESCA( I ) = NPROW * * Test CSRCA >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCA( I ) = NPCOL * * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCA( I ) = 1 ELSE DESCA( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 10 CONTINUE * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Check IB. Set all other OK, bad IB * CALL PDSETPBLAS( ICTXT ) IB = -1 INFOT = ARGPOS + 1 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JB. Set all other OK, bad JB * CALL PDSETPBLAS( ICTXT ) JB = -1 INFOT = ARGPOS + 2 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCB. Set all other OK, bad DESCB * DO 20 I = 1, DLEN_ * * Set I'th entry of DESCB to incorrect value, rest ok. * CALL PDSETPBLAS( ICTXT ) DESCB( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCB, CSRCB, LDB * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PDSETPBLAS( ICTXT ) * * Test RSRCB >= NPROW * IF( I.EQ.RSRC_ ) $ DESCB( I ) = NPROW * * Test CSRCB >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCB( I ) = NPCOL * * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCB( I ) = 1 ELSE DESCB( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 20 CONTINUE * ELSE IF( LSAME( ARGNAM, 'C' ) ) THEN * * Check IC. Set all other OK, bad IC * CALL PDSETPBLAS( ICTXT ) IC = -1 INFOT = ARGPOS + 1 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JC. Set all other OK, bad JC * CALL PDSETPBLAS( ICTXT ) JC = -1 INFOT = ARGPOS + 2 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCC. Set all other OK, bad DESCC * DO 30 I = 1, DLEN_ * * Set I'th entry of DESCC to incorrect value, rest ok. * CALL PDSETPBLAS( ICTXT ) DESCC( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCC, CSRCC, LDC * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PDSETPBLAS( ICTXT ) * * Test RSRCC >= NPROW * IF( I.EQ.RSRC_ ) $ DESCC( I ) = NPROW * * Test CSRCC >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCC( I ) = NPCOL * * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCC( I ) = 1 ELSE DESCC( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 30 CONTINUE * ELSE IF( LSAME( ARGNAM, 'X' ) ) THEN * * Check IX. Set all other OK, bad IX * CALL PDSETPBLAS( ICTXT ) IX = -1 INFOT = ARGPOS + 1 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JX. Set all other OK, bad JX * CALL PDSETPBLAS( ICTXT ) JX = -1 INFOT = ARGPOS + 2 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCX. Set all other OK, bad DESCX * DO 40 I = 1, DLEN_ * * Set I'th entry of DESCX to incorrect value, rest ok. * CALL PDSETPBLAS( ICTXT ) DESCX( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCX, CSRCX, LDX * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PDSETPBLAS( ICTXT ) * * Test RSRCX >= NPROW * IF( I.EQ.RSRC_ ) $ DESCX( I ) = NPROW * * Test CSRCX >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCX( I ) = NPCOL * * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCX( I ) = 1 ELSE DESCX( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 40 CONTINUE * * Check INCX. Set all other OK, bad INCX * CALL PDSETPBLAS( ICTXT ) INCX = -1 INFOT = ARGPOS + 4 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * ELSE * * Check IY. Set all other OK, bad IY * CALL PDSETPBLAS( ICTXT ) IY = -1 INFOT = ARGPOS + 1 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JY. Set all other OK, bad JY * CALL PDSETPBLAS( ICTXT ) JY = -1 INFOT = ARGPOS + 2 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCY. Set all other OK, bad DESCY * DO 50 I = 1, DLEN_ * * Set I'th entry of DESCY to incorrect value, rest ok. * CALL PDSETPBLAS( ICTXT ) DESCY( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCY, CSRCY, LDY * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PDSETPBLAS( ICTXT ) * * Test RSRCY >= NPROW * IF( I.EQ.RSRC_ ) $ DESCY( I ) = NPROW * * Test CSRCY >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCY( I ) = NPCOL * * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCY( I ) = 1 ELSE DESCY( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 50 CONTINUE * * Check INCY. Set all other OK, bad INCY * CALL PDSETPBLAS( ICTXT ) INCY = -1 INFOT = ARGPOS + 4 CALL PDCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * RETURN * * End of PDCHKMAT * END SUBROUTINE PDCALLSUB( SUBPTR, SCODE ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER SCODE * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDCALLSUB calls the subroutine SUBPTR with the calling sequence iden- * tified by SCODE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM DOUBLE PRECISION USCLR, SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) DOUBLE PRECISION A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * CALL SUBPTR( NDIM, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY ) * ELSE IF( SCODE.EQ.12 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.13 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, JY, $ DESCY, INCY ) * ELSE IF( SCODE.EQ.14 ) THEN * CALL SUBPTR( NDIM, SCLR, ISCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.15 ) THEN * CALL SUBPTR( NDIM, USCLR, X, IX, JX, DESCX, INCX ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.22 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, A, IA, JA, DESCA, X, IX, JX, $ DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.23 ) THEN * CALL SUBPTR( UPLO, TRANSA, DIAG, NDIM, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.24 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * ELSE IF( SCODE.EQ.25 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.26 ) THEN * CALL SUBPTR( UPLO, NDIM, USCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.27 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * CALL SUBPTR( TRANSA, TRANSB, MDIM, NDIM, KDIM, SCLR, A, IA, JA, $ DESCA, B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.32 ) THEN * CALL SUBPTR( SIDE, UPLO, MDIM, NDIM, SCLR, A, IA, JA, DESCA, B, $ IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.33 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.34 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, USCLR, A, IA, JA, DESCA, $ USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.35 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.36 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.37 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, C, IC, $ JC, DESCC ) * ELSE IF( SCODE.EQ.38 ) THEN * CALL SUBPTR( SIDE, UPLO, TRANSA, DIAG, MDIM, NDIM, SCLR, A, IA, $ JA, DESCA, B, IB, JB, DESCB ) * ELSE IF( SCODE.EQ.39 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, $ C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.40 ) THEN * CALL SUBPTR( UPLO, TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * END IF * RETURN * * End of PDCALLSUB * END SUBROUTINE PDERRSET( ERR, ERRMAX, XTRUE, X ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERR, ERRMAX, X, XTRUE * .. * * Purpose * ======= * * PDERRSET computes the absolute difference ERR = |XTRUE - X| and com- * pares it with zero. ERRMAX accumulates the absolute error difference. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERR (local output) DOUBLE PRECISION * On exit, ERR specifies the absolute difference |XTRUE - X|. * * ERRMAX (local input/local output) DOUBLE PRECISION * On entry, ERRMAX specifies a previously computed error. On * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ). * * XTRUE (local input) DOUBLE PRECISION * On entry, XTRUE specifies the true value. * * X (local input) DOUBLE PRECISION * On entry, X specifies the value to be compared to XTRUE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION PDDIFF EXTERNAL PDDIFF * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * ERR = ABS( PDDIFF( XTRUE, X ) ) * ERRMAX = MAX( ERRMAX, ERR ) * RETURN * * End of PDERRSET * END SUBROUTINE PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N DOUBLE PRECISION ERRMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) DOUBLE PRECISION PX( * ), X( * ) * .. * * Purpose * ======= * * PDCHKVIN checks that the submatrix sub( PX ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) DOUBLE PRECISION * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( X ) and sub( PX ). * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) DOUBLE PRECISION array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL, $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PB_INFOG2L, PDERRSET * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) * LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( N.EQ.1 ) THEN * IF( ( MYROW.EQ.IXROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.IXCOL .OR. COLREP ) ) $ CALL PDERRSET( ERR, ERRMAX, X( IX+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) * ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * IF( MYROW.EQ.IXROW .OR. ROWREP ) THEN * ICURCOL = IXCOL IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 10 J = JX, JN CALL PDERRSET( ERR, ERRMAX, X( IX+(J-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) JJX = JJX + 1 10 CONTINUE END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 20 KK = 0, JB-1 CALL PDERRSET( ERR, ERRMAX, X( IX+(J+KK-1)*LDX ), $ PX( IIX+(JJX+KK-1)*LDPX ) ) 20 CONTINUE * JJX = JJX + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 30 CONTINUE * END IF * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * IF( MYCOL.EQ.IXCOL .OR. COLREP ) THEN * ICURROW = IXROW IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 40 I = IX, IN CALL PDERRSET( ERR, ERRMAX, X( I+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) IIX = IIX + 1 40 CONTINUE END IF ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * DO 50 KK = 0, IB-1 CALL PDERRSET( ERR, ERRMAX, X( I+KK+(JX-1)*LDX ), $ PX( IIX+KK+(JJX-1)*LDPX ) ) 50 CONTINUE * IIX = IIX + IB * END IF * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PDCHKVIN * END SUBROUTINE PDCHKVOUT( N, X, PX, IX, JX, DESCX, INCX, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) DOUBLE PRECISION PX( * ), X( * ) * .. * * Purpose * ======= * * PDCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) DOUBLE PRECISION array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX, $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL, $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL, $ NPROW, NQALL DOUBLE PRECISION EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDERRSET * .. * .. External Functions .. INTEGER PB_NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCX( M_ ).LE.0 ).OR.( DESCX( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCX( M_ ), 1, DESCX( IMB_ ), DESCX( MB_ ), $ MYROW, DESCX( RSRC_ ), NPROW ) NQALL = PB_NUMROC( DESCX( N_ ), 1, DESCX( INB_ ), DESCX( NB_ ), $ MYCOL, DESCX( CSRC_ ), NPCOL ) * MBX = DESCX( MB_ ) NBX = DESCX( NB_ ) LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ICURROW = DESCX( RSRC_ ) ICURCOL = DESCX( CSRC_ ) ROWREP = ( ICURROW.EQ.-1 ) COLREP = ( ICURCOL.EQ.-1 ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN IMBX = DESCX( IMB_ ) ELSE IMBX = MBX END IF IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN INBX = DESCX( INB_ ) ELSE INBX = NBX END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - ICURROW + NPROW, NPROW ) END IF IF( COLREP ) THEN MYCOLDIST = 0 ELSE MYCOLDIST = MOD( MYCOL - ICURCOL + NPCOL, NPCOL ) END IF II = 1 JJ = 1 * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * I = 1 IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) IB = MIN( DESCX( M_ ), DESCX( IMB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. J+KK.GT.JX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 10 CONTINUE 20 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 50 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 30 CONTINUE 40 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 50 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * DO 110 I = DESCX( IMB_ ) + 1, DESCX( M_ ), MBX IB = MIN( DESCX( M_ ) - I + 1, MBX ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF * JJ = 1 JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 60 CONTINUE 70 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 100 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 80 CONTINUE 90 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 100 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * 110 CONTINUE * ELSE * * sub( X ) is a column vector * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) JB = MIN( DESCX( N_ ), DESCX( INB_ ) ) * DO 130 KK = 0, JB-1 DO 120 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. I+LL.GT.IX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 120 CONTINUE 130 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 160 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 150 KK = 0, JB-1 DO 140 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 140 CONTINUE 150 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 160 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 220 J = DESCX( INB_ ) + 1, DESCX( N_ ), NBX JB = MIN( DESCX( N_ ) - J + 1, NBX ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) DO 180 KK = 0, JB-1 DO 170 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 170 CONTINUE 180 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 210 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 200 KK = 0, JB-1 DO 190 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 190 CONTINUE 200 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 210 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * 220 CONTINUE * END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PDCHKVOUT * END SUBROUTINE PDCHKMIN( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N DOUBLE PRECISION ERRMAX * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION PA( * ), A( * ) * .. * * Purpose * ======= * * PDCHKMIN checks that the submatrix sub( PA ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) DOUBLE PRECISION * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( A ) and sub( PA ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * operand sub( A ). N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PB_INFOG2L, PDERRSET * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if posssible * IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) * II = IIA JJ = JJA LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) ICURROW = IAROW ICURCOL = IACOL ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA + JB - 1 * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 40 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 10 K = 0, IB-1 CALL PDERRSET( ERR, ERRMAX, A( IA+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 10 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 20 K = 0, IB-1 CALL PDERRSET( ERR, ERRMAX, A( I+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 20 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = IIA ICURROW = IAROW 40 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining column blocks * DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 80 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 50 K = 0, IB-1 CALL PDERRSET( ERR, ERRMAX, A( IA+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 50 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 70 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 60 K = 0, IB-1 CALL PDERRSET( ERR, ERRMAX, $ A( I+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 60 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 70 CONTINUE * II = IIA ICURROW = IAROW 80 CONTINUE * JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 90 CONTINUE * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PDCHKMIN * END SUBROUTINE PDCHKMOUT( M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), PA( * ) * .. * * Purpose * ======= * * PDCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( PA ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( PA ). N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK, $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PDERRSET * .. * .. External Functions .. INTEGER PB_NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCA( M_ ).LE.0 ).OR.( DESCA( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCA( M_ ), 1, DESCA( IMB_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) * LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) * II = 1 JJ = 1 ROWREP = ( DESCA( RSRC_ ).EQ.-1 ) COLREP = ( DESCA( CSRC_ ).EQ.-1 ) ICURCOL = DESCA( CSRC_ ) IF( MYROW.EQ.DESCA( RSRC_ ) .OR. ROWREP ) THEN IMBA = DESCA( IMB_ ) ELSE IMBA = DESCA( MB_ ) END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - DESCA( RSRC_ ) + NPROW, NPROW ) END IF * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) JB = MIN( DESCA( N_ ), DESCA( INB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 10 CONTINUE 20 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 50 II = IMBA + 1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 30 CONTINUE 40 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 50 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 110 J = DESCA( INB_ ) + 1, DESCA( N_ ), DESCA( NB_ ) JB = MIN( DESCA( N_ ) - J + 1, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 60 CONTINUE 70 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 100 II = IMBA+1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PDERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 80 CONTINUE 90 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 100 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * INSERT MODE 110 CONTINUE * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PDCHKMOUT * END SUBROUTINE PDMPRNT( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, $ CMATNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDMPRNT prints to the standard output an array A of size m by n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). The leading m * by n part of this array is printed. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array A to be printed. LDA must be at least MAX( 1, M ). * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM specifies the identifier of the matrix to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 20 J = 1, N * DO 10 I = 1, M * WRITE( NOUT, FMT = 9999 ) CMATNM, I, J, A( I, J ) * 10 CONTINUE * 20 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', D30.18 ) * RETURN * * End of PDMPRNT * END SUBROUTINE PDVPRNT( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, $ CVECNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CVECNM DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * PDVPRNT prints to the standard output an vector x of length n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * N (global input) INTEGER * On entry, N specifies the length of the vector X. N must be * at least zero. * * X (global input) DOUBLE PRECISION array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CVECNM (global input) CHARACTER*(*) * On entry, CVECNM specifies the identifier of the vector to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 10 I = 1, 1 + ( N-1 )*INCX, INCX * WRITE( NOUT, FMT = 9999 ) CVECNM, I, X( I ) * 10 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ')=', D30.18 ) * RETURN * * End of PDVPRNT * END SUBROUTINE PDMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, $ DESCY, INCY, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION A( * ), G( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PDMVCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies which matrix-vector product is to * be computed as follows: * If TRANS = 'N', * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ), * otherwise * sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) DOUBLE PRECISION array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP, TRAN INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX, $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA, $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL, $ NPROW DOUBLE PRECISION EPS, ERRI, GTMP, TBETA, YTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN TBETA = ONE ELSE TBETA = BETA END IF * TRAN = LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) IF( TRAN ) THEN ML = N NL = M ELSE ML = M NL = N END IF * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in Y using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * IOFFY = IY + ( JY - 1 ) * LDY DO 30 I = 1, ML YTMP = ZERO GTMP = ZERO IOFFX = IX + ( JX - 1 ) * LDX IF( TRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 10 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS( A( IOFFA ) * X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 10 CONTINUE ELSE IOFFA = IA + I - 1 + ( JA - 1 ) * LDA DO 20 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS( A( IOFFA ) * X( IOFFX ) ) IOFFA = IOFFA + LDA IOFFX = IOFFX + INCX 20 CONTINUE END IF G( I ) = ABS( ALPHA ) * GTMP + ABS( TBETA * Y( IOFFY ) ) Y( IOFFY ) = ALPHA * YTMP + TBETA * Y( IOFFY ) IOFFY = IOFFY + INCY 30 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPY = DESCY( LLD_ ) IOFFY = IY + ( JY - 1 ) * LDY CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, IIY, $ JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, ML ) JN = JY + JB - 1 * DO 50 J = JY, JN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( J-JY+1 ).NE.ZERO ) $ ERRI = ERRI / G( J-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 50 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 70 J = JN+1, JY+ML-1, DESCY( NB_ ) JB = MIN( JY+ML-J, DESCY( NB_ ) ) * DO 60 KK = 0, JB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( J+KK-JY+1 ).NE.ZERO ) $ ERRI = ERRI / G( J+KK-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 60 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 70 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, ML ) IN = IY + IB - 1 * DO 80 I = IY, IN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( I-IY+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 80 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 100 I = IN+1, IY+ML-1, DESCY( MB_ ) IB = MIN( IY+ML-I, DESCY( MB_ ) ) * DO 90 KK = 0, IB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( I+KK-IY+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 90 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 100 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PDMVCH * END SUBROUTINE PDVMCH( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, $ DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ALPHA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PDVMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) DOUBLE PRECISION array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFX, IOFFY, J, JJA, KK, LDA, LDPA, $ LDX, LDY, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ATMP, EPS, ERRI, GTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFY = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND * IOFFX = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA ATMP = X( IOFFX ) * Y( IOFFY ) GTMP = ABS( X( IOFFX ) * Y( IOFFY ) ) G( I ) = ABS( ALPHA ) * GTMP + ABS( A( IOFFA ) ) A( IOFFA ) = ALPHA * ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PDVMCH * END SUBROUTINE PDVMCH2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, $ JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ALPHA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION A( * ), G( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PDVMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) DOUBLE PRECISION array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J, $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERRI, GTMP, ATMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFXJ = IX + ( JX - 1 ) * LDX + ( J - 1 ) * INCX IOFFYJ = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IOFFXI = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFYI = IY + ( JY - 1 ) * LDY + ( I - 1 ) * INCY ATMP = X( IOFFXI ) * Y( IOFFYJ ) ATMP = ATMP + Y( IOFFYI ) * X( IOFFXJ ) GTMP = ABS( X( IOFFXI ) * Y( IOFFYJ ) ) GTMP = GTMP + ABS( Y( IOFFYI ) * X( IOFFXJ ) ) G( I ) = ABS( ALPHA ) * GTMP + ABS( A( IOFFA ) ) A( IOFFA ) = ALPHA*ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PDVMCH2 * END SUBROUTINE PDMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANSA, TRANSB INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N DOUBLE PRECISION ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ), $ PC( * ) * .. * * Purpose * ======= * * PDMMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) DOUBLE PRECISION array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) DOUBLE PRECISION array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP, TRANA, TRANB INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA, $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * TRANA = LSAME( TRANSA, 'T' ).OR.LSAME( TRANSA, 'C' ) TRANB = LSAME( TRANSB, 'T' ).OR.LSAME( TRANSB, 'C' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 240 J = 1, N * IOFFC = IC + ( JC + J - 2 ) * LDC DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( .NOT.TRANA .AND. .NOT.TRANB ) THEN DO 30 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 20 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA .AND. .NOT.TRANB ) THEN DO 50 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA .AND. TRANB ) THEN DO 70 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 60 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA .AND. TRANB ) THEN DO 90 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 80 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 80 CONTINUE 90 CONTINUE END IF * DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 200 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, M ) IN = IC + IBB - 1 * DO 210 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 210 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 230 I = IN+1, IC+M-1, DESCC( MB_ ) IBB = MIN( IC+M-I, DESCC( MB_ ) ) * DO 220 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 220 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 230 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 250 * 240 CONTINUE * 250 CONTINUE * RETURN * * End of PDMMCH * END SUBROUTINE PDMMCH1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, $ ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N DOUBLE PRECISION ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), CT( * ), G( * ), PC( * ) * .. * * Purpose * ======= * * PDMMCH1 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least * zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) DOUBLE PRECISION array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) DOUBLE PRECISION array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA, $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS( A( IOFFAK ) ) * $ ABS( A( IOFFAN ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS( A( IOFFAK ) ) * $ ABS( A( IOFFAN ) ) 40 CONTINUE 50 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PDMMCH1 * END SUBROUTINE PDMMCH2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N DOUBLE PRECISION ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION A( * ), B( * ), C( * ), CT( * ), G( * ), $ PC( * ) * .. * * Purpose * ======= * * PDMMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrices A and B have * to be transposed or not before computing the matrix-matrix * product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at * least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) DOUBLE PRECISION array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) DOUBLE PRECISION array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J, $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS( A( IOFFAN ) ) * ABS( B( IOFFBK ) ) + $ ABS( B( IOFFBN ) ) * ABS( A( IOFFAK ) ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS( A( IOFFAN ) ) * ABS( B( IOFFBK ) ) + $ ABS( B( IOFFBN ) ) * ABS( A( IOFFAK ) ) ) 40 CONTINUE 50 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = CT( I ) + BETA * C( IOFFC ) G( I ) = G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PDMMCH2 * END SUBROUTINE PDMMCH3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, M, N DOUBLE PRECISION ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), PC( * ) * .. * * Purpose * ======= * * PDMMCH3 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix addi- * tion. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) DOUBLE PRECISION array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J, $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL, $ NPROW DOUBLE PRECISION ERR0, ERRI, PREC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L, $ PDERRAXPBY * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PREC = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * Compute expected result in C using data in A and C. This part of * the computation is performed by every process in the grid. * INFO = 0 ERR = ZERO * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) LDPC = MAX( 1, DESCC( LLD_ ) ) ROWREP = ( DESCC( RSRC_ ).EQ.-1 ) COLREP = ( DESCC( CSRC_ ).EQ.-1 ) * IF( NOTRAN ) THEN * DO 20 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( JA - 1 + J - JC ) * LDA * DO 10 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFA = IOFFA + 1 IOFFC = IOFFC + 1 * 10 CONTINUE * 20 CONTINUE * ELSE * DO 40 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 30 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PDERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 30 CONTINUE * 40 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PDMMCH3 * END SUBROUTINE PDERRAXPBY( ERRBND, ALPHA, X, BETA, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA, ERRBND, PREC, X, Y * .. * * Purpose * ======= * * PDERRAXPBY serially computes y := beta*y + alpha * x and returns a * scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * X (global input) DOUBLE PRECISION * On entry, X specifies the scalar x to be scaled. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * Y (global input/global output) DOUBLE PRECISION * On entry, Y specifies the scalar y to be added. On exit, Y * contains the resulting scalar y. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION ADDBND, FACT, SUMPOS, SUMNEG, TMP * .. * .. Intrinsic Functions .. * .. * .. Executable Statements .. * SUMPOS = ZERO SUMNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = ALPHA * X IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * TMP = BETA * Y IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * Y = ( BETA * Y ) + ( ALPHA * X ) * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PDERRAXPBY * END DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 CMACH INTEGER ICTXT * .. * * Purpose * ======= * * PDLAMCH determines double precision machine parameters. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * CMACH (global input) CHARACTER*1 * On entry, CMACH specifies the value to be returned by PDLAMCH * as follows: * = 'E' or 'e', PDLAMCH := eps, * = 'S' or 's , PDLAMCH := sfmin, * = 'B' or 'b', PDLAMCH := base, * = 'P' or 'p', PDLAMCH := eps*base, * = 'N' or 'n', PDLAMCH := t, * = 'R' or 'r', PDLAMCH := rnd, * = 'M' or 'm', PDLAMCH := emin, * = 'U' or 'u', PDLAMCH := rmin, * = 'L' or 'l', PDLAMCH := emax, * = 'O' or 'o', PDLAMCH := rmax, * * where * * eps = relative machine precision, * sfmin = safe minimum, such that 1/sfmin does not overflow, * base = base of the machine, * prec = eps*base, * t = number of (base) digits in the mantissa, * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise, * emin = minimum exponent before (gradual) underflow, * rmin = underflow threshold - base**(emin-1), * emax = largest exponent before overflow, * rmax = overflow threshold - (base**emax)*(1-eps). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER IDUMM DOUBLE PRECISION TEMP * .. * .. External Subroutines .. EXTERNAL DGAMN2D, DGAMX2D, PB_TOPGET * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH, LSAME * .. * .. Executable Statements .. * TEMP = DLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL DGAMX2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL DGAMN2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PDLAMCH = TEMP * RETURN * * End of PDLAMCH * END SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, JA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno- * ted by sub( A ) to beta on the diagonal and alpha on the offdiago- * nals. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the part of the submatrix sub( A ) * to be set: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant to which the offdiagonal elements are to be set. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta, i.e., the constant * to which the diagonal elements are to be set. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be set. On exit, the * leading m by n submatrix sub( A ) is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * and IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER, $ UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA, $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC, $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP, $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD, $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1, $ UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_DLASET * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * ISROWREP = ( DESCA2( RSRC_ ).LT.0 ) ISCOLREP = ( DESCA2( CSRC_ ).LT.0 ) LDA = DESCA2( LLD_ ) * UPPER = .NOT.( LSAME( UPLO, 'L' ) ) LOWER = .NOT.( LSAME( UPLO, 'U' ) ) * IF( ( ( LOWER.AND.UPPER ).AND.( ALPHA.EQ.BETA ) ).OR. $ ( ISROWREP .AND. ISCOLREP ) ) THEN IF( ( MP.GT.0 ).AND.( NQ.GT.0 ) ) $ CALL PB_DLASET( UPLO, MP, NQ, 0, ALPHA, BETA, $ A( IIA + ( JJA - 1 ) * LDA ), LDA ) RETURN END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, MP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( ISROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( ISCOLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * M1 = MP N1 = NQ * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_DLASET( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, BETA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_DLASET( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ ALPHA, A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_DLASET( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ ALPHA, A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASET( UPLO, MBLOC, INBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASET( 'ALL', TMP1, INBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASET( 'ALL', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_DLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_DLASET( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_DLASET( 'All', IMBLOC, TMP1, 0, ALPHA, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_DLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASET( UPLO, MBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASET( 'All', TMP1, NBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * RETURN * * End of PDLASET * END SUBROUTINE PDLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_DLASCAL, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_DLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_DLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_DLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_DLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_DLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_DLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_DLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PDLASCAL * END SUBROUTINE PDLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PDLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP DOUBLE PRECISION ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_DLAGEN, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PDLADOM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_DLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_DLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) ALPHA = DBLE( MAXMN ) * IF( IOFFDA.GE.0 ) THEN CALL PDLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PDLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PDLAGEN * END SUBROUTINE PDLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP DOUBLE PRECISION ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PDLADOM * END SUBROUTINE PB_PDLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PB_PDLAPRNT prints to the standard output a submatrix sub( A ) deno- * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by * the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the row index of the printing pro- * cess. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the column index of the printing * process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM is the name of the matrix to be printed. * * NOUT (global input) INTEGER * On entry, NOUT specifies the output unit number. When NOUT is * equal to 6, the submatrix is printed on the screen. * * WORK (local workspace) DOUBLE PRECISION array * On entry, WORK is a work array of dimension at least equal to * MAX( IMB_A, MB_A ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PDLAPRN2 * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * CALL BLACS_GRIDINFO( DESCA2( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( DESCA2( RSRC_ ).GE.0 ) THEN IF( DESCA2( CSRC_ ).GE.0 ) THEN CALL PB_PDLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, ICPRNT, $ CMATNM, NOUT, DESCA2( RSRC_ ), $ DESCA2( CSRC_ ), WORK ) ELSE DO 10 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Colum-replicated array -- ' , $ 'copy in process column: ', PCOL CALL PB_PDLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, DESCA2( RSRC_ ), $ PCOL, WORK ) 10 CONTINUE END IF ELSE IF( DESCA2( CSRC_ ).GE.0 ) THEN DO 20 PROW = 0, NPROW - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Row-replicated array -- ' , $ 'copy in process row: ', PROW CALL PB_PDLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, $ DESCA2( CSRC_ ), WORK ) 20 CONTINUE ELSE DO 40 PROW = 0, NPROW - 1 DO 30 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Replicated array -- ' , $ 'copy in process (', PROW, ',', PCOL, ')' CALL PB_PDLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, PCOL, $ WORK ) 30 CONTINUE 40 CONTINUE END IF END IF * RETURN * * End of PB_PDLAPRNT * END SUBROUTINE PB_PDLAPRN2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, PROW, PCOL, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL AISCOLREP, AISROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, DGERV2D, $ DGESD2D, PB_INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) II = IIA JJ = JJA IF( DESCA( RSRC_ ).LT.0 ) THEN AISROWREP = .TRUE. IAROW = PROW ICURROW = PROW ELSE AISROWREP = .FALSE. ICURROW = IAROW END IF IF( DESCA( CSRC_ ).LT.0 ) THEN AISCOLREP = .TRUE. IACOL = PCOL ICURCOL = PCOL ELSE AISCOLREP = .FALSE. ICURCOL = IACOL END IF LDA = DESCA( LLD_ ) LDW = MAX( DESCA( IMB_ ), DESCA( MB_ ) ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA+JB-1 DO 60 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IB = DESCA( IMB_ )-IA+1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', D30.18 ) * RETURN * * End of PB_PDLAPRN2 * END SUBROUTINE PB_DFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N DOUBLE PRECISION CHKVAL * .. * .. Array Arguments .. DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PB_DFILLPAD surrounds a two dimensional local array with a guard-zone * initialized to the value CHKVAL. The user may later call the routine * PB_DCHEKPAD to discover if the guardzone has been violated. There are * three guardzones. The first is a buffer of size IPRE that is before * the start of the array. The second is the buffer of size IPOST which * is after the end of the array to be padded. Finally, there is a guard * zone inside every column of the array to be padded, in the elements * of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). On exit, this * array is the padded array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) DOUBLE PRECISION * On entry, CHKVAL specifies the value to pad the array with. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no pre-guardzone in PB_DFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no post-guardzone in PB_DFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + ( LDA - M ) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PB_DFILLPAD * END SUBROUTINE PB_DCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N DOUBLE PRECISION CHKVAL * .. * .. Array Arguments .. CHARACTER*(*) MESS DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PB_DCHEKPAD checks that the padding around a local array has not been * overwritten since the call to PB_DFILLPAD. Three types of errors are * reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells that the 5th element of the IPRE long buffer has been overwrit- * ten with the value 18, where it should still have the value CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells that the 19th element after the end of A was overwritten with * the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells that the element at the 12th row and 3rd column of A was over- * written with the value of 22, where it should still have the value of * CHKVAL. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * MESS (local input) CHARACTER*(*) * On entry, MESS is a ttring containing a user-defined message. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) DOUBLE PRECISION * On entry, CHKVAL specifies the value to pad the array with. * * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ A( I ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PB_DCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, A( I ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PB_DCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, A( I ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G20.7 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G20.7 ) * RETURN * * End of PB_DCHEKPAD * END SUBROUTINE PB_DLASET( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PB_DLASET initializes a two-dimensional array A to beta on the diago- * nal specified by IOFFD and alpha on the offdiagonals. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * = 'D' or 'd' Only the diagonal of A is set, * Otherwise: All of the array A is set. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the value to which the offdiagonal * array elements are set to. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the value to which the diagonal ar- * ray elements are set to. * * A (input/output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be set, and the strictly lower trape- * zoidal part of A is not referenced; When IUPLO = 'L' or 'l', * the leading m by n part of the array A must contain the * lower trapezoidal part of the matrix as specified by IOFFD to * be set, and the strictly upper trapezoidal part of A is * not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA and the strictly lower triangular * part of the array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA and the strictly upper triangular * part of the array to ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 50 I = 1, JTMP - 1 A( I, J ) = ALPHA 50 CONTINUE A( JTMP, J ) = BETA 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the array to BETA on the diagonal. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 90 CONTINUE * ELSE * * Set the array to BETA on the diagonal and ALPHA on the * offdiagonal. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA 100 CONTINUE 110 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 120 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 120 CONTINUE END IF * END IF * RETURN * * End of PB_DLASET * END SUBROUTINE PB_DLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PB_DLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (input/output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_DLASCAL * END SUBROUTINE PB_DLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PB_DLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP DOUBLE PRECISION DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PB_DRAND EXTERNAL LSAME, PB_DRAND * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) .OR. LSAME( AFORM, 'C' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( ( LSAME( AFORM, 'S' ) ).OR.( LSAME( AFORM, 'H' ) ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = PB_DRAND( 0 ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = PB_DRAND( 0 ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = PB_DRAND( 0 ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = PB_DRAND( 0 ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = PB_DRAND( 0 ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_DRAND( 0 ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * END IF * RETURN * * End of PB_DLAGEN * END DOUBLE PRECISION FUNCTION PB_DRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION PB_DRAN EXTERNAL PB_DRAN * .. * .. Executable Statements .. * PB_DRAND = ONE - TWO * PB_DRAN( IDUMM ) * RETURN * * End of PB_DRAND * END DOUBLE PRECISION FUNCTION PB_DRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648D+9, $ POW16 = 6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_DRAN = ( DBLE( IRAND( 1 ) ) + POW16 * DBLE( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_DRAN * END scalapack-1.8.0/PBLAS/TESTING/PCBLAS3TST.dat0000640000175000017500000000515710363532303017416 0ustar muammarmuammar'Level 3 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS3TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0E0, -4.0E0) value of ALPHA (3.0E0, -2.0E0) value of BETA 4 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 22 30 8 20 3 5 11 8 values of M 23 29 7 21 3 14 11 8 values of N 24 28 9 22 3 9 11 8 values of K 31 31 31 31 31 31 31 31 values of M_A 31 31 31 31 31 31 31 31 values of N_A 4 5 6 3 2 1 4 5 values of IMB_A 4 5 6 3 2 1 4 5 values of INB_A 4 5 6 3 2 1 4 5 values of MB_A 4 5 6 3 2 1 4 5 values of NB_A 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 5 1 7 7 1 12 1 11 values of IA 5 1 7 7 1 12 1 11 values of JA 32 32 32 32 32 32 32 32 values of M_B 32 32 32 32 32 32 32 32 values of N_B 4 5 6 3 2 1 4 5 values of IMB_B 4 5 6 3 2 1 4 5 values of INB_B 4 5 6 3 2 1 4 5 values of MB_B 4 5 6 3 2 1 4 5 values of NB_B 0 0 0 0 0 0 0 0 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 5 1 7 7 1 12 1 11 values of IB 5 1 7 7 1 12 1 11 values of JB 35 35 35 35 35 35 35 35 values of M_C 35 35 35 35 35 35 35 35 values of N_C 4 5 6 3 2 1 4 5 values of IMB_C 4 5 6 3 2 1 4 5 values of INB_C 4 5 6 3 2 1 4 5 values of MB_C 4 5 6 3 2 1 4 5 values of NB_C 0 0 0 0 0 0 0 0 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 5 1 7 7 1 12 1 11 values of IC 5 1 7 7 1 12 1 11 values of JC PCGEMM T put F for no test in the same column PCSYMM T put F for no test in the same column PCHEMM T put F for no test in the same column PCSYRK T put F for no test in the same column PCHERK T put F for no test in the same column PCSYR2K T put F for no test in the same column PCHER2K T put F for no test in the same column PCTRMM T put F for no test in the same column PCTRSM T put F for no test in the same column PCGEADD T put F for no test in the same column PCTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/PSBLAS2TST.dat0000640000175000017500000000635410363532303017435 0ustar muammarmuammar'Level 2 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS2TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 3.0E0 value of BETA 4 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 values of M 8 10 12 14 16 18 20 22 24 9 11 13 15 17 19 21 values of N 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of M_A 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of N_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of M_X 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of N_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 30 1 30 1 30 1 30 1 30 1 30 1 30 1 30 1 values of INCX 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of M_Y 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of N_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 40 1 40 1 40 1 40 1 40 1 40 1 40 1 40 1 values of INCY PSGEMV T put F for no test in the same column PSSYMV T put F for no test in the same column PSTRMV T put F for no test in the same column PSTRSV T put F for no test in the same column PSGER T put F for no test in the same column PSSYR T put F for no test in the same column PSSYR2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/psblas1tst.f0000640000175000017500000036306210363532303017553 0ustar muammarmuammar PROGRAM PSBLA1TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA1TST is the main testing program for the PBLAS Level 1 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 44 lines: * 'Level 1 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS1TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PSSWAP T put F for no test in the same column * PSSCAL T put F for no test in the same column * PSCOPY T put F for no test in the same column * PSAXPY T put F for no test in the same column * PSDOT T put F for no test in the same column * PSNRM2 T put F for no test in the same column * PSASUM T put F for no test in the same column * PSAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, REALSZ, TOTMEM, $ MEMSIZ, NSUBS REAL PADVAL, ZERO PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, ZERO = 0.0E+0, $ PADVAL = -9923.0E+0, NSUBS = 8 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IGAP, IMBX, IMBY, $ IMIDX, IMIDY, INBX, INBY, INCX, INCY, IPMATX, $ IPMATY, IPOSTX, IPOSTY, IPREX, IPREY, IPW, IPX, $ IPY, IVERB, IX, IXSEED, IY, IYSEED, J, JX, JY, $ K, LDX, LDY, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY, TSKIP, TSTCNT REAL ALPHA, PSCLR, PUSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCXR( DLEN_ ), $ DESCY( DLEN_ ), DESCYR( DLEN_ ), IERR( 4 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MXVAL( MAXTESTS ), $ MYVAL( MAXTESTS ), NBXVAL( MAXTESTS ), $ NBYVAL( MAXTESTS ), NVAL( MAXTESTS ), $ NXVAL( MAXTESTS ), NYVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) REAL MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PSLAPRNT, PB_SCHEKPAD, $ PB_SFILLPAD, PSAMAX, PSASUM, PSAXPY, $ PSBLA1TSTINFO, PSBLAS1TSTCHK, PSBLAS1TSTCHKE, $ PSCHKARG1, PSCHKVOUT, PSCOPY, PSDOT, PSLAGEN, $ PSMPRNT, PSNRM2, PSSCAL, PSSWAP, PSVPRNT, $ PVDESCCHK, PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSSWAP ', 'PSSCAL ', 'PSCOPY ', $ 'PSAXPY ', 'PSDOT ', 'PSNRM2 ', $ 'PSASUM ', 'PSAMAX '/ DATA YCHECK/.TRUE., .FALSE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE., .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler will abort on errors. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA1TSTINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, SOF, TEE, IAM, IGAP, IVERB, $ NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PSBLAS1TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( IPREX+1 ). * IPX = IPREX + 1 IPY = IPX + DESCX( LLD_ ) * NQX + IPOSTX + IPREY IPMATX = IPY + DESCY( LLD_ ) * NQY + IPOSTY IPMATY = IPMATX + MX * NX IPW = IPMATY + MY * NY * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPW - 1 + $ MAX( MAX( IMBX, MBX ), MAX( IMBY, MBY ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9989 ) SNAMES( K ) END IF * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices X and Y * CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) END IF * * Pad the guard zones of X, and Y * CALL PB_SFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_SFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT only args. * INFO = 0 CALL PSCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * INFO = 0 PSCLR = ZERO PUSCLR = ZERO PISCLR = 0 * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PSLAPRNT( 1, N, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PSLAPRNT( N, 1, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) END IF IF( YCHECK( K ) ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PSLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PSLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) IF( YCHECK( K ) ) $ CALL PB_PSLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PSSWAP * CALL PSSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PSSCAL * PSCLR = ALPHA CALL PSSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.3 ) THEN * * Test PSCOPY * CALL PSCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.4 ) THEN * * Test PSAXPY * PSCLR = ALPHA CALL PSAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.5 ) THEN * * Test PSDOT * CALL PSDOT( N, PSCLR, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.6 ) THEN * * Test PSNRM2 * CALL PSNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.7 ) THEN * * Test PSASUM * CALL PSASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.8 ) THEN * CALL PSAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9978 ) INFO GO TO 30 END IF * * Check the computations * CALL PSBLAS1TSTCHK( ICTXT, NOUT, K, N, PSCLR, PUSCLR, $ PISCLR, MEM( IPMATX ), MEM( IPX ), $ IX, JX, DESCX, INCX, MEM( IPMATY ), $ MEM( IPY ), IY, JY, DESCY, INCY, $ INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 END IF * * Check padding * CALL PB_SCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), $ IPREX, IPOSTX, PADVAL ) IF( YCHECK( K ) ) THEN CALL PB_SCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check input-only scalar arguments * INFO = 1 CALL PSCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * * Check input-only array arguments * CALL PSCHKVOUT( N, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_X', SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PSCHKVOUT( N, MEM( IPMATY ), MEM( IPY ), IY, JY, $ DESCY, INCY, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE. 0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 3 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PSLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PSVPRNT( ICTXT, NOUT, N, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PSLAPRNT( 1, N, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PSLAPRNT( N, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PSLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PSVPRNT( ICTXT, NOUT, N, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PSLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PSLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9984 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9989 FORMAT( 2X, ' Tested Subroutine: ', A ) 9988 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9987 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9986 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9985 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9984 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9983 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9982 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9981 FORMAT( 2X, 'Testing Summary') 9980 FORMAT( 2X, 'End of Tests.' ) 9979 FORMAT( 2X, 'Tests started.' ) 9978 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PSBLA1TST * END SUBROUTINE PSBLA1TSTINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, $ NXVAL, IMBXVAL, MBXVAL, INBXVAL, $ NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, $ LDVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, LTEST, SOF, TEE, IAM, IGAP, $ IVERB, NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, $ NGRIDS, NMAT, NOUT, NPROCS REAL ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA1TSTINFO get the needed startup information for testing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS1TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 1 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real single precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9982 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) * * End of PSBLA1TSTINFO * END SUBROUTINE PSBLAS1TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PSBLAS1TSTCHKE tests the error exits of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 8 (NSUBS). * If LTEST( 1 ) is .TRUE., PSSWAP will be tested; * If LTEST( 2 ) is .TRUE., PSSCAL will be tested; * If LTEST( 3 ) is .TRUE., PSCOPY will be tested; * If LTEST( 4 ) is .TRUE., PSAXPY will be tested; * If LTEST( 5 ) is .TRUE., PSDOT will be tested; * If LTEST( 6 ) is .TRUE., PSNRM2 will be tested; * If LTEST( 7 ) is .TRUE., PSASUM will be tested; * If LTEST( 8 ) is .TRUE., PSAMAX will be tested. * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PSAMAX, PSASUM, PSAXPY, PSCOPY, $ PSDIMEE, PSDOT, PSNRM2, PSSCAL, PSSWAP, $ PSVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/11, 12, 11, 13, 13, 15, 15, 14/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PSSWAP * I = 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSSWAP, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSSWAP, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSSCAL, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PSCOPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSCOPY, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSCOPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PSAXPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSAXPY, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSAXPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PSDOT * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSDOT, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSDOT, SCODE( I ), SNAMES( I ) ) END IF * * Test PSNRM2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSNRM2, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSNRM2, SCODE( I ), SNAMES( I ) ) END IF * * Test PSASUM * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSASUM, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSASUM, SCODE( I ), SNAMES( I ) ) END IF * * Test PSAMAX * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSAMAX, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSAMAX, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PSBLAS1TSTCHKE * END SUBROUTINE PSCHKARG1( ICTXT, NOUT, SNAME, N, ALPHA, IX, JX, $ DESCX, INCX, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT REAL ALPHA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PSCHKARG1 checks the input-only arguments of the Level 1 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, INCXREF, INCYREF, IXREF, IYREF, JXREF, $ JYREF, MYCOL, MYROW, NPCOL, NPROW, NREF REAL ALPHAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCXREF( DLEN_ ), DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * NREF = N IXREF = IX JXREF = JX DO 10 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 10 CONTINUE INCXREF = INCX IYREF = IY JYREF = JY DO 20 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 20 CONTINUE INCYREF = INCY ALPHAREF = ALPHA * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.GT.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PSCHKARG1 * END LOGICAL FUNCTION PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PISINSCOPE returns .TRUE. if the calling process is in the scope of * sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) and .FALSE. if it is * not. This routine is used to determine which processes should check * the answer returned by some Level 1 PBLAS routines. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * N (global input) INTEGER * The length of the subvector sub( X ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER IIX, IXCOL, IXROW, JJX, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN * * This is the special case, find process owner of IX, JX, and * only this process is the scope. * PISINSCOPE = ( ( IXROW.EQ.MYROW .OR. ROWREP ) .AND. $ ( IXCOL.EQ.MYCOL .OR. COLREP ) ) * ELSE * IF( INCX.EQ.DESCX( M_ ) ) THEN * * row vector * PISINSCOPE = ( MYROW.EQ.IXROW .OR. ROWREP ) * ELSE * * column vector * PISINSCOPE = ( MYCOL.EQ.IXCOL .OR. COLREP ) * END IF * END IF * RETURN * * End of PISINSCOPE * END SUBROUTINE PSBLAS1TSTCHK( ICTXT, NOUT, NROUT, N, PSCLR, PUSCLR, $ PISCLR, X, PX, IX, JX, DESCX, INCX, Y, $ PY, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT, NROUT, PISCLR REAL PSCLR, PUSCLR * .. * .. Array Arguments .. INTEGER DESCX( * ), DESCY( * ) REAL PX( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PSBLAS1TSTCHK performs the computational tests of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PSSWAP will be tested; * else if NROUT = 2, PSSCAL will be tested; * else if NROUT = 3, PSCOPY will be tested; * else if NROUT = 4, PSAXPY will be tested; * else if NROUT = 5, PSDOT will be tested; * else if NROUT = 6, PSNRM2 will be tested; * else if NROUT = 7, PSASUM will be tested; * else if NROUT = 8, PSAMAX will be tested. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * PSCLR (global input) REAL * On entry, depending on the value of NROUT, PSCLR specifies * the scalar ALPHA, or the output scalar returned by the PBLAS, * i.e., the dot product, the 2-norm, the absolute sum or the * value of AMAX. * * PUSCLR (global input) REAL * On entry, PUSCLR specifies the real part of the scalar ALPHA * used by the real scaling, the 2-norm, or the absolute sum * routines. PUSCLR is not used in the real versions of this * routine. * * PISCLR (global input) REAL * On entry, PISCLR specifies the value of the global index re- * turned by PSAMAX, otherwise PISCLR is not used. * * X (local input/local output) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) REAL array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) REAL array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, INXSCOPE, INYSCOPE, ROWREP INTEGER I, IB, ICURCOL, ICURROW, IDUMM, IIX, IIY, IN, $ IOFFX, IOFFY, ISCLR, IXCOL, IXROW, IYCOL, $ IYROW, J, JB, JJX, JJY, JN, KK, LDX, LDY, $ MYCOL, MYROW, NPCOL, NPROW REAL ERR, ERRMAX, PREC, SCLR, USCLR * .. * .. Local Arrays .. INTEGER IERR( 6 ) CHARACTER*5 ARGIN1, ARGIN2, ARGOUT1, ARGOUT2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_INFOG2L, PSCHKVIN, $ PSERRASUM, PSERRAXPY, PSERRDOT, PSERRNRM2, $ PSERRSCAL, SCOPY, SSWAP * .. * .. External Functions .. LOGICAL PISINSCOPE INTEGER ISAMAX REAL PSLAMCH EXTERNAL ISAMAX, PISINSCOPE, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ARGIN1 = ' ' ARGIN2 = ' ' ARGOUT1 = ' ' ARGOUT2 = ' ' DO 10 I = 1, 6 IERR( I ) = 0 10 CONTINUE * PREC = PSLAMCH( ICTXT, 'precision' ) * IF( NROUT.EQ.1 ) THEN * * Test PSSWAP * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL SSWAP( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PSCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.2 ) THEN * * Test PSSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 20 J = JX, JN * CALL PSERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 20 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 40 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 30 KK = 0, JB-1 * CALL PSERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 30 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 50 I = IX, IN * CALL PSERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 70 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 60 KK = 0, IB-1 * CALL PSERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 60 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 70 CONTINUE * END IF * ELSE IF( NROUT.EQ.3 ) THEN * * Test PSCOPY * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL SCOPY( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PSCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PSAXPY * CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) LDY = DESCY( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, $ IIY, JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, N ) JN = JY + JB - 1 * DO 140 J = JY, JN * CALL PSERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 140 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 160 J = JN+1, JY+N-1, DESCY( NB_ ) JB = MIN( JY+N-J, DESCY( NB_ ) ) * DO 150 KK = 0, JB-1 * CALL PSERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 150 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 160 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, N ) IN = IY + IB - 1 * DO 170 I = IY, IN * CALL PSERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 170 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 190 I = IN+1, IY+N-1, DESCY( MB_ ) IB = MIN( IY+N-I, DESCY( MB_ ) ) * DO 180 KK = 0, IB-1 * CALL PSERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 180 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 190 CONTINUE * END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PSDOT * CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PSCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PSERRDOT( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOT' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOT' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PSNRM2 * CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PSERRNRM2( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF ELSE USCLR = ZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PSASUM * CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PSERRASUM( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ) .GT. ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF ELSE USCLR = ZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PSAMAX * CALL PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN ISCLR = ISAMAX( N, X( IOFFX ), INCX ) IF( N.LT.1 ) THEN SCLR = ZERO ELSE IF( ( INCX.EQ.1 ).AND.( DESCX( M_ ).EQ.1 ).AND. $ ( N.EQ.1 ) ) THEN ISCLR = JX SCLR = X( IOFFX ) ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN ISCLR = JX + ISCLR - 1 SCLR = X( IX + ( ISCLR - 1 ) * DESCX( M_ ) ) ELSE ISCLR = IX + ISCLR - 1 SCLR = X( ISCLR + ( JX - 1 ) * DESCX( M_ ) ) END IF * IF( PSCLR.NE.SCLR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF * IF( PISCLR.NE.ISCLR ) THEN IERR( 5 ) = 1 WRITE( ARGIN2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF ELSE ISCLR = 0 SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF IF( PISCLR.NE.ISCLR ) THEN IERR( 6 ) = 1 WRITE( ARGOUT2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF END IF * END IF * * Find IERR across all processes * CALL IGAMX2D( ICTXT, 'All', ' ', 6, 1, IERR, 6, IDUMM, IDUMM, -1, $ -1, 0 ) * * Encode the errors found in INFO * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'X' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Y' END IF * IF( IERR( 3 ).NE.0 ) $ INFO = INFO + 4 * IF( IERR( 4 ).NE.0 ) $ INFO = INFO + 8 * IF( IERR( 5 ).NE.0 ) $ INFO = INFO + 16 * IF( IERR( 6 ).NE.0 ) $ INFO = INFO + 32 * 9999 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' in scope is incorrect.' ) 9997 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' out of scope is incorrect.' ) 9996 FORMAT( 2X, ' ***** Expected value is: ', E16.8, /2X, $ ' Obtained value is: ', E16.8 ) 9995 FORMAT( 2X, ' ***** Expected value is: ', I6, /2X, $ ' Obtained value is: ', I6 ) * RETURN * * End of PSBLAS1TSTCHK * END SUBROUTINE PSERRDOT( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N REAL ERRBND, PREC, SCLR * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * PSERRDOT serially computes the dot product X**T * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) REAL * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) REAL array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) REAL array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY REAL ADDBND, FACT, SUMNEG, SUMPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMPOS = ZERO SUMNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N TMP = X( IX ) * Y( IY ) SCLR = SCLR + TMP IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF IX = IX + INCX IY = IY + INCY 10 CONTINUE * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PSERRDOT * END SUBROUTINE PSERRNRM2( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ERRBND, PREC, USCLR * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * PSERRNRM2 serially computes the 2-norm the vector X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If norm1 = SCLR and norm2 are two different computed results, and * norm1 being assumed to be correct, we require * * abs( norm1 - norm2 ) <= ERRBND = ERRFACT * abs( norm1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) REAL * On exit, USCLR specifies the 2-norm of the vector X. * * X (global input) REAL array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IX REAL ABSXI, ADDBND, FACT, SCALE, SSQ, SUMSCA, SUMSSQ * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * USCLR = ZERO SUMSSQ = ONE SUMSCA = ZERO ADDBND = TWO * TWO * TWO * PREC FACT = ONE + TWO * ( ( ONE + PREC )**3 - ONE ) * SCALE = ZERO SSQ = ONE DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX IF( X( IX ).NE.ZERO ) THEN ABSXI = ABS( X( IX ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE * USCLR = SCALE * SQRT( SSQ ) * * Error on square root * ERRBND = SQRT( SUMSSQ ) * ( ONE + TWO * ( 1.00001E+0 * PREC ) ) * ERRBND = ( SUMSCA * ERRBND ) - USCLR * RETURN * * End of PSERRNRM2 * END SUBROUTINE PSERRASUM( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ERRBND, PREC, USCLR * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * PSERRASUM serially computes the sum of absolute values of the vector * X and returns a scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies a scaled relative acceptable error * bound. In this case the error bound is just the absolute sum * multiplied by a constant proportional to the machine preci- * sion. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) REAL * On exit, USCLR specifies the sum of absolute values of the * vector X. * * X (global input) REAL array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL TWO, ZERO PARAMETER ( TWO = 2.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IX REAL ADDBND * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * IX = 1 USCLR = ZERO ADDBND = TWO * TWO * TWO * PREC * DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX USCLR = USCLR + ABS( X( IX ) ) 10 CONTINUE * ERRBND = ADDBND * USCLR * RETURN * * End of PSERRASUM * END SUBROUTINE PSERRSCAL( ERRBND, PSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC, PSCLR, X * .. * * Purpose * ======= * * PSERRSCAL serially computes the product PSCLR * X and returns a sca- * led relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) REAL * On entry, PSCLR specifies the scale factor. * * X (global input/global output) REAL * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL TWO PARAMETER ( TWO = 2.0E+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * X = PSCLR * X * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PSERRSCAL * END SUBROUTINE PSERRAXPY( ERRBND, PSCLR, X, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC, PSCLR, X, Y * .. * * Purpose * ======= * * PSERRAXPY serially computes Y := Y + PSCLR * X and returns a scaled * relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) REAL * On entry, PSCLR specifies the scale factor. * * X (global input) REAL * On entry, X specifies the scalar to be scaled. * * Y (global input/global output) REAL * On entry, Y specifies the scalar to be added. On exit, Y con- * tains the resulting scalar. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. REAL ADDBND, FACT, SUMPOS, SUMNEG, TMP * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * SUMPOS = ZERO SUMNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = PSCLR * X IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * TMP = Y IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP ELSE SUMNEG = SUMNEG - TMP END IF * Y = Y + ( PSCLR * X ) * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PSERRAXPY * END scalapack-1.8.0/PBLAS/TESTING/pcblas1tst.f0000640000175000017500000042031410363532303017525 0ustar muammarmuammar PROGRAM PCBLA1TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA1TST is the main testing program for the PBLAS Level 1 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 46 lines: * 'Level 1 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS1TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PCSWAP T put F for no test in the same column * PCSCAL T put F for no test in the same column * PCSSCAL T put F for no test in the same column * PCCOPY T put F for no test in the same column * PCAXPY T put F for no test in the same column * PCDOTU T put F for no test in the same column * PCDOTC T put F for no test in the same column * PSCNRM2 T put F for no test in the same column * PSCASUM T put F for no test in the same column * PCAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, CPLXSZ, TOTMEM, $ MEMSIZ, NSUBS REAL RZERO COMPLEX PADVAL, ZERO PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ CPLXSZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ RZERO = 0.0E+0, ZERO = ( 0.0E+0, 0.0E+0 ), $ NSUBS = 10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IGAP, IMBX, IMBY, $ IMIDX, IMIDY, INBX, INBY, INCX, INCY, IPMATX, $ IPMATY, IPOSTX, IPOSTY, IPREX, IPREY, IPW, IPX, $ IPY, IVERB, IX, IXSEED, IY, IYSEED, J, JX, JY, $ K, LDX, LDY, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY, TSKIP, TSTCNT REAL PUSCLR COMPLEX ALPHA, PSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCXR( DLEN_ ), $ DESCY( DLEN_ ), DESCYR( DLEN_ ), IERR( 4 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MXVAL( MAXTESTS ), $ MYVAL( MAXTESTS ), NBXVAL( MAXTESTS ), $ NBYVAL( MAXTESTS ), NVAL( MAXTESTS ), $ NXVAL( MAXTESTS ), NYVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_CCHEKPAD, PB_CFILLPAD, PB_DESCSET2, $ PB_PCLAPRNT, PCAMAX, PCAXPY, PCBLA1TSTINFO, $ PCBLAS1TSTCHK, PCBLAS1TSTCHKE, PCCHKARG1, $ PCCHKVOUT, PCCOPY, PCDOTC, PCDOTU, PCLAGEN, $ PCMPRNT, PCSCAL, PCSSCAL, PCSWAP, PCVPRNT, $ PSCASUM, PSCNRM2, PVDESCCHK, PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCSWAP ', 'PCSCAL ', $ 'PCSSCAL', 'PCCOPY ', 'PCAXPY ', $ 'PCDOTU ', 'PCDOTC ', 'PSCNRM2', $ 'PSCASUM', 'PCAMAX'/ DATA YCHECK/.TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE., .TRUE., .TRUE., .FALSE., .FALSE., $ .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler will abort on errors. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA1TSTINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, SOF, TEE, IAM, IGAP, IVERB, $ NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PCBLAS1TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( IPREX+1 ). * IPX = IPREX + 1 IPY = IPX + DESCX( LLD_ ) * NQX + IPOSTX + IPREY IPMATX = IPY + DESCY( LLD_ ) * NQY + IPOSTY IPMATY = IPMATX + MX * NX IPW = IPMATY + MY * NY * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPW - 1 + $ MAX( MAX( IMBX, MBX ), MAX( IMBY, MBY ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9989 ) SNAMES( K ) END IF * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices X and Y * CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) END IF * * Pad the guard zones of X, and Y * CALL PB_CFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_CFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT only args. * INFO = 0 CALL PCCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * INFO = 0 PSCLR = ZERO PUSCLR = RZERO PISCLR = 0 * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PCLAPRNT( 1, N, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PCLAPRNT( N, 1, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) END IF IF( YCHECK( K ) ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PCLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PCLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) IF( YCHECK( K ) ) $ CALL PB_PCLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PCSWAP * CALL PCSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PCSCAL * PSCLR = ALPHA CALL PCSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.3 ) THEN * * Test PCSSCAL * PUSCLR = REAL( ALPHA ) CALL PCSSCAL( N, REAL( ALPHA ), MEM( IPX ), IX, JX, $ DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PCCOPY * CALL PCCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.5 ) THEN * * Test PCAXPY * PSCLR = ALPHA CALL PCAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.6 ) THEN * * Test PCDOTU * CALL PCDOTU( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.7 ) THEN * * Test PCDOTC * CALL PCDOTC( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.8 ) THEN * * Test PSCNRM2 * CALL PSCNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.9 ) THEN * * Test PSCASUM * CALL PSCASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.10 ) THEN * CALL PCAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9978 ) INFO GO TO 30 END IF * * Check the computations * CALL PCBLAS1TSTCHK( ICTXT, NOUT, K, N, PSCLR, PUSCLR, $ PISCLR, MEM( IPMATX ), MEM( IPX ), $ IX, JX, DESCX, INCX, MEM( IPMATY ), $ MEM( IPY ), IY, JY, DESCY, INCY, $ INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 END IF * * Check padding * CALL PB_CCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), $ IPREX, IPOSTX, PADVAL ) IF( YCHECK( K ) ) THEN CALL PB_CCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check input-only scalar arguments * INFO = 1 CALL PCCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * * Check input-only array arguments * CALL PCCHKVOUT( N, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_X', SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PCCHKVOUT( N, MEM( IPMATY ), MEM( IPY ), IY, JY, $ DESCY, INCY, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE. 0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 3 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PCLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PCVPRNT( ICTXT, NOUT, N, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PCLAPRNT( 1, N, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PCLAPRNT( N, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PCLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PCVPRNT( ICTXT, NOUT, N, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PCLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PCLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9984 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9989 FORMAT( 2X, ' Tested Subroutine: ', A ) 9988 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9987 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9986 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9985 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9984 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9983 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9982 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9981 FORMAT( 2X, 'Testing Summary') 9980 FORMAT( 2X, 'End of Tests.' ) 9979 FORMAT( 2X, 'Tests started.' ) 9978 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PCBLA1TST * END SUBROUTINE PCBLA1TSTINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, $ NXVAL, IMBXVAL, MBXVAL, INBXVAL, $ NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, $ LDVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, LTEST, SOF, TEE, IAM, IGAP, $ IVERB, NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, $ NGRIDS, NMAT, NOUT, NPROCS COMPLEX ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA1TSTINFO get the needed startup information for testing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least ten. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS+4 ) with NSUBS equal to 10. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS1TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 1 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex single precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9982 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) * * End of PCBLA1TSTINFO * END SUBROUTINE PCBLAS1TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PCBLAS1TSTCHKE tests the error exits of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 10 (NSUBS). * If LTEST( 1 ) is .TRUE., PCSWAP will be tested; * If LTEST( 2 ) is .TRUE., PCSCAL will be tested; * If LTEST( 3 ) is .TRUE., PCSSCAL will be tested; * If LTEST( 4 ) is .TRUE., PCCOPY will be tested; * If LTEST( 5 ) is .TRUE., PCAXPY will be tested; * If LTEST( 6 ) is .TRUE., PCDOTU will be tested; * If LTEST( 7 ) is .TRUE., PCDOTC will be tested; * If LTEST( 8 ) is .TRUE., PSCNRM2 will be tested; * If LTEST( 9 ) is .TRUE., PSCASUM will be tested; * If LTEST( 10 ) is .TRUE., PCAMAX will be tested. * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PCAMAX, PCAXPY, PCCOPY, $ PCDIMEE, PCDOTC, PCDOTU, PCSCAL, PCSSCAL, $ PCSWAP, PCVECEE, PSCASUM, PSCNRM2 * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/11, 12, 12, 11, 13, 13, 13, 15, 15, 14/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PCSWAP * I = 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCSWAP, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCSWAP, SCODE( I ), SNAMES( I ) ) END IF * * Test PCSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCSCAL, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PCSSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCSSCAL, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCSSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PCCOPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCCOPY, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCCOPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PCAXPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCAXPY, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCAXPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PCDOTU * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCDOTU, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCDOTU, SCODE( I ), SNAMES( I ) ) END IF * * Test PCDOTC * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCDOTC, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCDOTC, SCODE( I ), SNAMES( I ) ) END IF * * PSCNRM2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PSCNRM2, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PSCNRM2, SCODE( I ), SNAMES( I ) ) END IF * * Test PSCASUM * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PSCASUM, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PSCASUM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCAMAX * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCAMAX, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCAMAX, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PCBLAS1TSTCHKE * END SUBROUTINE PCCHKARG1( ICTXT, NOUT, SNAME, N, ALPHA, IX, JX, $ DESCX, INCX, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT COMPLEX ALPHA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PCCHKARG1 checks the input-only arguments of the Level 1 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, INCXREF, INCYREF, IXREF, IYREF, JXREF, $ JYREF, MYCOL, MYROW, NPCOL, NPROW, NREF COMPLEX ALPHAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCXREF( DLEN_ ), DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * NREF = N IXREF = IX JXREF = JX DO 10 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 10 CONTINUE INCXREF = INCX IYREF = IY JYREF = JY DO 20 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 20 CONTINUE INCYREF = INCY ALPHAREF = ALPHA * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.GT.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PCCHKARG1 * END LOGICAL FUNCTION PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PISINSCOPE returns .TRUE. if the calling process is in the scope of * sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) and .FALSE. if it is * not. This routine is used to determine which processes should check * the answer returned by some Level 1 PBLAS routines. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * N (global input) INTEGER * The length of the subvector sub( X ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER IIX, IXCOL, IXROW, JJX, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN * * This is the special case, find process owner of IX, JX, and * only this process is the scope. * PISINSCOPE = ( ( IXROW.EQ.MYROW .OR. ROWREP ) .AND. $ ( IXCOL.EQ.MYCOL .OR. COLREP ) ) * ELSE * IF( INCX.EQ.DESCX( M_ ) ) THEN * * row vector * PISINSCOPE = ( MYROW.EQ.IXROW .OR. ROWREP ) * ELSE * * column vector * PISINSCOPE = ( MYCOL.EQ.IXCOL .OR. COLREP ) * END IF * END IF * RETURN * * End of PISINSCOPE * END SUBROUTINE PCBLAS1TSTCHK( ICTXT, NOUT, NROUT, N, PSCLR, PUSCLR, $ PISCLR, X, PX, IX, JX, DESCX, INCX, Y, $ PY, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT, NROUT, PISCLR REAL PUSCLR COMPLEX PSCLR * .. * .. Array Arguments .. INTEGER DESCX( * ), DESCY( * ) COMPLEX PX( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PCBLAS1TSTCHK performs the computational tests of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PCSWAP will be tested; * else if NROUT = 2, PCSCAL will be tested; * else if NROUT = 3, PCSSCAL will be tested; * else if NROUT = 4, PCCOPY will be tested; * else if NROUT = 5, PCAXPY will be tested; * else if NROUT = 6, PCDOTU will be tested; * else if NROUT = 7, PCDOTC will be tested; * else if NROUT = 8, PSCNRM2 will be tested; * else if NROUT = 9, PSCASUM will be tested; * else if NROUT = 10, PCAMAX will be tested. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * PSCLR (global input) COMPLEX * On entry, depending on the value of NROUT, PSCLR specifies * the scalar ALPHA, or the output scalar returned by the PBLAS, * i.e., the dot product, the 2-norm, the absolute sum or the * value of AMAX. * * PUSCLR (global input) REAL * On entry, PUSCLR specifies the real part of the scalar ALPHA * used by the real scaling, the 2-norm, or the absolute sum * routines. PUSCLR is not used in the real versions of this * routine. * * PISCLR (global input) REAL * On entry, PISCLR specifies the value of the global index re- * turned by PCAMAX, otherwise PISCLR is not used. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL RZERO COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ RZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, INXSCOPE, INYSCOPE, ROWREP INTEGER I, IB, ICURCOL, ICURROW, IDUMM, IIX, IIY, IN, $ IOFFX, IOFFY, ISCLR, IXCOL, IXROW, IYCOL, $ IYROW, J, JB, JJX, JJY, JN, KK, LDX, LDY, $ MYCOL, MYROW, NPCOL, NPROW REAL ERR, ERRMAX, PREC, USCLR COMPLEX SCLR * .. * .. Local Arrays .. INTEGER IERR( 6 ) CHARACTER*5 ARGIN1, ARGIN2, ARGOUT1, ARGOUT2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CSWAP, IGAMX2D, $ PB_INFOG2L, PCCHKVIN, PCERRASUM, PCERRAXPY, $ PCERRDOTC, PCERRDOTU, PCERRNRM2, PCERRSCAL, $ PCSERRSCAL * .. * .. External Functions .. LOGICAL PISINSCOPE INTEGER ICAMAX REAL PSLAMCH EXTERNAL ICAMAX, PISINSCOPE, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ARGIN1 = ' ' ARGIN2 = ' ' ARGOUT1 = ' ' ARGOUT2 = ' ' DO 10 I = 1, 6 IERR( I ) = 0 10 CONTINUE * PREC = PSLAMCH( ICTXT, 'precision' ) * IF( NROUT.EQ.1 ) THEN * * Test PCSWAP * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL CSWAP( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PCCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.2 ) THEN * * Test PCSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 20 J = JX, JN * CALL PCERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 20 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 40 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 30 KK = 0, JB-1 * CALL PCERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 30 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 50 I = IX, IN * CALL PCERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 70 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 60 KK = 0, IB-1 * CALL PCERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 60 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 70 CONTINUE * END IF * ELSE IF( NROUT.EQ.3 ) THEN * * Test PCSSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 80 J = JX, JN * CALL PCSERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 80 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 100 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 90 KK = 0, JB-1 * CALL PCSERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 90 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 100 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 110 I = IX, IN * CALL PCSERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 120 KK = 0, IB-1 * CALL PCSERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * ELSE IF( NROUT.EQ.4 ) THEN * * Test PCCOPY * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL CCOPY( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PCCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PCAXPY * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) LDY = DESCY( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, $ IIY, JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, N ) JN = JY + JB - 1 * DO 140 J = JY, JN * CALL PCERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 140 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 160 J = JN+1, JY+N-1, DESCY( NB_ ) JB = MIN( JY+N-J, DESCY( NB_ ) ) * DO 150 KK = 0, JB-1 * CALL PCERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 150 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 160 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, N ) IN = IY + IB - 1 * DO 170 I = IY, IN * CALL PCERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 170 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 190 I = IN+1, IY+N-1, DESCY( MB_ ) IB = MIN( IY+N-I, DESCY( MB_ ) ) * DO 180 KK = 0, IB-1 * CALL PCERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 180 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 190 CONTINUE * END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PCDOTU * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PCCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PCERRDOTU( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOTU' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOTU' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PCDOTC * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PCCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PCERRDOTC( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOTC' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOTC' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PSCNRM2 * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PCERRNRM2( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF ELSE USCLR = RZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.9 ) THEN * * Test PSCASUM * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PCERRASUM( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ) .GT. ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF ELSE USCLR = RZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.10 ) THEN * * Test PCAMAX * CALL PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN ISCLR = ICAMAX( N, X( IOFFX ), INCX ) IF( N.LT.1 ) THEN SCLR = ZERO ELSE IF( ( INCX.EQ.1 ).AND.( DESCX( M_ ).EQ.1 ).AND. $ ( N.EQ.1 ) ) THEN ISCLR = JX SCLR = X( IOFFX ) ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN ISCLR = JX + ISCLR - 1 SCLR = X( IX + ( ISCLR - 1 ) * DESCX( M_ ) ) ELSE ISCLR = IX + ISCLR - 1 SCLR = X( ISCLR + ( JX - 1 ) * DESCX( M_ ) ) END IF * IF( PSCLR.NE.SCLR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF * IF( PISCLR.NE.ISCLR ) THEN IERR( 5 ) = 1 WRITE( ARGIN2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF ELSE ISCLR = 0 SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF IF( PISCLR.NE.ISCLR ) THEN IERR( 6 ) = 1 WRITE( ARGOUT2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF END IF * END IF * * Find IERR across all processes * CALL IGAMX2D( ICTXT, 'All', ' ', 6, 1, IERR, 6, IDUMM, IDUMM, -1, $ -1, 0 ) * * Encode the errors found in INFO * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'X' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Y' END IF * IF( IERR( 3 ).NE.0 ) $ INFO = INFO + 4 * IF( IERR( 4 ).NE.0 ) $ INFO = INFO + 8 * IF( IERR( 5 ).NE.0 ) $ INFO = INFO + 16 * IF( IERR( 6 ).NE.0 ) $ INFO = INFO + 32 * 9999 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' in scope is incorrect.' ) 9997 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' out of scope is incorrect.' ) 9996 FORMAT( 2X, ' ***** Expected value is: ', E16.8, '+i*(', $ E16.8, '),', /2X, ' Obtained value is: ', $ E16.8, '+i*(', E16.8, ')' ) 9995 FORMAT( 2X, ' ***** Expected value is: ', I6, /2X, $ ' Obtained value is: ', I6 ) 9994 FORMAT( 2X, ' ***** Expected value is: ', E16.8, /2X, $ ' Obtained value is: ', E16.8 ) * RETURN * * End of PCBLAS1TSTCHK * END SUBROUTINE PCERRDOTU( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N REAL ERRBND, PREC COMPLEX SCLR * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * PCERRDOTU serially computes the dot product X**T * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) COMPLEX * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) COMPLEX array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) COMPLEX array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY REAL ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N * SCLR = SCLR + X( IX ) * Y( IY ) * TMP = REAL( X( IX ) ) * REAL( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = - AIMAG( X( IX ) ) * AIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = AIMAG( X( IX ) ) * REAL( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * TMP = REAL( X( IX ) ) * AIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * IX = IX + INCX IY = IY + INCY * 10 CONTINUE * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PCERRDOTU * END SUBROUTINE PCERRDOTC( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N REAL ERRBND, PREC COMPLEX SCLR * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * PCERRDOTC serially computes the dot product X**H * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) COMPLEX * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) COMPLEX array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) COMPLEX array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY REAL ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, REAL * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N * SCLR = SCLR + CONJG( X( IX ) ) * Y( IY ) * TMP = REAL( X( IX ) ) * REAL( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = AIMAG( X( IX ) ) * AIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = - AIMAG( X( IX ) ) * REAL( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * TMP = REAL( X( IX ) ) * AIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * IX = IX + INCX IY = IY + INCY * 10 CONTINUE * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PCERRDOTC * END SUBROUTINE PCERRNRM2( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ERRBND, PREC, USCLR * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * PCERRNRM2 serially computes the 2-norm the vector X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If norm1 = SCLR and norm2 are two different computed results, and * norm1 being assumed to be correct, we require * * abs( norm1 - norm2 ) <= ERRBND = ERRFACT * abs( norm1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) REAL * On exit, USCLR specifies the 2-norm of the vector X. * * X (global input) COMPLEX array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IX REAL ABSXI, ADDBND, FACT, SCALE, SSQ, SUMSCA, SUMSSQ * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, REAL * .. * .. Executable Statements .. * USCLR = ZERO SUMSSQ = ONE SUMSCA = ZERO ADDBND = TWO * TWO * TWO * PREC FACT = ONE + TWO * ( ( ONE + PREC )**3 - ONE ) * SCALE = ZERO SSQ = ONE DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX IF( REAL( X( IX ) ).NE.ZERO ) THEN ABSXI = ABS( REAL( X( IX ) ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF IF( AIMAG( X( IX ) ).NE.ZERO ) THEN ABSXI = ABS( AIMAG( X( IX ) ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE * USCLR = SCALE * SQRT( SSQ ) * * Error on square root * ERRBND = SQRT( SUMSSQ ) * ( ONE + TWO * ( 1.00001E+0 * PREC ) ) * ERRBND = ( SUMSCA * ERRBND ) - USCLR * RETURN * * End of PCERRNRM2 * END SUBROUTINE PCERRASUM( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ERRBND, PREC, USCLR * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * PCERRASUM serially computes the sum of absolute values of the vector * X and returns a scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies a scaled relative acceptable error * bound. In this case the error bound is just the absolute sum * multiplied by a constant proportional to the machine preci- * sion. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) REAL * On exit, USCLR specifies the sum of absolute values of the * vector X. * * X (global input) COMPLEX array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL TWO, ZERO PARAMETER ( TWO = 2.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IX REAL ADDBND * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, REAL * .. * .. Executable Statements .. * IX = 1 USCLR = ZERO ADDBND = TWO * TWO * TWO * PREC * DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX USCLR = USCLR + ABS( REAL( X( IX ) ) ) + $ ABS( AIMAG( X( IX ) ) ) 10 CONTINUE * ERRBND = ADDBND * USCLR * RETURN * * End of PCERRASUM * END SUBROUTINE PCERRSCAL( ERRBND, PSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC COMPLEX PSCLR, X * .. * * Purpose * ======= * * PCERRSCAL serially computes the product PSCLR * X and returns a sca- * led relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) COMPLEX * On entry, PSCLR specifies the scale factor. * * X (global input/global output) COMPLEX * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL TWO PARAMETER ( TWO = 2.0E+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * X = PSCLR * X * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PCERRSCAL * END SUBROUTINE PCSERRSCAL( ERRBND, PUSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC, PUSCLR COMPLEX X * .. * * Purpose * ======= * * PCSERRSCAL serially computes the product PUSCLR * X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PUSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PUSCLR (global input) REAL * On entry, PUSCLR specifies the real scale factor. * * X (global input/global output) COMPLEX * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL TWO PARAMETER ( TWO = 2.0E+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, REAL * .. * .. Executable Statements .. * X = CMPLX( PUSCLR * REAL( X ), PUSCLR * AIMAG( X ) ) * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PCSERRSCAL * END SUBROUTINE PCERRAXPY( ERRBND, PSCLR, X, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC COMPLEX PSCLR, X, Y * .. * * Purpose * ======= * * PCERRAXPY serially computes Y := Y + PSCLR * X and returns a scaled * relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) COMPLEX * On entry, PSCLR specifies the scale factor. * * X (global input) COMPLEX * On entry, X specifies the scalar to be scaled. * * Y (global input/global output) COMPLEX * On entry, Y specifies the scalar to be added. On exit, Y con- * tains the resulting scalar. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. REAL ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS COMPLEX TMP * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, MAX, REAL * .. * .. Executable Statements .. * SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = PSCLR * X IF( REAL( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + REAL( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - REAL( TMP ) * FACT END IF IF( AIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + AIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - AIMAG( TMP ) * FACT END IF * TMP = Y IF( REAL( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + REAL( TMP ) ELSE SUMRNEG = SUMRNEG - REAL( TMP ) END IF IF( AIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + AIMAG( TMP ) ELSE SUMINEG = SUMINEG - AIMAG( TMP ) END IF * Y = Y + ( PSCLR * X ) * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PCERRAXPY * END scalapack-1.8.0/PBLAS/TESTING/psblas2tst.f0000640000175000017500000034567110363532303017562 0ustar muammarmuammar PROGRAM PSBLA2TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA2TST is the main testing program for the PBLAS Level 2 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 60 lines: * 'Level 2 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS2TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 1.0E0 value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PSGEMV T put F for no test in the same column * PSSYMV T put F for no test in the same column * PSTRMV T put F for no test in the same column * PSTRSV T put F for no test in the same column * PSGER T put F for no test in the same column * PSSYR T put F for no test in the same column * PSSYR2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, REALSZ, TOTMEM, $ MEMSIZ, NSUBS REAL ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, ZERO = 0.0E+0, $ ONE = 1.0E+0, PADVAL = -9923.0E+0, $ NSUBS = 7, ROGUE = -1.0E+10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IGAP, IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, $ INBA, INBX, INBY, INCX, INCY, IPA, IPG, IPMATA, $ IPMATX, IPMATY, IPOSTA, IPOSTX, IPOSTY, IPREA, $ IPREX, IPREY, IPX, IPY, IVERB, IX, IXSEED, IY, $ IYSEED, J, JA, JX, JY, K, LDA, LDX, LDY, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY, TSKIP, TSTCNT REAL ALPHA, BETA, SCALE, THRESH * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCX( DLEN_ ), $ DESCXR( DLEN_ ), DESCY( DLEN_ ), $ DESCYR( DLEN_ ), IAVAL( MAXTESTS ), IERR( 6 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) REAL MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PSLAPRNT, PB_SCHEKPAD, $ PB_SFILLPAD, PB_SLASCAL, PB_SLASET, PMDESCCHK, $ PMDIMCHK, PSBLA2TSTINFO, PSBLAS2TSTCHK, $ PSBLAS2TSTCHKE, PSCHKARG2, PSCHKVOUT, PSGEMV, $ PSGER, PSLAGEN, PSLASCAL, PSLASET, PSMPRNT, $ PSSYMV, PSSYR, PSSYR2, PSTRMV, PSTRSV, PSVPRNT, $ PVDESCCHK, PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSGEMV ', 'PSSYMV ', 'PSTRMV ', $ 'PSTRSV ', 'PSGER ', 'PSSYR ', $ 'PSSYR2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA2TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, MAXTESTS, NGRIDS, $ PVAL, MAXGRIDS, QVAL, MAXGRIDS, NBLOG, LTEST, $ SOF, TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PSBLAS2TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) INBA = INBAVAL( J ) MBA = MBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) INBX = INBXVAL( J ) MBX = MBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) INBY = INBYVAL( J ) MBY = MBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPX = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREX IPY = IPX + DESCX( LLD_ )*NQX + IPOSTX + IPREY IPMATA = IPY + DESCY( LLD_ )*NQY + IPOSTY IPMATX = IPMATA + MA*NA IPMATY = IPMATX + MX*NX IPG = IPMATY + MAX( MX*NX, MY*NY ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPG + MAX( M, N ) - 1 + $ MAX( MAX( IMBA, MBA ), $ MAX( MAX( IMBX, MBX ), $ MAX( IMBY, MBY ) ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) SNAMES( K ) END IF * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ) THEN AFORM = 'S' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PSLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PSLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN * CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * ELSE * * If Y is not needed, generate a copy of X instead * CALL PB_DESCSET2( DESCYR, MX, NX, IMBX, INBX, MBX, $ NBX, -1, -1, ICTXT, MAX( 1, MX ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCYR, IXSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A * IF( ( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PSLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) IF( K.NE.2 ) THEN CALL PB_SLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PSLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) IF( K.NE.2 ) THEN CALL PB_SLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( K.EQ.3 .OR. K.EQ.4 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PSLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_SLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PSLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_SLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) CALL PSLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_SLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PSLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_SLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PSLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_SLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) CALL PSLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_SLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * END IF * * Pad the guard zones of A, X and Y * CALL PB_SFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * CALL PB_SFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_SFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PSCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PSLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_INITIAL_A', $ NOUT, MEM( IPG ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, 0, $ 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPG ) ) END IF * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PSLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PSLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF * IF( YCHECK( K ) ) THEN IF( IVERB.EQ.2 ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PSLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PSLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF END IF * * Call the Level 2 PBLAS routine * INFO = 0 IF( K.EQ.1 ) THEN * * Test PSGEMV * CALL PSGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PSSYMV * CALL PSSYMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.3 ) THEN * * Test PSTRMV * CALL PSTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PSTRSV * CALL PSTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.5 ) THEN * * Test PSGER * CALL PSGER( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.6 ) THEN * * Test PSSYR * CALL PSSYR( UPLO, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.7 ) THEN * * Test PSSYR2 * CALL PSSYR2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_SCHEKPAD( ICTXT, SNAMES( K ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), IPREA, $ IPOSTA, PADVAL ) * CALL PB_SCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), IPREX, $ IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_SCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check the computations * CALL PSBLAS2TSTCHK( ICTXT, NOUT, K, UPLO, TRANS, DIAG, M, $ N, ALPHA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, MEM( IPMATX ), $ MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, THRESH, ROGUE, $ MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PSCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Check input-only array arguments * CALL PSCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, IERR( 4 ) ) CALL PSCHKVOUT( NLX, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 5 ) ) * IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_A', $ SNAMES( K ) END IF * IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_X', $ SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PSCHKVOUT( NLY, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PSLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PSMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PSLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', $ NOUT, MEM( IPMATA ) ) END IF IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PSLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( NLX.GT.0 ) $ CALL PSVPRNT( ICTXT, NOUT, NLX, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PSLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PSLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PSLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( NLY.GT.0 ) $ CALL PSVPRNT( ICTXT, NOUT, NLY, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PSLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PSLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9980 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, ' Tested Subroutine: ', A ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9983 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9982 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9981 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9980 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9979 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9978 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9977 FORMAT( 2X, 'Testing Summary') 9976 FORMAT( 2X, 'End of Tests.' ) 9975 FORMAT( 2X, 'Tests started.' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PSBLA2TST * END SUBROUTINE PSBLA2TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, SOF, TEE, $ IAM, IGAP, IVERB, NPROCS, THRESH, ALPHA, $ BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL ALPHA, BETA, THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA2TSTINFO get the needed startup information for testing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least seven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) REAL * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS+4 ) with NSUBS equal to 7. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D *ype real dble cplx zplx * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS2TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 2 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real single precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) 9981 FORMAT( 2X, 'Beta : ', G16.6 ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PSBLA2TSTINFO * END SUBROUTINE PSBLAS2TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PSBLAS2TSTCHKE tests the error exits of the Level 2 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 7 (NSUBS). * If LTEST( 1 ) is .TRUE., PSGEMV will be tested; * If LTEST( 2 ) is .TRUE., PSSYMV will be tested; * If LTEST( 3 ) is .TRUE., PSTRMV will be tested; * If LTEST( 4 ) is .TRUE., PSTRSV will be tested; * If LTEST( 5 ) is .TRUE., PSGER will be tested; * If LTEST( 6 ) is .TRUE., PSSYR will be tested; * If LTEST( 7 ) is .TRUE., PSSYR2 will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PSDIMEE, PSGEMV, PSGER, $ PSMATEE, PSOPTEE, PSSYMV, PSSYR, PSSYR2, $ PSTRMV, PSTRSV, PSVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/21, 22, 23, 23, 24, 25, 27/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PSGEMV * I = 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSGEMV, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSGEMV, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSGEMV, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSGEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYMV, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYMV, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYMV, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSSYMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PSTRMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSTRMV, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSTRMV, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSTRMV, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSTRMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PSTRSV * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSTRSV, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSTRSV, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSTRSV, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSTRSV, SCODE( I ), SNAMES( I ) ) END IF * * Test PSGER * I = I + 1 IF( LTEST( I ) ) THEN CALL PSDIMEE( ICTXT, NOUT, PSGER, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSGER, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSGER, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYR * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYR, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYR, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSSYR, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYR, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYR2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYR2, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYR2, SCODE( I ), SNAMES( I ) ) CALL PSVECEE( ICTXT, NOUT, PSSYR2, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYR2, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PSBLAS2TSTCHKE * END SUBROUTINE PSCHKARG2( ICTXT, NOUT, SNAME, UPLO, TRANS, DIAG, M, $ N, ALPHA, IA, JA, DESCA, IX, JX, DESCX, $ INCX, BETA, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT REAL ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCA( * ), DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PSCHKARG2 checks the input-only arguments of the Level 2 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 2 PBLAS * operation. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the TRANS option in the Level 2 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 2 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, TRANSREF, UPLOREF INTEGER I, IAREF, INCXREF, INCYREF, IXREF, IYREF, $ JAREF, JXREF, JYREF, MREF, MYCOL, MYROW, NPCOL, $ NPROW, NREF REAL ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCXREF( DLEN_ ), $ DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG TRANSREF = TRANS UPLOREF = UPLO MREF = M NREF = N ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IXREF = IX JXREF = JX DO 20 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 20 CONTINUE INCXREF = INCX BETAREF = BETA IYREF = IY JYREF = JY DO 30 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 30 CONTINUE INCYREF = INCY * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( TRANS, TRANSREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANS' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PSCHKARG2 * END SUBROUTINE PSBLAS2TSTCHK( ICTXT, NOUT, NROUT, UPLO, TRANS, DIAG, $ M, N, ALPHA, A, PA, IA, JA, DESCA, X, $ PX, IX, JX, DESCX, INCX, BETA, Y, PY, $ IY, JY, DESCY, INCY, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT, NROUT REAL ALPHA, BETA, ROGUE, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL A( * ), PA( * ), PX( * ), PY( * ), WORK( * ), $ X( * ), Y( * ) * .. * * Purpose * ======= * * PSBLAS2TSTCHK performs the computational tests of the Level 2 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PSGEMV will be tested; * else if NROUT = 2, PSSYMV will be tested; * else if NROUT = 3, PSTRMV will be tested; * else if NROUT = 4, PSTRSV will be tested; * else if NROUT = 5, PSGER will be tested; * else if NROUT = 6, PSSYR will be tested; * else if NROUT = 7, PSSYR2 will be tested; * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies if the matrix operand A is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of A. * * N (global input) INTEGER * On entry, N specifies the number of columns of A. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) REAL array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) REAL array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) REAL * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular or symmetric matrices. * * WORK (workspace) REAL array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, N ). This array is used to store the compu- * ted gauges (see PSMVCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW REAL ERR * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_SLASET, PSCHKMIN, PSCHKVIN, $ PSMVCH, PSTRMV, PSVMCH, PSVMCH2, STRSV * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PSGEMV * * Check the resulting vector Y * CALL PSMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, $ IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, $ INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PSCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) IF( LSAME( TRANS, 'N' ) ) THEN CALL PSCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) ELSE CALL PSCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PSSYMV * * Check the resulting vector Y * CALL PSMVCH( ICTXT, 'No transpose', N, N, ALPHA, A, IA, JA, $ DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, $ JY, DESCY, INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PSCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PSTRMV * * Check the resulting vector X * CALL PSMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, Y, IX, $ JX, DESCX, INCX, ZERO, X, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PSTRSV * * Check the resulting vector X * CALL STRSV( UPLO, TRANS, DIAG, N, A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ), X( IX+(JX-1)*DESCX( M_ ) ), INCX ) CALL PSTRMV( UPLO, TRANS, DIAG, N, PA, IA, JA, DESCA, PX, IX, $ JX, DESCX, INCX ) CALL PSMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, ZERO, Y, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PSGER * * Check the resulting matrix A * CALL PSVMCH( ICTXT, 'Ge', M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, $ DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PSCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PSCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.6 ) THEN * * Test PSSYR * * Check the resulting matrix A * CALL PSVMCH( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, $ INCX, X, IX, JX, DESCX, INCX, A, PA, IA, JA, $ DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PSCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.7 ) THEN * * Test PSSYR2 * * Check the resulting matrix A * CALL PSVMCH2( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, INCX, $ Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, $ WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PSCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PSCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'X' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'Y' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9997 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9996 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PSBLAS2TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/PSBLAS3TST.dat0000640000175000017500000000472010363532303017431 0ustar muammarmuammar'Level 3 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS3TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 3.0E0 value of BETA 4 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 22 30 8 20 3 5 11 8 values of M 23 29 7 21 3 14 11 8 values of N 24 28 9 22 3 9 11 8 values of K 31 31 31 31 31 31 31 31 values of M_A 31 31 31 31 31 31 31 31 values of N_A 4 5 6 3 2 1 4 5 values of IMB_A 4 5 6 3 2 1 4 5 values of INB_A 4 5 6 3 2 1 4 5 values of MB_A 4 5 6 3 2 1 4 5 values of NB_A 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 5 1 7 7 1 12 1 11 values of IA 5 1 7 7 1 12 1 11 values of JA 32 32 32 32 32 32 32 32 values of M_B 32 32 32 32 32 32 32 32 values of N_B 4 5 6 3 2 1 4 5 values of IMB_B 4 5 6 3 2 1 4 5 values of INB_B 4 5 6 3 2 1 4 5 values of MB_B 4 5 6 3 2 1 4 5 values of NB_B 0 0 0 0 0 0 0 0 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 5 1 7 7 1 12 1 11 values of IB 5 1 7 7 1 12 1 11 values of JB 35 35 35 35 35 35 35 35 values of M_C 35 35 35 35 35 35 35 35 values of N_C 4 5 6 3 2 1 4 5 values of IMB_C 4 5 6 3 2 1 4 5 values of INB_C 4 5 6 3 2 1 4 5 values of MB_C 4 5 6 3 2 1 4 5 values of NB_C 0 0 0 0 0 0 0 0 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 5 1 7 7 1 12 1 11 values of IC 5 1 7 7 1 12 1 11 values of JC PSGEMM T put F for no test in the same column PSSYMM T put F for no test in the same column PSSYRK T put F for no test in the same column PSSYR2K T put F for no test in the same column PSTRMM T put F for no test in the same column PSTRSM T put F for no test in the same column PSGEADD T put F for no test in the same column PSTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/psblas3tst.f0000640000175000017500000037435210363532303017561 0ustar muammarmuammar PROGRAM PSBLA3TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PSBLA3TST is the main testing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * * from the following 61 lines: * 'Level 3 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PSBLAS3TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0E0 value of ALPHA * 1.0E0 value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PSGEMM T put F for no test in the same column * PSSYMM T put F for no test in the same column * PSSYRK T put F for no test in the same column * PSSYR2K T put F for no test in the same column * PSTRMM T put F for no test in the same column * PSTRSM T put F for no test in the same column * PSGEADD T put F for no test in the same column * PSTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * REALSZ indicates the length in bytes on the given platform * for a single precision real. By default, REALSZ is set to * four. * * MEM REAL array * MEM is an array of dimension TOTMEM / REALSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, REALSZ, TOTMEM, $ MEMSIZ, NSUBS REAL ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ REALSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ, ZERO = 0.0E+0, $ ONE = 1.0E+0, PADVAL = -9923.0E+0, $ NSUBS = 8, ROGUE = -1.0E+10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IGAP, IMBA, IMBB, $ IMBC, IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, $ IPA, IPB, IPC, IPG, IPMATA, IPMATB, IPMATC, $ IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, IPREC, $ IPW, IVERB, J, JA, JB, JC, K, L, LDA, LDB, LDC, $ M, MA, MB, MBA, MBB, MBC, MC, MEMREQD, MPA, $ MPB, MPC, MYCOL, MYROW, N, NA, NB, NBA, NBB, $ NBC, NC, NCOLA, NCOLB, NCOLC, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQA, NQB, NQC, NROWA, $ NROWB, NROWC, NTESTS, OFFDA, OFFDC, RSRCA, $ RSRCB, RSRCC, TSKIP, TSTCNT REAL ALPHA, BETA, SCALE, THRESH * .. * .. Local Arrays .. LOGICAL BCHECK( NSUBS ), CCHECK( NSUBS ), $ LTEST( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCB( DLEN_ ), $ DESCBR( DLEN_ ), DESCC( DLEN_ ), $ DESCCR( DLEN_ ), IAVAL( MAXTESTS ), $ IBVAL( MAXTESTS ), ICVAL( MAXTESTS ), $ IERR( 6 ), IMBAVAL( MAXTESTS ), $ IMBBVAL( MAXTESTS ), IMBCVAL( MAXTESTS ), $ INBAVAL( MAXTESTS ), INBBVAL( MAXTESTS ), $ INBCVAL( MAXTESTS ), JAVAL( MAXTESTS ), $ JBVAL( MAXTESTS ), JCVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) REAL MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PSLAPRNT, PB_SCHEKPAD, $ PB_SFILLPAD, PB_SLASCAL, PB_SLASET, PMDESCCHK, $ PMDIMCHK, PSBLA3TSTINFO, PSBLAS3TSTCHK, $ PSBLAS3TSTCHKE, PSCHKARG3, PSCHKMOUT, PSGEADD, $ PSGEMM, PSLAGEN, PSLASCAL, PSLASET, PSMPRNT, $ PSSYMM, PSSYR2K, PSSYRK, PSTRADD, PSTRMM, $ PSTRSM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PSGEMM ', 'PSSYMM ', 'PSSYRK ', $ 'PSSYR2K', 'PSTRMM ', 'PSTRSM ', $ 'PSGEADD', 'PSTRADD'/ DATA BCHECK/.TRUE., .TRUE., .FALSE., .TRUE., .TRUE., $ .TRUE., .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, * so that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PSBLA3TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, SOF, TEE, IAM, $ IGAP, IVERB, NPROCS, THRESH, ALPHA, BETA, $ MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PSBLAS3TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * TSTCNT = TSTCNT + 1 * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, IGAP, GAPMUL, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, IGAP, GAPMUL, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDB = MAX( 1, MB ) LDC = MAX( 1, MC ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREB IPC = IPB + DESCB( LLD_ )*NQB + IPOSTB + IPREC IPMATA = IPC + DESCC( LLD_ )*NQC + IPOSTC IPMATB = IPMATA + MA*NA IPMATC = IPMATB + MB*NB IPG = IPMATC + MAX( MB*NB, MC*NC ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * IPW = IPG + 2*MAX( M, MAX( N, K ) ) MEMREQD = IPW - 1 + MAX( MAX( MAX( IMBA, MBA ), $ MAX( IMBB, MBB ) ), $ MAX( IMBC, MBC ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*REALSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) SNAMES( L ) END IF * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PSGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF * ELSE IF( L.EQ.2 ) THEN * * PSSYMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF * ELSE IF( L.EQ.3 ) THEN * * PSSYRK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 * ELSE IF( L.EQ.4 ) THEN * * PSSYR2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF * ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 * ELSE IF( L.EQ.7 .OR. L.EQ.8 ) THEN * * PSGEADD, PSTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PSSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 .OR. L.EQ.4 ) THEN * * PSSYRK, PSSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( ( L.EQ.6 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PSTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PSLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) * IF( BCHECK( L ) ) $ CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PSLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PSLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) * IF( BCHECK( L ) ) THEN CALL PB_DESCSET2( DESCBR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCBR, IBSEED, MEM( IPMATB ), $ DESCBR( LLD_ ) ) END IF * IF( CCHECK( L ) ) THEN * CALL PB_DESCSET2( DESCCR, MC, NC, IMBC, INBC, MBC, $ NBC, -1, -1, ICTXT, MAX( 1, MC ) ) CALL PSLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCCR, ICSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * ELSE * * If C is not needed, generate a copy of B instead * CALL PB_DESCSET2( DESCCR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PSLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCCR, IBSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A, B, C * IF( ( L.EQ.2 ).AND.( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PSLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PSLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) * END IF * ELSE IF( ( ( L.EQ.3 ).OR.( L.EQ.4 ) ).AND. $ ( MAX( NROWC, NCOLC ).GT.1 ) ) THEN * * The distributed matrix C is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PSLASET( 'Upper', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC, JC+1, $ DESCC ) CALL PB_SLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PSLASET( 'Lower', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC+1, JC, $ DESCC ) CALL PB_SLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PSLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_SLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PSLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_SLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.6 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) CALL PSLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_SLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PSLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_SLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PSLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_SLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.6 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / REAL( MAX( NROWA, NCOLA ) ) CALL PSLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_SLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * ELSE IF( L.EQ.8 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix C is lower triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PSLASET( 'Upper', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC, $ JC+1, DESCC ) CALL PB_SLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix C is upper triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PSLASET( 'Lower', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC+1, $ JC, DESCC ) CALL PB_SLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * END IF * * Pad the guard zones of A, B and C * CALL PB_SFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_SFILLPAD( ICTXT, MPB, NQB, MEM( IPB-IPREB ), $ DESCB( LLD_ ), IPREB, IPOSTB, $ PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_SFILLPAD( ICTXT, MPC, NQC, MEM( IPC-IPREC ), $ DESCC( LLD_ ), IPREC, IPOSTC, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PSCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PSLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, $ 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) END IF * IF( BCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PSLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, JB, $ DESCB, 0, 0, $ 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MB, NB, MEM( IPB ), 1, 1, DESCB, $ 0, 0, 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) END IF END IF * IF( CCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PSLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, JC, $ DESCC, 0, 0, $ 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PSLAPRNT( MC, NC, MEM( IPC ), 1, 1, DESCC, $ 0, 0, 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) END IF END IF * * Call the Level 3 PBLAS routine * INFO = 0 IF( L.EQ.1 ) THEN * * Test PSGEMM * CALL PSGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.2 ) THEN * * Test PSSYMM * CALL PSSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.3 ) THEN * * Test PSSYRK * CALL PSSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.4 ) THEN * * Test PSSYR2K * CALL PSSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.5 ) THEN * * Test PSTRMM * CALL PSTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * ELSE IF( L.EQ.6 ) THEN * * Test PSTRSM * CALL PSTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * * ELSE IF( L.EQ.7 ) THEN * * Test PSGEADD * CALL PSGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.8 ) THEN * * Test PSTRADD * CALL PSTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_SCHEKPAD( ICTXT, SNAMES( L ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), $ IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_SCHEKPAD( ICTXT, SNAMES( L ), MPB, NQB, $ MEM( IPB-IPREB ), DESCB( LLD_ ), $ IPREB, IPOSTB, PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_SCHEKPAD( ICTXT, SNAMES( L ), MPC, NQC, $ MEM( IPC-IPREC ), DESCC( LLD_ ), $ IPREC, IPOSTC, PADVAL ) END IF * * Check the computations * CALL PSBLAS3TSTCHK( ICTXT, NOUT, L, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, $ MEM( IPMATA ), MEM( IPA ), IA, JA, $ DESCA, MEM( IPMATB ), MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, THRESH, $ ROGUE, MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PSCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Check input-only array arguments * CALL PSCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), $ MEM( IPA ), IA, JA, DESCA, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_A', $ SNAMES( L ) END IF * IF( BCHECK( L ) ) THEN CALL PSCHKMOUT( NROWB, NCOLB, MEM( IPMATB ), $ MEM( IPB ), IB, JB, DESCB, IERR( 5 ) ) IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_B', $ SNAMES( L ) END IF END IF * IF( CCHECK( L ) ) THEN CALL PSCHKMOUT( NROWC, NCOLC, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_C', $ SNAMES( L ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN KFAIL( L ) = KFAIL( L ) + 1 ERRFLG = .TRUE. IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) SNAMES( L ) ELSE KPASS( L ) = KPASS( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( L ) END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PSLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PSMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PSLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) END IF IF( BCHECK( L ) ) THEN IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MB, NB, $ MEM( IPMATB ), LDB, 0, 0, $ 'SERIAL_B' ) CALL PB_PSLAPRNT( MB, NB, MEM( IPB ), 1, 1, $ DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PSMPRNT( ICTXT, NOUT, NROWB, NCOLB, $ MEM( IPMATB+IB-1+(JB-1)*LDB ), $ LDB, 0, 0, 'SERIAL_B' ) CALL PB_PSLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, $ JB, DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) END IF END IF IF( CCHECK( L ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PSMPRNT( ICTXT, NOUT, MC, NC, $ MEM( IPMATC ), LDC, 0, 0, $ 'SERIAL_C' ) CALL PB_PSLAPRNT( MC, NC, MEM( IPC ), 1, 1, $ DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PSMPRNT( ICTXT, NOUT, NROWC, NCOLC, $ MEM( IPMATC+IC-1+(JC-1)*LDC ), $ LDC, 0, 0, 'SERIAL_C' ) CALL PB_PSLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, $ JC, DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9982 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9981 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, ' Tested Subroutine: ', A ) 9985 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9983 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9982 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9981 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9980 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9979 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9978 FORMAT( 2X, 'Testing Summary') 9977 FORMAT( 2X, 'End of Tests.' ) 9976 FORMAT( 2X, 'Tests started.' ) 9975 FORMAT( 2X, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PSBLA3TST * END SUBROUTINE PSBLA3TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, SOF, $ TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL ALPHA, BETA, THRESH * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PSBLA3TSTINFO get the needed startup information for testing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) REAL * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) REAL * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PSBLAS3TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 3 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real single precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) 9981 FORMAT( 2X, 'Beta : ', G16.6 ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PSBLA3TSTINFO * END SUBROUTINE PSBLAS3TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PSBLAS3TSTCHKE tests the error exits of the Level 3 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 7 (NSUBS). * If LTEST( 1 ) is .TRUE., PSGEMM will be tested; * If LTEST( 2 ) is .TRUE., PSSYMM will be tested; * If LTEST( 3 ) is .TRUE., PSSYRK will be tested; * If LTEST( 4 ) is .TRUE., PSSYR2K will be tested; * If LTEST( 5 ) is .TRUE., PSTRMM will be tested; * If LTEST( 6 ) is .TRUE., PSTRSM will be tested; * If LTEST( 7 ) is .TRUE., PSGEADD will be tested; * If LTEST( 8 ) is .TRUE., PSTRADD will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PSDIMEE, PSGEADD, PSGEMM, $ PSMATEE, PSOPTEE, PSSYMM, PSSYR2K, PSSYRK, $ PSTRADD, PSTRMM, PSTRSM * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/31, 32, 33, 35, 38, 38, 39, 40/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PSGEMM * I = 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSGEMM, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSGEMM, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSGEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYMM, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYMM, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYRK * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYRK, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYRK, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYRK, SCODE( I ), SNAMES( I ) ) END IF * * Test PSSYR2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSSYR2K, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSSYR2K, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSSYR2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PSTRMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSTRMM, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSTRMM, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSTRMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PSTRSM * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSTRSM, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSTRSM, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSTRSM, SCODE( I ), SNAMES( I ) ) END IF * * Test PSGEADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSGEADD, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSGEADD, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSGEADD, SCODE( I ), SNAMES( I ) ) END IF * * Test PSTRADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PSOPTEE( ICTXT, NOUT, PSTRADD, SCODE( I ), SNAMES( I ) ) CALL PSDIMEE( ICTXT, NOUT, PSTRADD, SCODE( I ), SNAMES( I ) ) CALL PSMATEE( ICTXT, NOUT, PSTRADD, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PSBLAS3TSTCHKE * END SUBROUTINE PSCHKARG3( ICTXT, NOUT, SNAME, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, IA, JA, $ DESCA, IB, JB, DESCB, BETA, IC, JC, DESCC, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT REAL ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*7 SNAME INTEGER DESCA( * ), DESCB( * ), DESCC( * ) * .. * * Purpose * ======= * * PSCHKARG3 checks the input-only arguments of the Level 3 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies the SIDE option in the Level 3 PBLAS * operation. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 3 PBLAS * operation. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the TRANSA option in the Level 3 * PBLAS operation. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the TRANSB option in the Level 3 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 3 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * K (global input) INTEGER * On entry, K specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, SIDEREF, TRANSAREF, TRANSBREF, UPLOREF INTEGER I, IAREF, IBREF, ICREF, JAREF, JBREF, JCREF, $ KREF, MREF, MYCOL, MYROW, NPCOL, NPROW, NREF REAL ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCBREF( DLEN_ ), $ DESCCREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG SIDEREF = SIDE TRANSAREF = TRANSA TRANSBREF = TRANSB UPLOREF = UPLO MREF = M NREF = N KREF = K ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IBREF = IB JBREF = JB DO 20 I = 1, DLEN_ DESCBREF( I ) = DESCB( I ) 20 CONTINUE BETAREF = BETA ICREF = IC JCREF = JC DO 30 I = 1, DLEN_ DESCCREF( I ) = DESCC( I ) 30 CONTINUE * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( SIDE, SIDEREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'SIDE' ELSE IF( .NOT. LSAME( TRANSA, TRANSAREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSA' ELSE IF( .NOT. LSAME( TRANSB, TRANSBREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSB' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( K.NE.KREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'K' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IB.NE.IBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IB' ELSE IF( JB.NE.JBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JB' ELSE IF( DESCB( DTYPE_ ).NE.DESCBREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( DTYPE_ )' ELSE IF( DESCB( M_ ).NE.DESCBREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( M_ )' ELSE IF( DESCB( N_ ).NE.DESCBREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( N_ )' ELSE IF( DESCB( IMB_ ).NE.DESCBREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( IMB_ )' ELSE IF( DESCB( INB_ ).NE.DESCBREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( INB_ )' ELSE IF( DESCB( MB_ ).NE.DESCBREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( MB_ )' ELSE IF( DESCB( NB_ ).NE.DESCBREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( NB_ )' ELSE IF( DESCB( RSRC_ ).NE.DESCBREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( RSRC_ )' ELSE IF( DESCB( CSRC_ ).NE.DESCBREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CSRC_ )' ELSE IF( DESCB( CTXT_ ).NE.DESCBREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CTXT_ )' ELSE IF( DESCB( LLD_ ).NE.DESCBREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( LLD_ )' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IC.NE.ICREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IC' ELSE IF( JC.NE.JCREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JC' ELSE IF( DESCC( DTYPE_ ).NE.DESCCREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( DTYPE_ )' ELSE IF( DESCC( M_ ).NE.DESCCREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( M_ )' ELSE IF( DESCC( N_ ).NE.DESCCREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( N_ )' ELSE IF( DESCC( IMB_ ).NE.DESCCREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( IMB_ )' ELSE IF( DESCC( INB_ ).NE.DESCCREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( INB_ )' ELSE IF( DESCC( MB_ ).NE.DESCCREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( MB_ )' ELSE IF( DESCC( NB_ ).NE.DESCCREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( NB_ )' ELSE IF( DESCC( RSRC_ ).NE.DESCCREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( RSRC_ )' ELSE IF( DESCC( CSRC_ ).NE.DESCCREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CSRC_ )' ELSE IF( DESCC( CTXT_ ).NE.DESCCREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CTXT_ )' ELSE IF( DESCC( LLD_ ).NE.DESCCREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( LLD_ )' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PSCHKARG3 * END SUBROUTINE PSBLAS3TSTCHK( ICTXT, NOUT, NROUT, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, A, PA, IA, $ JA, DESCA, B, PB, IB, JB, DESCB, BETA, $ C, PC, IC, JC, DESCC, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT, NROUT REAL ALPHA, BETA, ROGUE, THRESH * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL A( * ), B( * ), C( * ), PA( * ), PB( * ), $ PC( * ), WORK( * ) * .. * * Purpose * ======= * * PSBLAS3TSTCHK performs the computational tests of the Level 3 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PSGEMM will be tested; * else if NROUT = 2, PSSYMM will be tested; * else if NROUT = 3, PSSYRK will be tested; * else if NROUT = 4, PSSYR2K will be tested; * else if NROUT = 5, PSTRMM will be tested; * else if NROUT = 6, PSTRSM will be tested; * else if NROUT = 7, PSGEADD will be tested; * else if NROUT = 8, PSTRADD will be tested; * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies if the multiplication should be per- * formed from the left or the right. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) REAL array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * PB (local input) REAL array * On entry, PB is an array of dimension (DESCB( LLD_ ),*). This * array contains the local entries of the matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * C (local input/local output) REAL array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) REAL array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) REAL * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular or symmetric matrices. * * WORK (workspace) REAL array * On entry, WORK is an array of dimension LWORK where LWORK is * at least 2*MAX( M, MAX( N, K ) ). This array is used to store * a copy of a column of C and the computed gauges (see PSMMCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on B has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on C has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, IPG, MYCOL, MYROW, NPCOL, NPROW REAL ERR * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_SLASET, PSCHKMIN, PSMMCH, $ PSMMCH1, PSMMCH2, PSMMCH3, PSTRMM, STRSM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE IPG = MAX( M, MAX( N, K ) ) + 1 * IF( NROUT.EQ.1 ) THEN * * Test PSGEMM * * Check the resulting matrix C * CALL PSMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, $ DESCC, WORK, WORK( IPG ), ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSCHKMIN( ERR, M, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PSCHKMIN( ERR, K, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF IF( LSAME( TRANSB, 'N' ) ) THEN CALL PSCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PSCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PSSYMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PSMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, WORK( IPG ), $ ERR, IERR( 3 ) ) ELSE CALL PSMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, WORK( IPG ), $ ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_SLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_SLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PSCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PSCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PSSYRK * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSMMCH1( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, $ WORK, WORK( IPG ), ERR, IERR( 3 ) ) ELSE CALL PSMMCH1( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, IA, $ JA, DESCA, BETA, C, PC, IC, JC, DESCC, WORK, $ WORK( IPG ), ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PSCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.4 ) THEN * * Test PSSYR2K * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSMMCH2( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, WORK( IPG ), ERR, $ IERR( 3 ) ) ELSE CALL PSMMCH2( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, WORK( IPG ), ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PSCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PSCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PSCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PSTRMM * * Check the resulting matrix B * IF( LSAME( SIDE, 'L' ) ) THEN CALL PSMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, C, IB, JB, DESCB, $ ZERO, B, PB, IB, JB, DESCB, WORK, $ WORK( IPG ), ERR, IERR( 2 ) ) ELSE CALL PSMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, $ ALPHA, C, IB, JB, DESCB, A, IA, JA, DESCA, $ ZERO, B, PB, IB, JB, DESCB, WORK, $ WORK( IPG ), ERR, IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PSTRSM * * Check the resulting matrix B * CALL STRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ), $ B( IB+(JB-1)*DESCB( M_ ) ), DESCB( M_ ) ) CALL PSTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, PA, IA, JA, $ DESCA, PB, IB, JB, DESCB ) IF( LSAME( SIDE, 'L' ) ) THEN CALL PSMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, ALPHA, $ A, IA, JA, DESCA, B, IB, JB, DESCB, ZERO, C, $ PB, IB, JB, DESCB, WORK, WORK( IPG ), ERR, $ IERR( 2 ) ) ELSE CALL PSMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, ALPHA, $ B, IB, JB, DESCB, A, IA, JA, DESCA, ZERO, C, $ PB, IB, JB, DESCB, WORK, WORK( IPG ), ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_SLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_SLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PSCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF ELSE IF( NROUT.EQ.7 ) THEN * * Test PSGEADD * * Check the resulting matrix C * CALL PSMMCH3( 'All', TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PSCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PSTRADD * * Check the resulting matrix C * CALL PSMMCH3( UPLO, TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PSCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PSCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'B' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'C' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9997 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PSBLAS3TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/pdblas1tst.f0000640000175000017500000036370710363532303017542 0ustar muammarmuammar PROGRAM PDBLA1TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA1TST is the main testing program for the PBLAS Level 1 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 44 lines: * 'Level 1 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS1TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PDSWAP T put F for no test in the same column * PDSCAL T put F for no test in the same column * PDCOPY T put F for no test in the same column * PDAXPY T put F for no test in the same column * PDDOT T put F for no test in the same column * PDNRM2 T put F for no test in the same column * PDASUM T put F for no test in the same column * PDAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, DBLESZ, TOTMEM, $ MEMSIZ, NSUBS DOUBLE PRECISION PADVAL, ZERO PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, ZERO = 0.0D+0, $ PADVAL = -9923.0D+0, NSUBS = 8 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IGAP, IMBX, IMBY, $ IMIDX, IMIDY, INBX, INBY, INCX, INCY, IPMATX, $ IPMATY, IPOSTX, IPOSTY, IPREX, IPREY, IPW, IPX, $ IPY, IVERB, IX, IXSEED, IY, IYSEED, J, JX, JY, $ K, LDX, LDY, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY, TSKIP, TSTCNT DOUBLE PRECISION ALPHA, PSCLR, PUSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCXR( DLEN_ ), $ DESCY( DLEN_ ), DESCYR( DLEN_ ), IERR( 4 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MXVAL( MAXTESTS ), $ MYVAL( MAXTESTS ), NBXVAL( MAXTESTS ), $ NBYVAL( MAXTESTS ), NVAL( MAXTESTS ), $ NXVAL( MAXTESTS ), NYVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DCHEKPAD, PB_DESCSET2, PB_DFILLPAD, $ PB_PDLAPRNT, PDAMAX, PDASUM, PDAXPY, $ PDBLA1TSTINFO, PDBLAS1TSTCHK, PDBLAS1TSTCHKE, $ PDCHKARG1, PDCHKVOUT, PDCOPY, PDDOT, PDLAGEN, $ PDMPRNT, PDNRM2, PDSCAL, PDSWAP, PDVPRNT, $ PVDESCCHK, PVDIMCHK * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDSWAP ', 'PDSCAL ', 'PDCOPY ', $ 'PDAXPY ', 'PDDOT ', 'PDNRM2 ', $ 'PDASUM ', 'PDAMAX '/ DATA YCHECK/.TRUE., .FALSE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE., .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler will abort on errors. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA1TSTINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, SOF, TEE, IAM, IGAP, IVERB, $ NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PDBLAS1TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( IPREX+1 ). * IPX = IPREX + 1 IPY = IPX + DESCX( LLD_ ) * NQX + IPOSTX + IPREY IPMATX = IPY + DESCY( LLD_ ) * NQY + IPOSTY IPMATY = IPMATX + MX * NX IPW = IPMATY + MY * NY * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPW - 1 + $ MAX( MAX( IMBX, MBX ), MAX( IMBY, MBY ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9989 ) SNAMES( K ) END IF * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices X and Y * CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) END IF * * Pad the guard zones of X, and Y * CALL PB_DFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_DFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT only args. * INFO = 0 CALL PDCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * INFO = 0 PSCLR = ZERO PUSCLR = ZERO PISCLR = 0 * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PDLAPRNT( 1, N, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PDLAPRNT( N, 1, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) END IF IF( YCHECK( K ) ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PDLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PDLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) IF( YCHECK( K ) ) $ CALL PB_PDLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PDSWAP * CALL PDSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PDSCAL * PSCLR = ALPHA CALL PDSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.3 ) THEN * * Test PDCOPY * CALL PDCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.4 ) THEN * * Test PDAXPY * PSCLR = ALPHA CALL PDAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.5 ) THEN * * Test PDDOT * CALL PDDOT( N, PSCLR, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.6 ) THEN * * Test PDNRM2 * CALL PDNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.7 ) THEN * * Test PDASUM * CALL PDASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.8 ) THEN * CALL PDAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9978 ) INFO GO TO 30 END IF * * Check the computations * CALL PDBLAS1TSTCHK( ICTXT, NOUT, K, N, PSCLR, PUSCLR, $ PISCLR, MEM( IPMATX ), MEM( IPX ), $ IX, JX, DESCX, INCX, MEM( IPMATY ), $ MEM( IPY ), IY, JY, DESCY, INCY, $ INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 END IF * * Check padding * CALL PB_DCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), $ IPREX, IPOSTX, PADVAL ) IF( YCHECK( K ) ) THEN CALL PB_DCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check input-only scalar arguments * INFO = 1 CALL PDCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * * Check input-only array arguments * CALL PDCHKVOUT( N, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_X', SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PDCHKVOUT( N, MEM( IPMATY ), MEM( IPY ), IY, JY, $ DESCY, INCY, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE. 0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 3 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PDLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PDVPRNT( ICTXT, NOUT, N, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PDLAPRNT( 1, N, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PDLAPRNT( N, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PDLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PDVPRNT( ICTXT, NOUT, N, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PDLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PDLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9984 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9989 FORMAT( 2X, ' Tested Subroutine: ', A ) 9988 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9987 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9986 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9985 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9984 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9983 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9982 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9981 FORMAT( 2X, 'Testing Summary') 9980 FORMAT( 2X, 'End of Tests.' ) 9979 FORMAT( 2X, 'Tests started.' ) 9978 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PDBLA1TST * END SUBROUTINE PDBLA1TSTINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, $ NXVAL, IMBXVAL, MBXVAL, INBXVAL, $ NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, $ LDVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, LTEST, SOF, TEE, IAM, IGAP, $ IVERB, NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, $ NGRIDS, NMAT, NOUT, NPROCS DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA1TSTINFO get the needed startup information for testing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS1TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 1 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real double precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9982 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) * * End of PDBLA1TSTINFO * END SUBROUTINE PDBLAS1TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PDBLAS1TSTCHKE tests the error exits of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 8 (NSUBS). * If LTEST( 1 ) is .TRUE., PDSWAP will be tested; * If LTEST( 2 ) is .TRUE., PDSCAL will be tested; * If LTEST( 3 ) is .TRUE., PDCOPY will be tested; * If LTEST( 4 ) is .TRUE., PDAXPY will be tested; * If LTEST( 5 ) is .TRUE., PDDOT will be tested; * If LTEST( 6 ) is .TRUE., PDNRM2 will be tested; * If LTEST( 7 ) is .TRUE., PDASUM will be tested; * If LTEST( 8 ) is .TRUE., PDAMAX will be tested. * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PDAMAX, PDASUM, PDAXPY, PDCOPY, $ PDDIMEE, PDDOT, PDNRM2, PDSCAL, PDSWAP, $ PDVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/11, 12, 11, 13, 13, 15, 15, 14/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PDSWAP * I = 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDSWAP, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDSWAP, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDSCAL, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PDCOPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDCOPY, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDCOPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PDAXPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDAXPY, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDAXPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PDDOT * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDDOT, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDDOT, SCODE( I ), SNAMES( I ) ) END IF * * Test PDNRM2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDNRM2, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDNRM2, SCODE( I ), SNAMES( I ) ) END IF * * Test PDASUM * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDASUM, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDASUM, SCODE( I ), SNAMES( I ) ) END IF * * Test PDAMAX * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDAMAX, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDAMAX, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PDBLAS1TSTCHKE * END SUBROUTINE PDCHKARG1( ICTXT, NOUT, SNAME, N, ALPHA, IX, JX, $ DESCX, INCX, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PDCHKARG1 checks the input-only arguments of the Level 1 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, INCXREF, INCYREF, IXREF, IYREF, JXREF, $ JYREF, MYCOL, MYROW, NPCOL, NPROW, NREF DOUBLE PRECISION ALPHAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCXREF( DLEN_ ), DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * NREF = N IXREF = IX JXREF = JX DO 10 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 10 CONTINUE INCXREF = INCX IYREF = IY JYREF = JY DO 20 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 20 CONTINUE INCYREF = INCY ALPHAREF = ALPHA * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.GT.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PDCHKARG1 * END LOGICAL FUNCTION PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PISINSCOPE returns .TRUE. if the calling process is in the scope of * sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) and .FALSE. if it is * not. This routine is used to determine which processes should check * the answer returned by some Level 1 PBLAS routines. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * N (global input) INTEGER * The length of the subvector sub( X ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER IIX, IXCOL, IXROW, JJX, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN * * This is the special case, find process owner of IX, JX, and * only this process is the scope. * PISINSCOPE = ( ( IXROW.EQ.MYROW .OR. ROWREP ) .AND. $ ( IXCOL.EQ.MYCOL .OR. COLREP ) ) * ELSE * IF( INCX.EQ.DESCX( M_ ) ) THEN * * row vector * PISINSCOPE = ( MYROW.EQ.IXROW .OR. ROWREP ) * ELSE * * column vector * PISINSCOPE = ( MYCOL.EQ.IXCOL .OR. COLREP ) * END IF * END IF * RETURN * * End of PISINSCOPE * END SUBROUTINE PDBLAS1TSTCHK( ICTXT, NOUT, NROUT, N, PSCLR, PUSCLR, $ PISCLR, X, PX, IX, JX, DESCX, INCX, Y, $ PY, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT, NROUT, PISCLR DOUBLE PRECISION PSCLR, PUSCLR * .. * .. Array Arguments .. INTEGER DESCX( * ), DESCY( * ) DOUBLE PRECISION PX( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PDBLAS1TSTCHK performs the computational tests of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PDSWAP will be tested; * else if NROUT = 2, PDSCAL will be tested; * else if NROUT = 3, PDCOPY will be tested; * else if NROUT = 4, PDAXPY will be tested; * else if NROUT = 5, PDDOT will be tested; * else if NROUT = 6, PDNRM2 will be tested; * else if NROUT = 7, PDASUM will be tested; * else if NROUT = 8, PDAMAX will be tested. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * PSCLR (global input) DOUBLE PRECISION * On entry, depending on the value of NROUT, PSCLR specifies * the scalar ALPHA, or the output scalar returned by the PBLAS, * i.e., the dot product, the 2-norm, the absolute sum or the * value of AMAX. * * PUSCLR (global input) DOUBLE PRECISION * On entry, PUSCLR specifies the real part of the scalar ALPHA * used by the real scaling, the 2-norm, or the absolute sum * routines. PUSCLR is not used in the real versions of this * routine. * * PISCLR (global input) DOUBLE PRECISION * On entry, PISCLR specifies the value of the global index re- * turned by PDAMAX, otherwise PISCLR is not used. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) DOUBLE PRECISION array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) DOUBLE PRECISION array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, INXSCOPE, INYSCOPE, ROWREP INTEGER I, IB, ICURCOL, ICURROW, IDUMM, IIX, IIY, IN, $ IOFFX, IOFFY, ISCLR, IXCOL, IXROW, IYCOL, $ IYROW, J, JB, JJX, JJY, JN, KK, LDX, LDY, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, ERRMAX, PREC, SCLR, USCLR * .. * .. Local Arrays .. INTEGER IERR( 6 ) CHARACTER*5 ARGIN1, ARGIN2, ARGOUT1, ARGOUT2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DSWAP, IGAMX2D, $ PB_INFOG2L, PDCHKVIN, PDERRASUM, PDERRAXPY, $ PDERRDOT, PDERRNRM2, PDERRSCAL * .. * .. External Functions .. LOGICAL PISINSCOPE INTEGER IDAMAX DOUBLE PRECISION PDLAMCH EXTERNAL IDAMAX, PDLAMCH, PISINSCOPE * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ARGIN1 = ' ' ARGIN2 = ' ' ARGOUT1 = ' ' ARGOUT2 = ' ' DO 10 I = 1, 6 IERR( I ) = 0 10 CONTINUE * PREC = PDLAMCH( ICTXT, 'precision' ) * IF( NROUT.EQ.1 ) THEN * * Test PDSWAP * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL DSWAP( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PDCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.2 ) THEN * * Test PDSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 20 J = JX, JN * CALL PDERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 20 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 40 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 30 KK = 0, JB-1 * CALL PDERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 30 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 50 I = IX, IN * CALL PDERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 70 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 60 KK = 0, IB-1 * CALL PDERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 60 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 70 CONTINUE * END IF * ELSE IF( NROUT.EQ.3 ) THEN * * Test PDCOPY * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL DCOPY( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PDCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PDAXPY * CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) LDY = DESCY( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, $ IIY, JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, N ) JN = JY + JB - 1 * DO 140 J = JY, JN * CALL PDERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 140 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 160 J = JN+1, JY+N-1, DESCY( NB_ ) JB = MIN( JY+N-J, DESCY( NB_ ) ) * DO 150 KK = 0, JB-1 * CALL PDERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 150 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 160 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, N ) IN = IY + IB - 1 * DO 170 I = IY, IN * CALL PDERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 170 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 190 I = IN+1, IY+N-1, DESCY( MB_ ) IB = MIN( IY+N-I, DESCY( MB_ ) ) * DO 180 KK = 0, IB-1 * CALL PDERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 180 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 190 CONTINUE * END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PDDOT * CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PDCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PDERRDOT( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOT' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOT' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PDNRM2 * CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PDERRNRM2( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF ELSE USCLR = ZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PDASUM * CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PDERRASUM( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ) .GT. ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF ELSE USCLR = ZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PDAMAX * CALL PDCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN ISCLR = IDAMAX( N, X( IOFFX ), INCX ) IF( N.LT.1 ) THEN SCLR = ZERO ELSE IF( ( INCX.EQ.1 ).AND.( DESCX( M_ ).EQ.1 ).AND. $ ( N.EQ.1 ) ) THEN ISCLR = JX SCLR = X( IOFFX ) ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN ISCLR = JX + ISCLR - 1 SCLR = X( IX + ( ISCLR - 1 ) * DESCX( M_ ) ) ELSE ISCLR = IX + ISCLR - 1 SCLR = X( ISCLR + ( JX - 1 ) * DESCX( M_ ) ) END IF * IF( PSCLR.NE.SCLR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF * IF( PISCLR.NE.ISCLR ) THEN IERR( 5 ) = 1 WRITE( ARGIN2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF ELSE ISCLR = 0 SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF IF( PISCLR.NE.ISCLR ) THEN IERR( 6 ) = 1 WRITE( ARGOUT2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF END IF * END IF * * Find IERR across all processes * CALL IGAMX2D( ICTXT, 'All', ' ', 6, 1, IERR, 6, IDUMM, IDUMM, -1, $ -1, 0 ) * * Encode the errors found in INFO * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'X' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Y' END IF * IF( IERR( 3 ).NE.0 ) $ INFO = INFO + 4 * IF( IERR( 4 ).NE.0 ) $ INFO = INFO + 8 * IF( IERR( 5 ).NE.0 ) $ INFO = INFO + 16 * IF( IERR( 6 ).NE.0 ) $ INFO = INFO + 32 * 9999 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' in scope is incorrect.' ) 9997 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' out of scope is incorrect.' ) 9996 FORMAT( 2X, ' ***** Expected value is: ', D30.18, /2X, $ ' Obtained value is: ', D30.18 ) 9995 FORMAT( 2X, ' ***** Expected value is: ', I6, /2X, $ ' Obtained value is: ', I6 ) * RETURN * * End of PDBLAS1TSTCHK * END SUBROUTINE PDERRDOT( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N DOUBLE PRECISION ERRBND, PREC, SCLR * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * PDERRDOT serially computes the dot product X**T * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) DOUBLE PRECISION * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) DOUBLE PRECISION array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) DOUBLE PRECISION array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY DOUBLE PRECISION ADDBND, FACT, SUMNEG, SUMPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMPOS = ZERO SUMNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N TMP = X( IX ) * Y( IY ) SCLR = SCLR + TMP IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF IX = IX + INCX IY = IY + INCY 10 CONTINUE * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PDERRDOT * END SUBROUTINE PDERRNRM2( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ERRBND, PREC, USCLR * .. * .. Array Arguments .. DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * PDERRNRM2 serially computes the 2-norm the vector X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If norm1 = SCLR and norm2 are two different computed results, and * norm1 being assumed to be correct, we require * * abs( norm1 - norm2 ) <= ERRBND = ERRFACT * abs( norm1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) DOUBLE PRECISION * On exit, USCLR specifies the 2-norm of the vector X. * * X (global input) DOUBLE PRECISION array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ABSXI, ADDBND, FACT, SCALE, SSQ, SUMSCA, SUMSSQ * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * USCLR = ZERO SUMSSQ = ONE SUMSCA = ZERO ADDBND = TWO * TWO * TWO * PREC FACT = ONE + TWO * ( ( ONE + PREC )**3 - ONE ) * SCALE = ZERO SSQ = ONE DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX IF( X( IX ).NE.ZERO ) THEN ABSXI = ABS( X( IX ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE * USCLR = SCALE * SQRT( SSQ ) * * Error on square root * ERRBND = SQRT( SUMSSQ ) * ( ONE + TWO * ( 1.00001D+0 * PREC ) ) * ERRBND = ( SUMSCA * ERRBND ) - USCLR * RETURN * * End of PDERRNRM2 * END SUBROUTINE PDERRASUM( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ERRBND, PREC, USCLR * .. * .. Array Arguments .. DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * PDERRASUM serially computes the sum of absolute values of the vector * X and returns a scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies a scaled relative acceptable error * bound. In this case the error bound is just the absolute sum * multiplied by a constant proportional to the machine preci- * sion. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) DOUBLE PRECISION * On exit, USCLR specifies the sum of absolute values of the * vector X. * * X (global input) DOUBLE PRECISION array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION TWO, ZERO PARAMETER ( TWO = 2.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ADDBND * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * IX = 1 USCLR = ZERO ADDBND = TWO * TWO * TWO * PREC * DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX USCLR = USCLR + ABS( X( IX ) ) 10 CONTINUE * ERRBND = ADDBND * USCLR * RETURN * * End of PDERRASUM * END SUBROUTINE PDERRSCAL( ERRBND, PSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC, PSCLR, X * .. * * Purpose * ======= * * PDERRSCAL serially computes the product PSCLR * X and returns a sca- * led relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) DOUBLE PRECISION * On entry, PSCLR specifies the scale factor. * * X (global input/global output) DOUBLE PRECISION * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION TWO PARAMETER ( TWO = 2.0D+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * X = PSCLR * X * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PDERRSCAL * END SUBROUTINE PDERRAXPY( ERRBND, PSCLR, X, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC, PSCLR, X, Y * .. * * Purpose * ======= * * PDERRAXPY serially computes Y := Y + PSCLR * X and returns a scaled * relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) DOUBLE PRECISION * On entry, PSCLR specifies the scale factor. * * X (global input) DOUBLE PRECISION * On entry, X specifies the scalar to be scaled. * * Y (global input/global output) DOUBLE PRECISION * On entry, Y specifies the scalar to be added. On exit, Y con- * tains the resulting scalar. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION ADDBND, FACT, SUMPOS, SUMNEG, TMP * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * SUMPOS = ZERO SUMNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = PSCLR * X IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * TMP = Y IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP ELSE SUMNEG = SUMNEG - TMP END IF * Y = Y + ( PSCLR * X ) * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PDERRAXPY * END scalapack-1.8.0/PBLAS/TESTING/pcblas2tst.f0000640000175000017500000035372710363532303017543 0ustar muammarmuammar PROGRAM PCBLA2TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA2TST is the main testing program for the PBLAS Level 2 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 61 lines: * 'Level 2 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS2TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * (1.0E0, 0.0E0) value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PCGEMV T put F for no test in the same column * PCHEMV T put F for no test in the same column * PCTRMV T put F for no test in the same column * PCTRSV T put F for no test in the same column * PCGERU T put F for no test in the same column * PCGERC T put F for no test in the same column * PCHER T put F for no test in the same column * PCHER2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, CPLXSZ, TOTMEM, $ MEMSIZ, NSUBS, REALSZ COMPLEX ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ CPLXSZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, REALSZ = 4, $ ONE = ( 1.0E+0, 0.0E+0 ), $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ROGUE = ( -1.0E+10, 1.0E+10 ), $ ZERO = ( 0.0E+0, 0.0E+0 ), NSUBS = 8 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IGAP, IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, $ INBA, INBX, INBY, INCX, INCY, IPA, IPG, IPMATA, $ IPMATX, IPMATY, IPOSTA, IPOSTX, IPOSTY, IPREA, $ IPREX, IPREY, IPX, IPY, IVERB, IX, IXSEED, IY, $ IYSEED, J, JA, JX, JY, K, LDA, LDX, LDY, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY, TSKIP, TSTCNT REAL THRESH COMPLEX ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCX( DLEN_ ), $ DESCXR( DLEN_ ), DESCY( DLEN_ ), $ DESCYR( DLEN_ ), IAVAL( MAXTESTS ), IERR( 6 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_CCHEKPAD, PB_CFILLPAD, PB_CLASCAL, $ PB_CLASET, PB_DESCSET2, PB_PCLAPRNT, $ PCBLA2TSTINFO, PCBLAS2TSTCHK, PCBLAS2TSTCHKE, $ PCCHKARG2, PCCHKVOUT, PCGEMV, PCGERC, PCGERU, $ PCHEMV, PCHER, PCHER2, PCIPSET, PCLAGEN, $ PCLASCAL, PCLASET, PCMPRNT, PCTRMV, PCTRSV, $ PCVPRNT, PMDESCCHK, PMDIMCHK, PVDESCCHK, $ PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_FCEIL EXTERNAL PB_FCEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCGEMV ', 'PCHEMV ', 'PCTRMV ', $ 'PCTRSV ', 'PCGERU ', 'PCGERC ', $ 'PCHER ', 'PCHER2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA2TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, MAXTESTS, NGRIDS, $ PVAL, MAXGRIDS, QVAL, MAXGRIDS, NBLOG, LTEST, $ SOF, TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PCBLAS2TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) INBA = INBAVAL( J ) MBA = MBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) INBX = INBXVAL( J ) MBX = MBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) INBY = INBYVAL( J ) MBY = MBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPX = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREX IPY = IPX + DESCX( LLD_ )*NQX + IPOSTX + IPREY IPMATA = IPY + DESCY( LLD_ )*NQY + IPOSTY IPMATX = IPMATA + MA*NA IPMATY = IPMATX + MX*NX IPG = IPMATY + MAX( MX*NX, MY*NY ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPG + PB_FCEIL( REAL( MAX( M, N ) ) * $ REAL( REALSZ ), REAL( CPLXSZ ) ) - 1 + $ MAX( MAX( IMBA, MBA ), $ MAX( MAX( IMBX, MBX ), $ MAX( IMBY, MBY ) ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) SNAMES( K ) END IF * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 .OR. K.EQ.6 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ) THEN AFORM = 'H' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PCLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PCLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN * CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * ELSE * * If Y is not needed, generate a copy of X instead * CALL PB_DESCSET2( DESCYR, MX, NX, IMBX, INBX, MBX, $ NBX, -1, -1, ICTXT, MAX( 1, MX ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCYR, IXSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A * IF( ( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PCLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) IF( K.NE.2 ) THEN CALL PB_CLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PCLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) IF( K.NE.2 ) THEN CALL PB_CLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( K.EQ.3 .OR. K.EQ.4 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PCLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_CLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PCLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_CLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) CALL PCLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_CLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PCLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_CLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PCLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_CLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) CALL PCLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_CLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * END IF * * Pad the guard zones of A, X and Y * CALL PB_CFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * CALL PB_CFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_CFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PCCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PCLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_INITIAL_A', $ NOUT, MEM( IPG ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, 0, $ 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPG ) ) END IF * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PCLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PCLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF * IF( YCHECK( K ) ) THEN IF( IVERB.EQ.2 ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PCLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PCLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF END IF * * Call the Level 2 PBLAS routine * INFO = 0 IF( K.EQ.1 ) THEN * * Test PCGEMV * CALL PCGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PCHEMV * CALL PCIPSET( 'Bignum', N, MEM( IPA ), IA, JA, DESCA ) * CALL PCHEMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * CALL PCIPSET( 'Zero', N, MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.3 ) THEN * * Test PCTRMV * CALL PCTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PCTRSV * CALL PCTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.5 ) THEN * * Test PCGERU * CALL PCGERU( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.6 ) THEN * * Test PCGERC * CALL PCGERC( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.7 ) THEN * * Test PCHER * IF( CMPLX( REAL( ALPHA ) ).NE.ZERO ) $ CALL PCIPSET( 'Bignum', N, MEM( IPA ), IA, JA, $ DESCA ) * CALL PCHER( UPLO, N, REAL( ALPHA ), MEM( IPX ), IX, $ JX, DESCX, INCX, MEM( IPA ), IA, JA, $ DESCA ) * ELSE IF( K.EQ.8 ) THEN * * Test PCHER2 * IF( ALPHA.NE.ZERO ) $ CALL PCIPSET( 'Bignum', N, MEM( IPA ), IA, JA, $ DESCA ) * CALL PCHER2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_CCHEKPAD( ICTXT, SNAMES( K ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), IPREA, $ IPOSTA, PADVAL ) * CALL PB_CCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), IPREX, $ IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_CCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check the computations * CALL PCBLAS2TSTCHK( ICTXT, NOUT, K, UPLO, TRANS, DIAG, M, $ N, ALPHA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, MEM( IPMATX ), $ MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, THRESH, ROGUE, $ MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PCCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Check input-only array arguments * CALL PCCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, IERR( 4 ) ) CALL PCCHKVOUT( NLX, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 5 ) ) * IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_A', $ SNAMES( K ) END IF * IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_X', $ SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PCCHKVOUT( NLY, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PCLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PCMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PCLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', $ NOUT, MEM( IPMATA ) ) END IF IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PCLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( NLX.GT.0 ) $ CALL PCVPRNT( ICTXT, NOUT, NLX, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PCLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PCLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PCLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( NLY.GT.0 ) $ CALL PCVPRNT( ICTXT, NOUT, NLY, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PCLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PCLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9980 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, ' Tested Subroutine: ', A ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9983 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9982 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9981 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9980 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9979 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9978 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9977 FORMAT( 2X, 'Testing Summary') 9976 FORMAT( 2X, 'End of Tests.' ) 9975 FORMAT( 2X, 'Tests started.' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PCBLA2TST * END SUBROUTINE PCBLA2TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, SOF, TEE, $ IAM, IGAP, IVERB, NPROCS, THRESH, ALPHA, $ BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA2TSTINFO get the needed startup information for testing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D *ype real dble cplx zplx * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS2TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 2 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex single precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9981 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PCBLA2TSTINFO * END SUBROUTINE PCBLAS2TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PCBLAS2TSTCHKE tests the error exits of the Level 2 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 8 (NSUBS). * If LTEST( 1 ) is .TRUE., PCGEMV will be tested; * If LTEST( 2 ) is .TRUE., PCHEMV will be tested; * If LTEST( 3 ) is .TRUE., PCTRMV will be tested; * If LTEST( 4 ) is .TRUE., PCTRSV will be tested; * If LTEST( 5 ) is .TRUE., PCGERU will be tested; * If LTEST( 6 ) is .TRUE., PCGERC will be tested; * If LTEST( 7 ) is .TRUE., PCHER will be tested; * If LTEST( 8 ) is .TRUE., PCHER2 will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PCDIMEE, PCGEMV, PCGERC, $ PCGERU, PCHEMV, PCHER, PCHER2, PCMATEE, $ PCOPTEE, PCTRMV, PCTRSV, PCVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/21, 22, 23, 23, 24, 24, 26, 27/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PCGEMV * I = 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCGEMV, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCGEMV, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCGEMV, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCGEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHEMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHEMV, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHEMV, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHEMV, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCHEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PCTRMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCTRMV, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCTRMV, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCTRMV, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCTRMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PCTRSV * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCTRSV, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCTRSV, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCTRSV, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCTRSV, SCODE( I ), SNAMES( I ) ) END IF * * Test PCGERU * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCGERU, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCGERU, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCGERU, SCODE( I ), SNAMES( I ) ) END IF * * Test PCGERC * I = I + 1 IF( LTEST( I ) ) THEN CALL PCDIMEE( ICTXT, NOUT, PCGERC, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCGERC, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCGERC, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHER * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHER, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHER, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCHER, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHER, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHER2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHER2, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHER2, SCODE( I ), SNAMES( I ) ) CALL PCVECEE( ICTXT, NOUT, PCHER2, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHER2, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PCBLAS2TSTCHKE * END SUBROUTINE PCCHKARG2( ICTXT, NOUT, SNAME, UPLO, TRANS, DIAG, M, $ N, ALPHA, IA, JA, DESCA, IX, JX, DESCX, $ INCX, BETA, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCA( * ), DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PCCHKARG2 checks the input-only arguments of the Level 2 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 2 PBLAS * operation. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the TRANS option in the Level 2 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 2 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, TRANSREF, UPLOREF INTEGER I, IAREF, INCXREF, INCYREF, IXREF, IYREF, $ JAREF, JXREF, JYREF, MREF, MYCOL, MYROW, NPCOL, $ NPROW, NREF COMPLEX ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCXREF( DLEN_ ), $ DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG TRANSREF = TRANS UPLOREF = UPLO MREF = M NREF = N ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IXREF = IX JXREF = JX DO 20 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 20 CONTINUE INCXREF = INCX BETAREF = BETA IYREF = IY JYREF = JY DO 30 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 30 CONTINUE INCYREF = INCY * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( TRANS, TRANSREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANS' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PCCHKARG2 * END SUBROUTINE PCBLAS2TSTCHK( ICTXT, NOUT, NROUT, UPLO, TRANS, DIAG, $ M, N, ALPHA, A, PA, IA, JA, DESCA, X, $ PX, IX, JX, DESCX, INCX, BETA, Y, PY, $ IY, JY, DESCY, INCY, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT, NROUT REAL THRESH COMPLEX ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL WORK( * ) COMPLEX A( * ), PA( * ), PX( * ), PY( * ), X( * ), $ Y( * ) * .. * * Purpose * ======= * * PCBLAS2TSTCHK performs the computational tests of the Level 2 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PCGEMV will be tested; * else if NROUT = 2, PCHEMV will be tested; * else if NROUT = 3, PCTRMV will be tested; * else if NROUT = 4, PCTRSV will be tested; * else if NROUT = 5, PCGERU will be tested; * else if NROUT = 6, PCGERC will be tested; * else if NROUT = 7, PCHER will be tested; * else if NROUT = 8, PCHER2 will be tested; * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies if the matrix operand A is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of A. * * N (global input) INTEGER * On entry, N specifies the number of columns of A. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) COMPLEX * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular, symmetric or Hermitian ma- * trices. * * WORK (workspace) REAL array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, N ). This array is used to store the compu- * ted gauges (see PCMVCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL RZERO PARAMETER ( RZERO = 0.0E+0 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW REAL ERR COMPLEX ALPHA1 * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CTRSV, PB_CLASET, PCCHKMIN, $ PCCHKVIN, PCMVCH, PCTRMV, PCVMCH, PCVMCH2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, REAL * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PCGEMV * * Check the resulting vector Y * CALL PCMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, $ IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, $ INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PCCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) IF( LSAME( TRANS, 'N' ) ) THEN CALL PCCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) ELSE CALL PCCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PCHEMV * * Check the resulting vector Y * CALL PCMVCH( ICTXT, 'No transpose', N, N, ALPHA, A, IA, JA, $ DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, $ JY, DESCY, INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PCCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PCTRMV * * Check the resulting vector X * CALL PCMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, Y, IX, $ JX, DESCX, INCX, ZERO, X, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PCTRSV * * Check the resulting vector X * CALL CTRSV( UPLO, TRANS, DIAG, N, A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ), X( IX+(JX-1)*DESCX( M_ ) ), INCX ) CALL PCTRMV( UPLO, TRANS, DIAG, N, PA, IA, JA, DESCA, PX, IX, $ JX, DESCX, INCX ) CALL PCMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, ZERO, Y, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PCGERU * * Check the resulting matrix A * CALL PCVMCH( ICTXT, 'No transpose', 'Ge', M, N, ALPHA, X, IX, $ JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, $ IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PCCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PCCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.6 ) THEN * * Test PCGERC * * Check the resulting matrix A * CALL PCVMCH( ICTXT, 'Conjugate transpose', 'Ge', M, N, ALPHA, $ X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, $ A, PA, IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PCCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PCCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.7 ) THEN * * Test PCHER * * Check the resulting matrix A * ALPHA1 = CMPLX( REAL( ALPHA ), RZERO ) CALL PCVMCH( ICTXT, 'Conjugate transpose', UPLO, N, N, ALPHA1, $ X, IX, JX, DESCX, INCX, X, IX, JX, DESCX, INCX, A, $ PA, IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PCCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.8 ) THEN * * Test PCHER2 * * Check the resulting matrix A * CALL PCVMCH2( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, INCX, $ Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, $ WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PCCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PCCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'X' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'Y' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9997 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9996 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PCBLAS2TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/pdblas3tst.f0000640000175000017500000037514210363532303017540 0ustar muammarmuammar PROGRAM PDBLA3TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA3TST is the main testing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * * from the following 61 lines: * 'Level 3 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS3TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 1.0D0 value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PDGEMM T put F for no test in the same column * PDSYMM T put F for no test in the same column * PDSYRK T put F for no test in the same column * PDSYR2K T put F for no test in the same column * PDTRMM T put F for no test in the same column * PDTRSM T put F for no test in the same column * PDGEADD T put F for no test in the same column * PDTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, DBLESZ, TOTMEM, $ MEMSIZ, NSUBS DOUBLE PRECISION ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, ZERO = 0.0D+0, $ ONE = 1.0D+0, PADVAL = -9923.0D+0, $ NSUBS = 8, ROGUE = -1.0D+10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IGAP, IMBA, IMBB, $ IMBC, IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, $ IPA, IPB, IPC, IPG, IPMATA, IPMATB, IPMATC, $ IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, IPREC, $ IPW, IVERB, J, JA, JB, JC, K, L, LDA, LDB, LDC, $ M, MA, MB, MBA, MBB, MBC, MC, MEMREQD, MPA, $ MPB, MPC, MYCOL, MYROW, N, NA, NB, NBA, NBB, $ NBC, NC, NCOLA, NCOLB, NCOLC, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQA, NQB, NQC, NROWA, $ NROWB, NROWC, NTESTS, OFFDA, OFFDC, RSRCA, $ RSRCB, RSRCC, TSKIP, TSTCNT REAL THRESH DOUBLE PRECISION ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL BCHECK( NSUBS ), CCHECK( NSUBS ), $ LTEST( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCB( DLEN_ ), $ DESCBR( DLEN_ ), DESCC( DLEN_ ), $ DESCCR( DLEN_ ), IAVAL( MAXTESTS ), $ IBVAL( MAXTESTS ), ICVAL( MAXTESTS ), $ IERR( 6 ), IMBAVAL( MAXTESTS ), $ IMBBVAL( MAXTESTS ), IMBCVAL( MAXTESTS ), $ INBAVAL( MAXTESTS ), INBBVAL( MAXTESTS ), $ INBCVAL( MAXTESTS ), JAVAL( MAXTESTS ), $ JBVAL( MAXTESTS ), JCVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DCHEKPAD, PB_DESCSET2, PB_DFILLPAD, $ PB_DLASCAL, PB_DLASET, PB_PDLAPRNT, $ PDBLA3TSTINFO, PDBLAS3TSTCHK, PDBLAS3TSTCHKE, $ PDCHKARG3, PDCHKMOUT, PDGEADD, PDGEMM, PDLAGEN, $ PDLASCAL, PDLASET, PDMPRNT, PDSYMM, PDSYR2K, $ PDSYRK, PDTRADD, PDTRMM, PDTRSM, PMDESCCHK, $ PMDIMCHK * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MOD * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDGEMM ', 'PDSYMM ', 'PDSYRK ', $ 'PDSYR2K', 'PDTRMM ', 'PDTRSM ', $ 'PDGEADD', 'PDTRADD'/ DATA BCHECK/.TRUE., .TRUE., .FALSE., .TRUE., .TRUE., $ .TRUE., .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, * so that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA3TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, SOF, TEE, IAM, $ IGAP, IVERB, NPROCS, THRESH, ALPHA, BETA, $ MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PDBLAS3TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * TSTCNT = TSTCNT + 1 * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, IGAP, GAPMUL, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, IGAP, GAPMUL, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDB = MAX( 1, MB ) LDC = MAX( 1, MC ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREB IPC = IPB + DESCB( LLD_ )*NQB + IPOSTB + IPREC IPMATA = IPC + DESCC( LLD_ )*NQC + IPOSTC IPMATB = IPMATA + MA*NA IPMATC = IPMATB + MB*NB IPG = IPMATC + MAX( MB*NB, MC*NC ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * IPW = IPG + 2*MAX( M, MAX( N, K ) ) MEMREQD = IPW - 1 + MAX( MAX( MAX( IMBA, MBA ), $ MAX( IMBB, MBB ) ), $ MAX( IMBC, MBC ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) SNAMES( L ) END IF * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PDGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF * ELSE IF( L.EQ.2 ) THEN * * PDSYMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF * ELSE IF( L.EQ.3 ) THEN * * PDSYRK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 * ELSE IF( L.EQ.4 ) THEN * * PDSYR2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF * ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 * ELSE IF( L.EQ.7 .OR. L.EQ.8 ) THEN * * PDGEADD, PDTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PDSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 .OR. L.EQ.4 ) THEN * * PDSYRK, PDSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( ( L.EQ.6 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PDTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PDLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) * IF( BCHECK( L ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PDLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PDLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) * IF( BCHECK( L ) ) THEN CALL PB_DESCSET2( DESCBR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCBR, IBSEED, MEM( IPMATB ), $ DESCBR( LLD_ ) ) END IF * IF( CCHECK( L ) ) THEN * CALL PB_DESCSET2( DESCCR, MC, NC, IMBC, INBC, MBC, $ NBC, -1, -1, ICTXT, MAX( 1, MC ) ) CALL PDLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCCR, ICSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * ELSE * * If C is not needed, generate a copy of B instead * CALL PB_DESCSET2( DESCCR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCCR, IBSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A, B, C * IF( ( L.EQ.2 ).AND.( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PDLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PDLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) * END IF * ELSE IF( ( ( L.EQ.3 ).OR.( L.EQ.4 ) ).AND. $ ( MAX( NROWC, NCOLC ).GT.1 ) ) THEN * * The distributed matrix C is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PDLASET( 'Upper', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC, JC+1, $ DESCC ) CALL PB_DLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PDLASET( 'Lower', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC+1, JC, $ DESCC ) CALL PB_DLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * ELSE IF( L.EQ.5 .OR. L.EQ.6 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PDLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_DLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PDLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_DLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.6 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) CALL PDLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_DLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PDLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_DLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PDLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_DLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.6 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) CALL PDLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_DLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * ELSE IF( L.EQ.8 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix C is lower triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PDLASET( 'Upper', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC, $ JC+1, DESCC ) CALL PB_DLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix C is upper triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PDLASET( 'Lower', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC+1, $ JC, DESCC ) CALL PB_DLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * END IF * * Pad the guard zones of A, B and C * CALL PB_DFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_DFILLPAD( ICTXT, MPB, NQB, MEM( IPB-IPREB ), $ DESCB( LLD_ ), IPREB, IPOSTB, $ PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_DFILLPAD( ICTXT, MPC, NQC, MEM( IPC-IPREC ), $ DESCC( LLD_ ), IPREC, IPOSTC, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PDCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PDLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, $ 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) END IF * IF( BCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PDLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, JB, $ DESCB, 0, 0, $ 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MB, NB, MEM( IPB ), 1, 1, DESCB, $ 0, 0, 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) END IF END IF * IF( CCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PDLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, JC, $ DESCC, 0, 0, $ 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MC, NC, MEM( IPC ), 1, 1, DESCC, $ 0, 0, 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) END IF END IF * * Call the Level 3 PBLAS routine * INFO = 0 IF( L.EQ.1 ) THEN * * Test PDGEMM * CALL PDGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.2 ) THEN * * Test PDSYMM * CALL PDSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.3 ) THEN * * Test PDSYRK * CALL PDSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.4 ) THEN * * Test PDSYR2K * CALL PDSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.5 ) THEN * * Test PDTRMM * CALL PDTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * ELSE IF( L.EQ.6 ) THEN * * Test PDTRSM * CALL PDTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * * ELSE IF( L.EQ.7 ) THEN * * Test PDGEADD * CALL PDGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.8 ) THEN * * Test PDTRADD * CALL PDTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_DCHEKPAD( ICTXT, SNAMES( L ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), $ IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_DCHEKPAD( ICTXT, SNAMES( L ), MPB, NQB, $ MEM( IPB-IPREB ), DESCB( LLD_ ), $ IPREB, IPOSTB, PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_DCHEKPAD( ICTXT, SNAMES( L ), MPC, NQC, $ MEM( IPC-IPREC ), DESCC( LLD_ ), $ IPREC, IPOSTC, PADVAL ) END IF * * Check the computations * CALL PDBLAS3TSTCHK( ICTXT, NOUT, L, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, $ MEM( IPMATA ), MEM( IPA ), IA, JA, $ DESCA, MEM( IPMATB ), MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, THRESH, $ ROGUE, MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PDCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Check input-only array arguments * CALL PDCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), $ MEM( IPA ), IA, JA, DESCA, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_A', $ SNAMES( L ) END IF * IF( BCHECK( L ) ) THEN CALL PDCHKMOUT( NROWB, NCOLB, MEM( IPMATB ), $ MEM( IPB ), IB, JB, DESCB, IERR( 5 ) ) IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_B', $ SNAMES( L ) END IF END IF * IF( CCHECK( L ) ) THEN CALL PDCHKMOUT( NROWC, NCOLC, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_C', $ SNAMES( L ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN KFAIL( L ) = KFAIL( L ) + 1 ERRFLG = .TRUE. IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) SNAMES( L ) ELSE KPASS( L ) = KPASS( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( L ) END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PDLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PDMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PDLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) END IF IF( BCHECK( L ) ) THEN IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MB, NB, $ MEM( IPMATB ), LDB, 0, 0, $ 'SERIAL_B' ) CALL PB_PDLAPRNT( MB, NB, MEM( IPB ), 1, 1, $ DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PDMPRNT( ICTXT, NOUT, NROWB, NCOLB, $ MEM( IPMATB+IB-1+(JB-1)*LDB ), $ LDB, 0, 0, 'SERIAL_B' ) CALL PB_PDLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, $ JB, DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) END IF END IF IF( CCHECK( L ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MC, NC, $ MEM( IPMATC ), LDC, 0, 0, $ 'SERIAL_C' ) CALL PB_PDLAPRNT( MC, NC, MEM( IPC ), 1, 1, $ DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PDMPRNT( ICTXT, NOUT, NROWC, NCOLC, $ MEM( IPMATC+IC-1+(JC-1)*LDC ), $ LDC, 0, 0, 'SERIAL_C' ) CALL PB_PDLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, $ JC, DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9982 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9981 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, ' Tested Subroutine: ', A ) 9985 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9983 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9982 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9981 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9980 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9979 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9978 FORMAT( 2X, 'Testing Summary') 9977 FORMAT( 2X, 'End of Tests.' ) 9976 FORMAT( 2X, 'Tests started.' ) 9975 FORMAT( 2X, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PDBLA3TST * END SUBROUTINE PDBLA3TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, SOF, $ TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA3TSTINFO get the needed startup information for testing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) DOUBLE PRECISION * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS3TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 3 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real double precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) 9981 FORMAT( 2X, 'Beta : ', G16.6 ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PDBLA3TSTINFO * END SUBROUTINE PDBLAS3TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PDBLAS3TSTCHKE tests the error exits of the Level 3 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 7 (NSUBS). * If LTEST( 1 ) is .TRUE., PDGEMM will be tested; * If LTEST( 2 ) is .TRUE., PDSYMM will be tested; * If LTEST( 3 ) is .TRUE., PDSYRK will be tested; * If LTEST( 4 ) is .TRUE., PDSYR2K will be tested; * If LTEST( 5 ) is .TRUE., PDTRMM will be tested; * If LTEST( 6 ) is .TRUE., PDTRSM will be tested; * If LTEST( 7 ) is .TRUE., PDGEADD will be tested; * If LTEST( 8 ) is .TRUE., PDTRADD will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PDDIMEE, PDGEADD, PDGEMM, $ PDMATEE, PDOPTEE, PDSYMM, PDSYR2K, PDSYRK, $ PDTRADD, PDTRMM, PDTRSM * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/31, 32, 33, 35, 38, 38, 39, 40/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PDGEMM * I = 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDGEMM, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDGEMM, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDGEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYMM, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYMM, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYRK * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYRK, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYRK, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYRK, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYR2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYR2K, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYR2K, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYR2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PDTRMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDTRMM, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDTRMM, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDTRMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PDTRSM * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDTRSM, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDTRSM, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDTRSM, SCODE( I ), SNAMES( I ) ) END IF * * Test PDGEADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDGEADD, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDGEADD, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDGEADD, SCODE( I ), SNAMES( I ) ) END IF * * Test PDTRADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDTRADD, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDTRADD, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDTRADD, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PDBLAS3TSTCHKE * END SUBROUTINE PDCHKARG3( ICTXT, NOUT, SNAME, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, IA, JA, $ DESCA, IB, JB, DESCB, BETA, IC, JC, DESCC, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*7 SNAME INTEGER DESCA( * ), DESCB( * ), DESCC( * ) * .. * * Purpose * ======= * * PDCHKARG3 checks the input-only arguments of the Level 3 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies the SIDE option in the Level 3 PBLAS * operation. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 3 PBLAS * operation. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the TRANSA option in the Level 3 * PBLAS operation. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the TRANSB option in the Level 3 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 3 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * K (global input) INTEGER * On entry, K specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, SIDEREF, TRANSAREF, TRANSBREF, UPLOREF INTEGER I, IAREF, IBREF, ICREF, JAREF, JBREF, JCREF, $ KREF, MREF, MYCOL, MYROW, NPCOL, NPROW, NREF DOUBLE PRECISION ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCBREF( DLEN_ ), $ DESCCREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG SIDEREF = SIDE TRANSAREF = TRANSA TRANSBREF = TRANSB UPLOREF = UPLO MREF = M NREF = N KREF = K ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IBREF = IB JBREF = JB DO 20 I = 1, DLEN_ DESCBREF( I ) = DESCB( I ) 20 CONTINUE BETAREF = BETA ICREF = IC JCREF = JC DO 30 I = 1, DLEN_ DESCCREF( I ) = DESCC( I ) 30 CONTINUE * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( SIDE, SIDEREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'SIDE' ELSE IF( .NOT. LSAME( TRANSA, TRANSAREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSA' ELSE IF( .NOT. LSAME( TRANSB, TRANSBREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSB' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( K.NE.KREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'K' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IB.NE.IBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IB' ELSE IF( JB.NE.JBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JB' ELSE IF( DESCB( DTYPE_ ).NE.DESCBREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( DTYPE_ )' ELSE IF( DESCB( M_ ).NE.DESCBREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( M_ )' ELSE IF( DESCB( N_ ).NE.DESCBREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( N_ )' ELSE IF( DESCB( IMB_ ).NE.DESCBREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( IMB_ )' ELSE IF( DESCB( INB_ ).NE.DESCBREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( INB_ )' ELSE IF( DESCB( MB_ ).NE.DESCBREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( MB_ )' ELSE IF( DESCB( NB_ ).NE.DESCBREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( NB_ )' ELSE IF( DESCB( RSRC_ ).NE.DESCBREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( RSRC_ )' ELSE IF( DESCB( CSRC_ ).NE.DESCBREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CSRC_ )' ELSE IF( DESCB( CTXT_ ).NE.DESCBREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CTXT_ )' ELSE IF( DESCB( LLD_ ).NE.DESCBREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( LLD_ )' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IC.NE.ICREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IC' ELSE IF( JC.NE.JCREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JC' ELSE IF( DESCC( DTYPE_ ).NE.DESCCREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( DTYPE_ )' ELSE IF( DESCC( M_ ).NE.DESCCREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( M_ )' ELSE IF( DESCC( N_ ).NE.DESCCREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( N_ )' ELSE IF( DESCC( IMB_ ).NE.DESCCREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( IMB_ )' ELSE IF( DESCC( INB_ ).NE.DESCCREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( INB_ )' ELSE IF( DESCC( MB_ ).NE.DESCCREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( MB_ )' ELSE IF( DESCC( NB_ ).NE.DESCCREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( NB_ )' ELSE IF( DESCC( RSRC_ ).NE.DESCCREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( RSRC_ )' ELSE IF( DESCC( CSRC_ ).NE.DESCCREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CSRC_ )' ELSE IF( DESCC( CTXT_ ).NE.DESCCREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CTXT_ )' ELSE IF( DESCC( LLD_ ).NE.DESCCREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( LLD_ )' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PDCHKARG3 * END SUBROUTINE PDBLAS3TSTCHK( ICTXT, NOUT, NROUT, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, A, PA, IA, $ JA, DESCA, B, PB, IB, JB, DESCB, BETA, $ C, PC, IC, JC, DESCC, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT, NROUT REAL THRESH DOUBLE PRECISION ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION A( * ), B( * ), C( * ), PA( * ), PB( * ), $ PC( * ), WORK( * ) * .. * * Purpose * ======= * * PDBLAS3TSTCHK performs the computational tests of the Level 3 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PDGEMM will be tested; * else if NROUT = 2, PDSYMM will be tested; * else if NROUT = 3, PDSYRK will be tested; * else if NROUT = 4, PDSYR2K will be tested; * else if NROUT = 5, PDTRMM will be tested; * else if NROUT = 6, PDTRSM will be tested; * else if NROUT = 7, PDGEADD will be tested; * else if NROUT = 8, PDTRADD will be tested; * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies if the multiplication should be per- * formed from the left or the right. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * PB (local input) DOUBLE PRECISION array * On entry, PB is an array of dimension (DESCB( LLD_ ),*). This * array contains the local entries of the matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) DOUBLE PRECISION array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) DOUBLE PRECISION * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular or symmetric matrices. * * WORK (workspace) DOUBLE PRECISION array * On entry, WORK is an array of dimension LWORK where LWORK is * at least 2*MAX( M, MAX( N, K ) ). This array is used to store * a copy of a column of C and the computed gauges (see PDMMCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on B has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on C has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, IPG, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DTRSM, PB_DLASET, PDCHKMIN, $ PDMMCH, PDMMCH1, PDMMCH2, PDMMCH3, PDTRMM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE IPG = MAX( M, MAX( N, K ) ) + 1 * IF( NROUT.EQ.1 ) THEN * * Test PDGEMM * * Check the resulting matrix C * CALL PDMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, $ DESCC, WORK, WORK( IPG ), ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDCHKMIN( ERR, M, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PDCHKMIN( ERR, K, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF IF( LSAME( TRANSB, 'N' ) ) THEN CALL PDCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PDCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PDSYMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PDMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, WORK( IPG ), $ ERR, IERR( 3 ) ) ELSE CALL PDMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, WORK( IPG ), $ ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_DLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_DLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PDCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PDCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PDSYRK * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDMMCH1( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, $ WORK, WORK( IPG ), ERR, IERR( 3 ) ) ELSE CALL PDMMCH1( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, IA, $ JA, DESCA, BETA, C, PC, IC, JC, DESCC, WORK, $ WORK( IPG ), ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PDCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.4 ) THEN * * Test PDSYR2K * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDMMCH2( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, WORK( IPG ), ERR, $ IERR( 3 ) ) ELSE CALL PDMMCH2( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, WORK( IPG ), ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PDCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PDCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PDCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PDTRMM * * Check the resulting matrix B * IF( LSAME( SIDE, 'L' ) ) THEN CALL PDMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, C, IB, JB, DESCB, $ ZERO, B, PB, IB, JB, DESCB, WORK, $ WORK( IPG ), ERR, IERR( 2 ) ) ELSE CALL PDMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, $ ALPHA, C, IB, JB, DESCB, A, IA, JA, DESCA, $ ZERO, B, PB, IB, JB, DESCB, WORK, $ WORK( IPG ), ERR, IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PDTRSM * * Check the resulting matrix B * CALL DTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ), $ B( IB+(JB-1)*DESCB( M_ ) ), DESCB( M_ ) ) CALL PDTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, PA, IA, JA, $ DESCA, PB, IB, JB, DESCB ) IF( LSAME( SIDE, 'L' ) ) THEN CALL PDMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, ALPHA, $ A, IA, JA, DESCA, B, IB, JB, DESCB, ZERO, C, $ PB, IB, JB, DESCB, WORK, WORK( IPG ), ERR, $ IERR( 2 ) ) ELSE CALL PDMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, ALPHA, $ B, IB, JB, DESCB, A, IA, JA, DESCA, ZERO, C, $ PB, IB, JB, DESCB, WORK, WORK( IPG ), ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF ELSE IF( NROUT.EQ.7 ) THEN * * Test PDGEADD * * Check the resulting matrix C * CALL PDMMCH3( 'All', TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PDCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PDTRADD * * Check the resulting matrix C * CALL PDMMCH3( UPLO, TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PDCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PDCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'B' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'C' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9997 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PDBLAS3TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/PDBLAS3TST.dat0000640000175000017500000000472010363532303017412 0ustar muammarmuammar'Level 3 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PDBLAS3TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0D0 value of ALPHA 3.0D0 value of BETA 4 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 22 30 8 20 3 5 11 8 values of M 23 29 7 21 3 14 11 8 values of N 24 28 9 22 3 9 11 8 values of K 31 31 31 31 31 31 31 31 values of M_A 31 31 31 31 31 31 31 31 values of N_A 4 5 6 3 2 1 4 5 values of IMB_A 4 5 6 3 2 1 4 5 values of INB_A 4 5 6 3 2 1 4 5 values of MB_A 4 5 6 3 2 1 4 5 values of NB_A 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 5 1 7 7 1 12 1 11 values of IA 5 1 7 7 1 12 1 11 values of JA 32 32 32 32 32 32 32 32 values of M_B 32 32 32 32 32 32 32 32 values of N_B 4 5 6 3 2 1 4 5 values of IMB_B 4 5 6 3 2 1 4 5 values of INB_B 4 5 6 3 2 1 4 5 values of MB_B 4 5 6 3 2 1 4 5 values of NB_B 0 0 0 0 0 0 0 0 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 5 1 7 7 1 12 1 11 values of IB 5 1 7 7 1 12 1 11 values of JB 35 35 35 35 35 35 35 35 values of M_C 35 35 35 35 35 35 35 35 values of N_C 4 5 6 3 2 1 4 5 values of IMB_C 4 5 6 3 2 1 4 5 values of INB_C 4 5 6 3 2 1 4 5 values of MB_C 4 5 6 3 2 1 4 5 values of NB_C 0 0 0 0 0 0 0 0 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 5 1 7 7 1 12 1 11 values of IC 5 1 7 7 1 12 1 11 values of JC PDGEMM T put F for no test in the same column PDSYMM T put F for no test in the same column PDSYRK T put F for no test in the same column PDSYR2K T put F for no test in the same column PDTRMM T put F for no test in the same column PDTRSM T put F for no test in the same column PDGEADD T put F for no test in the same column PDTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/pcblastst.f0000640000175000017500000147056110363532303017456 0ustar muammarmuammar SUBROUTINE PCOPTEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCOPTEE tests whether the PBLAS respond correctly to a bad option * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PCCHKOPT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2nd option * APOS = 2 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 3rd option * APOS = 3 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2'nd option * APOS = 2 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 .OR. SCODE.EQ.40 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2'nd option * APOS = 2 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 3rd option * APOS = 3 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 4th option * APOS = 4 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st option * APOS = 1 CALL PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * END IF * RETURN * * End of PCOPTEE * END SUBROUTINE PCCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCCHKOPT tests the option ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the option to be * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCCALLSUB, PCHKPBE, PCSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PCSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'D' ) ) THEN * * Generate bad DIAG option * DIAG = '/' * ELSE IF( LSAME( ARGNAM, 'S' ) ) THEN * * Generate bad SIDE option * SIDE = '/' * ELSE IF( LSAME( ARGNAM, 'A' ) ) THEN * * Generate bad TRANSA option * TRANSA = '/' * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Generate bad TRANSB option * TRANSB = '/' * ELSE IF( LSAME( ARGNAM, 'U' ) ) THEN * * Generate bad UPLO option * UPLO = '/' * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PCCHKOPT * END SUBROUTINE PCDIMEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCDIMEE tests whether the PBLAS respond correctly to a bad dimension * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PCCHKDIM * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 .OR. SCODE.EQ.12 .OR. SCODE.EQ.13 .OR. $ SCODE.EQ.14 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) dimension * APOS = 1 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st dimension * APOS = 2 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) dimension * APOS = 2 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) dimension * APOS = 4 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.24 ) THEN * * Check 1st dimension * APOS = 1 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 3rd dimension * APOS = 5 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 ) THEN * * Check 1st dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 2nd dimension * APOS = 4 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st dimension * APOS = 1 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st dimension * APOS = 5 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 6 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st dimension * APOS = 2 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st dimension * APOS = 3 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * END IF * RETURN * * End of PCDIMEE * END SUBROUTINE PCCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCCHKDIM tests the dimension ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the dimension to be * checked. ARGNAM can either be 'M', 'N' or 'K'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCCALLSUB, PCHKPBE, PCSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER KDIM, MDIM, NDIM COMMON /PBLASN/KDIM, MDIM, NDIM * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PCSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'M' ) ) THEN * * Generate bad MDIM * MDIM = -1 * ELSE IF( LSAME( ARGNAM, 'N' ) ) THEN * * Generate bad NDIM * NDIM = -1 * ELSE * * Generate bad KDIM * KDIM = -1 * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PCCHKDIM * END SUBROUTINE PCVECEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCVECEE tests whether the PBLAS respond correctly to a bad vector * argument. Each vector is described by: , I, J, * DESC, INC. Out of all these, only I, J, * DESC, and INC can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PCCHKMAT * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * * Check 1st vector * APOS = 2 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 7 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.12 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) vector * APOS = 3 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.13 ) THEN * * Check 1st vector * APOS = 3 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 8 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.14 ) THEN * * Check 1st (and only) vector * APOS = 4 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st vector * APOS = 9 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 15 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st vector * APOS = 8 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 14 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) vector * APOS = 9 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st vector * APOS = 4 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 9 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.26 .OR. SCODE.EQ.27 ) THEN * * Check 1'st (and only) vector * APOS = 4 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * END IF * RETURN * * End of PCVECEE * END SUBROUTINE PCMATEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCMATEE tests whether the PBLAS respond correctly to a bad matrix * argument. Each matrix is described by: , I, J, * and DESC. Out of all these, only I, J and DESC * can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PCCHKMAT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 .OR. SCODE.EQ.23 ) THEN * * Check 1st (and only) matrix * APOS = 5 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st (and only) matrix * APOS = 4 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st (and only) matrix * APOS = 14 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.25 .OR. SCODE.EQ.26 ) THEN * * Check 1st (and only) matrix * APOS = 9 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st matrix * APOS = 7 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 16 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.32 .OR. SCODE.EQ.35 .OR. SCODE.EQ.36 ) THEN * * Check 1st matrix * APOS = 6 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 15 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 ) THEN * * Check 1st matrix * APOS = 6 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st matrix * APOS = 4 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 9 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st matrix * APOS = 8 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 12 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st matrix * APOS = 5 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st matrix * APOS = 6 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * END IF * RETURN * * End of PCMATEE * END SUBROUTINE PCSETPBLAS( ICTXT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT * .. * * Purpose * ======= * * PCSETPBLAS initializes *all* the dummy arguments to correct values. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL RONE COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ RONE = 1.0E+0 ) * .. * .. External Subroutines .. EXTERNAL PB_DESCSET2 * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM REAL USCLR COMPLEX SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMPLEX A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Set default values for options * DIAG = 'N' SIDE = 'L' TRANSA = 'N' TRANSB = 'N' UPLO = 'U' * * Set default values for scalars * KDIM = 1 MDIM = 1 NDIM = 1 ISCLR = 1 SCLR = ONE USCLR = RONE * * Set default values for distributed matrix A * A( 1, 1 ) = ONE A( 2, 1 ) = ONE A( 1, 2 ) = ONE A( 2, 2 ) = ONE IA = 1 JA = 1 CALL PB_DESCSET2( DESCA, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix B * B( 1, 1 ) = ONE B( 2, 1 ) = ONE B( 1, 2 ) = ONE B( 2, 2 ) = ONE IB = 1 JB = 1 CALL PB_DESCSET2( DESCB, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix C * C( 1, 1 ) = ONE C( 2, 1 ) = ONE C( 1, 2 ) = ONE C( 2, 2 ) = ONE IC = 1 JC = 1 CALL PB_DESCSET2( DESCC, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix X * X( 1 ) = ONE X( 2 ) = ONE IX = 1 JX = 1 CALL PB_DESCSET2( DESCX, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCX = 1 * * Set default values for distributed matrix Y * Y( 1 ) = ONE Y( 2 ) = ONE IY = 1 JY = 1 CALL PB_DESCSET2( DESCY, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCY = 1 * RETURN * * End of PCSETPBLAS * END SUBROUTINE PCCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the matrix or vector * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one * wants to check a matrix, and 'X' or 'Y' for a vector. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the first argument * of the matrix (or vector) ARGNAM. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT PARAMETER ( DESCMULT = 100 ) * .. * .. Local Scalars .. INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCCALLSUB, PCHKPBE, PCSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ARGNAM, 'A' ) ) THEN * * Check IA. Set all other OK, bad IA * CALL PCSETPBLAS( ICTXT ) IA = -1 INFOT = ARGPOS + 1 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JA. Set all other OK, bad JA * CALL PCSETPBLAS( ICTXT ) JA = -1 INFOT = ARGPOS + 2 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCA. Set all other OK, bad DESCA * DO 10 I = 1, DLEN_ * * Set I'th entry of DESCA to incorrect value, rest ok. * CALL PCSETPBLAS( ICTXT ) DESCA( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCA, CSRCA, LDA * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PCSETPBLAS( ICTXT ) * * Test RSRCA >= NPROW * IF( I.EQ.RSRC_ ) $ DESCA( I ) = NPROW * * Test CSRCA >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCA( I ) = NPCOL * * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCA( I ) = 1 ELSE DESCA( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 10 CONTINUE * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Check IB. Set all other OK, bad IB * CALL PCSETPBLAS( ICTXT ) IB = -1 INFOT = ARGPOS + 1 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JB. Set all other OK, bad JB * CALL PCSETPBLAS( ICTXT ) JB = -1 INFOT = ARGPOS + 2 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCB. Set all other OK, bad DESCB * DO 20 I = 1, DLEN_ * * Set I'th entry of DESCB to incorrect value, rest ok. * CALL PCSETPBLAS( ICTXT ) DESCB( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCB, CSRCB, LDB * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PCSETPBLAS( ICTXT ) * * Test RSRCB >= NPROW * IF( I.EQ.RSRC_ ) $ DESCB( I ) = NPROW * * Test CSRCB >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCB( I ) = NPCOL * * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCB( I ) = 1 ELSE DESCB( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 20 CONTINUE * ELSE IF( LSAME( ARGNAM, 'C' ) ) THEN * * Check IC. Set all other OK, bad IC * CALL PCSETPBLAS( ICTXT ) IC = -1 INFOT = ARGPOS + 1 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JC. Set all other OK, bad JC * CALL PCSETPBLAS( ICTXT ) JC = -1 INFOT = ARGPOS + 2 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCC. Set all other OK, bad DESCC * DO 30 I = 1, DLEN_ * * Set I'th entry of DESCC to incorrect value, rest ok. * CALL PCSETPBLAS( ICTXT ) DESCC( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCC, CSRCC, LDC * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PCSETPBLAS( ICTXT ) * * Test RSRCC >= NPROW * IF( I.EQ.RSRC_ ) $ DESCC( I ) = NPROW * * Test CSRCC >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCC( I ) = NPCOL * * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCC( I ) = 1 ELSE DESCC( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 30 CONTINUE * ELSE IF( LSAME( ARGNAM, 'X' ) ) THEN * * Check IX. Set all other OK, bad IX * CALL PCSETPBLAS( ICTXT ) IX = -1 INFOT = ARGPOS + 1 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JX. Set all other OK, bad JX * CALL PCSETPBLAS( ICTXT ) JX = -1 INFOT = ARGPOS + 2 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCX. Set all other OK, bad DESCX * DO 40 I = 1, DLEN_ * * Set I'th entry of DESCX to incorrect value, rest ok. * CALL PCSETPBLAS( ICTXT ) DESCX( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCX, CSRCX, LDX * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PCSETPBLAS( ICTXT ) * * Test RSRCX >= NPROW * IF( I.EQ.RSRC_ ) $ DESCX( I ) = NPROW * * Test CSRCX >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCX( I ) = NPCOL * * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCX( I ) = 1 ELSE DESCX( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 40 CONTINUE * * Check INCX. Set all other OK, bad INCX * CALL PCSETPBLAS( ICTXT ) INCX = -1 INFOT = ARGPOS + 4 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * ELSE * * Check IY. Set all other OK, bad IY * CALL PCSETPBLAS( ICTXT ) IY = -1 INFOT = ARGPOS + 1 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JY. Set all other OK, bad JY * CALL PCSETPBLAS( ICTXT ) JY = -1 INFOT = ARGPOS + 2 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCY. Set all other OK, bad DESCY * DO 50 I = 1, DLEN_ * * Set I'th entry of DESCY to incorrect value, rest ok. * CALL PCSETPBLAS( ICTXT ) DESCY( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCY, CSRCY, LDY * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PCSETPBLAS( ICTXT ) * * Test RSRCY >= NPROW * IF( I.EQ.RSRC_ ) $ DESCY( I ) = NPROW * * Test CSRCY >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCY( I ) = NPCOL * * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCY( I ) = 1 ELSE DESCY( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 50 CONTINUE * * Check INCY. Set all other OK, bad INCY * CALL PCSETPBLAS( ICTXT ) INCY = -1 INFOT = ARGPOS + 4 CALL PCCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * RETURN * * End of PCCHKMAT * END SUBROUTINE PCCALLSUB( SUBPTR, SCODE ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER SCODE * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCCALLSUB calls the subroutine SUBPTR with the calling sequence iden- * tified by SCODE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM REAL USCLR COMPLEX SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMPLEX A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * CALL SUBPTR( NDIM, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY ) * ELSE IF( SCODE.EQ.12 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.13 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, JY, $ DESCY, INCY ) * ELSE IF( SCODE.EQ.14 ) THEN * CALL SUBPTR( NDIM, SCLR, ISCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.15 ) THEN * CALL SUBPTR( NDIM, USCLR, X, IX, JX, DESCX, INCX ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.22 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, A, IA, JA, DESCA, X, IX, JX, $ DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.23 ) THEN * CALL SUBPTR( UPLO, TRANSA, DIAG, NDIM, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.24 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * ELSE IF( SCODE.EQ.25 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.26 ) THEN * CALL SUBPTR( UPLO, NDIM, USCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.27 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * CALL SUBPTR( TRANSA, TRANSB, MDIM, NDIM, KDIM, SCLR, A, IA, JA, $ DESCA, B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.32 ) THEN * CALL SUBPTR( SIDE, UPLO, MDIM, NDIM, SCLR, A, IA, JA, DESCA, B, $ IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.33 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.34 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, USCLR, A, IA, JA, DESCA, $ USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.35 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.36 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.37 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, C, IC, $ JC, DESCC ) * ELSE IF( SCODE.EQ.38 ) THEN * CALL SUBPTR( SIDE, UPLO, TRANSA, DIAG, MDIM, NDIM, SCLR, A, IA, $ JA, DESCA, B, IB, JB, DESCB ) * ELSE IF( SCODE.EQ.39 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, $ C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.40 ) THEN * CALL SUBPTR( UPLO, TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * END IF * RETURN * * End of PCCALLSUB * END SUBROUTINE PCERRSET( ERR, ERRMAX, XTRUE, X ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERR, ERRMAX COMPLEX X, XTRUE * .. * * Purpose * ======= * * PCERRSET computes the absolute difference ERR = |XTRUE - X| and com- * pares it with zero. ERRMAX accumulates the absolute error difference. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERR (local output) REAL * On exit, ERR specifies the absolute difference |XTRUE - X|. * * ERRMAX (local input/local output) REAL * On entry, ERRMAX specifies a previously computed error. On * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ). * * XTRUE (local input) COMPLEX * On entry, XTRUE specifies the true value. * * X (local input) COMPLEX * On entry, X specifies the value to be compared to XTRUE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. REAL PSDIFF EXTERNAL PSDIFF * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL * .. * .. Executable Statements .. * ERR = ABS( PSDIFF( REAL( XTRUE ), REAL( X ) ) ) ERR = MAX( ERR, ABS( PSDIFF( AIMAG( XTRUE ), AIMAG( X ) ) ) ) * ERRMAX = MAX( ERRMAX, ERR ) * RETURN * * End of PCERRSET * END SUBROUTINE PCCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N REAL ERRMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX PX( * ), X( * ) * .. * * Purpose * ======= * * PCCHKVIN checks that the submatrix sub( PX ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) REAL * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( X ) and sub( PX ). * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL, $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL, $ MYCOL, MYROW, NPCOL, NPROW REAL ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L, PCERRSET, SGAMX2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) * LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( N.EQ.1 ) THEN * IF( ( MYROW.EQ.IXROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.IXCOL .OR. COLREP ) ) $ CALL PCERRSET( ERR, ERRMAX, X( IX+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) * ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * IF( MYROW.EQ.IXROW .OR. ROWREP ) THEN * ICURCOL = IXCOL IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 10 J = JX, JN CALL PCERRSET( ERR, ERRMAX, X( IX+(J-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) JJX = JJX + 1 10 CONTINUE END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 20 KK = 0, JB-1 CALL PCERRSET( ERR, ERRMAX, X( IX+(J+KK-1)*LDX ), $ PX( IIX+(JJX+KK-1)*LDPX ) ) 20 CONTINUE * JJX = JJX + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 30 CONTINUE * END IF * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * IF( MYCOL.EQ.IXCOL .OR. COLREP ) THEN * ICURROW = IXROW IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 40 I = IX, IN CALL PCERRSET( ERR, ERRMAX, X( I+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) IIX = IIX + 1 40 CONTINUE END IF ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * DO 50 KK = 0, IB-1 CALL PCERRSET( ERR, ERRMAX, X( I+KK+(JX-1)*LDX ), $ PX( IIX+KK+(JJX-1)*LDPX ) ) 50 CONTINUE * IIX = IIX + IB * END IF * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PCCHKVIN * END SUBROUTINE PCCHKVOUT( N, X, PX, IX, JX, DESCX, INCX, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX PX( * ), X( * ) * .. * * Purpose * ======= * * PCCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX, $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL, $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL, $ NPROW, NQALL REAL EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCERRSET, SGAMX2D * .. * .. External Functions .. INTEGER PB_NUMROC REAL PSLAMCH EXTERNAL PSLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCX( M_ ).LE.0 ).OR.( DESCX( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCX( M_ ), 1, DESCX( IMB_ ), DESCX( MB_ ), $ MYROW, DESCX( RSRC_ ), NPROW ) NQALL = PB_NUMROC( DESCX( N_ ), 1, DESCX( INB_ ), DESCX( NB_ ), $ MYCOL, DESCX( CSRC_ ), NPCOL ) * MBX = DESCX( MB_ ) NBX = DESCX( NB_ ) LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ICURROW = DESCX( RSRC_ ) ICURCOL = DESCX( CSRC_ ) ROWREP = ( ICURROW.EQ.-1 ) COLREP = ( ICURCOL.EQ.-1 ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN IMBX = DESCX( IMB_ ) ELSE IMBX = MBX END IF IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN INBX = DESCX( INB_ ) ELSE INBX = NBX END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - ICURROW + NPROW, NPROW ) END IF IF( COLREP ) THEN MYCOLDIST = 0 ELSE MYCOLDIST = MOD( MYCOL - ICURCOL + NPCOL, NPCOL ) END IF II = 1 JJ = 1 * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * I = 1 IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) IB = MIN( DESCX( M_ ), DESCX( IMB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. J+KK.GT.JX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 10 CONTINUE 20 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 50 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 30 CONTINUE 40 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 50 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * DO 110 I = DESCX( IMB_ ) + 1, DESCX( M_ ), MBX IB = MIN( DESCX( M_ ) - I + 1, MBX ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF * JJ = 1 JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 60 CONTINUE 70 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 100 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 80 CONTINUE 90 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 100 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * 110 CONTINUE * ELSE * * sub( X ) is a column vector * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) JB = MIN( DESCX( N_ ), DESCX( INB_ ) ) * DO 130 KK = 0, JB-1 DO 120 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. I+LL.GT.IX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 120 CONTINUE 130 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 160 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 150 KK = 0, JB-1 DO 140 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 140 CONTINUE 150 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 160 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 220 J = DESCX( INB_ ) + 1, DESCX( N_ ), NBX JB = MIN( DESCX( N_ ) - J + 1, NBX ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) DO 180 KK = 0, JB-1 DO 170 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 170 CONTINUE 180 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 210 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 200 KK = 0, JB-1 DO 190 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 190 CONTINUE 200 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 210 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * 220 CONTINUE * END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PCCHKVOUT * END SUBROUTINE PCCHKMIN( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N REAL ERRMAX * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX PA( * ), A( * ) * .. * * Purpose * ======= * * PCCHKMIN checks that the submatrix sub( PA ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) REAL * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( A ) and sub( PA ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * operand sub( A ). N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW REAL ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L, PCERRSET, SGAMX2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if posssible * IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) * II = IIA JJ = JJA LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) ICURROW = IAROW ICURCOL = IACOL ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA + JB - 1 * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 40 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 10 K = 0, IB-1 CALL PCERRSET( ERR, ERRMAX, A( IA+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 10 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 20 K = 0, IB-1 CALL PCERRSET( ERR, ERRMAX, A( I+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 20 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = IIA ICURROW = IAROW 40 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining column blocks * DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 80 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 50 K = 0, IB-1 CALL PCERRSET( ERR, ERRMAX, A( IA+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 50 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 70 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 60 K = 0, IB-1 CALL PCERRSET( ERR, ERRMAX, $ A( I+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 60 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 70 CONTINUE * II = IIA ICURROW = IAROW 80 CONTINUE * JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 90 CONTINUE * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PCCHKMIN * END SUBROUTINE PCCHKMOUT( M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), PA( * ) * .. * * Purpose * ======= * * PCCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( PA ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( PA ). N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK, $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST, $ NPCOL, NPROW REAL EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCERRSET, SGAMX2D * .. * .. External Functions .. INTEGER PB_NUMROC REAL PSLAMCH EXTERNAL PSLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCA( M_ ).LE.0 ).OR.( DESCA( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCA( M_ ), 1, DESCA( IMB_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) * LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) * II = 1 JJ = 1 ROWREP = ( DESCA( RSRC_ ).EQ.-1 ) COLREP = ( DESCA( CSRC_ ).EQ.-1 ) ICURCOL = DESCA( CSRC_ ) IF( MYROW.EQ.DESCA( RSRC_ ) .OR. ROWREP ) THEN IMBA = DESCA( IMB_ ) ELSE IMBA = DESCA( MB_ ) END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - DESCA( RSRC_ ) + NPROW, NPROW ) END IF * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) JB = MIN( DESCA( N_ ), DESCA( INB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 10 CONTINUE 20 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 50 II = IMBA + 1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 30 CONTINUE 40 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 50 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 110 J = DESCA( INB_ ) + 1, DESCA( N_ ), DESCA( NB_ ) JB = MIN( DESCA( N_ ) - J + 1, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 60 CONTINUE 70 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 100 II = IMBA+1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PCERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 80 CONTINUE 90 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 100 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * INSERT MODE 110 CONTINUE * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PCCHKMOUT * END SUBROUTINE PCMPRNT( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, $ CMATNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCMPRNT prints to the standard output an array A of size m by n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LDA,N). The leading m * by n part of this array is printed. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array A to be printed. LDA must be at least MAX( 1, M ). * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM specifies the identifier of the matrix to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 20 J = 1, N * DO 10 I = 1, M * WRITE( NOUT, FMT = 9999 ) CMATNM, I, J, $ REAL( A( I, J ) ), AIMAG( A( I, J ) ) * 10 CONTINUE * 20 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', E16.8, '+i*(', $ E16.8, ')' ) * RETURN * * End of PCMPRNT * END SUBROUTINE PCVPRNT( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, $ CVECNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CVECNM COMPLEX X( * ) * .. * * Purpose * ======= * * PCVPRNT prints to the standard output an vector x of length n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * N (global input) INTEGER * On entry, N specifies the length of the vector X. N must be * at least zero. * * X (global input) COMPLEX array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CVECNM (global input) CHARACTER*(*) * On entry, CVECNM specifies the identifier of the vector to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 10 I = 1, 1 + ( N-1 )*INCX, INCX * WRITE( NOUT, FMT = 9999 ) CVECNM, I, REAL( X( I ) ), $ AIMAG( X( I ) ) * 10 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ')=', E16.8, '+i*(', E16.8, ')' ) * RETURN * * End of PCVPRNT * END SUBROUTINE PCMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, $ DESCY, INCY, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ERR COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL G( * ) COMPLEX A( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PCMVCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies which matrix-vector product is to * be computed as follows: * If TRANS = 'T', * sub( Y ) = BETA * sub( Y ) + sub( A )**T * sub( X ), * else if TRANS = 'C', * sub( Y ) = BETA * sub( Y ) + sub( A )**H * sub( X ), * otherwise * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, ROWREP, TRAN INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX, $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA, $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL, $ NPROW REAL EPS, ERRI, GTMP COMPLEX C, TBETA, YTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( C ) = ABS( REAL( C ) ) + ABS( AIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN TBETA = ONE ELSE TBETA = BETA END IF * TRAN = LSAME( TRANS, 'T' ) CTRAN = LSAME( TRANS, 'C' ) IF( TRAN.OR.CTRAN ) THEN ML = N NL = M ELSE ML = M NL = N END IF * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in Y using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * IOFFY = IY + ( JY - 1 ) * LDY DO 40 I = 1, ML YTMP = ZERO GTMP = RZERO IOFFX = IX + ( JX - 1 ) * LDX IF( TRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 10 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 10 CONTINUE ELSE IF( CTRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 20 J = 1, NL YTMP = YTMP + CONJG( A( IOFFA ) ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 20 CONTINUE ELSE IOFFA = IA + I - 1 + ( JA - 1 ) * LDA DO 30 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + LDA IOFFX = IOFFX + INCX 30 CONTINUE END IF G( I ) = ABS1( ALPHA )*GTMP + ABS1( TBETA )*ABS1( Y( IOFFY ) ) Y( IOFFY ) = ALPHA * YTMP + TBETA * Y( IOFFY ) IOFFY = IOFFY + INCY 40 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPY = DESCY( LLD_ ) IOFFY = IY + ( JY - 1 ) * LDY CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, IIY, $ JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, ML ) JN = JY + JB - 1 * DO 50 J = JY, JN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( J-JY+1 ).NE.RZERO ) $ ERRI = ERRI / G( J-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 50 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 70 J = JN+1, JY+ML-1, DESCY( NB_ ) JB = MIN( JY+ML-J, DESCY( NB_ ) ) * DO 60 KK = 0, JB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( J+KK-JY+1 ).NE.RZERO ) $ ERRI = ERRI / G( J+KK-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 60 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 70 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, ML ) IN = IY + IB - 1 * DO 80 I = IY, IN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( I-IY+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 80 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 100 I = IN+1, IY+ML-1, DESCY( MB_ ) IB = MIN( IY+ML-I, DESCY( MB_ ) ) * DO 90 KK = 0, IB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( I+KK-IY+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 90 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 100 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PCMVCH * END SUBROUTINE PCVMCH( ICTXT, TRANS, UPLO, M, N, ALPHA, X, IX, JX, $ DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, $ IA, JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ERR COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL G( * ) COMPLEX A( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PCVMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed in * the complex cases: * if TRANS = 'C', * sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**H, * otherwise * sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**T. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (local input) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFX, IOFFY, J, JJA, KK, LDA, LDPA, $ LDX, LDY, MYCOL, MYROW, NPCOL, NPROW REAL EPS, ERRI, GTMP COMPLEX ATMP, C * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( C ) = ABS( REAL( C ) ) + ABS( AIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * CTRAN = LSAME( TRANS, 'C' ) UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFY = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND * IOFFX = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IF( CTRAN ) THEN ATMP = X( IOFFX ) * CONJG( Y( IOFFY ) ) ELSE ATMP = X( IOFFX ) * Y( IOFFY ) END IF GTMP = ABS1( X( IOFFX ) ) * ABS1( Y( IOFFY ) ) G( I ) = ABS1( ALPHA ) * GTMP + ABS1( A( IOFFA ) ) A( IOFFA ) = ALPHA * ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PCVMCH * END SUBROUTINE PCVMCH2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, $ JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ERR COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL G( * ) COMPLEX A( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PCVMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (local input) COMPLEX array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J, $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW, $ NPCOL, NPROW REAL EPS, ERRI, GTMP COMPLEX C, ATMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( C ) = ABS( REAL( C ) ) + ABS( AIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFXJ = IX + ( JX - 1 ) * LDX + ( J - 1 ) * INCX IOFFYJ = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IOFFXI = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFYI = IY + ( JY - 1 ) * LDY + ( I - 1 ) * INCY ATMP = ALPHA * X( IOFFXI ) * CONJG( Y( IOFFYJ ) ) ATMP = ATMP + Y( IOFFYI ) * CONJG( ALPHA * X( IOFFXJ ) ) GTMP = ABS1( ALPHA * X( IOFFXI ) ) * ABS1( Y( IOFFYJ ) ) GTMP = GTMP + ABS1( Y( IOFFYI ) ) * $ ABS1( CONJG( ALPHA * X( IOFFXJ ) ) ) G( I ) = GTMP + ABS1( A( IOFFA ) ) A( IOFFA ) = A( IOFFA ) + ATMP * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PCVMCH2 * END SUBROUTINE PCMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANSA, TRANSB INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N REAL ERR COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL G( * ) COMPLEX A( * ), B( * ), C( * ), CT( * ), PC( * ) * .. * * Purpose * ======= * * PCMMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRANA, CTRANB, ROWREP, TRANA, TRANB INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA, $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC, $ MYCOL, MYROW, NPCOL, NPROW REAL EPS, ERRI COMPLEX Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * TRANA = LSAME( TRANSA, 'T' ).OR.LSAME( TRANSA, 'C' ) TRANB = LSAME( TRANSB, 'T' ).OR.LSAME( TRANSB, 'C' ) CTRANA = LSAME( TRANSA, 'C' ) CTRANB = LSAME( TRANSB, 'C' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 240 J = 1, N * IOFFC = IC + ( JC + J - 2 ) * LDC DO 10 I = 1, M CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( .NOT.TRANA .AND. .NOT.TRANB ) THEN DO 30 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 20 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA .AND. .NOT.TRANB ) THEN IF( CTRANA ) THEN DO 50 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + CONJG( A( IOFFA ) ) * $ B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 40 CONTINUE 50 CONTINUE ELSE DO 70 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 60 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 60 CONTINUE 70 CONTINUE END IF ELSE IF( .NOT.TRANA .AND. TRANB ) THEN IF( CTRANB ) THEN DO 90 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 80 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * $ CONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 80 CONTINUE 90 CONTINUE ELSE DO 110 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 100 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 100 CONTINUE 110 CONTINUE END IF ELSE IF( TRANA .AND. TRANB ) THEN IF( CTRANA ) THEN IF( CTRANB ) THEN DO 130 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 120 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + CONJG( A( IOFFA ) ) * $ CONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 120 CONTINUE 130 CONTINUE ELSE DO 150 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 140 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + CONJG( A( IOFFA ) ) * $ B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 140 CONTINUE 150 CONTINUE END IF ELSE IF( CTRANB ) THEN DO 170 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 160 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * $ CONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 160 CONTINUE 170 CONTINUE ELSE DO 190 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 180 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 180 CONTINUE 190 CONTINUE END IF END IF END IF * DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 200 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, M ) IN = IC + IBB - 1 * DO 210 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 210 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 230 I = IN+1, IC+M-1, DESCC( MB_ ) IBB = MIN( IC+M-I, DESCC( MB_ ) ) * DO 220 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 220 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 230 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 250 * 240 CONTINUE * 250 CONTINUE * RETURN * * End of PCMMCH * END SUBROUTINE PCMMCH1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, $ ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N REAL ERR COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL G( * ) COMPLEX A( * ), C( * ), CT( * ), PC( * ) * .. * * Purpose * ======= * * PCMMCH1 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least * zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA, $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW REAL EPS, ERRI COMPLEX Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) HTRAN = LSAME( TRANS, 'H' ) * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 40 CONTINUE 50 CONTINUE ELSE IF( HTRAN ) THEN DO 70 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 60 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAN ) * $ CONJG( A( IOFFAK ) ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 60 CONTINUE 70 CONTINUE ELSE DO 90 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 80 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + CONJG( A( IOFFAN ) ) * A( IOFFAK ) G( I ) = G( I ) + ABS1( CONJG( A( IOFFAN ) ) ) * $ ABS1( A( IOFFAK ) ) 80 CONTINUE 90 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PCMMCH1 * END SUBROUTINE PCMMCH2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N REAL ERR COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL G( * ) COMPLEX A( * ), B( * ), C( * ), CT( * ), $ PC( * ) * .. * * Purpose * ======= * * PCMMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrices A and B have * to be transposed or not before computing the matrix-matrix * product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at * least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J, $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW, $ NPCOL, NPROW REAL EPS, ERRI COMPLEX Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL, SQRT * .. * .. Statement Functions .. REAL ABS1 ABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) HTRAN = LSAME( TRANS, 'H' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 40 CONTINUE 50 CONTINUE ELSE IF( HTRAN ) THEN DO 70 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 60 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + $ ALPHA * A( IOFFAN ) * CONJG( B( IOFFBK ) ) + $ B( IOFFBN ) * CONJG( ALPHA * A( IOFFAK ) ) G( I ) = G( I ) + ABS1( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 60 CONTINUE 70 CONTINUE ELSE DO 90 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 80 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + $ ALPHA * CONJG( A( IOFFAN ) ) * B( IOFFBK ) + $ CONJG( ALPHA * B( IOFFBN ) ) * A( IOFFAK ) G( I ) = G( I ) + ABS1( ALPHA ) * ( $ ABS1( CONJG( A( IOFFAN ) ) * B( IOFFBK ) ) + $ ABS1( CONJG( B( IOFFBN ) ) * A( IOFFAK ) ) ) 80 CONTINUE 90 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = CT( I ) + BETA * C( IOFFC ) G( I ) = G( I ) + ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PCMMCH2 * END SUBROUTINE PCMMCH3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, M, N REAL ERR COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), PC( * ) * .. * * Purpose * ======= * * PCMMCH3 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix addi- * tion. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, LOWER, NOTRAN, ROWREP, UPPER INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J, $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL, $ NPROW REAL ERR0, ERRI, PREC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, $ PCERRAXPBY, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX * .. * .. Executable Statements .. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PREC = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) CTRAN = LSAME( TRANS, 'C' ) * * Compute expected result in C using data in A and C. This part of * the computation is performed by every process in the grid. * INFO = 0 ERR = ZERO * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) LDPC = MAX( 1, DESCC( LLD_ ) ) ROWREP = ( DESCC( RSRC_ ).EQ.-1 ) COLREP = ( DESCC( CSRC_ ).EQ.-1 ) * IF( NOTRAN ) THEN * DO 20 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( JA - 1 + J - JC ) * LDA * DO 10 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFA = IOFFA + 1 IOFFC = IOFFC + 1 * 10 CONTINUE * 20 CONTINUE * ELSE IF( CTRAN ) THEN * DO 40 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 30 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, CONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, CONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PCERRAXPBY( ERRI, ALPHA, CONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 30 CONTINUE * 40 CONTINUE * ELSE * DO 60 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 50 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PCERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 50 CONTINUE * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PCMMCH3 * END SUBROUTINE PCERRAXPBY( ERRBND, ALPHA, X, BETA, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERRBND, PREC COMPLEX ALPHA, BETA, X, Y * .. * * Purpose * ======= * * PCERRAXPBY serially computes y := beta*y + alpha * x and returns a * scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (global input) COMPLEX * On entry, X specifies the scalar x to be scaled. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * Y (global input/global output) COMPLEX * On entry, Y specifies the scalar y to be added. On exit, Y * contains the resulting scalar y. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. REAL ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS COMPLEX TMP * .. * .. Intrinsic Functions .. * .. * .. Executable Statements .. * SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = ALPHA * X IF( REAL( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + REAL( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - REAL( TMP ) * FACT END IF IF( AIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + AIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - AIMAG( TMP ) * FACT END IF * TMP = BETA * Y IF( REAL( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + REAL( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - REAL( TMP ) * FACT END IF IF( AIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + AIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - AIMAG( TMP ) * FACT END IF * Y = ( BETA * Y ) + ( ALPHA * X ) * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PCERRAXPBY * END SUBROUTINE PCIPSET( TOGGLE, N, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TOGGLE INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCIPSET sets the imaginary part of the diagonal entries of an n by n * matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). This is used to * test the PBLAS routines for complex Hermitian matrices, which are * either not supposed to access or use the imaginary parts of the dia- * gonals, or supposed to set them to zero. The value used to set the * imaginary part of the diagonals depends on the value of TOGGLE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TOGGLE (global input) CHARACTER*1 * On entry, TOGGLE specifies the set-value to be used as fol- * lows: * If TOGGLE = 'Z' or 'z', the imaginary part of the diago- * nals are set to zero, * If TOGGLE = 'B' or 'b', the imaginary part of the diago- * nals are set to a large value. * * N (global input) INTEGER * On entry, N specifies the order of sub( A ). N must be at * least zero. * * A (local input/local output) pointer to COMPLEX * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the diagonals of * sub( A ) have been updated as specified by TOGGLE. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, GODOWN, GOLEFT, ROWREP INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP REAL ALPHA, ATMP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.LE.0 ) $ RETURN * IF( LSAME( TOGGLE, 'Z' ) ) THEN ALPHA = ZERO ELSE IF( LSAME( TOGGLE, 'B' ) ) THEN ALPHA = PSLAMCH( ICTXT, 'Epsilon' ) ALPHA = ALPHA / PSLAMCH( ICTXT, 'Safe minimum' ) END IF * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( NP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 ROWREP = ( DESCA2( RSRC_ ).EQ.-1 ) COLREP = ( DESCA2( CSRC_ ).EQ.-1 ) LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( ROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( COLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA + LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = REAL( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = CMPLX( ATMP, ALPHA ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PCIPSET * END REAL FUNCTION PSLAMCH( ICTXT, CMACH ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 CMACH INTEGER ICTXT * .. * * Purpose * ======= * * * .. Local Scalars .. CHARACTER*1 TOP INTEGER IDUMM REAL TEMP * .. * .. External Subroutines .. EXTERNAL PB_TOPGET, SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL SLAMCH EXTERNAL LSAME, SLAMCH * .. * .. Executable Statements .. * TEMP = SLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL SGAMX2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL SGAMN2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PSLAMCH = TEMP * RETURN * * End of PSLAMCH * END SUBROUTINE PCLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, JA, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno- * ted by sub( A ) to beta on the diagonal and alpha on the offdiago- * nals. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the part of the submatrix sub( A ) * to be set: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant to which the offdiagonal elements are to be set. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta, i.e., the constant * to which the diagonal elements are to be set. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be set. On exit, the * leading m by n submatrix sub( A ) is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * and IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER, $ UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA, $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC, $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP, $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD, $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1, $ UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CLASET, PB_DESCTRANS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * ISROWREP = ( DESCA2( RSRC_ ).LT.0 ) ISCOLREP = ( DESCA2( CSRC_ ).LT.0 ) LDA = DESCA2( LLD_ ) * UPPER = .NOT.( LSAME( UPLO, 'L' ) ) LOWER = .NOT.( LSAME( UPLO, 'U' ) ) * IF( ( ( LOWER.AND.UPPER ).AND.( ALPHA.EQ.BETA ) ).OR. $ ( ISROWREP .AND. ISCOLREP ) ) THEN IF( ( MP.GT.0 ).AND.( NQ.GT.0 ) ) $ CALL PB_CLASET( UPLO, MP, NQ, 0, ALPHA, BETA, $ A( IIA + ( JJA - 1 ) * LDA ), LDA ) RETURN END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, MP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( ISROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( ISCOLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * M1 = MP N1 = NQ * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_CLASET( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, BETA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_CLASET( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ ALPHA, A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_CLASET( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ ALPHA, A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASET( UPLO, MBLOC, INBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASET( 'ALL', TMP1, INBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASET( 'ALL', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_CLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_CLASET( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_CLASET( 'All', IMBLOC, TMP1, 0, ALPHA, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_CLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASET( UPLO, MBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASET( 'All', TMP1, NBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * RETURN * * End of PCLASET * END SUBROUTINE PCLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CLASCAL, PB_DESCTRANS, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_CLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_CLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_CLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_CLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_CLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_CLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_CLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PCLASCAL * END SUBROUTINE PCLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PCLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP COMPLEX ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_CLAGEN, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PCLADOM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_CLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_CLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) IF( HERM ) THEN ALPHA = CMPLX( REAL( 2 * MAXMN ), ZERO ) ELSE ALPHA = CMPLX( REAL( MAXMN ), REAL( MAXMN ) ) END IF * IF( IOFFDA.GE.0 ) THEN CALL PCLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PCLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PCLAGEN * END SUBROUTINE PCLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP COMPLEX ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ CMPLX( ABS( REAL( ATMP ) ), $ ABS( AIMAG( ATMP ) ) ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PCLADOM * END SUBROUTINE PB_PCLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PB_PCLAPRNT prints to the standard output a submatrix sub( A ) deno- * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by * the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the row index of the printing pro- * cess. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the column index of the printing * process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM is the name of the matrix to be printed. * * NOUT (global input) INTEGER * On entry, NOUT specifies the output unit number. When NOUT is * equal to 6, the submatrix is printed on the screen. * * WORK (local workspace) COMPLEX array * On entry, WORK is a work array of dimension at least equal to * MAX( IMB_A, MB_A ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PCLAPRN2 * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * CALL BLACS_GRIDINFO( DESCA2( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( DESCA2( RSRC_ ).GE.0 ) THEN IF( DESCA2( CSRC_ ).GE.0 ) THEN CALL PB_PCLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, ICPRNT, $ CMATNM, NOUT, DESCA2( RSRC_ ), $ DESCA2( CSRC_ ), WORK ) ELSE DO 10 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Colum-replicated array -- ' , $ 'copy in process column: ', PCOL CALL PB_PCLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, DESCA2( RSRC_ ), $ PCOL, WORK ) 10 CONTINUE END IF ELSE IF( DESCA2( CSRC_ ).GE.0 ) THEN DO 20 PROW = 0, NPROW - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Row-replicated array -- ' , $ 'copy in process row: ', PROW CALL PB_PCLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, $ DESCA2( CSRC_ ), WORK ) 20 CONTINUE ELSE DO 40 PROW = 0, NPROW - 1 DO 30 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Replicated array -- ' , $ 'copy in process (', PROW, ',', PCOL, ')' CALL PB_PCLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, PCOL, $ WORK ) 30 CONTINUE 40 CONTINUE END IF END IF * RETURN * * End of PB_PCLAPRNT * END SUBROUTINE PB_PCLAPRN2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, PROW, PCOL, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL AISCOLREP, AISROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, CGERV2D, $ CGESD2D, PB_INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, MIN, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) II = IIA JJ = JJA IF( DESCA( RSRC_ ).LT.0 ) THEN AISROWREP = .TRUE. IAROW = PROW ICURROW = PROW ELSE AISROWREP = .FALSE. ICURROW = IAROW END IF IF( DESCA( CSRC_ ).LT.0 ) THEN AISCOLREP = .TRUE. IACOL = PCOL ICURCOL = PCOL ELSE AISCOLREP = .FALSE. ICURCOL = IACOL END IF LDA = DESCA( LLD_ ) LDW = MAX( DESCA( IMB_ ), DESCA( MB_ ) ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA+JB-1 DO 60 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, $ REAL( A(II+K+(JJ+H-1)*LDA) ), $ AIMAG( A(II+K+(JJ+H-1)*LDA) ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IB = DESCA( IMB_ )-IA+1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', E16.8, '+i*(', $ E16.8, ')' ) * RETURN * * End of PB_PCLAPRN2 * END SUBROUTINE PB_CFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX CHKVAL * .. * .. Array Arguments .. COMPLEX A( * ) * .. * * Purpose * ======= * * PB_CFILLPAD surrounds a two dimensional local array with a guard-zone * initialized to the value CHKVAL. The user may later call the routine * PB_CCHEKPAD to discover if the guardzone has been violated. There are * three guardzones. The first is a buffer of size IPRE that is before * the start of the array. The second is the buffer of size IPOST which * is after the end of the array to be padded. Finally, there is a guard * zone inside every column of the array to be padded, in the elements * of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LDA,N). On exit, this * array is the padded array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) COMPLEX * On entry, CHKVAL specifies the value to pad the array with. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no pre-guardzone in PB_CFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no post-guardzone in PB_CFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + ( LDA - M ) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PB_CFILLPAD * END SUBROUTINE PB_CCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX CHKVAL * .. * .. Array Arguments .. CHARACTER*(*) MESS COMPLEX A( * ) * .. * * Purpose * ======= * * PB_CCHEKPAD checks that the padding around a local array has not been * overwritten since the call to PB_CFILLPAD. Three types of errors are * reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells that the 5th element of the IPRE long buffer has been overwrit- * ten with the value 18, where it should still have the value CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells that the 19th element after the end of A was overwritten with * the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells that the element at the 12th row and 3rd column of A was over- * written with the value of 22, where it should still have the value of * CHKVAL. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * MESS (local input) CHARACTER*(*) * On entry, MESS is a ttring containing a user-defined message. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LDA,N). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) COMPLEX * On entry, CHKVAL specifies the value to pad the array with. * * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ REAL( A( I ) ), AIMAG( A( I ) ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PB_CCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, REAL( A( I ) ), $ AIMAG( A( I ) ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PB_CCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, REAL( A( I ) ), $ AIMAG( A( I ) ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G11.4, '+ i*', $ G11.4 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G11.4, $ '+ i*', G11.4 ) * RETURN * * End of PB_CCHEKPAD * END SUBROUTINE PB_CLASET( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PB_CLASET initializes a two-dimensional array A to beta on the diago- * nal specified by IOFFD and alpha on the offdiagonals. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * = 'D' or 'd' Only the diagonal of A is set, * Otherwise: All of the array A is set. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the value to which the offdiagonal * array elements are set to. * * BETA (input) COMPLEX * On entry, BETA specifies the value to which the diagonal ar- * ray elements are set to. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be set, and the strictly lower trape- * zoidal part of A is not referenced; When IUPLO = 'L' or 'l', * the leading m by n part of the array A must contain the * lower trapezoidal part of the matrix as specified by IOFFD to * be set, and the strictly upper trapezoidal part of A is * not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA and the strictly lower triangular * part of the array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA and the strictly upper triangular * part of the array to ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 50 I = 1, JTMP - 1 A( I, J ) = ALPHA 50 CONTINUE A( JTMP, J ) = BETA 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the array to BETA on the diagonal. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 90 CONTINUE * ELSE * * Set the array to BETA on the diagonal and ALPHA on the * offdiagonal. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA 100 CONTINUE 110 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 120 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 120 CONTINUE END IF * END IF * RETURN * * End of PB_CLASET * END SUBROUTINE PB_CLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PB_CLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_CLASCAL * END SUBROUTINE PB_CLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PB_CLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP COMPLEX DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME REAL PB_SRAND EXTERNAL LSAME, PB_SRAND * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( LSAME( AFORM, 'S' ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), PB_SRAND( 0 ) ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * ELSE IF( LSAME( AFORM, 'C' ) ) THEN * * Generate the conjugate transpose of the matrix that would be * normally generated. * II = 1 * DO 290 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 280 IK = II, II + IB - 1 * JJ = 1 * DO 270 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 260 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 260 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 270 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 280 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 290 CONTINUE * ELSE IF( LSAME( AFORM, 'H' ) ) THEN * * Generate a Hermitian matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 370 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 360 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 350 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 300 IK = II, II + IB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 300 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 310 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 310 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 320 IK = II, ITMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 320 CONTINUE * IF( ITMP.LE.( II + IB - 1 ) ) THEN DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) A( ITMP, JK ) = CMPLX( REAL( DUMMY ), $ ZERO ) END IF * DO 330 IK = ITMP + 1, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 330 CONTINUE * END IF * ELSE * DO 340 IK = II, II + IB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ PB_SRAND( 0 ) ) 340 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 350 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 360 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 370 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 450 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 440 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 430 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 380 JK = JJ, JJ + JB - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 380 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 390 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 390 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 400 JK = JJ, JTMP - 1 DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 400 CONTINUE * IF( JTMP.LE.( JJ + JB - 1 ) ) THEN DUMMY = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) A( IK, JTMP ) = CMPLX( REAL( DUMMY ), $ ZERO ) END IF * DO 410 JK = JTMP + 1, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 410 CONTINUE * END IF * ELSE * DO 420 JK = JJ, JJ + JB - 1 A( IK, JK ) = CMPLX( PB_SRAND( 0 ), $ -PB_SRAND( 0 ) ) 420 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 430 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 440 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 450 CONTINUE * END IF * END IF * RETURN * * End of PB_CLAGEN * END REAL FUNCTION PB_SRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. External Functions .. REAL PB_SRAN EXTERNAL PB_SRAN * .. * .. Executable Statements .. * PB_SRAND = ONE - TWO * PB_SRAN( IDUMM ) * RETURN * * End of PB_SRAND * END REAL FUNCTION PB_SRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648E+9, $ POW16 = 6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_SRAN = ( REAL( IRAND( 1 ) ) + POW16 * REAL( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_SRAN * END scalapack-1.8.0/PBLAS/TESTING/PZBLAS2TST.dat0000640000175000017500000000645510363532303017446 0ustar muammarmuammar'Level 2 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS2TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0D0, -4.0D0) value of ALPHA (3.0D0, -2.0D0) value of BETA 4 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 values of M 8 10 12 14 16 18 20 22 24 9 11 13 15 17 19 21 values of N 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of M_A 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of N_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of M_X 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of N_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 30 1 30 1 30 1 30 1 30 1 30 1 30 1 30 1 values of INCX 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of M_Y 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of N_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 40 1 40 1 40 1 40 1 40 1 40 1 40 1 40 1 values of INCY PZGEMV T put F for no test in the same column PZHEMV T put F for no test in the same column PZTRMV T put F for no test in the same column PZTRSV T put F for no test in the same column PZGERU T put F for no test in the same column PZGERC T put F for no test in the same column PZHER T put F for no test in the same column PZHER2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/pzblas3tst.f0000640000175000017500000042120510363532303017556 0ustar muammarmuammar PROGRAM PZBLA3TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA3TST is the main testing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * * from the following 64 lines: * 'Level 3 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS3TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * (1.0D0, 0.0D0) value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PZGEMM T put F for no test in the same column * PZSYMM T put F for no test in the same column * PZHEMM T put F for no test in the same column * PZSYRK T put F for no test in the same column * PZHERK T put F for no test in the same column * PZSYR2K T put F for no test in the same column * PZHER2K T put F for no test in the same column * PZTRMM T put F for no test in the same column * PZTRSM T put F for no test in the same column * PZGEADD T put F for no test in the same column * PZTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, ZPLXSZ, TOTMEM, $ MEMSIZ, NSUBS, DBLESZ COMPLEX*16 ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ ZPLXSZ = 16, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / ZPLXSZ, DBLESZ = 8, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ), $ ROGUE = ( -1.0D+10, 1.0D+10 ), $ ONE = ( 1.0D+0, 0.0D+0 ), NSUBS = 11 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IGAP, IMBA, IMBB, $ IMBC, IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, $ IPA, IPB, IPC, IPG, IPMATA, IPMATB, IPMATC, $ IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, IPREC, $ IPW, IVERB, J, JA, JB, JC, K, L, LDA, LDB, LDC, $ M, MA, MB, MBA, MBB, MBC, MC, MEMREQD, MPA, $ MPB, MPC, MYCOL, MYROW, N, NA, NB, NBA, NBB, $ NBC, NC, NCOLA, NCOLB, NCOLC, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQA, NQB, NQC, NROWA, $ NROWB, NROWC, NTESTS, OFFDA, OFFDC, RSRCA, $ RSRCB, RSRCC, TSKIP, TSTCNT REAL THRESH COMPLEX*16 ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL BCHECK( NSUBS ), CCHECK( NSUBS ), $ LTEST( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCB( DLEN_ ), $ DESCBR( DLEN_ ), DESCC( DLEN_ ), $ DESCCR( DLEN_ ), IAVAL( MAXTESTS ), $ IBVAL( MAXTESTS ), ICVAL( MAXTESTS ), $ IERR( 6 ), IMBAVAL( MAXTESTS ), $ IMBBVAL( MAXTESTS ), IMBCVAL( MAXTESTS ), $ INBAVAL( MAXTESTS ), INBBVAL( MAXTESTS ), $ INBCVAL( MAXTESTS ), JAVAL( MAXTESTS ), $ JBVAL( MAXTESTS ), JCVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PZLAPRNT, PB_ZCHEKPAD, $ PB_ZFILLPAD, PB_ZLASCAL, PB_ZLASET, PMDESCCHK, $ PMDIMCHK, PZBLA3TSTINFO, PZBLAS3TSTCHK, $ PZBLAS3TSTCHKE, PZCHKARG3, PZCHKMOUT, PZGEADD, $ PZGEMM, PZHEMM, PZHER2K, PZHERK, PZIPSET, $ PZLAGEN, PZLASCAL, PZLASET, PZMPRNT, PZSYMM, $ PZSYR2K, PZSYRK, PZTRADD, PZTRMM, PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_FCEIL EXTERNAL PB_FCEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZGEMM ', 'PZSYMM ', 'PZHEMM ', $ 'PZSYRK ', 'PZHERK ', 'PZSYR2K', $ 'PZHER2K', 'PZTRMM ', 'PZTRSM ', $ 'PZGEADD', 'PZTRADD'/ DATA BCHECK/.TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .TRUE., $ .TRUE., .TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, * so that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA3TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, SOF, TEE, IAM, $ IGAP, IVERB, NPROCS, THRESH, ALPHA, BETA, $ MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PZBLAS3TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * TSTCNT = TSTCNT + 1 * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, IGAP, GAPMUL, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, IGAP, GAPMUL, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDB = MAX( 1, MB ) LDC = MAX( 1, MC ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREB IPC = IPB + DESCB( LLD_ )*NQB + IPOSTB + IPREC IPMATA = IPC + DESCC( LLD_ )*NQC + IPOSTC IPMATB = IPMATA + MA*NA IPMATC = IPMATB + MB*NB IPG = IPMATC + MAX( MB*NB, MC*NC ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * IPW = IPG + MAX( MAX( MAX( IMBA, MBA ), $ MAX( IMBB, MBB ) ), $ MAX( IMBC, MBC ) ) + MAX( M, MAX( N, K ) ) MEMREQD = IPW + PB_FCEIL( REAL( MAX( M, MAX( N, K ) ) ) * $ REAL( DBLESZ ), REAL( ZPLXSZ ) ) - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) SNAMES( L ) END IF * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PZGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF * ELSE IF( L.EQ.2 .OR. L.EQ.3 ) THEN * * PZSYMM, PZHEMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF * ELSE IF( L.EQ.4 .OR. L.EQ.5 ) THEN * * PZSYRK, PZHERK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 * ELSE IF( L.EQ.6 .OR. L.EQ.7 ) THEN * * PZSYR2K, PZHER2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF * ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 * ELSE IF( L.EQ.10 .OR. L.EQ.11 ) THEN * * PZGEADD, PZTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF * * Check special values of TRANSA for symmetric and * hermitian rank-k and rank-2k updates. * IF( L.EQ.4 .OR. L.EQ.6 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9975 ) 'TRANSA' KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9975 ) 'TRANSA' KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PZSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 ) THEN * * PZHEMM * AFORM = 'H' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.4 .OR. L.EQ.6 ) THEN * * PZSYRK, PZSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN * * PZHERK, PZHER2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'H' OFFDC = IC - JC * ELSE IF( ( L.EQ.9 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PZTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PZLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) * IF( BCHECK( L ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PZLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PZLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) * IF( BCHECK( L ) ) THEN CALL PB_DESCSET2( DESCBR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCBR, IBSEED, MEM( IPMATB ), $ DESCBR( LLD_ ) ) END IF * IF( CCHECK( L ) ) THEN * CALL PB_DESCSET2( DESCCR, MC, NC, IMBC, INBC, MBC, $ NBC, -1, -1, ICTXT, MAX( 1, MC ) ) CALL PZLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCCR, ICSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * ELSE * * If C is not needed, generate a copy of B instead * CALL PB_DESCSET2( DESCCR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCCR, IBSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A, B, C * IF( ( ( L.EQ.2 ).OR. ( L.EQ.3 ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric or Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PZLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PZLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) * END IF * ELSE IF( ( ( L.EQ.4 ).OR.( L.EQ.5 ).OR.( L.EQ.6 ).OR. $ ( L.EQ.7 ) ).AND. $ ( MAX( NROWC, NCOLC ).GT.1 ) ) THEN * * The distributed matrix C is symmetric or Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PZLASET( 'Upper', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC, JC+1, $ DESCC ) CALL PB_ZLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PZLASET( 'Lower', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC+1, JC, $ DESCC ) CALL PB_ZLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PZLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_ZLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PZLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_ZLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.9 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) CALL PZLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_ZLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PZLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_ZLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PZLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_ZLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.9 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) CALL PZLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_ZLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * ELSE IF( L.EQ.11 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix C is lower triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PZLASET( 'Upper', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC, $ JC+1, DESCC ) CALL PB_ZLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix C is upper triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PZLASET( 'Lower', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC+1, $ JC, DESCC ) CALL PB_ZLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * END IF * * Pad the guard zones of A, B and C * CALL PB_ZFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_ZFILLPAD( ICTXT, MPB, NQB, MEM( IPB-IPREB ), $ DESCB( LLD_ ), IPREB, IPOSTB, $ PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_ZFILLPAD( ICTXT, MPC, NQC, MEM( IPC-IPREC ), $ DESCC( LLD_ ), IPREC, IPOSTC, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PZCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PZLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, $ 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) END IF * IF( BCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PZLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, JB, $ DESCB, 0, 0, $ 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MB, NB, MEM( IPB ), 1, 1, DESCB, $ 0, 0, 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) END IF END IF * IF( CCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PZLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, JC, $ DESCC, 0, 0, $ 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MC, NC, MEM( IPC ), 1, 1, DESCC, $ 0, 0, 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) END IF END IF * * Call the Level 3 PBLAS routine * INFO = 0 IF( L.EQ.1 ) THEN * * Test PZGEMM * CALL PZGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.2 ) THEN * * Test PZSYMM * CALL PZSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.3 ) THEN * * Test PZHEMM * CALL PZIPSET( 'Bignum', NROWA, MEM( IPA ), IA, JA, $ DESCA ) * CALL PZHEMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * CALL PZIPSET( 'Zero', NROWA, MEM( IPA ), IA, JA, $ DESCA ) * ELSE IF( L.EQ.4 ) THEN * * Test PZSYRK * CALL PZSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.5 ) THEN * * Test PZHERK * IF( ( ( DCMPLX( DBLE( ALPHA ) ).NE.ZERO ).AND. $ ( K.NE.0 ) ).OR. $ ( DCMPLX( DBLE( BETA ) ).NE.ONE ) ) $ CALL PZIPSET( 'Bignum', N, MEM( IPC ), IC, JC, $ DESCC ) * CALL PZHERK( UPLO, TRANSA, N, K, DBLE( ALPHA ), $ MEM( IPA ), IA, JA, DESCA, DBLE( BETA ), $ MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.6 ) THEN * * Test PZSYR2K * CALL PZSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.7 ) THEN * * Test PZHER2K * IF( ( ( ALPHA.NE.ZERO ).AND.( K.NE.0 ) ).OR. $ ( DCMPLX( DBLE( BETA ) ).NE.ONE ) ) $ CALL PZIPSET( 'Bignum', N, MEM( IPC ), IC, JC, $ DESCC ) * CALL PZHER2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, DBLE( BETA ), MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.8 ) THEN * * Test PZTRMM * CALL PZTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * ELSE IF( L.EQ.9 ) THEN * * Test PZTRSM * CALL PZTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * * ELSE IF( L.EQ.10 ) THEN * * Test PZGEADD * CALL PZGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.11 ) THEN * * Test PZTRADD * CALL PZTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_ZCHEKPAD( ICTXT, SNAMES( L ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), $ IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_ZCHEKPAD( ICTXT, SNAMES( L ), MPB, NQB, $ MEM( IPB-IPREB ), DESCB( LLD_ ), $ IPREB, IPOSTB, PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_ZCHEKPAD( ICTXT, SNAMES( L ), MPC, NQC, $ MEM( IPC-IPREC ), DESCC( LLD_ ), $ IPREC, IPOSTC, PADVAL ) END IF * * Check the computations * CALL PZBLAS3TSTCHK( ICTXT, NOUT, L, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, $ MEM( IPMATA ), MEM( IPA ), IA, JA, $ DESCA, MEM( IPMATB ), MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, THRESH, $ ROGUE, MEM( IPG ), MEM( IPW ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PZCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Check input-only array arguments * CALL PZCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), $ MEM( IPA ), IA, JA, DESCA, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_A', $ SNAMES( L ) END IF * IF( BCHECK( L ) ) THEN CALL PZCHKMOUT( NROWB, NCOLB, MEM( IPMATB ), $ MEM( IPB ), IB, JB, DESCB, IERR( 5 ) ) IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_B', $ SNAMES( L ) END IF END IF * IF( CCHECK( L ) ) THEN CALL PZCHKMOUT( NROWC, NCOLC, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_C', $ SNAMES( L ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN KFAIL( L ) = KFAIL( L ) + 1 ERRFLG = .TRUE. IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) SNAMES( L ) ELSE KPASS( L ) = KPASS( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( L ) END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PZLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PZMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PZLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) END IF IF( BCHECK( L ) ) THEN IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MB, NB, $ MEM( IPMATB ), LDB, 0, 0, $ 'SERIAL_B' ) CALL PB_PZLAPRNT( MB, NB, MEM( IPB ), 1, 1, $ DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PZMPRNT( ICTXT, NOUT, NROWB, NCOLB, $ MEM( IPMATB+IB-1+(JB-1)*LDB ), $ LDB, 0, 0, 'SERIAL_B' ) CALL PB_PZLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, $ JB, DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) END IF END IF IF( CCHECK( L ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MC, NC, $ MEM( IPMATC ), LDC, 0, 0, $ 'SERIAL_C' ) CALL PB_PZLAPRNT( MC, NC, MEM( IPC ), 1, 1, $ DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PZMPRNT( ICTXT, NOUT, NROWC, NCOLC, $ MEM( IPMATC+IC-1+(JC-1)*LDC ), $ LDC, 0, 0, 'SERIAL_C' ) CALL PB_PZLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, $ JC, DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9982 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9981 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, ' Tested Subroutine: ', A ) 9985 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9983 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9982 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9981 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9980 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9979 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9978 FORMAT( 2X, 'Testing Summary') 9977 FORMAT( 2X, 'End of Tests.' ) 9976 FORMAT( 2X, 'Tests started.' ) 9975 FORMAT( 2X, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PZBLA3TST * END SUBROUTINE PZBLA3TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, SOF, $ TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA3TSTINFO get the needed startup information for testing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eleven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX*16 * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS+4 ) with NSUBS equal to 11. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS3TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 3 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex double precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9981 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PZBLA3TSTINFO * END SUBROUTINE PZBLAS3TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PZBLAS3TSTCHKE tests the error exits of the Level 3 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 11 (NSUBS). * If LTEST( 1 ) is .TRUE., PZGEMM will be tested; * If LTEST( 2 ) is .TRUE., PZSYMM will be tested; * If LTEST( 3 ) is .TRUE., PZHEMM will be tested; * If LTEST( 4 ) is .TRUE., PZSYRK will be tested; * If LTEST( 5 ) is .TRUE., PZHERK will be tested; * If LTEST( 6 ) is .TRUE., PZSYR2K will be tested; * If LTEST( 7 ) is .TRUE., PZHER2K will be tested; * If LTEST( 8 ) is .TRUE., PZTRMM will be tested; * If LTEST( 9 ) is .TRUE., PZTRSM will be tested; * If LTEST( 10 ) is .TRUE., PZGEADD will be tested; * If LTEST( 11 ) is .TRUE., PZTRADD will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PZDIMEE, PZGEADD, PZGEMM, $ PZHEMM, PZHER2K, PZHERK, PZMATEE, PZOPTEE, $ PZSYMM, PZSYR2K, PZSYRK, PZTRADD, PZTRMM, $ PZTRSM * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/31, 32, 32, 33, 34, 35, 36, 38, 38, 39, $ 40/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PZGEMM * I = 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZGEMM, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZGEMM, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZGEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZSYMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZSYMM, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZSYMM, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZSYMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHEMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHEMM, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHEMM, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZSYRK * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZSYRK, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZSYRK, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZSYRK, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHERK * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHERK, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHERK, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHERK, SCODE( I ), SNAMES( I ) ) END IF * * Test PZSYR2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZSYR2K, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZSYR2K, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZSYR2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHER2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHER2K, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHER2K, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHER2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PZTRMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZTRMM, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZTRMM, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZTRMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZTRSM * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZTRSM, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZTRSM, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZTRSM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZGEADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZGEADD, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZGEADD, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZGEADD, SCODE( I ), SNAMES( I ) ) END IF * * Test PZTRADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZTRADD, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZTRADD, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZTRADD, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PZBLAS3TSTCHKE * END SUBROUTINE PZCHKARG3( ICTXT, NOUT, SNAME, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, IA, JA, $ DESCA, IB, JB, DESCB, BETA, IC, JC, DESCC, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*7 SNAME INTEGER DESCA( * ), DESCB( * ), DESCC( * ) * .. * * Purpose * ======= * * PZCHKARG3 checks the input-only arguments of the Level 3 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies the SIDE option in the Level 3 PBLAS * operation. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 3 PBLAS * operation. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the TRANSA option in the Level 3 * PBLAS operation. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the TRANSB option in the Level 3 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 3 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * K (global input) INTEGER * On entry, K specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, SIDEREF, TRANSAREF, TRANSBREF, UPLOREF INTEGER I, IAREF, IBREF, ICREF, JAREF, JBREF, JCREF, $ KREF, MREF, MYCOL, MYROW, NPCOL, NPROW, NREF COMPLEX*16 ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCBREF( DLEN_ ), $ DESCCREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG SIDEREF = SIDE TRANSAREF = TRANSA TRANSBREF = TRANSB UPLOREF = UPLO MREF = M NREF = N KREF = K ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IBREF = IB JBREF = JB DO 20 I = 1, DLEN_ DESCBREF( I ) = DESCB( I ) 20 CONTINUE BETAREF = BETA ICREF = IC JCREF = JC DO 30 I = 1, DLEN_ DESCCREF( I ) = DESCC( I ) 30 CONTINUE * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( SIDE, SIDEREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'SIDE' ELSE IF( .NOT. LSAME( TRANSA, TRANSAREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSA' ELSE IF( .NOT. LSAME( TRANSB, TRANSBREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSB' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( K.NE.KREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'K' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IB.NE.IBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IB' ELSE IF( JB.NE.JBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JB' ELSE IF( DESCB( DTYPE_ ).NE.DESCBREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( DTYPE_ )' ELSE IF( DESCB( M_ ).NE.DESCBREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( M_ )' ELSE IF( DESCB( N_ ).NE.DESCBREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( N_ )' ELSE IF( DESCB( IMB_ ).NE.DESCBREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( IMB_ )' ELSE IF( DESCB( INB_ ).NE.DESCBREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( INB_ )' ELSE IF( DESCB( MB_ ).NE.DESCBREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( MB_ )' ELSE IF( DESCB( NB_ ).NE.DESCBREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( NB_ )' ELSE IF( DESCB( RSRC_ ).NE.DESCBREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( RSRC_ )' ELSE IF( DESCB( CSRC_ ).NE.DESCBREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CSRC_ )' ELSE IF( DESCB( CTXT_ ).NE.DESCBREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CTXT_ )' ELSE IF( DESCB( LLD_ ).NE.DESCBREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( LLD_ )' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IC.NE.ICREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IC' ELSE IF( JC.NE.JCREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JC' ELSE IF( DESCC( DTYPE_ ).NE.DESCCREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( DTYPE_ )' ELSE IF( DESCC( M_ ).NE.DESCCREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( M_ )' ELSE IF( DESCC( N_ ).NE.DESCCREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( N_ )' ELSE IF( DESCC( IMB_ ).NE.DESCCREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( IMB_ )' ELSE IF( DESCC( INB_ ).NE.DESCCREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( INB_ )' ELSE IF( DESCC( MB_ ).NE.DESCCREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( MB_ )' ELSE IF( DESCC( NB_ ).NE.DESCCREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( NB_ )' ELSE IF( DESCC( RSRC_ ).NE.DESCCREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( RSRC_ )' ELSE IF( DESCC( CSRC_ ).NE.DESCCREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CSRC_ )' ELSE IF( DESCC( CTXT_ ).NE.DESCCREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CTXT_ )' ELSE IF( DESCC( LLD_ ).NE.DESCCREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( LLD_ )' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PZCHKARG3 * END SUBROUTINE PZBLAS3TSTCHK( ICTXT, NOUT, NROUT, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, A, PA, IA, $ JA, DESCA, B, PB, IB, JB, DESCB, BETA, $ C, PC, IC, JC, DESCC, THRESH, ROGUE, $ WORK, RWORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT, NROUT REAL THRESH COMPLEX*16 ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), B( * ), C( * ), PA( * ), PB( * ), $ PC( * ), WORK( * ) * .. * * Purpose * ======= * * PZBLAS3TSTCHK performs the computational tests of the Level 3 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PZGEMM will be tested; * else if NROUT = 2, PZSYMM will be tested; * else if NROUT = 3, PZHEMM will be tested; * else if NROUT = 4, PZSYRK will be tested; * else if NROUT = 5, PZHERK will be tested; * else if NROUT = 6, PZSYR2K will be tested; * else if NROUT = 7, PZHER2K will be tested; * else if NROUT = 8, PZTRMM will be tested; * else if NROUT = 9, PZTRSM will be tested; * else if NROUT = 10, PZGEADD will be tested; * else if NROUT = 11, PZTRADD will be tested; * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies if the multiplication should be per- * formed from the left or the right. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * PB (local input) COMPLEX*16 array * On entry, PB is an array of dimension (DESCB( LLD_ ),*). This * array contains the local entries of the matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX*16 array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) COMPLEX*16 * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular, symmetric or Hermitian ma- * trices. * * WORK (workspace) COMPLEX*16 array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, MAX( N, K ) ). This array is used to store * a copy of a column of C (see PZMMCH). * * RWORK (workspace) DOUBLE PRECISION array * On entry, RWORK is an array of dimension LRWORK where LRWORK * is at least MAX( M, MAX( N, K ) ). This array is used to sto- * re the computed gauges (see PZMMCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on B has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on C has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D+0 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR COMPLEX*16 ALPHA1, BETA1 * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_ZLASET, PZCHKMIN, PZMMCH, $ PZMMCH1, PZMMCH2, PZMMCH3, PZTRMM, ZTRSM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PZGEMM * * Check the resulting matrix C * CALL PZMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, $ DESCC, WORK, RWORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, M, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, K, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF IF( LSAME( TRANSB, 'N' ) ) THEN CALL PZCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PZCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PZSYMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PZMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) ELSE CALL PZMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_ZLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_ZLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PZCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PZCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PZHEMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PZMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) ELSE CALL PZMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_ZLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_ZLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PZCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PZCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PZSYRK * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZMMCH1( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, $ WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PZMMCH1( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, IA, $ JA, DESCA, BETA, C, PC, IC, JC, DESCC, WORK, $ RWORK, ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PZHERK * * Check the resulting matrix C * BETA1 = DCMPLX( DBLE( BETA ), RZERO ) ALPHA1 = DCMPLX( DBLE( ALPHA ), RZERO ) IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZMMCH1( ICTXT, UPLO, 'Hermitian', N, K, ALPHA1, A, IA, $ JA, DESCA, BETA1, C, PC, IC, JC, DESCC, WORK, $ RWORK, ERR, IERR( 3 ) ) ELSE CALL PZMMCH1( ICTXT, UPLO, 'Conjugate transpose', N, K, $ ALPHA1, A, IA, JA, DESCA, BETA1, C, PC, IC, $ JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PZSYR2K * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZMMCH2( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PZMMCH2( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PZCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PZCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PZCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PZHER2K * * Check the resulting matrix C * BETA1 = DCMPLX( DBLE( BETA ), RZERO ) IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZMMCH2( ICTXT, UPLO, 'Hermitian', N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA1, C, PC, IC, $ JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PZMMCH2( ICTXT, UPLO, 'Conjugate transpose', N, K, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA1, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PZCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PZCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PZCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PZTRMM * * Check the resulting matrix B * IF( LSAME( SIDE, 'L' ) ) THEN CALL PZMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, C, IB, JB, DESCB, $ ZERO, B, PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) ELSE CALL PZMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, $ ALPHA, C, IB, JB, DESCB, A, IA, JA, DESCA, $ ZERO, B, PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.9 ) THEN * * Test PZTRSM * * Check the resulting matrix B * CALL ZTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ), $ B( IB+(JB-1)*DESCB( M_ ) ), DESCB( M_ ) ) CALL PZTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, PA, IA, JA, $ DESCA, PB, IB, JB, DESCB ) IF( LSAME( SIDE, 'L' ) ) THEN CALL PZMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, ALPHA, $ A, IA, JA, DESCA, B, IB, JB, DESCB, ZERO, C, $ PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) ELSE CALL PZMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, ALPHA, $ B, IB, JB, DESCB, A, IA, JA, DESCA, ZERO, C, $ PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF ELSE IF( NROUT.EQ.10 ) THEN * * Test PZGEADD * * Check the resulting matrix C * CALL PZMMCH3( 'All', TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.11 ) THEN * * Test PZTRADD * * Check the resulting matrix C * CALL PZMMCH3( UPLO, TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PZCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PZCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'B' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'C' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9997 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PZBLAS3TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/slamch.f0000640000175000017500000006037410363532303016722 0ustar muammarmuammar REAL FUNCTION SLAMCH( CMACH ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. CHARACTER CMACH * .. * * Purpose * ======= * * SLAMCH determines single precision machine parameters. * * Arguments * ========= * * CMACH (input) CHARACTER*1 * Specifies the value to be returned by SLAMCH: * = 'E' or 'e', SLAMCH := eps * = 'S' or 's , SLAMCH := sfmin * = 'B' or 'b', SLAMCH := base * = 'P' or 'p', SLAMCH := eps*base * = 'N' or 'n', SLAMCH := t * = 'R' or 'r', SLAMCH := rnd * = 'M' or 'm', SLAMCH := emin * = 'U' or 'u', SLAMCH := rmin * = 'L' or 'l', SLAMCH := emax * = 'O' or 'o', SLAMCH := rmax * * where * * eps = relative machine precision * sfmin = safe minimum, such that 1/sfmin does not overflow * base = base of the machine * prec = eps*base * t = number of (base) digits in the mantissa * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise * emin = minimum exponent before (gradual) underflow * rmin = underflow threshold - base**(emin-1) * emax = largest exponent before overflow * rmax = overflow threshold - (base**emax)*(1-eps) * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL FIRST, LRND INTEGER BETA, IMAX, IMIN, IT REAL BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, $ RND, SFMIN, SMALL, T * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL SLAMC2 * .. * .. Save statement .. SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, $ EMAX, RMAX, PREC * .. * .. Data statements .. DATA FIRST / .TRUE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. CALL SLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) BASE = BETA T = IT IF( LRND ) THEN RND = ONE EPS = ( BASE**( 1-IT ) ) / 2 ELSE RND = ZERO EPS = BASE**( 1-IT ) END IF PREC = EPS*BASE EMIN = IMIN EMAX = IMAX SFMIN = RMIN SMALL = ONE / RMAX IF( SMALL.GE.SFMIN ) THEN * * Use SMALL plus a bit, to avoid the possibility of rounding * causing overflow when computing 1/sfmin. * SFMIN = SMALL*( ONE+EPS ) END IF END IF * IF( LSAME( CMACH, 'E' ) ) THEN RMACH = EPS ELSE IF( LSAME( CMACH, 'S' ) ) THEN RMACH = SFMIN ELSE IF( LSAME( CMACH, 'B' ) ) THEN RMACH = BASE ELSE IF( LSAME( CMACH, 'P' ) ) THEN RMACH = PREC ELSE IF( LSAME( CMACH, 'N' ) ) THEN RMACH = T ELSE IF( LSAME( CMACH, 'R' ) ) THEN RMACH = RND ELSE IF( LSAME( CMACH, 'M' ) ) THEN RMACH = EMIN ELSE IF( LSAME( CMACH, 'U' ) ) THEN RMACH = RMIN ELSE IF( LSAME( CMACH, 'L' ) ) THEN RMACH = EMAX ELSE IF( LSAME( CMACH, 'O' ) ) THEN RMACH = RMAX END IF * SLAMCH = RMACH RETURN * * End of SLAMCH * END * ************************************************************************ * SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL IEEE1, RND INTEGER BETA, T * .. * * Purpose * ======= * * SLAMC1 determines the machine parameters given by BETA, T, RND, and * IEEE1. * * Arguments * ========= * * BETA (output) INTEGER * The base of the machine. * * T (output) INTEGER * The number of ( BETA ) digits in the mantissa. * * RND (output) LOGICAL * Specifies whether proper rounding ( RND = .TRUE. ) or * chopping ( RND = .FALSE. ) occurs in addition. This may not * be a reliable guide to the way in which the machine performs * its arithmetic. * * IEEE1 (output) LOGICAL * Specifies whether rounding appears to be done in the IEEE * 'round to nearest' style. * * Further Details * =============== * * The routine is based on the routine ENVRON by Malcolm and * incorporates suggestions by Gentleman and Marovich. See * * Malcolm M. A. (1972) Algorithms to reveal properties of * floating-point arithmetic. Comms. of the ACM, 15, 949-951. * * Gentleman W. M. and Marovich S. B. (1974) More on algorithms * that reveal properties of floating point arithmetic units. * Comms. of the ACM, 17, 276-277. * * ===================================================================== * * .. Local Scalars .. LOGICAL FIRST, LIEEE1, LRND INTEGER LBETA, LT REAL A, B, C, F, ONE, QTR, SAVEC, T1, T2 * .. * .. External Functions .. REAL SLAMC3 EXTERNAL SLAMC3 * .. * .. Save statement .. SAVE FIRST, LIEEE1, LBETA, LRND, LT * .. * .. Data statements .. DATA FIRST / .TRUE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. ONE = 1 * * LBETA, LIEEE1, LT and LRND are the local values of BETA, * IEEE1, T and RND. * * Throughout this routine we use the function SLAMC3 to ensure * that relevant values are stored and not held in registers, or * are not affected by optimizers. * * Compute a = 2.0**m with the smallest positive integer m such * that * * fl( a + 1.0 ) = a. * A = 1 C = 1 * *+ WHILE( C.EQ.ONE )LOOP 10 CONTINUE IF( C.EQ.ONE ) THEN A = 2*A C = SLAMC3( A, ONE ) C = SLAMC3( C, -A ) GO TO 10 END IF *+ END WHILE * * Now compute b = 2.0**m with the smallest positive integer m * such that * * fl( a + b ) .gt. a. * B = 1 C = SLAMC3( A, B ) * *+ WHILE( C.EQ.A )LOOP 20 CONTINUE IF( C.EQ.A ) THEN B = 2*B C = SLAMC3( A, B ) GO TO 20 END IF *+ END WHILE * * Now compute the base. a and c are neighbouring floating point * numbers in the interval ( beta**t, beta**( t + 1 ) ) and so * their difference is beta. Adding 0.25 to c is to ensure that it * is truncated to beta and not ( beta - 1 ). * QTR = ONE / 4 SAVEC = C C = SLAMC3( C, -A ) LBETA = C + QTR * * Now determine whether rounding or chopping occurs, by adding a * bit less than beta/2 and a bit more than beta/2 to a. * B = LBETA F = SLAMC3( B / 2, -B / 100 ) C = SLAMC3( F, A ) IF( C.EQ.A ) THEN LRND = .TRUE. ELSE LRND = .FALSE. END IF F = SLAMC3( B / 2, B / 100 ) C = SLAMC3( F, A ) IF( ( LRND ) .AND. ( C.EQ.A ) ) $ LRND = .FALSE. * * Try and decide whether rounding is done in the IEEE 'round to * nearest' style. B/2 is half a unit in the last place of the two * numbers A and SAVEC. Furthermore, A is even, i.e. has last bit * zero, and SAVEC is odd. Thus adding B/2 to A should not change * A, but adding B/2 to SAVEC should change SAVEC. * T1 = SLAMC3( B / 2, A ) T2 = SLAMC3( B / 2, SAVEC ) LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND * * Now find the mantissa, t. It should be the integer part of * log to the base beta of a, however it is safer to determine t * by powering. So we find t as the smallest positive integer for * which * * fl( beta**t + 1.0 ) = 1.0. * LT = 0 A = 1 C = 1 * *+ WHILE( C.EQ.ONE )LOOP 30 CONTINUE IF( C.EQ.ONE ) THEN LT = LT + 1 A = A*LBETA C = SLAMC3( A, ONE ) C = SLAMC3( C, -A ) GO TO 30 END IF *+ END WHILE * END IF * BETA = LBETA T = LT RND = LRND IEEE1 = LIEEE1 RETURN * * End of SLAMC1 * END * ************************************************************************ * SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL RND INTEGER BETA, EMAX, EMIN, T REAL EPS, RMAX, RMIN * .. * * Purpose * ======= * * SLAMC2 determines the machine parameters specified in its argument * list. * * Arguments * ========= * * BETA (output) INTEGER * The base of the machine. * * T (output) INTEGER * The number of ( BETA ) digits in the mantissa. * * RND (output) LOGICAL * Specifies whether proper rounding ( RND = .TRUE. ) or * chopping ( RND = .FALSE. ) occurs in addition. This may not * be a reliable guide to the way in which the machine performs * its arithmetic. * * EPS (output) REAL * The smallest positive number such that * * fl( 1.0 - EPS ) .LT. 1.0, * * where fl denotes the computed value. * * EMIN (output) INTEGER * The minimum exponent before (gradual) underflow occurs. * * RMIN (output) REAL * The smallest normalized number for the machine, given by * BASE**( EMIN - 1 ), where BASE is the floating point value * of BETA. * * EMAX (output) INTEGER * The maximum exponent before overflow occurs. * * RMAX (output) REAL * The largest positive number for the machine, given by * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point * value of BETA. * * Further Details * =============== * * The computation of EPS is based on a routine PARANOIA by * W. Kahan of the University of California at Berkeley. * * ===================================================================== * * .. Local Scalars .. LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, $ NGNMIN, NGPMIN REAL A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, $ SIXTH, SMALL, THIRD, TWO, ZERO * .. * .. External Functions .. REAL SLAMC3 EXTERNAL SLAMC3 * .. * .. External Subroutines .. EXTERNAL SLAMC1, SLAMC4, SLAMC5 * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Save statement .. SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, $ LRMIN, LT * .. * .. Data statements .. DATA FIRST / .TRUE. / , IWARN / .FALSE. / * .. * .. Executable Statements .. * IF( FIRST ) THEN FIRST = .FALSE. ZERO = 0 ONE = 1 TWO = 2 * * LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of * BETA, T, RND, EPS, EMIN and RMIN. * * Throughout this routine we use the function SLAMC3 to ensure * that relevant values are stored and not held in registers, or * are not affected by optimizers. * * SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. * CALL SLAMC1( LBETA, LT, LRND, LIEEE1 ) * * Start to find EPS. * B = LBETA A = B**( -LT ) LEPS = A * * Try some tricks to see whether or not this is the correct EPS. * B = TWO / 3 HALF = ONE / 2 SIXTH = SLAMC3( B, -HALF ) THIRD = SLAMC3( SIXTH, SIXTH ) B = SLAMC3( THIRD, -HALF ) B = SLAMC3( B, SIXTH ) B = ABS( B ) IF( B.LT.LEPS ) $ B = LEPS * LEPS = 1 * *+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP 10 CONTINUE IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN LEPS = B C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) C = SLAMC3( HALF, -C ) B = SLAMC3( HALF, C ) C = SLAMC3( HALF, -B ) B = SLAMC3( HALF, C ) GO TO 10 END IF *+ END WHILE * IF( A.LT.LEPS ) $ LEPS = A * * Computation of EPS complete. * * Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). * Keep dividing A by BETA until (gradual) underflow occurs. This * is detected when we cannot recover the previous A. * RBASE = ONE / LBETA SMALL = ONE DO 20 I = 1, 3 SMALL = SLAMC3( SMALL*RBASE, ZERO ) 20 CONTINUE A = SLAMC3( ONE, SMALL ) CALL SLAMC4( NGPMIN, ONE, LBETA ) CALL SLAMC4( NGNMIN, -ONE, LBETA ) CALL SLAMC4( GPMIN, A, LBETA ) CALL SLAMC4( GNMIN, -A, LBETA ) IEEE = .FALSE. * IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN IF( NGPMIN.EQ.GPMIN ) THEN LEMIN = NGPMIN * ( Non twos-complement machines, no gradual underflow; * e.g., VAX ) ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN LEMIN = NGPMIN - 1 + LT IEEE = .TRUE. * ( Non twos-complement machines, with gradual underflow; * e.g., IEEE standard followers ) ELSE LEMIN = MIN( NGPMIN, GPMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN LEMIN = MAX( NGPMIN, NGNMIN ) * ( Twos-complement machines, no gradual underflow; * e.g., CYBER 205 ) ELSE LEMIN = MIN( NGPMIN, NGNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. $ ( GPMIN.EQ.GNMIN ) ) THEN IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT * ( Twos-complement machines with gradual underflow; * no known machine ) ELSE LEMIN = MIN( NGPMIN, NGNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF * ELSE LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) * ( A guess; no known machine ) IWARN = .TRUE. END IF *** * Comment out this if block if EMIN is ok IF( IWARN ) THEN FIRST = .TRUE. WRITE( 6, FMT = 9999 )LEMIN END IF *** * * Assume IEEE arithmetic if we found denormalised numbers above, * or if arithmetic seems to round in the IEEE style, determined * in routine SLAMC1. A true IEEE machine should have both things * true; however, faulty machines may have one or the other. * IEEE = IEEE .OR. LIEEE1 * * Compute RMIN by successive division by BETA. We could compute * RMIN as BASE**( EMIN - 1 ), but some machines underflow during * this computation. * LRMIN = 1 DO 30 I = 1, 1 - LEMIN LRMIN = SLAMC3( LRMIN*RBASE, ZERO ) 30 CONTINUE * * Finally, call SLAMC5 to compute EMAX and RMAX. * CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) END IF * BETA = LBETA T = LT RND = LRND EPS = LEPS EMIN = LEMIN RMIN = LRMIN EMAX = LEMAX RMAX = LRMAX * RETURN * 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', $ ' EMIN = ', I8, / $ ' If, after inspection, the value EMIN looks', $ ' acceptable please comment out ', $ / ' the IF block as marked within the code of routine', $ ' SLAMC2,', / ' otherwise supply EMIN explicitly.', / ) * * End of SLAMC2 * END * ************************************************************************ * REAL FUNCTION SLAMC3( A, B ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. REAL A, B * .. * * Purpose * ======= * * SLAMC3 is intended to force A and B to be stored prior to doing * the addition of A and B , for use in situations where optimizers * might hold one of these in a register. * * Arguments * ========= * * A, B (input) REAL * The values A and B. * * ===================================================================== * * .. Executable Statements .. * SLAMC3 = A + B * RETURN * * End of SLAMC3 * END * ************************************************************************ * SUBROUTINE SLAMC4( EMIN, START, BASE ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. INTEGER BASE, EMIN REAL START * .. * * Purpose * ======= * * SLAMC4 is a service routine for SLAMC2. * * Arguments * ========= * * EMIN (output) EMIN * The minimum exponent before (gradual) underflow, computed by * setting A = START and dividing by BASE until the previous A * can not be recovered. * * START (input) REAL * The starting point for determining EMIN. * * BASE (input) INTEGER * The base of the machine. * * ===================================================================== * * .. Local Scalars .. INTEGER I REAL A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO * .. * .. External Functions .. REAL SLAMC3 EXTERNAL SLAMC3 * .. * .. Executable Statements .. * A = START ONE = 1 RBASE = ONE / BASE ZERO = 0 EMIN = 1 B1 = SLAMC3( A*RBASE, ZERO ) C1 = A C2 = A D1 = A D2 = A *+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. * $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP 10 CONTINUE IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. $ ( D2.EQ.A ) ) THEN EMIN = EMIN - 1 A = B1 B1 = SLAMC3( A / BASE, ZERO ) C1 = SLAMC3( B1*BASE, ZERO ) D1 = ZERO DO 20 I = 1, BASE D1 = D1 + B1 20 CONTINUE B2 = SLAMC3( A*RBASE, ZERO ) C2 = SLAMC3( B2 / RBASE, ZERO ) D2 = ZERO DO 30 I = 1, BASE D2 = D2 + B2 30 CONTINUE GO TO 10 END IF *+ END WHILE * RETURN * * End of SLAMC4 * END * ************************************************************************ * SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) * * -- LAPACK auxiliary routine (version 2.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * October 31, 1992 * * .. Scalar Arguments .. LOGICAL IEEE INTEGER BETA, EMAX, EMIN, P REAL RMAX * .. * * Purpose * ======= * * SLAMC5 attempts to compute RMAX, the largest machine floating-point * number, without overflow. It assumes that EMAX + abs(EMIN) sum * approximately to a power of 2. It will fail on machines where this * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, * EMAX = 28718). It will also fail if the value supplied for EMIN is * too large (i.e. too close to zero), probably with overflow. * * Arguments * ========= * * BETA (input) INTEGER * The base of floating-point arithmetic. * * P (input) INTEGER * The number of base BETA digits in the mantissa of a * floating-point value. * * EMIN (input) INTEGER * The minimum exponent before (gradual) underflow. * * IEEE (input) LOGICAL * A logical flag specifying whether or not the arithmetic * system is thought to comply with the IEEE standard. * * EMAX (output) INTEGER * The largest exponent before overflow * * RMAX (output) REAL * The largest machine floating-point number. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP REAL OLDY, RECBAS, Y, Z * .. * .. External Functions .. REAL SLAMC3 EXTERNAL SLAMC3 * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * First compute LEXP and UEXP, two powers of 2 that bound * abs(EMIN). We then assume that EMAX + abs(EMIN) will sum * approximately to the bound that is closest to abs(EMIN). * (EMAX is the exponent of the required number RMAX). * LEXP = 1 EXBITS = 1 10 CONTINUE TRY = LEXP*2 IF( TRY.LE.( -EMIN ) ) THEN LEXP = TRY EXBITS = EXBITS + 1 GO TO 10 END IF IF( LEXP.EQ.-EMIN ) THEN UEXP = LEXP ELSE UEXP = TRY EXBITS = EXBITS + 1 END IF * * Now -LEXP is less than or equal to EMIN, and -UEXP is greater * than or equal to EMIN. EXBITS is the number of bits needed to * store the exponent. * IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN EXPSUM = 2*LEXP ELSE EXPSUM = 2*UEXP END IF * * EXPSUM is the exponent range, approximately equal to * EMAX - EMIN + 1 . * EMAX = EXPSUM + EMIN - 1 NBITS = 1 + EXBITS + P * * NBITS is the total number of bits needed to store a * floating-point number. * IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN * * Either there are an odd number of bits used to store a * floating-point number, which is unlikely, or some bits are * not used in the representation of numbers, which is possible, * (e.g. Cray machines) or the mantissa has an implicit bit, * (e.g. IEEE machines, Dec Vax machines), which is perhaps the * most likely. We have to assume the last alternative. * If this is true, then we need to reduce EMAX by one because * there must be some way of representing zero in an implicit-bit * system. On machines like Cray, we are reducing EMAX by one * unnecessarily. * EMAX = EMAX - 1 END IF * IF( IEEE ) THEN * * Assume we are on an IEEE machine which reserves one exponent * for infinity and NaN. * EMAX = EMAX - 1 END IF * * Now create RMAX, the largest machine number, which should * be equal to (1.0 - BETA**(-P)) * BETA**EMAX . * * First compute 1.0 - BETA**(-P), being careful that the * result is less than 1.0 . * RECBAS = ONE / BETA Z = BETA - ONE Y = ZERO DO 20 I = 1, P Z = Z*RECBAS IF( Y.LT.ONE ) $ OLDY = Y Y = SLAMC3( Y, Z ) 20 CONTINUE IF( Y.GE.ONE ) $ Y = OLDY * * Now multiply by BETA**EMAX to get RMAX. * DO 30 I = 1, EMAX Y = SLAMC3( Y*BETA, ZERO ) 30 CONTINUE * RMAX = Y RETURN * * End of SLAMC5 * END scalapack-1.8.0/PBLAS/TESTING/PB_Cabort.c0000640000175000017500000001024710363532303017235 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cabort( int ICTXT, char * ROUT, int INFO ) #else void PB_Cabort( ICTXT, ROUT, INFO ) /* * .. Scalar Arguments .. */ int ICTXT, INFO; /* * .. Array Arguments .. */ char * ROUT; #endif { /* * Purpose * ======= * * PB_Cabort is an error handler for the PBLAS routines. This routine * displays an error message on stderr by calling PB_Cwarn, and halts * execution by calling Cblacs_abort(). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * INFO (local input) INTEGER * The error code computed by the calling PBLAS routine. * = 0: no error found * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int mycol, myrow, npcol, nprow; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &INFO ) ) return; #endif if( INFO < 0 ) { /* * Display an error message */ if( INFO < DESCMULT ) PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d had an illegal value", -INFO ); else PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d, entry number %d had an illegal value", (-INFO) / DESCMULT, (-INFO) % DESCMULT ); } else { /* * Error code is incorrect, it should be negative */ PB_Cwarn( ICTXT, -1, ROUT, "Positive error code %d returned by %s!!!", INFO ); } Cblacs_abort( ICTXT, INFO ); /* * End of PB_Cabort */ } scalapack-1.8.0/PBLAS/TESTING/PB_Cwarn.c0000640000175000017500000001125110363532303017071 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cwarn( int ICTXT, int LINE, char * ROUT, char * FORM, ... ) #else void PB_Cwarn( va_alist ) va_dcl #endif { /* * Purpose * ======= * * PB_Cwarn is an error handler for the PBLAS routines. This routine * displays an error message on stderr. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * LINE (local input) INTEGER * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a valid line number, * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * FORM (local input) pointer to CHAR * On entry, FORM is a control string specifying the format * conversion of its following arguments. * * ... (local input) * On entry, FORM is a control string specifying the format * On entry, the expressions that are to be evaluated and con- * verted according to the formats in the control string FORM * and then placed in the output stream. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ va_list argptr; int iam, mycol, myrow, npcol, nprow; char cline[100]; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif #ifdef __STDC__ va_start( argptr, FORM ); #else char * ROUT, * FORM; int ICTXT, LINE; /* .. * .. Executable Statements .. * */ va_start( argptr ); ICTXT = va_arg( argptr, int ); LINE = va_arg( argptr, int ); ROUT = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &ICTXT ) ) return; #endif vsprintf( cline, FORM, argptr ); va_end( argptr ); Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); if( nprow != -1 ) iam = Cblacs_pnum( ICTXT, myrow, mycol ); else iam = -1; /* * Display an error message */ if( LINE <= 0 ) (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", in routine ", ROUT ); else (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", on line ", LINE, " of routine ", ROUT ); /* * End of PB_Cwarn */ } scalapack-1.8.0/PBLAS/TESTING/PCBLAS2TST.dat0000640000175000017500000000645510363532303017417 0ustar muammarmuammar'Level 2 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS2TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0E0, -4.0E0) value of ALPHA (3.0E0, -2.0E0) value of BETA 4 number of tests problems 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'U' 'U' 'U' 'U' 'U' 'U' 'U' 'U' values of UPLO 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANS 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' 'N' 'U' values of DIAG 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 values of M 8 10 12 14 16 18 20 22 24 9 11 13 15 17 19 21 values of N 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of M_A 31 33 35 36 34 32 30 31 33 35 36 34 32 30 38 40 values of N_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_A 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_A 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IA 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JA 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of M_X 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 values of N_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_X 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_X 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IX 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JX 30 1 30 1 30 1 30 1 30 1 30 1 30 1 30 1 values of INCX 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of M_Y 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 values of N_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of IMB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of INB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of MB_Y 1 2 3 4 5 6 7 8 9 8 7 5 4 3 5 6 values of NB_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of RSRC_Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 values of CSRC_Y 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of IY 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 values of JY 40 1 40 1 40 1 40 1 40 1 40 1 40 1 40 1 values of INCY PCGEMV T put F for no test in the same column PCHEMV T put F for no test in the same column PCTRMV T put F for no test in the same column PCTRSV T put F for no test in the same column PCGERU T put F for no test in the same column PCGERC T put F for no test in the same column PCHER T put F for no test in the same column PCHER2 T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/psblastst.f0000640000175000017500000135404610363532303017475 0ustar muammarmuammar SUBROUTINE PSOPTEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSOPTEE tests whether the PBLAS respond correctly to a bad option * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PSCHKOPT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2nd option * APOS = 2 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 3rd option * APOS = 3 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2'nd option * APOS = 2 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 .OR. SCODE.EQ.40 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2'nd option * APOS = 2 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 3rd option * APOS = 3 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 4th option * APOS = 4 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st option * APOS = 1 CALL PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * END IF * RETURN * * End of PSOPTEE * END SUBROUTINE PSCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSCHKOPT tests the option ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the option to be * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PSCALLSUB, PSSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PSSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'D' ) ) THEN * * Generate bad DIAG option * DIAG = '/' * ELSE IF( LSAME( ARGNAM, 'S' ) ) THEN * * Generate bad SIDE option * SIDE = '/' * ELSE IF( LSAME( ARGNAM, 'A' ) ) THEN * * Generate bad TRANSA option * TRANSA = '/' * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Generate bad TRANSB option * TRANSB = '/' * ELSE IF( LSAME( ARGNAM, 'U' ) ) THEN * * Generate bad UPLO option * UPLO = '/' * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PSCHKOPT * END SUBROUTINE PSDIMEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSDIMEE tests whether the PBLAS respond correctly to a bad dimension * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PSCHKDIM * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 .OR. SCODE.EQ.12 .OR. SCODE.EQ.13 .OR. $ SCODE.EQ.14 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) dimension * APOS = 1 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st dimension * APOS = 2 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) dimension * APOS = 2 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) dimension * APOS = 4 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.24 ) THEN * * Check 1st dimension * APOS = 1 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 3rd dimension * APOS = 5 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 ) THEN * * Check 1st dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 2nd dimension * APOS = 4 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st dimension * APOS = 1 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st dimension * APOS = 5 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 6 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st dimension * APOS = 2 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st dimension * APOS = 3 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * END IF * RETURN * * End of PSDIMEE * END SUBROUTINE PSCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSCHKDIM tests the dimension ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the dimension to be * checked. ARGNAM can either be 'M', 'N' or 'K'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PSCALLSUB, PSSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER KDIM, MDIM, NDIM COMMON /PBLASN/KDIM, MDIM, NDIM * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PSSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'M' ) ) THEN * * Generate bad MDIM * MDIM = -1 * ELSE IF( LSAME( ARGNAM, 'N' ) ) THEN * * Generate bad NDIM * NDIM = -1 * ELSE * * Generate bad KDIM * KDIM = -1 * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PSCHKDIM * END SUBROUTINE PSVECEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSVECEE tests whether the PBLAS respond correctly to a bad vector * argument. Each vector is described by: , I, J, * DESC, INC. Out of all these, only I, J, * DESC, and INC can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PSCHKMAT * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * * Check 1st vector * APOS = 2 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 7 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.12 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) vector * APOS = 3 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.13 ) THEN * * Check 1st vector * APOS = 3 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 8 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.14 ) THEN * * Check 1st (and only) vector * APOS = 4 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st vector * APOS = 9 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 15 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st vector * APOS = 8 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 14 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) vector * APOS = 9 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st vector * APOS = 4 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 9 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.26 .OR. SCODE.EQ.27 ) THEN * * Check 1'st (and only) vector * APOS = 4 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * END IF * RETURN * * End of PSVECEE * END SUBROUTINE PSMATEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSMATEE tests whether the PBLAS respond correctly to a bad matrix * argument. Each matrix is described by: , I, J, * and DESC. Out of all these, only I, J and DESC * can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PSCHKMAT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 .OR. SCODE.EQ.23 ) THEN * * Check 1st (and only) matrix * APOS = 5 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st (and only) matrix * APOS = 4 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st (and only) matrix * APOS = 14 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.25 .OR. SCODE.EQ.26 ) THEN * * Check 1st (and only) matrix * APOS = 9 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st matrix * APOS = 7 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 16 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.32 .OR. SCODE.EQ.35 .OR. SCODE.EQ.36 ) THEN * * Check 1st matrix * APOS = 6 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 15 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 ) THEN * * Check 1st matrix * APOS = 6 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st matrix * APOS = 4 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 9 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st matrix * APOS = 8 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 12 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st matrix * APOS = 5 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st matrix * APOS = 6 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * END IF * RETURN * * End of PSMATEE * END SUBROUTINE PSSETPBLAS( ICTXT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT * .. * * Purpose * ======= * * PSSETPBLAS initializes *all* the dummy arguments to correct values. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. External Subroutines .. EXTERNAL PB_DESCSET2 * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM REAL USCLR, SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Set default values for options * DIAG = 'N' SIDE = 'L' TRANSA = 'N' TRANSB = 'N' UPLO = 'U' * * Set default values for scalars * KDIM = 1 MDIM = 1 NDIM = 1 ISCLR = 1 SCLR = ONE USCLR = ONE * * Set default values for distributed matrix A * A( 1, 1 ) = ONE A( 2, 1 ) = ONE A( 1, 2 ) = ONE A( 2, 2 ) = ONE IA = 1 JA = 1 CALL PB_DESCSET2( DESCA, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix B * B( 1, 1 ) = ONE B( 2, 1 ) = ONE B( 1, 2 ) = ONE B( 2, 2 ) = ONE IB = 1 JB = 1 CALL PB_DESCSET2( DESCB, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix C * C( 1, 1 ) = ONE C( 2, 1 ) = ONE C( 1, 2 ) = ONE C( 2, 2 ) = ONE IC = 1 JC = 1 CALL PB_DESCSET2( DESCC, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix X * X( 1 ) = ONE X( 2 ) = ONE IX = 1 JX = 1 CALL PB_DESCSET2( DESCX, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCX = 1 * * Set default values for distributed matrix Y * Y( 1 ) = ONE Y( 2 ) = ONE IY = 1 JY = 1 CALL PB_DESCSET2( DESCY, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCY = 1 * RETURN * * End of PSSETPBLAS * END SUBROUTINE PSCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the matrix or vector * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one * wants to check a matrix, and 'X' or 'Y' for a vector. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the first argument * of the matrix (or vector) ARGNAM. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT PARAMETER ( DESCMULT = 100 ) * .. * .. Local Scalars .. INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCHKPBE, PSCALLSUB, PSSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ARGNAM, 'A' ) ) THEN * * Check IA. Set all other OK, bad IA * CALL PSSETPBLAS( ICTXT ) IA = -1 INFOT = ARGPOS + 1 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JA. Set all other OK, bad JA * CALL PSSETPBLAS( ICTXT ) JA = -1 INFOT = ARGPOS + 2 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCA. Set all other OK, bad DESCA * DO 10 I = 1, DLEN_ * * Set I'th entry of DESCA to incorrect value, rest ok. * CALL PSSETPBLAS( ICTXT ) DESCA( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCA, CSRCA, LDA * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PSSETPBLAS( ICTXT ) * * Test RSRCA >= NPROW * IF( I.EQ.RSRC_ ) $ DESCA( I ) = NPROW * * Test CSRCA >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCA( I ) = NPCOL * * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCA( I ) = 1 ELSE DESCA( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 10 CONTINUE * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Check IB. Set all other OK, bad IB * CALL PSSETPBLAS( ICTXT ) IB = -1 INFOT = ARGPOS + 1 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JB. Set all other OK, bad JB * CALL PSSETPBLAS( ICTXT ) JB = -1 INFOT = ARGPOS + 2 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCB. Set all other OK, bad DESCB * DO 20 I = 1, DLEN_ * * Set I'th entry of DESCB to incorrect value, rest ok. * CALL PSSETPBLAS( ICTXT ) DESCB( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCB, CSRCB, LDB * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PSSETPBLAS( ICTXT ) * * Test RSRCB >= NPROW * IF( I.EQ.RSRC_ ) $ DESCB( I ) = NPROW * * Test CSRCB >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCB( I ) = NPCOL * * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCB( I ) = 1 ELSE DESCB( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 20 CONTINUE * ELSE IF( LSAME( ARGNAM, 'C' ) ) THEN * * Check IC. Set all other OK, bad IC * CALL PSSETPBLAS( ICTXT ) IC = -1 INFOT = ARGPOS + 1 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JC. Set all other OK, bad JC * CALL PSSETPBLAS( ICTXT ) JC = -1 INFOT = ARGPOS + 2 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCC. Set all other OK, bad DESCC * DO 30 I = 1, DLEN_ * * Set I'th entry of DESCC to incorrect value, rest ok. * CALL PSSETPBLAS( ICTXT ) DESCC( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCC, CSRCC, LDC * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PSSETPBLAS( ICTXT ) * * Test RSRCC >= NPROW * IF( I.EQ.RSRC_ ) $ DESCC( I ) = NPROW * * Test CSRCC >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCC( I ) = NPCOL * * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCC( I ) = 1 ELSE DESCC( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 30 CONTINUE * ELSE IF( LSAME( ARGNAM, 'X' ) ) THEN * * Check IX. Set all other OK, bad IX * CALL PSSETPBLAS( ICTXT ) IX = -1 INFOT = ARGPOS + 1 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JX. Set all other OK, bad JX * CALL PSSETPBLAS( ICTXT ) JX = -1 INFOT = ARGPOS + 2 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCX. Set all other OK, bad DESCX * DO 40 I = 1, DLEN_ * * Set I'th entry of DESCX to incorrect value, rest ok. * CALL PSSETPBLAS( ICTXT ) DESCX( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCX, CSRCX, LDX * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PSSETPBLAS( ICTXT ) * * Test RSRCX >= NPROW * IF( I.EQ.RSRC_ ) $ DESCX( I ) = NPROW * * Test CSRCX >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCX( I ) = NPCOL * * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCX( I ) = 1 ELSE DESCX( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 40 CONTINUE * * Check INCX. Set all other OK, bad INCX * CALL PSSETPBLAS( ICTXT ) INCX = -1 INFOT = ARGPOS + 4 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * ELSE * * Check IY. Set all other OK, bad IY * CALL PSSETPBLAS( ICTXT ) IY = -1 INFOT = ARGPOS + 1 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JY. Set all other OK, bad JY * CALL PSSETPBLAS( ICTXT ) JY = -1 INFOT = ARGPOS + 2 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCY. Set all other OK, bad DESCY * DO 50 I = 1, DLEN_ * * Set I'th entry of DESCY to incorrect value, rest ok. * CALL PSSETPBLAS( ICTXT ) DESCY( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCY, CSRCY, LDY * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PSSETPBLAS( ICTXT ) * * Test RSRCY >= NPROW * IF( I.EQ.RSRC_ ) $ DESCY( I ) = NPROW * * Test CSRCY >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCY( I ) = NPCOL * * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCY( I ) = 1 ELSE DESCY( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 50 CONTINUE * * Check INCY. Set all other OK, bad INCY * CALL PSSETPBLAS( ICTXT ) INCY = -1 INFOT = ARGPOS + 4 CALL PSCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * RETURN * * End of PSCHKMAT * END SUBROUTINE PSCALLSUB( SUBPTR, SCODE ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER SCODE * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSCALLSUB calls the subroutine SUBPTR with the calling sequence iden- * tified by SCODE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM REAL USCLR, SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) REAL A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * CALL SUBPTR( NDIM, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY ) * ELSE IF( SCODE.EQ.12 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.13 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, JY, $ DESCY, INCY ) * ELSE IF( SCODE.EQ.14 ) THEN * CALL SUBPTR( NDIM, SCLR, ISCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.15 ) THEN * CALL SUBPTR( NDIM, USCLR, X, IX, JX, DESCX, INCX ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.22 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, A, IA, JA, DESCA, X, IX, JX, $ DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.23 ) THEN * CALL SUBPTR( UPLO, TRANSA, DIAG, NDIM, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.24 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * ELSE IF( SCODE.EQ.25 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.26 ) THEN * CALL SUBPTR( UPLO, NDIM, USCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.27 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * CALL SUBPTR( TRANSA, TRANSB, MDIM, NDIM, KDIM, SCLR, A, IA, JA, $ DESCA, B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.32 ) THEN * CALL SUBPTR( SIDE, UPLO, MDIM, NDIM, SCLR, A, IA, JA, DESCA, B, $ IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.33 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.34 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, USCLR, A, IA, JA, DESCA, $ USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.35 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.36 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.37 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, C, IC, $ JC, DESCC ) * ELSE IF( SCODE.EQ.38 ) THEN * CALL SUBPTR( SIDE, UPLO, TRANSA, DIAG, MDIM, NDIM, SCLR, A, IA, $ JA, DESCA, B, IB, JB, DESCB ) * ELSE IF( SCODE.EQ.39 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, $ C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.40 ) THEN * CALL SUBPTR( UPLO, TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * END IF * RETURN * * End of PSCALLSUB * END SUBROUTINE PSERRSET( ERR, ERRMAX, XTRUE, X ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ERR, ERRMAX, X, XTRUE * .. * * Purpose * ======= * * PSERRSET computes the absolute difference ERR = |XTRUE - X| and com- * pares it with zero. ERRMAX accumulates the absolute error difference. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERR (local output) REAL * On exit, ERR specifies the absolute difference |XTRUE - X|. * * ERRMAX (local input/local output) REAL * On entry, ERRMAX specifies a previously computed error. On * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ). * * XTRUE (local input) REAL * On entry, XTRUE specifies the true value. * * X (local input) REAL * On entry, X specifies the value to be compared to XTRUE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. REAL PSDIFF EXTERNAL PSDIFF * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * ERR = ABS( PSDIFF( XTRUE, X ) ) * ERRMAX = MAX( ERRMAX, ERR ) * RETURN * * End of PSERRSET * END SUBROUTINE PSCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N REAL ERRMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) REAL PX( * ), X( * ) * .. * * Purpose * ======= * * PSCHKVIN checks that the submatrix sub( PX ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) REAL * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( X ) and sub( PX ). * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) REAL array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL, $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL, $ MYCOL, MYROW, NPCOL, NPROW REAL ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L, PSERRSET, SGAMX2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) * LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( N.EQ.1 ) THEN * IF( ( MYROW.EQ.IXROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.IXCOL .OR. COLREP ) ) $ CALL PSERRSET( ERR, ERRMAX, X( IX+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) * ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * IF( MYROW.EQ.IXROW .OR. ROWREP ) THEN * ICURCOL = IXCOL IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 10 J = JX, JN CALL PSERRSET( ERR, ERRMAX, X( IX+(J-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) JJX = JJX + 1 10 CONTINUE END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 20 KK = 0, JB-1 CALL PSERRSET( ERR, ERRMAX, X( IX+(J+KK-1)*LDX ), $ PX( IIX+(JJX+KK-1)*LDPX ) ) 20 CONTINUE * JJX = JJX + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 30 CONTINUE * END IF * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * IF( MYCOL.EQ.IXCOL .OR. COLREP ) THEN * ICURROW = IXROW IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 40 I = IX, IN CALL PSERRSET( ERR, ERRMAX, X( I+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) IIX = IIX + 1 40 CONTINUE END IF ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * DO 50 KK = 0, IB-1 CALL PSERRSET( ERR, ERRMAX, X( I+KK+(JX-1)*LDX ), $ PX( IIX+KK+(JJX-1)*LDPX ) ) 50 CONTINUE * IIX = IIX + IB * END IF * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PSCHKVIN * END SUBROUTINE PSCHKVOUT( N, X, PX, IX, JX, DESCX, INCX, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) REAL PX( * ), X( * ) * .. * * Purpose * ======= * * PSCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) REAL array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX, $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL, $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL, $ NPROW, NQALL REAL EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSERRSET, SGAMX2D * .. * .. External Functions .. INTEGER PB_NUMROC REAL PSLAMCH EXTERNAL PSLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCX( M_ ).LE.0 ).OR.( DESCX( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCX( M_ ), 1, DESCX( IMB_ ), DESCX( MB_ ), $ MYROW, DESCX( RSRC_ ), NPROW ) NQALL = PB_NUMROC( DESCX( N_ ), 1, DESCX( INB_ ), DESCX( NB_ ), $ MYCOL, DESCX( CSRC_ ), NPCOL ) * MBX = DESCX( MB_ ) NBX = DESCX( NB_ ) LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ICURROW = DESCX( RSRC_ ) ICURCOL = DESCX( CSRC_ ) ROWREP = ( ICURROW.EQ.-1 ) COLREP = ( ICURCOL.EQ.-1 ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN IMBX = DESCX( IMB_ ) ELSE IMBX = MBX END IF IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN INBX = DESCX( INB_ ) ELSE INBX = NBX END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - ICURROW + NPROW, NPROW ) END IF IF( COLREP ) THEN MYCOLDIST = 0 ELSE MYCOLDIST = MOD( MYCOL - ICURCOL + NPCOL, NPCOL ) END IF II = 1 JJ = 1 * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * I = 1 IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) IB = MIN( DESCX( M_ ), DESCX( IMB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. J+KK.GT.JX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 10 CONTINUE 20 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 50 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 30 CONTINUE 40 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 50 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * DO 110 I = DESCX( IMB_ ) + 1, DESCX( M_ ), MBX IB = MIN( DESCX( M_ ) - I + 1, MBX ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF * JJ = 1 JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 60 CONTINUE 70 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 100 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 80 CONTINUE 90 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 100 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * 110 CONTINUE * ELSE * * sub( X ) is a column vector * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) JB = MIN( DESCX( N_ ), DESCX( INB_ ) ) * DO 130 KK = 0, JB-1 DO 120 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. I+LL.GT.IX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 120 CONTINUE 130 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 160 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 150 KK = 0, JB-1 DO 140 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 140 CONTINUE 150 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 160 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 220 J = DESCX( INB_ ) + 1, DESCX( N_ ), NBX JB = MIN( DESCX( N_ ) - J + 1, NBX ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) DO 180 KK = 0, JB-1 DO 170 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 170 CONTINUE 180 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 210 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 200 KK = 0, JB-1 DO 190 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 190 CONTINUE 200 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 210 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * 220 CONTINUE * END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PSCHKVOUT * END SUBROUTINE PSCHKMIN( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N REAL ERRMAX * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL PA( * ), A( * ) * .. * * Purpose * ======= * * PSCHKMIN checks that the submatrix sub( PA ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) REAL * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( A ) and sub( PA ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * operand sub( A ). N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW REAL ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L, PSERRSET, SGAMX2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if posssible * IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) * II = IIA JJ = JJA LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) ICURROW = IAROW ICURCOL = IACOL ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA + JB - 1 * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 40 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 10 K = 0, IB-1 CALL PSERRSET( ERR, ERRMAX, A( IA+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 10 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 20 K = 0, IB-1 CALL PSERRSET( ERR, ERRMAX, A( I+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 20 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = IIA ICURROW = IAROW 40 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining column blocks * DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 80 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 50 K = 0, IB-1 CALL PSERRSET( ERR, ERRMAX, A( IA+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 50 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 70 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 60 K = 0, IB-1 CALL PSERRSET( ERR, ERRMAX, $ A( I+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 60 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 70 CONTINUE * II = IIA ICURROW = IAROW 80 CONTINUE * JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 90 CONTINUE * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PSCHKMIN * END SUBROUTINE PSCHKMOUT( M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), PA( * ) * .. * * Purpose * ======= * * PSCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( PA ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( PA ). N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK, $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST, $ NPCOL, NPROW REAL EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSERRSET, SGAMX2D * .. * .. External Functions .. INTEGER PB_NUMROC REAL PSLAMCH EXTERNAL PSLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCA( M_ ).LE.0 ).OR.( DESCA( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCA( M_ ), 1, DESCA( IMB_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) * LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) * II = 1 JJ = 1 ROWREP = ( DESCA( RSRC_ ).EQ.-1 ) COLREP = ( DESCA( CSRC_ ).EQ.-1 ) ICURCOL = DESCA( CSRC_ ) IF( MYROW.EQ.DESCA( RSRC_ ) .OR. ROWREP ) THEN IMBA = DESCA( IMB_ ) ELSE IMBA = DESCA( MB_ ) END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - DESCA( RSRC_ ) + NPROW, NPROW ) END IF * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) JB = MIN( DESCA( N_ ), DESCA( INB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 10 CONTINUE 20 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 50 II = IMBA + 1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 30 CONTINUE 40 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 50 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 110 J = DESCA( INB_ ) + 1, DESCA( N_ ), DESCA( NB_ ) JB = MIN( DESCA( N_ ) - J + 1, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 60 CONTINUE 70 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 100 II = IMBA+1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PSERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 80 CONTINUE 90 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 100 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * INSERT MODE 110 CONTINUE * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PSCHKMOUT * END SUBROUTINE PSMPRNT( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, $ CMATNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM REAL A( LDA, * ) * .. * * Purpose * ======= * * PSMPRNT prints to the standard output an array A of size m by n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (LDA,N). The leading m * by n part of this array is printed. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array A to be printed. LDA must be at least MAX( 1, M ). * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM specifies the identifier of the matrix to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 20 J = 1, N * DO 10 I = 1, M * WRITE( NOUT, FMT = 9999 ) CMATNM, I, J, A( I, J ) * 10 CONTINUE * 20 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', E16.8 ) * RETURN * * End of PSMPRNT * END SUBROUTINE PSVPRNT( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, $ CVECNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CVECNM REAL X( * ) * .. * * Purpose * ======= * * PSVPRNT prints to the standard output an vector x of length n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * N (global input) INTEGER * On entry, N specifies the length of the vector X. N must be * at least zero. * * X (global input) REAL array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CVECNM (global input) CHARACTER*(*) * On entry, CVECNM specifies the identifier of the vector to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 10 I = 1, 1 + ( N-1 )*INCX, INCX * WRITE( NOUT, FMT = 9999 ) CVECNM, I, X( I ) * 10 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ')=', E16.8 ) * RETURN * * End of PSVPRNT * END SUBROUTINE PSMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, $ DESCY, INCY, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL A( * ), G( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PSMVCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies which matrix-vector product is to * be computed as follows: * If TRANS = 'N', * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ), * otherwise * sub( Y ) = BETA * sub( Y ) + sub( A )' * sub( X ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) REAL array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP, TRAN INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX, $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA, $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL, $ NPROW REAL EPS, ERRI, GTMP, TBETA, YTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN TBETA = ONE ELSE TBETA = BETA END IF * TRAN = LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) IF( TRAN ) THEN ML = N NL = M ELSE ML = M NL = N END IF * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in Y using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * IOFFY = IY + ( JY - 1 ) * LDY DO 30 I = 1, ML YTMP = ZERO GTMP = ZERO IOFFX = IX + ( JX - 1 ) * LDX IF( TRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 10 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS( A( IOFFA ) * X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 10 CONTINUE ELSE IOFFA = IA + I - 1 + ( JA - 1 ) * LDA DO 20 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS( A( IOFFA ) * X( IOFFX ) ) IOFFA = IOFFA + LDA IOFFX = IOFFX + INCX 20 CONTINUE END IF G( I ) = ABS( ALPHA ) * GTMP + ABS( TBETA * Y( IOFFY ) ) Y( IOFFY ) = ALPHA * YTMP + TBETA * Y( IOFFY ) IOFFY = IOFFY + INCY 30 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPY = DESCY( LLD_ ) IOFFY = IY + ( JY - 1 ) * LDY CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, IIY, $ JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, ML ) JN = JY + JB - 1 * DO 50 J = JY, JN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( J-JY+1 ).NE.ZERO ) $ ERRI = ERRI / G( J-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 50 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 70 J = JN+1, JY+ML-1, DESCY( NB_ ) JB = MIN( JY+ML-J, DESCY( NB_ ) ) * DO 60 KK = 0, JB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( J+KK-JY+1 ).NE.ZERO ) $ ERRI = ERRI / G( J+KK-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 60 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 70 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, ML ) IN = IY + IB - 1 * DO 80 I = IY, IN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( I-IY+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 80 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 100 I = IN+1, IY+ML-1, DESCY( MB_ ) IB = MIN( IY+ML-I, DESCY( MB_ ) ) * DO 90 KK = 0, IB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( I+KK-IY+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 90 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 100 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PSMVCH * END SUBROUTINE PSVMCH( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, $ DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ALPHA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL A( * ), G( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PSVMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * X (local input) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) REAL array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFX, IOFFY, J, JJA, KK, LDA, LDPA, $ LDX, LDY, MYCOL, MYROW, NPCOL, NPROW REAL ATMP, EPS, ERRI, GTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFY = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND * IOFFX = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA ATMP = X( IOFFX ) * Y( IOFFY ) GTMP = ABS( X( IOFFX ) * Y( IOFFY ) ) G( I ) = ABS( ALPHA ) * GTMP + ABS( A( IOFFA ) ) A( IOFFA ) = ALPHA * ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PSVMCH * END SUBROUTINE PSVMCH2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, $ JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N REAL ALPHA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL A( * ), G( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PSVMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * X (local input) REAL array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) REAL array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) REAL array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J, $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW, $ NPCOL, NPROW REAL EPS, ERRI, GTMP, ATMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFXJ = IX + ( JX - 1 ) * LDX + ( J - 1 ) * INCX IOFFYJ = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IOFFXI = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFYI = IY + ( JY - 1 ) * LDY + ( I - 1 ) * INCY ATMP = X( IOFFXI ) * Y( IOFFYJ ) ATMP = ATMP + Y( IOFFYI ) * X( IOFFXJ ) GTMP = ABS( X( IOFFXI ) * Y( IOFFYJ ) ) GTMP = GTMP + ABS( Y( IOFFYI ) * X( IOFFXJ ) ) G( I ) = ABS( ALPHA ) * GTMP + ABS( A( IOFFA ) ) A( IOFFA ) = ALPHA*ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PSVMCH2 * END SUBROUTINE PSMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANSA, TRANSB INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N REAL ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL A( * ), B( * ), C( * ), CT( * ), G( * ), $ PC( * ) * .. * * Purpose * ======= * * PSMMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) REAL array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * C (local input/local output) REAL array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) REAL array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) REAL array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP, TRANA, TRANB INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA, $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC, $ MYCOL, MYROW, NPCOL, NPROW REAL EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * TRANA = LSAME( TRANSA, 'T' ).OR.LSAME( TRANSA, 'C' ) TRANB = LSAME( TRANSB, 'T' ).OR.LSAME( TRANSB, 'C' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 240 J = 1, N * IOFFC = IC + ( JC + J - 2 ) * LDC DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( .NOT.TRANA .AND. .NOT.TRANB ) THEN DO 30 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 20 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA .AND. .NOT.TRANB ) THEN DO 50 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA .AND. TRANB ) THEN DO 70 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 60 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA .AND. TRANB ) THEN DO 90 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 80 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 80 CONTINUE 90 CONTINUE END IF * DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 200 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, M ) IN = IC + IBB - 1 * DO 210 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 210 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 230 I = IN+1, IC+M-1, DESCC( MB_ ) IBB = MIN( IC+M-I, DESCC( MB_ ) ) * DO 220 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 220 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 230 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 250 * 240 CONTINUE * 250 CONTINUE * RETURN * * End of PSMMCH * END SUBROUTINE PSMMCH1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, $ ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N REAL ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), CT( * ), G( * ), PC( * ) * .. * * Purpose * ======= * * PSMMCH1 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least * zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * C (local input/local output) REAL array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) REAL array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) REAL array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA, $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW REAL EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS( A( IOFFAK ) ) * $ ABS( A( IOFFAN ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS( A( IOFFAK ) ) * $ ABS( A( IOFFAN ) ) 40 CONTINUE 50 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PSMMCH1 * END SUBROUTINE PSMMCH2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N REAL ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL A( * ), B( * ), C( * ), CT( * ), G( * ), $ PC( * ) * .. * * Purpose * ======= * * PSMMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrices A and B have * to be transposed or not before computing the matrix-matrix * product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at * least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) REAL array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * C (local input/local output) REAL array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) REAL array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) REAL array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) REAL array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J, $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW, $ NPCOL, NPROW REAL EPS, ERRI * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS( A( IOFFAN ) ) * ABS( B( IOFFBK ) ) + $ ABS( B( IOFFBN ) ) * ABS( A( IOFFAK ) ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS( A( IOFFAN ) ) * ABS( B( IOFFBK ) ) + $ ABS( B( IOFFBN ) ) * ABS( A( IOFFAK ) ) ) 40 CONTINUE 50 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = CT( I ) + BETA * C( IOFFC ) G( I ) = G( I ) + ABS( BETA )*ABS( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PSMMCH2 * END SUBROUTINE PSMMCH3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, M, N REAL ALPHA, BETA, ERR * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), PC( * ) * .. * * Purpose * ======= * * PSMMCH3 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix addi- * tion. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input) REAL array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * C (local input/local output) REAL array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) REAL array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * ERR (global output) REAL * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, NOTRAN, ROWREP, UPPER INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J, $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL, $ NPROW REAL ERR0, ERRI, PREC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, PB_INFOG2L, $ PSERRAXPBY, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PREC = PSLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * Compute expected result in C using data in A and C. This part of * the computation is performed by every process in the grid. * INFO = 0 ERR = ZERO * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) LDPC = MAX( 1, DESCC( LLD_ ) ) ROWREP = ( DESCC( RSRC_ ).EQ.-1 ) COLREP = ( DESCC( CSRC_ ).EQ.-1 ) * IF( NOTRAN ) THEN * DO 20 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( JA - 1 + J - JC ) * LDA * DO 10 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFA = IOFFA + 1 IOFFC = IOFFC + 1 * 10 CONTINUE * 20 CONTINUE * ELSE * DO 40 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 30 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PSERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 30 CONTINUE * 40 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PSMMCH3 * END SUBROUTINE PSERRAXPBY( ERRBND, ALPHA, X, BETA, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL ALPHA, BETA, ERRBND, PREC, X, Y * .. * * Purpose * ======= * * PSERRAXPBY serially computes y := beta*y + alpha * x and returns a * scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) REAL * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * X (global input) REAL * On entry, X specifies the scalar x to be scaled. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. * * Y (global input/global output) REAL * On entry, Y specifies the scalar y to be added. On exit, Y * contains the resulting scalar y. * * PREC (global input) REAL * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO, ZERO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0, $ ZERO = 0.0E+0 ) * .. * .. Local Scalars .. REAL ADDBND, FACT, SUMPOS, SUMNEG, TMP * .. * .. Intrinsic Functions .. * .. * .. Executable Statements .. * SUMPOS = ZERO SUMNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = ALPHA * X IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * TMP = BETA * Y IF( TMP.GE.ZERO ) THEN SUMPOS = SUMPOS + TMP * FACT ELSE SUMNEG = SUMNEG - TMP * FACT END IF * Y = ( BETA * Y ) + ( ALPHA * X ) * ERRBND = ADDBND * MAX( SUMPOS, SUMNEG ) * RETURN * * End of PSERRAXPBY * END REAL FUNCTION PSLAMCH( ICTXT, CMACH ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 CMACH INTEGER ICTXT * .. * * Purpose * ======= * * PSLAMCH determines single precision machine parameters. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * CMACH (global input) CHARACTER*1 * On entry, CMACH specifies the value to be returned by PSLAMCH * as follows: * = 'E' or 'e', PSLAMCH := eps, * = 'S' or 's , PSLAMCH := sfmin, * = 'B' or 'b', PSLAMCH := base, * = 'P' or 'p', PSLAMCH := eps*base, * = 'N' or 'n', PSLAMCH := t, * = 'R' or 'r', PSLAMCH := rnd, * = 'M' or 'm', PSLAMCH := emin, * = 'U' or 'u', PSLAMCH := rmin, * = 'L' or 'l', PSLAMCH := emax, * = 'O' or 'o', PSLAMCH := rmax, * * where * * eps = relative machine precision, * sfmin = safe minimum, such that 1/sfmin does not overflow, * base = base of the machine, * prec = eps*base, * t = number of (base) digits in the mantissa, * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise, * emin = minimum exponent before (gradual) underflow, * rmin = underflow threshold - base**(emin-1), * emax = largest exponent before overflow, * rmax = overflow threshold - (base**emax)*(1-eps). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER IDUMM REAL TEMP * .. * .. External Subroutines .. EXTERNAL PB_TOPGET, SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL SLAMCH EXTERNAL LSAME, SLAMCH * .. * .. Executable Statements .. * TEMP = SLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL SGAMX2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL SGAMN2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PSLAMCH = TEMP * RETURN * * End of PSLAMCH * END SUBROUTINE PSLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, JA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno- * ted by sub( A ) to beta on the diagonal and alpha on the offdiago- * nals. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the part of the submatrix sub( A ) * to be set: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant to which the offdiagonal elements are to be set. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta, i.e., the constant * to which the diagonal elements are to be set. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be set. On exit, the * leading m by n submatrix sub( A ) is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * and IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER, $ UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA, $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC, $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP, $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD, $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1, $ UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_SLASET * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * ISROWREP = ( DESCA2( RSRC_ ).LT.0 ) ISCOLREP = ( DESCA2( CSRC_ ).LT.0 ) LDA = DESCA2( LLD_ ) * UPPER = .NOT.( LSAME( UPLO, 'L' ) ) LOWER = .NOT.( LSAME( UPLO, 'U' ) ) * IF( ( ( LOWER.AND.UPPER ).AND.( ALPHA.EQ.BETA ) ).OR. $ ( ISROWREP .AND. ISCOLREP ) ) THEN IF( ( MP.GT.0 ).AND.( NQ.GT.0 ) ) $ CALL PB_SLASET( UPLO, MP, NQ, 0, ALPHA, BETA, $ A( IIA + ( JJA - 1 ) * LDA ), LDA ) RETURN END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, MP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( ISROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( ISCOLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * M1 = MP N1 = NQ * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_SLASET( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, BETA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_SLASET( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ ALPHA, A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_SLASET( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ ALPHA, A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASET( UPLO, MBLOC, INBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASET( 'ALL', TMP1, INBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASET( 'ALL', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_SLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_SLASET( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_SLASET( 'All', IMBLOC, TMP1, 0, ALPHA, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_SLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASET( UPLO, MBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASET( 'All', TMP1, NBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * RETURN * * End of PSLASET * END SUBROUTINE PSLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N REAL ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_INFOG2L, PB_SLASCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_SLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_SLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_SLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_SLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_SLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_SLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_SLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PSLASCAL * END SUBROUTINE PSLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( LDA, * ) * .. * * Purpose * ======= * * PSLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP REAL ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PB_SLAGEN, PSLADOM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, REAL * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_SLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 1, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_SLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) ALPHA = REAL( MAXMN ) * IF( IOFFDA.GE.0 ) THEN CALL PSLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PSLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PSLAGEN * END SUBROUTINE PSLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N REAL ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP REAL ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ABS( ATMP ) + ALPHA 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PSLADOM * END SUBROUTINE PB_PSLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PB_PSLAPRNT prints to the standard output a submatrix sub( A ) deno- * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by * the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the row index of the printing pro- * cess. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the column index of the printing * process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM is the name of the matrix to be printed. * * NOUT (global input) INTEGER * On entry, NOUT specifies the output unit number. When NOUT is * equal to 6, the submatrix is printed on the screen. * * WORK (local workspace) REAL array * On entry, WORK is a work array of dimension at least equal to * MAX( IMB_A, MB_A ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PSLAPRN2 * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * CALL BLACS_GRIDINFO( DESCA2( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( DESCA2( RSRC_ ).GE.0 ) THEN IF( DESCA2( CSRC_ ).GE.0 ) THEN CALL PB_PSLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, ICPRNT, $ CMATNM, NOUT, DESCA2( RSRC_ ), $ DESCA2( CSRC_ ), WORK ) ELSE DO 10 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Colum-replicated array -- ' , $ 'copy in process column: ', PCOL CALL PB_PSLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, DESCA2( RSRC_ ), $ PCOL, WORK ) 10 CONTINUE END IF ELSE IF( DESCA2( CSRC_ ).GE.0 ) THEN DO 20 PROW = 0, NPROW - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Row-replicated array -- ' , $ 'copy in process row: ', PROW CALL PB_PSLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, $ DESCA2( CSRC_ ), WORK ) 20 CONTINUE ELSE DO 40 PROW = 0, NPROW - 1 DO 30 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Replicated array -- ' , $ 'copy in process (', PROW, ',', PCOL, ')' CALL PB_PSLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, PCOL, $ WORK ) 30 CONTINUE 40 CONTINUE END IF END IF * RETURN * * End of PB_PSLAPRNT * END SUBROUTINE PB_PSLAPRN2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, PROW, PCOL, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL AISCOLREP, AISROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, PB_INFOG2L, $ SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) II = IIA JJ = JJA IF( DESCA( RSRC_ ).LT.0 ) THEN AISROWREP = .TRUE. IAROW = PROW ICURROW = PROW ELSE AISROWREP = .FALSE. ICURROW = IAROW END IF IF( DESCA( CSRC_ ).LT.0 ) THEN AISCOLREP = .TRUE. IACOL = PCOL ICURCOL = PCOL ELSE AISCOLREP = .FALSE. ICURCOL = IACOL END IF LDA = DESCA( LLD_ ) LDW = MAX( DESCA( IMB_ ), DESCA( MB_ ) ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA+JB-1 DO 60 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IB = DESCA( IMB_ )-IA+1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', E16.8 ) * RETURN * * End of PB_PSLAPRN2 * END SUBROUTINE PB_SFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N REAL CHKVAL * .. * .. Array Arguments .. REAL A( * ) * .. * * Purpose * ======= * * PB_SFILLPAD surrounds a two dimensional local array with a guard-zone * initialized to the value CHKVAL. The user may later call the routine * PB_SCHEKPAD to discover if the guardzone has been violated. There are * three guardzones. The first is a buffer of size IPRE that is before * the start of the array. The second is the buffer of size IPOST which * is after the end of the array to be padded. Finally, there is a guard * zone inside every column of the array to be padded, in the elements * of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LDA,N). On exit, this * array is the padded array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) REAL * On entry, CHKVAL specifies the value to pad the array with. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no pre-guardzone in PB_SFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no post-guardzone in PB_SFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + ( LDA - M ) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PB_SFILLPAD * END SUBROUTINE PB_SCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N REAL CHKVAL * .. * .. Array Arguments .. CHARACTER*(*) MESS REAL A( * ) * .. * * Purpose * ======= * * PB_SCHEKPAD checks that the padding around a local array has not been * overwritten since the call to PB_SFILLPAD. Three types of errors are * reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells that the 5th element of the IPRE long buffer has been overwrit- * ten with the value 18, where it should still have the value CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells that the 19th element after the end of A was overwritten with * the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells that the element at the 12th row and 3rd column of A was over- * written with the value of 22, where it should still have the value of * CHKVAL. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * MESS (local input) CHARACTER*(*) * On entry, MESS is a ttring containing a user-defined message. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (LDA,N). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) REAL * On entry, CHKVAL specifies the value to pad the array with. * * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ A( I ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PB_SCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, A( I ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PB_SCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, A( I ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G11.4 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G11.4 ) * RETURN * * End of PB_SCHEKPAD * END SUBROUTINE PB_SLASET( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PB_SLASET initializes a two-dimensional array A to beta on the diago- * nal specified by IOFFD and alpha on the offdiagonals. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * = 'D' or 'd' Only the diagonal of A is set, * Otherwise: All of the array A is set. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the value to which the offdiagonal * array elements are set to. * * BETA (input) REAL * On entry, BETA specifies the value to which the diagonal ar- * ray elements are set to. * * A (input/output) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be set, and the strictly lower trape- * zoidal part of A is not referenced; When IUPLO = 'L' or 'l', * the leading m by n part of the array A must contain the * lower trapezoidal part of the matrix as specified by IOFFD to * be set, and the strictly upper trapezoidal part of A is * not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA and the strictly lower triangular * part of the array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA and the strictly upper triangular * part of the array to ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 50 I = 1, JTMP - 1 A( I, J ) = ALPHA 50 CONTINUE A( JTMP, J ) = BETA 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the array to BETA on the diagonal. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 90 CONTINUE * ELSE * * Set the array to BETA on the diagonal and ALPHA on the * offdiagonal. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA 100 CONTINUE 110 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 120 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 120 CONTINUE END IF * END IF * RETURN * * End of PB_SLASET * END SUBROUTINE PB_SLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PB_SLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha. * * A (input/output) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_SLASCAL * END SUBROUTINE PB_SLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) REAL A( LDA, * ) * .. * * Purpose * ======= * * PB_SLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) REAL array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP REAL DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME REAL PB_SRAND EXTERNAL LSAME, PB_SRAND * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) .OR. LSAME( AFORM, 'C' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( ( LSAME( AFORM, 'S' ) ).OR.( LSAME( AFORM, 'H' ) ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = PB_SRAND( 0 ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = PB_SRAND( 0 ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = PB_SRAND( 0 ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = PB_SRAND( 0 ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = PB_SRAND( 0 ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = PB_SRAND( 0 ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * END IF * RETURN * * End of PB_SLAGEN * END REAL FUNCTION PB_SRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. External Functions .. REAL PB_SRAN EXTERNAL PB_SRAN * .. * .. Executable Statements .. * PB_SRAND = ONE - TWO * PB_SRAN( IDUMM ) * RETURN * * End of PB_SRAND * END REAL FUNCTION PB_SRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_SRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648E+9, $ POW16 = 6.5536E+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_SRAN = ( REAL( IRAND( 1 ) ) + POW16 * REAL( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_SRAN * END scalapack-1.8.0/PBLAS/TESTING/pdblas2tst.f0000640000175000017500000034647110363532303017542 0ustar muammarmuammar PROGRAM PDBLA2TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PDBLA2TST is the main testing program for the PBLAS Level 2 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 60 lines: * 'Level 2 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PDBLAS2TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * 1.0D0 value of ALPHA * 1.0D0 value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PDGEMV T put F for no test in the same column * PDSYMV T put F for no test in the same column * PDTRMV T put F for no test in the same column * PDTRSV T put F for no test in the same column * PDGER T put F for no test in the same column * PDSYR T put F for no test in the same column * PDSYR2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * DBLESZ indicates the length in bytes on the given platform * for a double precision real. By default, DBLESZ is set to * eight. * * MEM DOUBLE PRECISION array * MEM is an array of dimension TOTMEM / DBLESZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, DBLESZ, TOTMEM, $ MEMSIZ, NSUBS DOUBLE PRECISION ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, ZERO = 0.0D+0, $ ONE = 1.0D+0, PADVAL = -9923.0D+0, $ NSUBS = 7, ROGUE = -1.0D+10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IGAP, IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, $ INBA, INBX, INBY, INCX, INCY, IPA, IPG, IPMATA, $ IPMATX, IPMATY, IPOSTA, IPOSTX, IPOSTY, IPREA, $ IPREX, IPREY, IPX, IPY, IVERB, IX, IXSEED, IY, $ IYSEED, J, JA, JX, JY, K, LDA, LDX, LDY, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY, TSKIP, TSTCNT REAL THRESH DOUBLE PRECISION ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCX( DLEN_ ), $ DESCXR( DLEN_ ), DESCY( DLEN_ ), $ DESCYR( DLEN_ ), IAVAL( MAXTESTS ), IERR( 6 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DCHEKPAD, PB_DESCSET2, PB_DFILLPAD, $ PB_DLASCAL, PB_DLASET, PB_PDLAPRNT, $ PDBLA2TSTINFO, PDBLAS2TSTCHK, PDBLAS2TSTCHKE, $ PDCHKARG2, PDCHKVOUT, PDGEMV, PDGER, PDLAGEN, $ PDLASCAL, PDLASET, PDMPRNT, PDSYMV, PDSYR, $ PDSYR2, PDTRMV, PDTRSV, PDVPRNT, PMDESCCHK, $ PMDIMCHK, PVDESCCHK, PVDIMCHK * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MOD * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PDGEMV ', 'PDSYMV ', 'PDTRMV ', $ 'PDTRSV ', 'PDGER ', 'PDSYR ', $ 'PDSYR2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDBLA2TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, MAXTESTS, NGRIDS, $ PVAL, MAXGRIDS, QVAL, MAXGRIDS, NBLOG, LTEST, $ SOF, TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PDBLAS2TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) INBA = INBAVAL( J ) MBA = MBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) INBX = INBXVAL( J ) MBX = MBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) INBY = INBYVAL( J ) MBY = MBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPX = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREX IPY = IPX + DESCX( LLD_ )*NQX + IPOSTX + IPREY IPMATA = IPY + DESCY( LLD_ )*NQY + IPOSTY IPMATX = IPMATA + MA*NA IPMATY = IPMATX + MX*NX IPG = IPMATY + MAX( MX*NX, MY*NY ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPG + MAX( M, N ) - 1 + $ MAX( MAX( IMBA, MBA ), $ MAX( MAX( IMBX, MBX ), $ MAX( IMBY, MBY ) ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*DBLESZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) SNAMES( K ) END IF * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ) THEN AFORM = 'S' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PDLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PDLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN * CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * ELSE * * If Y is not needed, generate a copy of X instead * CALL PB_DESCSET2( DESCYR, MX, NX, IMBX, INBX, MBX, $ NBX, -1, -1, ICTXT, MAX( 1, MX ) ) CALL PDLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCYR, IXSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A * IF( ( K.EQ.2 .OR. K.EQ.6 .OR. K.EQ.7 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PDLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) IF( K.NE.2 ) THEN CALL PB_DLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PDLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) IF( K.NE.2 ) THEN CALL PB_DLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( K.EQ.3 .OR. K.EQ.4 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PDLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_DLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PDLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_DLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) CALL PDLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_DLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PDLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_DLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PDLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_DLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / DBLE( MAX( NROWA, NCOLA ) ) CALL PDLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_DLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * END IF * * Pad the guard zones of A, X and Y * CALL PB_DFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * CALL PB_DFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_DFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PDCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PDLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_INITIAL_A', $ NOUT, MEM( IPG ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, 0, $ 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPG ) ) END IF * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PDLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PDLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF * IF( YCHECK( K ) ) THEN IF( IVERB.EQ.2 ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PDLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PDLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PDLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF END IF * * Call the Level 2 PBLAS routine * INFO = 0 IF( K.EQ.1 ) THEN * * Test PDGEMV * CALL PDGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PDSYMV * CALL PDSYMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.3 ) THEN * * Test PDTRMV * CALL PDTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PDTRSV * CALL PDTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.5 ) THEN * * Test PDGER * CALL PDGER( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.6 ) THEN * * Test PDSYR * CALL PDSYR( UPLO, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.7 ) THEN * * Test PDSYR2 * CALL PDSYR2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_DCHEKPAD( ICTXT, SNAMES( K ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), IPREA, $ IPOSTA, PADVAL ) * CALL PB_DCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), IPREX, $ IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_DCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check the computations * CALL PDBLAS2TSTCHK( ICTXT, NOUT, K, UPLO, TRANS, DIAG, M, $ N, ALPHA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, MEM( IPMATX ), $ MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, THRESH, ROGUE, $ MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PDCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Check input-only array arguments * CALL PDCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, IERR( 4 ) ) CALL PDCHKVOUT( NLX, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 5 ) ) * IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_A', $ SNAMES( K ) END IF * IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_X', $ SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PDCHKVOUT( NLY, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PDLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PDMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PDLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', $ NOUT, MEM( IPMATA ) ) END IF IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PDLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( NLX.GT.0 ) $ CALL PDVPRNT( ICTXT, NOUT, NLX, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PDLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PDLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PDMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PDLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( NLY.GT.0 ) $ CALL PDVPRNT( ICTXT, NOUT, NLY, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PDLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PDLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9980 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, ' Tested Subroutine: ', A ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9983 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9982 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9981 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9980 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9979 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9978 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9977 FORMAT( 2X, 'Testing Summary') 9976 FORMAT( 2X, 'End of Tests.' ) 9975 FORMAT( 2X, 'Tests started.' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PDBLA2TST * END SUBROUTINE PDBLA2TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, SOF, TEE, $ IAM, IGAP, IVERB, NPROCS, THRESH, ALPHA, $ BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PDBLA2TSTINFO get the needed startup information for testing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least seven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) DOUBLE PRECISION * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) DOUBLE PRECISION * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS+4 ) with NSUBS equal to 7. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, DGEBR2D, DGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D *ype real dble cplx zplx * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PDBLAS2TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 2 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the real double precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : ', G16.6 ) 9981 FORMAT( 2X, 'Beta : ', G16.6 ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PDBLA2TSTINFO * END SUBROUTINE PDBLAS2TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PDBLAS2TSTCHKE tests the error exits of the Level 2 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 7 (NSUBS). * If LTEST( 1 ) is .TRUE., PDGEMV will be tested; * If LTEST( 2 ) is .TRUE., PDSYMV will be tested; * If LTEST( 3 ) is .TRUE., PDTRMV will be tested; * If LTEST( 4 ) is .TRUE., PDTRSV will be tested; * If LTEST( 5 ) is .TRUE., PDGER will be tested; * If LTEST( 6 ) is .TRUE., PDSYR will be tested; * If LTEST( 7 ) is .TRUE., PDSYR2 will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 7 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PDDIMEE, PDGEMV, PDGER, $ PDMATEE, PDOPTEE, PDSYMV, PDSYR, PDSYR2, $ PDTRMV, PDTRSV, PDVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/21, 22, 23, 23, 24, 25, 27/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PDGEMV * I = 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDGEMV, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDGEMV, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDGEMV, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDGEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYMV, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYMV, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYMV, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDSYMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PDTRMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDTRMV, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDTRMV, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDTRMV, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDTRMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PDTRSV * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDTRSV, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDTRSV, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDTRSV, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDTRSV, SCODE( I ), SNAMES( I ) ) END IF * * Test PDGER * I = I + 1 IF( LTEST( I ) ) THEN CALL PDDIMEE( ICTXT, NOUT, PDGER, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDGER, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDGER, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYR * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYR, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYR, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDSYR, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYR, SCODE( I ), SNAMES( I ) ) END IF * * Test PDSYR2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PDOPTEE( ICTXT, NOUT, PDSYR2, SCODE( I ), SNAMES( I ) ) CALL PDDIMEE( ICTXT, NOUT, PDSYR2, SCODE( I ), SNAMES( I ) ) CALL PDVECEE( ICTXT, NOUT, PDSYR2, SCODE( I ), SNAMES( I ) ) CALL PDMATEE( ICTXT, NOUT, PDSYR2, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PDBLAS2TSTCHKE * END SUBROUTINE PDCHKARG2( ICTXT, NOUT, SNAME, UPLO, TRANS, DIAG, M, $ N, ALPHA, IA, JA, DESCA, IX, JX, DESCX, $ INCX, BETA, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCA( * ), DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PDCHKARG2 checks the input-only arguments of the Level 2 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 2 PBLAS * operation. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the TRANS option in the Level 2 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 2 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, TRANSREF, UPLOREF INTEGER I, IAREF, INCXREF, INCYREF, IXREF, IYREF, $ JAREF, JXREF, JYREF, MREF, MYCOL, MYROW, NPCOL, $ NPROW, NREF DOUBLE PRECISION ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCXREF( DLEN_ ), $ DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG TRANSREF = TRANS UPLOREF = UPLO MREF = M NREF = N ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IXREF = IX JXREF = JX DO 20 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 20 CONTINUE INCXREF = INCX BETAREF = BETA IYREF = IY JYREF = JY DO 30 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 30 CONTINUE INCYREF = INCY * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( TRANS, TRANSREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANS' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PDCHKARG2 * END SUBROUTINE PDBLAS2TSTCHK( ICTXT, NOUT, NROUT, UPLO, TRANS, DIAG, $ M, N, ALPHA, A, PA, IA, JA, DESCA, X, $ PX, IX, JX, DESCX, INCX, BETA, Y, PY, $ IY, JY, DESCY, INCY, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT, NROUT REAL THRESH DOUBLE PRECISION ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION A( * ), PA( * ), PX( * ), PY( * ), WORK( * ), $ X( * ), Y( * ) * .. * * Purpose * ======= * * PDBLAS2TSTCHK performs the computational tests of the Level 2 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PDGEMV will be tested; * else if NROUT = 2, PDSYMV will be tested; * else if NROUT = 3, PDTRMV will be tested; * else if NROUT = 4, PDTRSV will be tested; * else if NROUT = 5, PDGER will be tested; * else if NROUT = 6, PDSYR will be tested; * else if NROUT = 7, PDSYR2 will be tested; * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies if the matrix operand A is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of A. * * N (global input) INTEGER * On entry, N specifies the number of columns of A. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) DOUBLE PRECISION array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) DOUBLE PRECISION array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) DOUBLE PRECISION array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) DOUBLE PRECISION * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular or symmetric matrices. * * WORK (workspace) DOUBLE PRECISION array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, N ). This array is used to store the compu- * ted gauges (see PDMVCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DTRSV, PB_DLASET, PDCHKMIN, $ PDCHKVIN, PDMVCH, PDTRMV, PDVMCH, PDVMCH2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PDGEMV * * Check the resulting vector Y * CALL PDMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, $ IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, $ INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PDCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) IF( LSAME( TRANS, 'N' ) ) THEN CALL PDCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) ELSE CALL PDCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PDSYMV * * Check the resulting vector Y * CALL PDMVCH( ICTXT, 'No transpose', N, N, ALPHA, A, IA, JA, $ DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, $ JY, DESCY, INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PDCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PDTRMV * * Check the resulting vector X * CALL PDMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, Y, IX, $ JX, DESCX, INCX, ZERO, X, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PDTRSV * * Check the resulting vector X * CALL DTRSV( UPLO, TRANS, DIAG, N, A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ), X( IX+(JX-1)*DESCX( M_ ) ), INCX ) CALL PDTRMV( UPLO, TRANS, DIAG, N, PA, IA, JA, DESCA, PX, IX, $ JX, DESCX, INCX ) CALL PDMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, ZERO, Y, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_DLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_DLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PDCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PDGER * * Check the resulting matrix A * CALL PDVMCH( ICTXT, 'Ge', M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, JA, $ DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PDCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PDCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.6 ) THEN * * Test PDSYR * * Check the resulting matrix A * CALL PDVMCH( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, $ INCX, X, IX, JX, DESCX, INCX, A, PA, IA, JA, $ DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PDCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.7 ) THEN * * Test PDSYR2 * * Check the resulting matrix A * CALL PDVMCH2( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, INCX, $ Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, $ WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PDCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PDCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'X' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'Y' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9997 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9996 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PDBLAS2TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/PCBLAS1TST.dat0000640000175000017500000000310610363532303017404 0ustar muammarmuammar'Level 1 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PCBLAS1TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0E0, -3.0E0) value of ALPHA 4 number of tests problems 14 44 28 7 values of N 36 9 39 53 values of M_X 24 67 3 12 values of N_X 2 5 2 5 values of IMB_X 2 5 2 5 values of INB_X 2 5 2 5 values of MB_X 2 5 2 5 values of NB_X 0 0 0 0 values of RSRC_X 0 0 0 0 values of CSRC_X 5 3 1 1 values of IX 2 6 1 1 values of JX 1 9 1 53 values of INCX 2 6 35 14 values of M_Y 27 55 43 12 values of N_Y 2 5 2 5 values of IMB_Y 2 5 2 5 values of INB_Y 2 5 2 5 values of MB_Y 2 5 2 5 values of NB_Y 0 0 0 0 values of RSRC_Y 0 0 0 0 values of CSRC_Y 1 4 1 1 values of IY 7 6 1 1 values of JY 2 6 35 1 values of INCY PCSWAP T put F for no test in the same column PCSCAL T put F for no test in the same column PCSSCAL T put F for no test in the same column PCCOPY T put F for no test in the same column PCAXPY T put F for no test in the same column PCDOTU T put F for no test in the same column PCDOTC T put F for no test in the same column PSCNRM2 T put F for no test in the same column PSCASUM T put F for no test in the same column PCAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/PZBLAS1TST.dat0000640000175000017500000000310610363532303017433 0ustar muammarmuammar'Level 1 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS1TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0D0, -3.0D0) value of ALPHA 4 number of tests problems 14 44 28 7 values of N 36 9 39 53 values of M_X 24 67 3 12 values of N_X 2 5 2 5 values of IMB_X 2 5 2 5 values of INB_X 2 5 2 5 values of MB_X 2 5 2 5 values of NB_X 0 0 0 0 values of RSRC_X 0 0 0 0 values of CSRC_X 5 3 1 1 values of IX 2 6 1 1 values of JX 1 9 1 53 values of INCX 2 6 35 14 values of M_Y 27 55 43 12 values of N_Y 2 5 2 5 values of IMB_Y 2 5 2 5 values of INB_Y 2 5 2 5 values of MB_Y 2 5 2 5 values of NB_Y 0 0 0 0 values of RSRC_Y 0 0 0 0 values of CSRC_Y 1 4 1 1 values of IY 7 6 1 1 values of JY 2 6 35 1 values of INCY PZSWAP T put F for no test in the same column PZSCAL T put F for no test in the same column PZDSCAL T put F for no test in the same column PZCOPY T put F for no test in the same column PZAXPY T put F for no test in the same column PZDOTU T put F for no test in the same column PZDOTC T put F for no test in the same column PDZNRM2 T put F for no test in the same column PDZASUM T put F for no test in the same column PZAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/pcblas3tst.f0000640000175000017500000042076110363532303017535 0ustar muammarmuammar PROGRAM PCBLA3TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PCBLA3TST is the main testing program for the Level 3 PBLAS routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * * from the following 64 lines: * 'Level 3 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PCBLAS3TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0E0, 0.0E0) value of ALPHA * (1.0E0, 0.0E0) value of BETA * 2 number of tests problems * 'N' 'U' values of DIAG * 'L' 'R' values of SIDE * 'N' 'T' values of TRANSA * 'N' 'T' values of TRANSB * 'U' 'L' values of UPLO * 3 4 values of M * 3 4 values of N * 3 4 values of K * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_B * 6 10 values of N_B * 2 5 values of IMB_B * 2 5 values of INB_B * 2 5 values of MB_B * 2 5 values of NB_B * 0 1 values of RSRC_B * 0 0 values of CSRC_B * 1 1 values of IB * 1 1 values of JB * 6 10 values of M_C * 6 10 values of N_C * 2 5 values of IMB_C * 2 5 values of INB_C * 2 5 values of MB_C * 2 5 values of NB_C * 0 1 values of RSRC_C * 0 0 values of CSRC_C * 1 1 values of IC * 1 1 values of JC * PCGEMM T put F for no test in the same column * PCSYMM T put F for no test in the same column * PCHEMM T put F for no test in the same column * PCSYRK T put F for no test in the same column * PCHERK T put F for no test in the same column * PCSYR2K T put F for no test in the same column * PCHER2K T put F for no test in the same column * PCTRMM T put F for no test in the same column * PCTRSM T put F for no test in the same column * PCGEADD T put F for no test in the same column * PCTRADD T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * REALSZ INTEGER * CPLXSZ INTEGER * REALSZ and CPLXSZ indicate the length in bytes on the given * platform for a single precision real and a single precision * complex. By default, REALSZ is set to four and CPLXSZ is set * to eight. * * MEM COMPLEX array * MEM is an array of dimension TOTMEM / CPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, CPLXSZ, TOTMEM, $ MEMSIZ, NSUBS, REALSZ COMPLEX ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ CPLXSZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ, REALSZ = 4, $ ONE = ( 1.0E+0, 0.0E+0 ), $ PADVAL = ( -9923.0E+0, -9923.0E+0 ), $ ROGUE = ( -1.0E+10, 1.0E+10 ), $ ZERO = ( 0.0E+0, 0.0E+0 ), NSUBS = 11 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 ADIAGDO, AFORM, CFORM, DIAG, SIDE, TRANSA, $ TRANSB, UPLO INTEGER CSRCA, CSRCB, CSRCC, I, IA, IAM, IASEED, IB, $ IBSEED, IC, ICSEED, ICTXT, IGAP, IMBA, IMBB, $ IMBC, IMIDA, IMIDB, IMIDC, INBA, INBB, INBC, $ IPA, IPB, IPC, IPG, IPMATA, IPMATB, IPMATC, $ IPOSTA, IPOSTB, IPOSTC, IPREA, IPREB, IPREC, $ IPW, IVERB, J, JA, JB, JC, K, L, LDA, LDB, LDC, $ M, MA, MB, MBA, MBB, MBC, MC, MEMREQD, MPA, $ MPB, MPC, MYCOL, MYROW, N, NA, NB, NBA, NBB, $ NBC, NC, NCOLA, NCOLB, NCOLC, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQA, NQB, NQC, NROWA, $ NROWB, NROWC, NTESTS, OFFDA, OFFDC, RSRCA, $ RSRCB, RSRCC, TSKIP, TSTCNT REAL THRESH COMPLEX ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL BCHECK( NSUBS ), CCHECK( NSUBS ), $ LTEST( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), SIDEVAL( MAXTESTS ), $ TRNAVAL( MAXTESTS ), TRNBVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCBVAL( MAXTESTS ), $ CSCCVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCB( DLEN_ ), $ DESCBR( DLEN_ ), DESCC( DLEN_ ), $ DESCCR( DLEN_ ), IAVAL( MAXTESTS ), $ IBVAL( MAXTESTS ), ICVAL( MAXTESTS ), $ IERR( 6 ), IMBAVAL( MAXTESTS ), $ IMBBVAL( MAXTESTS ), IMBCVAL( MAXTESTS ), $ INBAVAL( MAXTESTS ), INBBVAL( MAXTESTS ), $ INBCVAL( MAXTESTS ), JAVAL( MAXTESTS ), $ JBVAL( MAXTESTS ), JCVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), KVAL( MAXTESTS ), $ MAVAL( MAXTESTS ), MBAVAL( MAXTESTS ), $ MBBVAL( MAXTESTS ), MBCVAL( MAXTESTS ), $ MBVAL( MAXTESTS ), MCVAL( MAXTESTS ), $ MVAL( MAXTESTS ), NAVAL( MAXTESTS ), $ NBAVAL( MAXTESTS ), NBBVAL( MAXTESTS ), $ NBCVAL( MAXTESTS ), NBVAL( MAXTESTS ), $ NCVAL( MAXTESTS ), NVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCAVAL( MAXTESTS ), RSCBVAL( MAXTESTS ), $ RSCCVAL( MAXTESTS ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_CCHEKPAD, PB_CFILLPAD, PB_CLASCAL, $ PB_CLASET, PB_DESCSET2, PB_PCLAPRNT, $ PCBLA3TSTINFO, PCBLAS3TSTCHK, PCBLAS3TSTCHKE, $ PCCHKARG3, PCCHKMOUT, PCGEADD, PCGEMM, PCHEMM, $ PCHER2K, PCHERK, PCIPSET, PCLAGEN, PCLASCAL, $ PCLASET, PCMPRNT, PCSYMM, PCSYR2K, PCSYRK, $ PCTRADD, PCTRMM, PCTRSM, PMDESCCHK, PMDIMCHK * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_FCEIL EXTERNAL PB_FCEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PCGEMM ', 'PCSYMM ', 'PCHEMM ', $ 'PCSYRK ', 'PCHERK ', 'PCSYR2K', $ 'PCHER2K', 'PCTRMM ', 'PCTRSM ', $ 'PCGEADD', 'PCTRADD'/ DATA BCHECK/.TRUE., .TRUE., .TRUE., .FALSE., $ .FALSE., .TRUE., .TRUE., .TRUE., .TRUE., $ .FALSE., .FALSE./ DATA CCHECK/.TRUE., .TRUE., .TRUE., .TRUE., .TRUE., $ .TRUE., .TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, * so that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IBSEED = 200 ICSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PCBLA3TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, NVAL, $ KVAL, MAVAL, NAVAL, IMBAVAL, MBAVAL, $ INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, IAVAL, $ JAVAL, MBVAL, NBVAL, IMBBVAL, MBBVAL, $ INBBVAL, NBBVAL, RSCBVAL, CSCBVAL, IBVAL, $ JBVAL, MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, ICVAL, $ JCVAL, MAXTESTS, NGRIDS, PVAL, MAXGRIDS, $ QVAL, MAXGRIDS, NBLOG, LTEST, SOF, TEE, IAM, $ IGAP, IVERB, NPROCS, THRESH, ALPHA, BETA, $ MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PCBLAS3TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) SIDE = SIDEVAL( J ) TRANSA = TRNAVAL( J ) TRANSB = TRNBVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) K = KVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) MBA = MBAVAL( J ) INBA = INBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MB = MBVAL( J ) NB = NBVAL( J ) IMBB = IMBBVAL( J ) MBB = MBBVAL( J ) INBB = INBBVAL( J ) NBB = NBBVAL( J ) RSRCB = RSCBVAL( J ) CSRCB = CSCBVAL( J ) IB = IBVAL( J ) JB = JBVAL( J ) * MC = MCVAL( J ) NC = NCVAL( J ) IMBC = IMBCVAL( J ) MBC = MBCVAL( J ) INBC = INBCVAL( J ) NBC = NBCVAL( J ) RSRCC = RSCCVAL( J ) CSRCC = CSCCVAL( J ) IC = ICVAL( J ) JC = JCVAL( J ) * IF( IAM.EQ.0 ) THEN * TSTCNT = TSTCNT + 1 * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, K, SIDE, UPLO, TRANSA, $ TRANSB, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IB, JB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IC, JC, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( SIDE, 'L' ).AND. $ .NOT.LSAME( SIDE, 'R' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'SIDE' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSA' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANSB, 'N' ).AND. $ .NOT.LSAME( TRANSB, 'T' ).AND. $ .NOT.LSAME( TRANSB, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANSB' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'B', DESCB, $ BLOCK_CYCLIC_2D_INB, MB, NB, IMBB, INBB, $ MBB, NBB, RSRCB, CSRCB, MPB, NQB, IPREB, $ IMIDB, IPOSTB, IGAP, GAPMUL, IERR( 2 ) ) * CALL PMDESCCHK( ICTXT, NOUT, 'C', DESCC, $ BLOCK_CYCLIC_2D_INB, MC, NC, IMBC, INBC, $ MBC, NBC, RSRCC, CSRCC, MPC, NQC, IPREC, $ IMIDC, IPOSTC, IGAP, GAPMUL, IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDB = MAX( 1, MB ) LDC = MAX( 1, MC ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPB = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREB IPC = IPB + DESCB( LLD_ )*NQB + IPOSTB + IPREC IPMATA = IPC + DESCC( LLD_ )*NQC + IPOSTC IPMATB = IPMATA + MA*NA IPMATC = IPMATB + MB*NB IPG = IPMATC + MAX( MB*NB, MC*NC ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * IPW = IPG + MAX( MAX( MAX( IMBA, MBA ), $ MAX( IMBB, MBB ) ), $ MAX( IMBC, MBC ) ) + MAX( M, MAX( N, K ) ) MEMREQD = IPW + PB_FCEIL( REAL( MAX( M, MAX( N, K ) ) ) * $ REAL( REALSZ ), REAL( CPLXSZ ) ) - 1 IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) MEMREQD*CPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 3 routines * DO 30 L = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( L ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9986 ) SNAMES( L ) END IF * * Define the size of the operands * IF( L.EQ.1 ) THEN * * PCGEMM * NROWC = M NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF( LSAME( TRANSB, 'N' ) ) THEN NROWB = K NCOLB = N ELSE NROWB = N NCOLB = K END IF * ELSE IF( L.EQ.2 .OR. L.EQ.3 ) THEN * * PCSYMM, PCHEMM * NROWC = M NCOLC = N NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF * ELSE IF( L.EQ.4 .OR. L.EQ.5 ) THEN * * PCSYRK, PCHERK * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K ELSE NROWA = K NCOLA = N END IF NROWB = 0 NCOLB = 0 * ELSE IF( L.EQ.6 .OR. L.EQ.7 ) THEN * * PCSYR2K, PCHER2K * NROWC = N NCOLC = N IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = N NCOLA = K NROWB = N NCOLB = K ELSE NROWA = K NCOLA = N NROWB = K NCOLB = N END IF * ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN NROWB = M NCOLB = N IF( LSAME( SIDE, 'L' ) ) THEN NROWA = M NCOLA = M ELSE NROWA = N NCOLA = N END IF NROWC = 0 NCOLC = 0 * ELSE IF( L.EQ.10 .OR. L.EQ.11 ) THEN * * PCGEADD, PCTRADD * IF( LSAME( TRANSA, 'N' ) ) THEN NROWA = M NCOLA = N ELSE NROWA = N NCOLA = M END IF NROWC = M NCOLC = N NROWB = 0 NCOLB = 0 * END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWB, NCOLB, 'B', IB, JB, $ DESCB, IERR( 2 ) ) CALL PMDIMCHK( ICTXT, NOUT, NROWC, NCOLC, 'C', IC, JC, $ DESCC, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF * * Check special values of TRANSA for symmetric and * hermitian rank-k and rank-2k updates. * IF( L.EQ.4 .OR. L.EQ.6 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'T' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9975 ) 'TRANSA' KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN IF( .NOT.LSAME( TRANSA, 'N' ).AND. $ .NOT.LSAME( TRANSA, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9975 ) 'TRANSA' KSKIP( L ) = KSKIP( L ) + 1 GO TO 30 END IF END IF * * Generate distributed matrices A, B and C * IF( L.EQ.2 ) THEN * * PCSYMM * AFORM = 'S' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.3 ) THEN * * PCHEMM * AFORM = 'H' ADIAGDO = 'N' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE IF( L.EQ.4 .OR. L.EQ.6 ) THEN * * PCSYRK, PCSYR2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'S' OFFDC = IC - JC * ELSE IF( L.EQ.5 .OR. L.EQ.7 ) THEN * * PCHERK, PCHER2K * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'H' OFFDC = IC - JC * ELSE IF( ( L.EQ.9 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN * * PCTRSM * AFORM = 'N' ADIAGDO = 'D' OFFDA = IA - JA CFORM = 'N' OFFDC = 0 * ELSE * * Default values * AFORM = 'N' ADIAGDO = 'N' OFFDA = 0 CFORM = 'N' OFFDC = 0 * END IF * CALL PCLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) * IF( BCHECK( L ) ) $ CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCB, IBSEED, MEM( IPB ), $ DESCB( LLD_ ) ) * IF( CCHECK( L ) ) $ CALL PCLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCC, ICSEED, MEM( IPC ), $ DESCC( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PCLAGEN( .FALSE., AFORM, ADIAGDO, OFFDA, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) * IF( BCHECK( L ) ) THEN CALL PB_DESCSET2( DESCBR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCBR, IBSEED, MEM( IPMATB ), $ DESCBR( LLD_ ) ) END IF * IF( CCHECK( L ) ) THEN * CALL PB_DESCSET2( DESCCR, MC, NC, IMBC, INBC, MBC, $ NBC, -1, -1, ICTXT, MAX( 1, MC ) ) CALL PCLAGEN( .FALSE., CFORM, 'No diag', OFFDC, MC, $ NC, 1, 1, DESCCR, ICSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * ELSE * * If C is not needed, generate a copy of B instead * CALL PB_DESCSET2( DESCCR, MB, NB, IMBB, INBB, MBB, $ NBB, -1, -1, ICTXT, MAX( 1, MB ) ) CALL PCLAGEN( .FALSE., 'None', 'No diag', 0, MB, NB, $ 1, 1, DESCCR, IBSEED, MEM( IPMATC ), $ DESCCR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A, B, C * IF( ( ( L.EQ.2 ).OR. ( L.EQ.3 ) ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is symmetric or Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PCLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PCLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) * END IF * ELSE IF( ( ( L.EQ.4 ).OR.( L.EQ.5 ).OR.( L.EQ.6 ).OR. $ ( L.EQ.7 ) ).AND. $ ( MAX( NROWC, NCOLC ).GT.1 ) ) THEN * * The distributed matrix C is symmetric or Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PCLASET( 'Upper', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC, JC+1, $ DESCC ) CALL PB_CLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of C. * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PCLASET( 'Lower', NROWC-1, NCOLC-1, ROGUE, $ ROGUE, MEM( IPC ), IC+1, JC, $ DESCC ) CALL PB_CLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * ELSE IF( L.EQ.8 .OR. L.EQ.9 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PCLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_CLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PCLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_CLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.9 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) CALL PCLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_CLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PCLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_CLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PCLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_CLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( L.EQ.9 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ CMPLX( REAL( MAX( NROWA, NCOLA ) ) ) CALL PCLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_CLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * ELSE IF( L.EQ.11 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix C is lower triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PCLASET( 'Upper', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC, $ JC+1, DESCC ) CALL PB_CLASET( 'Upper', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC-1+JC*LDC ), LDC ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix C is upper triangular * IF( MAX( NROWC, NCOLC ).GT.1 ) THEN CALL PCLASET( 'Lower', NROWC-1, NCOLC-1, $ ROGUE, ROGUE, MEM( IPC ), IC+1, $ JC, DESCC ) CALL PB_CLASET( 'Lower', NROWC-1, NCOLC-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATC+IC+(JC-1)*LDC ), $ LDC ) END IF * END IF * END IF * * Pad the guard zones of A, B and C * CALL PB_CFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_CFILLPAD( ICTXT, MPB, NQB, MEM( IPB-IPREB ), $ DESCB( LLD_ ), IPREB, IPOSTB, $ PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_CFILLPAD( ICTXT, MPC, NQC, MEM( IPC-IPREC ), $ DESCC( LLD_ ), IPREC, IPOSTC, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PCCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PCLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, $ 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPW ) ) END IF * IF( BCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PCLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, JB, $ DESCB, 0, 0, $ 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MB, NB, MEM( IPB ), 1, 1, DESCB, $ 0, 0, 'PARALLEL_INITIAL_B', NOUT, $ MEM( IPW ) ) END IF END IF * IF( CCHECK( L ) ) THEN IF( IVERB.EQ.2 ) THEN CALL PB_PCLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, JC, $ DESCC, 0, 0, $ 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PCLAPRNT( MC, NC, MEM( IPC ), 1, 1, DESCC, $ 0, 0, 'PARALLEL_INITIAL_C', NOUT, $ MEM( IPW ) ) END IF END IF * * Call the Level 3 PBLAS routine * INFO = 0 IF( L.EQ.1 ) THEN * * Test PCGEMM * CALL PCGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.2 ) THEN * * Test PCSYMM * CALL PCSYMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.3 ) THEN * * Test PCHEMM * CALL PCIPSET( 'Bignum', NROWA, MEM( IPA ), IA, JA, $ DESCA ) * CALL PCHEMM( SIDE, UPLO, M, N, ALPHA, MEM( IPA ), IA, $ JA, DESCA, MEM( IPB ), IB, JB, DESCB, $ BETA, MEM( IPC ), IC, JC, DESCC ) * CALL PCIPSET( 'Zero', NROWA, MEM( IPA ), IA, JA, $ DESCA ) * ELSE IF( L.EQ.4 ) THEN * * Test PCSYRK * CALL PCSYRK( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.5 ) THEN * * Test PCHERK * IF( ( ( CMPLX( REAL( ALPHA ) ).NE.ZERO ).AND. $ ( K.NE.0 ) ).OR. $ ( CMPLX( REAL( BETA ) ).NE.ONE ) ) $ CALL PCIPSET( 'Bignum', N, MEM( IPC ), IC, JC, $ DESCC ) * CALL PCHERK( UPLO, TRANSA, N, K, REAL( ALPHA ), $ MEM( IPA ), IA, JA, DESCA, REAL( BETA ), $ MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.6 ) THEN * * Test PCSYR2K * CALL PCSYR2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, BETA, MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.7 ) THEN * * Test PCHER2K * IF( ( ( ALPHA.NE.ZERO ).AND.( K.NE.0 ) ).OR. $ ( CMPLX( REAL( BETA ) ).NE.ONE ) ) $ CALL PCIPSET( 'Bignum', N, MEM( IPC ), IC, JC, $ DESCC ) * CALL PCHER2K( UPLO, TRANSA, N, K, ALPHA, MEM( IPA ), $ IA, JA, DESCA, MEM( IPB ), IB, JB, $ DESCB, REAL( BETA ), MEM( IPC ), IC, JC, $ DESCC ) * ELSE IF( L.EQ.8 ) THEN * * Test PCTRMM * CALL PCTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * ELSE IF( L.EQ.9 ) THEN * * Test PCTRSM * CALL PCTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ MEM( IPA ), IA, JA, DESCA, MEM( IPB ), $ IB, JB, DESCB ) * * ELSE IF( L.EQ.10 ) THEN * * Test PCGEADD * CALL PCGEADD( TRANSA, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, BETA, MEM( IPC ), IC, JC, DESCC ) * ELSE IF( L.EQ.11 ) THEN * * Test PCTRADD * CALL PCTRADD( UPLO, TRANSA, M, N, ALPHA, MEM( IPA ), $ IA, JA, DESCA, BETA, MEM( IPC ), IC, JC, $ DESCC ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( L ) = KSKIP( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_CCHEKPAD( ICTXT, SNAMES( L ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), $ IPREA, IPOSTA, PADVAL ) * IF( BCHECK( L ) ) THEN CALL PB_CCHEKPAD( ICTXT, SNAMES( L ), MPB, NQB, $ MEM( IPB-IPREB ), DESCB( LLD_ ), $ IPREB, IPOSTB, PADVAL ) END IF * IF( CCHECK( L ) ) THEN CALL PB_CCHEKPAD( ICTXT, SNAMES( L ), MPC, NQC, $ MEM( IPC-IPREC ), DESCC( LLD_ ), $ IPREC, IPOSTC, PADVAL ) END IF * * Check the computations * CALL PCBLAS3TSTCHK( ICTXT, NOUT, L, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, $ MEM( IPMATA ), MEM( IPA ), IA, JA, $ DESCA, MEM( IPMATB ), MEM( IPB ), $ IB, JB, DESCB, BETA, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, THRESH, $ ROGUE, MEM( IPG ), MEM( IPW ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PCCHKARG3( ICTXT, NOUT, SNAMES( L ), SIDE, UPLO, $ TRANSA, TRANSB, DIAG, M, N, K, ALPHA, IA, $ JA, DESCA, IB, JB, DESCB, BETA, IC, JC, $ DESCC, INFO ) * * Check input-only array arguments * CALL PCCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), $ MEM( IPA ), IA, JA, DESCA, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_A', $ SNAMES( L ) END IF * IF( BCHECK( L ) ) THEN CALL PCCHKMOUT( NROWB, NCOLB, MEM( IPMATB ), $ MEM( IPB ), IB, JB, DESCB, IERR( 5 ) ) IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_B', $ SNAMES( L ) END IF END IF * IF( CCHECK( L ) ) THEN CALL PCCHKMOUT( NROWC, NCOLC, MEM( IPMATC ), $ MEM( IPC ), IC, JC, DESCC, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) 'PARALLEL_C', $ SNAMES( L ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN KFAIL( L ) = KFAIL( L ) + 1 ERRFLG = .TRUE. IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9985 ) SNAMES( L ) ELSE KPASS( L ) = KPASS( L ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( L ) END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PCLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PCMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PCLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) END IF IF( BCHECK( L ) ) THEN IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MB, NB, $ MEM( IPMATB ), LDB, 0, 0, $ 'SERIAL_B' ) CALL PB_PCLAPRNT( MB, NB, MEM( IPB ), 1, 1, $ DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PCMPRNT( ICTXT, NOUT, NROWB, NCOLB, $ MEM( IPMATB+IB-1+(JB-1)*LDB ), $ LDB, 0, 0, 'SERIAL_B' ) CALL PB_PCLAPRNT( NROWB, NCOLB, MEM( IPB ), IB, $ JB, DESCB, 0, 0, 'PARALLEL_B', $ NOUT, MEM( IPMATB ) ) END IF END IF IF( CCHECK( L ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PCMPRNT( ICTXT, NOUT, MC, NC, $ MEM( IPMATC ), LDC, 0, 0, $ 'SERIAL_C' ) CALL PB_PCLAPRNT( MC, NC, MEM( IPC ), 1, 1, $ DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( ( NROWB.GT.0 ).AND.( NCOLB.GT.0 ) ) $ CALL PCMPRNT( ICTXT, NOUT, NROWC, NCOLC, $ MEM( IPMATC+IC-1+(JC-1)*LDC ), $ LDC, 0, 0, 'SERIAL_C' ) CALL PB_PCLAPRNT( NROWC, NCOLC, MEM( IPC ), IC, $ JC, DESCC, 0, 0, 'PARALLEL_C', $ NOUT, MEM( IPMATC ) ) END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9982 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9978 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = 9979 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9981 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '-------------------' ) 9994 FORMAT( 2X, ' M N K SIDE UPLO TRANSA ', $ 'TRANSB DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,1X,I6,6X,A1,5X,A1,7X,A1,7X,A1,5X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IB JB MB NB IMBB INBB', $ ' MBB NBB RSRCB CSRCB' ) 9989 FORMAT( 2X, ' IC JC MC NC IMBC INBC', $ ' MBC NBC RSRCC CSRCC' ) 9988 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9987 FORMAT( 'Not enough memory. Need: ', I12 ) 9986 FORMAT( 2X, ' Tested Subroutine: ', A ) 9985 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9983 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9982 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9981 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9980 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9979 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9978 FORMAT( 2X, 'Testing Summary') 9977 FORMAT( 2X, 'End of Tests.' ) 9976 FORMAT( 2X, 'Tests started.' ) 9975 FORMAT( 2X, ' ***** ', A, ' has an incorrect value: ', $ ' BYPASS *****' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PCBLA3TST * END SUBROUTINE PCBLA3TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, SIDEVAL, $ TRNAVAL, TRNBVAL, UPLOVAL, MVAL, $ NVAL, KVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, $ CSCAVAL, IAVAL, JAVAL, MBVAL, NBVAL, $ IMBBVAL, MBBVAL, INBBVAL, NBBVAL, $ RSCBVAL, CSCBVAL, IBVAL, JBVAL, $ MCVAL, NCVAL, IMBCVAL, MBCVAL, $ INBCVAL, NBCVAL, RSCCVAL, CSCCVAL, $ ICVAL, JCVAL, LDVAL, NGRIDS, PVAL, $ LDPVAL, QVAL, LDQVAL, NBLOG, LTEST, SOF, $ TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), SIDEVAL( LDVAL ), $ TRNAVAL( LDVAL ), TRNBVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCBVAL( LDVAL ), $ CSCCVAL( LDVAL ), IAVAL( LDVAL ), $ IBVAL( LDVAL ), ICVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBBVAL( LDVAL ), $ IMBCVAL( LDVAL ), INBAVAL( LDVAL ), $ INBBVAL( LDVAL ), INBCVAL( LDVAL ), $ JAVAL( LDVAL ), JBVAL( LDVAL ), JCVAL( LDVAL ), $ KVAL( LDVAL ), MAVAL( LDVAL ), MBAVAL( LDVAL ), $ MBBVAL( LDVAL ), MBCVAL( LDVAL ), $ MBVAL( LDVAL ), MCVAL( LDVAL ), MVAL( LDVAL ), $ NAVAL( LDVAL ), NBAVAL( LDVAL ), $ NBBVAL( LDVAL ), NBCVAL( LDVAL ), $ NBVAL( LDVAL ), NCVAL( LDVAL ), NVAL( LDVAL ), $ PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCBVAL( LDVAL ), $ RSCCVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PCBLA3TSTINFO get the needed startup information for testing various * Level 3 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * SIDEVAL (global output) CHARACTER array * On entry, SIDEVAL is an array of dimension LDVAL. On exit, * this array contains the values of SIDE to run the code with. * * TRNAVAL (global output) CHARACTER array * On entry, TRNAVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSA to run the code * with. * * TRNBVAL (global output) CHARACTER array * On entry, TRNBVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANSB to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * KVAL (global output) INTEGER array * On entry, KVAL is an array of dimension LDVAL. On exit, this * array contains the values of K to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MBVAL (global output) INTEGER array * On entry, MBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( M_ ) to run the code * with. * * NBVAL (global output) INTEGER array * On entry, NBVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCB( N_ ) to run the code * with. * * IMBBVAL (global output) INTEGER array * On entry, IMBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( IMB_ ) to run the * code with. * * MBBVAL (global output) INTEGER array * On entry, MBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( MB_ ) to run the * code with. * * INBBVAL (global output) INTEGER array * On entry, INBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( INB_ ) to run the * code with. * * NBBVAL (global output) INTEGER array * On entry, NBBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( NB_ ) to run the * code with. * * RSCBVAL (global output) INTEGER array * On entry, RSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( RSRC_ ) to run the * code with. * * CSCBVAL (global output) INTEGER array * On entry, CSCBVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCB( CSRC_ ) to run the * code with. * * IBVAL (global output) INTEGER array * On entry, IBVAL is an array of dimension LDVAL. On exit, this * array contains the values of IB to run the code with. * * JBVAL (global output) INTEGER array * On entry, JBVAL is an array of dimension LDVAL. On exit, this * array contains the values of JB to run the code with. * * MCVAL (global output) INTEGER array * On entry, MCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( M_ ) to run the code * with. * * NCVAL (global output) INTEGER array * On entry, NCVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCC( N_ ) to run the code * with. * * IMBCVAL (global output) INTEGER array * On entry, IMBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( IMB_ ) to run the * code with. * * MBCVAL (global output) INTEGER array * On entry, MBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( MB_ ) to run the * code with. * * INBCVAL (global output) INTEGER array * On entry, INBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( INB_ ) to run the * code with. * * NBCVAL (global output) INTEGER array * On entry, NBCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( NB_ ) to run the * code with. * * RSCCVAL (global output) INTEGER array * On entry, RSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( RSRC_ ) to run the * code with. * * CSCCVAL (global output) INTEGER array * On entry, CSCCVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCC( CSRC_ ) to run the * code with. * * ICVAL (global output) INTEGER array * On entry, ICVAL is an array of dimension LDVAL. On exit, this * array contains the values of IC to run the code with. * * JCVAL (global output) INTEGER array * On entry, JCVAL is an array of dimension LDVAL. On exit, this * array contains the values of JC to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, SIDE, TRANSA, TRANSB, UPLO, * M, N, K, DESCA(:), IA, JA, DESCB(:), IB, JB, DESCC(:), IC, * JC. This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eleven. On * exit, if LTEST( i ) is .TRUE., the i-th Level 3 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) COMPLEX * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+38*NMAT+NSUBS+4 ) with NSUBS equal to 11. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J REAL EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, CGEBR2D, CGEBS2D, $ ICOPY, IGEBR2D, IGEBS2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PCBLAS3TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( SIDEVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRNBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( KVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCBVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JBVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBCVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCCVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( ICVAL ( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JCVAL ( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL CGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( SIDEVAL( J ) ) WORK( I+2 ) = ICHAR( TRNAVAL( J ) ) WORK( I+3 ) = ICHAR( TRNBVAL( J ) ) WORK( I+4 ) = ICHAR( UPLOVAL( J ) ) I = I + 5 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, KVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JBVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCCVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, ICVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JCVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 3 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex single precision '// $ 'Level 3 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PSLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL CGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 38*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) SIDEVAL( J ) = CHAR( WORK( I+1 ) ) TRNAVAL( J ) = CHAR( WORK( I+2 ) ) TRNBVAL( J ) = CHAR( WORK( I+3 ) ) UPLOVAL( J ) = CHAR( WORK( I+4 ) ) I = I + 5 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, KVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JBVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCCVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, ICVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JCVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9981 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PCBLA3TSTINFO * END SUBROUTINE PCBLAS3TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PCBLAS3TSTCHKE tests the error exits of the Level 3 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 11 (NSUBS). * If LTEST( 1 ) is .TRUE., PCGEMM will be tested; * If LTEST( 2 ) is .TRUE., PCSYMM will be tested; * If LTEST( 3 ) is .TRUE., PCHEMM will be tested; * If LTEST( 4 ) is .TRUE., PCSYRK will be tested; * If LTEST( 5 ) is .TRUE., PCHERK will be tested; * If LTEST( 6 ) is .TRUE., PCSYR2K will be tested; * If LTEST( 7 ) is .TRUE., PCHER2K will be tested; * If LTEST( 8 ) is .TRUE., PCTRMM will be tested; * If LTEST( 9 ) is .TRUE., PCTRSM will be tested; * If LTEST( 10 ) is .TRUE., PCGEADD will be tested; * If LTEST( 11 ) is .TRUE., PCTRADD will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 11 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PCDIMEE, PCGEADD, PCGEMM, $ PCHEMM, PCHER2K, PCHERK, PCMATEE, PCOPTEE, $ PCSYMM, PCSYR2K, PCSYRK, PCTRADD, PCTRMM, $ PCTRSM * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/31, 32, 32, 33, 34, 35, 36, 38, 38, 39, $ 40/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PCGEMM * I = 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCGEMM, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCGEMM, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCGEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCSYMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCSYMM, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCSYMM, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCSYMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHEMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHEMM, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHEMM, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHEMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCSYRK * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCSYRK, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCSYRK, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCSYRK, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHERK * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHERK, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHERK, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHERK, SCODE( I ), SNAMES( I ) ) END IF * * Test PCSYR2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCSYR2K, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCSYR2K, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCSYR2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PCHER2K * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCHER2K, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCHER2K, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCHER2K, SCODE( I ), SNAMES( I ) ) END IF * * Test PCTRMM * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCTRMM, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCTRMM, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCTRMM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCTRSM * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCTRSM, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCTRSM, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCTRSM, SCODE( I ), SNAMES( I ) ) END IF * * Test PCGEADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCGEADD, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCGEADD, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCGEADD, SCODE( I ), SNAMES( I ) ) END IF * * Test PCTRADD * I = I + 1 IF( LTEST( I ) ) THEN CALL PCOPTEE( ICTXT, NOUT, PCTRADD, SCODE( I ), SNAMES( I ) ) CALL PCDIMEE( ICTXT, NOUT, PCTRADD, SCODE( I ), SNAMES( I ) ) CALL PCMATEE( ICTXT, NOUT, PCTRADD, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PCBLAS3TSTCHKE * END SUBROUTINE PCCHKARG3( ICTXT, NOUT, SNAME, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, IA, JA, $ DESCA, IB, JB, DESCB, BETA, IC, JC, DESCC, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT COMPLEX ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*7 SNAME INTEGER DESCA( * ), DESCB( * ), DESCC( * ) * .. * * Purpose * ======= * * PCCHKARG3 checks the input-only arguments of the Level 3 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies the SIDE option in the Level 3 PBLAS * operation. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 3 PBLAS * operation. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the TRANSA option in the Level 3 * PBLAS operation. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the TRANSB option in the Level 3 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 3 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * K (global input) INTEGER * On entry, K specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, SIDEREF, TRANSAREF, TRANSBREF, UPLOREF INTEGER I, IAREF, IBREF, ICREF, JAREF, JBREF, JCREF, $ KREF, MREF, MYCOL, MYROW, NPCOL, NPROW, NREF COMPLEX ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCBREF( DLEN_ ), $ DESCCREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG SIDEREF = SIDE TRANSAREF = TRANSA TRANSBREF = TRANSB UPLOREF = UPLO MREF = M NREF = N KREF = K ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IBREF = IB JBREF = JB DO 20 I = 1, DLEN_ DESCBREF( I ) = DESCB( I ) 20 CONTINUE BETAREF = BETA ICREF = IC JCREF = JC DO 30 I = 1, DLEN_ DESCCREF( I ) = DESCC( I ) 30 CONTINUE * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( SIDE, SIDEREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'SIDE' ELSE IF( .NOT. LSAME( TRANSA, TRANSAREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSA' ELSE IF( .NOT. LSAME( TRANSB, TRANSBREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANSB' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( K.NE.KREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'K' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IB.NE.IBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IB' ELSE IF( JB.NE.JBREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JB' ELSE IF( DESCB( DTYPE_ ).NE.DESCBREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( DTYPE_ )' ELSE IF( DESCB( M_ ).NE.DESCBREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( M_ )' ELSE IF( DESCB( N_ ).NE.DESCBREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( N_ )' ELSE IF( DESCB( IMB_ ).NE.DESCBREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( IMB_ )' ELSE IF( DESCB( INB_ ).NE.DESCBREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( INB_ )' ELSE IF( DESCB( MB_ ).NE.DESCBREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( MB_ )' ELSE IF( DESCB( NB_ ).NE.DESCBREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( NB_ )' ELSE IF( DESCB( RSRC_ ).NE.DESCBREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( RSRC_ )' ELSE IF( DESCB( CSRC_ ).NE.DESCBREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CSRC_ )' ELSE IF( DESCB( CTXT_ ).NE.DESCBREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( CTXT_ )' ELSE IF( DESCB( LLD_ ).NE.DESCBREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCB( LLD_ )' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IC.NE.ICREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IC' ELSE IF( JC.NE.JCREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JC' ELSE IF( DESCC( DTYPE_ ).NE.DESCCREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( DTYPE_ )' ELSE IF( DESCC( M_ ).NE.DESCCREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( M_ )' ELSE IF( DESCC( N_ ).NE.DESCCREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( N_ )' ELSE IF( DESCC( IMB_ ).NE.DESCCREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( IMB_ )' ELSE IF( DESCC( INB_ ).NE.DESCCREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( INB_ )' ELSE IF( DESCC( MB_ ).NE.DESCCREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( MB_ )' ELSE IF( DESCC( NB_ ).NE.DESCCREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( NB_ )' ELSE IF( DESCC( RSRC_ ).NE.DESCCREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( RSRC_ )' ELSE IF( DESCC( CSRC_ ).NE.DESCCREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CSRC_ )' ELSE IF( DESCC( CTXT_ ).NE.DESCCREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( CTXT_ )' ELSE IF( DESCC( LLD_ ).NE.DESCCREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCC( LLD_ )' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PCCHKARG3 * END SUBROUTINE PCBLAS3TSTCHK( ICTXT, NOUT, NROUT, SIDE, UPLO, TRANSA, $ TRANSB, DIAG, M, N, K, ALPHA, A, PA, IA, $ JA, DESCA, B, PB, IB, JB, DESCB, BETA, $ C, PC, IC, JC, DESCC, THRESH, ROGUE, $ WORK, RWORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N, $ NOUT, NROUT REAL THRESH COMPLEX ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) REAL RWORK( * ) COMPLEX A( * ), B( * ), C( * ), PA( * ), PB( * ), $ PC( * ), WORK( * ) * .. * * Purpose * ======= * * PCBLAS3TSTCHK performs the computational tests of the Level 3 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PCGEMM will be tested; * else if NROUT = 2, PCSYMM will be tested; * else if NROUT = 3, PCHEMM will be tested; * else if NROUT = 4, PCSYRK will be tested; * else if NROUT = 5, PCHERK will be tested; * else if NROUT = 6, PCSYR2K will be tested; * else if NROUT = 7, PCHER2K will be tested; * else if NROUT = 8, PCTRMM will be tested; * else if NROUT = 9, PCTRSM will be tested; * else if NROUT = 10, PCGEADD will be tested; * else if NROUT = 11, PCTRADD will be tested; * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies if the multiplication should be per- * formed from the left or the right. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * PB (local input) COMPLEX array * On entry, PB is an array of dimension (DESCB( LLD_ ),*). This * array contains the local entries of the matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) COMPLEX * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular, symmetric or Hermitian ma- * trices. * * WORK (workspace) COMPLEX array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, MAX( N, K ) ). This array is used to store * a copy of a column of C (see PCMMCH). * * RWORK (workspace) REAL array * On entry, RWORK is an array of dimension LRWORK where LRWORK * is at least MAX( M, MAX( N, K ) ). This array is used to sto- * re the computed gauges (see PCMMCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on B has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on C has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL RZERO PARAMETER ( RZERO = 0.0E+0 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW REAL ERR COMPLEX ALPHA1, BETA1 * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CTRSM, PB_CLASET, PCCHKMIN, $ PCMMCH, PCMMCH1, PCMMCH2, PCMMCH3, PCTRMM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, REAL * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PCGEMM * * Check the resulting matrix C * CALL PCMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, JC, $ DESCC, WORK, RWORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, M, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, K, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF IF( LSAME( TRANSB, 'N' ) ) THEN CALL PCCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PCCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PCSYMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PCMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) ELSE CALL PCMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_CLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_CLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PCCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PCCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PCHEMM * * Check the resulting matrix C * IF( LSAME( SIDE, 'L' ) ) THEN CALL PCMMCH( ICTXT, 'No transpose', 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) ELSE CALL PCMMCH( ICTXT, 'No transpose', 'No transpose', M, N, N, $ ALPHA, B, IB, JB, DESCB, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_CLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( SIDE, 'L' ) ) THEN CALL PB_CLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF * IF( LSAME( SIDE, 'L' ) ) THEN CALL PCCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF CALL PCCHKMIN( ERR, M, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PCSYRK * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCMMCH1( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, BETA, C, PC, IC, JC, DESCC, $ WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PCMMCH1( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, IA, $ JA, DESCA, BETA, C, PC, IC, JC, DESCC, WORK, $ RWORK, ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.5 ) THEN * * Test PCHERK * * Check the resulting matrix C * BETA1 = CMPLX( REAL( BETA ), RZERO ) ALPHA1 = CMPLX( REAL( ALPHA ), RZERO ) IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCMMCH1( ICTXT, UPLO, 'Hermitian', N, K, ALPHA1, A, IA, $ JA, DESCA, BETA1, C, PC, IC, JC, DESCC, WORK, $ RWORK, ERR, IERR( 3 ) ) ELSE CALL PCMMCH1( ICTXT, UPLO, 'Conjugate transpose', N, K, $ ALPHA1, A, IA, JA, DESCA, BETA1, C, PC, IC, $ JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PCSYR2K * * Check the resulting matrix C * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCMMCH2( ICTXT, UPLO, 'No transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PCMMCH2( ICTXT, UPLO, 'Transpose', N, K, ALPHA, A, $ IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, $ IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PCCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PCCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PCCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PCHER2K * * Check the resulting matrix C * BETA1 = CMPLX( REAL( BETA ), RZERO ) IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCMMCH2( ICTXT, UPLO, 'Hermitian', N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA1, C, PC, IC, $ JC, DESCC, WORK, RWORK, ERR, IERR( 3 ) ) ELSE CALL PCMMCH2( ICTXT, UPLO, 'Conjugate transpose', N, K, $ ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, $ BETA1, C, PC, IC, JC, DESCC, WORK, RWORK, ERR, $ IERR( 3 ) ) END IF * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, N, K, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PCCHKMIN( ERR, N, K, B, PB, IB, JB, DESCB, IERR( 2 ) ) ELSE CALL PCCHKMIN( ERR, K, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PCCHKMIN( ERR, K, N, B, PB, IB, JB, DESCB, IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PCTRMM * * Check the resulting matrix B * IF( LSAME( SIDE, 'L' ) ) THEN CALL PCMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, $ ALPHA, A, IA, JA, DESCA, C, IB, JB, DESCB, $ ZERO, B, PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) ELSE CALL PCMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, $ ALPHA, C, IB, JB, DESCB, A, IA, JA, DESCA, $ ZERO, B, PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.9 ) THEN * * Test PCTRSM * * Check the resulting matrix B * CALL CTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ), $ B( IB+(JB-1)*DESCB( M_ ) ), DESCB( M_ ) ) CALL PCTRMM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, PA, IA, JA, $ DESCA, PB, IB, JB, DESCB ) IF( LSAME( SIDE, 'L' ) ) THEN CALL PCMMCH( ICTXT, TRANSA, 'No transpose', M, N, M, ALPHA, $ A, IA, JA, DESCA, B, IB, JB, DESCB, ZERO, C, $ PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) ELSE CALL PCMMCH( ICTXT, 'No transpose', TRANSA, M, N, N, ALPHA, $ B, IB, JB, DESCB, A, IA, JA, DESCA, ZERO, C, $ PB, IB, JB, DESCB, WORK, RWORK, ERR, $ IERR( 2 ) ) END IF * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) ELSE IF( ERR.GT.THRESH ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ERR END IF * * Check the input-only arguments * IF( LSAME( SIDE, 'L' ) ) THEN IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', M-1, M-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', M, M, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, M, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_CLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_CLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) END IF END IF CALL PCCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF ELSE IF( NROUT.EQ.10 ) THEN * * Test PCGEADD * * Check the resulting matrix C * CALL PCMMCH3( 'All', TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * ELSE IF( NROUT.EQ.11 ) THEN * * Test PCTRADD * * Check the resulting matrix C * CALL PCMMCH3( UPLO, TRANSA, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, IERR( 3 ) ) * * Check the input-only arguments * IF( LSAME( TRANSA, 'N' ) ) THEN CALL PCCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) ELSE CALL PCCHKMIN( ERR, N, M, A, PA, IA, JA, DESCA, IERR( 1 ) ) END IF * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'B' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'C' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9997 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PCBLAS3TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/pzblastst.f0000640000175000017500000147216410363532303017506 0ustar muammarmuammar SUBROUTINE PZOPTEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZOPTEE tests whether the PBLAS respond correctly to a bad option * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PZCHKOPT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2nd option * APOS = 2 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 3rd option * APOS = 3 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2'nd option * APOS = 2 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 .OR. SCODE.EQ.40 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 2'nd option * APOS = 2 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'S', APOS ) * * Check 2nd option * APOS = 2 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'U', APOS ) * * Check 3rd option * APOS = 3 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 4th option * APOS = 4 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'D', APOS ) * * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st option * APOS = 1 CALL PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * END IF * RETURN * * End of PZOPTEE * END SUBROUTINE PZCHKOPT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZCHKOPT tests the option ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the option to be * checked. ARGNAM can either be 'D', 'S', 'A', 'B', or 'U'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PZCALLSUB, PZSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. CHARACTER DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PZSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'D' ) ) THEN * * Generate bad DIAG option * DIAG = '/' * ELSE IF( LSAME( ARGNAM, 'S' ) ) THEN * * Generate bad SIDE option * SIDE = '/' * ELSE IF( LSAME( ARGNAM, 'A' ) ) THEN * * Generate bad TRANSA option * TRANSA = '/' * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Generate bad TRANSB option * TRANSB = '/' * ELSE IF( LSAME( ARGNAM, 'U' ) ) THEN * * Generate bad UPLO option * UPLO = '/' * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PZCHKOPT * END SUBROUTINE PZDIMEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZDIMEE tests whether the PBLAS respond correctly to a bad dimension * argument. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PZCHKDIM * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 .OR. SCODE.EQ.12 .OR. SCODE.EQ.13 .OR. $ SCODE.EQ.14 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) dimension * APOS = 1 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st dimension * APOS = 2 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.22 .OR. SCODE.EQ.25 .OR. SCODE.EQ.26 .OR. $ SCODE.EQ.27 ) THEN * * Check 1st (and only) dimension * APOS = 2 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) dimension * APOS = 4 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.24 ) THEN * * Check 1st dimension * APOS = 1 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 3rd dimension * APOS = 5 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.32 ) THEN * * Check 1st dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 .OR. SCODE.EQ.35 .OR. $ SCODE.EQ.36 ) THEN * * Check 1st dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * * Check 2nd dimension * APOS = 4 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'K', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st dimension * APOS = 1 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 2 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st dimension * APOS = 5 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 6 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st dimension * APOS = 2 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st dimension * APOS = 3 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'M', APOS ) * * Check 2nd dimension * APOS = 4 CALL PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'N', APOS ) * END IF * RETURN * * End of PZDIMEE * END SUBROUTINE PZCHKDIM( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZCHKDIM tests the dimension ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the dimension to be * checked. ARGNAM can either be 'M', 'N' or 'K'. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the option ARGNAM * to be tested. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER INFOT * .. * .. External Subroutines .. EXTERNAL PCHKPBE, PZCALLSUB, PZSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER KDIM, MDIM, NDIM COMMON /PBLASN/KDIM, MDIM, NDIM * .. * .. Executable Statements .. * * Reiniatilize the dummy arguments to correct values * CALL PZSETPBLAS( ICTXT ) * IF( LSAME( ARGNAM, 'M' ) ) THEN * * Generate bad MDIM * MDIM = -1 * ELSE IF( LSAME( ARGNAM, 'N' ) ) THEN * * Generate bad NDIM * NDIM = -1 * ELSE * * Generate bad KDIM * KDIM = -1 * END IF * * Set INFOT to the position of the bad dimension argument * INFOT = ARGPOS * * Call the PBLAS routine * CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * RETURN * * End of PZCHKDIM * END SUBROUTINE PZVECEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZVECEE tests whether the PBLAS respond correctly to a bad vector * argument. Each vector is described by: , I, J, * DESC, INC. Out of all these, only I, J, * DESC, and INC can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PZCHKMAT * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * * Check 1st vector * APOS = 2 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 7 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.12 .OR. SCODE.EQ.15 ) THEN * * Check 1st (and only) vector * APOS = 3 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.13 ) THEN * * Check 1st vector * APOS = 3 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 8 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.14 ) THEN * * Check 1st (and only) vector * APOS = 4 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * * Check 1st vector * APOS = 9 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 15 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st vector * APOS = 8 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 14 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.23 ) THEN * * Check 1st (and only) vector * APOS = 9 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st vector * APOS = 4 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * * Check 2nd vector * APOS = 9 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'Y', APOS ) * ELSE IF( SCODE.EQ.26 .OR. SCODE.EQ.27 ) THEN * * Check 1'st (and only) vector * APOS = 4 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'X', APOS ) * END IF * RETURN * * End of PZVECEE * END SUBROUTINE PZMATEE( ICTXT, NOUT, SUBPTR, SCODE, SNAME ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*7 SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZMATEE tests whether the PBLAS respond correctly to a bad matrix * argument. Each matrix is described by: , I, J, * and DESC. Out of all these, only I, J and DESC * can be tested. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER APOS * .. * .. External Subroutines .. EXTERNAL PZCHKMAT * .. * .. Executable Statements .. * * Level 2 PBLAS * IF( SCODE.EQ.21 .OR. SCODE.EQ.23 ) THEN * * Check 1st (and only) matrix * APOS = 5 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.22 ) THEN * * Check 1st (and only) matrix * APOS = 4 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.24 .OR. SCODE.EQ.27 ) THEN * * Check 1st (and only) matrix * APOS = 14 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * ELSE IF( SCODE.EQ.25 .OR. SCODE.EQ.26 ) THEN * * Check 1st (and only) matrix * APOS = 9 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * * Check 1st matrix * APOS = 7 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 16 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.32 .OR. SCODE.EQ.35 .OR. SCODE.EQ.36 ) THEN * * Check 1st matrix * APOS = 6 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * * Check 3nd matrix * APOS = 15 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.33 .OR. SCODE.EQ.34 ) THEN * * Check 1st matrix * APOS = 6 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.37 ) THEN * * Check 1st matrix * APOS = 4 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 9 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.38 ) THEN * * Check 1st matrix * APOS = 8 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 12 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'B', APOS ) * ELSE IF( SCODE.EQ.39 ) THEN * * Check 1st matrix * APOS = 5 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 10 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * ELSE IF( SCODE.EQ.40 ) THEN * * Check 1st matrix * APOS = 6 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'A', APOS ) * * Check 2nd matrix * APOS = 11 CALL PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, 'C', APOS ) * END IF * RETURN * * End of PZMATEE * END SUBROUTINE PZSETPBLAS( ICTXT ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT * .. * * Purpose * ======= * * PZSETPBLAS initializes *all* the dummy arguments to correct values. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION RONE COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ RONE = 1.0D+0 ) * .. * .. External Subroutines .. EXTERNAL PB_DESCSET2 * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM DOUBLE PRECISION USCLR COMPLEX*16 SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMPLEX*16 A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Set default values for options * DIAG = 'N' SIDE = 'L' TRANSA = 'N' TRANSB = 'N' UPLO = 'U' * * Set default values for scalars * KDIM = 1 MDIM = 1 NDIM = 1 ISCLR = 1 SCLR = ONE USCLR = RONE * * Set default values for distributed matrix A * A( 1, 1 ) = ONE A( 2, 1 ) = ONE A( 1, 2 ) = ONE A( 2, 2 ) = ONE IA = 1 JA = 1 CALL PB_DESCSET2( DESCA, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix B * B( 1, 1 ) = ONE B( 2, 1 ) = ONE B( 1, 2 ) = ONE B( 2, 2 ) = ONE IB = 1 JB = 1 CALL PB_DESCSET2( DESCB, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix C * C( 1, 1 ) = ONE C( 2, 1 ) = ONE C( 1, 2 ) = ONE C( 2, 2 ) = ONE IC = 1 JC = 1 CALL PB_DESCSET2( DESCC, 2, 2, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) * * Set default values for distributed matrix X * X( 1 ) = ONE X( 2 ) = ONE IX = 1 JX = 1 CALL PB_DESCSET2( DESCX, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCX = 1 * * Set default values for distributed matrix Y * Y( 1 ) = ONE Y( 2 ) = ONE IY = 1 JY = 1 CALL PB_DESCSET2( DESCY, 2, 1, 1, 1, 1, 1, 0, 0, ICTXT, 2 ) INCY = 1 * RETURN * * End of PZSETPBLAS * END SUBROUTINE PZCHKMAT( ICTXT, NOUT, SUBPTR, SCODE, SNAME, ARGNAM, $ ARGPOS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 ARGNAM INTEGER ARGPOS, ICTXT, NOUT, SCODE * .. * .. Array Arguments .. CHARACTER*(*) SNAME * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZCHKMAT tests the matrix (or vector) ARGNAM in any PBLAS routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * ARGNAM (global input) CHARACTER*(*) * On entry, ARGNAM specifies the name of the matrix or vector * to be checked. ARGNAM can either be 'A', 'B' or 'C' when one * wants to check a matrix, and 'X' or 'Y' for a vector. * * ARGPOS (global input) INTEGER * On entry, ARGPOS indicates the position of the first argument * of the matrix (or vector) ARGNAM. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER DESCMULT PARAMETER ( DESCMULT = 100 ) * .. * .. Local Scalars .. INTEGER I, INFOT, NPROW, NPCOL, MYROW, MYCOL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCHKPBE, PZCALLSUB, PZSETPBLAS * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Common Blocks .. INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ARGNAM, 'A' ) ) THEN * * Check IA. Set all other OK, bad IA * CALL PZSETPBLAS( ICTXT ) IA = -1 INFOT = ARGPOS + 1 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JA. Set all other OK, bad JA * CALL PZSETPBLAS( ICTXT ) JA = -1 INFOT = ARGPOS + 2 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCA. Set all other OK, bad DESCA * DO 10 I = 1, DLEN_ * * Set I'th entry of DESCA to incorrect value, rest ok. * CALL PZSETPBLAS( ICTXT ) DESCA( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCA, CSRCA, LDA * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PZSETPBLAS( ICTXT ) * * Test RSRCA >= NPROW * IF( I.EQ.RSRC_ ) $ DESCA( I ) = NPROW * * Test CSRCA >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCA( I ) = NPCOL * * Test LDA >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCA( I ) = 1 ELSE DESCA( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 10 CONTINUE * ELSE IF( LSAME( ARGNAM, 'B' ) ) THEN * * Check IB. Set all other OK, bad IB * CALL PZSETPBLAS( ICTXT ) IB = -1 INFOT = ARGPOS + 1 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JB. Set all other OK, bad JB * CALL PZSETPBLAS( ICTXT ) JB = -1 INFOT = ARGPOS + 2 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCB. Set all other OK, bad DESCB * DO 20 I = 1, DLEN_ * * Set I'th entry of DESCB to incorrect value, rest ok. * CALL PZSETPBLAS( ICTXT ) DESCB( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCB, CSRCB, LDB * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PZSETPBLAS( ICTXT ) * * Test RSRCB >= NPROW * IF( I.EQ.RSRC_ ) $ DESCB( I ) = NPROW * * Test CSRCB >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCB( I ) = NPCOL * * Test LDB >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCB( I ) = 1 ELSE DESCB( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 20 CONTINUE * ELSE IF( LSAME( ARGNAM, 'C' ) ) THEN * * Check IC. Set all other OK, bad IC * CALL PZSETPBLAS( ICTXT ) IC = -1 INFOT = ARGPOS + 1 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JC. Set all other OK, bad JC * CALL PZSETPBLAS( ICTXT ) JC = -1 INFOT = ARGPOS + 2 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCC. Set all other OK, bad DESCC * DO 30 I = 1, DLEN_ * * Set I'th entry of DESCC to incorrect value, rest ok. * CALL PZSETPBLAS( ICTXT ) DESCC( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCC, CSRCC, LDC * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PZSETPBLAS( ICTXT ) * * Test RSRCC >= NPROW * IF( I.EQ.RSRC_ ) $ DESCC( I ) = NPROW * * Test CSRCC >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCC( I ) = NPCOL * * Test LDC >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCC( I ) = 1 ELSE DESCC( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 30 CONTINUE * ELSE IF( LSAME( ARGNAM, 'X' ) ) THEN * * Check IX. Set all other OK, bad IX * CALL PZSETPBLAS( ICTXT ) IX = -1 INFOT = ARGPOS + 1 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JX. Set all other OK, bad JX * CALL PZSETPBLAS( ICTXT ) JX = -1 INFOT = ARGPOS + 2 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCX. Set all other OK, bad DESCX * DO 40 I = 1, DLEN_ * * Set I'th entry of DESCX to incorrect value, rest ok. * CALL PZSETPBLAS( ICTXT ) DESCX( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCX, CSRCX, LDX * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PZSETPBLAS( ICTXT ) * * Test RSRCX >= NPROW * IF( I.EQ.RSRC_ ) $ DESCX( I ) = NPROW * * Test CSRCX >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCX( I ) = NPCOL * * Test LDX >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCX( I ) = 1 ELSE DESCX( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 40 CONTINUE * * Check INCX. Set all other OK, bad INCX * CALL PZSETPBLAS( ICTXT ) INCX = -1 INFOT = ARGPOS + 4 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * ELSE * * Check IY. Set all other OK, bad IY * CALL PZSETPBLAS( ICTXT ) IY = -1 INFOT = ARGPOS + 1 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check JY. Set all other OK, bad JY * CALL PZSETPBLAS( ICTXT ) JY = -1 INFOT = ARGPOS + 2 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Check DESCY. Set all other OK, bad DESCY * DO 50 I = 1, DLEN_ * * Set I'th entry of DESCY to incorrect value, rest ok. * CALL PZSETPBLAS( ICTXT ) DESCY( I ) = -2 INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * * Extra tests for RSRCY, CSRCY, LDY * IF( ( I.EQ.RSRC_ ) .OR. ( I.EQ.CSRC_ ) .OR. $ ( I.EQ.LLD_ ) ) THEN * CALL PZSETPBLAS( ICTXT ) * * Test RSRCY >= NPROW * IF( I.EQ.RSRC_ ) $ DESCY( I ) = NPROW * * Test CSRCY >= NPCOL * IF( I.EQ.CSRC_ ) $ DESCY( I ) = NPCOL * * Test LDY >= MAX(1, PB_NUMROC(...)). Set to 1 as mat 2x2. * IF( I.EQ.LLD_ ) THEN IF( MYROW.EQ.0 .AND.MYCOL.EQ.0 ) THEN DESCY( I ) = 1 ELSE DESCY( I ) = 0 END IF END IF * INFOT = ( ( ARGPOS + 3 ) * DESCMULT ) + I CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * 50 CONTINUE * * Check INCY. Set all other OK, bad INCY * CALL PZSETPBLAS( ICTXT ) INCY = -1 INFOT = ARGPOS + 4 CALL PZCALLSUB( SUBPTR, SCODE ) CALL PCHKPBE( ICTXT, NOUT, SNAME, INFOT ) * END IF * RETURN * * End of PZCHKMAT * END SUBROUTINE PZCALLSUB( SUBPTR, SCODE ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER SCODE * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZCALLSUB calls the subroutine SUBPTR with the calling sequence iden- * tified by SCODE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SUBPTR (global input) SUBROUTINE * On entry, SUBPTR is a subroutine. SUBPTR must be declared * EXTERNAL in the calling subroutine. * * SCODE (global input) INTEGER * On entry, SCODE specifies the calling sequence code. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Common Blocks .. CHARACTER*1 DIAG, SIDE, TRANSA, TRANSB, UPLO INTEGER IA, IB, IC, INCX, INCY, ISCLR, IX, IY, JA, JB, $ JC, JX, JY, KDIM, MDIM, NDIM DOUBLE PRECISION USCLR COMPLEX*16 SCLR INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ ), $ DESCX( DLEN_ ), DESCY( DLEN_ ) COMPLEX*16 A( 2, 2 ), B( 2, 2 ), C( 2, 2 ), X( 2 ), Y( 2 ) COMMON /PBLASC/DIAG, SIDE, TRANSA, TRANSB, UPLO COMMON /PBLASD/DESCA, DESCB, DESCC, DESCX, DESCY COMMON /PBLASI/IA, IB, IC, INCX, INCY, ISCLR, IX, IY, $ JA, JB, JC, JX, JY COMMON /PBLASM/A, B, C COMMON /PBLASN/KDIM, MDIM, NDIM COMMON /PBLASS/SCLR, USCLR COMMON /PBLASV/X, Y * .. * .. Executable Statements .. * * Level 1 PBLAS * IF( SCODE.EQ.11 ) THEN * CALL SUBPTR( NDIM, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY ) * ELSE IF( SCODE.EQ.12 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.13 ) THEN * CALL SUBPTR( NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, JY, $ DESCY, INCY ) * ELSE IF( SCODE.EQ.14 ) THEN * CALL SUBPTR( NDIM, SCLR, ISCLR, X, IX, JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.15 ) THEN * CALL SUBPTR( NDIM, USCLR, X, IX, JX, DESCX, INCX ) * * Level 2 PBLAS * ELSE IF( SCODE.EQ.21 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.22 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, A, IA, JA, DESCA, X, IX, JX, $ DESCX, INCX, SCLR, Y, IY, JY, DESCY, INCY ) * ELSE IF( SCODE.EQ.23 ) THEN * CALL SUBPTR( UPLO, TRANSA, DIAG, NDIM, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX ) * ELSE IF( SCODE.EQ.24 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * ELSE IF( SCODE.EQ.25 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.26 ) THEN * CALL SUBPTR( UPLO, NDIM, USCLR, X, IX, JX, DESCX, INCX, A, IA, $ JA, DESCA ) * ELSE IF( SCODE.EQ.27 ) THEN * CALL SUBPTR( UPLO, NDIM, SCLR, X, IX, JX, DESCX, INCX, Y, IY, $ JY, DESCY, INCY, A, IA, JA, DESCA ) * * Level 3 PBLAS * ELSE IF( SCODE.EQ.31 ) THEN * CALL SUBPTR( TRANSA, TRANSB, MDIM, NDIM, KDIM, SCLR, A, IA, JA, $ DESCA, B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.32 ) THEN * CALL SUBPTR( SIDE, UPLO, MDIM, NDIM, SCLR, A, IA, JA, DESCA, B, $ IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.33 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.34 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, USCLR, A, IA, JA, DESCA, $ USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.35 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, SCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.36 ) THEN * CALL SUBPTR( UPLO, TRANSA, NDIM, KDIM, SCLR, A, IA, JA, DESCA, $ B, IB, JB, DESCB, USCLR, C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.37 ) THEN * CALL SUBPTR( MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, C, IC, $ JC, DESCC ) * ELSE IF( SCODE.EQ.38 ) THEN * CALL SUBPTR( SIDE, UPLO, TRANSA, DIAG, MDIM, NDIM, SCLR, A, IA, $ JA, DESCA, B, IB, JB, DESCB ) * ELSE IF( SCODE.EQ.39 ) THEN * CALL SUBPTR( TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, SCLR, $ C, IC, JC, DESCC ) * ELSE IF( SCODE.EQ.40 ) THEN * CALL SUBPTR( UPLO, TRANSA, MDIM, NDIM, SCLR, A, IA, JA, DESCA, $ SCLR, C, IC, JC, DESCC ) * END IF * RETURN * * End of PZCALLSUB * END SUBROUTINE PZERRSET( ERR, ERRMAX, XTRUE, X ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERR, ERRMAX COMPLEX*16 X, XTRUE * .. * * Purpose * ======= * * PZERRSET computes the absolute difference ERR = |XTRUE - X| and com- * pares it with zero. ERRMAX accumulates the absolute error difference. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERR (local output) DOUBLE PRECISION * On exit, ERR specifies the absolute difference |XTRUE - X|. * * ERRMAX (local input/local output) DOUBLE PRECISION * On entry, ERRMAX specifies a previously computed error. On * exit ERRMAX is the accumulated error MAX( ERRMAX, ERR ). * * XTRUE (local input) COMPLEX*16 * On entry, XTRUE specifies the true value. * * X (local input) COMPLEX*16 * On entry, X specifies the value to be compared to XTRUE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION PDDIFF EXTERNAL PDDIFF * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX * .. * .. Executable Statements .. * ERR = ABS( PDDIFF( DBLE( XTRUE ), DBLE( X ) ) ) ERR = MAX( ERR, ABS( PDDIFF( DIMAG( XTRUE ), DIMAG( X ) ) ) ) * ERRMAX = MAX( ERRMAX, ERR ) * RETURN * * End of PZERRSET * END SUBROUTINE PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N DOUBLE PRECISION ERRMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 PX( * ), X( * ) * .. * * Purpose * ======= * * PZCHKVIN checks that the submatrix sub( PX ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) DOUBLE PRECISION * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( X ) and sub( PX ). * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX*16 array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIX, IN, IXCOL, $ IXROW, J, JB, JJX, JN, KK, LDPX, LDX, LL, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PB_INFOG2L, PZERRSET * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) * LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( N.EQ.1 ) THEN * IF( ( MYROW.EQ.IXROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.IXCOL .OR. COLREP ) ) $ CALL PZERRSET( ERR, ERRMAX, X( IX+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) * ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * IF( MYROW.EQ.IXROW .OR. ROWREP ) THEN * ICURCOL = IXCOL IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 10 J = JX, JN CALL PZERRSET( ERR, ERRMAX, X( IX+(J-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) JJX = JJX + 1 10 CONTINUE END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 30 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 20 KK = 0, JB-1 CALL PZERRSET( ERR, ERRMAX, X( IX+(J+KK-1)*LDX ), $ PX( IIX+(JJX+KK-1)*LDPX ) ) 20 CONTINUE * JJX = JJX + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 30 CONTINUE * END IF * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * IF( MYCOL.EQ.IXCOL .OR. COLREP ) THEN * ICURROW = IXROW IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 40 I = IX, IN CALL PZERRSET( ERR, ERRMAX, X( I+(JX-1)*LDX ), $ PX( IIX+(JJX-1)*LDPX ) ) IIX = IIX + 1 40 CONTINUE END IF ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * DO 50 KK = 0, IB-1 CALL PZERRSET( ERR, ERRMAX, X( I+KK+(JX-1)*LDX ), $ PX( IIX+KK+(JJX-1)*LDPX ) ) 50 CONTINUE * IIX = IIX + IB * END IF * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PZCHKVIN * END SUBROUTINE PZCHKVOUT( N, X, PX, IX, JX, DESCX, INCX, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INFO, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 PX( * ), X( * ) * .. * * Purpose * ======= * * PZCHKVOUT checks that the matrix PX \ sub( PX ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector operand * sub( X ). N must be at least zero. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX*16 array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, ICURROW, II, IMBX, INBX, $ J, JB, JJ, KK, LDPX, LDX, LL, MBX, MPALL, $ MYCOL, MYCOLDIST, MYROW, MYROWDIST, NBX, NPCOL, $ NPROW, NQALL DOUBLE PRECISION EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PZERRSET * .. * .. External Functions .. INTEGER PB_NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCX( M_ ).LE.0 ).OR.( DESCX( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCX( M_ ), 1, DESCX( IMB_ ), DESCX( MB_ ), $ MYROW, DESCX( RSRC_ ), NPROW ) NQALL = PB_NUMROC( DESCX( N_ ), 1, DESCX( INB_ ), DESCX( NB_ ), $ MYCOL, DESCX( CSRC_ ), NPCOL ) * MBX = DESCX( MB_ ) NBX = DESCX( NB_ ) LDX = DESCX( M_ ) LDPX = DESCX( LLD_ ) ICURROW = DESCX( RSRC_ ) ICURCOL = DESCX( CSRC_ ) ROWREP = ( ICURROW.EQ.-1 ) COLREP = ( ICURCOL.EQ.-1 ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN IMBX = DESCX( IMB_ ) ELSE IMBX = MBX END IF IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN INBX = DESCX( INB_ ) ELSE INBX = NBX END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - ICURROW + NPROW, NPROW ) END IF IF( COLREP ) THEN MYCOLDIST = 0 ELSE MYCOLDIST = MOD( MYCOL - ICURCOL + NPCOL, NPCOL ) END IF II = 1 JJ = 1 * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * I = 1 IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) IB = MIN( DESCX( M_ ), DESCX( IMB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. J+KK.GT.JX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 10 CONTINUE 20 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 50 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 30 CONTINUE 40 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 50 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * DO 110 I = DESCX( IMB_ ) + 1, DESCX( M_ ), MBX IB = MIN( DESCX( M_ ) - I + 1, MBX ) * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN * IF( MYCOLDIST.EQ.0 ) THEN J = 1 ELSE J = DESCX( INB_ ) + ( MYCOLDIST - 1 ) * NBX + 1 END IF * JJ = 1 JB = MIN( MAX( 0, DESCX( N_ ) - J + 1 ), INBX ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 60 CONTINUE 70 CONTINUE IF( COLREP ) THEN J = J + INBX ELSE J = J + INBX + ( NPCOL - 1 ) * NBX END IF * DO 100 JJ = INBX+1, NQALL, NBX JB = MIN( NQALL-JJ+1, NBX ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.NE.IX .OR. J+KK.LT.JX .OR. $ J+KK.GT.JX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 80 CONTINUE 90 CONTINUE * IF( COLREP ) THEN J = J + NBX ELSE J = J + NPCOL * NBX END IF * 100 CONTINUE * II = II + IB * END IF * ICURROW = MOD( ICURROW + 1, NPROW ) * 110 CONTINUE * ELSE * * sub( X ) is a column vector * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) JB = MIN( DESCX( N_ ), DESCX( INB_ ) ) * DO 130 KK = 0, JB-1 DO 120 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. I+LL.GT.IX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 120 CONTINUE 130 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 160 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 150 KK = 0, JB-1 DO 140 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 140 CONTINUE 150 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 160 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 220 J = DESCX( INB_ ) + 1, DESCX( N_ ), NBX JB = MIN( DESCX( N_ ) - J + 1, NBX ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCX( IMB_ ) + ( MYROWDIST - 1 ) * MBX + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCX( M_ ) - I + 1 ), IMBX ) DO 180 KK = 0, JB-1 DO 170 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 170 CONTINUE 180 CONTINUE IF( ROWREP ) THEN I = I + IMBX ELSE I = I + IMBX + ( NPROW - 1 ) * MBX END IF * DO 210 II = IMBX+1, MPALL, MBX IB = MIN( MPALL-II+1, MBX ) * DO 200 KK = 0, JB-1 DO 190 LL = 0, IB-1 IF( J+KK.NE.JX .OR. I+LL.LT.IX .OR. $ I+LL.GT.IX+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ X( I+LL+(J+KK-1)*LDX ), $ PX( II+LL+(JJ+KK-1)*LDPX ) ) 190 CONTINUE 200 CONTINUE * IF( ROWREP ) THEN I = I + MBX ELSE I = I + NPROW * MBX END IF * 210 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * 220 CONTINUE * END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PZCHKVOUT * END SUBROUTINE PZCHKMIN( ERRMAX, M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N DOUBLE PRECISION ERRMAX * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 PA( * ), A( * ) * .. * * Purpose * ======= * * PZCHKMIN checks that the submatrix sub( PA ) remained unchanged. The * local array entries are compared element by element, and their dif- * ference is tested against 0.0 as well as the epsilon machine. Notice * that this difference should be numerically exactly the zero machine, * but because of the possible fluctuation of some of the data we flag- * ged differently a difference less than twice the epsilon machine. The * largest error is also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ERRMAX (global output) DOUBLE PRECISION * On exit, ERRMAX specifies the largest absolute element-wise * difference between sub( A ) and sub( PA ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * operand sub( A ). N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ KK, LDA, LDPA, LL, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, EPS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PB_INFOG2L, PZERRSET * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if posssible * IF( ( M.EQ.0 ).OR.( N.EQ.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, $ JJA, IAROW, IACOL ) * II = IIA JJ = JJA LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) ICURROW = IAROW ICURCOL = IACOL ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA + JB - 1 * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * DO 40 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 10 K = 0, IB-1 CALL PZERRSET( ERR, ERRMAX, A( IA+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 10 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 20 K = 0, IB-1 CALL PZERRSET( ERR, ERRMAX, A( I+K+(JA+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 20 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = IIA ICURROW = IAROW 40 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining column blocks * DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN DO 80 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 50 K = 0, IB-1 CALL PZERRSET( ERR, ERRMAX, A( IA+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 50 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) * * Loop over remaining block of rows * DO 70 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN DO 60 K = 0, IB-1 CALL PZERRSET( ERR, ERRMAX, $ A( I+K+(J+H-1)*LDA ), $ PA( II+K+(JJ+H-1)*LDPA ) ) 60 CONTINUE II = II + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) 70 CONTINUE * II = IIA ICURROW = IAROW 80 CONTINUE * JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 90 CONTINUE * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PZCHKMIN * END SUBROUTINE PZCHKMOUT( M, N, A, PA, IA, JA, DESCA, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), PA( * ) * .. * * Purpose * ======= * * PZCHKMOUT checks that the matrix PA \ sub( PA ) remained unchanged. * The local array entries are compared element by element, and their * difference is tested against 0.0 as well as the epsilon machine. No- * tice that this difference should be numerically exactly the zero ma- * chine, but because of the possible movement of some of the data we * flagged differently a difference less than twice the epsilon machine. * The largest error is reported. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( PA ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( PA ). N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, * If INFO > 0, the maximum abolute error found is in (0,eps], * If INFO < 0, the maximum abolute error found is in (eps,+oo). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER I, IB, ICTXT, ICURCOL, II, IMBA, J, JB, JJ, KK, $ LDA, LDPA, LL, MPALL, MYCOL, MYROW, MYROWDIST, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERR, ERRMAX * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, PZERRSET * .. * .. External Functions .. INTEGER PB_NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * INFO = 0 ERRMAX = ZERO * * Quick return if possible * IF( ( DESCA( M_ ).LE.0 ).OR.( DESCA( N_ ).LE.0 ) ) $ RETURN * * Start the operations * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * MPALL = PB_NUMROC( DESCA( M_ ), 1, DESCA( IMB_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) * LDA = DESCA( M_ ) LDPA = DESCA( LLD_ ) * II = 1 JJ = 1 ROWREP = ( DESCA( RSRC_ ).EQ.-1 ) COLREP = ( DESCA( CSRC_ ).EQ.-1 ) ICURCOL = DESCA( CSRC_ ) IF( MYROW.EQ.DESCA( RSRC_ ) .OR. ROWREP ) THEN IMBA = DESCA( IMB_ ) ELSE IMBA = DESCA( MB_ ) END IF IF( ROWREP ) THEN MYROWDIST = 0 ELSE MYROWDIST = MOD( MYROW - DESCA( RSRC_ ) + NPROW, NPROW ) END IF * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * J = 1 IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) JB = MIN( DESCA( N_ ), DESCA( INB_ ) ) * DO 20 KK = 0, JB-1 DO 10 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 10 CONTINUE 20 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 50 II = IMBA + 1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 40 KK = 0, JB-1 DO 30 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 30 CONTINUE 40 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 50 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * DO 110 J = DESCA( INB_ ) + 1, DESCA( N_ ), DESCA( NB_ ) JB = MIN( DESCA( N_ ) - J + 1, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN * IF( MYROWDIST.EQ.0 ) THEN I = 1 ELSE I = DESCA( IMB_ ) + ( MYROWDIST - 1 ) * DESCA( MB_ ) + 1 END IF * II = 1 IB = MIN( MAX( 0, DESCA( M_ ) - I + 1 ), IMBA ) DO 70 KK = 0, JB-1 DO 60 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 60 CONTINUE 70 CONTINUE IF( ROWREP ) THEN I = I + IMBA ELSE I = I + IMBA + ( NPROW - 1 ) * DESCA( MB_ ) END IF * DO 100 II = IMBA+1, MPALL, DESCA( MB_ ) IB = MIN( MPALL-II+1, DESCA( MB_ ) ) * DO 90 KK = 0, JB-1 DO 80 LL = 0, IB-1 IF( I+LL.LT.IA .OR. I+LL.GT.IA+M-1 .OR. $ J+KK.LT.JA .OR. J+KK.GT.JA+N-1 ) $ CALL PZERRSET( ERR, ERRMAX, $ A( I+LL+(J+KK-1)*LDA ), $ PA( II+LL+(JJ+KK-1)*LDPA ) ) 80 CONTINUE 90 CONTINUE * IF( ROWREP ) THEN I = I + DESCA( MB_ ) ELSE I = I + NPROW * DESCA( MB_ ) END IF * 100 CONTINUE * JJ = JJ + JB * END IF * ICURCOL = MOD( ICURCOL + 1, NPCOL ) * INSERT MODE 110 CONTINUE * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERRMAX, 1, KK, LL, -1, $ -1, -1 ) * IF( ERRMAX.GT.ZERO .AND. ERRMAX.LE.EPS ) THEN INFO = 1 ELSE IF( ERRMAX.GT.EPS ) THEN INFO = -1 END IF * RETURN * * End of PZCHKMOUT * END SUBROUTINE PZMPRNT( ICTXT, NOUT, M, N, A, LDA, IRPRNT, ICPRNT, $ CMATNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, IRPRNT, LDA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZMPRNT prints to the standard output an array A of size m by n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). The leading m * by n part of this array is printed. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array A to be printed. LDA must be at least MAX( 1, M ). * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM specifies the identifier of the matrix to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 20 J = 1, N * DO 10 I = 1, M * WRITE( NOUT, FMT = 9999 ) CMATNM, I, J, $ DBLE( A( I, J ) ), DIMAG( A( I, J ) ) * 10 CONTINUE * 20 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', D30.18, '+i*(', $ D30.18, ')' ) * RETURN * * End of PZMPRNT * END SUBROUTINE PZVPRNT( ICTXT, NOUT, N, X, INCX, IRPRNT, ICPRNT, $ CVECNM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICPRNT, ICTXT, INCX, IRPRNT, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CVECNM COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZVPRNT prints to the standard output an vector x of length n. Only * the process of coordinates ( IRPRNT, ICPRNT ) is printing. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * N (global input) INTEGER * On entry, N specifies the length of the vector X. N must be * at least zero. * * X (global input) COMPLEX*16 array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the process row coordinate of the * printing process. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the process column coordinate of * the printing process. * * CVECNM (global input) CHARACTER*(*) * On entry, CVECNM specifies the identifier of the vector to be * printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN * WRITE( NOUT, FMT = * ) DO 10 I = 1, 1 + ( N-1 )*INCX, INCX * WRITE( NOUT, FMT = 9999 ) CVECNM, I, DBLE( X( I ) ), $ DIMAG( X( I ) ) * 10 CONTINUE * END IF * 9999 FORMAT( 1X, A, '(', I6, ')=', D30.18, '+i*(', D30.18, ')' ) * RETURN * * End of PZVPRNT * END SUBROUTINE PZMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, $ DESCY, INCY, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PZMVCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies which matrix-vector product is to * be computed as follows: * If TRANS = 'T', * sub( Y ) = BETA * sub( Y ) + sub( A )**T * sub( X ), * else if TRANS = 'C', * sub( Y ) = BETA * sub( Y ) + sub( A )**H * sub( X ), * otherwise * sub( Y ) = BETA * sub( Y ) + sub( A ) * sub( X ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX*16 array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, ROWREP, TRAN INTEGER I, IB, ICURCOL, ICURROW, IIY, IN, IOFFA, IOFFX, $ IOFFY, IYCOL, IYROW, J, JB, JJY, JN, KK, LDA, $ LDPY, LDX, LDY, ML, MYCOL, MYROW, NL, NPCOL, $ NPROW DOUBLE PRECISION EPS, ERRI, GTMP COMPLEX*16 C, TBETA, YTMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( C ) = ABS( DBLE( C ) ) + ABS( DIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN TBETA = ONE ELSE TBETA = BETA END IF * TRAN = LSAME( TRANS, 'T' ) CTRAN = LSAME( TRANS, 'C' ) IF( TRAN.OR.CTRAN ) THEN ML = N NL = M ELSE ML = M NL = N END IF * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in Y using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * IOFFY = IY + ( JY - 1 ) * LDY DO 40 I = 1, ML YTMP = ZERO GTMP = RZERO IOFFX = IX + ( JX - 1 ) * LDX IF( TRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 10 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 10 CONTINUE ELSE IF( CTRAN )THEN IOFFA = IA + ( JA + I - 2 ) * LDA DO 20 J = 1, NL YTMP = YTMP + DCONJG( A( IOFFA ) ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + 1 IOFFX = IOFFX + INCX 20 CONTINUE ELSE IOFFA = IA + I - 1 + ( JA - 1 ) * LDA DO 30 J = 1, NL YTMP = YTMP + A( IOFFA ) * X( IOFFX ) GTMP = GTMP + ABS1( A( IOFFA ) ) * ABS1( X( IOFFX ) ) IOFFA = IOFFA + LDA IOFFX = IOFFX + INCX 30 CONTINUE END IF G( I ) = ABS1( ALPHA )*GTMP + ABS1( TBETA )*ABS1( Y( IOFFY ) ) Y( IOFFY ) = ALPHA * YTMP + TBETA * Y( IOFFY ) IOFFY = IOFFY + INCY 40 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPY = DESCY( LLD_ ) IOFFY = IY + ( JY - 1 ) * LDY CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, IIY, $ JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( ( -JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, ML ) JN = JY + JB - 1 * DO 50 J = JY, JN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( J-JY+1 ).NE.RZERO ) $ ERRI = ERRI / G( J-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 50 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 70 J = JN+1, JY+ML-1, DESCY( NB_ ) JB = MIN( JY+ML-J, DESCY( NB_ ) ) * DO 60 KK = 0, JB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( J+KK-JY+1 ).NE.RZERO ) $ ERRI = ERRI / G( J+KK-JY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 JJY = JJY + 1 END IF * IOFFY = IOFFY + INCY * 60 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 70 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, ML ) IN = IY + IB - 1 * DO 80 I = IY, IN * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) ) / EPS IF( G( I-IY+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 80 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 100 I = IN+1, IY+ML-1, DESCY( MB_ ) IB = MIN( IY+ML-I, DESCY( MB_ ) ) * DO 90 KK = 0, IB-1 * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN ERRI = ABS( PY( IIY+(JJY-1)*LDPY ) - Y( IOFFY ) )/EPS IF( G( I+KK-IY+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IY+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIY = IIY + 1 END IF * IOFFY = IOFFY + INCY * 90 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 100 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PZMVCH * END SUBROUTINE PZVMCH( ICTXT, TRANS, UPLO, M, N, ALPHA, X, IX, JX, $ DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, $ IA, JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PZVMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed in * the complex cases: * if TRANS = 'C', * sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**H, * otherwise * sub( A ) := sub( A ) + alpha * sub( X ) * sub( Y )**T. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFX, IOFFY, J, JJA, KK, LDA, LDPA, $ LDX, LDY, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION EPS, ERRI, GTMP COMPLEX*16 ATMP, C * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( C ) = ABS( DBLE( C ) ) + ABS( DIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * CTRAN = LSAME( TRANS, 'C' ) UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFY = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND * IOFFX = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IF( CTRAN ) THEN ATMP = X( IOFFX ) * DCONJG( Y( IOFFY ) ) ELSE ATMP = X( IOFFX ) * Y( IOFFY ) END IF GTMP = ABS1( X( IOFFX ) ) * ABS1( Y( IOFFY ) ) G( I ) = ABS1( ALPHA ) * GTMP + ABS1( A( IOFFA ) ) A( IOFFA ) = ALPHA * ATMP + A( IOFFA ) * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PZVMCH * END SUBROUTINE PZVMCH2( ICTXT, UPLO, M, N, ALPHA, X, IX, JX, DESCX, $ INCX, Y, IY, JY, DESCY, INCY, A, PA, IA, $ JA, DESCA, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), PA( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PZVMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of the submatrix sub( A ) * is to be referenced as follows: * If UPLO = 'L', only the lower triangular part, * If UPLO = 'U', only the upper triangular part, * else the entire matrix is to be referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * operand matrix A. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the subma- * trix operand matrix A. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX( M, N ). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, LOWER, ROWREP, UPPER INTEGER I, IACOL, IAROW, IB, IBEG, ICURROW, IEND, IIA, $ IN, IOFFA, IOFFXI, IOFFXJ, IOFFYI, IOFFYJ, J, $ JJA, KK, LDA, LDPA, LDX, LDY, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERRI, GTMP COMPLEX*16 C, ATMP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( C ) = ABS( DBLE( C ) ) + ABS( DIMAG( C ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) * LDA = MAX( 1, DESCA( M_ ) ) LDX = MAX( 1, DESCX( M_ ) ) LDY = MAX( 1, DESCY( M_ ) ) * * Compute expected result in A using data in A, X and Y. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 70 J = 1, N * IOFFXJ = IX + ( JX - 1 ) * LDX + ( J - 1 ) * INCX IOFFYJ = IY + ( JY - 1 ) * LDY + ( J - 1 ) * INCY * IF( LOWER ) THEN IBEG = J IEND = M DO 10 I = 1, J-1 G( I ) = ZERO 10 CONTINUE ELSE IF( UPPER ) THEN IBEG = 1 IEND = J DO 20 I = J+1, M G( I ) = ZERO 20 CONTINUE ELSE IBEG = 1 IEND = M END IF * DO 30 I = IBEG, IEND IOFFA = IA + I - 1 + ( JA + J - 2 ) * LDA IOFFXI = IX + ( JX - 1 ) * LDX + ( I - 1 ) * INCX IOFFYI = IY + ( JY - 1 ) * LDY + ( I - 1 ) * INCY ATMP = ALPHA * X( IOFFXI ) * DCONJG( Y( IOFFYJ ) ) ATMP = ATMP + Y( IOFFYI ) * DCONJG( ALPHA * X( IOFFXJ ) ) GTMP = ABS1( ALPHA * X( IOFFXI ) ) * ABS1( Y( IOFFYJ ) ) GTMP = GTMP + ABS1( Y( IOFFYI ) ) * $ ABS1( DCONJG( ALPHA * X( IOFFXJ ) ) ) G( I ) = GTMP + ABS1( A( IOFFA ) ) A( IOFFA ) = A( IOFFA ) + ATMP * 30 CONTINUE * * Compute the error ratio for this result. * INFO = 0 ERR = ZERO LDPA = DESCA( LLD_ ) IOFFA = IA + ( JA + J - 2 ) * LDA CALL PB_INFOG2L( IA, JA+J-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ROWREP = ( IAROW.EQ.-1 ) COLREP = ( IACOL.EQ.-1 ) * IF( MYCOL.EQ.IACOL .OR. COLREP ) THEN * ICURROW = IAROW IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA + IB - 1 * DO 40 I = IA, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA ) - A( IOFFA ) )/EPS IF( G( I-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 40 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 60 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * DO 50 KK = 0, IB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PA( IIA+(JJA-1)*LDPA )-A( IOFFA ) )/EPS IF( G( I+KK-IA+1 ).NE.ZERO ) $ ERRI = ERRI / G( I+KK-IA+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ INFO = 1 IIA = IIA + 1 END IF * IOFFA = IOFFA + 1 * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 80 * 70 CONTINUE * 80 CONTINUE * RETURN * * End of PZVMCH2 * END SUBROUTINE PZMMCH( ICTXT, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, $ JA, DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANSA, TRANSB INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, M, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), B( * ), C( * ), CT( * ), PC( * ) * .. * * Purpose * ======= * * PZMMCH checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies if the matrix operand A is to be * transposed. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies if the matrix operand B is to be * transposed. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANSA = 'N' (resp. TRANSA <> 'N') in PxGEMM, PxSYRK, * PxSYR2K, PxHERK and PxHER2K. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX*16 array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX*16 array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRANA, CTRANB, ROWREP, TRANA, TRANB INTEGER I, IBB, ICCOL, ICROW, ICURROW, IIC, IN, IOFFA, $ IOFFB, IOFFC, J, JJC, KK, LDA, LDB, LDC, LDPC, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION EPS, ERRI COMPLEX*16 Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * TRANA = LSAME( TRANSA, 'T' ).OR.LSAME( TRANSA, 'C' ) TRANB = LSAME( TRANSB, 'T' ).OR.LSAME( TRANSB, 'C' ) CTRANA = LSAME( TRANSA, 'C' ) CTRANB = LSAME( TRANSB, 'C' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 240 J = 1, N * IOFFC = IC + ( JC + J - 2 ) * LDC DO 10 I = 1, M CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( .NOT.TRANA .AND. .NOT.TRANB ) THEN DO 30 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 20 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS( A( IOFFA ) ) * $ ABS( B( IOFFB ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA .AND. .NOT.TRANB ) THEN IF( CTRANA ) THEN DO 50 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + DCONJG( A( IOFFA ) ) * $ B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 40 CONTINUE 50 CONTINUE ELSE DO 70 KK = 1, K IOFFB = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 60 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 60 CONTINUE 70 CONTINUE END IF ELSE IF( .NOT.TRANA .AND. TRANB ) THEN IF( CTRANB ) THEN DO 90 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 80 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * $ DCONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 80 CONTINUE 90 CONTINUE ELSE DO 110 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 100 I = 1, M IOFFA = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 100 CONTINUE 110 CONTINUE END IF ELSE IF( TRANA .AND. TRANB ) THEN IF( CTRANA ) THEN IF( CTRANB ) THEN DO 130 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 120 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + DCONJG( A( IOFFA ) ) * $ DCONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 120 CONTINUE 130 CONTINUE ELSE DO 150 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 140 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + DCONJG( A( IOFFA ) ) * $ B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 140 CONTINUE 150 CONTINUE END IF ELSE IF( CTRANB ) THEN DO 170 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 160 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * $ DCONJG( B( IOFFB ) ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 160 CONTINUE 170 CONTINUE ELSE DO 190 KK = 1, K IOFFB = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 180 I = 1, M IOFFA = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFA ) * B( IOFFB ) G( I ) = G( I ) + ABS1( A( IOFFA ) ) * $ ABS1( B( IOFFB ) ) 180 CONTINUE 190 CONTINUE END IF END IF END IF * DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 200 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, M ) IN = IC + IBB - 1 * DO 210 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 210 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 230 I = IN+1, IC+M-1, DESCC( MB_ ) IBB = MIN( IC+M-I, DESCC( MB_ ) ) * DO 220 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 220 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 230 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 250 * 240 CONTINUE * 250 CONTINUE * RETURN * * End of PZMMCH * END SUBROUTINE PZMMCH1( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, BETA, C, PC, IC, JC, DESCC, CT, G, $ ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, ICTXT, INFO, JA, JC, K, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), C( * ), CT( * ), PC( * ) * .. * * Purpose * ======= * * PZMMCH1 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * when TRANS = 'N' (resp. TRANS <> 'N'). K must be at least * zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX*16 array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX*16 array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFC, J, JJC, KK, LDA, $ LDC, LDPC, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION EPS, ERRI COMPLEX*16 Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) HTRAN = LSAME( TRANS, 'H' ) * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAK ) * A( IOFFAN ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 40 CONTINUE 50 CONTINUE ELSE IF( HTRAN ) THEN DO 70 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA DO 60 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA CT( I ) = CT( I ) + A( IOFFAN ) * $ DCONJG( A( IOFFAK ) ) G( I ) = G( I ) + ABS1( A( IOFFAK ) ) * $ ABS1( A( IOFFAN ) ) 60 CONTINUE 70 CONTINUE ELSE DO 90 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA DO 80 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA CT( I ) = CT( I ) + DCONJG( A( IOFFAN ) ) * $ A( IOFFAK ) G( I ) = G( I ) + ABS1( DCONJG( A( IOFFAN ) ) ) * $ ABS1( A( IOFFAK ) ) 80 CONTINUE 90 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = ALPHA*CT( I ) + BETA * C( IOFFC ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PZMMCH1 * END SUBROUTINE PZMMCH2( ICTXT, UPLO, TRANS, N, K, ALPHA, A, IA, JA, $ DESCA, B, IB, JB, DESCB, BETA, C, PC, IC, $ JC, DESCC, CT, G, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IB, IC, ICTXT, INFO, JA, JB, JC, K, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCC( * ) DOUBLE PRECISION G( * ) COMPLEX*16 A( * ), B( * ), C( * ), CT( * ), $ PC( * ) * .. * * Purpose * ======= * * PZMMCH2 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrices A and B have * to be transposed or not before computing the matrix-matrix * product. * * N (global input) INTEGER * On entry, N specifies the order the submatrix operand C. N * must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns (resp. rows) of A * and B when TRANS = 'N' (resp. TRANS <> 'N'). K must be at * least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (DESCB( M_ ),*). This * array contains a local copy of the initial entire matrix PB. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX*16 array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * CT (workspace) COMPLEX*16 array * On entry, CT is an array of dimension at least MAX(M,N,K). CT * holds a copy of the current column of C. * * G (workspace) DOUBLE PRECISION array * On entry, G is an array of dimension at least MAX(M,N,K). G * is used to compute the gauges. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLREP, HTRAN, NOTRAN, ROWREP, TRAN, UPPER INTEGER I, IBB, IBEG, ICCOL, ICROW, ICURROW, IEND, IIC, $ IN, IOFFAK, IOFFAN, IOFFBK, IOFFBN, IOFFC, J, $ JJC, KK, LDA, LDB, LDC, LDPC, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION EPS, ERRI COMPLEX*16 Z * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT * .. * .. Statement Functions .. DOUBLE PRECISION ABS1 ABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * EPS = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) HTRAN = LSAME( TRANS, 'H' ) NOTRAN = LSAME( TRANS, 'N' ) TRAN = LSAME( TRANS, 'T' ) * LDA = MAX( 1, DESCA( M_ ) ) LDB = MAX( 1, DESCB( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) * * Compute expected result in C using data in A, B and C. * Compute gauges in G. This part of the computation is performed * by every process in the grid. * DO 140 J = 1, N * IF( UPPER ) THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF * DO 10 I = 1, N CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE * IF( NOTRAN ) THEN DO 30 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 20 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRAN ) THEN DO 50 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 40 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + ALPHA * ( $ A( IOFFAN ) * B( IOFFBK ) + $ B( IOFFBN ) * A( IOFFAK ) ) G( I ) = G( I ) + ABS( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 40 CONTINUE 50 CONTINUE ELSE IF( HTRAN ) THEN DO 70 KK = 1, K IOFFAK = IA + J - 1 + ( JA + KK - 2 ) * LDA IOFFBK = IB + J - 1 + ( JB + KK - 2 ) * LDB DO 60 I = IBEG, IEND IOFFAN = IA + I - 1 + ( JA + KK - 2 ) * LDA IOFFBN = IB + I - 1 + ( JB + KK - 2 ) * LDB CT( I ) = CT( I ) + $ ALPHA * A( IOFFAN ) * DCONJG( B( IOFFBK ) ) + $ B( IOFFBN ) * DCONJG( ALPHA * A( IOFFAK ) ) G( I ) = G( I ) + ABS1( ALPHA ) * ( $ ABS1( A( IOFFAN ) ) * ABS1( B( IOFFBK ) ) + $ ABS1( B( IOFFBN ) ) * ABS1( A( IOFFAK ) ) ) 60 CONTINUE 70 CONTINUE ELSE DO 90 KK = 1, K IOFFAK = IA + KK - 1 + ( JA + J - 2 ) * LDA IOFFBK = IB + KK - 1 + ( JB + J - 2 ) * LDB DO 80 I = IBEG, IEND IOFFAN = IA + KK - 1 + ( JA + I - 2 ) * LDA IOFFBN = IB + KK - 1 + ( JB + I - 2 ) * LDB CT( I ) = CT( I ) + $ ALPHA * DCONJG( A( IOFFAN ) ) * B( IOFFBK ) + $ DCONJG( ALPHA * B( IOFFBN ) ) * A( IOFFAK ) G( I ) = G( I ) + ABS1( ALPHA ) * ( $ ABS1( DCONJG( A( IOFFAN ) ) * B( IOFFBK ) ) + $ ABS1( DCONJG( B( IOFFBN ) ) * A( IOFFAK ) ) ) 80 CONTINUE 90 CONTINUE END IF * IOFFC = IC + IBEG - 1 + ( JC + J - 2 ) * LDC * DO 100 I = IBEG, IEND CT( I ) = CT( I ) + BETA * C( IOFFC ) G( I ) = G( I ) + ABS1( BETA )*ABS1( C( IOFFC ) ) C( IOFFC ) = CT( I ) IOFFC = IOFFC + 1 100 CONTINUE * * Compute the error ratio for this result. * ERR = RZERO INFO = 0 LDPC = DESCC( LLD_ ) IOFFC = IC + ( JC + J - 2 ) * LDC CALL PB_INFOG2L( IC, JC+J-1, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) ICURROW = ICROW ROWREP = ( ICROW.EQ.-1 ) COLREP = ( ICCOL.EQ.-1 ) * IF( MYCOL.EQ.ICCOL .OR. COLREP ) THEN * IBB = DESCC( IMB_ ) - IC + 1 IF( IBB.LE.0 ) $ IBB = ( ( -IBB ) / DESCC( MB_ ) + 1 )*DESCC( MB_ ) + IBB IBB = MIN( IBB, N ) IN = IC + IBB - 1 * DO 110 I = IC, IN * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) ) / EPS IF( G( I-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IC+N-1, DESCC( MB_ ) IBB = MIN( IC+N-I, DESCC( MB_ ) ) * DO 120 KK = 0, IBB-1 * IF( MYROW.EQ.ICURROW .OR. ROWREP ) THEN ERRI = ABS( PC( IIC+(JJC-1)*LDPC ) - $ C( IOFFC ) )/EPS IF( G( I+KK-IC+1 ).NE.RZERO ) $ ERRI = ERRI / G( I+KK-IC+1 ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ INFO = 1 IIC = IIC + 1 END IF * IOFFC = IOFFC + 1 * 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) IF( INFO.NE.0 ) $ GO TO 150 * 140 CONTINUE * 150 CONTINUE * RETURN * * End of PZMMCH2 * END SUBROUTINE PZMMCH3( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, $ BETA, C, PC, IC, JC, DESCC, ERR, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, M, N DOUBLE PRECISION ERR COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), PC( * ) * .. * * Purpose * ======= * * PZMMCH3 checks the results of the computational tests. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which part of C should contain the * result. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies whether the matrix A has to be * transposed or not before computing the matrix-matrix addi- * tion. * * M (global input) INTEGER * On entry, M specifies the number of rows of C. * * N (global input) INTEGER * On entry, N specifies the number of columns of C. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (DESCC( M_ ),*). This * array contains a local copy of the initial entire matrix PC. * * PC (local input) COMPLEX*16 array * On entry, PC is an array of dimension (DESCC( LLD_ ),*). This * array contains the local pieces of the matrix PC. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * ERR (global output) DOUBLE PRECISION * On exit, ERR specifies the largest error in absolute value. * * INFO (global output) INTEGER * On exit, if INFO <> 0, the result is less than half accurate. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, CTRAN, LOWER, NOTRAN, ROWREP, UPPER INTEGER I, ICCOL, ICROW, ICTXT, IIC, IOFFA, IOFFC, J, $ JJC, LDA, LDC, LDPC, MYCOL, MYROW, NPCOL, $ NPROW DOUBLE PRECISION ERR0, ERRI, PREC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGAMX2D, IGSUM2D, PB_INFOG2L, $ PZERRAXPBY * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX * .. * .. Executable Statements .. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * PREC = PDLAMCH( ICTXT, 'eps' ) * UPPER = LSAME( UPLO, 'U' ) LOWER = LSAME( UPLO, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) CTRAN = LSAME( TRANS, 'C' ) * * Compute expected result in C using data in A and C. This part of * the computation is performed by every process in the grid. * INFO = 0 ERR = ZERO * LDA = MAX( 1, DESCA( M_ ) ) LDC = MAX( 1, DESCC( M_ ) ) LDPC = MAX( 1, DESCC( LLD_ ) ) ROWREP = ( DESCC( RSRC_ ).EQ.-1 ) COLREP = ( DESCC( CSRC_ ).EQ.-1 ) * IF( NOTRAN ) THEN * DO 20 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( JA - 1 + J - JC ) * LDA * DO 10 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFA = IOFFA + 1 IOFFC = IOFFC + 1 * 10 CONTINUE * 20 CONTINUE * ELSE IF( CTRAN ) THEN * DO 40 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 30 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, DCONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, DCONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PZERRAXPBY( ERRI, ALPHA, DCONJG( A( IOFFA ) ), $ BETA, C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 30 CONTINUE * 40 CONTINUE * ELSE * DO 60 J = JC, JC + N - 1 * IOFFC = IC + ( J - 1 ) * LDC IOFFA = IA + ( J - JC ) + ( JA - 1 ) * LDA * DO 50 I = IC, IC + M - 1 * IF( UPPER ) THEN IF( ( J - JC ).GE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE IF( LOWER ) THEN IF( ( J - JC ).LE.( I - IC ) ) THEN CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) ELSE ERRI = ZERO END IF ELSE CALL PZERRAXPBY( ERRI, ALPHA, A( IOFFA ), BETA, $ C( IOFFC ), PREC ) END IF * CALL PB_INFOG2L( I, J, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) IF( ( MYROW.EQ.ICROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICCOL .OR. COLREP ) ) THEN ERR0 = ABS( PC( IIC+(JJC-1)*LDPC )-C( IOFFC ) ) IF( ERR0.GT.ERRI ) $ INFO = 1 ERR = MAX( ERR, ERR0 ) END IF * IOFFC = IOFFC + 1 IOFFA = IOFFA + LDA * 50 CONTINUE * 60 CONTINUE * END IF * * If INFO = 0, all results are at least half accurate. * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, I, J, -1, -1, $ MYCOL ) * RETURN * * End of PZMMCH3 * END SUBROUTINE PZERRAXPBY( ERRBND, ALPHA, X, BETA, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC COMPLEX*16 ALPHA, BETA, X, Y * .. * * Purpose * ======= * * PZERRAXPBY serially computes y := beta*y + alpha * x and returns a * scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (global input) COMPLEX*16 * On entry, X specifies the scalar x to be scaled. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * Y (global input/global output) COMPLEX*16 * On entry, Y specifies the scalar y to be added. On exit, Y * contains the resulting scalar y. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS COMPLEX*16 TMP * .. * .. Intrinsic Functions .. * .. * .. Executable Statements .. * SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = ALPHA * X IF( DBLE( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + DBLE( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - DBLE( TMP ) * FACT END IF IF( DIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + DIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - DIMAG( TMP ) * FACT END IF * TMP = BETA * Y IF( DBLE( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + DBLE( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - DBLE( TMP ) * FACT END IF IF( DIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + DIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - DIMAG( TMP ) * FACT END IF * Y = ( BETA * Y ) + ( ALPHA * X ) * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PZERRAXPBY * END SUBROUTINE PZIPSET( TOGGLE, N, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TOGGLE INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZIPSET sets the imaginary part of the diagonal entries of an n by n * matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). This is used to * test the PBLAS routines for complex Hermitian matrices, which are * either not supposed to access or use the imaginary parts of the dia- * gonals, or supposed to set them to zero. The value used to set the * imaginary part of the diagonals depends on the value of TOGGLE. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TOGGLE (global input) CHARACTER*1 * On entry, TOGGLE specifies the set-value to be used as fol- * lows: * If TOGGLE = 'Z' or 'z', the imaginary part of the diago- * nals are set to zero, * If TOGGLE = 'B' or 'b', the imaginary part of the diago- * nals are set to a large value. * * N (global input) INTEGER * On entry, N specifies the order of sub( A ). N must be at * least zero. * * A (local input/local output) pointer to COMPLEX*16 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the diagonals of * sub( A ) have been updated as specified by TOGGLE. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLREP, GODOWN, GOLEFT, ROWREP INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP DOUBLE PRECISION ALPHA, ATMP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.LE.0 ) $ RETURN * IF( LSAME( TOGGLE, 'Z' ) ) THEN ALPHA = ZERO ELSE IF( LSAME( TOGGLE, 'B' ) ) THEN ALPHA = PDLAMCH( ICTXT, 'Epsilon' ) ALPHA = ALPHA / PDLAMCH( ICTXT, 'Safe minimum' ) END IF * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( NP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 ROWREP = ( DESCA2( RSRC_ ).EQ.-1 ) COLREP = ( DESCA2( CSRC_ ).EQ.-1 ) LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( ROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( COLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA + LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = DBLE( A( IJOFFA + I*LDAP1 ) ) A( IJOFFA + I*LDAP1 ) = DCMPLX( ATMP, ALPHA ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PZIPSET * END DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 CMACH INTEGER ICTXT * .. * * Purpose * ======= * * * .. Local Scalars .. CHARACTER*1 TOP INTEGER IDUMM DOUBLE PRECISION TEMP * .. * .. External Subroutines .. EXTERNAL DGAMN2D, DGAMX2D, PB_TOPGET * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH, LSAME * .. * .. Executable Statements .. * TEMP = DLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL DGAMX2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL DGAMN2D( ICTXT, 'All', TOP, 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PDLAMCH = TEMP * RETURN * * End of PDLAMCH * END SUBROUTINE PZLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IA, JA, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASET initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) deno- * ted by sub( A ) to beta on the diagonal and alpha on the offdiago- * nals. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the part of the submatrix sub( A ) * to be set: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant to which the offdiagonal elements are to be set. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta, i.e., the constant * to which the diagonal elements are to be set. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be set. On exit, the * leading m by n submatrix sub( A ) is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * and IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT, ISCOLREP, ISROWREP, LOWER, $ UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, JJA, $ JJMAX, JOFFA, JOFFD, LCMT, LCMT00, LDA, LMBLOC, $ LNBLOC, LOW, M1, MB, MBLKD, MBLKS, MBLOC, MP, $ MRCOL, MRROW, MYCOL, MYROW, N1, NB, NBLKD, $ NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, QNB, TMP1, $ UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_ZLASET * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * ISROWREP = ( DESCA2( RSRC_ ).LT.0 ) ISCOLREP = ( DESCA2( CSRC_ ).LT.0 ) LDA = DESCA2( LLD_ ) * UPPER = .NOT.( LSAME( UPLO, 'L' ) ) LOWER = .NOT.( LSAME( UPLO, 'U' ) ) * IF( ( ( LOWER.AND.UPPER ).AND.( ALPHA.EQ.BETA ) ).OR. $ ( ISROWREP .AND. ISCOLREP ) ) THEN IF( ( MP.GT.0 ).AND.( NQ.GT.0 ) ) $ CALL PB_ZLASET( UPLO, MP, NQ, 0, ALPHA, BETA, $ A( IIA + ( JJA - 1 ) * LDA ), LDA ) RETURN END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) CALL PB_BINFO( 0, MP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( ISROWREP ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( ISCOLREP ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * M1 = MP N1 = NQ * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_ZLASET( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, BETA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_ZLASET( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ ALPHA, A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_ZLASET( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ ALPHA, A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASET( UPLO, MBLOC, INBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASET( 'ALL', TMP1, INBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASET( 'ALL', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_ZLASET( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_ZLASET( 'All', IMBLOC, TMP1, 0, ALPHA, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_ZLASET( 'All', M1, TMP1, 0, ALPHA, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASET( UPLO, MBLOC, NBLOC, LCMT, ALPHA, BETA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASET( 'All', TMP1, NBLOC, 0, ALPHA, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASET( 'All', TMP1, N1, 0, ALPHA, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * RETURN * * End of PZLASET * END SUBROUTINE PZLASCAL( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TYPE INTEGER IA, JA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASCAL scales the m by n submatrix A(IA:IA+M-1,JA:JA+N-1) denoted * by sub( A ) by the scalar alpha. TYPE specifies if sub( A ) is full, * upper triangular, lower triangular or upper Hessenberg. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * On entry, TYPE specifies the type of the input submatrix as * follows: * = 'L' or 'l': sub( A ) is a lower triangular matrix, * = 'U' or 'u': sub( A ) is an upper triangular matrix, * = 'H' or 'h': sub( A ) is an upper Hessenberg matrix, * otherwise sub( A ) is a full matrix. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( A ) are overwritten by * the local entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 UPLO LOGICAL GODOWN, GOLEFT, LOWER, UPPER INTEGER IACOL, IAROW, ICTXT, IIA, IIMAX, ILOW, IMB1, $ IMBLOC, INB1, INBLOC, IOFFA, IOFFD, ITYPE, $ IUPP, JJA, JJMAX, JOFFA, JOFFD, LCMT, LCMT00, $ LDA, LMBLOC, LNBLOC, LOW, M1, MB, MBLKD, MBLKS, $ MBLOC, MP, MRCOL, MRROW, MYCOL, MYROW, N1, NB, $ NBLKD, NBLKS, NBLOC, NPCOL, NPROW, NQ, PMB, $ QNB, TMP1, UPP * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS, PB_INFOG2L, PB_ZLASCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_NUMROC EXTERNAL LSAME, PB_NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 UPLO = TYPE UPPER = .FALSE. LOWER = .TRUE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 UPLO = TYPE UPPER = .TRUE. LOWER = .FALSE. IOFFD = 0 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 UPLO = 'U' UPPER = .TRUE. LOWER = .FALSE. IOFFD = 1 ELSE ITYPE = 0 UPLO = 'A' UPPER = .TRUE. LOWER = .TRUE. IOFFD = 0 END IF * * Compute local indexes * IF( ITYPE.EQ.0 ) THEN * * Full matrix * CALL PB_INFOG2L( IA, JA, DESCA2, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) MP = PB_NUMROC( M, IA, DESCA2( IMB_ ), DESCA2( MB_ ), MYROW, $ DESCA2( RSRC_ ), NPROW ) NQ = PB_NUMROC( N, JA, DESCA2( INB_ ), DESCA2( NB_ ), MYCOL, $ DESCA2( CSRC_ ), NPCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * LDA = DESCA2( LLD_ ) IOFFA = IIA + ( JJA - 1 ) * LDA * CALL PB_ZLASCAL( 'All', MP, NQ, 0, ALPHA, A( IOFFA ), LDA ) * ELSE * * Trapezoidal matrix * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * IF( MP.LE.0 .OR. NQ.LE.0 ) $ RETURN * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, * LNBLOC, ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) LDA = DESCA2( LLD_ ) * CALL PB_BINFO( IOFFD, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * M1 = MP N1 = NQ IOFFA = IIA - 1 JOFFA = JJA - 1 IIMAX = IOFFA + MP JJMAX = JOFFA + NQ * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and * update LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * CALL PB_ZLASCAL( UPLO, IMBLOC, INBLOC, LCMT00, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IF( GODOWN ) THEN IF( UPPER .AND. NQ.GT.INBLOC ) $ CALL PB_ZLASCAL( 'All', IMBLOC, NQ-INBLOC, 0, ALPHA, $ A( IIA+(JOFFA+INBLOC)*LDA ), LDA ) IIA = IIA + IMBLOC M1 = M1 - IMBLOC ELSE IF( LOWER .AND. MP.GT.IMBLOC ) $ CALL PB_ZLASCAL( 'All', MP-IMBLOC, INBLOC, 0, ALPHA, $ A( IIA+IMBLOC+JOFFA*LDA ), LDA ) JJA = JJA + INBLOC N1 = N1 - INBLOC END IF * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 10 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 10 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 20 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASCAL( UPLO, MBLOC, INBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 20 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, INBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = IOFFA - IIA + 1 M1 = M1 - TMP1 N1 = N1 - INBLOC LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 30 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 30 END IF * TMP1 = MIN( JOFFA, JJMAX ) - JJA + 1 IF( LOWER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IIA+(JJA-1)*LDA ), LDA ) JJA = JJA + TMP1 N1 = N1 - TMP1 END IF * IF( NBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 40 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC CALL PB_ZLASCAL( UPLO, IMBLOC, NBLOC, LCMT, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 40 END IF * TMP1 = N1 - JOFFD + JJA - 1 IF( UPPER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', IMBLOC, TMP1, 0, ALPHA, $ A( IIA+JOFFD*LDA ), LDA ) * TMP1 = JOFFA - JJA + 1 M1 = M1 - IMBLOC N1 = N1 - TMP1 LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * IF( LOWER .AND. M1.GT.0 .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', M1, TMP1, 0, ALPHA, $ A( IOFFA+1+(JJA-1)*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * END IF * NBLOC = NB 50 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 60 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 60 END IF * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 IF( UPPER .AND. TMP1.GT.0 ) THEN CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) IIA = IIA + TMP1 M1 = M1 - TMP1 END IF * IF( MBLKS.LE.0 ) $ RETURN * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 70 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC CALL PB_ZLASCAL( UPLO, MBLOC, NBLOC, LCMT, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 70 END IF * TMP1 = M1 - IOFFD + IIA - 1 IF( LOWER .AND. TMP1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, NBLOC, 0, ALPHA, $ A( IOFFD+1+JOFFA*LDA ), LDA ) * TMP1 = MIN( IOFFA, IIMAX ) - IIA + 1 M1 = M1 - TMP1 N1 = N1 - NBLOC LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC * IF( UPPER .AND. TMP1.GT.0 .AND. N1.GT.0 ) $ CALL PB_ZLASCAL( 'All', TMP1, N1, 0, ALPHA, $ A( IIA+JOFFA*LDA ), LDA ) * IIA = IOFFA + 1 JJA = JOFFA + 1 * GO TO 50 * END IF * END IF * RETURN * * End of PZLASCAL * END SUBROUTINE PZLAGEN( INPLACE, AFORM, DIAG, OFFA, M, N, IA, JA, $ DESCA, IASEED, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE CHARACTER*1 AFORM, DIAG INTEGER IA, IASEED, JA, LDA, M, N, OFFA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PZLAGEN generates (or regenerates) a submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the generated submatrix is diago- * nally dominant or not as follows: * DIAG = 'D' : sub( A ) is diagonally dominant, * DIAG = 'N' : sub( A ) is not diagonally dominant. * * OFFA (global input) INTEGER * On entry, OFFA specifies the offdiagonal of the underlying * matrix A(1:DESCA(M_),1:DESCA(N_)) of interest when the subma- * trix is symmetric, Hermitian or diagonally dominant. OFFA = 0 * specifies the main diagonal, OFFA > 0 specifies a subdiago- * nal, and OFFA < 0 specifies a superdiagonal (see further de- * tails). * * M (global input) INTEGER * On entry, M specifies the global number of matrix rows of the * submatrix sub( A ) to be generated. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of matrix columns of * the submatrix sub( A ) to be generated. N must be at least * zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IASEED (global input) INTEGER * On entry, IASEED specifies the seed number to generate the * matrix A. IASEED must be at least zero. * * A (local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). On exit, this array contains the * local entries of the randomly generated submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. When INPLACE is .FALSE., LDA is usually DESCA(LLD_). * This restriction is however not enforced, and this subroutine * requires only that LDA >= MAX( 1, Mp ) where * * Mp = PB_NUMROC( M, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ). * * PB_NUMROC is a ScaLAPACK tool function; MYROW, MYCOL, NPROW * and NPCOL can be determined by calling the BLACS subroutine * BLACS_GRIDINFO. * * Further Details * =============== * * OFFD is tied to the matrix described by DESCA, as opposed to the * piece that is currently (re)generated. This is a global information * independent from the distribution parameters. Below are examples of * the meaning of OFFD for a global 7 by 5 matrix: * * --------------------------------------------------------------------- * OFFD | 0 -1 -2 -3 -4 0 -1 -2 -3 -4 0 -1 -2 -3 -4 * -------|------------------------------------------------------------- * | | OFFD=-1 | OFFD=0 OFFD=2 * | V V * 0 | . d . . . -> d . . . . . . . . . * 1 | . . d . . . d . . . . . . . . * 2 | . . . d . . . d . . -> d . . . . * 3 | . . . . d . . . d . . d . . . * 4 | . . . . . . . . . d . . d . . * 5 | . . . . . . . . . . . . . d . * 6 | . . . . . . . . . . . . . . d * --------------------------------------------------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL DIAGDO, SYMM, HERM, NOTRAN INTEGER CSRC, I, IACOL, IAROW, ICTXT, IIA, ILOCBLK, $ ILOCOFF, ILOW, IMB, IMB1, IMBLOC, IMBVIR, INB, $ INB1, INBLOC, INBVIR, INFO, IOFFDA, ITMP, IUPP, $ IVIR, JJA, JLOCBLK, JLOCOFF, JVIR, LCMT00, $ LMBLOC, LNBLOC, LOW, MAXMN, MB, MBLKS, MP, $ MRCOL, MRROW, MYCDIST, MYCOL, MYRDIST, MYROW, $ NB, NBLKS, NPCOL, NPROW, NQ, NVIR, RSRC, UPP COMPLEX*16 ALPHA * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ), IMULADD( 4, JMP_LEN ), $ IRAN( 2 ), JMP( JMP_LEN ), MULADD0( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_CHKMAT, PB_DESCTRANS, PB_INITJMP, $ PB_INITMULADD, PB_JUMP, PB_JUMPIT, PB_LOCINFO, $ PB_SETLOCRAN, PB_SETRAN, PB_ZLAGEN, PXERBLA, $ PZLADOM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Data Statements .. DATA ( MULADD0( I ), I = 1, 4 ) / 20077, 16838, $ 12345, 0 / * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Test the input arguments * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1000 + CTXT_ ) ELSE SYMM = LSAME( AFORM, 'S' ) HERM = LSAME( AFORM, 'H' ) NOTRAN = LSAME( AFORM, 'N' ) DIAGDO = LSAME( DIAG, 'D' ) IF( .NOT.( SYMM.OR.HERM.OR.NOTRAN ) .AND. $ .NOT.( LSAME( AFORM, 'T' ) ) .AND. $ .NOT.( LSAME( AFORM, 'C' ) ) ) THEN INFO = -2 ELSE IF( ( .NOT.DIAGDO ) .AND. $ ( .NOT.LSAME( DIAG, 'N' ) ) ) THEN INFO = -3 END IF CALL PB_CHKMAT( ICTXT, M, 5, N, 6, IA, JA, DESCA2, 10, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZLAGEN', -INFO ) RETURN END IF * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) IMB = DESCA2( IMB_ ) INB = DESCA2( INB_ ) RSRC = DESCA2( RSRC_ ) CSRC = DESCA2( CSRC_ ) * * Figure out local information about the distributed matrix operand * CALL PB_AINFOG2L( M, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, MP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * IOFFDA = JA + OFFA - IA CALL PB_BINFO( IOFFDA, MP, NQ, IMB1, INB1, MB, NB, MRROW, $ MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, $ LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) * * Initialize ILOCBLK, ILOCOFF, MYRDIST, JLOCBLK, JLOCOFF, MYCDIST * This values correspond to the square virtual underlying matrix * of size MAX( M_ + MAX( 0, -OFFA ), N_ + MAX( 0, OFFA ) ) used * to set up the random sequence. For practical purposes, the size * of this virtual matrix is upper bounded by M_ + N_ - 1. * ITMP = MAX( 0, -OFFA ) IVIR = IA + ITMP IMBVIR = IMB + ITMP NVIR = DESCA2( M_ ) + ITMP * CALL PB_LOCINFO( IVIR, IMBVIR, MB, MYROW, RSRC, NPROW, ILOCBLK, $ ILOCOFF, MYRDIST ) * ITMP = MAX( 0, OFFA ) JVIR = JA + ITMP INBVIR = INB + ITMP NVIR = MAX( MAX( NVIR, DESCA2( N_ ) + ITMP ), $ DESCA2( M_ ) + DESCA2( N_ ) - 1 ) * CALL PB_LOCINFO( JVIR, INBVIR, NB, MYCOL, CSRC, NPCOL, JLOCBLK, $ JLOCOFF, MYCDIST ) * IF( SYMM .OR. HERM .OR. NOTRAN ) THEN * CALL PB_INITJMP( .TRUE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_ZLAGEN( 'Lower', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( SYMM .OR. HERM .OR. ( .NOT. NOTRAN ) ) THEN * CALL PB_INITJMP( .FALSE., NVIR, IMBVIR, INBVIR, IMBLOC, INBLOC, $ MB, NB, RSRC, CSRC, NPROW, NPCOL, 2, JMP ) * * Compute constants to jump JMP( * ) numbers in the sequence * CALL PB_INITMULADD( MULADD0, JMP, IMULADD ) * * Compute and set the random value corresponding to A( IA, JA ) * CALL PB_SETLOCRAN( IASEED, ILOCBLK, JLOCBLK, ILOCOFF, JLOCOFF, $ MYRDIST, MYCDIST, NPROW, NPCOL, JMP, $ IMULADD, IRAN ) * CALL PB_ZLAGEN( 'Upper', AFORM, A( IIA, JJA ), LDA, LCMT00, $ IRAN, MBLKS, IMBLOC, MB, LMBLOC, NBLKS, INBLOC, $ NB, LNBLOC, JMP, IMULADD ) * END IF * IF( DIAGDO ) THEN * MAXMN = MAX( DESCA2( M_ ), DESCA2( N_ ) ) IF( HERM ) THEN ALPHA = DCMPLX( DBLE( 2 * MAXMN ), ZERO ) ELSE ALPHA = DCMPLX( DBLE( NVIR ), DBLE( MAXMN ) ) END IF * IF( IOFFDA.GE.0 ) THEN CALL PZLADOM( INPLACE, MIN( MAX( 0, M-IOFFDA ), N ), ALPHA, $ A, MIN( IA+IOFFDA, IA+M-1 ), JA, DESCA ) ELSE CALL PZLADOM( INPLACE, MIN( M, MAX( 0, N+IOFFDA ) ), ALPHA, $ A, IA, MIN( JA-IOFFDA, JA+N-1 ), DESCA ) END IF * END IF * RETURN * * End of PZLAGEN * END SUBROUTINE PZLADOM( INPLACE, N, ALPHA, A, IA, JA, DESCA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL INPLACE INTEGER IA, JA, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLADOM adds alpha to the diagonal entries of an n by n submatrix * sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * INPLACE (global input) LOGICAL * On entry, INPLACE specifies if the matrix should be generated * in place or not. If INPLACE is .TRUE., the local random array * to be generated will start in memory at the local memory lo- * cation A( 1, 1 ), otherwise it will start at the local posi- * tion induced by IA and JA. * * N (global input) INTEGER * On entry, N specifies the global order of the submatrix * sub( A ) to be modified. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. On exit, the local entries * of this array corresponding to the main diagonal of sub( A ) * have been updated. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL GODOWN, GOLEFT INTEGER I, IACOL, IAROW, ICTXT, IIA, IJOFFA, ILOW, $ IMB1, IMBLOC, INB1, INBLOC, IOFFA, IOFFD, IUPP, $ JJA, JOFFA, JOFFD, LCMT, LCMT00, LDA, LDAP1, $ LMBLOC, LNBLOC, LOW, MB, MBLKD, MBLKS, MBLOC, $ MRCOL, MRROW, MYCOL, MYROW, NB, NBLKD, NBLKS, $ NBLOC, NP, NPCOL, NPROW, NQ, PMB, QNB, UPP COMPLEX*16 ATMP * .. * .. Local Scalars .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_AINFOG2L, PB_BINFO, $ PB_DESCTRANS * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, MAX, MIN * .. * .. Executable Statements .. * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * * Get grid parameters * ICTXT = DESCA2( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( N.EQ.0 ) $ RETURN * CALL PB_AINFOG2L( N, N, IA, JA, DESCA2, NPROW, NPCOL, MYROW, $ MYCOL, IMB1, INB1, NP, NQ, IIA, JJA, IAROW, $ IACOL, MRROW, MRCOL ) * * Decide where the entries shall be stored in memory * IF( INPLACE ) THEN IIA = 1 JJA = 1 END IF * * Initialize LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, * ILOW, LOW, IUPP, and UPP. * MB = DESCA2( MB_ ) NB = DESCA2( NB_ ) * CALL PB_BINFO( 0, NP, NQ, IMB1, INB1, MB, NB, MRROW, MRCOL, $ LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, $ LNBLOC, ILOW, LOW, IUPP, UPP ) * IOFFA = IIA - 1 JOFFA = JJA - 1 LDA = DESCA2( LLD_ ) LDAP1 = LDA + 1 * IF( DESCA2( RSRC_ ).LT.0 ) THEN PMB = MB ELSE PMB = NPROW * MB END IF IF( DESCA2( CSRC_ ).LT.0 ) THEN QNB = NB ELSE QNB = NPCOL * NB END IF * * Handle the first block of rows or columns separately, and update * LCMT00, MBLKS and NBLKS. * GODOWN = ( LCMT00.GT.IUPP ) GOLEFT = ( LCMT00.LT.ILOW ) * IF( .NOT.GODOWN .AND. .NOT.GOLEFT ) THEN * * LCMT00 >= ILOW && LCMT00 <= IUPP * IF( LCMT00.GE.0 ) THEN IJOFFA = IOFFA+LCMT00 + ( JOFFA - 1 ) * LDA DO 10 I = 1, MIN( INBLOC, MAX( 0, IMBLOC - LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 10 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFA - LCMT00 - 1 ) * LDA DO 20 I = 1, MIN( IMBLOC, MAX( 0, INBLOC + LCMT00 ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 20 CONTINUE END IF GOLEFT = ( ( LCMT00 - ( IUPP - UPP + PMB ) ).LT.ILOW ) GODOWN = .NOT.GOLEFT * END IF * IF( GODOWN ) THEN * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * 30 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 30 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 40 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.ILOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 50 I = 1, MIN( INBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 50 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 60 I = 1, MIN( MBLOC, MAX( 0, INBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 60 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 40 END IF * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * ELSE IF( GOLEFT ) THEN * LCMT00 = LCMT00 + LOW - ILOW + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + INBLOC * 70 CONTINUE IF( NBLKS.GT.0 .AND. LCMT00.LT.LOW ) THEN LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NB GO TO 70 END IF * LCMT = LCMT00 NBLKD = NBLKS JOFFD = JOFFA * NBLOC = NB 80 CONTINUE IF( NBLKD.GT.0 .AND. LCMT.LE.IUPP ) THEN IF( NBLKD.EQ.1 ) $ NBLOC = LNBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFA + LCMT + ( JOFFD - 1 ) * LDA DO 90 I = 1, MIN( NBLOC, MAX( 0, IMBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 90 CONTINUE ELSE IJOFFA = IOFFA + ( JOFFD - LCMT - 1 ) * LDA DO 100 I = 1, MIN( IMBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 100 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT + QNB NBLKS = NBLKD NBLKD = NBLKD - 1 JOFFA = JOFFD JOFFD = JOFFD + NBLOC GO TO 80 END IF * LCMT00 = LCMT00 - ( IUPP - UPP + PMB ) MBLKS = MBLKS - 1 IOFFA = IOFFA + IMBLOC * END IF * NBLOC = NB 110 CONTINUE IF( NBLKS.GT.0 ) THEN IF( NBLKS.EQ.1 ) $ NBLOC = LNBLOC 120 CONTINUE IF( MBLKS.GT.0 .AND. LCMT00.GT.UPP ) THEN LCMT00 = LCMT00 - PMB MBLKS = MBLKS - 1 IOFFA = IOFFA + MB GO TO 120 END IF * LCMT = LCMT00 MBLKD = MBLKS IOFFD = IOFFA * MBLOC = MB 130 CONTINUE IF( MBLKD.GT.0 .AND. LCMT.GE.LOW ) THEN IF( MBLKD.EQ.1 ) $ MBLOC = LMBLOC IF( LCMT.GE.0 ) THEN IJOFFA = IOFFD + LCMT + ( JOFFA - 1 ) * LDA DO 140 I = 1, MIN( NBLOC, MAX( 0, MBLOC - LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 140 CONTINUE ELSE IJOFFA = IOFFD + ( JOFFA - LCMT - 1 ) * LDA DO 150 I = 1, MIN( MBLOC, MAX( 0, NBLOC + LCMT ) ) ATMP = A( IJOFFA + I*LDAP1 ) A( IJOFFA + I*LDAP1 ) = ALPHA + $ DCMPLX( ABS( DBLE( ATMP ) ), $ ABS( DIMAG( ATMP ) ) ) 150 CONTINUE END IF LCMT00 = LCMT LCMT = LCMT - PMB MBLKS = MBLKD MBLKD = MBLKD - 1 IOFFA = IOFFD IOFFD = IOFFD + MBLOC GO TO 130 END IF * LCMT00 = LCMT00 + QNB NBLKS = NBLKS - 1 JOFFA = JOFFA + NBLOC GO TO 110 * END IF * RETURN * * End of PZLADOM * END SUBROUTINE PB_PZLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PB_PZLAPRNT prints to the standard output a submatrix sub( A ) deno- * ting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and printed by * the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the row index of the printing pro- * cess. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the column index of the printing * process. * * CMATNM (global input) CHARACTER*(*) * On entry, CMATNM is the name of the matrix to be printed. * * NOUT (global input) INTEGER * On entry, NOUT specifies the output unit number. When NOUT is * equal to 6, the submatrix is printed on the screen. * * WORK (local workspace) COMPLEX*16 array * On entry, WORK is a work array of dimension at least equal to * MAX( IMB_A, MB_A ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW, PCOL, PROW * .. * .. Local Arrays .. INTEGER DESCA2( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_DESCTRANS, PB_PZLAPRN2 * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Convert descriptor * CALL PB_DESCTRANS( DESCA, DESCA2 ) * CALL BLACS_GRIDINFO( DESCA2( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( DESCA2( RSRC_ ).GE.0 ) THEN IF( DESCA2( CSRC_ ).GE.0 ) THEN CALL PB_PZLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, ICPRNT, $ CMATNM, NOUT, DESCA2( RSRC_ ), $ DESCA2( CSRC_ ), WORK ) ELSE DO 10 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Colum-replicated array -- ' , $ 'copy in process column: ', PCOL CALL PB_PZLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, DESCA2( RSRC_ ), $ PCOL, WORK ) 10 CONTINUE END IF ELSE IF( DESCA2( CSRC_ ).GE.0 ) THEN DO 20 PROW = 0, NPROW - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Row-replicated array -- ' , $ 'copy in process row: ', PROW CALL PB_PZLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, $ DESCA2( CSRC_ ), WORK ) 20 CONTINUE ELSE DO 40 PROW = 0, NPROW - 1 DO 30 PCOL = 0, NPCOL - 1 IF( ( MYROW.EQ.IRPRNT ).AND.( MYCOL.EQ.ICPRNT ) ) $ WRITE( NOUT, * ) 'Replicated array -- ' , $ 'copy in process (', PROW, ',', PCOL, ')' CALL PB_PZLAPRN2( M, N, A, IA, JA, DESCA2, IRPRNT, $ ICPRNT, CMATNM, NOUT, PROW, PCOL, $ WORK ) 30 CONTINUE 40 CONTINUE END IF END IF * RETURN * * End of PB_PZLAPRNT * END SUBROUTINE PB_PZLAPRN2( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, PROW, PCOL, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT, PCOL, PROW * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL AISCOLREP, AISROWREP INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, LDW, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, PB_INFOG2L, $ ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG, MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL PB_INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) II = IIA JJ = JJA IF( DESCA( RSRC_ ).LT.0 ) THEN AISROWREP = .TRUE. IAROW = PROW ICURROW = PROW ELSE AISROWREP = .FALSE. ICURROW = IAROW END IF IF( DESCA( CSRC_ ).LT.0 ) THEN AISCOLREP = .TRUE. IACOL = PCOL ICURCOL = PCOL ELSE AISCOLREP = .FALSE. ICURCOL = IACOL END IF LDA = DESCA( LLD_ ) LDW = MAX( DESCA( IMB_ ), DESCA( MB_ ) ) * * Handle the first block of column separately * JB = DESCA( INB_ ) - JA + 1 IF( JB.LE.0 ) $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB JB = MIN( JB, N ) JN = JA+JB-1 DO 60 H = 0, JB-1 IB = DESCA( IMB_ ) - IA + 1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IB = DESCA( IMB_ )-IA+1 IF( IB.LE.0 ) $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB IB = MIN( IB, M ) IN = IA+IB-1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, LDW, ICURROW, $ ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB IF( .NOT.AISROWREP ) $ ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB IF( .NOT.AISCOLREP ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT( 1X, A, '(', I6, ',', I6, ')=', D30.18, '+i*(', $ D30.18, ')' ) * RETURN * * End of PB_PZLAPRN2 * END SUBROUTINE PB_ZFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX*16 CHKVAL * .. * .. Array Arguments .. COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PB_ZFILLPAD surrounds a two dimensional local array with a guard-zone * initialized to the value CHKVAL. The user may later call the routine * PB_ZCHEKPAD to discover if the guardzone has been violated. There are * three guardzones. The first is a buffer of size IPRE that is before * the start of the array. The second is the buffer of size IPOST which * is after the end of the array to be padded. Finally, there is a guard * zone inside every column of the array to be padded, in the elements * of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). On exit, this * array is the padded array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) COMPLEX*16 * On entry, CHKVAL specifies the value to pad the array with. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no pre-guardzone in PB_ZFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = '(A)' ) $ 'WARNING no post-guardzone in PB_ZFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + ( LDA - M ) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PB_ZFILLPAD * END SUBROUTINE PB_ZCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX*16 CHKVAL * .. * .. Array Arguments .. CHARACTER*(*) MESS COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PB_ZCHEKPAD checks that the padding around a local array has not been * overwritten since the call to PB_ZFILLPAD. Three types of errors are * reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells that the 5th element of the IPRE long buffer has been overwrit- * ten with the value 18, where it should still have the value CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells that the 19th element after the end of A was overwritten with * the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells that the element at the 12th row and 3rd column of A was over- * written with the value of 22, where it should still have the value of * CHKVAL. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * MESS (local input) CHARACTER*(*) * On entry, MESS is a ttring containing a user-defined message. * * M (local input) INTEGER * On entry, M specifies the number of rows in the local array * A. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns in the local ar- * ray A. N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the local * array to be padded. LDA must be at least MAX( 1, M ). * * IPRE (local input) INTEGER * On entry, IPRE specifies the size of the guard zone to put * before the start of the padded array. * * IPOST (local input) INTEGER * On entry, IPOST specifies the size of the guard zone to put * after the end of the padded array. * * CHKVAL (local input) COMPLEX*16 * On entry, CHKVAL specifies the value to pad the array with. * * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. CHARACTER*1 TOP INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_TOPGET * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ DBLE( A( I ) ), DIMAG( A( I ) ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PB_ZCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, DBLE( A( I ) ), $ DIMAG( A( I ) ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PB_ZCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, DBLE( A( I ) ), $ DIMAG( A( I ) ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', TOP ) CALL IGAMX2D( ICTXT, 'All', TOP, 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G20.7, '+ i*', $ G20.7 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G20.7, $ '+ i*', G20.7 ) * RETURN * * End of PB_ZCHEKPAD * END SUBROUTINE PB_ZLASET( UPLO, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PB_ZLASET initializes a two-dimensional array A to beta on the diago- * nal specified by IOFFD and alpha on the offdiagonals. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * = 'D' or 'd' Only the diagonal of A is set, * Otherwise: All of the array A is set. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the value to which the offdiagonal * array elements are set to. * * BETA (input) COMPLEX*16 * On entry, BETA specifies the value to which the diagonal ar- * ray elements are set to. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be set, and the strictly lower trape- * zoidal part of A is not referenced; When IUPLO = 'L' or 'l', * the leading m by n part of the array A must contain the * lower trapezoidal part of the matrix as specified by IOFFD to * be set, and the strictly upper trapezoidal part of A is * not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA and the strictly lower triangular * part of the array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA and the strictly upper triangular * part of the array to ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 50 I = 1, JTMP - 1 A( I, J ) = ALPHA 50 CONTINUE A( JTMP, J ) = BETA 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the array to BETA on the diagonal. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 90 CONTINUE * ELSE * * Set the array to BETA on the diagonal and ALPHA on the * offdiagonal. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA 100 CONTINUE 110 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 120 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 120 CONTINUE END IF * END IF * RETURN * * End of PB_ZLASET * END SUBROUTINE PB_ZLASCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PB_ZLASCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA * A( I, J ) 10 CONTINUE 20 CONTINUE DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = ALPHA * A( I, J ) 50 CONTINUE 60 CONTINUE DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = ALPHA * A( I, J ) 100 CONTINUE 110 CONTINUE * END IF * RETURN * * End of PB_ZLASCAL * END SUBROUTINE PB_ZLAGEN( UPLO, AFORM, A, LDA, LCMT00, IRAN, MBLKS, $ IMBLOC, MB, LMBLOC, NBLKS, INBLOC, NB, $ LNBLOC, JMP, IMULADD ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO, AFORM INTEGER IMBLOC, INBLOC, LCMT00, LDA, LMBLOC, LNBLOC, $ MB, MBLKS, NB, NBLKS * .. * .. Array Arguments .. INTEGER IMULADD( 4, * ), IRAN( * ), JMP( * ) COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PB_ZLAGEN locally initializes an array A. * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the lower (UPLO='L') trape- * zoidal part or the upper (UPLO='U') trapezoidal part is to be * generated when the matrix to be generated is symmetric or * Hermitian. For all the other values of AFORM, the value of * this input argument is ignored. * * AFORM (global input) CHARACTER*1 * On entry, AFORM specifies the type of submatrix to be genera- * ted as follows: * AFORM = 'S', sub( A ) is a symmetric matrix, * AFORM = 'H', sub( A ) is a Hermitian matrix, * AFORM = 'T', sub( A ) is overrwritten with the transpose * of what would normally be generated, * AFORM = 'C', sub( A ) is overwritten with the conjugate * transpose of what would normally be genera- * ted. * AFORM = 'N', a random submatrix is generated. * * A (local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, *). On exit, * this array contains the local entries of the randomly genera- * ted submatrix sub( A ). * * LDA (local input) INTEGER * On entry, LDA specifies the local leading dimension of the * array A. LDA must be at least one. * * LCMT00 (global input) INTEGER * On entry, LCMT00 is the LCM value specifying the off-diagonal * of the underlying matrix of interest. LCMT00=0 specifies the * main diagonal, LCMT00 > 0 specifies a subdiagonal, LCMT00 < 0 * specifies superdiagonals. * * IRAN (local input) INTEGER array * On entry, IRAN is an array of dimension 2 containing respec- * tively the 16-lower and 16-higher bits of the encoding of the * entry of the random sequence corresponding locally to the * first local array entry to generate. Usually, this array is * computed by PB_SETLOCRAN. * * MBLKS (local input) INTEGER * On entry, MBLKS specifies the local number of blocks of rows. * MBLKS is at least zero. * * IMBLOC (local input) INTEGER * On entry, IMBLOC specifies the number of rows (size) of the * local uppest blocks. IMBLOC is at least zero. * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * LMBLOC (local input) INTEGER * On entry, LMBLOC specifies the number of rows (size) of the * local lowest blocks. LMBLOC is at least zero. * * NBLKS (local input) INTEGER * On entry, NBLKS specifies the local number of blocks of co- * lumns. NBLKS is at least zero. * * INBLOC (local input) INTEGER * On entry, INBLOC specifies the number of columns (size) of * the local leftmost blocks. INBLOC is at least zero. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * LNBLOC (local input) INTEGER * On entry, LNBLOC specifies the number of columns (size) of * the local rightmost blocks. LNBLOC is at least zero. * * JMP (local input) INTEGER array * On entry, JMP is an array of dimension JMP_LEN containing the * different jump values used by the random matrix generator. * * IMULADD (local input) INTEGER array * On entry, IMULADD is an array of dimension (4, JMP_LEN). The * jth column of this array contains the encoded initial cons- * tants a_j and c_j to jump from X( n ) to X( n + JMP( j ) ) * (= a_j * X( n ) + c_j) in the random sequence. IMULADD(1:2,j) * contains respectively the 16-lower and 16-higher bits of the * constant a_j, and IMULADD(3:4,j) contains the 16-lower and * 16-higher bits of the constant c_j. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER JMP_1, JMP_COL, JMP_IMBV, JMP_INBV, JMP_LEN, $ JMP_MB, JMP_NB, JMP_NPIMBLOC, JMP_NPMB, $ JMP_NQINBLOC, JMP_NQNB, JMP_ROW PARAMETER ( JMP_1 = 1, JMP_ROW = 2, JMP_COL = 3, $ JMP_MB = 4, JMP_IMBV = 5, JMP_NPMB = 6, $ JMP_NPIMBLOC = 7, JMP_NB = 8, JMP_INBV = 9, $ JMP_NQNB = 10, JMP_NQINBLOC = 11, $ JMP_LEN = 11 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IB, IBLK, II, IK, ITMP, JB, JBLK, JJ, JK, $ JTMP, LCMTC, LCMTR, LOW, MNB, UPP COMPLEX*16 DUMMY * .. * .. Local Arrays .. INTEGER IB0( 2 ), IB1( 2 ), IB2( 2 ), IB3( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_JUMPIT * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PB_DRAND EXTERNAL LSAME, PB_DRAND * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Executable Statements .. * DO 10 I = 1, 2 IB1( I ) = IRAN( I ) IB2( I ) = IRAN( I ) IB3( I ) = IRAN( I ) 10 CONTINUE * IF( LSAME( AFORM, 'N' ) ) THEN * * Generate random matrix * JJ = 1 * DO 50 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * DO 40 JK = JJ, JJ + JB - 1 * II = 1 * DO 30 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * * Blocks are IB by JB * DO 20 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 20 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 30 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 40 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 50 CONTINUE * ELSE IF( LSAME( AFORM, 'T' ) ) THEN * * Generate the transpose of the matrix that would be normally * generated. * II = 1 * DO 90 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 80 IK = II, II + IB - 1 * JJ = 1 * DO 70 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 60 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 60 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 70 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 80 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 90 CONTINUE * ELSE IF( LSAME( AFORM, 'S' ) ) THEN * * Generate a symmetric matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 170 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 160 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 150 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 100 IK = II, II + IB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 100 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 110 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 110 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 120 IK = II, ITMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 120 CONTINUE * DO 130 IK = ITMP, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 130 CONTINUE * END IF * ELSE * DO 140 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 140 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 150 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 160 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 170 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 250 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 240 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 230 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 180 JK = JJ, JJ + JB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 180 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 190 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 190 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 200 JK = JJ, JTMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 200 CONTINUE * DO 210 JK = JTMP, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 210 CONTINUE * END IF * ELSE * DO 220 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 220 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 230 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 240 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 250 CONTINUE * END IF * ELSE IF( LSAME( AFORM, 'C' ) ) THEN * * Generate the conjugate transpose of the matrix that would be * normally generated. * II = 1 * DO 290 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC ELSE IB = MB END IF * DO 280 IK = II, II + IB - 1 * JJ = 1 * DO 270 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC ELSE JB = NB END IF * * Blocks are IB by JB * DO 260 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 260 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 270 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 280 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 290 CONTINUE * ELSE IF( LSAME( AFORM, 'H' ) ) THEN * * Generate a Hermitian matrix * IF( LSAME( UPLO, 'L' ) ) THEN * * generate lower trapezoidal part * JJ = 1 LCMTC = LCMT00 * DO 370 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * DO 360 JK = JJ, JJ + JB - 1 * II = 1 LCMTR = LCMTC * DO 350 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * * Blocks are IB by JB * IF( LCMTR.GT.UPP ) THEN * DO 300 IK = II, II + IB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 300 CONTINUE * ELSE IF( LCMTR.GE.LOW ) THEN * JTMP = JK - JJ + 1 MNB = MAX( 0, -LCMTR ) * IF( JTMP.LE.MIN( MNB, JB ) ) THEN * DO 310 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 310 CONTINUE * ELSE IF( ( JTMP.GE.( MNB + 1 ) ) .AND. $ ( JTMP.LE.MIN( IB-LCMTR, JB ) ) ) THEN * ITMP = II + JTMP + LCMTR - 1 * DO 320 IK = II, ITMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 320 CONTINUE * IF( ITMP.LE.( II + IB - 1 ) ) THEN DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) A( ITMP, JK ) = DCMPLX( DBLE( DUMMY ), $ ZERO ) END IF * DO 330 IK = ITMP + 1, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 330 CONTINUE * END IF * ELSE * DO 340 IK = II, II + IB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ PB_DRAND( 0 ) ) 340 CONTINUE * END IF * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 350 CONTINUE * * Jump one column * CALL PB_JUMPIT( IMULADD( 1, JMP_COL ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 360 CONTINUE * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB3, IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 370 CONTINUE * ELSE * * generate upper trapezoidal part * II = 1 LCMTR = LCMT00 * DO 450 IBLK = 1, MBLKS * IF( IBLK.EQ.1 ) THEN IB = IMBLOC UPP = IMBLOC - 1 ELSE IF( IBLK.EQ.MBLKS ) THEN IB = LMBLOC UPP = MB - 1 ELSE IB = MB UPP = MB - 1 END IF * DO 440 IK = II, II + IB - 1 * JJ = 1 LCMTC = LCMTR * DO 430 JBLK = 1, NBLKS * IF( JBLK.EQ.1 ) THEN JB = INBLOC LOW = 1 - INBLOC ELSE IF( JBLK.EQ.NBLKS ) THEN JB = LNBLOC LOW = 1 - NB ELSE JB = NB LOW = 1 - NB END IF * * Blocks are IB by JB * IF( LCMTC.LT.LOW ) THEN * DO 380 JK = JJ, JJ + JB - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 380 CONTINUE * ELSE IF( LCMTC.LE.UPP ) THEN * ITMP = IK - II + 1 MNB = MAX( 0, LCMTC ) * IF( ITMP.LE.MIN( MNB, IB ) ) THEN * DO 390 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 390 CONTINUE * ELSE IF( ( ITMP.GE.( MNB + 1 ) ) .AND. $ ( ITMP.LE.MIN( JB+LCMTC, IB ) ) ) THEN * JTMP = JJ + ITMP - LCMTC - 1 * DO 400 JK = JJ, JTMP - 1 DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 400 CONTINUE * IF( JTMP.LE.( JJ + JB - 1 ) ) THEN DUMMY = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) A( IK, JTMP ) = DCMPLX( DBLE( DUMMY ), $ ZERO ) END IF * DO 410 JK = JTMP + 1, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 410 CONTINUE * END IF * ELSE * DO 420 JK = JJ, JJ + JB - 1 A( IK, JK ) = DCMPLX( PB_DRAND( 0 ), $ -PB_DRAND( 0 ) ) 420 CONTINUE * END IF * JJ = JJ + JB * IF( JBLK.EQ.1 ) THEN * * Jump INBLOC + ( NPCOL - 1 ) * NB columns * LCMTC = LCMTC + JMP( JMP_NQINBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQINBLOC ), IB1, $ IB0 ) * ELSE * * Jump NPCOL * NB columns * LCMTC = LCMTC + JMP( JMP_NQNB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NQNB ), IB1, $ IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) * 430 CONTINUE * * Jump one row * CALL PB_JUMPIT( IMULADD( 1, JMP_ROW ), IB2, IB0 ) * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) * 440 CONTINUE * II = II + IB * IF( IBLK.EQ.1 ) THEN * * Jump IMBLOC + ( NPROW - 1 ) * MB rows * LCMTR = LCMTR - JMP( JMP_NPIMBLOC ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPIMBLOC ), IB3, IB0 ) * ELSE * * Jump NPROW * MB rows * LCMTR = LCMTR - JMP( JMP_NPMB ) CALL PB_JUMPIT( IMULADD( 1, JMP_NPMB ), IB3, IB0 ) * END IF * IB1( 1 ) = IB0( 1 ) IB1( 2 ) = IB0( 2 ) IB2( 1 ) = IB0( 1 ) IB2( 2 ) = IB0( 2 ) IB3( 1 ) = IB0( 1 ) IB3( 2 ) = IB0( 2 ) * 450 CONTINUE * END IF * END IF * RETURN * * End of PB_ZLAGEN * END DOUBLE PRECISION FUNCTION PB_DRAND( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAND generates the next number in the random sequence. This func- * tion ensures that this number will be in the interval ( -1.0, 1.0 ). * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION PB_DRAN EXTERNAL PB_DRAN * .. * .. Executable Statements .. * PB_DRAND = ONE - TWO * PB_DRAN( IDUMM ) * RETURN * * End of PB_DRAND * END DOUBLE PRECISION FUNCTION PB_DRAN( IDUMM ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER IDUMM * .. * * Purpose * ======= * * PB_DRAN generates the next number in the random sequence. * * Arguments * ========= * * IDUMM (local input) INTEGER * This argument is ignored, but necessary to a FORTRAN 77 func- * tion. * * Further Details * =============== * * On entry, the array IRAND stored in the common block RANCOM contains * the information (2 integers) required to generate the next number in * the sequence X( n ). This number is computed as * * X( n ) = ( 2^16 * IRAND( 2 ) + IRAND( 1 ) ) / d, * * where the constant d is the largest 32 bit positive integer. The * array IRAND is then updated for the generation of the next number * X( n+1 ) in the random sequence as follows X( n+1 ) = a * X( n ) + c. * The constants a and c should have been preliminarily stored in the * array IACS as 2 pairs of integers. The initial set up of IRAND and * IACS is performed by the routine PB_SETRAN. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION DIVFAC, POW16 PARAMETER ( DIVFAC = 2.147483648D+9, $ POW16 = 6.5536D+4 ) * .. * .. Local Arrays .. INTEGER J( 2 ) * .. * .. External Subroutines .. EXTERNAL PB_LADD, PB_LMUL * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Common Blocks .. INTEGER IACS( 4 ), IRAND( 2 ) COMMON /RANCOM/ IRAND, IACS * .. * .. Save Statements .. SAVE /RANCOM/ * .. * .. Executable Statements .. * PB_DRAN = ( DBLE( IRAND( 1 ) ) + POW16 * DBLE( IRAND( 2 ) ) ) / $ DIVFAC * CALL PB_LMUL( IRAND, IACS, J ) CALL PB_LADD( J, IACS( 3 ), IRAND ) * RETURN * * End of PB_DRAN * END scalapack-1.8.0/PBLAS/TESTING/pzblas2tst.f0000640000175000017500000035407610363532303017570 0ustar muammarmuammar PROGRAM PZBLA2TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA2TST is the main testing program for the PBLAS Level 2 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 61 lines: * 'Level 2 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS2TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 16.0 threshold value of test ratio * 10 value of the logical computational blocksize NB * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * (1.0D0, 0.0D0) value of BETA * 2 number of tests problems * 'U' 'L' values of UPLO * 'N' 'T' values of TRANS * 'N' 'U' values of DIAG * 3 4 values of M * 3 4 values of N * 6 10 values of M_A * 6 10 values of N_A * 2 5 values of IMB_A * 2 5 values of INB_A * 2 5 values of MB_A * 2 5 values of NB_A * 0 1 values of RSRC_A * 0 0 values of CSRC_A * 1 1 values of IA * 1 1 values of JA * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PZGEMV T put F for no test in the same column * PZHEMV T put F for no test in the same column * PZTRMV T put F for no test in the same column * PZTRSV T put F for no test in the same column * PZGERU T put F for no test in the same column * PZGERC T put F for no test in the same column * PZHER T put F for no test in the same column * PZHER2 T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, ZPLXSZ, TOTMEM, $ MEMSIZ, NSUBS, DBLESZ COMPLEX*16 ONE, PADVAL, ZERO, ROGUE PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ ZPLXSZ = 16, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / ZPLXSZ, DBLESZ = 8, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ), $ ROGUE = ( -1.0D+10, 1.0D+10 ), $ ONE = ( 1.0D+0, 0.0D+0 ), NSUBS = 8 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE CHARACTER*1 AFORM, DIAG, DIAGDO, TRANS, UPLO INTEGER CSRCA, CSRCX, CSRCY, I, IA, IAM, IASEED, ICTXT, $ IGAP, IMBA, IMBX, IMBY, IMIDA, IMIDX, IMIDY, $ INBA, INBX, INBY, INCX, INCY, IPA, IPG, IPMATA, $ IPMATX, IPMATY, IPOSTA, IPOSTX, IPOSTY, IPREA, $ IPREX, IPREY, IPX, IPY, IVERB, IX, IXSEED, IY, $ IYSEED, J, JA, JX, JY, K, LDA, LDX, LDY, M, MA, $ MBA, MBX, MBY, MEMREQD, MPA, MPX, MPY, MX, MY, $ MYCOL, MYROW, N, NA, NBA, NBX, NBY, NCOLA, $ NGRIDS, NLX, NLY, NOUT, NPCOL, NPROCS, NPROW, $ NQA, NQX, NQY, NROWA, NTESTS, NX, NY, OFFD, $ RSRCA, RSRCX, RSRCY, TSKIP, TSTCNT REAL THRESH COMPLEX*16 ALPHA, BETA, SCALE * .. * .. Local Arrays .. LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) CHARACTER*1 DIAGVAL( MAXTESTS ), TRANVAL( MAXTESTS ), $ UPLOVAL( MAXTESTS ) CHARACTER*80 OUTFILE INTEGER CSCAVAL( MAXTESTS ), CSCXVAL( MAXTESTS ), $ CSCYVAL( MAXTESTS ), DESCA( DLEN_ ), $ DESCAR( DLEN_ ), DESCX( DLEN_ ), $ DESCXR( DLEN_ ), DESCY( DLEN_ ), $ DESCYR( DLEN_ ), IAVAL( MAXTESTS ), IERR( 6 ), $ IMBAVAL( MAXTESTS ), IMBXVAL( MAXTESTS ), $ IMBYVAL( MAXTESTS ), INBAVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JAVAL( MAXTESTS ), JXVAL( MAXTESTS ), $ JYVAL( MAXTESTS ) INTEGER KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MAVAL( MAXTESTS ), $ MBAVAL( MAXTESTS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MVAL( MAXTESTS ), $ MXVAL( MAXTESTS ), MYVAL( MAXTESTS ), $ NAVAL( MAXTESTS ), NBAVAL( MAXTESTS ), $ NBXVAL( MAXTESTS ), NBYVAL( MAXTESTS ), $ NVAL( MAXTESTS ), NXVAL( MAXTESTS ), $ NYVAL( MAXTESTS ), PVAL( MAXTESTS ), $ QVAL( MAXTESTS ), RSCAVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PZLAPRNT, PB_ZCHEKPAD, $ PB_ZFILLPAD, PB_ZLASCAL, PB_ZLASET, PMDESCCHK, $ PMDIMCHK, PVDESCCHK, PVDIMCHK, PZBLA2TSTINFO, $ PZBLAS2TSTCHK, PZBLAS2TSTCHKE, PZCHKARG2, $ PZCHKVOUT, PZGEMV, PZGERC, PZGERU, PZHEMV, $ PZHER, PZHER2, PZIPSET, PZLAGEN, PZLASCAL, $ PZLASET, PZMPRNT, PZTRMV, PZTRSV, PZVPRNT * .. * .. External Functions .. LOGICAL LSAME INTEGER PB_FCEIL EXTERNAL PB_FCEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, MAX, MOD, REAL * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZGEMV ', 'PZHEMV ', 'PZTRMV ', $ 'PZTRSV ', 'PZGERU ', 'PZGERC ', $ 'PZHER ', 'PZHER2 '/ DATA YCHECK/.TRUE., .TRUE., .FALSE., .FALSE., $ .TRUE., .TRUE., .FALSE., .TRUE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler won't abort on errors, so * that the tester will detect unsupported operations. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IASEED = 100 IXSEED = 200 IYSEED = 300 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA2TSTINFO( OUTFILE, NOUT, NTESTS, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, IMBAVAL, $ MBAVAL, INBAVAL, NBAVAL, RSCAVAL, CSCAVAL, $ IAVAL, JAVAL, MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, CSCYVAL, $ IYVAL, JYVAL, INCYVAL, MAXTESTS, NGRIDS, $ PVAL, MAXGRIDS, QVAL, MAXGRIDS, NBLOG, LTEST, $ SOF, TEE, IAM, IGAP, IVERB, NPROCS, THRESH, $ ALPHA, BETA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9975 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PZBLAS2TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * DIAG = DIAGVAL( J ) TRANS = TRANVAL( J ) UPLO = UPLOVAL( J ) * M = MVAL( J ) N = NVAL( J ) * MA = MAVAL( J ) NA = NAVAL( J ) IMBA = IMBAVAL( J ) INBA = INBAVAL( J ) MBA = MBAVAL( J ) NBA = NBAVAL( J ) RSRCA = RSCAVAL( J ) CSRCA = CSCAVAL( J ) IA = IAVAL( J ) JA = JAVAL( J ) * MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) INBX = INBXVAL( J ) MBX = MBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) * MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) INBY = INBYVAL( J ) MBY = MBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) M, N, UPLO, TRANS, DIAG * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9991 ) IA, JA, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9990 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9988 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9989 ) IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY * WRITE( NOUT, FMT = 9995 ) * END IF * * Check the validity of the input test parameters * IF( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'UPLO' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'TRANS' TSKIP = TSKIP + 1 GO TO 40 END IF * IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) TRANS WRITE( NOUT, FMT = 9997 ) 'DIAG' TSKIP = TSKIP + 1 GO TO 40 END IF * * Check and initialize the matrix descriptors * CALL PMDESCCHK( ICTXT, NOUT, 'A', DESCA, $ BLOCK_CYCLIC_2D_INB, MA, NA, IMBA, INBA, $ MBA, NBA, RSRCA, CSRCA, MPA, NQA, IPREA, $ IMIDA, IPOSTA, IGAP, GAPMUL, IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 2 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 3 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 .OR. $ IERR( 3 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDA = MAX( 1, MA ) LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * the distributed matrices A, X and Y. * IPA = IPREA + 1 IPX = IPA + DESCA( LLD_ )*NQA + IPOSTA + IPREX IPY = IPX + DESCX( LLD_ )*NQX + IPOSTX + IPREY IPMATA = IPY + DESCY( LLD_ )*NQY + IPOSTY IPMATX = IPMATA + MA*NA IPMATY = IPMATX + MX*NX IPG = IPMATY + MAX( MX*NX, MY*NY ) * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPG + PB_FCEIL( REAL( MAX( M, N ) ) * $ REAL( DBLESZ ), REAL( ZPLXSZ ) ) - 1 + $ MAX( MAX( IMBA, MBA ), $ MAX( MAX( IMBX, MBX ), $ MAX( IMBY, MBY ) ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 2 routines * DO 30 K = 1, NSUBS * * Continue only if this subroutine has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) SNAMES( K ) END IF * * Define the size of the operands * IF( K.EQ.1 ) THEN NROWA = M NCOLA = N IF( LSAME( TRANS, 'N' ) ) THEN NLX = N NLY = M ELSE NLX = M NLY = N END IF ELSE IF( K.EQ.5 .OR. K.EQ.6 ) THEN NROWA = M NCOLA = N NLX = M NLY = N ELSE NROWA = N NCOLA = N NLX = N NLY = N END IF * * Check the validity of the operand sizes * CALL PMDIMCHK( ICTXT, NOUT, NROWA, NCOLA, 'A', IA, JA, $ DESCA, IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLX, 'X', IX, JX, DESCX, $ INCX, IERR( 2 ) ) CALL PVDIMCHK( ICTXT, NOUT, NLY, 'Y', IY, JY, DESCY, $ INCY, IERR( 3 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 .OR. $ IERR( 3 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices A, X and Y * IF( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ) THEN AFORM = 'H' DIAGDO = 'N' OFFD = IA - JA ELSE IF( ( K.EQ.4 ).AND.( LSAME( DIAG, 'N' ) ) ) THEN AFORM = 'N' DIAGDO = 'D' OFFD = IA - JA ELSE AFORM = 'N' DIAGDO = 'N' OFFD = 0 END IF * CALL PZLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCA, IASEED, MEM( IPA ), $ DESCA( LLD_ ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCAR, MA, NA, IMBA, INBA, MBA, NBA, $ -1, -1, ICTXT, MAX( 1, MA ) ) CALL PZLAGEN( .FALSE., AFORM, DIAGDO, OFFD, MA, NA, $ 1, 1, DESCAR, IASEED, MEM( IPMATA ), $ DESCAR( LLD_ ) ) CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN * CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * ELSE * * If Y is not needed, generate a copy of X instead * CALL PB_DESCSET2( DESCYR, MX, NX, IMBX, INBX, MBX, $ NBX, -1, -1, ICTXT, MAX( 1, MX ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, $ 1, 1, DESCYR, IXSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) * END IF * * Zero non referenced part of the matrices A * IF( ( K.EQ.2 .OR. K.EQ.7 .OR. K.EQ.8 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN * * The distributed matrix A is Hermitian * IF( LSAME( UPLO, 'L' ) ) THEN * * Zeros the strict upper triangular part of A. * CALL PZLASET( 'Upper', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA, JA+1, DESCA ) IF( K.NE.2 ) THEN CALL PB_ZLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Zeros the strict lower triangular part of A. * CALL PZLASET( 'Lower', NROWA-1, NCOLA-1, ROGUE, $ ROGUE, MEM( IPA ), IA+1, JA, DESCA ) IF( K.NE.2 ) THEN CALL PB_ZLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ROGUE, ROGUE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( K.EQ.3 .OR. K.EQ.4 ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * * The distributed matrix A is lower triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PZLASET( 'Upper', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA, $ JA+1, DESCA ) CALL PB_ZLASET( 'Upper', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA-1+JA*LDA ), $ LDA ) END IF * ELSE * CALL PZLASET( 'Upper', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_ZLASET( 'Upper', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) CALL PZLASCAL( 'Lower', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA+1, JA, $ DESCA ) CALL PB_ZLASCAL( 'Lower', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * The distributed matrix A is upper triangular * IF( LSAME( DIAG, 'N' ) ) THEN * IF( MAX( NROWA, NCOLA ).GT.1 ) THEN CALL PZLASET( 'Lower', NROWA-1, NCOLA-1, $ ROGUE, ROGUE, MEM( IPA ), IA+1, $ JA, DESCA ) CALL PB_ZLASET( 'Lower', NROWA-1, NCOLA-1, 0, $ ZERO, ZERO, $ MEM( IPMATA+IA+(JA-1)*LDA ), $ LDA ) END IF * ELSE * CALL PZLASET( 'Lower', NROWA, NCOLA, ROGUE, ONE, $ MEM( IPA ), IA, JA, DESCA ) CALL PB_ZLASET( 'Lower', NROWA, NCOLA, 0, ZERO, $ ONE, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA ) IF( ( K.EQ.4 ).AND. $ ( MAX( NROWA, NCOLA ).GT.1 ) ) THEN SCALE = ONE / $ DCMPLX( DBLE( MAX( NROWA, NCOLA ) ) ) CALL PZLASCAL( 'Upper', NROWA-1, NCOLA-1, $ SCALE, MEM( IPA ), IA, JA+1, $ DESCA ) CALL PB_ZLASCAL( 'Upper', NROWA-1, NCOLA-1, $ 0, SCALE, $ MEM( IPMATA+IA-1+JA*LDA ), LDA ) END IF * END IF * END IF * END IF * * Pad the guard zones of A, X and Y * CALL PB_ZFILLPAD( ICTXT, MPA, NQA, MEM( IPA-IPREA ), $ DESCA( LLD_ ), IPREA, IPOSTA, PADVAL ) * CALL PB_ZFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_ZFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT-only arguments. * INFO = 0 CALL PZCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN CALL PB_PZLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_INITIAL_A', $ NOUT, MEM( IPG ) ) ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, 0, $ 0, 'PARALLEL_INITIAL_A', NOUT, $ MEM( IPG ) ) END IF * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PZLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PZLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, $ 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPG ) ) END IF * IF( YCHECK( K ) ) THEN IF( IVERB.EQ.2 ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PZLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) ELSE CALL PB_PZLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPG ) ) END IF END IF * * Call the Level 2 PBLAS routine * INFO = 0 IF( K.EQ.1 ) THEN * * Test PZGEMV * CALL PZGEMV( TRANS, M, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PZHEMV * CALL PZIPSET( 'Bignum', N, MEM( IPA ), IA, JA, DESCA ) * CALL PZHEMV( UPLO, N, ALPHA, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPY ), IY, JY, DESCY, INCY ) * CALL PZIPSET( 'Zero', N, MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.3 ) THEN * * Test PZTRMV * CALL PZTRMV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PZTRSV * CALL PZTRSV( UPLO, TRANS, DIAG, N, MEM( IPA ), IA, JA, $ DESCA, MEM( IPX ), IX, JX, DESCX, INCX ) * ELSE IF( K.EQ.5 ) THEN * * Test PZGERU * CALL PZGERU( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.6 ) THEN * * Test PZGERC * CALL PZGERC( M, N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY, $ MEM( IPA ), IA, JA, DESCA ) * ELSE IF( K.EQ.7 ) THEN * * Test PZHER * IF( DCMPLX( DBLE( ALPHA ) ).NE.ZERO ) $ CALL PZIPSET( 'Bignum', N, MEM( IPA ), IA, JA, $ DESCA ) * CALL PZHER( UPLO, N, DBLE( ALPHA ), MEM( IPX ), IX, $ JX, DESCX, INCX, MEM( IPA ), IA, JA, $ DESCA ) * ELSE IF( K.EQ.8 ) THEN * * Test PZHER2 * IF( ALPHA.NE.ZERO ) $ CALL PZIPSET( 'Bignum', N, MEM( IPA ), IA, JA, $ DESCA ) * CALL PZHER2( UPLO, N, ALPHA, MEM( IPX ), IX, JX, $ DESCX, INCX, MEM( IPY ), IY, JY, DESCY, $ INCY, MEM( IPA ), IA, JA, DESCA ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9974 ) INFO GO TO 30 END IF * * Check padding * CALL PB_ZCHEKPAD( ICTXT, SNAMES( K ), MPA, NQA, $ MEM( IPA-IPREA ), DESCA( LLD_ ), IPREA, $ IPOSTA, PADVAL ) * CALL PB_ZCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), IPREX, $ IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_ZCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check the computations * CALL PZBLAS2TSTCHK( ICTXT, NOUT, K, UPLO, TRANS, DIAG, M, $ N, ALPHA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, MEM( IPMATX ), $ MEM( IPX ), IX, JX, DESCX, INCX, $ BETA, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, THRESH, ROGUE, $ MEM( IPG ), INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( MOD( INFO / 4, 2 ).EQ.1 ) THEN IERR( 3 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 IERR( 3 ) = 1 END IF * * Check input-only scalar arguments * INFO = 1 CALL PZCHKARG2( ICTXT, NOUT, SNAMES( K ), UPLO, TRANS, $ DIAG, M, N, ALPHA, IA, JA, DESCA, IX, $ JX, DESCX, INCX, BETA, IY, JY, DESCY, $ INCY, INFO ) * * Check input-only array arguments * CALL PZCHKMOUT( NROWA, NCOLA, MEM( IPMATA ), MEM( IPA ), $ IA, JA, DESCA, IERR( 4 ) ) CALL PZCHKVOUT( NLX, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 5 ) ) * IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_A', $ SNAMES( K ) END IF * IF( IERR( 5 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_X', $ SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PZCHKVOUT( NLY, MEM( IPMATY ), MEM( IPY ), IY, $ JY, DESCY, INCY, IERR( 6 ) ) IF( IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9982 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE.0 .OR. IERR( 5 ).NE.0 .OR. $ IERR( 6 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9984 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9983 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MA, NA, MEM( IPMATA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PZLAPRNT( MA, NA, MEM( IPA ), 1, 1, DESCA, $ 0, 0, 'PARALLEL_A', NOUT, $ MEM( IPMATA ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( ( NROWA.GT.0 ).AND.( NCOLA.GT.0 ) ) $ CALL PZMPRNT( ICTXT, NOUT, NROWA, NCOLA, $ MEM( IPMATA+IA-1+(JA-1)*LDA ), $ LDA, 0, 0, 'SERIAL_A' ) CALL PB_PZLAPRNT( NROWA, NCOLA, MEM( IPA ), IA, JA, $ DESCA, 0, 0, 'PARALLEL_A', $ NOUT, MEM( IPMATA ) ) END IF IF( IERR( 5 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PZLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( NLX.GT.0 ) $ CALL PZVPRNT( ICTXT, NOUT, NLX, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PZLAPRNT( 1, NLX, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PZLAPRNT( NLX, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 6 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PZLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 3 ).NE.0 ) THEN IF( NLY.GT.0 ) $ CALL PZVPRNT( ICTXT, NOUT, NLY, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PZLAPRNT( 1, NLY, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PZLAPRNT( NLY, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9977 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = 9978 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9980 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9976 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, ' ------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' M N UPLO TRANS DIAG' ) 9993 FORMAT( 5X,I6,1X,I6,9X,A1,11X,A1,10X,A1 ) 9992 FORMAT( 2X, ' IA JA MA NA IMBA INBA', $ ' MBA NBA RSRCA CSRCA' ) 9991 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5 ) 9990 FORMAT( 2X, ' IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9989 FORMAT( 5X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I6, $ 1X,I5,1X,I5,1X,I6 ) 9988 FORMAT( 2X, ' IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9987 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9986 FORMAT( 'Not enough memory. Need: ', I12 ) 9985 FORMAT( 2X, ' Tested Subroutine: ', A ) 9984 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9983 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9982 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9981 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9980 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9979 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9978 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9977 FORMAT( 2X, 'Testing Summary') 9976 FORMAT( 2X, 'End of Tests.' ) 9975 FORMAT( 2X, 'Tests started.' ) 9974 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PZBLA2TST * END SUBROUTINE PZBLA2TSTINFO( SUMMRY, NOUT, NMAT, DIAGVAL, TRANVAL, $ UPLOVAL, MVAL, NVAL, MAVAL, NAVAL, $ IMBAVAL, MBAVAL, INBAVAL, NBAVAL, $ RSCAVAL, CSCAVAL, IAVAL, JAVAL, $ MXVAL, NXVAL, IMBXVAL, MBXVAL, $ INBXVAL, NBXVAL, RSCXVAL, CSCXVAL, $ IXVAL, JXVAL, INCXVAL, MYVAL, NYVAL, $ IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, $ INCYVAL, LDVAL, NGRIDS, PVAL, LDPVAL, $ QVAL, LDQVAL, NBLOG, LTEST, SOF, TEE, $ IAM, IGAP, IVERB, NPROCS, THRESH, ALPHA, $ BETA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, NBLOG, $ NGRIDS, NMAT, NOUT, NPROCS REAL THRESH COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY CHARACTER*1 DIAGVAL( LDVAL ), TRANVAL( LDVAL ), $ UPLOVAL( LDVAL ) LOGICAL LTEST( * ) INTEGER CSCAVAL( LDVAL ), CSCXVAL( LDVAL ), $ CSCYVAL( LDVAL ), IAVAL( LDVAL ), $ IMBAVAL( LDVAL ), IMBXVAL( LDVAL ), $ IMBYVAL( LDVAL ), INBAVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JAVAL( LDVAL ), $ JXVAL( LDVAL ), JYVAL( LDVAL ), MAVAL( LDVAL ), $ MBAVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NAVAL( LDVAL ), $ NBAVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCAVAL( LDVAL ), RSCXVAL( LDVAL ), $ RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA2TSTINFO get the needed startup information for testing various * Level 2 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * DIAGVAL (global output) CHARACTER array * On entry, DIAGVAL is an array of dimension LDVAL. On exit, * this array contains the values of DIAG to run the code with. * * TRANVAL (global output) CHARACTER array * On entry, TRANVAL is an array of dimension LDVAL. On exit, * this array contains the values of TRANS to run the code * with. * * UPLOVAL (global output) CHARACTER array * On entry, UPLOVAL is an array of dimension LDVAL. On exit, * this array contains the values of UPLO to run the code with. * * MVAL (global output) INTEGER array * On entry, MVAL is an array of dimension LDVAL. On exit, this * array contains the values of M to run the code with. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MAVAL (global output) INTEGER array * On entry, MAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( M_ ) to run the code * with. * * NAVAL (global output) INTEGER array * On entry, NAVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCA( N_ ) to run the code * with. * * IMBAVAL (global output) INTEGER array * On entry, IMBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( IMB_ ) to run the * code with. * * MBAVAL (global output) INTEGER array * On entry, MBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( MB_ ) to run the * code with. * * INBAVAL (global output) INTEGER array * On entry, INBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( INB_ ) to run the * code with. * * NBAVAL (global output) INTEGER array * On entry, NBAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( NB_ ) to run the * code with. * * RSCAVAL (global output) INTEGER array * On entry, RSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( RSRC_ ) to run the * code with. * * CSCAVAL (global output) INTEGER array * On entry, CSCAVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCA( CSRC_ ) to run the * code with. * * IAVAL (global output) INTEGER array * On entry, IAVAL is an array of dimension LDVAL. On exit, this * array contains the values of IA to run the code with. * * JAVAL (global output) INTEGER array * On entry, JAVAL is an array of dimension LDVAL. On exit, this * array contains the values of JA to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DIAG, TRANS, UPLO, M, N, DESCA(:), * IA, JA, DESCX(:), IX, JX, INCX, DESCY(:), IY, JY and INCY. * This is also the maximum number of test cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * NBLOG (global output) INTEGER * On exit, NBLOG specifies the logical computational block size * to run the tests with. NBLOG must be at least one. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least eight. On * exit, if LTEST( i ) is .TRUE., the i-th Level 2 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * THRESH (global output) REAL * On exit, THRESH specifies the threshhold value for the test * ratio. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * BETA (global output) COMPLEX*16 * On exit, BETA specifies the value of beta to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 3, 2*NGRIDS+37*NMAT+NSUBS+4 ) with NSUBS equal to 8. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D *ype real dble cplx zplx * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS2TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Read the threshold value for test ratio * READ( NIN, FMT = * ) THRESH IF( THRESH.LT.0.0 ) $ THRESH = 16.0 * * Get logical computational block size * READ( NIN, FMT = * ) NBLOG IF( NBLOG.LT.1 ) $ NBLOG = 32 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 120 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 120 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA, BETA * READ( NIN, FMT = * ) ALPHA READ( NIN, FMT = * ) BETA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 120 ENDIF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( UPLOVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( TRANVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( DIAGVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JAVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 120 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, BETA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT WORK( 3 ) = NBLOG CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 DO 70 J = 1, NMAT WORK( I ) = ICHAR( DIAGVAL( J ) ) WORK( I+1 ) = ICHAR( TRANVAL( J ) ) WORK( I+2 ) = ICHAR( UPLOVAL( J ) ) I = I + 3 70 CONTINUE CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, MVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JAVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 80 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 80 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 2 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex double precision '// $ 'Level 2 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9979 ) NBLOG WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9980 ) THRESH WRITE( NOUT, FMT = 9982 ) ALPHA WRITE( NOUT, FMT = 9981 ) BETA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 90 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 90 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, BETA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) NBLOG = WORK( 3 ) * I = 2*NGRIDS + 37*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 DO 100 J = 1, NMAT DIAGVAL( J ) = CHAR( WORK( I ) ) TRANVAL( J ) = CHAR( WORK( I+1 ) ) UPLOVAL( J ) = CHAR( WORK( I+2 ) ) I = I + 3 100 CONTINUE CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, MVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JAVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 110 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 110 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 120 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) 9981 FORMAT( 2X, 'Beta : (', G16.6, $ ',', G16.6, ')' ) 9980 FORMAT( 2X, 'Threshold value : ', G16.6 ) 9979 FORMAT( 2X, 'Logical block size : ', I6 ) * * End of PZBLA2TSTINFO * END SUBROUTINE PZBLAS2TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PZBLAS2TSTCHKE tests the error exits of the Level 2 PBLAS. * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 8 (NSUBS). * If LTEST( 1 ) is .TRUE., PZGEMV will be tested; * If LTEST( 2 ) is .TRUE., PZHEMV will be tested; * If LTEST( 3 ) is .TRUE., PZTRMV will be tested; * If LTEST( 4 ) is .TRUE., PZTRSV will be tested; * If LTEST( 5 ) is .TRUE., PZGERU will be tested; * If LTEST( 6 ) is .TRUE., PZGERC will be tested; * If LTEST( 7 ) is .TRUE., PZHER will be tested; * If LTEST( 8 ) is .TRUE., PZHER2 will be tested; * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 8 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PZDIMEE, PZGEMV, PZGERC, $ PZGERU, PZHEMV, PZHER, PZHER2, PZMATEE, $ PZOPTEE, PZTRMV, PZTRSV, PZVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/21, 22, 23, 23, 24, 24, 26, 27/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PZGEMV * I = 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZGEMV, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZGEMV, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZGEMV, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZGEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHEMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHEMV, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHEMV, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHEMV, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZHEMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PZTRMV * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZTRMV, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZTRMV, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZTRMV, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZTRMV, SCODE( I ), SNAMES( I ) ) END IF * * Test PZTRSV * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZTRSV, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZTRSV, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZTRSV, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZTRSV, SCODE( I ), SNAMES( I ) ) END IF * * Test PZGERU * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZGERU, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZGERU, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZGERU, SCODE( I ), SNAMES( I ) ) END IF * * Test PZGERC * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZGERC, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZGERC, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZGERC, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHER * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHER, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHER, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZHER, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHER, SCODE( I ), SNAMES( I ) ) END IF * * Test PZHER2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PZOPTEE( ICTXT, NOUT, PZHER2, SCODE( I ), SNAMES( I ) ) CALL PZDIMEE( ICTXT, NOUT, PZHER2, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZHER2, SCODE( I ), SNAMES( I ) ) CALL PZMATEE( ICTXT, NOUT, PZHER2, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PZBLAS2TSTCHKE * END SUBROUTINE PZCHKARG2( ICTXT, NOUT, SNAME, UPLO, TRANS, DIAG, M, $ N, ALPHA, IA, JA, DESCA, IX, JX, DESCX, $ INCX, BETA, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCA( * ), DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PZCHKARG2 checks the input-only arguments of the Level 2 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies the UPLO option in the Level 2 PBLAS * operation. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the TRANS option in the Level 2 * PBLAS operation. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies the DIAG option in the Level 2 PBLAS * operation. * * M (global input) INTEGER * On entry, M specifies the dimension of the submatrix ope- * rands. * * N (global input) INTEGER * On entry, N specifies the dimension of the submatrix ope- * rands. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. CHARACTER*1 DIAGREF, TRANSREF, UPLOREF INTEGER I, IAREF, INCXREF, INCYREF, IXREF, IYREF, $ JAREF, JXREF, JYREF, MREF, MYCOL, MYROW, NPCOL, $ NPROW, NREF COMPLEX*16 ALPHAREF, BETAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCAREF( DLEN_ ), DESCXREF( DLEN_ ), $ DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * DIAGREF = DIAG TRANSREF = TRANS UPLOREF = UPLO MREF = M NREF = N ALPHAREF = ALPHA IAREF = IA JAREF = JA DO 10 I = 1, DLEN_ DESCAREF( I ) = DESCA( I ) 10 CONTINUE IXREF = IX JXREF = JX DO 20 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 20 CONTINUE INCXREF = INCX BETAREF = BETA IYREF = IY JYREF = JY DO 30 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 30 CONTINUE INCYREF = INCY * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( .NOT. LSAME( DIAG, DIAGREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DIAG' ELSE IF( .NOT. LSAME( TRANS, TRANSREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'TRANS' ELSE IF( .NOT. LSAME( UPLO, UPLOREF ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'UPLO' ELSE IF( M.NE.MREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'M' ELSE IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE IF( IA.NE.IAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IA' ELSE IF( JA.NE.JAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JA' ELSE IF( DESCA( DTYPE_ ).NE.DESCAREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( DTYPE_ )' ELSE IF( DESCA( M_ ).NE.DESCAREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( M_ )' ELSE IF( DESCA( N_ ).NE.DESCAREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( N_ )' ELSE IF( DESCA( IMB_ ).NE.DESCAREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( IMB_ )' ELSE IF( DESCA( INB_ ).NE.DESCAREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( INB_ )' ELSE IF( DESCA( MB_ ).NE.DESCAREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( MB_ )' ELSE IF( DESCA( NB_ ).NE.DESCAREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( NB_ )' ELSE IF( DESCA( RSRC_ ).NE.DESCAREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( RSRC_ )' ELSE IF( DESCA( CSRC_ ).NE.DESCAREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CSRC_ )' ELSE IF( DESCA( CTXT_ ).NE.DESCAREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( CTXT_ )' ELSE IF( DESCA( LLD_ ).NE.DESCAREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCA( LLD_ )' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( BETA.NE.BETAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'BETA' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.NE.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PZCHKARG2 * END SUBROUTINE PZBLAS2TSTCHK( ICTXT, NOUT, NROUT, UPLO, TRANS, DIAG, $ M, N, ALPHA, A, PA, IA, JA, DESCA, X, $ PX, IX, JX, DESCX, INCX, BETA, Y, PY, $ IY, JY, DESCY, INCY, THRESH, ROGUE, $ WORK, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER IA, ICTXT, INCX, INCY, INFO, IX, IY, JA, JX, $ JY, M, N, NOUT, NROUT REAL THRESH COMPLEX*16 ALPHA, BETA, ROGUE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ), PA( * ), PX( * ), PY( * ), X( * ), $ Y( * ) * .. * * Purpose * ======= * * PZBLAS2TSTCHK performs the computational tests of the Level 2 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PZGEMV will be tested; * else if NROUT = 2, PZHEMV will be tested; * else if NROUT = 3, PZTRMV will be tested; * else if NROUT = 4, PZTRSV will be tested; * else if NROUT = 5, PZGERU will be tested; * else if NROUT = 6, PZGERC will be tested; * else if NROUT = 7, PZHER will be tested; * else if NROUT = 8, PZHER2 will be tested; * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies if the upper or lower part of the * matrix operand is to be referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies if the matrix operand A is to be * transposed. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies if the triangular matrix operand is * unit or non-unit. * * M (global input) INTEGER * On entry, M specifies the number of rows of A. * * N (global input) INTEGER * On entry, N specifies the number of columns of A. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (DESCA( M_ ),*). This * array contains a local copy of the initial entire matrix PA. * * PA (local input) COMPLEX*16 array * On entry, PA is an array of dimension (DESCA( LLD_ ),*). This * array contains the local entries of the matrix PA. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX*16 array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX*16 array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * THRESH (global input) REAL * On entry, THRESH is the threshold value for the test ratio. * * ROGUE (global input) COMPLEX*16 * On entry, ROGUE specifies the constant used to pad the * non-referenced part of triangular, symmetric or Hermitian ma- * trices. * * WORK (workspace) DOUBLE PRECISION array * On entry, WORK is an array of dimension LWORK where LWORK is * at least MAX( M, N ). This array is used to store the compu- * ted gauges (see PZMVCH). * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on A has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/4, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D+0 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR COMPLEX*16 ALPHA1 * .. * .. Local Arrays .. INTEGER IERR( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_ZLASET, PZCHKMIN, PZCHKVIN, $ PZMVCH, PZTRMV, PZVMCH, PZVMCH2, ZTRSV * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, DBLE * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * DO 10 I = 1, 3 IERR( I ) = 0 10 CONTINUE * IF( NROUT.EQ.1 ) THEN * * Test PZGEMV * * Check the resulting vector Y * CALL PZMVCH( ICTXT, TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, $ IX, JX, DESCX, INCX, BETA, Y, PY, IY, JY, DESCY, $ INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PZCHKMIN( ERR, M, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) IF( LSAME( TRANS, 'N' ) ) THEN CALL PZCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) ELSE CALL PZCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, $ IERR( 2 ) ) END IF * ELSE IF( NROUT.EQ.2 ) THEN * * Test PZHEMV * * Check the resulting vector Y * CALL PZMVCH( ICTXT, 'No transpose', N, N, ALPHA, A, IA, JA, $ DESCA, X, IX, JX, DESCX, INCX, BETA, Y, PY, IY, $ JY, DESCY, INCY, WORK, ERR, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) CALL PZCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.3 ) THEN * * Test PZTRMV * * Check the resulting vector X * CALL PZMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, Y, IX, $ JX, DESCX, INCX, ZERO, X, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.4 ) THEN * * Test PZTRSV * * Check the resulting vector X * CALL ZTRSV( UPLO, TRANS, DIAG, N, A( IA+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ), X( IX+(JX-1)*DESCX( M_ ) ), INCX ) CALL PZTRMV( UPLO, TRANS, DIAG, N, PA, IA, JA, DESCA, PX, IX, $ JX, DESCX, INCX ) CALL PZMVCH( ICTXT, TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ JX, DESCX, INCX, ZERO, Y, PX, IX, JX, DESCX, INCX, $ WORK, ERR, IERR( 2 ) ) * IF( IERR( 2 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * IF( LSAME( UPLO, 'L' ) ) THEN IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Upper', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+JA*DESCA( M_ ) ), DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Upper', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF ELSE IF( LSAME( DIAG, 'N' ) ) THEN CALL PB_ZLASET( 'Lower', N-1, N-1, 0, ROGUE, ROGUE, $ A( IA+1+(JA-1)*DESCA( M_ ) ), $ DESCA( M_ ) ) ELSE CALL PB_ZLASET( 'Lower', N, N, 0, ROGUE, ONE, $ A( IA+(JA-1)*DESCA( M_ ) ), DESCA( M_ ) ) END IF END IF CALL PZCHKMIN( ERR, N, N, A, PA, IA, JA, DESCA, IERR( 1 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PZGERU * * Check the resulting matrix A * CALL PZVMCH( ICTXT, 'No transpose', 'Ge', M, N, ALPHA, X, IX, $ JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, PA, $ IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PZCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PZCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.6 ) THEN * * Test PZGERC * * Check the resulting matrix A * CALL PZVMCH( ICTXT, 'Conjugate transpose', 'Ge', M, N, ALPHA, $ X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, $ A, PA, IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PZCHKVIN( ERR, M, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PZCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * ELSE IF( NROUT.EQ.7 ) THEN * * Test PZHER * * Check the resulting matrix A * ALPHA1 = DCMPLX( DBLE( ALPHA ), RZERO ) CALL PZVMCH( ICTXT, 'Conjugate transpose', UPLO, N, N, ALPHA1, $ X, IX, JX, DESCX, INCX, X, IX, JX, DESCX, INCX, A, $ PA, IA, JA, DESCA, WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PZCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) * ELSE IF( NROUT.EQ.8 ) THEN * * Test PZHER2 * * Check the resulting matrix A * CALL PZVMCH2( ICTXT, UPLO, N, N, ALPHA, X, IX, JX, DESCX, INCX, $ Y, IY, JY, DESCY, INCY, A, PA, IA, JA, DESCA, $ WORK, ERR, IERR( 1 ) ) IF( IERR( 1 ).NE.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) ELSE IF( ERR.GT.DBLE( THRESH ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9996 ) ERR END IF * * Check the input-only arguments * CALL PZCHKVIN( ERR, N, X, PX, IX, JX, DESCX, INCX, IERR( 2 ) ) CALL PZCHKVIN( ERR, N, Y, PY, IY, JY, DESCY, INCY, IERR( 3 ) ) * END IF * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'A' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'X' END IF * IF( IERR( 3 ).NE.0 ) THEN INFO = INFO + 4 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'Y' END IF * 9999 FORMAT( 2X, ' ***** ERROR: Matrix operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9997 FORMAT( 2X, ' ***** FATAL ERROR - Computed result is less ', $ 'than half accurate *****' ) 9996 FORMAT( 2X, ' ***** Test completed with maximum test ratio: ', $ F11.5, ' SUSPECT *****' ) * RETURN * * End of PZBLAS2TSTCHK * END scalapack-1.8.0/PBLAS/TESTING/PZBLAS3TST.dat0000640000175000017500000000515710363532303017445 0ustar muammarmuammar'Level 3 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PZBLAS3TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 16.0 threshold value of test ratio 10 value of the logical computational blocksize NB 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q (2.0D0, -4.0D0) value of ALPHA (3.0D0, -2.0D0) value of BETA 4 number of tests problems 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' values of DIAG 'L' 'L' 'L' 'L' 'L' 'L' 'L' 'L' values of SIDE 'N' 'N' 'N' 'N' 'T' 'T' 'T' 'T' values of TRANSA 'N' 'N' 'T' 'T' 'N' 'N' 'T' 'T' values of TRANSB 'U' 'L' 'U' 'L' 'U' 'L' 'U' 'L' values of UPLO 22 30 8 20 3 5 11 8 values of M 23 29 7 21 3 14 11 8 values of N 24 28 9 22 3 9 11 8 values of K 31 31 31 31 31 31 31 31 values of M_A 31 31 31 31 31 31 31 31 values of N_A 4 5 6 3 2 1 4 5 values of IMB_A 4 5 6 3 2 1 4 5 values of INB_A 4 5 6 3 2 1 4 5 values of MB_A 4 5 6 3 2 1 4 5 values of NB_A 0 0 0 0 0 0 0 0 values of RSRC_A 0 0 0 0 0 0 0 0 values of CSRC_A 5 1 7 7 1 12 1 11 values of IA 5 1 7 7 1 12 1 11 values of JA 32 32 32 32 32 32 32 32 values of M_B 32 32 32 32 32 32 32 32 values of N_B 4 5 6 3 2 1 4 5 values of IMB_B 4 5 6 3 2 1 4 5 values of INB_B 4 5 6 3 2 1 4 5 values of MB_B 4 5 6 3 2 1 4 5 values of NB_B 0 0 0 0 0 0 0 0 values of RSRC_B 0 0 0 0 0 0 0 0 values of CSRC_B 5 1 7 7 1 12 1 11 values of IB 5 1 7 7 1 12 1 11 values of JB 35 35 35 35 35 35 35 35 values of M_C 35 35 35 35 35 35 35 35 values of N_C 4 5 6 3 2 1 4 5 values of IMB_C 4 5 6 3 2 1 4 5 values of INB_C 4 5 6 3 2 1 4 5 values of MB_C 4 5 6 3 2 1 4 5 values of NB_C 0 0 0 0 0 0 0 0 values of RSRC_C 0 0 0 0 0 0 0 0 values of CSRC_C 5 1 7 7 1 12 1 11 values of IC 5 1 7 7 1 12 1 11 values of JC PZGEMM T put F for no test in the same column PZSYMM T put F for no test in the same column PZHEMM T put F for no test in the same column PZSYRK T put F for no test in the same column PZHERK T put F for no test in the same column PZSYR2K T put F for no test in the same column PZHER2K T put F for no test in the same column PZTRMM T put F for no test in the same column PZTRSM T put F for no test in the same column PZGEADD T put F for no test in the same column PZTRADD T put F for no test in the same column scalapack-1.8.0/PBLAS/TESTING/pzblas1tst.f0000640000175000017500000042074610363532303017565 0ustar muammarmuammar PROGRAM PZBLA1TST * * -- PBLAS testing driver (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * Purpose * ======= * * PZBLA1TST is the main testing program for the PBLAS Level 1 routines. * * The program must be driven by a short data file. An annotated exam- * ple of a data file can be obtained by deleting the first 3 characters * from the following 46 lines: * 'Level 1 PBLAS, Testing input file' * 'Intel iPSC/860 hypercube, gamma model.' * 'PZBLAS1TST.SUMM' output file name (if any) * 6 device out * F logical flag, T to stop on failures * F logical flag, T to test error exits * 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors * 10 the leading dimension gap * 1 number of process grids (ordered pairs of P & Q) * 2 2 1 4 2 3 8 values of P * 2 2 4 1 3 2 1 values of Q * (1.0D0, 0.0D0) value of ALPHA * 2 number of tests problems * 3 4 values of N * 6 10 values of M_X * 6 10 values of N_X * 2 5 values of IMB_X * 2 5 values of INB_X * 2 5 values of MB_X * 2 5 values of NB_X * 0 1 values of RSRC_X * 0 0 values of CSRC_X * 1 1 values of IX * 1 1 values of JX * 1 1 values of INCX * 6 10 values of M_Y * 6 10 values of N_Y * 2 5 values of IMB_Y * 2 5 values of INB_Y * 2 5 values of MB_Y * 2 5 values of NB_Y * 0 1 values of RSRC_Y * 0 0 values of CSRC_Y * 1 1 values of IY * 1 1 values of JY * 6 1 values of INCY * PZSWAP T put F for no test in the same column * PZSCAL T put F for no test in the same column * PZDSCAL T put F for no test in the same column * PZCOPY T put F for no test in the same column * PZAXPY T put F for no test in the same column * PZDOTU T put F for no test in the same column * PZDOTC T put F for no test in the same column * PDZNRM2 T put F for no test in the same column * PDZASUM T put F for no test in the same column * PZAMAX T put F for no test in the same column * * Internal Parameters * =================== * * TOTMEM INTEGER * TOTMEM is a machine-specific parameter indicating the maxi- * mum amount of available memory per process in bytes. The * user should customize TOTMEM to his platform. Remember to * leave room in memory for the operating system, the BLACS * buffer, etc. For example, on a system with 8 MB of memory * per process (e.g., one processor on an Intel iPSC/860), the * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS, * code, BLACS buffer, etc). However, for PVM, we usually set * TOTMEM = 2000000. Some experimenting with the maximum value * of TOTMEM may be required. By default, TOTMEM is 2000000. * * DBLESZ INTEGER * ZPLXSZ INTEGER * DBLESZ and ZPLXSZ indicate the length in bytes on the given * platform for a double precision real and a double precision * complex. By default, DBLESZ is set to eight and ZPLXSZ is set * to sixteen. * * MEM COMPLEX*16 array * MEM is an array of dimension TOTMEM / ZPLXSZ. * All arrays used by SCALAPACK routines are allocated from this * array MEM and referenced by pointers. The integer IPA, for * example, is a pointer to the starting element of MEM for the * matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER MAXTESTS, MAXGRIDS, GAPMUL, ZPLXSZ, TOTMEM, $ MEMSIZ, NSUBS DOUBLE PRECISION RZERO COMPLEX*16 PADVAL, ZERO PARAMETER ( MAXTESTS = 20, MAXGRIDS = 20, GAPMUL = 10, $ ZPLXSZ = 16, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / ZPLXSZ, $ PADVAL = ( -9923.0D+0, -9923.0D+0 ), $ RZERO = 0.0D+0, ZERO = ( 0.0D+0, 0.0D+0 ), $ NSUBS = 10 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL ERRFLG, SOF, TEE INTEGER CSRCX, CSRCY, I, IAM, ICTXT, IGAP, IMBX, IMBY, $ IMIDX, IMIDY, INBX, INBY, INCX, INCY, IPMATX, $ IPMATY, IPOSTX, IPOSTY, IPREX, IPREY, IPW, IPX, $ IPY, IVERB, IX, IXSEED, IY, IYSEED, J, JX, JY, $ K, LDX, LDY, MBX, MBY, MEMREQD, MPX, MPY, MX, $ MY, MYCOL, MYROW, N, NBX, NBY, NGRIDS, NOUT, $ NPCOL, NPROCS, NPROW, NQX, NQY, NTESTS, NX, NY, $ PISCLR, RSRCX, RSRCY, TSKIP, TSTCNT DOUBLE PRECISION PUSCLR COMPLEX*16 ALPHA, PSCLR * .. * .. Local Arrays .. CHARACTER*80 OUTFILE LOGICAL LTEST( NSUBS ), YCHECK( NSUBS ) INTEGER CSCXVAL( MAXTESTS ), CSCYVAL( MAXTESTS ), $ DESCX( DLEN_ ), DESCXR( DLEN_ ), $ DESCY( DLEN_ ), DESCYR( DLEN_ ), IERR( 4 ), $ IMBXVAL( MAXTESTS ), IMBYVAL( MAXTESTS ), $ INBXVAL( MAXTESTS ), INBYVAL( MAXTESTS ), $ INCXVAL( MAXTESTS ), INCYVAL( MAXTESTS ), $ IXVAL( MAXTESTS ), IYVAL( MAXTESTS ), $ JXVAL( MAXTESTS ), JYVAL( MAXTESTS ), $ KFAIL( NSUBS ), KPASS( NSUBS ), KSKIP( NSUBS ), $ KTESTS( NSUBS ), MBXVAL( MAXTESTS ), $ MBYVAL( MAXTESTS ), MXVAL( MAXTESTS ), $ MYVAL( MAXTESTS ), NBXVAL( MAXTESTS ), $ NBYVAL( MAXTESTS ), NVAL( MAXTESTS ), $ NXVAL( MAXTESTS ), NYVAL( MAXTESTS ), $ PVAL( MAXTESTS ), QVAL( MAXTESTS ), $ RSCXVAL( MAXTESTS ), RSCYVAL( MAXTESTS ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ IGSUM2D, PB_DESCSET2, PB_PZLAPRNT, PB_ZCHEKPAD, $ PB_ZFILLPAD, PDZASUM, PDZNRM2, PVDESCCHK, $ PVDIMCHK, PZAMAX, PZAXPY, PZBLA1TSTINFO, $ PZBLAS1TSTCHK, PZBLAS1TSTCHKE, PZCHKARG1, $ PZCHKVOUT, PZCOPY, PZDOTC, PZDOTU, PZDSCAL, $ PZLAGEN, PZMPRNT, PZSCAL, PZSWAP, PZVPRNT * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MOD * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) LOGICAL ABRTFLG INTEGER INFO, NBLOG COMMON /SNAMEC/SNAMES COMMON /INFOC/INFO, NBLOG COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SNAMES/'PZSWAP ', 'PZSCAL ', $ 'PZDSCAL', 'PZCOPY ', 'PZAXPY ', $ 'PZDOTU ', 'PZDOTC ', 'PDZNRM2', $ 'PDZASUM', 'PZAMAX'/ DATA YCHECK/.TRUE., .FALSE., .FALSE., .TRUE., $ .TRUE., .TRUE., .TRUE., .FALSE., .FALSE., $ .FALSE./ * .. * .. Executable Statements .. * * Initialization * * Set flag so that the PBLAS error handler will abort on errors. * ABRTFLG = .FALSE. * * So far no error, will become true as soon as one error is found. * ERRFLG = .FALSE. * * Test counters * TSKIP = 0 TSTCNT = 0 * * Seeds for random matrix generations. * IXSEED = 100 IYSEED = 200 * * So far no tests have been performed. * DO 10 I = 1, NSUBS KPASS( I ) = 0 KSKIP( I ) = 0 KFAIL( I ) = 0 KTESTS( I ) = 0 10 CONTINUE * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PZBLA1TSTINFO( OUTFILE, NOUT, NTESTS, NVAL, MXVAL, NXVAL, $ IMBXVAL, MBXVAL, INBXVAL, NBXVAL, RSCXVAL, $ CSCXVAL, IXVAL, JXVAL, INCXVAL, MYVAL, $ NYVAL, IMBYVAL, MBYVAL, INBYVAL, NBYVAL, $ RSCYVAL, CSCYVAL, IYVAL, JYVAL, INCYVAL, $ MAXTESTS, NGRIDS, PVAL, MAXGRIDS, QVAL, $ MAXGRIDS, LTEST, SOF, TEE, IAM, IGAP, IVERB, $ NPROCS, ALPHA, MEM ) * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = 9979 ) WRITE( NOUT, FMT = * ) END IF * * If TEE is set then Test Error Exits of routines. * IF( TEE ) $ CALL PZBLAS1TSTCHKE( LTEST, NOUT, NPROCS ) * * Loop over different process grids * DO 60 I = 1, NGRIDS * NPROW = PVAL( I ) NPCOL = QVAL( I ) * * Make sure grid information is correct * IERR( 1 ) = 0 IF( NPROW.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPROW', NPROW IERR( 1 ) = 1 ELSE IF( NPCOL.LT.1 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'GRID SIZE', 'NPCOL', NPCOL IERR( 1 ) = 1 ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS IERR( 1 ) = 1 END IF * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9997 ) 'GRID' TSKIP = TSKIP + 1 GO TO 60 END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 60 * * Loop over number of tests * DO 50 J = 1, NTESTS * * Get the test parameters * N = NVAL( J ) MX = MXVAL( J ) NX = NXVAL( J ) IMBX = IMBXVAL( J ) MBX = MBXVAL( J ) INBX = INBXVAL( J ) NBX = NBXVAL( J ) RSRCX = RSCXVAL( J ) CSRCX = CSCXVAL( J ) IX = IXVAL( J ) JX = JXVAL( J ) INCX = INCXVAL( J ) MY = MYVAL( J ) NY = NYVAL( J ) IMBY = IMBYVAL( J ) MBY = MBYVAL( J ) INBY = INBYVAL( J ) NBY = NBYVAL( J ) RSRCY = RSCYVAL( J ) CSRCY = CSCYVAL( J ) IY = IYVAL( J ) JY = JYVAL( J ) INCY = INCYVAL( J ) * IF( IAM.EQ.0 ) THEN TSTCNT = TSTCNT + 1 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9996 ) TSTCNT, NPROW, NPCOL WRITE( NOUT, FMT = * ) * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IX, JX, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9992 ) WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9993 ) N, IY, JY, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY WRITE( NOUT, FMT = 9995 ) END IF * * Check the validity of the input and initialize DESC_ * CALL PVDESCCHK( ICTXT, NOUT, 'X', DESCX, $ BLOCK_CYCLIC_2D_INB, MX, NX, IMBX, INBX, $ MBX, NBX, RSRCX, CSRCX, INCX, MPX, NQX, $ IPREX, IMIDX, IPOSTX, IGAP, GAPMUL, $ IERR( 1 ) ) CALL PVDESCCHK( ICTXT, NOUT, 'Y', DESCY, $ BLOCK_CYCLIC_2D_INB, MY, NY, IMBY, INBY, $ MBY, NBY, RSRCY, CSRCY, INCY, MPY, NQY, $ IPREY, IMIDY, IPOSTY, IGAP, GAPMUL, $ IERR( 2 ) ) * IF( IERR( 1 ).GT.0 .OR. IERR( 2 ).GT.0 ) THEN TSKIP = TSKIP + 1 GO TO 40 END IF * LDX = MAX( 1, MX ) LDY = MAX( 1, MY ) * * Assign pointers into MEM for matrices corresponding to * vectors X and Y. Ex: IPX starts at position MEM( IPREX+1 ). * IPX = IPREX + 1 IPY = IPX + DESCX( LLD_ ) * NQX + IPOSTX + IPREY IPMATX = IPY + DESCY( LLD_ ) * NQY + IPOSTY IPMATY = IPMATX + MX * NX IPW = IPMATY + MY * NY * * Check if sufficient memory. * Requirement = mem for local part of parallel matrices + * mem for whole matrices for comp. check + * mem for recving comp. check error vals. * MEMREQD = IPW - 1 + $ MAX( MAX( IMBX, MBX ), MAX( IMBY, MBY ) ) IERR( 1 ) = 0 IF( MEMREQD.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9990 ) MEMREQD*ZPLXSZ IERR( 1 ) = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 ) * IF( IERR( 1 ).GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9991 ) TSKIP = TSKIP + 1 GO TO 40 END IF * * Loop over all PBLAS 1 routines * DO 30 K = 1, NSUBS * * Continue only if this sub has to be tested. * IF( .NOT.LTEST( K ) ) $ GO TO 30 * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9989 ) SNAMES( K ) END IF * * Check the validity of the operand sizes * CALL PVDIMCHK( ICTXT, NOUT, N, 'X', IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PVDIMCHK( ICTXT, NOUT, N, 'Y', IY, JY, DESCY, INCY, $ IERR( 2 ) ) * IF( IERR( 1 ).NE.0 .OR. IERR( 2 ).NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 GO TO 30 END IF * * Generate distributed matrices X and Y * CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCX, IXSEED, MEM( IPX ), $ DESCX( LLD_ ) ) IF( YCHECK( K ) ) $ CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCY, IYSEED, MEM( IPY ), $ DESCY( LLD_ ) ) * * Generate entire matrices on each process. * CALL PB_DESCSET2( DESCXR, MX, NX, IMBX, INBX, MBX, NBX, $ -1, -1, ICTXT, MAX( 1, MX ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MX, NX, 1, $ 1, DESCXR, IXSEED, MEM( IPMATX ), $ DESCXR( LLD_ ) ) IF( YCHECK( K ) ) THEN CALL PB_DESCSET2( DESCYR, MY, NY, IMBY, INBY, MBY, $ NBY, -1, -1, ICTXT, MAX( 1, MY ) ) CALL PZLAGEN( .FALSE., 'None', 'No diag', 0, MY, NY, $ 1, 1, DESCYR, IYSEED, MEM( IPMATY ), $ DESCYR( LLD_ ) ) END IF * * Pad the guard zones of X, and Y * CALL PB_ZFILLPAD( ICTXT, MPX, NQX, MEM( IPX-IPREX ), $ DESCX( LLD_ ), IPREX, IPOSTX, PADVAL ) * IF( YCHECK( K ) ) THEN CALL PB_ZFILLPAD( ICTXT, MPY, NQY, MEM( IPY-IPREY ), $ DESCY( LLD_ ), IPREY, IPOSTY, $ PADVAL ) END IF * * Initialize the check for INPUT only args. * INFO = 0 CALL PZCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * INFO = 0 PSCLR = ZERO PUSCLR = RZERO PISCLR = 0 * * Print initial parallel data if IVERB >= 2. * IF( IVERB.EQ.2 ) THEN IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PZLAPRNT( 1, N, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PZLAPRNT( N, 1, MEM( IPX ), IX, JX, DESCX, $ 0, 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) END IF IF( YCHECK( K ) ) THEN IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PZLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) ELSE CALL PB_PZLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, $ 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF END IF ELSE IF( IVERB.GE.3 ) THEN CALL PB_PZLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, 0, $ 0, 'PARALLEL_INITIAL_X', NOUT, $ MEM( IPW ) ) IF( YCHECK( K ) ) $ CALL PB_PZLAPRNT( MY, NY, MEM( IPY ), 1, 1, DESCY, $ 0, 0, 'PARALLEL_INITIAL_Y', NOUT, $ MEM( IPW ) ) END IF * * Call the PBLAS routine * IF( K.EQ.1 ) THEN * * Test PZSWAP * CALL PZSWAP( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.2 ) THEN * * Test PZSCAL * PSCLR = ALPHA CALL PZSCAL( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.3 ) THEN * * Test PZDSCAL * PUSCLR = DBLE( ALPHA ) CALL PZDSCAL( N, DBLE( ALPHA ), MEM( IPX ), IX, JX, $ DESCX, INCX ) * ELSE IF( K.EQ.4 ) THEN * * Test PZCOPY * CALL PZCOPY( N, MEM( IPX ), IX, JX, DESCX, INCX, $ MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.5 ) THEN * * Test PZAXPY * PSCLR = ALPHA CALL PZAXPY( N, ALPHA, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.6 ) THEN * * Test PZDOTU * CALL PZDOTU( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.7 ) THEN * * Test PZDOTC * CALL PZDOTC( N, PSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX, MEM( IPY ), IY, JY, DESCY, INCY ) * ELSE IF( K.EQ.8 ) THEN * * Test PDZNRM2 * CALL PDZNRM2( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.9 ) THEN * * Test PDZASUM * CALL PDZASUM( N, PUSCLR, MEM( IPX ), IX, JX, DESCX, $ INCX ) * ELSE IF( K.EQ.10 ) THEN * CALL PZAMAX( N, PSCLR, PISCLR, MEM( IPX ), IX, JX, $ DESCX, INCX ) * END IF * * Check if the operation has been performed. * IF( INFO.NE.0 ) THEN KSKIP( K ) = KSKIP( K ) + 1 IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9978 ) INFO GO TO 30 END IF * * Check the computations * CALL PZBLAS1TSTCHK( ICTXT, NOUT, K, N, PSCLR, PUSCLR, $ PISCLR, MEM( IPMATX ), MEM( IPX ), $ IX, JX, DESCX, INCX, MEM( IPMATY ), $ MEM( IPY ), IY, JY, DESCY, INCY, $ INFO ) IF( MOD( INFO, 2 ).EQ.1 ) THEN IERR( 1 ) = 1 ELSE IF( MOD( INFO / 2, 2 ).EQ.1 ) THEN IERR( 2 ) = 1 ELSE IF( INFO.NE.0 ) THEN IERR( 1 ) = 1 IERR( 2 ) = 1 END IF * * Check padding * CALL PB_ZCHEKPAD( ICTXT, SNAMES( K ), MPX, NQX, $ MEM( IPX-IPREX ), DESCX( LLD_ ), $ IPREX, IPOSTX, PADVAL ) IF( YCHECK( K ) ) THEN CALL PB_ZCHEKPAD( ICTXT, SNAMES( K ), MPY, NQY, $ MEM( IPY-IPREY ), DESCY( LLD_ ), $ IPREY, IPOSTY, PADVAL ) END IF * * Check input-only scalar arguments * INFO = 1 CALL PZCHKARG1( ICTXT, NOUT, SNAMES( K ), N, ALPHA, IX, $ JX, DESCX, INCX, IY, JY, DESCY, INCY, $ INFO ) * * Check input-only array arguments * CALL PZCHKVOUT( N, MEM( IPMATX ), MEM( IPX ), IX, JX, $ DESCX, INCX, IERR( 3 ) ) * IF( IERR( 3 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_X', SNAMES( K ) END IF * IF( YCHECK( K ) ) THEN CALL PZCHKVOUT( N, MEM( IPMATY ), MEM( IPY ), IY, JY, $ DESCY, INCY, IERR( 4 ) ) IF( IERR( 4 ).NE.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9986 ) 'PARALLEL_Y', $ SNAMES( K ) END IF END IF * * Only node 0 prints computational test result * IF( INFO.NE.0 .OR. IERR( 1 ).NE.0 .OR. $ IERR( 2 ).NE.0 .OR. IERR( 3 ).NE.0 .OR. $ IERR( 4 ).NE. 0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9988 ) SNAMES( K ) KFAIL( K ) = KFAIL( K ) + 1 ERRFLG = .TRUE. ELSE IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9987 ) SNAMES( K ) KPASS( K ) = KPASS( K ) + 1 END IF * * Dump matrix if IVERB >= 1 and error. * IF( IVERB.GE.1 .AND. ERRFLG ) THEN IF( IERR( 3 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MX, NX, MEM( IPMATX ), $ LDX, 0, 0, 'SERIAL_X' ) CALL PB_PZLAPRNT( MX, NX, MEM( IPX ), 1, 1, DESCX, $ 0, 0, 'PARALLEL_X', NOUT, $ MEM( IPMATX ) ) ELSE IF( IERR( 1 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PZVPRNT( ICTXT, NOUT, N, $ MEM( IPMATX+IX-1+(JX-1)*LDX ), $ INCX, 0, 0, 'SERIAL_X' ) IF( INCX.EQ.DESCX( M_ ) ) THEN CALL PB_PZLAPRNT( 1, N, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PZLAPRNT( N, 1, MEM( IPX ), IX, JX, $ DESCX, 0, 0, 'PARALLEL_X', $ NOUT, MEM( IPMATX ) ) END IF END IF IF( YCHECK( K ) ) THEN IF( IERR( 4 ).NE.0 .OR. IVERB.GE.3 ) THEN CALL PZMPRNT( ICTXT, NOUT, MY, NY, $ MEM( IPMATY ), LDY, 0, 0, $ 'SERIAL_Y' ) CALL PB_PZLAPRNT( MY, NY, MEM( IPY ), 1, 1, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE IF( IERR( 2 ).NE.0 ) THEN IF( N.GT.0 ) $ CALL PZVPRNT( ICTXT, NOUT, N, $ MEM( IPMATY+IY-1+(JY-1)*LDY ), $ INCY, 0, 0, 'SERIAL_Y' ) IF( INCY.EQ.DESCY( M_ ) ) THEN CALL PB_PZLAPRNT( 1, N, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) ELSE CALL PB_PZLAPRNT( N, 1, MEM( IPY ), IY, JY, $ DESCY, 0, 0, 'PARALLEL_Y', $ NOUT, MEM( IPMATX ) ) END IF END IF END IF END IF * * Leave if error and "Stop On Failure" * IF( SOF.AND.ERRFLG ) $ GO TO 70 * 30 CONTINUE * 40 IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9985 ) J END IF * 50 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 60 CONTINUE * * Come here, if error and "Stop On Failure" * 70 CONTINUE * * Before printing out final stats, add TSKIP to all skips * DO 80 I = 1, NSUBS IF( LTEST( I ) ) THEN KSKIP( I ) = KSKIP( I ) + TSKIP KTESTS( I ) = KSKIP( I ) + KFAIL( I ) + KPASS( I ) END IF 80 CONTINUE * * Print results * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9981 ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9983 ) WRITE( NOUT, FMT = 9982 ) * DO 90 I = 1, NSUBS WRITE( NOUT, FMT = 9984 ) '|', SNAMES( I ), KTESTS( I ), $ KPASS( I ), KFAIL( I ), KSKIP( I ) 90 CONTINUE WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) WRITE( NOUT, FMT = * ) * END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'ILLEGAL ', A, ': ', A, ' = ', I10, $ ' should be at least 1' ) 9998 FORMAT( 'ILLEGAL GRID: NPROW*NPCOL = ', I4, $ '. It can be at most', I4 ) 9997 FORMAT( 'Bad ', A, ' parameters: going on to next test case.' ) 9996 FORMAT( 2X, 'Test number ', I4 , ' started on a ', I6, ' x ', $ I6, ' process grid.' ) 9995 FORMAT( 2X, '---------------------------------------------------', $ '--------------------------' ) 9994 FORMAT( 2X, ' N IX JX MX NX IMBX INBX', $ ' MBX NBX RSRCX CSRCX INCX' ) 9993 FORMAT( 2X,I6,1X,I6,1X,I6,1X,I6,1X,I6,1X,I5,1X,I5,1X,I5,1X,I5,1X, $ I5,1X,I5,1X,I6 ) 9992 FORMAT( 2X, ' N IY JY MY NY IMBY INBY', $ ' MBY NBY RSRCY CSRCY INCY' ) 9991 FORMAT( 'Not enough memory for this test: going on to', $ ' next test case.' ) 9990 FORMAT( 'Not enough memory. Need: ', I12 ) 9989 FORMAT( 2X, ' Tested Subroutine: ', A ) 9988 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' FAILED ',' *****' ) 9987 FORMAT( 2X, ' ***** Computational check: ', A, ' ', $ ' PASSED ',' *****' ) 9986 FORMAT( 2X, ' ***** ERROR ***** Matrix operand ', A, $ ' modified by ', A, ' *****' ) 9985 FORMAT( 2X, 'Test number ', I4, ' completed.' ) 9984 FORMAT( 2X,A1,2X,A7,8X,I4,6X,I4,5X,I4,4X,I4 ) 9983 FORMAT( 2X, ' SUBROUTINE TOTAL TESTS PASSED FAILED ', $ 'SKIPPED' ) 9982 FORMAT( 2X, ' ---------- ----------- ------ ------ ', $ '-------' ) 9981 FORMAT( 2X, 'Testing Summary') 9980 FORMAT( 2X, 'End of Tests.' ) 9979 FORMAT( 2X, 'Tests started.' ) 9978 FORMAT( 2X, ' ***** Operation not supported, error code: ', $ I5, ' *****' ) * STOP * * End of PZBLA1TST * END SUBROUTINE PZBLA1TSTINFO( SUMMRY, NOUT, NMAT, NVAL, MXVAL, $ NXVAL, IMBXVAL, MBXVAL, INBXVAL, $ NBXVAL, RSCXVAL, CSCXVAL, IXVAL, $ JXVAL, INCXVAL, MYVAL, NYVAL, IMBYVAL, $ MBYVAL, INBYVAL, NBYVAL, RSCYVAL, $ CSCYVAL, IYVAL, JYVAL, INCYVAL, $ LDVAL, NGRIDS, PVAL, LDPVAL, QVAL, $ LDQVAL, LTEST, SOF, TEE, IAM, IGAP, $ IVERB, NPROCS, ALPHA, WORK ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. LOGICAL SOF, TEE INTEGER IAM, IGAP, IVERB, LDPVAL, LDQVAL, LDVAL, $ NGRIDS, NMAT, NOUT, NPROCS COMPLEX*16 ALPHA * .. * .. Array Arguments .. CHARACTER*( * ) SUMMRY LOGICAL LTEST( * ) INTEGER CSCXVAL( LDVAL ), CSCYVAL( LDVAL ), $ IMBXVAL( LDVAL ), IMBYVAL( LDVAL ), $ INBXVAL( LDVAL ), INBYVAL( LDVAL ), $ INCXVAL( LDVAL ), INCYVAL( LDVAL ), $ IXVAL( LDVAL ), IYVAL( LDVAL ), JXVAL( LDVAL ), $ JYVAL( LDVAL ), MBXVAL( LDVAL ), $ MBYVAL( LDVAL ), MXVAL( LDVAL ), $ MYVAL( LDVAL ), NBXVAL( LDVAL ), $ NBYVAL( LDVAL ), NVAL( LDVAL ), NXVAL( LDVAL ), $ NYVAL( LDVAL ), PVAL( LDPVAL ), QVAL( LDQVAL ), $ RSCXVAL( LDVAL ), RSCYVAL( LDVAL ), WORK( * ) * .. * * Purpose * ======= * * PZBLA1TSTINFO get the needed startup information for testing various * Level 1 PBLAS routines, and transmits it to all processes. * * Notes * ===== * * For packing the information we assumed that the length in bytes of an * integer is equal to the length in bytes of a real single precision. * * Arguments * ========= * * SUMMRY (global output) CHARACTER*(*) * On exit, SUMMRY is the name of output (summary) file (if * any). SUMMRY is only defined for process 0. * * NOUT (global output) INTEGER * On exit, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NMAT (global output) INTEGER * On exit, NMAT specifies the number of different test cases. * * NVAL (global output) INTEGER array * On entry, NVAL is an array of dimension LDVAL. On exit, this * array contains the values of N to run the code with. * * MXVAL (global output) INTEGER array * On entry, MXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( M_ ) to run the code * with. * * NXVAL (global output) INTEGER array * On entry, NXVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCX( N_ ) to run the code * with. * * IMBXVAL (global output) INTEGER array * On entry, IMBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( IMB_ ) to run the * code with. * * MBXVAL (global output) INTEGER array * On entry, MBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( MB_ ) to run the * code with. * * INBXVAL (global output) INTEGER array * On entry, INBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( INB_ ) to run the * code with. * * NBXVAL (global output) INTEGER array * On entry, NBXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( NB_ ) to run the * code with. * * RSCXVAL (global output) INTEGER array * On entry, RSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( RSRC_ ) to run the * code with. * * CSCXVAL (global output) INTEGER array * On entry, CSCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCX( CSRC_ ) to run the * code with. * * IXVAL (global output) INTEGER array * On entry, IXVAL is an array of dimension LDVAL. On exit, this * array contains the values of IX to run the code with. * * JXVAL (global output) INTEGER array * On entry, JXVAL is an array of dimension LDVAL. On exit, this * array contains the values of JX to run the code with. * * INCXVAL (global output) INTEGER array * On entry, INCXVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCX to run the code with. * * MYVAL (global output) INTEGER array * On entry, MYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( M_ ) to run the code * with. * * NYVAL (global output) INTEGER array * On entry, NYVAL is an array of dimension LDVAL. On exit, this * array contains the values of DESCY( N_ ) to run the code * with. * * IMBYVAL (global output) INTEGER array * On entry, IMBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( IMB_ ) to run the * code with. * * MBYVAL (global output) INTEGER array * On entry, MBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( MB_ ) to run the * code with. * * INBYVAL (global output) INTEGER array * On entry, INBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( INB_ ) to run the * code with. * * NBYVAL (global output) INTEGER array * On entry, NBYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( NB_ ) to run the * code with. * * RSCYVAL (global output) INTEGER array * On entry, RSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( RSRC_ ) to run the * code with. * * CSCYVAL (global output) INTEGER array * On entry, CSCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of DESCY( CSRC_ ) to run the * code with. * * IYVAL (global output) INTEGER array * On entry, IYVAL is an array of dimension LDVAL. On exit, this * array contains the values of IY to run the code with. * * JYVAL (global output) INTEGER array * On entry, JYVAL is an array of dimension LDVAL. On exit, this * array contains the values of JY to run the code with. * * INCYVAL (global output) INTEGER array * On entry, INCYVAL is an array of dimension LDVAL. On exit, * this array contains the values of INCY to run the code with. * * LDVAL (global input) INTEGER * On entry, LDVAL specifies the maximum number of different va- * lues that can be used for DESCX(:), IX, JX, INCX, DESCY(:), * IY, JY and INCY. This is also the maximum number of test * cases. * * NGRIDS (global output) INTEGER * On exit, NGRIDS specifies the number of different values that * can be used for P and Q. * * PVAL (global output) INTEGER array * On entry, PVAL is an array of dimension LDPVAL. On exit, this * array contains the values of P to run the code with. * * LDPVAL (global input) INTEGER * On entry, LDPVAL specifies the maximum number of different * values that can be used for P. * * QVAL (global output) INTEGER array * On entry, QVAL is an array of dimension LDQVAL. On exit, this * array contains the values of Q to run the code with. * * LDQVAL (global input) INTEGER * On entry, LDQVAL specifies the maximum number of different * values that can be used for Q. * * LTEST (global output) LOGICAL array * On entry, LTEST is an array of dimension at least ten. On * exit, if LTEST( i ) is .TRUE., the i-th Level 1 PBLAS routine * will be tested. See the input file for the ordering of the * routines. * * SOF (global output) LOGICAL * On exit, if SOF is .TRUE., the tester will stop on the first * detected failure. Otherwise, it won't. * * TEE (global output) LOGICAL * On exit, if TEE is .TRUE., the tester will perform the error * exit tests. These tests won't be performed otherwise. * * IAM (local input) INTEGER * On entry, IAM specifies the number of the process executing * this routine. * * IGAP (global output) INTEGER * On exit, IGAP specifies the user-specified gap used for pad- * ding. IGAP must be at least zero. * * IVERB (global output) INTEGER * On exit, IVERB specifies the output verbosity level: 0 for * pass/fail, 1, 2 or 3 for matrix dump on errors. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes. * * ALPHA (global output) COMPLEX*16 * On exit, ALPHA specifies the value of alpha to be used in all * the test cases. * * WORK (local workspace) INTEGER array * On entry, WORK is an array of dimension at least * MAX( 2, 2*NGRIDS+23*NMAT+NSUBS+4 ) with NSUBS equal to 10. * This array is used to pack all output arrays in order to send * the information in one message. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NIN, NSUBS PARAMETER ( NIN = 11, NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL LTESTT INTEGER I, ICTXT, J DOUBLE PRECISION EPS * .. * .. Local Arrays .. CHARACTER*7 SNAMET CHARACTER*79 USRINFO * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D, $ IGEBS2D, SGEBR2D, SGEBS2D, ZGEBR2D, ZGEBS2D * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Common Blocks .. CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='PZBLAS1TST.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Read the flag that indicates if Stop on Failure * READ( NIN, FMT = * ) SOF * * Read the flag that indicates if Test Error Exits * READ( NIN, FMT = * ) TEE * * Read the verbosity level * READ( NIN, FMT = * ) IVERB IF( IVERB.LT.0 .OR. IVERB.GT.3 ) $ IVERB = 0 * * Read the leading dimension gap * READ( NIN, FMT = * ) IGAP IF( IGAP.LT.0 ) $ IGAP = 0 * * Get number of grids * READ( NIN, FMT = * ) NGRIDS IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDPVAL GO TO 100 ELSE IF( NGRIDS.GT.LDQVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Grids', LDQVAL GO TO 100 END IF * * Get values of P and Q * READ( NIN, FMT = * ) ( PVAL( I ), I = 1, NGRIDS ) READ( NIN, FMT = * ) ( QVAL( I ), I = 1, NGRIDS ) * * Read ALPHA * READ( NIN, FMT = * ) ALPHA * * Read number of tests. * READ( NIN, FMT = * ) NMAT IF( NMAT.LT.1 .OR. NMAT.GT.LDVAL ) THEN WRITE( NOUT, FMT = 9998 ) 'Tests', LDVAL GO TO 100 END IF * * Read in input data into arrays. * READ( NIN, FMT = * ) ( NVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCXVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IMBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( MBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( NBYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( RSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( CSCYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( IYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( JYVAL( I ), I = 1, NMAT ) READ( NIN, FMT = * ) ( INCYVAL( I ), I = 1, NMAT ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 10 I = 1, NSUBS LTEST( I ) = .FALSE. 10 CONTINUE 20 CONTINUE READ( NIN, FMT = 9996, END = 50 ) SNAMET, LTESTT DO 30 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 40 30 CONTINUE * WRITE( NOUT, FMT = 9995 )SNAMET GO TO 100 * 40 CONTINUE LTEST( I ) = LTESTT GO TO 20 * 50 CONTINUE * * Close input file * CLOSE ( NIN ) * * For pvm only: if virtual machine not set up, allocate it and * spawn the correct number of processes. * IF( NPROCS.LT.1 ) THEN NPROCS = 0 DO 60 I = 1, NGRIDS NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) ) 60 CONTINUE CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * * Pack information arrays and broadcast * CALL ZGEBS2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1 ) * WORK( 1 ) = NGRIDS WORK( 2 ) = NMAT CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, WORK, 2 ) * I = 1 IF( SOF ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 IF( TEE ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 WORK( I ) = IVERB I = I + 1 WORK( I ) = IGAP I = I + 1 CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 ) I = I + NGRIDS CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCXVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IMBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, MBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, NBYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, RSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, CSCYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, IYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, JYVAL, 1, WORK( I ), 1 ) I = I + NMAT CALL ICOPY( NMAT, INCYVAL, 1, WORK( I ), 1 ) I = I + NMAT * DO 70 J = 1, NSUBS IF( LTEST( J ) ) THEN WORK( I ) = 1 ELSE WORK( I ) = 0 END IF I = I + 1 70 CONTINUE I = I - 1 CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) 'Level 1 PBLAS testing program.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'Tests of the complex double precision '// $ 'Level 1 PBLAS' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9993 ) NMAT WRITE( NOUT, FMT = 9992 ) NGRIDS WRITE( NOUT, FMT = 9990 ) $ 'P', ( PVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( PVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9990 ) $ 'Q', ( QVAL(I), I = 1, MIN(NGRIDS, 5) ) IF( NGRIDS.GT.5 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 6, $ MIN( 10, NGRIDS ) ) IF( NGRIDS.GT.10 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 11, $ MIN( 15, NGRIDS ) ) IF( NGRIDS.GT.15 ) $ WRITE( NOUT, FMT = 9991 ) ( QVAL(I), I = 16, NGRIDS ) WRITE( NOUT, FMT = 9988 ) SOF WRITE( NOUT, FMT = 9987 ) TEE WRITE( NOUT, FMT = 9983 ) IGAP WRITE( NOUT, FMT = 9986 ) IVERB WRITE( NOUT, FMT = 9982 ) ALPHA IF( LTEST( 1 ) ) THEN WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9985 ) SNAMES( 1 ), ' ... No ' END IF DO 80 I = 2, NSUBS IF( LTEST( I ) ) THEN WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... Yes' ELSE WRITE( NOUT, FMT = 9984 ) SNAMES( I ), ' ... No ' END IF 80 CONTINUE WRITE( NOUT, FMT = 9994 ) EPS WRITE( NOUT, FMT = * ) * ELSE * * If in pvm, must participate setting up virtual machine * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Compute machine epsilon * EPS = PDLAMCH( ICTXT, 'eps' ) * CALL ZGEBR2D( ICTXT, 'All', ' ', 1, 1, ALPHA, 1, 0, 0 ) * CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, WORK, 2, 0, 0 ) NGRIDS = WORK( 1 ) NMAT = WORK( 2 ) * I = 2*NGRIDS + 23*NMAT + NSUBS + 4 CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 ) * I = 1 IF( WORK( I ).EQ.1 ) THEN SOF = .TRUE. ELSE SOF = .FALSE. END IF I = I + 1 IF( WORK( I ).EQ.1 ) THEN TEE = .TRUE. ELSE TEE = .FALSE. END IF I = I + 1 IVERB = WORK( I ) I = I + 1 IGAP = WORK( I ) I = I + 1 CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 ) I = I + NGRIDS CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 ) I = I + NGRIDS CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCXVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IMBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, MBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, NBYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, RSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, CSCYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, IYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, JYVAL, 1 ) I = I + NMAT CALL ICOPY( NMAT, WORK( I ), 1, INCYVAL, 1 ) I = I + NMAT * DO 90 J = 1, NSUBS IF( WORK( I ).EQ.1 ) THEN LTEST( J ) = .TRUE. ELSE LTEST( J ) = .FALSE. END IF I = I + 1 90 CONTINUE * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 100 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( ' Number of values of ',5A, ' is less than 1 or greater ', $ 'than ', I2 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) 9996 FORMAT( A7, L2 ) 9995 FORMAT( ' Subprogram name ', A7, ' not recognized', $ /' ******* TESTS ABANDONED *******' ) 9994 FORMAT( 2X, 'Relative machine precision (eps) is taken to be ', $ E18.6 ) 9993 FORMAT( 2X, 'Number of Tests : ', I6 ) 9992 FORMAT( 2X, 'Number of process grids : ', I6 ) 9991 FORMAT( 2X, ' : ', 5I6 ) 9990 FORMAT( 2X, A1, ' : ', 5I6 ) 9988 FORMAT( 2X, 'Stop on failure flag : ', L6 ) 9987 FORMAT( 2X, 'Test for error exits flag : ', L6 ) 9986 FORMAT( 2X, 'Verbosity level : ', I6 ) 9985 FORMAT( 2X, 'Routines to be tested : ', A, A8 ) 9984 FORMAT( 2X, ' ', A, A8 ) 9983 FORMAT( 2X, 'Leading dimension gap : ', I6 ) 9982 FORMAT( 2X, 'Alpha : (', G16.6, $ ',', G16.6, ')' ) * * End of PZBLA1TSTINFO * END SUBROUTINE PZBLAS1TSTCHKE( LTEST, INOUT, NPROCS ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INOUT, NPROCS * .. * .. Array Arguments .. LOGICAL LTEST( * ) * .. * * Purpose * ======= * * PZBLAS1TSTCHKE tests the error exits of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * LTEST (global input) LOGICAL array * On entry, LTEST is an array of dimension at least 10 (NSUBS). * If LTEST( 1 ) is .TRUE., PZSWAP will be tested; * If LTEST( 2 ) is .TRUE., PZSCAL will be tested; * If LTEST( 3 ) is .TRUE., PZDSCAL will be tested; * If LTEST( 4 ) is .TRUE., PZCOPY will be tested; * If LTEST( 5 ) is .TRUE., PZAXPY will be tested; * If LTEST( 6 ) is .TRUE., PZDOTU will be tested; * If LTEST( 7 ) is .TRUE., PZDOTC will be tested; * If LTEST( 8 ) is .TRUE., PDZNRM2 will be tested; * If LTEST( 9 ) is .TRUE., PDZASUM will be tested; * If LTEST( 10 ) is .TRUE., PZAMAX will be tested. * * INOUT (global input) INTEGER * On entry, INOUT specifies the unit number for output file. * When INOUT is 6, output to screen, when INOUT = 0, output to * stderr. INOUT is only defined in process 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of processes cal- * ling this routine. * * Calling sequence encodings * ========================== * * code Formal argument list Examples * * 11 (n, v1,v2) _SWAP, _COPY * 12 (n,s1, v1 ) _SCAL, _SCAL * 13 (n,s1, v1,v2) _AXPY, _DOT_ * 14 (n,s1,i1,v1 ) _AMAX * 15 (n,u1, v1 ) _ASUM, _NRM2 * * 21 ( trans, m,n,s1,m1,v1,s2,v2) _GEMV * 22 (uplo, n,s1,m1,v1,s2,v2) _SYMV, _HEMV * 23 (uplo,trans,diag, n, m1,v1 ) _TRMV, _TRSV * 24 ( m,n,s1,v1,v2,m1) _GER_ * 25 (uplo, n,s1,v1, m1) _SYR * 26 (uplo, n,u1,v1, m1) _HER * 27 (uplo, n,s1,v1,v2,m1) _SYR2, _HER2 * * 31 ( transa,transb, m,n,k,s1,m1,m2,s2,m3) _GEMM * 32 (side,uplo, m,n, s1,m1,m2,s2,m3) _SYMM, _HEMM * 33 ( uplo,trans, n,k,s1,m1, s2,m3) _SYRK * 34 ( uplo,trans, n,k,u1,m1, u2,m3) _HERK * 35 ( uplo,trans, n,k,s1,m1,m2,s2,m3) _SYR2K * 36 ( uplo,trans, n,k,s1,m1,m2,u2,m3) _HER2K * 37 ( m,n, s1,m1, s2,m3) _TRAN_ * 38 (side,uplo,transa, diag,m,n, s1,m1,m2 ) _TRMM, _TRSM * 39 ( trans, m,n, s1,m1, s2,m3) _GEADD * 40 ( uplo,trans, m,n, s1,m1, s2,m3) _TRADD * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER NSUBS PARAMETER ( NSUBS = 10 ) * .. * .. Local Scalars .. LOGICAL ABRTSAV INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER SCODE( NSUBS ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, PDZASUM, PDZNRM2, PZAMAX, $ PZAXPY, PZCOPY, PZDIMEE, PZDOTC, PZDOTU, $ PZDSCAL, PZSCAL, PZSWAP, PZVECEE * .. * .. Common Blocks .. LOGICAL ABRTFLG INTEGER NOUT CHARACTER*7 SNAMES( NSUBS ) COMMON /SNAMEC/SNAMES COMMON /PBERRORC/NOUT, ABRTFLG * .. * .. Data Statements .. DATA SCODE/11, 12, 12, 11, 13, 13, 13, 15, 15, 14/ * .. * .. Executable Statements .. * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes. * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Set ABRTFLG to FALSE so that the PBLAS error handler won't abort * on errors during these tests and set the output device unit for * it. * ABRTSAV = ABRTFLG ABRTFLG = .FALSE. NOUT = INOUT * * Test PZSWAP * I = 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZSWAP, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZSWAP, SCODE( I ), SNAMES( I ) ) END IF * * Test PZSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZSCAL, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PZDSCAL * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZDSCAL, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZDSCAL, SCODE( I ), SNAMES( I ) ) END IF * * Test PZCOPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZCOPY, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZCOPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PZAXPY * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZAXPY, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZAXPY, SCODE( I ), SNAMES( I ) ) END IF * * Test PZDOTU * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZDOTU, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZDOTU, SCODE( I ), SNAMES( I ) ) END IF * * Test PZDOTC * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZDOTC, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZDOTC, SCODE( I ), SNAMES( I ) ) END IF * * PDZNRM2 * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PDZNRM2, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PDZNRM2, SCODE( I ), SNAMES( I ) ) END IF * * Test PDZASUM * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PDZASUM, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PDZASUM, SCODE( I ), SNAMES( I ) ) END IF * * Test PZAMAX * I = I + 1 IF( LTEST( I ) ) THEN CALL PZDIMEE( ICTXT, NOUT, PZAMAX, SCODE( I ), SNAMES( I ) ) CALL PZVECEE( ICTXT, NOUT, PZAMAX, SCODE( I ), SNAMES( I ) ) END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) * CALL BLACS_GRIDEXIT( ICTXT ) * * Reset ABRTFLG to the value it had before calling this routine * ABRTFLG = ABRTSAV * 9999 FORMAT( 2X, 'Error-exit tests completed.' ) * RETURN * * End of PZBLAS1TSTCHKE * END SUBROUTINE PZCHKARG1( ICTXT, NOUT, SNAME, N, ALPHA, IX, JX, $ DESCX, INCX, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT COMPLEX*16 ALPHA * .. * .. Array Arguments .. CHARACTER*(*) SNAME INTEGER DESCX( * ), DESCY( * ) * .. * * Purpose * ======= * * PZCHKARG1 checks the input-only arguments of the Level 1 PBLAS. When * INFO = 0, this routine makes a copy of its arguments (which are INPUT * only arguments to PBLAS routines). Otherwise, it verifies the values * of these arguments against the saved copies. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * SNAME (global input) CHARACTER*(*) * On entry, SNAME specifies the subroutine name calling this * subprogram. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global input/global output) INTEGER * When INFO = 0 on entry, the values of the arguments which are * INPUT only arguments to a PBLAS routine are copied into sta- * tic variables and INFO is unchanged on exit. Otherwise, the * values of the arguments are compared against the saved co- * pies. In case no error has been found INFO is zero on return, * otherwise it is non zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. INTEGER I, INCXREF, INCYREF, IXREF, IYREF, JXREF, $ JYREF, MYCOL, MYROW, NPCOL, NPROW, NREF COMPLEX*16 ALPHAREF * .. * .. Local Arrays .. CHARACTER*15 ARGNAME INTEGER DESCXREF( DLEN_ ), DESCYREF( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D * .. * .. Save Statements .. SAVE * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check if first call. If yes, then save. * IF( INFO.EQ.0 ) THEN * NREF = N IXREF = IX JXREF = JX DO 10 I = 1, DLEN_ DESCXREF( I ) = DESCX( I ) 10 CONTINUE INCXREF = INCX IYREF = IY JYREF = JY DO 20 I = 1, DLEN_ DESCYREF( I ) = DESCY( I ) 20 CONTINUE INCYREF = INCY ALPHAREF = ALPHA * ELSE * * Test saved args. Return with first mismatch. * ARGNAME = ' ' IF( N.NE.NREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'N' ELSE IF( IX.NE.IXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IX' ELSE IF( JX.NE.JXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JX' ELSE IF( DESCX( DTYPE_ ).NE.DESCXREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( DTYPE_ )' ELSE IF( DESCX( M_ ).NE.DESCXREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( M_ )' ELSE IF( DESCX( N_ ).NE.DESCXREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( N_ )' ELSE IF( DESCX( IMB_ ).NE.DESCXREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( IMB_ )' ELSE IF( DESCX( INB_ ).NE.DESCXREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( INB_ )' ELSE IF( DESCX( MB_ ).NE.DESCXREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( MB_ )' ELSE IF( DESCX( NB_ ).NE.DESCXREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( NB_ )' ELSE IF( DESCX( RSRC_ ).NE.DESCXREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( RSRC_ )' ELSE IF( DESCX( CSRC_ ).NE.DESCXREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CSRC_ )' ELSE IF( DESCX( CTXT_ ).NE.DESCXREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( CTXT_ )' ELSE IF( DESCX( LLD_ ).NE.DESCXREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCX( LLD_ )' ELSE IF( INCX.NE.INCXREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCX' ELSE IF( IY.NE.IYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'IY' ELSE IF( JY.NE.JYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'JY' ELSE IF( DESCY( DTYPE_ ).NE.DESCYREF( DTYPE_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( DTYPE_ )' ELSE IF( DESCY( M_ ).NE.DESCYREF( M_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( M_ )' ELSE IF( DESCY( N_ ).NE.DESCYREF( N_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( N_ )' ELSE IF( DESCY( IMB_ ).NE.DESCYREF( IMB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( IMB_ )' ELSE IF( DESCY( INB_ ).NE.DESCYREF( INB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( INB_ )' ELSE IF( DESCY( MB_ ).NE.DESCYREF( MB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( MB_ )' ELSE IF( DESCY( NB_ ).NE.DESCYREF( NB_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( NB_ )' ELSE IF( DESCY( RSRC_ ).NE.DESCYREF( RSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( RSRC_ )' ELSE IF( DESCY( CSRC_ ).NE.DESCYREF( CSRC_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CSRC_ )' ELSE IF( DESCY( CTXT_ ).NE.DESCYREF( CTXT_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( CTXT_ )' ELSE IF( DESCY( LLD_ ).NE.DESCYREF( LLD_ ) ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'DESCY( LLD_ )' ELSE IF( INCY.NE.INCYREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'INCY' ELSE IF( ALPHA.NE.ALPHAREF ) THEN WRITE( ARGNAME, FMT = '(A)' ) 'ALPHA' ELSE INFO = 0 END IF * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN * IF( INFO.GT.0 ) THEN WRITE( NOUT, FMT = 9999 ) ARGNAME, SNAME ELSE WRITE( NOUT, FMT = 9998 ) SNAME END IF * END IF * END IF * 9999 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' FAILED changed ', A, ' *****' ) 9998 FORMAT( 2X, ' ***** Input-only parameter check: ', A, $ ' PASSED *****' ) * RETURN * * End of PZCHKARG1 * END LOGICAL FUNCTION PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) * .. * * Purpose * ======= * * PISINSCOPE returns .TRUE. if the calling process is in the scope of * sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) and .FALSE. if it is * not. This routine is used to determine which processes should check * the answer returned by some Level 1 PBLAS routines. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * N (global input) INTEGER * The length of the subvector sub( X ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, ROWREP INTEGER IIX, IXCOL, IXROW, JJX, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PB_INFOG2L * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN * * This is the special case, find process owner of IX, JX, and * only this process is the scope. * PISINSCOPE = ( ( IXROW.EQ.MYROW .OR. ROWREP ) .AND. $ ( IXCOL.EQ.MYCOL .OR. COLREP ) ) * ELSE * IF( INCX.EQ.DESCX( M_ ) ) THEN * * row vector * PISINSCOPE = ( MYROW.EQ.IXROW .OR. ROWREP ) * ELSE * * column vector * PISINSCOPE = ( MYCOL.EQ.IXCOL .OR. COLREP ) * END IF * END IF * RETURN * * End of PISINSCOPE * END SUBROUTINE PZBLAS1TSTCHK( ICTXT, NOUT, NROUT, N, PSCLR, PUSCLR, $ PISCLR, X, PX, IX, JX, DESCX, INCX, Y, $ PY, IY, JY, DESCY, INCY, INFO ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INCX, INCY, INFO, IX, IY, JX, JY, N, $ NOUT, NROUT, PISCLR DOUBLE PRECISION PUSCLR COMPLEX*16 PSCLR * .. * .. Array Arguments .. INTEGER DESCX( * ), DESCY( * ) COMPLEX*16 PX( * ), PY( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PZBLAS1TSTCHK performs the computational tests of the Level 1 PBLAS. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA( IMB_ ) The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA( INB_ ) The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA( MB_ ) The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA( NB_ ) The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_NUMROC: * Lr( IA, K ) = PB_NUMROC( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_NUMROC( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * NOUT (global input) INTEGER * On entry, NOUT specifies the unit number for the output file. * When NOUT is 6, output to screen, when NOUT is 0, output to * stderr. NOUT is only defined for process 0. * * NROUT (global input) INTEGER * On entry, NROUT specifies which routine will be tested as * follows: * If NROUT = 1, PZSWAP will be tested; * else if NROUT = 2, PZSCAL will be tested; * else if NROUT = 3, PZDSCAL will be tested; * else if NROUT = 4, PZCOPY will be tested; * else if NROUT = 5, PZAXPY will be tested; * else if NROUT = 6, PZDOTU will be tested; * else if NROUT = 7, PZDOTC will be tested; * else if NROUT = 8, PDZNRM2 will be tested; * else if NROUT = 9, PDZASUM will be tested; * else if NROUT = 10, PZAMAX will be tested. * * N (global input) INTEGER * On entry, N specifies the length of the subvector operands. * * PSCLR (global input) COMPLEX*16 * On entry, depending on the value of NROUT, PSCLR specifies * the scalar ALPHA, or the output scalar returned by the PBLAS, * i.e., the dot product, the 2-norm, the absolute sum or the * value of AMAX. * * PUSCLR (global input) DOUBLE PRECISION * On entry, PUSCLR specifies the real part of the scalar ALPHA * used by the real scaling, the 2-norm, or the absolute sum * routines. PUSCLR is not used in the real versions of this * routine. * * PISCLR (global input) DOUBLE PRECISION * On entry, PISCLR specifies the value of the global index re- * turned by PZAMAX, otherwise PISCLR is not used. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (DESCX( M_ ),*). This * array contains a local copy of the initial entire matrix PX. * * PX (local input) COMPLEX*16 array * On entry, PX is an array of dimension (DESCX( LLD_ ),*). This * array contains the local entries of the matrix PX. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (DESCY( M_ ),*). This * array contains a local copy of the initial entire matrix PY. * * PY (local input) COMPLEX*16 array * On entry, PY is an array of dimension (DESCY( LLD_ ),*). This * array contains the local entries of the matrix PY. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * INFO (global output) INTEGER * On exit, if INFO = 0, no error has been found, otherwise * if( MOD( INFO, 2 ) = 1 ) then an error on X has been found, * if( MOD( INFO/2, 2 ) = 1 ) then an error on Y has been found. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RZERO COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ RZERO = 0.0D+0 ) INTEGER BLOCK_CYCLIC_2D_INB, CSRC_, CTXT_, DLEN_, $ DTYPE_, IMB_, INB_, LLD_, MB_, M_, NB_, N_, $ RSRC_ PARAMETER ( BLOCK_CYCLIC_2D_INB = 2, DLEN_ = 11, $ DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4, $ IMB_ = 5, INB_ = 6, MB_ = 7, NB_ = 8, $ RSRC_ = 9, CSRC_ = 10, LLD_ = 11 ) * .. * .. Local Scalars .. LOGICAL COLREP, INXSCOPE, INYSCOPE, ROWREP INTEGER I, IB, ICURCOL, ICURROW, IDUMM, IIX, IIY, IN, $ IOFFX, IOFFY, ISCLR, IXCOL, IXROW, IYCOL, $ IYROW, J, JB, JJX, JJY, JN, KK, LDX, LDY, $ MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ERR, ERRMAX, PREC, USCLR COMPLEX*16 SCLR * .. * .. Local Arrays .. INTEGER IERR( 6 ) CHARACTER*5 ARGIN1, ARGIN2, ARGOUT1, ARGOUT2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, PB_INFOG2L, PZCHKVIN, $ PZDERRSCAL, PZERRASUM, PZERRAXPY, PZERRDOTC, $ PZERRDOTU, PZERRNRM2, PZERRSCAL, ZCOPY, ZSWAP * .. * .. External Functions .. LOGICAL PISINSCOPE INTEGER IZAMAX DOUBLE PRECISION PDLAMCH EXTERNAL IZAMAX, PDLAMCH, PISINSCOPE * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ARGIN1 = ' ' ARGIN2 = ' ' ARGOUT1 = ' ' ARGOUT2 = ' ' DO 10 I = 1, 6 IERR( I ) = 0 10 CONTINUE * PREC = PDLAMCH( ICTXT, 'precision' ) * IF( NROUT.EQ.1 ) THEN * * Test PZSWAP * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL ZSWAP( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PZCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.2 ) THEN * * Test PZSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 20 J = JX, JN * CALL PZERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 20 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 40 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 30 KK = 0, JB-1 * CALL PZERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 30 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 40 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 50 I = IX, IN * CALL PZERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 50 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 70 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 60 KK = 0, IB-1 * CALL PZERRSCAL( ERR, PSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 60 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 70 CONTINUE * END IF * ELSE IF( NROUT.EQ.3 ) THEN * * Test PZDSCAL * LDX = DESCX( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PB_INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) ICURROW = IXROW ICURCOL = IXCOL ROWREP = ( IXROW.EQ.-1 ) COLREP = ( IXCOL.EQ.-1 ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is a row vector * JB = DESCX( INB_ ) - JX + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCX( NB_ ) + 1 ) * DESCX( NB_ ) + JB JB = MIN( JB, N ) JN = JX + JB - 1 * DO 80 J = JX, JN * CALL PZDERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 80 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 100 J = JN+1, JX+N-1, DESCX( NB_ ) JB = MIN( JX+N-J, DESCX( NB_ ) ) * DO 90 KK = 0, JB-1 * CALL PZDERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 JJX = JJX + 1 END IF * IOFFX = IOFFX + INCX * 90 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 100 CONTINUE * ELSE * * sub( X ) is a column vector * IB = DESCX( IMB_ ) - IX + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCX( MB_ ) + 1 ) * DESCX( MB_ ) + IB IB = MIN( IB, N ) IN = IX + IB - 1 * DO 110 I = IX, IN * CALL PZDERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX * 110 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 130 I = IN+1, IX+N-1, DESCX( MB_ ) IB = MIN( IX+N-I, DESCX( MB_ ) ) * DO 120 KK = 0, IB-1 * CALL PZDERRSCAL( ERR, PUSCLR, X( IOFFX ), PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PX( IIX+(JJX-1)*LDX ) - X( IOFFX ) ).GT. $ ERR ) $ IERR( 1 ) = 1 IIX = IIX + 1 END IF * IOFFX = IOFFX + INCX 120 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 130 CONTINUE * END IF * ELSE IF( NROUT.EQ.4 ) THEN * * Test PZCOPY * IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL ZCOPY( N, X( IOFFX ), INCX, Y( IOFFY ), INCY ) CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PZCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) * ELSE IF( NROUT.EQ.5 ) THEN * * Test PZAXPY * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) LDY = DESCY( LLD_ ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PB_INFOG2L( IY, JY, DESCY, NPROW, NPCOL, MYROW, MYCOL, $ IIY, JJY, IYROW, IYCOL ) ICURROW = IYROW ICURCOL = IYCOL ROWREP = ( IYROW.EQ.-1 ) COLREP = ( IYCOL.EQ.-1 ) * IF( INCY.EQ.DESCY( M_ ) ) THEN * * sub( Y ) is a row vector * JB = DESCY( INB_ ) - JY + 1 IF( JB.LE.0 ) $ JB = ( (-JB ) / DESCY( NB_ ) + 1 ) * DESCY( NB_ ) + JB JB = MIN( JB, N ) JN = JY + JB - 1 * DO 140 J = JY, JN * CALL PZERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 140 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 160 J = JN+1, JY+N-1, DESCY( NB_ ) JB = MIN( JY+N-J, DESCY( NB_ ) ) * DO 150 KK = 0, JB-1 * CALL PZERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF JJY = JJY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 150 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 160 CONTINUE * ELSE * * sub( Y ) is a column vector * IB = DESCY( IMB_ ) - IY + 1 IF( IB.LE.0 ) $ IB = ( (-IB ) / DESCY( MB_ ) + 1 ) * DESCY( MB_ ) + IB IB = MIN( IB, N ) IN = IY + IB - 1 * DO 170 I = IY, IN * CALL PZERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 170 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * DO 190 I = IN+1, IY+N-1, DESCY( MB_ ) IB = MIN( IY+N-I, DESCY( MB_ ) ) * DO 180 KK = 0, IB-1 * CALL PZERRAXPY( ERR, PSCLR, X( IOFFX ), Y( IOFFY ), $ PREC ) * IF( ( MYROW.EQ.ICURROW .OR. ROWREP ) .AND. $ ( MYCOL.EQ.ICURCOL .OR. COLREP ) ) THEN IF( ABS( PY( IIY+(JJY-1)*LDY ) - Y( IOFFY ) ).GT. $ ERR ) THEN IERR( 2 ) = 1 END IF IIY = IIY + 1 END IF * IOFFX = IOFFX + INCX IOFFY = IOFFY + INCY * 180 CONTINUE * ICURROW = MOD( ICURROW+1, NPROW ) * 190 CONTINUE * END IF * ELSE IF( NROUT.EQ.6 ) THEN * * Test PZDOTU * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PZCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PZERRDOTU( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOTU' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOTU' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.7 ) THEN * * Test PZDOTC * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) CALL PZCHKVIN( ERRMAX, N, Y, PY, IY, JY, DESCY, INCY, $ IERR( 2 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IOFFY = IY + ( JY - 1 ) * DESCY( M_ ) CALL PZERRDOTC( ERR, N, SCLR, X( IOFFX ), INCX, Y( IOFFY ), $ INCY, PREC ) INXSCOPE = PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) INYSCOPE = PISINSCOPE( ICTXT, N, IY, JY, DESCY, INCY ) IF( INXSCOPE.OR.INYSCOPE ) THEN IF( ABS( PSCLR - SCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'DOTC' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF ELSE SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'DOTC' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.8 ) THEN * * Test PDZNRM2 * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PZERRNRM2( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ).GT.ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF ELSE USCLR = RZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'NRM2' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.9 ) THEN * * Test PDZASUM * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) CALL PZERRASUM( ERR, N, USCLR, X( IOFFX ), INCX, PREC ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN IF( ABS( PUSCLR - USCLR ) .GT. ERR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF ELSE USCLR = RZERO IF( PUSCLR.NE.USCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'ASUM' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9994 ) USCLR, PUSCLR END IF END IF END IF * ELSE IF( NROUT.EQ.10 ) THEN * * Test PZAMAX * CALL PZCHKVIN( ERRMAX, N, X, PX, IX, JX, DESCX, INCX, $ IERR( 1 ) ) IOFFX = IX + ( JX - 1 ) * DESCX( M_ ) IF( PISINSCOPE( ICTXT, N, IX, JX, DESCX, INCX ) ) THEN ISCLR = IZAMAX( N, X( IOFFX ), INCX ) IF( N.LT.1 ) THEN SCLR = ZERO ELSE IF( ( INCX.EQ.1 ).AND.( DESCX( M_ ).EQ.1 ).AND. $ ( N.EQ.1 ) ) THEN ISCLR = JX SCLR = X( IOFFX ) ELSE IF( INCX.EQ.DESCX( M_ ) ) THEN ISCLR = JX + ISCLR - 1 SCLR = X( IX + ( ISCLR - 1 ) * DESCX( M_ ) ) ELSE ISCLR = IX + ISCLR - 1 SCLR = X( ISCLR + ( JX - 1 ) * DESCX( M_ ) ) END IF * IF( PSCLR.NE.SCLR ) THEN IERR( 3 ) = 1 WRITE( ARGIN1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF * IF( PISCLR.NE.ISCLR ) THEN IERR( 5 ) = 1 WRITE( ARGIN2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9998 ) ARGIN2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF ELSE ISCLR = 0 SCLR = ZERO IF( PSCLR.NE.SCLR ) THEN IERR( 4 ) = 1 WRITE( ARGOUT1, FMT = '(A)' ) 'AMAX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT1 WRITE( NOUT, FMT = 9996 ) SCLR, PSCLR END IF END IF IF( PISCLR.NE.ISCLR ) THEN IERR( 6 ) = 1 WRITE( ARGOUT2, FMT = '(A)' ) 'INDX' IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = 9997 ) ARGOUT2 WRITE( NOUT, FMT = 9995 ) ISCLR, PISCLR END IF END IF END IF * END IF * * Find IERR across all processes * CALL IGAMX2D( ICTXT, 'All', ' ', 6, 1, IERR, 6, IDUMM, IDUMM, -1, $ -1, 0 ) * * Encode the errors found in INFO * IF( IERR( 1 ).NE.0 ) THEN INFO = INFO + 1 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'X' END IF * IF( IERR( 2 ).NE.0 ) THEN INFO = INFO + 2 IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'Y' END IF * IF( IERR( 3 ).NE.0 ) $ INFO = INFO + 4 * IF( IERR( 4 ).NE.0 ) $ INFO = INFO + 8 * IF( IERR( 5 ).NE.0 ) $ INFO = INFO + 16 * IF( IERR( 6 ).NE.0 ) $ INFO = INFO + 32 * 9999 FORMAT( 2X, ' ***** ERROR: Vector operand ', A, $ ' is incorrect.' ) 9998 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' in scope is incorrect.' ) 9997 FORMAT( 2X, ' ***** ERROR: Output scalar result ', A, $ ' out of scope is incorrect.' ) 9996 FORMAT( 2X, ' ***** Expected value is: ', D30.18, '+i*(', $ D30.18, '),', /2X, ' Obtained value is: ', $ D30.18, '+i*(', D30.18, ')' ) 9995 FORMAT( 2X, ' ***** Expected value is: ', I6, /2X, $ ' Obtained value is: ', I6 ) 9994 FORMAT( 2X, ' ***** Expected value is: ', D30.18, /2X, $ ' Obtained value is: ', D30.18 ) * RETURN * * End of PZBLAS1TSTCHK * END SUBROUTINE PZERRDOTU( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N DOUBLE PRECISION ERRBND, PREC COMPLEX*16 SCLR * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * PZERRDOTU serially computes the dot product X**T * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) COMPLEX*16 * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) COMPLEX*16 array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) COMPLEX*16 array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY DOUBLE PRECISION ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N * SCLR = SCLR + X( IX ) * Y( IY ) * TMP = DBLE( X( IX ) ) * DBLE( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = - DIMAG( X( IX ) ) * DIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = DIMAG( X( IX ) ) * DBLE( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * TMP = DBLE( X( IX ) ) * DIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * IX = IX + INCX IY = IY + INCY * 10 CONTINUE * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PZERRDOTU * END SUBROUTINE PZERRDOTC( ERRBND, N, SCLR, X, INCX, Y, INCY, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N DOUBLE PRECISION ERRBND, PREC COMPLEX*16 SCLR * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * PZERRDOTC serially computes the dot product X**H * Y and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If dot1 = SCLR and dot2 are two different computed results, and dot1 * is being assumed to be correct, we require * * abs( dot1 - dot2 ) <= ERRBND = ERRFACT * abs( dot1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operands. * * SCLR (global output) COMPLEX*16 * On exit, SCLR specifies the dot product of the two vectors * X and Y. * * X (global input) COMPLEX*16 array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (global input) COMPLEX*16 array * On entry, Y is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremen- * ted array Y must contain the vector y. * * INCY (global input) INTEGER. * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IX, IY DOUBLE PRECISION ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS, TMP * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX * .. * .. Executable Statements .. * IX = 1 IY = 1 SCLR = ZERO SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = TWO * ( ONE + PREC ) ADDBND = TWO * TWO * TWO * PREC * DO 10 I = 1, N * SCLR = SCLR + DCONJG( X( IX ) ) * Y( IY ) * TMP = DBLE( X( IX ) ) * DBLE( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = DIMAG( X( IX ) ) * DIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMRPOS = SUMRPOS + TMP * FACT ELSE SUMRNEG = SUMRNEG - TMP * FACT END IF * TMP = - DIMAG( X( IX ) ) * DBLE( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * TMP = DBLE( X( IX ) ) * DIMAG( Y ( IY ) ) IF( TMP.GE.ZERO ) THEN SUMIPOS = SUMIPOS + TMP * FACT ELSE SUMINEG = SUMINEG - TMP * FACT END IF * IX = IX + INCX IY = IY + INCY * 10 CONTINUE * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PZERRDOTC * END SUBROUTINE PZERRNRM2( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ERRBND, PREC, USCLR * .. * .. Array Arguments .. COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZERRNRM2 serially computes the 2-norm the vector X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If norm1 = SCLR and norm2 are two different computed results, and * norm1 being assumed to be correct, we require * * abs( norm1 - norm2 ) <= ERRBND = ERRFACT * abs( norm1 ), * * where ERRFACT is computed as the maximum of the positive and negative * partial sums multiplied by a constant proportional to the machine * precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) DOUBLE PRECISION * On exit, USCLR specifies the 2-norm of the vector X. * * X (global input) COMPLEX*16 array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ABSXI, ADDBND, FACT, SCALE, SSQ, SUMSCA, SUMSSQ * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG * .. * .. Executable Statements .. * USCLR = ZERO SUMSSQ = ONE SUMSCA = ZERO ADDBND = TWO * TWO * TWO * PREC FACT = ONE + TWO * ( ( ONE + PREC )**3 - ONE ) * SCALE = ZERO SSQ = ONE DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX IF( DBLE( X( IX ) ).NE.ZERO ) THEN ABSXI = ABS( DBLE( X( IX ) ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF IF( DIMAG( X( IX ) ).NE.ZERO ) THEN ABSXI = ABS( DIMAG( X( IX ) ) ) IF( SCALE.LT.ABSXI )THEN SUMSSQ = ONE + ( SSQ*( SCALE/ABSXI )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = ONE + SSQ*( SCALE/ABSXI )**2 SUMSCA = ABSXI SCALE = ABSXI ELSE SUMSSQ = SSQ + ( ( ABSXI/SCALE )**2 ) * FACT ERRBND = ADDBND * SUMSSQ SUMSSQ = SUMSSQ + ERRBND SSQ = SSQ + ( ABSXI/SCALE )**2 END IF END IF 10 CONTINUE * USCLR = SCALE * SQRT( SSQ ) * * Error on square root * ERRBND = SQRT( SUMSSQ ) * ( ONE + TWO * ( 1.00001D+0 * PREC ) ) * ERRBND = ( SUMSCA * ERRBND ) - USCLR * RETURN * * End of PZERRNRM2 * END SUBROUTINE PZERRASUM( ERRBND, N, USCLR, X, INCX, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ERRBND, PREC, USCLR * .. * .. Array Arguments .. COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZERRASUM serially computes the sum of absolute values of the vector * X and returns a scaled relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies a scaled relative acceptable error * bound. In this case the error bound is just the absolute sum * multiplied by a constant proportional to the machine preci- * sion. * * N (global input) INTEGER * On entry, N specifies the length of the vector operand. * * USCLR (global output) DOUBLE PRECISION * On exit, USCLR specifies the sum of absolute values of the * vector X. * * X (global input) COMPLEX*16 array * On entry, X is an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremen- * ted array X must contain the vector x. * * INCX (global input) INTEGER. * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION TWO, ZERO PARAMETER ( TWO = 2.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IX DOUBLE PRECISION ADDBND * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG * .. * .. Executable Statements .. * IX = 1 USCLR = ZERO ADDBND = TWO * TWO * TWO * PREC * DO 10 IX = 1, 1 + ( N - 1 )*INCX, INCX USCLR = USCLR + ABS( DBLE( X( IX ) ) ) + $ ABS( DIMAG( X( IX ) ) ) 10 CONTINUE * ERRBND = ADDBND * USCLR * RETURN * * End of PZERRASUM * END SUBROUTINE PZERRSCAL( ERRBND, PSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC COMPLEX*16 PSCLR, X * .. * * Purpose * ======= * * PZERRSCAL serially computes the product PSCLR * X and returns a sca- * led relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) COMPLEX*16 * On entry, PSCLR specifies the scale factor. * * X (global input/global output) COMPLEX*16 * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION TWO PARAMETER ( TWO = 2.0D+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * X = PSCLR * X * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PZERRSCAL * END SUBROUTINE PZDERRSCAL( ERRBND, PUSCLR, X, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC, PUSCLR COMPLEX*16 X * .. * * Purpose * ======= * * PZDERRSCAL serially computes the product PUSCLR * X and returns a * scaled relative acceptable error bound on the result. * * Notes * ===== * * If s1 = PUSCLR*X and s2 are two different computed results, and s1 is * being assumed to be correct, we require * * abs( s1 - s2 ) <= ERRBND = ERRFACT * abs( s1 ), * * where ERRFACT is computed as two times the machine precision. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PUSCLR (global input) DOUBLE PRECISION * On entry, PUSCLR specifies the real scale factor. * * X (global input/global output) COMPLEX*16 * On entry, X specifies the scalar to be scaled. On exit, X is * the scaled entry. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION TWO PARAMETER ( TWO = 2.0D+0 ) * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG * .. * .. Executable Statements .. * X = DCMPLX( PUSCLR * DBLE( X ), PUSCLR * DIMAG( X ) ) * ERRBND = ( TWO * PREC ) * ABS( X ) * RETURN * * End of PZDERRSCAL * END SUBROUTINE PZERRAXPY( ERRBND, PSCLR, X, Y, PREC ) * * -- PBLAS test routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION ERRBND, PREC COMPLEX*16 PSCLR, X, Y * .. * * Purpose * ======= * * PZERRAXPY serially computes Y := Y + PSCLR * X and returns a scaled * relative acceptable error bound on the result. * * Arguments * ========= * * ERRBND (global output) DOUBLE PRECISION * On exit, ERRBND specifies the scaled relative acceptable er- * ror bound. * * PSCLR (global input) COMPLEX*16 * On entry, PSCLR specifies the scale factor. * * X (global input) COMPLEX*16 * On entry, X specifies the scalar to be scaled. * * Y (global input/global output) COMPLEX*16 * On entry, Y specifies the scalar to be added. On exit, Y con- * tains the resulting scalar. * * PREC (global input) DOUBLE PRECISION * On entry, PREC specifies the machine precision. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO, ZERO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0, $ ZERO = 0.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION ADDBND, FACT, SUMINEG, SUMIPOS, SUMRNEG, $ SUMRPOS COMPLEX*16 TMP * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG, MAX * .. * .. Executable Statements .. * SUMIPOS = ZERO SUMINEG = ZERO SUMRPOS = ZERO SUMRNEG = ZERO FACT = ONE + TWO * PREC ADDBND = TWO * TWO * TWO * PREC * TMP = PSCLR * X IF( DBLE( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + DBLE( TMP ) * FACT ELSE SUMRNEG = SUMRNEG - DBLE( TMP ) * FACT END IF IF( DIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + DIMAG( TMP ) * FACT ELSE SUMINEG = SUMINEG - DIMAG( TMP ) * FACT END IF * TMP = Y IF( DBLE( TMP ).GE.ZERO ) THEN SUMRPOS = SUMRPOS + DBLE( TMP ) ELSE SUMRNEG = SUMRNEG - DBLE( TMP ) END IF IF( DIMAG( TMP ).GE.ZERO ) THEN SUMIPOS = SUMIPOS + DIMAG( TMP ) ELSE SUMINEG = SUMINEG - DIMAG( TMP ) END IF * Y = Y + ( PSCLR * X ) * ERRBND = ADDBND * MAX( MAX( SUMRPOS, SUMRNEG ), $ MAX( SUMIPOS, SUMINEG ) ) * RETURN * * End of PZERRAXPY * END scalapack-1.8.0/PBLAS/TESTING/PSBLAS1TST.dat0000640000175000017500000000273710363532303017435 0ustar muammarmuammar'Level 1 PBLAS, Testing input file' 'Intel iPSC/860 hypercube, gamma model.' 'PSBLAS1TST.SUMM' output file name (if any) 6 device out F logical flag, T to stop on failures T logical flag, T to test error exits 0 verbosity, 0 for pass/fail, 1-3 for matrix dump on errors 10 the leading dimension gap 4 number of process grids (ordered pairs of P & Q) 2 1 2 1 4 2 3 8 values of P 2 2 1 4 1 3 2 1 values of Q 2.0E0 value of ALPHA 4 number of tests problems 14 44 28 7 values of N 36 9 39 53 values of M_X 24 67 3 12 values of N_X 2 5 2 5 values of IMB_X 2 5 2 5 values of INB_X 2 5 2 5 values of MB_X 2 5 2 5 values of NB_X 0 0 0 0 values of RSRC_X 0 0 0 0 values of CSRC_X 5 3 1 1 values of IX 2 6 1 1 values of JX 1 9 1 53 values of INCX 2 6 35 14 values of M_Y 27 55 43 12 values of N_Y 2 5 2 5 values of IMB_Y 2 5 2 5 values of INB_Y 2 5 2 5 values of MB_Y 2 5 2 5 values of NB_Y 0 0 0 0 values of RSRC_Y 0 0 0 0 values of CSRC_Y 1 4 1 1 values of IY 7 6 1 1 values of JY 2 6 35 1 values of INCY PSSWAP T put F for no test in the same column PSSCAL T put F for no test in the same column PSCOPY T put F for no test in the same column PSAXPY T put F for no test in the same column PSDOT T put F for no test in the same column PSNRM2 T put F for no test in the same column PSASUM T put F for no test in the same column PSAMAX T put F for no test in the same column scalapack-1.8.0/PBLAS/SRC/0000750000175000017500000000000010605234724014610 5ustar muammarmuammarscalapack-1.8.0/PBLAS/SRC/pssyr_.c0000640000175000017500000003367210363532303016302 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssyr_( F_CHAR_T UPLO, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * A, int * IA, int * JA, int * DESCA ) #else void pssyr_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PSSYR performs the symmetric rank 1 operation * * sub( A ) := alpha*sub( X )*sub( X )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Alpha is a scalar, sub( X ) is an n element subvector and sub( A ) is * an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X * corresponding to the entries of the subvector sub( X ) need * not be set on input. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYR", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PSSYR", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkmat( ctxt, "PSSYR", "A", *N, 2, *N, 2, Ai, Aj, Ad, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYR", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Local rank-1 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); XCld = XCd0[LLD_]; XRld = XRd0[LLD_]; if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) sger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cpsyr( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzsyr ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzsyr ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) sger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); /* * End of PSSYR */ } scalapack-1.8.0/PBLAS/SRC/pcdotc_.c0000640000175000017500000006710710363532303016376 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcdotc_( int * N, float * DOT, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcdotc_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; float * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PCDOTC forms the dot product of two subvectors, * * DOT := sub( X )**H * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) COMPLEX array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCDOTC", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PCDOTC", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PCDOTC", info ); return; } #endif DOT[REAL_PART] = ZERO; DOT[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cctypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cctypeset(); size = type->size; dot = type->Fvvdotc; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Ccgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Ccgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Ccgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Ccgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Ccgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Ccgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Ccgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Ccgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Ccgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Ccgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cctypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cctypeset(); /* * Compute DOT := sub( Y )**H * sub( X ) */ PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotc ); /* * Conjugate the result */ DOT[IMAG_PART] = -DOT[IMAG_PART]; } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cctypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); } /* * End of PCDOTC */ } scalapack-1.8.0/PBLAS/SRC/pzhemm_.c0000640000175000017500000005355410363532303016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzhemm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzhemm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PZHEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a Hermitian submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the Hermitian submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * Hermitian submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the Hermitian submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the Hermitian submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * Hermitian submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the Hermitian submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the Hermitian submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PZHEMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZHEMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PZHEMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZHEMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PZHEMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZHEMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PZHEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PZHEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, CONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, CONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PZHEMM */ } scalapack-1.8.0/PBLAS/SRC/pdatrmv_.c0000640000175000017500000006153110363532303016572 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDATRMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an n by n triangular subma- * trix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'T' or 't' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'C' or 'c' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Yroc, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr, YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper, usiz; double * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PDATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkvec( ctxt, "PDATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info ); PB_Cchkvec( ctxt, "PDATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PDATRMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cdtypeset(); size = usiz = type->size; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Scale YA in the case sub( Y ) has been reused */ if( notran && !( YApbY ) ) { /* * YA resides in (a) process column(s) */ if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Amp > 0 ) dascal_( &Amp, ((char *) tbeta), YA, &ione ); } } else if( !( notran ) && !( YApbY ) ) { /* * YA resides in (a) process row(s) */ if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Anq > 0 ) dascal_( &Anq, ((char *) tbeta), YA, &YAld ); } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, usiz ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dagemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } if( notran ) { PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PDATRMV */ } scalapack-1.8.0/PBLAS/SRC/PBblas.h0000640000175000017500000007060310363532303016126 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * This file includes F77 BLAS definitions. All PBLAS routines include * this file. * * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define CNOTRAN 'N' #define CNOCONJG 'N' #define CTRAN 'T' #define CCONJG 'Z' #define CCOTRAN 'C' #define CALL 'A' #define CLOWER 'L' #define CUPPER 'U' #define CDIAGONAL 'D' #define CLEFT 'L' #define CRIGHT 'R' #define CUNIT 'U' #define CNOUNIT 'N' #define CINIT 'I' #define CNOINIT 'N' #define CFORWARD 'F' #define CBACKWARD 'B' #define CREUSE 'R' #define CALLOCATE 'A' #define NOTRAN "N" #define NOCONJG "N" #define TRAN "T" #define CONJG "Z" #define COTRAN "C" #define ALL "A" #define LOWER "L" #define UPPER "U" #define DIAGONAL "D" #define LEFT "L" #define RIGHT "R" #define UNIT "U" #define NOUNIT "N" #define INIT "I" #define NOINIT "N" #define FORWARD "F" #define BACKWARD "B" #define REUSE "R" #define ALLOCATE "A" #if( _F2C_CALL_ == _F2C_ADD_ ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine. No redefinition is necessary to have * the following FORTRAN to C interface: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) dgemm_(...) * * This is the PBLAS default. */ #endif #if( _F2C_CALL_ == _F2C_UPCASE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) DGEMM(...) */ #define srot_ SROT #define drot_ DROT #define sswap_ SSWAP #define dswap_ DSWAP #define cswap_ CSWAP #define zswap_ ZSWAP #define scopy_ SCOPY #define dcopy_ DCOPY #define ccopy_ CCOPY #define zcopy_ ZCOPY #define saxpy_ SAXPY #define daxpy_ DAXPY #define caxpy_ CAXPY #define zaxpy_ ZAXPY #define sscal_ SSCAL #define dscal_ DSCAL #define cscal_ CSCAL #define zscal_ ZSCAL #define csscal_ CSSCAL #define zdscal_ ZDSCAL #define sasum_ SASUM #define dasum_ DASUM #define scasum_ SCASUM #define dzasum_ DZASUM #define snrm2_ SNRM2 #define dnrm2_ DNRM2 #define scnrm2_ SCNRM2 #define dznrm2_ DZNRM2 #define sdot_ SDOT #define ddot_ DDOT #define cdotu_ CDOTU #define zdotu_ ZDOTU #define cdotc_ CDOTC #define zdotc_ ZDOTC #define isamax_ ISAMAX #define idamax_ IDAMAX #define icamax_ ICAMAX #define izamax_ IZAMAX #define sgemv_ SGEMV #define dgemv_ DGEMV #define cgemv_ CGEMV #define zgemv_ ZGEMV #define ssymv_ SSYMV #define dsymv_ DSYMV #define chemv_ CHEMV #define zhemv_ ZHEMV #define strmv_ STRMV #define dtrmv_ DTRMV #define ctrmv_ CTRMV #define ztrmv_ ZTRMV #define strsv_ STRSV #define dtrsv_ DTRSV #define ctrsv_ CTRSV #define ztrsv_ ZTRSV #define sger_ SGER #define dger_ DGER #define cgeru_ CGERU #define zgeru_ ZGERU #define cgerc_ CGERC #define zgerc_ ZGERC #define ssyr_ SSYR #define dsyr_ DSYR #define cher_ CHER #define zher_ ZHER #define ssyr2_ SSYR2 #define dsyr2_ DSYR2 #define cher2_ CHER2 #define zher2_ ZHER2 #define sgemm_ SGEMM #define dgemm_ DGEMM #define cgemm_ CGEMM #define zgemm_ ZGEMM #define ssymm_ SSYMM #define dsymm_ DSYMM #define csymm_ CSYMM #define chemm_ CHEMM #define zsymm_ ZSYMM #define zhemm_ ZHEMM #define strmm_ STRMM #define dtrmm_ DTRMM #define ctrmm_ CTRMM #define ztrmm_ ZTRMM #define strsm_ STRSM #define dtrsm_ DTRSM #define ctrsm_ CTRSM #define ztrsm_ ZTRSM #define ssyrk_ SSYRK #define dsyrk_ DSYRK #define csyrk_ CSYRK #define cherk_ CHERK #define zsyrk_ ZSYRK #define zherk_ ZHERK #define ssyr2k_ SSYR2K #define dsyr2k_ DSYR2K #define csyr2k_ CSYR2K #define cher2k_ CHER2K #define zsyr2k_ ZSYR2K #define zher2k_ ZHER2K #endif #if( _F2C_CALL_ == _F2C_NOCHANGE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) dgemm(...) */ #define srot_ srot #define drot_ drot #define sswap_ sswap #define dswap_ dswap #define cswap_ cswap #define zswap_ zswap #define scopy_ scopy #define dcopy_ dcopy #define ccopy_ ccopy #define zcopy_ zcopy #define saxpy_ saxpy #define daxpy_ daxpy #define caxpy_ caxpy #define zaxpy_ zaxpy #define sscal_ sscal #define dscal_ dscal #define cscal_ cscal #define zscal_ zscal #define csscal_ csscal #define zdscal_ zdscal #define sasum_ sasum #define dasum_ dasum #define scasum_ scasum #define dzasum_ dzasum #define snrm2_ snrm2 #define dnrm2_ dnrm2 #define scnrm2_ scnrm2 #define dznrm2_ dznrm2 #define sdot_ sdot #define ddot_ ddot #define cdotu_ cdotu #define zdotu_ zdotu #define cdotc_ cdotc #define zdotc_ zdotc #define isamax_ isamax #define idamax_ idamax #define icamax_ icamax #define izamax_ izamax #define sgemv_ sgemv #define dgemv_ dgemv #define cgemv_ cgemv #define zgemv_ zgemv #define ssymv_ ssymv #define dsymv_ dsymv #define chemv_ chemv #define zhemv_ zhemv #define strmv_ strmv #define dtrmv_ dtrmv #define ctrmv_ ctrmv #define ztrmv_ ztrmv #define strsv_ strsv #define dtrsv_ dtrsv #define ctrsv_ ctrsv #define ztrsv_ ztrsv #define sger_ sger #define dger_ dger #define cgeru_ cgeru #define zgeru_ zgeru #define cgerc_ cgerc #define zgerc_ zgerc #define ssyr_ ssyr #define dsyr_ dsyr #define cher_ cher #define zher_ zher #define ssyr2_ ssyr2 #define dsyr2_ dsyr2 #define cher2_ cher2 #define zher2_ zher2 #define sgemm_ sgemm #define dgemm_ dgemm #define cgemm_ cgemm #define zgemm_ zgemm #define ssymm_ ssymm #define dsymm_ dsymm #define csymm_ csymm #define chemm_ chemm #define zsymm_ zsymm #define zhemm_ zhemm #define strmm_ strmm #define dtrmm_ dtrmm #define ctrmm_ ctrmm #define ztrmm_ ztrmm #define strsm_ strsm #define dtrsm_ dtrsm #define ctrsm_ ctrsm #define ztrsm_ ztrsm #define ssyrk_ ssyrk #define dsyrk_ dsyrk #define csyrk_ csyrk #define cherk_ cherk #define zsyrk_ zsyrk #define zherk_ zherk #define ssyr2k_ ssyr2k #define dsyr2k_ dsyr2k #define csyr2k_ csyr2k #define cher2k_ cher2k #define zsyr2k_ zsyr2k #define zher2k_ zher2k #endif /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ #ifdef __STDC__ int isamax_ ( int *, char *, int * ); int idamax_ ( int *, char *, int * ); int icamax_ ( int *, char *, int * ); int izamax_ ( int *, char *, int * ); F_VOID_FCT saxpy_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT daxpy_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT caxpy_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT zaxpy_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT scopy_ ( int *, char *, int *, char *, int * ); F_VOID_FCT dcopy_ ( int *, char *, int *, char *, int * ); F_VOID_FCT ccopy_ ( int *, char *, int *, char *, int * ); F_VOID_FCT zcopy_ ( int *, char *, int *, char *, int * ); F_VOID_FCT sscal_ ( int *, char *, char *, int * ); F_VOID_FCT dscal_ ( int *, char *, char *, int * ); F_VOID_FCT cscal_ ( int *, char *, char *, int * ); F_VOID_FCT csscal_ ( int *, char *, char *, int * ); F_VOID_FCT zdscal_ ( int *, char *, char *, int * ); F_VOID_FCT zscal_ ( int *, char *, char *, int * ); F_VOID_FCT sswap_ ( int *, char *, int *, char *, int * ); F_VOID_FCT dswap_ ( int *, char *, int *, char *, int * ); F_VOID_FCT cswap_ ( int *, char *, int *, char *, int * ); F_VOID_FCT zswap_ ( int *, char *, int *, char *, int * ); F_VOID_FCT sgemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dgemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT cgemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zgemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT ssymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dsymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT chemv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zhemv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT strmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT dtrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT ctrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT ztrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT strsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT dtrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT ctrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT ztrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); F_VOID_FCT sger_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT dger_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT cgerc_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT cgeru_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT zgerc_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT zgeru_ ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT ssyr_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT dsyr_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT cher_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT zher_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT ssyr2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT dsyr2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT cher2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT zher2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT sgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT cgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT ssymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dsymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT csymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zsymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT chemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zhemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT ssyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dsyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT csyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zsyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cherk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zherk_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT ssyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dsyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT csyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zsyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT cher2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zher2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT strmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT dtrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT ctrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT ztrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT strsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT dtrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT ctrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); F_VOID_FCT ztrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); #else int isamax_ (); int idamax_ (); int icamax_ (); int izamax_ (); F_VOID_FCT saxpy_ (); F_VOID_FCT daxpy_ (); F_VOID_FCT caxpy_ (); F_VOID_FCT zaxpy_ (); F_VOID_FCT scopy_ (); F_VOID_FCT dcopy_ (); F_VOID_FCT ccopy_ (); F_VOID_FCT zcopy_ (); F_VOID_FCT sscal_ (); F_VOID_FCT dscal_ (); F_VOID_FCT cscal_ (); F_VOID_FCT csscal_ (); F_VOID_FCT zscal_ (); F_VOID_FCT zdscal_ (); F_VOID_FCT sswap_ (); F_VOID_FCT dswap_ (); F_VOID_FCT cswap_ (); F_VOID_FCT zswap_ (); F_VOID_FCT sgemv_ (); F_VOID_FCT dgemv_ (); F_VOID_FCT cgemv_ (); F_VOID_FCT zgemv_ (); F_VOID_FCT ssymv_ (); F_VOID_FCT dsymv_ (); F_VOID_FCT chemv_ (); F_VOID_FCT zhemv_ (); F_VOID_FCT strmv_ (); F_VOID_FCT dtrmv_ (); F_VOID_FCT ctrmv_ (); F_VOID_FCT ztrmv_ (); F_VOID_FCT strsv_ (); F_VOID_FCT dtrsv_ (); F_VOID_FCT ctrsv_ (); F_VOID_FCT ztrsv_ (); F_VOID_FCT sger_ (); F_VOID_FCT dger_ (); F_VOID_FCT cgerc_ (); F_VOID_FCT cgeru_ (); F_VOID_FCT zgerc_ (); F_VOID_FCT zgeru_ (); F_VOID_FCT ssyr_ (); F_VOID_FCT dsyr_ (); F_VOID_FCT cher_ (); F_VOID_FCT zher_ (); F_VOID_FCT ssyr2_ (); F_VOID_FCT dsyr2_ (); F_VOID_FCT cher2_ (); F_VOID_FCT zher2_ (); F_VOID_FCT sgemm_ (); F_VOID_FCT dgemm_ (); F_VOID_FCT cgemm_ (); F_VOID_FCT zgemm_ (); F_VOID_FCT ssymm_ (); F_VOID_FCT dsymm_ (); F_VOID_FCT csymm_ (); F_VOID_FCT zsymm_ (); F_VOID_FCT chemm_ (); F_VOID_FCT zhemm_ (); F_VOID_FCT ssyrk_ (); F_VOID_FCT dsyrk_ (); F_VOID_FCT csyrk_ (); F_VOID_FCT zsyrk_ (); F_VOID_FCT cherk_ (); F_VOID_FCT zherk_ (); F_VOID_FCT ssyr2k_ (); F_VOID_FCT dsyr2k_ (); F_VOID_FCT csyr2k_ (); F_VOID_FCT zsyr2k_ (); F_VOID_FCT cher2k_ (); F_VOID_FCT zher2k_ (); F_VOID_FCT strmm_ (); F_VOID_FCT dtrmm_ (); F_VOID_FCT ctrmm_ (); F_VOID_FCT ztrmm_ (); F_VOID_FCT strsm_ (); F_VOID_FCT dtrsm_ (); F_VOID_FCT ctrsm_ (); F_VOID_FCT ztrsm_ (); #endif scalapack-1.8.0/PBLAS/SRC/pcsscal_.c0000640000175000017500000002207510363532303016545 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcsscal_( int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pcsscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PCSSCAL multiplies an n element subvector sub( X ) by the real scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PCSSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PCSSCAL", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ONE ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); if( ALPHA[REAL_PART] == ZERO ) { cset_( &Xnq, type->zero, Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } else { csscal_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cctypeset(); if( ALPHA[REAL_PART] == ZERO ) { cset_( &Xnp, type->zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } else { csscal_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } } } return; } /* * End of PCSSCAL */ } scalapack-1.8.0/PBLAS/SRC/psasum_.c0000640000175000017500000002461310363532303016425 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psasum_( int * N, float * ASUM, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void psasum_( N, ASUM, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * ASUM; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSASUM returns the sum of absolute values of the entries of a subvec- * tor sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ASUM (local output) REAL * On exit, ASUM specifies the sum of absolute values of the * subvector sub( X ) only in its scope (See below for further * details). * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSASUM", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PSASUM", info ); return; } #endif /* * Initialize ASUM */ *ASUM = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute ASUM */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *ASUM = ABS( X[Xii+Xjj*Xd[LLD_]] ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; svasum_( &Xnq, ((char *) ASUM), ((char *)( X+(Xii+Xjj*Xld) )), &Xld ); } /* * If Xnq <= 0, ASUM is zero (see initialization above) */ if( ( npcol > 1 ) && ( Xcol >= 0 ) ) { /* * Combine the local results if npcol > 1 and Xcol >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { svasum_( &Xnp, ((char *) ASUM), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } /* * If Xnp <= 0, ASUM is zero (see initialization above) */ if( ( nprow > 1 ) && ( Xrow >= 0 ) ) { /* * Combine the local results if nprow > 1 and Xrow >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } /* * End of PSASUM */ } scalapack-1.8.0/PBLAS/SRC/pzagemv_.c0000640000175000017500000004540710363532303016572 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzagemv_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzagemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZAGEMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) ) * + abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A ) | * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'T' or 't', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'C' or 'c', * sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| + * |beta*sub( Y )|. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * one, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZAGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PZAGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PZAGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PZAGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PZAGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PZAGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PZAGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); utyp = PB_Cdtypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } one = utyp->one; if( nota ) { PB_Cpaxpby( utyp, NOCONJG, *M, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PZAGEMV */ } scalapack-1.8.0/PBLAS/SRC/pdgemv_.c0000640000175000017500000004317610363532303016404 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdgemv_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdgemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDGEMV performs one of the matrix-vector operations * * sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors * and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ), * * TRANS = 'T' or 't', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ), * * TRANS = 'C' or 'c', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PDGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PDGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PDGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PDGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PDGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PDGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( BETA[REAL_PART] == ZERO ) { dset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { dscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( BETA[REAL_PART] == ZERO ) { dset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { dscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { dgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { dgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PDGEMV */ } scalapack-1.8.0/PBLAS/SRC/pdtrmm_.c0000640000175000017500000005242110363532303016416 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtrmm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB ) #else void pdtrmm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; double * A, * B; #endif { /* * Purpose * ======= * * PDTRMM performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X'. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal TRANS = %c\n", TranOp ); info = -3; } if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PDTRMM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PDTRMM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PDTRMM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRMM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ChooseAB ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrmmAB( type, &Var, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } else { if( ( lside && notran ) || ( !( lside ) && !( notran ) ) ) { OpR = CCOMBINE; OpC = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } } else { OpR = CBCAST; OpC = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } } if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrmmB( type, &DirB, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PDTRMM */ } scalapack-1.8.0/PBLAS/SRC/pdsyr_.c0000640000175000017500000003374110363532303016260 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsyr_( F_CHAR_T UPLO, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * A, int * IA, int * JA, int * DESCA ) #else void pdsyr_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PDSYR performs the symmetric rank 1 operation * * sub( A ) := alpha*sub( X )*sub( X )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Alpha is a scalar, sub( X ) is an n element subvector and sub( A ) is * an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X * corresponding to the entries of the subvector sub( X ) need * not be set on input. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYR", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PDSYR", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkmat( ctxt, "PDSYR", "A", *N, 2, *N, 2, Ai, Aj, Ad, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYR", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Local rank-1 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); XCld = XCd0[LLD_]; XRld = XRd0[LLD_]; if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) dger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cpsyr( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzsyr ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzsyr ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) dger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); /* * End of PDSYR */ } scalapack-1.8.0/PBLAS/SRC/PTZBLAS/0000750000175000017500000000000010605234721015724 5ustar muammarmuammarscalapack-1.8.0/PBLAS/SRC/PTZBLAS/chescal.f0000640000175000017500000001665310363532303017507 0ustar muammarmuammar SUBROUTINE CHESCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CHESCAL scales a two-dimensional array A by the real scalar alpha. * The diagonal entries specified by IOFFD of A are supposed to be real. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the Hermitian * matrix to be scaled as specified by IOFFD, and the strictly * lower trapezoidal part of A is not referenced. When UPLO is * 'L' or 'l', the leading m by n part of the array A must con- * tain the lower trapezoidal part of the Hermitian matrix to be * scaled as specified by IOFFD, and the strictly upper trape- * zoidal part of A is not referenced. On exit, the entries of * the trapezoid part of A determined by UPLO and IOFFD are sca- * led. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL RONE, RZERO PARAMETER ( RONE = 1.0E+0, RZERO = 0.0E+0 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL CSSCAL, CTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( ALPHA.EQ.RONE ) THEN * * Zeros the imaginary part of the diagonals * IF( LSAME( UPLO, 'L' ).OR.LSAME( UPLO, 'U' ).OR. $ LSAME( UPLO, 'D' ) ) THEN DO 10 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CMPLX( REAL( A( JTMP, J ) ), RZERO ) 10 CONTINUE END IF RETURN ELSE IF( ALPHA.EQ.RZERO ) THEN CALL CTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) CALL CSSCAL( M, ALPHA, A( 1, J ), 1 ) 20 CONTINUE DO 30 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CMPLX( ALPHA * REAL( A( JTMP, J ) ), RZERO ) IF( M.GT.JTMP ) $ CALL CSSCAL( M-JTMP, ALPHA, A( JTMP + 1, J ), 1 ) 30 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 40 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD CALL CSSCAL( JTMP - 1, ALPHA, A( 1, J ), 1 ) A( JTMP, J ) = CMPLX( ALPHA * REAL( A( JTMP, J ) ), RZERO ) 40 CONTINUE DO 50 J = MAX( 0, MN ) + 1, N CALL CSSCAL( M, ALPHA, A( 1, J ), 1 ) 50 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 60 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CMPLX( ALPHA * REAL( A( JTMP, J ) ), RZERO ) 60 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 70 J = 1, N CALL CSSCAL( M, ALPHA, A( 1, J ), 1 ) 70 CONTINUE * END IF * RETURN * * End of CHESCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmdda.f0000640000175000017500000001050310363532303017317 0ustar muammarmuammar SUBROUTINE DMMDDA( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMDDA performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of DMMDDA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ctzpadcpy.f0000640000175000017500000002000610363532303020071 0ustar muammarmuammar SUBROUTINE CTZPADCPY( UPLO, DIAG, M, N, IOFFD, A, LDA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, UPLO INTEGER IOFFD, LDA, LDB, M, N * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CTZPADCPY copies an array A into an array B. The unchanged part of B * is padded with zeros. The diagonal of B specified by IOFFD may be set * to ones. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be copied as follows: * = 'L' or 'l': Lower triangular part is copied; the * strictly upper triangular part of B is * padded with zeros, * = 'U' or 'u': Upper triangular part is copied; the * strictly lower triangular part of B is * padded with zeros. * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not the diagonal of B is * to be set to ones or not as follows: * * DIAG = 'N' or 'n': the diagonals of A are copied into the * diagonals of B, otherwise the diagonals of B are set to ones. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * A (input) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be copied * as specified by IOFFD, UPLO and DIAG, and the strictly lower * trapezoidal part of A is not referenced; When UPLO = 'L',the * leading m by n part of the array A must contain the lower * trapezoidal part of the matrix to be copied as specified by * IOFFD, UPLO and DIAG and the strictly upper trapezoidal part * of A is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * B (output) COMPLEX array * On entry, B is an array of dimension (LDB,N). On exit, this * array contains the padded copy of A as specified by IOFFD, * UPLO and DIAG. * * LDB (input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, ITMP, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE * JTMP = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 50 J = MN + 1, JTMP ITMP = J + IOFFD DO 30 I = 1, ITMP - 1 B( I, J ) = ZERO 30 CONTINUE DO 40 I = ITMP, M B( I, J ) = A( I, J ) 40 CONTINUE 50 CONTINUE ELSE DO 80 J = MN + 1, JTMP ITMP = J + IOFFD DO 60 I = 1, ITMP - 1 B( I, J ) = ZERO 60 CONTINUE B( ITMP, J ) = ONE DO 70 I = ITMP + 1, M B( I, J ) = A( I, J ) 70 CONTINUE 80 CONTINUE END IF * DO 100 J = JTMP + 1, N DO 90 I = 1, M B( I, J ) = ZERO 90 CONTINUE 100 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * JTMP = MAX( 0, -IOFFD ) * DO 120 J = 1, JTMP DO 110 I = 1, M B( I, J ) = ZERO 110 CONTINUE 120 CONTINUE * MN = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 150 J = JTMP + 1, MN ITMP = J + IOFFD DO 130 I = 1, ITMP B( I, J ) = A( I, J ) 130 CONTINUE DO 140 I = ITMP + 1, M B( I, J ) = ZERO 140 CONTINUE 150 CONTINUE ELSE DO 180 J = JTMP + 1, MN ITMP = J + IOFFD DO 160 I = 1, ITMP - 1 B( I, J ) = A( I, J ) 160 CONTINUE B( ITMP, J ) = ONE DO 170 I = ITMP + 1, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE END IF * DO 200 J = MAX( 0, MN ) + 1, N DO 190 I = 1, M B( I, J ) = A( I, J ) 190 CONTINUE 200 CONTINUE * ELSE * DO 220 J = 1, N DO 210 I = 1, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE * END IF * RETURN * * End of CTZPADCPY * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmadd.f0000640000175000017500000001051710363532303017323 0ustar muammarmuammar SUBROUTINE CMMADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL CCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL CAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL CAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of CMMADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zset.f0000640000175000017500000000520110363532303017055 0ustar muammarmuammar SUBROUTINE ZSET( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 X( * ) * .. * * Purpose * ======= * * ZSET sets the entries of an n vector x to the scalar alpha. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (input/output) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are set to alpha. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZSET', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := alpha * IF( INCX.EQ.1 ) $ GO TO 20 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * DO 10 I = 1, N X( IX ) = ALPHA IX = IX + INCX 10 CONTINUE * RETURN * * code for increment equal to 1 * * clean-up loop * 20 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 40 * DO 30 I = 1, M X( I ) = ALPHA 30 CONTINUE IF( N.LT.4 ) $ RETURN * 40 MP1 = M + 1 DO 50 I = MP1, N, 4 X( I ) = ALPHA X( I + 1 ) = ALPHA X( I + 2 ) = ALPHA X( I + 3 ) = ALPHA 50 CONTINUE * RETURN * * End of ZSET * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dasymv.f0000640000175000017500000002042010363532303017373 0ustar muammarmuammar SUBROUTINE DASYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * DASYMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY DOUBLE PRECISION TALPHA, TEMP0, TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'DASYMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * ABS( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = ABS( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * ABS( A( J, J ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * ABS( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = ABS( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * ABS( A( J, J ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * ABS( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * ABS( A( J, J ) ) * DO 90, I = J + 1, N TEMP0 = ABS( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * ABS( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * ABS( A( J, J ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = ABS( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of DASYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zasymv.f0000640000175000017500000002076010363532303017430 0ustar muammarmuammar SUBROUTINE ZASYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION Y( * ) COMPLEX*16 A( LDA, * ), X( * ) * .. * * Purpose * ======= * * ZASYMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY DOUBLE PRECISION TALPHA, TEMP0, TEMP1, TEMP2 COMPLEX*16 ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'ZASYMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * CABS1( A( J, J ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * CABS1( A( J, J ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * CABS1( A( J, J ) ) * DO 90, I = J + 1, N TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * CABS1( A( J, J ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of ZASYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmtadd.f0000640000175000017500000001513610363532303017512 0ustar muammarmuammar SUBROUTINE DMMTADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMTADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL DSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL DCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL DAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL DAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL DSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of DMMTADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zvvdotu.f0000640000175000017500000000356310363532303017622 0ustar muammarmuammar SUBROUTINE ZVVDOTU( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX*16 DOT * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * ZVVDOTU computes the following dot product: * * dot = dot + x**T * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) COMPLEX*16 * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. COMPLEX*16 ZDOTU EXTERNAL ZDOTU * .. * .. Executable Statements .. * DOT = DOT + ZDOTU( N, X, INCX, Y, INCY ) * RETURN * * End of ZVVDOTU * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dcshft.f0000640000175000017500000000554210363532303017353 0ustar muammarmuammar SUBROUTINE DCSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * DCSHFT shifts columns of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A. M must be at * least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A to be * shifted. N must be at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the columns of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the co- * lumns are shifted to the right. When OFFSET is negative, the * columns of A are shifted to the left. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N+ABS(OFFSET) ). * On exit, A contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Further Details * =============== * * N=3 OFFSET=6 -OFFSET=6 N=3 * ------------------- ------------------- * | 1 2 3 4 5 6 7 8 9 | M | 1 2 3 4 5 6 7 8 9 | * ------------------- ------------------- * V V * ------------------- ------------------- * | 1 2 3 4 5 6 1 2 3 | M | 7 8 9 4 5 6 7 8 9 | * ------------------- ------------------- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = N, 1, -1 DO 10 I = 1, M A( I, J+OFFSET ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I, J-OFFSET ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of DCSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ztzscal.f0000640000175000017500000001530210363532303017565 0ustar muammarmuammar SUBROUTINE ZTZSCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZTZSCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL ZSCAL, ZTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ).OR.( ALPHA.EQ.ONE ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN CALL ZTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 10 J = 1, MIN( MN, N ) CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) 10 CONTINUE DO 20 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD IF( M.GE.JTMP ) $ CALL ZSCAL( M-JTMP+1, ALPHA, A( JTMP, J ), 1 ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 30 J = MAX( 0, -IOFFD ) + 1, MN CALL ZSCAL( J + IOFFD, ALPHA, A( 1, J ), 1 ) 30 CONTINUE DO 40 J = MAX( 0, MN ) + 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) 40 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 50 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 50 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 60 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) 60 CONTINUE * END IF * RETURN * * End of ZTZSCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmadd.f0000640000175000017500000001053310363532303017350 0ustar muammarmuammar SUBROUTINE ZMMADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL ZCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL ZAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL ZAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of ZMMADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmddat.f0000640000175000017500000001506610363532303017533 0ustar muammarmuammar SUBROUTINE SMMDDAT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMDDAT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) REAL array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL SCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL SAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL SAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of SMMDDAT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/sasqrtb.f0000640000175000017500000000210110363532303017543 0ustar muammarmuammar SUBROUTINE SASQRTB( A, B, C ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. REAL A, B, C * .. * * Purpose * ======= * * SASQRTB computes c := a * sqrt( b ) where a, b and c are scalars. * * Arguments * ========= * * A (input) REAL * On entry, A specifies the scalar a. * * B (input) REAL * On entry, B specifies the scalar b. * * C (output) REAL * On entry, C specifies the scalar c. On exit, c is overwritten * by the product of a and the square root of b. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC SQRT * .. * .. Executable Statements .. * C = A * SQRT( B ) * RETURN * * End of SASQRTB * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/Makefile0000640000175000017500000001107210363532303017364 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0) # # Module: Makefile # # Purpose: PTZBLAS source Makefile # # Creation date: April 1, 1998 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../../SLmake.inc ############################################################################ # # This is the makefile to create a library for the PBTZBLAS tools. # The files are grouped as follows: # # ALLPBTZ -- Auxiliary routines for Level 1, 2 and 3 PBLAS # # SPBTZ -- Single precision real PBLAS Level F77 tools routines # CPBTZ -- Single precision complex PBLAS Level F77 tools routines # DPBTZ -- Double precision real PBLAS Level F77 tools routines # ZPBTZ -- Double precision complex PBLAS Level F77 tools routines # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ../../../SLmake.inc file # definitions to match your compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # The library is called # pblas_$(PLAT).a # (see ../../../SLmake.inc for the definition of $(PLAT)). # # To remove the object files after the library is created, enter # make clean # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # ############################################################################ all: single double complex complex16 APBTZ = \ pxerbla.o IPBTZ = \ immadd.o immdda.o immtadd.o immddat.o SPBTZ = \ svasum.o sset.o scshft.o srshft.o \ svvdot.o smmadd.o smmcadd.o smmtadd.o \ smmtcadd.o smmdda.o smmddac.o smmddat.o \ smmddact.o stzpad.o stzpadcpy.o stzscal.o \ sagemv.o sasymv.o satrmv.o SCPBTZ = \ sasqrtb.o sascal.o CPBTZ = \ scvasum.o cset.o ccshft.o crshft.o \ cvvdotu.o cvvdotc.o cmmadd.o cmmcadd.o \ cmmtadd.o cmmtcadd.o cmmdda.o cmmddac.o \ cmmddat.o cmmddact.o ctzpad.o ctzpadcpy.o \ chescal.o ctzscal.o ctzcnjg.o cagemv.o \ cahemv.o catrmv.o casymv.o csymv.o \ csyr.o csyr2.o DPBTZ = \ dvasum.o dset.o dcshft.o drshft.o \ dvvdot.o dmmadd.o dmmcadd.o dmmtadd.o \ dmmtcadd.o dmmdda.o dmmddac.o dmmddat.o \ dmmddact.o dtzpad.o dtzpadcpy.o dtzscal.o \ dagemv.o dasymv.o datrmv.o DZPBTZ = \ dasqrtb.o dascal.o ZPBTZ = \ dzvasum.o zset.o zcshft.o zrshft.o \ zvvdotu.o zvvdotc.o zmmadd.o zmmcadd.o \ zmmtadd.o zmmtcadd.o zmmdda.o zmmddac.o \ zmmddat.o zmmddact.o ztzpad.o ztzpadcpy.o \ zhescal.o ztzscal.o ztzcnjg.o zagemv.o \ zahemv.o zatrmv.o zasymv.o zsymv.o \ zsyr.o zsyr2.o #--------------------------------------------------------------------------- $(APBTZ): $(FRC) $(IPBTZ): $(FRC) $(SPBTZ): $(FRC) $(SCPBTZ): $(FRC) $(CPBTZ): $(FRC) $(DPBTZ): $(FRC) $(DZPBTZ): $(FRC) $(ZPBTZ): $(FRC) #--------------------------------------------------------------------------- single: $(APBTZ) $(IPBTZ) $(SPBTZ) $(SCPBTZ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(APBTZ) $(IPBTZ) $(SPBTZ) \ $(SCPBTZ) $(RANLIB) $(SCALAPACKLIB) complex: $(APBTZ) $(IPBTZ) $(CPBTZ) $(SCPBTZ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(APBTZ) $(IPBTZ) $(CPBTZ) \ $(SCPBTZ) $(RANLIB) $(SCALAPACKLIB) double: $(APBTZ) $(IPBTZ) $(DPBTZ) $(DZPBTZ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(APBTZ) $(IPBTZ) $(DPBTZ) \ $(DZPBTZ) $(RANLIB) $(SCALAPACKLIB) complex16: $(APBTZ) $(IPBTZ) $(ZPBTZ) $(DZPBTZ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(APBTZ) $(IPBTZ) $(ZPBTZ) \ $(DZPBTZ) $(RANLIB) $(SCALAPACKLIB) #--------------------------------------------------------------------------- FRC: @FRC=$(FRC) clean: rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmddat.f0000640000175000017500000001520210363532303017532 0ustar muammarmuammar SUBROUTINE ZMMDDAT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMDDAT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL ZCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL ZAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL ZAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL ZCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL ZAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL ZAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of ZMMDDAT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmadd.f0000640000175000017500000001041710363532303017342 0ustar muammarmuammar SUBROUTINE SMMADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) REAL array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of SMMADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmdda.f0000640000175000017500000001042310363532303017337 0ustar muammarmuammar SUBROUTINE SMMDDA( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMDDA performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) REAL array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of SMMDDA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmddact.f0000640000175000017500000001507310363532303017674 0ustar muammarmuammar SUBROUTINE SMMDDACT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMDDACT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added into the leading m by * n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) REAL array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL SCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL SAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL SAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of SMMDDACT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cahemv.f0000640000175000017500000002122610363532303017340 0ustar muammarmuammar SUBROUTINE CAHEMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL Y( * ) COMPLEX A( LDA, * ), X( * ) * .. * * Purpose * ======= * * CAHEMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n Hermitian matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the Hermitian ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * Hermitian matrix and the strictly upper trapezoidal part of A * is not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the offdiagonal elements of A need not be set and * assumed to be zero. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY REAL TALPHA, TEMP0, TEMP1, TEMP2 COMPLEX ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, REAL * .. * .. Statement Functions .. REAL CABS1 CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'CAHEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * ABS( REAL( A( J, J ) ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * ABS( REAL( A( J, J ) ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * ABS( REAL( A( J, J ) ) ) * DO 90, I = J + 1, N TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * ABS( REAL( A( J, J ) ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of CAHEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/stzpadcpy.f0000640000175000017500000001771710363532303020130 0ustar muammarmuammar SUBROUTINE STZPADCPY( UPLO, DIAG, M, N, IOFFD, A, LDA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, UPLO INTEGER IOFFD, LDA, LDB, M, N * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * STZPADCPY copies an array A into an array B. The unchanged part of B * is padded with zeros. The diagonal of B specified by IOFFD may be set * to ones. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be copied as follows: * = 'L' or 'l': Lower triangular part is copied; the * strictly upper triangular part of B is * padded with zeros, * = 'U' or 'u': Upper triangular part is copied; the * strictly lower triangular part of B is * padded with zeros. * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not the diagonal of B is * to be set to ones or not as follows: * * DIAG = 'N' or 'n': the diagonals of A are copied into the * diagonals of B, otherwise the diagonals of B are set to ones. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * A (input) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be copied * as specified by IOFFD, UPLO and DIAG, and the strictly lower * trapezoidal part of A is not referenced; When UPLO = 'L',the * leading m by n part of the array A must contain the lower * trapezoidal part of the matrix to be copied as specified by * IOFFD, UPLO and DIAG and the strictly upper trapezoidal part * of A is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * B (output) REAL array * On entry, B is an array of dimension (LDB,N). On exit, this * array contains the padded copy of A as specified by IOFFD, * UPLO and DIAG. * * LDB (input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, ITMP, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE * JTMP = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 50 J = MN + 1, JTMP ITMP = J + IOFFD DO 30 I = 1, ITMP - 1 B( I, J ) = ZERO 30 CONTINUE DO 40 I = ITMP, M B( I, J ) = A( I, J ) 40 CONTINUE 50 CONTINUE ELSE DO 80 J = MN + 1, JTMP ITMP = J + IOFFD DO 60 I = 1, ITMP - 1 B( I, J ) = ZERO 60 CONTINUE B( ITMP, J ) = ONE DO 70 I = ITMP + 1, M B( I, J ) = A( I, J ) 70 CONTINUE 80 CONTINUE END IF * DO 100 J = JTMP + 1, N DO 90 I = 1, M B( I, J ) = ZERO 90 CONTINUE 100 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * JTMP = MAX( 0, -IOFFD ) * DO 120 J = 1, JTMP DO 110 I = 1, M B( I, J ) = ZERO 110 CONTINUE 120 CONTINUE * MN = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 150 J = JTMP + 1, MN ITMP = J + IOFFD DO 130 I = 1, ITMP B( I, J ) = A( I, J ) 130 CONTINUE DO 140 I = ITMP + 1, M B( I, J ) = ZERO 140 CONTINUE 150 CONTINUE ELSE DO 180 J = JTMP + 1, MN ITMP = J + IOFFD DO 160 I = 1, ITMP - 1 B( I, J ) = A( I, J ) 160 CONTINUE B( ITMP, J ) = ONE DO 170 I = ITMP + 1, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE END IF * DO 200 J = MAX( 0, MN ) + 1, N DO 190 I = 1, M B( I, J ) = A( I, J ) 190 CONTINUE 200 CONTINUE * ELSE * DO 220 J = 1, N DO 210 I = 1, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE * END IF * RETURN * * End of STZPADCPY * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dtzscal.f0000640000175000017500000001523510363532303017544 0ustar muammarmuammar SUBROUTINE DTZSCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * DTZSCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL DSCAL, DTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ).OR.( ALPHA.EQ.ONE ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN CALL DTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 10 J = 1, MIN( MN, N ) CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) 10 CONTINUE DO 20 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD IF( M.GE.JTMP ) $ CALL DSCAL( M-JTMP+1, ALPHA, A( JTMP, J ), 1 ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 30 J = MAX( 0, -IOFFD ) + 1, MN CALL DSCAL( J + IOFFD, ALPHA, A( 1, J ), 1 ) 30 CONTINUE DO 40 J = MAX( 0, MN ) + 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) 40 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 50 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 50 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 60 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) 60 CONTINUE * END IF * RETURN * * End of DTZSCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/svasum.f0000640000175000017500000000267510363532303017422 0ustar muammarmuammar SUBROUTINE SVASUM( N, ASUM, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ASUM * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * SVASUM returns the sum of absolute values of the entries of a vector * x. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ASUM (output) REAL * On exit, ASUM specifies the sum of absolute values. * * X (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. REAL SASUM EXTERNAL SASUM * .. * .. Executable Statements .. * ASUM = SASUM( N, X, INCX ) * RETURN * * End of SVASUM * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmdda.f0000640000175000017500000001052310363532303017320 0ustar muammarmuammar SUBROUTINE CMMDDA( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMDDA performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL CCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL CAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL CAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of CMMDDA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmddact.f0000640000175000017500000001515310363532303017654 0ustar muammarmuammar SUBROUTINE DMMDDACT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMDDACT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added into the leading m by * n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL DCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL DAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL DAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of DMMDDACT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/immadd.f0000640000175000017500000000771210363532303017334 0ustar muammarmuammar SUBROUTINE IMMADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N INTEGER ALPHA, BETA * .. * .. Array Arguments .. INTEGER A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * IMMADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) INTEGER * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) INTEGER array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) INTEGER * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) INTEGER array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER ONE, ZERO PARAMETER ( ONE = 1, ZERO = 0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( I, J ) = A( I, J ) + B( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M B( I, J ) = BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of IMMADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zsyr2.f0000640000175000017500000001573010363532303017171 0ustar muammarmuammar SUBROUTINE ZSYR2( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * ZSYR2 performs the symmetric rank 2 operation * * A := alpha*x*y' + alpha*y*x' + A, * * where alpha is a complex scalar, x and y are n element vectors and A * is an n by n SY matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. On exit, the upper triangular part of the array A is * overwritten by the upper triangular part of the updated ma- * trix. When UPLO = 'L' or 'l', the leading n by n part of the * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. On exit, the lower triangular part of the * array A is overwritten by the lower triangular part of the * updated matrix. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY COMPLEX*16 TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( INCX.EQ.0 )THEN INFO = 5 ELSE IF( INCY.EQ.0 )THEN INFO = 7 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 9 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'ZSYR2', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) $ RETURN * * Set up the start points in X and Y if the increments are not both * unity. * IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN IF( INCX.GT.0 )THEN KX = 1 ELSE KX = 1 - ( N - 1 )*INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 )*INCY END IF JX = KX JY = KY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * IF( LSAME( UPLO, 'U' ) )THEN * * Form A when A is stored in the upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 20, J = 1, N IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( J ) TEMP2 = ALPHA*X( J ) DO 10, I = 1, J A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 10 CONTINUE END IF 20 CONTINUE ELSE DO 40, J = 1, N IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( JY ) TEMP2 = ALPHA*X( JX ) IX = KX IY = KY DO 30, I = 1, J A( I, J ) = A( I, J ) + X( IX )*TEMP1 $ + Y( IY )*TEMP2 IX = IX + INCX IY = IY + INCY 30 CONTINUE END IF JX = JX + INCX JY = JY + INCY 40 CONTINUE END IF ELSE * * Form A when A is stored in the lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 60, J = 1, N IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( J ) TEMP2 = ALPHA*X( J ) DO 50, I = J, N A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 50 CONTINUE END IF 60 CONTINUE ELSE DO 80, J = 1, N IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( JY ) TEMP2 = ALPHA*X( JX ) IX = JX IY = JY DO 70, I = J, N A( I, J ) = A( I, J ) + X( IX )*TEMP1 $ + Y( IY )*TEMP2 IX = IX + INCX IY = IY + INCY 70 CONTINUE END IF JX = JX + INCX JY = JY + INCY 80 CONTINUE END IF END IF * RETURN * * End of ZSYR2 * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmddact.f0000640000175000017500000001474410363532303017660 0ustar muammarmuammar SUBROUTINE CMMDDACT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMDDACT performs the following operation: * * A := alpha * A + beta * conjg( B' ), * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been conjugated and added to the * leading m by n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CSCAL * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = CONJG( B( J, I ) ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = CONJG( B( J, I ) ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = CONJG( B( J, I ) ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * CONJG( B( J, I ) ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * CONJG( B( J, I ) ) + $ ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * CONJG( B( J, I ) ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N A( J, I ) = CONJG( B( I, J ) ) 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = CONJG( B( I, J ) ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N A( J, I ) = CONJG( B( I, J ) ) + A( J, I ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * CONJG( B( I, J ) ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * CONJG( B( I, J ) ) + $ ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N A( J, I ) = BETA * CONJG( B( I, J ) ) + A( J, I ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of CMMDDACT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/sagemv.f0000640000175000017500000001744510363532303017367 0ustar muammarmuammar SUBROUTINE SAGEMV( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER INCX, INCY, LDA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * SAGEMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an m by n matrix. * * Arguments * ========= * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) REAL array of dimension ( LDA, n ). * On entry, A is an array of dimension ( LDA, N ). The leading * m by n part of the array A must contain the matrix of coef- * ficients. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, * the incremented array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry * with BETA non-zero, the incremented array Y must contain the * vector y. On exit, the incremented array Y is overwritten by * the updated vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY REAL ABSX, TALPHA, TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = 1 ELSE IF( M.LT.0 ) THEN INFO = 2 ELSE IF( N.LT.0 ) THEN INFO = 3 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SAGEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set LENX and LENY, the lengths of the vectors x and y, and set * up the start points in X and Y. * IF( LSAME( TRANS, 'N' ) ) THEN LENX = N LENY = M ELSE LENX = M LENY = N END IF IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( LENX - 1 )*INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( LENY - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, LENY Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, LENY Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, LENY Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, LENY Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, LENY Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, LENY Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) ) THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, M Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE ELSE DO 100, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, M Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) IY = IY + INCY 90 CONTINUE END IF JX = JX + INCX 100 CONTINUE END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * JY = KY IF( INCX.EQ.1 ) THEN DO 120, J = 1, N TEMP = ZERO DO 110, I = 1, M TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 110 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 120 CONTINUE ELSE DO 140, J = 1, N TEMP = ZERO IX = KX DO 130, I = 1, M TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) IX = IX + INCX 130 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 140 CONTINUE END IF END IF * RETURN * * End of SAGEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmcadd.f0000640000175000017500000001047310363532303017467 0ustar muammarmuammar SUBROUTINE CMMCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMCADD performs the following operation: * * B := alpha * conjg( A ) + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been conjugated and added to the * leading m by n part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CSCAL * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( I, J ) = CONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = CONJG( A( I, J ) ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( I, J ) = CONJG( A( I, J ) ) + B( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * CONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * CONJG( A( I, J ) ) + $ BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( I, J ) = ALPHA * CONJG( A( I, J ) ) + B( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of CMMCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/crshft.f0000640000175000017500000000617010363532303017367 0ustar muammarmuammar SUBROUTINE CRSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CRSHFT shifts rows of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A to be shifted. * M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A. N must be * at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the rows of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the rows * are shifted to the bottom. When OFFSET is negative, the rows * of A are shifted to the top. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). On exit, A * contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M+ABS(OFFSET) ). * * Further Details * =============== * * N N N N * --- --- --- --- * | 1 | | 1 | | 1 | | 7 | * | 2 | M = 3 | 2 | | 2 | M = 3 | 8 | * | 3 | | 3 | | 3 | | 9 | * | 4 | | 4 | | 4 | | 4 | * | 5 | becomes | 5 | | 5 | becomes | 5 | * | 6 | | 6 | | 6 | | 6 | * | 7 | | 1 | | 7 | | 7 | * | 8 | OFFSET = 6 | 2 | | 8 | OFFSET = -6 | 8 | * | 9 | | 3 | | 9 | | 9 | * --- --- --- --- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = 1, N DO 10 I = M, 1, -1 A( I+OFFSET, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I-OFFSET, J ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of CRSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmddac.f0000640000175000017500000001052310363532303017512 0ustar muammarmuammar SUBROUTINE ZMMDDAC( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMDDAC performs the following operation: * * A := alpha * A + beta * conjg( B ), * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been conjugated and added to the * leading m by n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = DCONJG( B( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = DCONJG( B( I, J ) ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = DCONJG( B( I, J ) ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * DCONJG( B( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * DCONJG( B( I, J ) ) + $ ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * DCONJG( B( I, J ) ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of ZMMDDAC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmddact.f0000640000175000017500000001477510363532303017713 0ustar muammarmuammar SUBROUTINE ZMMDDACT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMDDACT performs the following operation: * * A := alpha * A + beta * conjg( B' ), * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been conjugated and added to the * leading m by n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = DCONJG( B( J, I ) ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = DCONJG( B( J, I ) ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = DCONJG( B( J, I ) ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * DCONJG( B( J, I ) ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * DCONJG( B( J, I ) ) + $ ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * DCONJG( B( J, I ) ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N A( J, I ) = DCONJG( B( I, J ) ) 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = DCONJG( B( I, J ) ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N A( J, I ) = DCONJG( B( I, J ) ) + A( J, I ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * DCONJG( B( I, J ) ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * DCONJG( B( I, J ) ) + $ ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N A( J, I ) = BETA * DCONJG( B( I, J ) ) + A( J, I ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of ZMMDDACT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/scvasum.f0000640000175000017500000000271010363532303017553 0ustar muammarmuammar SUBROUTINE SCVASUM( N, ASUM, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ASUM * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * SCVASUM returns the sum of absolute values of the entries of a vector * x. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ASUM (output) COMPLEX * On exit, ASUM specifies the sum of absolute values. * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. REAL SCASUM EXTERNAL SCASUM * .. * .. Executable Statements .. * ASUM = SCASUM( N, X, INCX ) * RETURN * * End of SCVASUM * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dvasum.f0000640000175000017500000000272510363532303017377 0ustar muammarmuammar SUBROUTINE DVASUM( N, ASUM, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ASUM * .. * .. Array Arguments .. DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * DVASUM returns the sum of absolute values of the entries of a vector * x. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ASUM (output) DOUBLE PRECISION * On exit, ASUM specifies the sum of absolute values. * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION DASUM EXTERNAL DASUM * .. * .. Executable Statements .. * ASUM = DASUM( N, X, INCX ) * RETURN * * End of DVASUM * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ctzscal.f0000640000175000017500000001527410363532303017546 0ustar muammarmuammar SUBROUTINE CTZSCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CTZSCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL CSCAL, CTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ).OR.( ALPHA.EQ.ONE ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN CALL CTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 10 J = 1, MIN( MN, N ) CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) 10 CONTINUE DO 20 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD IF( M.GE.JTMP ) $ CALL CSCAL( M-JTMP+1, ALPHA, A( JTMP, J ), 1 ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 30 J = MAX( 0, -IOFFD ) + 1, MN CALL CSCAL( J + IOFFD, ALPHA, A( 1, J ), 1 ) 30 CONTINUE DO 40 J = MAX( 0, MN ) + 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) 40 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 50 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 50 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 60 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) 60 CONTINUE * END IF * RETURN * * End of CTZSCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmcadd.f0000640000175000017500000001042210363532303017501 0ustar muammarmuammar SUBROUTINE SMMCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMCADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) REAL array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of SMMCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/stzpad.f0000640000175000017500000002073010363532303017401 0ustar muammarmuammar SUBROUTINE STZPAD( UPLO, HERM, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 HERM, UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * STZPAD initializes a two-dimensional array A to beta on the diagonal * specified by IOFFD or zeros the imaginary part of those diagonals and * set the offdiagonals to alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'D' or 'd': diagonal specified by IOFFD is set; the * rest of the array A is unchanged, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * Otherwise: All of the array A is set. * * HERM (input) CHARACTER*1 * On entry, HERM specifies what should be done to the diagonals * as follows. When UPLO is 'L', 'l', 'D', 'd', 'U' or 'u' and * HERM is 'Z' or 'z', the imaginary part of the diagonals is * set to zero. Otherwise, the diagonals are set to beta. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha, i.e., the value * to which the offdiagonal entries of the array A determined by * UPLO and IOFFD are set. * * BETA (input) REAL * On entry, BETA specifies the scalar beta, i.e., the value to * which the diagonal entries specified by IOFFD of the array A * are set. BETA is not referenced when UPLO is 'L', 'l', 'U' or * 'u' and HERM is 'Z'. * * A (input/output) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be set as * specified by IOFFD, and the strictly lower trapezoidal part * of A is not referenced; When UPLO = 'L', the leading m by n * part of the array A must contain the lower trapezoidal part * of the matrix to be set as specified by IOFFD, and the * strictly upper trapezoidal part of A is not referenced. On * exit, the entries of the trapezoid part of A determined by * UPLO, HERM and IOFFD are set. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly lower triangular part of the * array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE * IF( LSAME( HERM, 'Z' ) ) THEN DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE ELSE DO 60 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 50 I = JTMP + 1, M A( I, J ) = ALPHA 50 CONTINUE 60 CONTINUE END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly upper triangular part of the * array to ALPHA. * MN = MIN( M - IOFFD, N ) IF( LSAME( HERM, 'Z' ) ) THEN DO 80 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 70 I = 1, JTMP - 1 A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE ELSE DO 100 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 90 I = 1, JTMP - 1 A( I, J ) = ALPHA 90 CONTINUE A( JTMP, J ) = BETA 100 CONTINUE END IF DO 120 J = MAX( 0, MN ) + 1, N DO 110 I = 1, M A( I, J ) = ALPHA 110 CONTINUE 120 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the diagonal to BETA * IF( .NOT.( LSAME( HERM, 'Z' ) ) ) THEN IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 130 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 130 CONTINUE END IF END IF * ELSE * * Set the diagonals to BETA and the offdiagonals to ALPHA. * DO 150 J = 1, N DO 140 I = 1, M A( I, J ) = ALPHA 140 CONTINUE 150 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 160 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 160 CONTINUE END IF * END IF * RETURN * * End of STZPAD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/stzscal.f0000640000175000017500000001520510363532303017560 0ustar muammarmuammar SUBROUTINE STZSCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N REAL ALPHA * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * STZSCAL scales a two-dimensional array A by the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL SSCAL, STZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ).OR.( ALPHA.EQ.ONE ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN CALL STZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 10 J = 1, MIN( MN, N ) CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) 10 CONTINUE DO 20 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD IF( M.GE.JTMP ) $ CALL SSCAL( M-JTMP+1, ALPHA, A( JTMP, J ), 1 ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 30 J = MAX( 0, -IOFFD ) + 1, MN CALL SSCAL( J + IOFFD, ALPHA, A( 1, J ), 1 ) 30 CONTINUE DO 40 J = MAX( 0, MN ) + 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) 40 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 50 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * A( JTMP, J ) 50 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 60 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) 60 CONTINUE * END IF * RETURN * * End of STZSCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/svvdot.f0000640000175000017500000000353310363532303017423 0ustar muammarmuammar SUBROUTINE SVVDOT( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N REAL DOT * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * SVVDOT computes the following dot product: * * dot = dot + x**T * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) REAL * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. REAL SDOT EXTERNAL SDOT * .. * .. Executable Statements .. * DOT = DOT + SDOT( N, X, INCX, Y, INCY ) * RETURN * * End of SVVDOT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmddat.f0000640000175000017500000001514610363532303017513 0ustar muammarmuammar SUBROUTINE DMMDDAT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMDDAT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL DCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL DAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL DAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of DMMDDAT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ctzpad.f0000640000175000017500000002205410363532303017362 0ustar muammarmuammar SUBROUTINE CTZPAD( UPLO, HERM, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 HERM, UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CTZPAD initializes a two-dimensional array A to beta on the diagonal * specified by IOFFD or zeros the imaginary part of those diagonals and * set the offdiagonals to alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'D' or 'd': diagonal specified by IOFFD is set; the * rest of the array A is unchanged, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * Otherwise: All of the array A is set. * * HERM (input) CHARACTER*1 * On entry, HERM specifies what should be done to the diagonals * as follows. When UPLO is 'L', 'l', 'D', 'd', 'U' or 'u' and * HERM is 'Z' or 'z', the imaginary part of the diagonals is * set to zero. Otherwise, the diagonals are set to beta. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha, i.e., the value * to which the offdiagonal entries of the array A determined by * UPLO and IOFFD are set. * * BETA (input) COMPLEX * On entry, BETA specifies the scalar beta, i.e., the value to * which the diagonal entries specified by IOFFD of the array A * are set. BETA is not referenced when UPLO is 'L', 'l', 'U' or * 'u' and HERM is 'Z'. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be set as * specified by IOFFD, and the strictly lower trapezoidal part * of A is not referenced; When UPLO = 'L', the leading m by n * part of the array A must contain the lower trapezoidal part * of the matrix to be set as specified by IOFFD, and the * strictly upper trapezoidal part of A is not referenced. On * exit, the entries of the trapezoid part of A determined by * UPLO, HERM and IOFFD are set. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL RZERO PARAMETER ( RZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly lower triangular part of the * array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE * IF( LSAME( HERM, 'Z' ) ) THEN DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CMPLX( REAL( A( JTMP, J ) ), RZERO ) DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE ELSE DO 60 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 50 I = JTMP + 1, M A( I, J ) = ALPHA 50 CONTINUE 60 CONTINUE END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly upper triangular part of the * array to ALPHA. * MN = MIN( M - IOFFD, N ) IF( LSAME( HERM, 'Z' ) ) THEN DO 80 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 70 I = 1, JTMP - 1 A( I, J ) = ALPHA 70 CONTINUE A( JTMP, J ) = CMPLX( REAL( A( JTMP, J ) ), RZERO ) 80 CONTINUE ELSE DO 100 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 90 I = 1, JTMP - 1 A( I, J ) = ALPHA 90 CONTINUE A( JTMP, J ) = BETA 100 CONTINUE END IF DO 120 J = MAX( 0, MN ) + 1, N DO 110 I = 1, M A( I, J ) = ALPHA 110 CONTINUE 120 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals. * IF( LSAME( HERM, 'Z' ) ) THEN IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 130 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CMPLX( REAL( A( JTMP, J ) ), RZERO ) 130 CONTINUE END IF ELSE IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 140 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 140 CONTINUE END IF END IF * ELSE * * Set the diagonals to BETA and the offdiagonals to ALPHA. * DO 160 J = 1, N DO 150 I = 1, M A( I, J ) = ALPHA 150 CONTINUE 160 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 170 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 170 CONTINUE END IF * END IF * RETURN * * End of CTZPAD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/satrmv.f0000640000175000017500000003014510363532303017411 0ustar muammarmuammar SUBROUTINE SATRMV( UPLO, TRANS, DIAG, N, ALPHA, A, LDA, X, INCX, $ BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER INCX, INCY, LDA, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * SATRMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n unit or non-unit, upper or lower triangular * matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the matrix A and * the strictly lower triangular part of A is not referenced. * When UPLO = 'L' or 'l', the leading n by n part of the array * A must contain the lower triangular part of the matrix A and * the strictly upper trapezoidal part of A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of A * are not referenced either, but are assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY LOGICAL NOUNIT REAL ABSX, TALPHA, TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO , 'U' ).AND. $ .NOT.LSAME( UPLO , 'L' ) )THEN INFO = 1 ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) )THEN INFO = 2 ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN INFO = 3 ELSE IF( N.LT.0 )THEN INFO = 4 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 7 ELSE IF( INCX.EQ.0 )THEN INFO = 9 ELSE IF( INCY.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'SATRMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * NOUNIT = LSAME( DIAG , 'N' ) * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, N Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, N Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, N Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) )THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, J - 1 Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 70 CONTINUE * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * ABS( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF END IF JX = JX + INCX 80 CONTINUE * ELSE * DO 100, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) IY = IY + INCY 90 CONTINUE * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * ABS( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF END IF JX = JX + INCX 100 CONTINUE * END IF * ELSE * JX = KX IF( INCY.EQ.1 ) THEN DO 120, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN * TEMP = TALPHA * ABSX * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * ABS( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF * DO 110, I = J + 1, N Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 110 CONTINUE END IF JX = JX + INCX 120 CONTINUE * ELSE * DO 140, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY + ( J - 1 ) * INCY * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * ABS( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF * DO 130, I = J + 1, N IY = IY + INCY Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) 130 CONTINUE END IF JX = JX + INCX 140 CONTINUE * END IF * END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JY = KY IF( INCX.EQ.1 ) THEN DO 160, J = 1, N * TEMP = ZERO * DO 150, I = 1, J - 1 TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 150 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + ABS( A( J, J ) * X( J ) ) ELSE TEMP = TEMP + ABS( X( J ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 160 CONTINUE * ELSE * DO 180, J = 1, N TEMP = ZERO IX = KX DO 170, I = 1, J - 1 TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) IX = IX + INCX 170 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + ABS( A( J, J ) * X( IX ) ) ELSE TEMP = TEMP + ABS( X( IX ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 180 CONTINUE * END IF * ELSE * JY = KY IF( INCX.EQ.1 ) THEN * DO 200, J = 1, N * IF( NOUNIT ) THEN TEMP = ABS( A( J, J ) * X( J ) ) ELSE TEMP = ABS( X( J ) ) END IF * DO 190, I = J + 1, N TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 190 CONTINUE * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 200 CONTINUE * ELSE * DO 220, J = 1, N * IX = KX + ( J - 1 ) * INCX * IF( NOUNIT ) THEN TEMP = ABS( A( J, J ) * X( IX ) ) ELSE TEMP = ABS( X( IX ) ) END IF * DO 210, I = J + 1, N IX = IX + INCX TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) 210 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 220 CONTINUE END IF END IF * END IF * RETURN * * End of SATRMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ccshft.f0000640000175000017500000000553110363532303017350 0ustar muammarmuammar SUBROUTINE CCSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CCSHFT shifts columns of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A. M must be at * least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A to be * shifted. N must be at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the columns of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the co- * lumns are shifted to the right. When OFFSET is negative, the * columns of A are shifted to the left. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N+ABS(OFFSET) ). * On exit, A contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Further Details * =============== * * N=3 OFFSET=6 -OFFSET=6 N=3 * ------------------- ------------------- * | 1 2 3 4 5 6 7 8 9 | M | 1 2 3 4 5 6 7 8 9 | * ------------------- ------------------- * V V * ------------------- ------------------- * | 1 2 3 4 5 6 1 2 3 | M | 7 8 9 4 5 6 7 8 9 | * ------------------- ------------------- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = N, 1, -1 DO 10 I = 1, M A( I, J+OFFSET ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I, J-OFFSET ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of CCSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dtzpad.f0000640000175000017500000002077410363532303017372 0ustar muammarmuammar SUBROUTINE DTZPAD( UPLO, HERM, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 HERM, UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * DTZPAD initializes a two-dimensional array A to beta on the diagonal * specified by IOFFD or zeros the imaginary part of those diagonals and * set the offdiagonals to alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'D' or 'd': diagonal specified by IOFFD is set; the * rest of the array A is unchanged, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * Otherwise: All of the array A is set. * * HERM (input) CHARACTER*1 * On entry, HERM specifies what should be done to the diagonals * as follows. When UPLO is 'L', 'l', 'D', 'd', 'U' or 'u' and * HERM is 'Z' or 'z', the imaginary part of the diagonals is * set to zero. Otherwise, the diagonals are set to beta. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha, i.e., the value * to which the offdiagonal entries of the array A determined by * UPLO and IOFFD are set. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta, i.e., the value to * which the diagonal entries specified by IOFFD of the array A * are set. BETA is not referenced when UPLO is 'L', 'l', 'U' or * 'u' and HERM is 'Z'. * * A (input/output) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be set as * specified by IOFFD, and the strictly lower trapezoidal part * of A is not referenced; When UPLO = 'L', the leading m by n * part of the array A must contain the lower trapezoidal part * of the matrix to be set as specified by IOFFD, and the * strictly upper trapezoidal part of A is not referenced. On * exit, the entries of the trapezoid part of A determined by * UPLO, HERM and IOFFD are set. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly lower triangular part of the * array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE * IF( LSAME( HERM, 'Z' ) ) THEN DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE ELSE DO 60 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 50 I = JTMP + 1, M A( I, J ) = ALPHA 50 CONTINUE 60 CONTINUE END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly upper triangular part of the * array to ALPHA. * MN = MIN( M - IOFFD, N ) IF( LSAME( HERM, 'Z' ) ) THEN DO 80 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 70 I = 1, JTMP - 1 A( I, J ) = ALPHA 70 CONTINUE 80 CONTINUE ELSE DO 100 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 90 I = 1, JTMP - 1 A( I, J ) = ALPHA 90 CONTINUE A( JTMP, J ) = BETA 100 CONTINUE END IF DO 120 J = MAX( 0, MN ) + 1, N DO 110 I = 1, M A( I, J ) = ALPHA 110 CONTINUE 120 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the diagonal to BETA * IF( .NOT.( LSAME( HERM, 'Z' ) ) ) THEN IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 130 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 130 CONTINUE END IF END IF * ELSE * * Set the diagonals to BETA and the offdiagonals to ALPHA. * DO 150 J = 1, N DO 140 I = 1, M A( I, J ) = ALPHA 140 CONTINUE 150 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 160 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 160 CONTINUE END IF * END IF * RETURN * * End of DTZPAD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ztzcnjg.f0000640000175000017500000002102110363532303017557 0ustar muammarmuammar SUBROUTINE ZTZCNJG( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZTZCNJG conjugates a two-dimensional array A and then scales it by * the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be conjugated and scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are conjugated and scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL ZTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN * CALL ZTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) * ELSE IF( ALPHA.EQ.ONE ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = DCONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE * DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = DCONJG( A( I, J ) ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = DCONJG( A( I, J ) ) 50 CONTINUE 60 CONTINUE * DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = DCONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCONJG( A( JTMP, J ) ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = DCONJG( A( I, J ) ) 100 CONTINUE 110 CONTINUE * END IF * ELSE * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 130 J = 1, MIN( MN, N ) DO 120 I = 1, M A( I, J ) = ALPHA * DCONJG( A( I, J ) ) 120 CONTINUE 130 CONTINUE * DO 150 J = MN + 1, MIN( M - IOFFD, N ) DO 140 I = J + IOFFD, M A( I, J ) = ALPHA * DCONJG( A( I, J ) ) 140 CONTINUE 150 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 170 J = MAX( 0, -IOFFD ) + 1, MN DO 160 I = 1, J + IOFFD A( I, J ) = ALPHA * DCONJG( A( I, J ) ) 160 CONTINUE 170 CONTINUE * DO 190 J = MAX( 0, MN ) + 1, N DO 180 I = 1, M A( I, J ) = ALPHA * DCONJG( A( I, J ) ) 180 CONTINUE 190 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 200 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * DCONJG( A( JTMP, J ) ) 200 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 220 J = 1, N DO 210 I = 1, M A( I, J ) = ALPHA * DCONJG( A( I, J ) ) 210 CONTINUE 220 CONTINUE * END IF * END IF * RETURN * * End of ZTZCNJG * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/immddat.f0000640000175000017500000001373510363532303017522 0ustar muammarmuammar SUBROUTINE IMMDDAT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N INTEGER ALPHA, BETA * .. * .. Array Arguments .. INTEGER A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * IMMDDAT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) INTEGER * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) INTEGER array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) INTEGER * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) INTEGER array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER ONE, ZERO PARAMETER ( ONE = 1, ZERO = 0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = B( J, I ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = B( J, I ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * B( J, I ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M A( I, J ) = ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N A( J, I ) = B( I, J ) 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N A( J, I ) = B( I, J ) + A( J, I ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N A( J, I ) = BETA * B( I, J ) + A( J, I ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N DO 310 I = 1, M A( I, J ) = ALPHA * A( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of IMMDDAT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmtadd.f0000640000175000017500000001515610363532303017513 0ustar muammarmuammar SUBROUTINE CMMTADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMTADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL CCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL CAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL CAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL CSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL CCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL CAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL CAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL CSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of CMMTADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmtcadd.f0000640000175000017500000001473110363532303017654 0ustar muammarmuammar SUBROUTINE CMMTCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMTCADD performs the following operation: * * B := alpha * conjg( A' ) + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been conjugated and added to the * leading n by m part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CSCAL * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( J, I ) = CONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = CONJG( A( I, J ) ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = CONJG( A( I, J ) ) + B( J, I ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * CONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * CONJG( A( I, J ) ) + $ BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( J, I ) = ALPHA * CONJG( A( I, J ) ) + B( J, I ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL CSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N B( I, J ) = CONJG( A( J, I ) ) 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = CONJG( A( J, I ) ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N B( I, J ) = CONJG( A( J, I ) ) + B( I, J ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * CONJG( A( J, I ) ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * CONJG( A( J, I ) ) + $ BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N B( I, J ) = ALPHA * CONJG( A( J, I ) ) + B( I, J ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL CSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of CMMTCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cset.f0000640000175000017500000000517310363532303017036 0ustar muammarmuammar SUBROUTINE CSET( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * CSET sets the entries of an n vector x to the scalar alpha. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (input/output) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are set to alpha. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CSET', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := alpha * IF( INCX.EQ.1 ) $ GO TO 20 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * DO 10 I = 1, N X( IX ) = ALPHA IX = IX + INCX 10 CONTINUE * RETURN * * code for increment equal to 1 * * clean-up loop * 20 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 40 * DO 30 I = 1, M X( I ) = ALPHA 30 CONTINUE IF( N.LT.4 ) $ RETURN * 40 MP1 = M + 1 DO 50 I = MP1, N, 4 X( I ) = ALPHA X( I + 1 ) = ALPHA X( I + 2 ) = ALPHA X( I + 3 ) = ALPHA 50 CONTINUE * RETURN * * End of CSET * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/sset.f0000640000175000017500000000516510363532303017057 0ustar muammarmuammar SUBROUTINE SSET( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ALPHA * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * SSET sets the entries of an n vector x to the scalar alpha. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha. * * X (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are set to alpha. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SSET', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := alpha * IF( INCX.EQ.1 ) $ GO TO 20 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * DO 10 I = 1, N X( IX ) = ALPHA IX = IX + INCX 10 CONTINUE * RETURN * * code for increment equal to 1 * * clean-up loop * 20 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 40 * DO 30 I = 1, M X( I ) = ALPHA 30 CONTINUE IF( N.LT.4 ) $ RETURN * 40 MP1 = M + 1 DO 50 I = MP1, N, 4 X( I ) = ALPHA X( I + 1 ) = ALPHA X( I + 2 ) = ALPHA X( I + 3 ) = ALPHA 50 CONTINUE * RETURN * * End of SSET * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmddac.f0000640000175000017500000001050610363532303017465 0ustar muammarmuammar SUBROUTINE DMMDDAC( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMDDAC performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of DMMDDAC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmcadd.f0000640000175000017500000001050210363532303017461 0ustar muammarmuammar SUBROUTINE DMMCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMCADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of DMMCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmtcadd.f0000640000175000017500000001514110363532303017651 0ustar muammarmuammar SUBROUTINE DMMTCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMTCADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL DSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL DCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL DAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL DAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL DSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of DMMTCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dtzpadcpy.f0000640000175000017500000001774710363532303020114 0ustar muammarmuammar SUBROUTINE DTZPADCPY( UPLO, DIAG, M, N, IOFFD, A, LDA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, UPLO INTEGER IOFFD, LDA, LDB, M, N * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DTZPADCPY copies an array A into an array B. The unchanged part of B * is padded with zeros. The diagonal of B specified by IOFFD may be set * to ones. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be copied as follows: * = 'L' or 'l': Lower triangular part is copied; the * strictly upper triangular part of B is * padded with zeros, * = 'U' or 'u': Upper triangular part is copied; the * strictly lower triangular part of B is * padded with zeros. * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not the diagonal of B is * to be set to ones or not as follows: * * DIAG = 'N' or 'n': the diagonals of A are copied into the * diagonals of B, otherwise the diagonals of B are set to ones. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * A (input) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be copied * as specified by IOFFD, UPLO and DIAG, and the strictly lower * trapezoidal part of A is not referenced; When UPLO = 'L',the * leading m by n part of the array A must contain the lower * trapezoidal part of the matrix to be copied as specified by * IOFFD, UPLO and DIAG and the strictly upper trapezoidal part * of A is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * B (output) DOUBLE PRECISION array * On entry, B is an array of dimension (LDB,N). On exit, this * array contains the padded copy of A as specified by IOFFD, * UPLO and DIAG. * * LDB (input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, ITMP, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE * JTMP = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 50 J = MN + 1, JTMP ITMP = J + IOFFD DO 30 I = 1, ITMP - 1 B( I, J ) = ZERO 30 CONTINUE DO 40 I = ITMP, M B( I, J ) = A( I, J ) 40 CONTINUE 50 CONTINUE ELSE DO 80 J = MN + 1, JTMP ITMP = J + IOFFD DO 60 I = 1, ITMP - 1 B( I, J ) = ZERO 60 CONTINUE B( ITMP, J ) = ONE DO 70 I = ITMP + 1, M B( I, J ) = A( I, J ) 70 CONTINUE 80 CONTINUE END IF * DO 100 J = JTMP + 1, N DO 90 I = 1, M B( I, J ) = ZERO 90 CONTINUE 100 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * JTMP = MAX( 0, -IOFFD ) * DO 120 J = 1, JTMP DO 110 I = 1, M B( I, J ) = ZERO 110 CONTINUE 120 CONTINUE * MN = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 150 J = JTMP + 1, MN ITMP = J + IOFFD DO 130 I = 1, ITMP B( I, J ) = A( I, J ) 130 CONTINUE DO 140 I = ITMP + 1, M B( I, J ) = ZERO 140 CONTINUE 150 CONTINUE ELSE DO 180 J = JTMP + 1, MN ITMP = J + IOFFD DO 160 I = 1, ITMP - 1 B( I, J ) = A( I, J ) 160 CONTINUE B( ITMP, J ) = ONE DO 170 I = ITMP + 1, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE END IF * DO 200 J = MAX( 0, MN ) + 1, N DO 190 I = 1, M B( I, J ) = A( I, J ) 190 CONTINUE 200 CONTINUE * ELSE * DO 220 J = 1, N DO 210 I = 1, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE * END IF * RETURN * * End of DTZPADCPY * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmddac.f0000640000175000017500000001042610363532303017505 0ustar muammarmuammar SUBROUTINE SMMDDAC( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMDDAC performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) REAL array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL SSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of SMMDDAC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dmmadd.f0000640000175000017500000001047710363532303017331 0ustar muammarmuammar SUBROUTINE DMMADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DMMADD performs the following operation: * * B := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been added to the leading m by n * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DSCAL * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL DCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) * DO 10 I = 1, M * B( I, J ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL DAXPY( M, ONE, A( 1, J ), 1, B( 1, J ), 1 ) * DO 50 I = 1, M * B( I, J ) = A( I, J ) + B( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL DAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) * DO 110 I = 1, M * B( I, J ) = ALPHA * A( I, J ) + B( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL DSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of DMMADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/srshft.f0000640000175000017500000000616510363532303017413 0ustar muammarmuammar SUBROUTINE SRSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * SRSHFT shifts rows of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A to be shifted. * M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A. N must be * at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the rows of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the rows * are shifted to the bottom. When OFFSET is negative, the rows * of A are shifted to the top. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N ). On exit, A * contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M+ABS(OFFSET) ). * * Further Details * =============== * * N N N N * --- --- --- --- * | 1 | | 1 | | 1 | | 7 | * | 2 | M = 3 | 2 | | 2 | M = 3 | 8 | * | 3 | | 3 | | 3 | | 9 | * | 4 | | 4 | | 4 | | 4 | * | 5 | becomes | 5 | | 5 | becomes | 5 | * | 6 | | 6 | | 6 | | 6 | * | 7 | | 1 | | 7 | | 7 | * | 8 | OFFSET = 6 | 2 | | 8 | OFFSET = -6 | 8 | * | 9 | | 3 | | 9 | | 9 | * --- --- --- --- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = 1, N DO 10 I = M, 1, -1 A( I+OFFSET, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I-OFFSET, J ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of SRSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zsyr.f0000640000175000017500000001303610363532303017104 0ustar muammarmuammar SUBROUTINE ZSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, LDA, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), X( * ) * .. * * Purpose * ======= * * ZSYR performs the symmetric rank 1 operation * * A := alpha*x*x' + A, * * where alpha is a complex scalar, x is an n element vector and A is an * n by n SY matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. On exit, the upper triangular part of the array A is * overwritten by the upper triangular part of the updated ma- * trix. When UPLO = 'L' or 'l', the leading n by n part of the * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. On exit, the lower triangular part of the * array A is overwritten by the lower triangular part of the * updated matrix. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, J, JX, KX COMPLEX*16 TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( INCX.EQ.0 ) THEN INFO = 5 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = 7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZSYR', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) $ RETURN * * Set the start point in X if the increment is not unity. * IF( INCX.LE.0 ) THEN KX = 1 - ( N-1 )*INCX ELSE IF( INCX.NE.1 ) THEN KX = 1 END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * IF( LSAME( UPLO, 'U' ) ) THEN * * Form A when A is stored in upper triangle. * IF( INCX.EQ.1 ) THEN DO 20 J = 1, N IF( X( J ).NE.ZERO ) THEN TEMP = ALPHA*X( J ) DO 10 I = 1, J A( I, J ) = A( I, J ) + X( I )*TEMP 10 CONTINUE END IF 20 CONTINUE ELSE JX = KX DO 40 J = 1, N IF( X( JX ).NE.ZERO ) THEN TEMP = ALPHA*X( JX ) IX = KX DO 30 I = 1, J A( I, J ) = A( I, J ) + X( IX )*TEMP IX = IX + INCX 30 CONTINUE END IF JX = JX + INCX 40 CONTINUE END IF ELSE * * Form A when A is stored in lower triangle. * IF( INCX.EQ.1 ) THEN DO 60 J = 1, N IF( X( J ).NE.ZERO ) THEN TEMP = ALPHA*X( J ) DO 50 I = J, N A( I, J ) = A( I, J ) + X( I )*TEMP 50 CONTINUE END IF 60 CONTINUE ELSE JX = KX DO 80 J = 1, N IF( X( JX ).NE.ZERO ) THEN TEMP = ALPHA*X( JX ) IX = JX DO 70 I = J, N A( I, J ) = A( I, J ) + X( IX )*TEMP IX = IX + INCX 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE END IF END IF * RETURN * * End of ZSYR * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cvvdotc.f0000640000175000017500000000355210363532303017547 0ustar muammarmuammar SUBROUTINE CVVDOTC( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX DOT * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * CVVDOTC computes the following dot product: * * dot = dot + x**H * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) COMPLEX * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. COMPLEX CDOTC EXTERNAL CDOTC * .. * .. Executable Statements .. * DOT = DOT + CDOTC( N, X, INCX, Y, INCY ) * RETURN * * End of CVVDOTC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/immdda.f0000640000175000017500000000771610363532303017340 0ustar muammarmuammar SUBROUTINE IMMDDA( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N INTEGER ALPHA, BETA * .. * .. Array Arguments .. INTEGER A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * IMMDDA performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) INTEGER * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) INTEGER array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) INTEGER * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) INTEGER array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER ONE, ZERO PARAMETER ( ONE = 1, ZERO = 0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = B( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = B( I, J ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * B( I, J ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M A( I, J ) = ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of IMMDDA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/csyr2.f0000640000175000017500000001571410363532303017144 0ustar muammarmuammar SUBROUTINE CSYR2( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * CSYR2 performs the symmetric rank 2 operation * * A := alpha*x*y' + alpha*y*x' + A, * * where alpha is a complex scalar, x and y are n element vectors and A * is an n by n SY matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. On exit, the upper triangular part of the array A is * overwritten by the upper triangular part of the updated ma- * trix. When UPLO = 'L' or 'l', the leading n by n part of the * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. On exit, the lower triangular part of the * array A is overwritten by the lower triangular part of the * updated matrix. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY COMPLEX TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( INCX.EQ.0 )THEN INFO = 5 ELSE IF( INCY.EQ.0 )THEN INFO = 7 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 9 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'CSYR2', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) $ RETURN * * Set up the start points in X and Y if the increments are not both * unity. * IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN IF( INCX.GT.0 )THEN KX = 1 ELSE KX = 1 - ( N - 1 )*INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 )*INCY END IF JX = KX JY = KY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * IF( LSAME( UPLO, 'U' ) )THEN * * Form A when A is stored in the upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 20, J = 1, N IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( J ) TEMP2 = ALPHA*X( J ) DO 10, I = 1, J A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 10 CONTINUE END IF 20 CONTINUE ELSE DO 40, J = 1, N IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( JY ) TEMP2 = ALPHA*X( JX ) IX = KX IY = KY DO 30, I = 1, J A( I, J ) = A( I, J ) + X( IX )*TEMP1 $ + Y( IY )*TEMP2 IX = IX + INCX IY = IY + INCY 30 CONTINUE END IF JX = JX + INCX JY = JY + INCY 40 CONTINUE END IF ELSE * * Form A when A is stored in the lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 60, J = 1, N IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( J ) TEMP2 = ALPHA*X( J ) DO 50, I = J, N A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 50 CONTINUE END IF 60 CONTINUE ELSE DO 80, J = 1, N IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN TEMP1 = ALPHA*Y( JY ) TEMP2 = ALPHA*X( JX ) IX = JX IY = JY DO 70, I = J, N A( I, J ) = A( I, J ) + X( IX )*TEMP1 $ + Y( IY )*TEMP2 IX = IX + INCX IY = IY + INCY 70 CONTINUE END IF JX = JX + INCX JY = JY + INCY 80 CONTINUE END IF END IF * RETURN * * End of CSYR2 * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmtcadd.f0000640000175000017500000001476210363532303017707 0ustar muammarmuammar SUBROUTINE ZMMTCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMTCADD performs the following operation: * * B := alpha * conjg( A' ) + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been conjugated and added to the * leading n by m part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( J, I ) = DCONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = DCONJG( A( I, J ) ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = DCONJG( A( I, J ) ) + B( J, I ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * DCONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * DCONJG( A( I, J ) ) + $ BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( J, I ) = ALPHA * DCONJG( A( I, J ) ) + B( J, I ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL ZSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N B( I, J ) = DCONJG( A( J, I ) ) 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = DCONJG( A( J, I ) ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N B( I, J ) = DCONJG( A( J, I ) ) + B( I, J ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * DCONJG( A( J, I ) ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * DCONJG( A( J, I ) ) + $ BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N B( I, J ) = ALPHA * DCONJG( A( J, I ) ) + B( I, J ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL ZSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of ZMMTCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zhescal.f0000640000175000017500000001667710363532303017544 0ustar muammarmuammar SUBROUTINE ZHESCAL( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZHESCAL scales a two-dimensional array A by the real scalar alpha. * The diagonal entries specified by IOFFD of A are supposed to be real. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the Hermitian * matrix to be scaled as specified by IOFFD, and the strictly * lower trapezoidal part of A is not referenced. When UPLO is * 'L' or 'l', the leading m by n part of the array A must con- * tain the lower trapezoidal part of the Hermitian matrix to be * scaled as specified by IOFFD, and the strictly upper trape- * zoidal part of A is not referenced. On exit, the entries of * the trapezoid part of A determined by UPLO and IOFFD are sca- * led. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RONE, RZERO PARAMETER ( RONE = 1.0D+0, RZERO = 0.0D+0 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL ZDSCAL, ZTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( ALPHA.EQ.RONE ) THEN * * Zeros the imaginary part of the diagonals * IF( LSAME( UPLO, 'L' ).OR.LSAME( UPLO, 'U' ).OR. $ LSAME( UPLO, 'D' ) ) THEN DO 10 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCMPLX( DBLE( A( JTMP, J ) ), RZERO ) 10 CONTINUE END IF RETURN ELSE IF( ALPHA.EQ.RZERO ) THEN CALL ZTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) RETURN END IF * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) CALL ZDSCAL( M, ALPHA, A( 1, J ), 1 ) 20 CONTINUE DO 30 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCMPLX( ALPHA * DBLE( A( JTMP, J ) ), RZERO ) IF( M.GT.JTMP ) $ CALL ZDSCAL( M-JTMP, ALPHA, A( JTMP + 1, J ), 1 ) 30 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 40 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD CALL ZDSCAL( JTMP - 1, ALPHA, A( 1, J ), 1 ) A( JTMP, J ) = DCMPLX( ALPHA * DBLE( A( JTMP, J ) ), RZERO ) 40 CONTINUE DO 50 J = MAX( 0, MN ) + 1, N CALL ZDSCAL( M, ALPHA, A( 1, J ), 1 ) 50 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 60 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCMPLX( ALPHA * DBLE( A( JTMP, J ) ), RZERO ) 60 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 70 J = 1, N CALL ZDSCAL( M, ALPHA, A( 1, J ), 1 ) 70 CONTINUE * END IF * RETURN * * End of ZHESCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/scshft.f0000640000175000017500000000552610363532303017374 0ustar muammarmuammar SUBROUTINE SCSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * SCSHFT shifts columns of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A. M must be at * least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A to be * shifted. N must be at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the columns of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the co- * lumns are shifted to the right. When OFFSET is negative, the * columns of A are shifted to the left. * * A (local input/local output) REAL array * On entry, A is an array of dimension ( LDA, N+ABS(OFFSET) ). * On exit, A contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Further Details * =============== * * N=3 OFFSET=6 -OFFSET=6 N=3 * ------------------- ------------------- * | 1 2 3 4 5 6 7 8 9 | M | 1 2 3 4 5 6 7 8 9 | * ------------------- ------------------- * V V * ------------------- ------------------- * | 1 2 3 4 5 6 1 2 3 | M | 7 8 9 4 5 6 7 8 9 | * ------------------- ------------------- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = N, 1, -1 DO 10 I = 1, M A( I, J+OFFSET ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I, J-OFFSET ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of SCSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ztzpad.f0000640000175000017500000002207110363532303017410 0ustar muammarmuammar SUBROUTINE ZTZPAD( UPLO, HERM, M, N, IOFFD, ALPHA, BETA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 HERM, UPLO INTEGER IOFFD, LDA, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZTZPAD initializes a two-dimensional array A to beta on the diagonal * specified by IOFFD or zeros the imaginary part of those diagonals and * set the offdiagonals to alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be set as follows: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of A is not changed, * = 'D' or 'd': diagonal specified by IOFFD is set; the * rest of the array A is unchanged, * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of A is not changed, * Otherwise: All of the array A is set. * * HERM (input) CHARACTER*1 * On entry, HERM specifies what should be done to the diagonals * as follows. When UPLO is 'L', 'l', 'D', 'd', 'U' or 'u' and * HERM is 'Z' or 'z', the imaginary part of the diagonals is * set to zero. Otherwise, the diagonals are set to beta. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha, i.e., the value * to which the offdiagonal entries of the array A determined by * UPLO and IOFFD are set. * * BETA (input) COMPLEX*16 * On entry, BETA specifies the scalar beta, i.e., the value to * which the diagonal entries specified by IOFFD of the array A * are set. BETA is not referenced when UPLO is 'L', 'l', 'U' or * 'u' and HERM is 'Z'. * * A (input/output) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be set as * specified by IOFFD, and the strictly lower trapezoidal part * of A is not referenced; When UPLO = 'L', the leading m by n * part of the array A must contain the lower trapezoidal part * of the matrix to be set as specified by IOFFD, and the * strictly upper trapezoidal part of A is not referenced. On * exit, the entries of the trapezoid part of A determined by * UPLO, HERM and IOFFD are set. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly lower triangular part of the * array to ALPHA. * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = ALPHA 10 CONTINUE 20 CONTINUE * IF( LSAME( HERM, 'Z' ) ) THEN DO 40 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCMPLX( DBLE( A( JTMP, J ) ), RZERO ) DO 30 I = JTMP + 1, M A( I, J ) = ALPHA 30 CONTINUE 40 CONTINUE ELSE DO 60 J = MN + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = BETA DO 50 I = JTMP + 1, M A( I, J ) = ALPHA 50 CONTINUE 60 CONTINUE END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals and set the strictly upper triangular part of the * array to ALPHA. * MN = MIN( M - IOFFD, N ) IF( LSAME( HERM, 'Z' ) ) THEN DO 80 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 70 I = 1, JTMP - 1 A( I, J ) = ALPHA 70 CONTINUE A( JTMP, J ) = DCMPLX( DBLE( A( JTMP, J ) ), RZERO ) 80 CONTINUE ELSE DO 100 J = MAX( 0, -IOFFD ) + 1, MN JTMP = J + IOFFD DO 90 I = 1, JTMP - 1 A( I, J ) = ALPHA 90 CONTINUE A( JTMP, J ) = BETA 100 CONTINUE END IF DO 120 J = MAX( 0, MN ) + 1, N DO 110 I = 1, M A( I, J ) = ALPHA 110 CONTINUE 120 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Set the diagonal to BETA or zero the imaginary part of the * diagonals. * IF( LSAME( HERM, 'Z' ) ) THEN IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 130 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = DCMPLX( DBLE( A( JTMP, J ) ), RZERO ) 130 CONTINUE END IF ELSE IF( ( IOFFD.LT.M ).AND.( IOFFD.GT.-N ) ) THEN DO 140 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 140 CONTINUE END IF END IF * ELSE * * Set the diagonals to BETA and the offdiagonals to ALPHA. * DO 160 J = 1, N DO 150 I = 1, M A( I, J ) = ALPHA 150 CONTINUE 160 CONTINUE IF( ALPHA.NE.BETA .AND. IOFFD.LT.M .AND. IOFFD.GT.-N ) THEN DO 170 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) A( J + IOFFD, J ) = BETA 170 CONTINUE END IF * END IF * RETURN * * End of ZTZPAD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dzvasum.f0000640000175000017500000000271610363532303017571 0ustar muammarmuammar SUBROUTINE DZVASUM( N, ASUM, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ASUM * .. * .. Array Arguments .. COMPLEX*16 X( * ) * .. * * Purpose * ======= * * DZVASUM returns the sum of absolute values of the entries of a vector * x. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ASUM (output) COMPLEX*16 * On exit, ASUM specifies the sum of absolute values. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION DZASUM EXTERNAL DZASUM * .. * .. Executable Statements .. * ASUM = DZASUM( N, X, INCX ) * RETURN * * End of DZVASUM * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmtadd.f0000640000175000017500000001505610363532303017532 0ustar muammarmuammar SUBROUTINE SMMTADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMTADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) REAL array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL SSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL SCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL SAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL SAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL SSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of SMMTADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ztzpadcpy.f0000640000175000017500000002001410363532303020117 0ustar muammarmuammar SUBROUTINE ZTZPADCPY( UPLO, DIAG, M, N, IOFFD, A, LDA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, UPLO INTEGER IOFFD, LDA, LDB, M, N * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZTZPADCPY copies an array A into an array B. The unchanged part of B * is padded with zeros. The diagonal of B specified by IOFFD may be set * to ones. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be copied as follows: * = 'L' or 'l': Lower triangular part is copied; the * strictly upper triangular part of B is * padded with zeros, * = 'U' or 'u': Upper triangular part is copied; the * strictly lower triangular part of B is * padded with zeros. * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not the diagonal of B is * to be set to ones or not as follows: * * DIAG = 'N' or 'n': the diagonals of A are copied into the * diagonals of B, otherwise the diagonals of B are set to ones. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * A (input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U', the leading m by n part of the array A must * contain the upper trapezoidal part of the matrix to be copied * as specified by IOFFD, UPLO and DIAG, and the strictly lower * trapezoidal part of A is not referenced; When UPLO = 'L',the * leading m by n part of the array A must contain the lower * trapezoidal part of the matrix to be copied as specified by * IOFFD, UPLO and DIAG and the strictly upper trapezoidal part * of A is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * B (output) COMPLEX*16 array * On entry, B is an array of dimension (LDB,N). On exit, this * array contains the padded copy of A as specified by IOFFD, * UPLO and DIAG. * * LDB (input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, ITMP, J, JTMP, MN * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Start the operations * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE * JTMP = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 50 J = MN + 1, JTMP ITMP = J + IOFFD DO 30 I = 1, ITMP - 1 B( I, J ) = ZERO 30 CONTINUE DO 40 I = ITMP, M B( I, J ) = A( I, J ) 40 CONTINUE 50 CONTINUE ELSE DO 80 J = MN + 1, JTMP ITMP = J + IOFFD DO 60 I = 1, ITMP - 1 B( I, J ) = ZERO 60 CONTINUE B( ITMP, J ) = ONE DO 70 I = ITMP + 1, M B( I, J ) = A( I, J ) 70 CONTINUE 80 CONTINUE END IF * DO 100 J = JTMP + 1, N DO 90 I = 1, M B( I, J ) = ZERO 90 CONTINUE 100 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * JTMP = MAX( 0, -IOFFD ) * DO 120 J = 1, JTMP DO 110 I = 1, M B( I, J ) = ZERO 110 CONTINUE 120 CONTINUE * MN = MIN( M - IOFFD, N ) * IF( LSAME( DIAG, 'N' ) ) THEN DO 150 J = JTMP + 1, MN ITMP = J + IOFFD DO 130 I = 1, ITMP B( I, J ) = A( I, J ) 130 CONTINUE DO 140 I = ITMP + 1, M B( I, J ) = ZERO 140 CONTINUE 150 CONTINUE ELSE DO 180 J = JTMP + 1, MN ITMP = J + IOFFD DO 160 I = 1, ITMP - 1 B( I, J ) = A( I, J ) 160 CONTINUE B( ITMP, J ) = ONE DO 170 I = ITMP + 1, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE END IF * DO 200 J = MAX( 0, MN ) + 1, N DO 190 I = 1, M B( I, J ) = A( I, J ) 190 CONTINUE 200 CONTINUE * ELSE * DO 220 J = 1, N DO 210 I = 1, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE * END IF * RETURN * * End of ZTZPADCPY * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/pxerbla.f0000640000175000017500000000364110363532303017533 0ustar muammarmuammar SUBROUTINE PXERBLA( ICTXT, SRNAME, INFO ) * * -- ScaLAPACK auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT, INFO * .. * .. Array Arguments .. CHARACTER*(*) SRNAME * .. * * Purpose * ======= * * PXERBLA is an error handler for the ScaLAPACK routines. It is called * by a ScaLAPACK routine if an input parameter has an invalid value. A * message is printed. Installers may consider modifying this routine in * order to call system-specific exception-handling facilities. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * SRNAME (global input) CHARACTER*(*) * On entry, SRNAME specifies the name of the routine which cal- * ling PXERBLA. * * INFO (global input) INTEGER * On entry, INFO specifies the position of the invalid parame- * ter in the parameter list of the calling routine. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * WRITE( *, FMT = 9999 ) MYROW, MYCOL, SRNAME, INFO * 9999 FORMAT( '{', I5, ',', I5, '}: On entry to ', A, $ ' parameter number ', I4, ' had an illegal value' ) * RETURN * * End of PXERBLA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/ctzcnjg.f0000640000175000017500000002077610363532303017550 0ustar muammarmuammar SUBROUTINE CTZCNJG( UPLO, M, N, IOFFD, ALPHA, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER IOFFD, LDA, M, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * CTZCNJG conjugates a two-dimensional array A and then scales it by * the scalar alpha. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which trapezoidal part of the ar- * ray A is to be conjugated and scaled as follows: * = 'L' or 'l': the lower trapezoid of A is scaled, * = 'U' or 'u': the upper trapezoid of A is scaled, * = 'D' or 'd': diagonal specified by IOFFD is scaled, * Otherwise: all of the array A is scaled. * * M (input) INTEGER * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha, i.e., the value * by which the diagonal and offdiagonal entries of the array A * as specified by UPLO and IOFFD are scaled. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading m by n part of the array * A must contain the upper trapezoidal part of the matrix as * specified by IOFFD to be scaled, and the strictly lower tra- * pezoidal part of A is not referenced; When UPLO = 'L' or 'l', * the leading m by n part of the array A must contain the lower * trapezoidal part of the matrix as specified by IOFFD to be * scaled, and the strictly upper trapezoidal part of A is not * referenced. On exit, the entries of the trapezoid part of A * determined by UPLO and IOFFD are conjugated and scaled. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d 'U' | | 'U' | * | 'L' 'D' | |d | * | d | M | d | * ---------------------------- | 'D' | * | d | * IOFFD < 0 | 'L' d | * | d| * N | | * ----------- ----------- * | d 'U'| * | d | IOFFD > 0 * M | 'D' | * | d| N * | 'L' | ---------------------------- * | | | 'U' | * | | |d | * | | | 'D' | * | | | d | * | | |'L' d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J, JTMP, MN * .. * .. External Subroutines .. EXTERNAL CTZPAD * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( ( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * * Start the operations * IF( ALPHA.EQ.ZERO ) THEN * CALL CTZPAD( UPLO, 'N', M, N, IOFFD, ZERO, ZERO, A, LDA ) * ELSE IF( ALPHA.EQ.ONE ) THEN * IF( LSAME( UPLO, 'L' ) ) THEN * MN = MAX( 0, -IOFFD ) DO 20 J = 1, MIN( MN, N ) DO 10 I = 1, M A( I, J ) = CONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE * DO 40 J = MN + 1, MIN( M - IOFFD, N ) DO 30 I = J + IOFFD, M A( I, J ) = CONJG( A( I, J ) ) 30 CONTINUE 40 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 60 J = MAX( 0, -IOFFD ) + 1, MN DO 50 I = 1, J + IOFFD A( I, J ) = CONJG( A( I, J ) ) 50 CONTINUE 60 CONTINUE * DO 80 J = MAX( 0, MN ) + 1, N DO 70 I = 1, M A( I, J ) = CONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 90 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = CONJG( A( JTMP, J ) ) 90 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 110 J = 1, N DO 100 I = 1, M A( I, J ) = CONJG( A( I, J ) ) 100 CONTINUE 110 CONTINUE * END IF * ELSE * IF( LSAME( UPLO, 'L' ) ) THEN * * Scales the lower triangular part of the array by ALPHA. * MN = MAX( 0, -IOFFD ) DO 130 J = 1, MIN( MN, N ) DO 120 I = 1, M A( I, J ) = ALPHA * CONJG( A( I, J ) ) 120 CONTINUE 130 CONTINUE * DO 150 J = MN + 1, MIN( M - IOFFD, N ) DO 140 I = J + IOFFD, M A( I, J ) = ALPHA * CONJG( A( I, J ) ) 140 CONTINUE 150 CONTINUE * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * * Scales the upper triangular part of the array by ALPHA. * MN = MIN( M - IOFFD, N ) DO 170 J = MAX( 0, -IOFFD ) + 1, MN DO 160 I = 1, J + IOFFD A( I, J ) = ALPHA * CONJG( A( I, J ) ) 160 CONTINUE 170 CONTINUE * DO 190 J = MAX( 0, MN ) + 1, N DO 180 I = 1, M A( I, J ) = ALPHA * CONJG( A( I, J ) ) 180 CONTINUE 190 CONTINUE * ELSE IF( LSAME( UPLO, 'D' ) ) THEN * * Scales the diagonal entries by ALPHA. * DO 200 J = MAX( 0, -IOFFD ) + 1, MIN( M - IOFFD, N ) JTMP = J + IOFFD A( JTMP, J ) = ALPHA * CONJG( A( JTMP, J ) ) 200 CONTINUE * ELSE * * Scales the entire array by ALPHA. * DO 220 J = 1, N DO 210 I = 1, M A( I, J ) = ALPHA * CONJG( A( I, J ) ) 210 CONTINUE 220 CONTINUE * END IF * END IF * RETURN * * End of CTZCNJG * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dasqrtb.f0000640000175000017500000000214510363532303017534 0ustar muammarmuammar SUBROUTINE DASQRTB( A, B, C ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. DOUBLE PRECISION A, B, C * .. * * Purpose * ======= * * DASQRTB computes c := a * sqrt( b ) where a, b and c are scalars. * * Arguments * ========= * * A (input) DOUBLE PRECISION * On entry, A specifies the scalar a. * * B (input) DOUBLE PRECISION * On entry, B specifies the scalar b. * * C (output) DOUBLE PRECISION * On entry, C specifies the scalar c. On exit, c is overwritten * by the product of a and the square root of b. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC SQRT * .. * .. Executable Statements .. * C = A * SQRT( B ) * RETURN * * End of DASQRTB * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dvvdot.f0000640000175000017500000000357710363532303017414 0ustar muammarmuammar SUBROUTINE DVVDOT( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N DOUBLE PRECISION DOT * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * DVVDOT computes the following dot product: * * dot = dot + x**T * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) DOUBLE PRECISION * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION DDOT EXTERNAL DDOT * .. * .. Executable Statements .. * DOT = DOT + DDOT( N, X, INCX, Y, INCY ) * RETURN * * End of DVVDOT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmddat.f0000640000175000017500000001516610363532303017514 0ustar muammarmuammar SUBROUTINE CMMDDAT( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMDDAT performs the following operation: * * A := alpha * A + beta * B', * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading n by m part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX array * On entry, B is an array of dimension ( LDB, M ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL CCOPY( M, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( J, I ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( J, I ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL CAXPY( M, ONE, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( J, I ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( J, I ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( J, I ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL CAXPY( M, BETA, B( J, 1 ), LDB, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( J, I ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 180 J = 1, M CALL CCOPY( N, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 170 I = 1, N * A( J, I ) = B( I, J ) * 170 CONTINUE 180 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N A( J, I ) = B( I, J ) + ALPHA * A( J, I ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL CAXPY( N, ONE, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 210 I = 1, N * A( J, I ) = B( I, J ) + A( J, I ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N A( J, I ) = BETA * B( I, J ) 230 CONTINUE 240 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N A( J, I ) = BETA * B( I, J ) + ALPHA * A( J, I ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL CAXPY( N, BETA, B( 1, J ), 1, A( J, 1 ), LDA ) * DO 270 I = 1, N * A( J, I ) = BETA * B( I, J ) + A( J, I ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 300 J = 1, N DO 290 I = 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 320 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 310 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of CMMDDAT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zsymv.f0000640000175000017500000001727010363532303017271 0ustar muammarmuammar SUBROUTINE ZSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * ZSYMV performs the following matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors, and * A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX*16 * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) COMPLEX*16 * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY COMPLEX*16 TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'ZSYMV ', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 )THEN KX = 1 ELSE KX = 1 - ( N - 1 )*INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := beta*y. * IF( BETA.NE.ONE )THEN IF( INCY.EQ.1 )THEN IF( BETA.EQ.ZERO )THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = BETA*Y( I ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO )THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = BETA*Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * IF( LSAME( UPLO, 'U' ) )THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 60, J = 1, N TEMP1 = ALPHA*X( J ) TEMP2 = ZERO DO 50, I = 1, J - 1 Y( I ) = Y( I ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( I ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 60 CONTINUE ELSE JX = KX JY = KY DO 80, J = 1, N TEMP1 = ALPHA*X( JX ) TEMP2 = ZERO IX = KX IY = KY DO 70, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( IX ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 JX = JX + INCX JY = JY + INCY 80 CONTINUE END IF ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 100, J = 1, N TEMP1 = ALPHA*X( J ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1*A( J, J ) DO 90, I = J + 1, N Y( I ) = Y( I ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( I ) 90 CONTINUE Y( J ) = Y( J ) + ALPHA*TEMP2 100 CONTINUE ELSE JX = KX JY = KY DO 120, J = 1, N TEMP1 = ALPHA*X( JX ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1*A( J, J ) IX = JX IY = JY DO 110, I = J + 1, N IX = IX + INCX IY = IY + INCY Y( IY ) = Y( IY ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( IX ) 110 CONTINUE Y( JY ) = Y( JY ) + ALPHA*TEMP2 JX = JX + INCX JY = JY + INCY 120 CONTINUE END IF END IF * RETURN * * End of ZSYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zahemv.f0000640000175000017500000002130110363532303017361 0ustar muammarmuammar SUBROUTINE ZAHEMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION Y( * ) COMPLEX*16 A( LDA, * ), X( * ) * .. * * Purpose * ======= * * ZAHEMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n Hermitian matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the Hermitian ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * Hermitian matrix and the strictly upper trapezoidal part of A * is not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the offdiagonal elements of A need not be set and * assumed to be zero. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY DOUBLE PRECISION TALPHA, TEMP0, TEMP1, TEMP2 COMPLEX*16 ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'ZAHEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * ABS( DBLE( A( J, J ) ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * ABS( DBLE( A( J, J ) ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * ABS( DBLE( A( J, J ) ) ) * DO 90, I = J + 1, N TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * ABS( DBLE( A( J, J ) ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of ZAHEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/sasymv.f0000640000175000017500000002032410363532303017415 0ustar muammarmuammar SUBROUTINE SASYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * SASYMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) REAL array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY REAL TALPHA, TEMP0, TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'SASYMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * ABS( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = ABS( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * ABS( A( J, J ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * ABS( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = ABS( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * ABS( A( J, J ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * ABS( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * ABS( A( J, J ) ) * DO 90, I = J + 1, N TEMP0 = ABS( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * ABS( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * ABS( A( J, J ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = ABS( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * ABS( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of SASYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/drshft.f0000640000175000017500000000620110363532303017363 0ustar muammarmuammar SUBROUTINE DRSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * DRSHFT shifts rows of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A to be shifted. * M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A. N must be * at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the rows of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the rows * are shifted to the bottom. When OFFSET is negative, the rows * of A are shifted to the top. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension ( LDA, N ). On exit, A * contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M+ABS(OFFSET) ). * * Further Details * =============== * * N N N N * --- --- --- --- * | 1 | | 1 | | 1 | | 7 | * | 2 | M = 3 | 2 | | 2 | M = 3 | 8 | * | 3 | | 3 | | 3 | | 9 | * | 4 | | 4 | | 4 | | 4 | * | 5 | becomes | 5 | | 5 | becomes | 5 | * | 6 | | 6 | | 6 | | 6 | * | 7 | | 1 | | 7 | | 7 | * | 8 | OFFSET = 6 | 2 | | 8 | OFFSET = -6 | 8 | * | 9 | | 3 | | 9 | | 9 | * --- --- --- --- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = 1, N DO 10 I = M, 1, -1 A( I+OFFSET, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I-OFFSET, J ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of DRSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zagemv.f0000640000175000017500000002026010363532303017363 0ustar muammarmuammar SUBROUTINE ZAGEMV( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER INCX, INCY, LDA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION Y( * ) COMPLEX*16 A( LDA, * ), X( * ) * .. * * Purpose * ======= * * ZAGEMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * or * * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an m by n matrix. * * Arguments * ========= * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + * abs( beta*y ) * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX*16 array of dimension ( LDA, n ). * On entry, A is an array of dimension ( LDA, N ). The leading * m by n part of the array A must contain the matrix of coef- * ficients. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, * the incremented array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry * with BETA non-zero, the incremented array Y must contain the * vector y. On exit, the incremented array Y is overwritten by * the updated vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY DOUBLE PRECISION ABSX, TALPHA, TEMP COMPLEX*16 ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = 1 ELSE IF( M.LT.0 ) THEN INFO = 2 ELSE IF( N.LT.0 ) THEN INFO = 3 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZAGEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set LENX and LENY, the lengths of the vectors x and y, and set * up the start points in X and Y. * IF( LSAME( TRANS, 'N' ) ) THEN LENX = N LENY = M ELSE LENX = M LENY = N END IF IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( LENX - 1 )*INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( LENY - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, LENY Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, LENY Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, LENY Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, LENY Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, LENY Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, LENY Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) ) THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, M Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE ELSE DO 100, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, M Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) IY = IY + INCY 90 CONTINUE END IF JX = JX + INCX 100 CONTINUE END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * JY = KY IF( INCX.EQ.1 ) THEN DO 120, J = 1, N TEMP = ZERO DO 110, I = 1, M TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 110 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 120 CONTINUE ELSE DO 140, J = 1, N TEMP = ZERO IX = KX DO 130, I = 1, M TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) IX = IX + INCX 130 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 140 CONTINUE END IF END IF * RETURN * * End of ZAGEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmcadd.f0000640000175000017500000001051610363532303017514 0ustar muammarmuammar SUBROUTINE ZMMCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMCADD performs the following operation: * * B := alpha * conjg( A ) + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, N ). On exit, the * leading m by n part of A has been conjugated and added to the * leading m by n part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( I, J ) = DCONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( I, J ) = DCONJG( A( I, J ) ) + BETA * B( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( I, J ) = DCONJG( A( I, J ) ) + B( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( I, J ) = ALPHA * DCONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( I, J ) = ALPHA * DCONJG( A( I, J ) ) + $ BETA * B( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( I, J ) = ALPHA * DCONJG( A( I, J ) ) + B( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, BETA, B( 1, J ), 1 ) * DO 150 I = 1, M * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of ZMMCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/csymv.f0000640000175000017500000001725110363532303017241 0ustar muammarmuammar SUBROUTINE CSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * CSYMV performs the following matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors, and * A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) COMPLEX * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY COMPLEX TEMP1, TEMP2 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'CSYMV ', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 )THEN KX = 1 ELSE KX = 1 - ( N - 1 )*INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := beta*y. * IF( BETA.NE.ONE )THEN IF( INCY.EQ.1 )THEN IF( BETA.EQ.ZERO )THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = BETA*Y( I ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO )THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = BETA*Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * IF( LSAME( UPLO, 'U' ) )THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 60, J = 1, N TEMP1 = ALPHA*X( J ) TEMP2 = ZERO DO 50, I = 1, J - 1 Y( I ) = Y( I ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( I ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 60 CONTINUE ELSE JX = KX JY = KY DO 80, J = 1, N TEMP1 = ALPHA*X( JX ) TEMP2 = ZERO IX = KX IY = KY DO 70, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( IX ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 JX = JX + INCX JY = JY + INCY 80 CONTINUE END IF ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN DO 100, J = 1, N TEMP1 = ALPHA*X( J ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1*A( J, J ) DO 90, I = J + 1, N Y( I ) = Y( I ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( I ) 90 CONTINUE Y( J ) = Y( J ) + ALPHA*TEMP2 100 CONTINUE ELSE JX = KX JY = KY DO 120, J = 1, N TEMP1 = ALPHA*X( JX ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1*A( J, J ) IX = JX IY = JY DO 110, I = J + 1, N IX = IX + INCX IY = IY + INCY Y( IY ) = Y( IY ) + TEMP1*A( I, J ) TEMP2 = TEMP2 + A( I, J )*X( IX ) 110 CONTINUE Y( JY ) = Y( JY ) + ALPHA*TEMP2 JX = JX + INCX JY = JY + INCY 120 CONTINUE END IF END IF * RETURN * * End of CSYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/immtadd.f0000640000175000017500000001372510363532303017521 0ustar muammarmuammar SUBROUTINE IMMTADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N INTEGER ALPHA, BETA * .. * .. Array Arguments .. INTEGER A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * IMMTADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) INTEGER * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) INTEGER array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) INTEGER * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) INTEGER array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. INTEGER ONE, ZERO PARAMETER ( ONE = 1, ZERO = 0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M B( J, I ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = A( I, J ) + B( J, I ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M B( J, I ) = ALPHA * A( I, J ) + B( J, I ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M DO 150 I = 1, N B( I, J ) = BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M DO 170 I = 1, N B( I, J ) = A( J, I ) 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M DO 210 I = 1, N B( I, J ) = A( J, I ) + B( I, J ) 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M DO 270 I = 1, N B( I, J ) = ALPHA * A( J, I ) + B( I, J ) 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M DO 310 I = 1, N B( I, J ) = BETA * B( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of IMMTADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cvvdotu.f0000640000175000017500000000355210363532303017571 0ustar muammarmuammar SUBROUTINE CVVDOTU( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX DOT * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * CVVDOTU computes the following dot product: * * dot = dot + x**T * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) COMPLEX * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. COMPLEX CDOTU EXTERNAL CDOTU * .. * .. Executable Statements .. * DOT = DOT + CDOTU( N, X, INCX, Y, INCY ) * RETURN * * End of CVVDOTU * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cmmddac.f0000640000175000017500000001050010363532303017456 0ustar muammarmuammar SUBROUTINE CMMDDAC( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CMMDDAC performs the following operation: * * A := alpha * A + beta * conjg( B ), * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been conjugated and added to the * leading m by n part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL CSCAL * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = CONJG( B( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = CONJG( B( I, J ) ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M A( I, J ) = CONJG( B( I, J ) ) + A( I, J ) 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * CONJG( B( I, J ) ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * CONJG( B( I, J ) ) + $ ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N DO 110 I = 1, M A( I, J ) = BETA * CONJG( B( I, J ) ) + A( I, J ) 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL CSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of CMMDDAC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dascal.f0000640000175000017500000001002510363532303017317 0ustar muammarmuammar SUBROUTINE DASCAL( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * DASCAL performs the following operation: * * x := abs( alpha ) * abs( x ), * * where alpha is a scalar and x is an n vector. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * X (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are mutiplied by alpha in absolute value. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DASCAL', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := abs( alpha ) * abs( x ) * IF( INCX.EQ.1 ) $ GO TO 40 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, N X( IX ) = ZERO IX = IX + INCX 10 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 20 I = 1, N X( IX ) = ABS( X( IX ) ) IX = IX + INCX 20 CONTINUE ELSE DO 30 I = 1, N X( IX ) = ABS( ALPHA * X( IX ) ) IX = IX + INCX 30 CONTINUE END IF * RETURN * * code for increment equal to 1 * * clean-up loop * 40 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 80 * IF( ALPHA.EQ.ZERO ) THEN DO 50 I = 1, M X( I ) = ZERO 50 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 60 I = 1, M X( I ) = ABS( X( I ) ) 60 CONTINUE ELSE DO 70 I = 1, M X( I ) = ABS( ALPHA * X( I ) ) 70 CONTINUE END IF * IF( N.LT.4 ) $ RETURN * 80 MP1 = M + 1 * IF( ALPHA.EQ.ZERO ) THEN DO 90 I = MP1, N, 4 X( I ) = ZERO X( I + 1 ) = ZERO X( I + 2 ) = ZERO X( I + 3 ) = ZERO 90 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 100 I = MP1, N, 4 X( I ) = ABS( X( I ) ) X( I + 1 ) = ABS( X( I + 1 ) ) X( I + 2 ) = ABS( X( I + 2 ) ) X( I + 3 ) = ABS( X( I + 3 ) ) 100 CONTINUE ELSE DO 110 I = MP1, N, 4 X( I ) = ABS( ALPHA * X( I ) ) X( I + 1 ) = ABS( ALPHA * X( I + 1 ) ) X( I + 2 ) = ABS( ALPHA * X( I + 2 ) ) X( I + 3 ) = ABS( ALPHA * X( I + 3 ) ) 110 CONTINUE END IF * RETURN * * End of DASCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zatrmv.f0000640000175000017500000003111210363532303017413 0ustar muammarmuammar SUBROUTINE ZATRMV( UPLO, TRANS, DIAG, N, ALPHA, A, LDA, X, INCX, $ BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER INCX, INCY, LDA, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION Y( * ) COMPLEX*16 A( LDA, * ), X( * ) * .. * * Purpose * ======= * * ZATRMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * or * * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n unit or non-unit, upper or lower triangular * matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + * abs( beta*y ) * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX*16 array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the matrix A and * the strictly lower triangular part of A is not referenced. * When UPLO = 'L' or 'l', the leading n by n part of the array * A must contain the lower triangular part of the matrix A and * the strictly upper trapezoidal part of A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of A * are not referenced either, but are assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY LOGICAL NOUNIT DOUBLE PRECISION ABSX, TALPHA, TEMP COMPLEX*16 ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO , 'U' ).AND. $ .NOT.LSAME( UPLO , 'L' ) )THEN INFO = 1 ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) )THEN INFO = 2 ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN INFO = 3 ELSE IF( N.LT.0 )THEN INFO = 4 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 7 ELSE IF( INCX.EQ.0 )THEN INFO = 9 ELSE IF( INCY.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'ZATRMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * NOUNIT = LSAME( DIAG , 'N' ) * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, N Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, N Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, N Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) )THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, J - 1 Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 70 CONTINUE * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * CABS1( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF END IF JX = JX + INCX 80 CONTINUE * ELSE * DO 100, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) IY = IY + INCY 90 CONTINUE * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * CABS1( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF END IF JX = JX + INCX 100 CONTINUE * END IF * ELSE * JX = KX IF( INCY.EQ.1 ) THEN DO 120, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN * TEMP = TALPHA * ABSX * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * CABS1( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF * DO 110, I = J + 1, N Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 110 CONTINUE END IF JX = JX + INCX 120 CONTINUE * ELSE * DO 140, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY + ( J - 1 ) * INCY * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * CABS1( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF * DO 130, I = J + 1, N IY = IY + INCY Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) 130 CONTINUE END IF JX = JX + INCX 140 CONTINUE * END IF * END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JY = KY IF( INCX.EQ.1 ) THEN DO 160, J = 1, N * TEMP = ZERO * DO 150, I = 1, J - 1 TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 150 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + CABS1( A( J, J ) ) * CABS1( X( J ) ) ELSE TEMP = TEMP + CABS1( X( J ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 160 CONTINUE * ELSE * DO 180, J = 1, N TEMP = ZERO IX = KX DO 170, I = 1, J - 1 TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) IX = IX + INCX 170 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + CABS1( A( J, J ) ) * CABS1( X( IX ) ) ELSE TEMP = TEMP + CABS1( X( IX ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 180 CONTINUE * END IF * ELSE * JY = KY IF( INCX.EQ.1 ) THEN * DO 200, J = 1, N * IF( NOUNIT ) THEN TEMP = CABS1( A( J, J ) ) * CABS1( X( J ) ) ELSE TEMP = CABS1( X( J ) ) END IF * DO 190, I = J + 1, N TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 190 CONTINUE * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 200 CONTINUE * ELSE * DO 220, J = 1, N * IX = KX + ( J - 1 ) * INCX * IF( NOUNIT ) THEN TEMP = CABS1( A( J, J ) ) * CABS1( X( IX ) ) ELSE TEMP = CABS1( X( IX ) ) END IF * DO 210, I = J + 1, N IX = IX + INCX TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) 210 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 220 CONTINUE END IF END IF * END IF * RETURN * * End of ZATRMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/smmtcadd.f0000640000175000017500000001506110363532303017671 0ustar muammarmuammar SUBROUTINE SMMTCADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SMMTCADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) REAL * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) REAL array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL SCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL SAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL SAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL SSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL SCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL SAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL SAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL SSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of SMMTCADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmtadd.f0000640000175000017500000001517210363532303017540 0ustar muammarmuammar SUBROUTINE ZMMTADD( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMTADD performs the following operation: * * B := alpha * A' + beta * B, * * where alpha, beta are scalars; A is an m by n matrix and B is an n by * m matrix. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and the number * of columns of B. M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of rows of B and the number * of columns of A. N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, M ). On exit, the * leading m by n part of A has been added to the leading n by m * part of B. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, N ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSCAL * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N CALL ZCOPY( M, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 10 I = 1, M * B( J, I ) = A( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M B( J, I ) = A( I, J ) + BETA * B( J, I ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL ZAXPY( M, ONE, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 50 I = 1, M * B( J, I ) = A( I, J ) + B( J, I ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M B( J, I ) = ALPHA * A( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M B( J, I ) = ALPHA * A( I, J ) + BETA * B( J, I ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL ZAXPY( M, ALPHA, A( 1, J ), 1, B( J, 1 ), LDB ) * DO 110 I = 1, M * B( J, I ) = ALPHA * A( I, J ) + B( J, I ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 140 J = 1, M DO 130 I = 1, N B( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 160 J = 1, M CALL ZSCAL( N, BETA, B( 1, J ), 1 ) * DO 150 I = 1, N * B( I, J ) = BETA * B( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, M CALL ZCOPY( N, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 170 I = 1, N * B( I, J ) = A( J, I ) * 170 CONTINUE 180 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 200 J = 1, M DO 190 I = 1, N B( I, J ) = A( J, I ) + BETA * B( I, J ) 190 CONTINUE 200 CONTINUE ELSE DO 220 J = 1, M CALL ZAXPY( N, ONE, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 210 I = 1, N * B( I, J ) = A( J, I ) + B( I, J ) * 210 CONTINUE 220 CONTINUE END IF ELSE IF( ALPHA.NE.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 240 J = 1, M DO 230 I = 1, N B( I, J ) = ALPHA * A( J, I ) 230 CONTINUE 240 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 260 J = 1, M DO 250 I = 1, N B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE ELSE DO 280 J = 1, M CALL ZAXPY( N, ALPHA, A( J, 1 ), LDA, B( 1, J ), 1 ) * DO 270 I = 1, N * B( I, J ) = ALPHA * A( J, I ) + B( I, J ) * 270 CONTINUE 280 CONTINUE END IF ELSE IF( BETA.EQ.ZERO ) THEN DO 300 J = 1, M DO 290 I = 1, N B( I, J ) = ZERO 290 CONTINUE 300 CONTINUE ELSE IF( BETA.NE.ONE ) THEN DO 320 J = 1, M CALL ZSCAL( N, BETA, B( 1, J ), 1 ) * DO 310 I = 1, N * B( I, J ) = BETA * B( I, J ) * 310 CONTINUE 320 CONTINUE END IF END IF END IF * RETURN * * End of ZMMTADD * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/catrmv.f0000640000175000017500000003104010363532303017364 0ustar muammarmuammar SUBROUTINE CATRMV( UPLO, TRANS, DIAG, N, ALPHA, A, LDA, X, INCX, $ BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER INCX, INCY, LDA, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL Y( * ) COMPLEX A( LDA, * ), X( * ) * .. * * Purpose * ======= * * CATRMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * or * * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n unit or non-unit, upper or lower triangular * matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + * abs( beta*y ) * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the matrix A and * the strictly lower triangular part of A is not referenced. * When UPLO = 'L' or 'l', the leading n by n part of the array * A must contain the lower triangular part of the matrix A and * the strictly upper trapezoidal part of A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of A * are not referenced either, but are assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY LOGICAL NOUNIT REAL ABSX, TALPHA, TEMP COMPLEX ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL * .. * .. Statement Functions .. REAL CABS1 CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO , 'U' ).AND. $ .NOT.LSAME( UPLO , 'L' ) )THEN INFO = 1 ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) )THEN INFO = 2 ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN INFO = 3 ELSE IF( N.LT.0 )THEN INFO = 4 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 7 ELSE IF( INCX.EQ.0 )THEN INFO = 9 ELSE IF( INCY.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'CATRMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * NOUNIT = LSAME( DIAG , 'N' ) * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, N Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, N Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, N Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) )THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, J - 1 Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 70 CONTINUE * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * CABS1( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF END IF JX = JX + INCX 80 CONTINUE * ELSE * DO 100, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) IY = IY + INCY 90 CONTINUE * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * CABS1( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF END IF JX = JX + INCX 100 CONTINUE * END IF * ELSE * JX = KX IF( INCY.EQ.1 ) THEN DO 120, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN * TEMP = TALPHA * ABSX * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * CABS1( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF * DO 110, I = J + 1, N Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 110 CONTINUE END IF JX = JX + INCX 120 CONTINUE * ELSE * DO 140, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY + ( J - 1 ) * INCY * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * CABS1( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF * DO 130, I = J + 1, N IY = IY + INCY Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) 130 CONTINUE END IF JX = JX + INCX 140 CONTINUE * END IF * END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JY = KY IF( INCX.EQ.1 ) THEN DO 160, J = 1, N * TEMP = ZERO * DO 150, I = 1, J - 1 TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 150 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + CABS1( A( J, J ) ) * CABS1( X( J ) ) ELSE TEMP = TEMP + CABS1( X( J ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 160 CONTINUE * ELSE * DO 180, J = 1, N TEMP = ZERO IX = KX DO 170, I = 1, J - 1 TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) IX = IX + INCX 170 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + CABS1( A( J, J ) ) * CABS1( X( IX ) ) ELSE TEMP = TEMP + CABS1( X( IX ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 180 CONTINUE * END IF * ELSE * JY = KY IF( INCX.EQ.1 ) THEN * DO 200, J = 1, N * IF( NOUNIT ) THEN TEMP = CABS1( A( J, J ) ) * CABS1( X( J ) ) ELSE TEMP = CABS1( X( J ) ) END IF * DO 190, I = J + 1, N TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 190 CONTINUE * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 200 CONTINUE * ELSE * DO 220, J = 1, N * IX = KX + ( J - 1 ) * INCX * IF( NOUNIT ) THEN TEMP = CABS1( A( J, J ) ) * CABS1( X( IX ) ) ELSE TEMP = CABS1( X( IX ) ) END IF * DO 210, I = J + 1, N IX = IX + INCX TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) 210 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 220 CONTINUE END IF END IF * END IF * RETURN * * End of CATRMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/datrmv.f0000640000175000017500000003024110363532303017367 0ustar muammarmuammar SUBROUTINE DATRMV( UPLO, TRANS, DIAG, N, ALPHA, A, LDA, X, INCX, $ BETA, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 DIAG, TRANS, UPLO INTEGER INCX, INCY, LDA, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * DATRMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n unit or non-unit, upper or lower triangular * matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * DIAG (input) CHARACTER*1 * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) DOUBLE PRECISION array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the matrix A and * the strictly lower triangular part of A is not referenced. * When UPLO = 'L' or 'l', the leading n by n part of the array * A must contain the lower triangular part of the matrix A and * the strictly upper trapezoidal part of A is not referenced. * Note that when DIAG = 'U' or 'u', the diagonal elements of A * are not referenced either, but are assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY LOGICAL NOUNIT DOUBLE PRECISION ABSX, TALPHA, TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO , 'U' ).AND. $ .NOT.LSAME( UPLO , 'L' ) )THEN INFO = 1 ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. $ .NOT.LSAME( TRANS, 'T' ).AND. $ .NOT.LSAME( TRANS, 'C' ) )THEN INFO = 2 ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. $ .NOT.LSAME( DIAG , 'N' ) )THEN INFO = 3 ELSE IF( N.LT.0 )THEN INFO = 4 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 7 ELSE IF( INCX.EQ.0 )THEN INFO = 9 ELSE IF( INCY.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'DATRMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * NOUNIT = LSAME( DIAG , 'N' ) * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, N Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, N Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, N Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) )THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, J - 1 Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 70 CONTINUE * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * ABS( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF END IF JX = JX + INCX 80 CONTINUE * ELSE * DO 100, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, J - 1 Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) IY = IY + INCY 90 CONTINUE * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * ABS( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF END IF JX = JX + INCX 100 CONTINUE * END IF * ELSE * JX = KX IF( INCY.EQ.1 ) THEN DO 120, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN * TEMP = TALPHA * ABSX * IF( NOUNIT ) THEN Y( J ) = Y( J ) + TEMP * ABS( A( J, J ) ) ELSE Y( J ) = Y( J ) + TEMP END IF * DO 110, I = J + 1, N Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 110 CONTINUE END IF JX = JX + INCX 120 CONTINUE * ELSE * DO 140, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY + ( J - 1 ) * INCY * IF( NOUNIT ) THEN Y( IY ) = Y( IY ) + TEMP * ABS( A( J, J ) ) ELSE Y( IY ) = Y( IY ) + TEMP END IF * DO 130, I = J + 1, N IY = IY + INCY Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) 130 CONTINUE END IF JX = JX + INCX 140 CONTINUE * END IF * END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * IF( LSAME( UPLO, 'U' ) )THEN JY = KY IF( INCX.EQ.1 ) THEN DO 160, J = 1, N * TEMP = ZERO * DO 150, I = 1, J - 1 TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 150 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + ABS( A( J, J ) * X( J ) ) ELSE TEMP = TEMP + ABS( X( J ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 160 CONTINUE * ELSE * DO 180, J = 1, N TEMP = ZERO IX = KX DO 170, I = 1, J - 1 TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) IX = IX + INCX 170 CONTINUE * IF( NOUNIT ) THEN TEMP = TEMP + ABS( A( J, J ) * X( IX ) ) ELSE TEMP = TEMP + ABS( X( IX ) ) END IF * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 180 CONTINUE * END IF * ELSE * JY = KY IF( INCX.EQ.1 ) THEN * DO 200, J = 1, N * IF( NOUNIT ) THEN TEMP = ABS( A( J, J ) * X( J ) ) ELSE TEMP = ABS( X( J ) ) END IF * DO 190, I = J + 1, N TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 190 CONTINUE * Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY * 200 CONTINUE * ELSE * DO 220, J = 1, N * IX = KX + ( J - 1 ) * INCX * IF( NOUNIT ) THEN TEMP = ABS( A( J, J ) * X( IX ) ) ELSE TEMP = ABS( X( IX ) ) END IF * DO 210, I = J + 1, N IX = IX + INCX TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) 210 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 220 CONTINUE END IF END IF * END IF * RETURN * * End of DATRMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dagemv.f0000640000175000017500000001754110363532303017345 0ustar muammarmuammar SUBROUTINE DAGEMV( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER INCX, INCY, LDA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) * .. * * Purpose * ======= * * DAGEMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an m by n matrix. * * Arguments * ========= * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the real scalar alpha. * * A (input) DOUBLE PRECISION array of dimension ( LDA, n ). * On entry, A is an array of dimension ( LDA, N ). The leading * m by n part of the array A must contain the matrix of coef- * ficients. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, * the incremented array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) DOUBLE PRECISION * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry * with BETA non-zero, the incremented array Y must contain the * vector y. On exit, the incremented array Y is overwritten by * the updated vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY DOUBLE PRECISION ABSX, TALPHA, TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = 1 ELSE IF( M.LT.0 ) THEN INFO = 2 ELSE IF( N.LT.0 ) THEN INFO = 3 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DAGEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set LENX and LENY, the lengths of the vectors x and y, and set * up the start points in X and Y. * IF( LSAME( TRANS, 'N' ) ) THEN LENX = N LENY = M ELSE LENX = M LENY = N END IF IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( LENX - 1 )*INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( LENY - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, LENY Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, LENY Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, LENY Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, LENY Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, LENY Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, LENY Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) ) THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, M Y( I ) = Y( I ) + TEMP * ABS( A( I, J ) ) 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE ELSE DO 100, J = 1, N ABSX = ABS( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, M Y( IY ) = Y( IY ) + TEMP * ABS( A( I, J ) ) IY = IY + INCY 90 CONTINUE END IF JX = JX + INCX 100 CONTINUE END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * JY = KY IF( INCX.EQ.1 ) THEN DO 120, J = 1, N TEMP = ZERO DO 110, I = 1, M TEMP = TEMP + ABS( A( I, J ) * X( I ) ) 110 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 120 CONTINUE ELSE DO 140, J = 1, N TEMP = ZERO IX = KX DO 130, I = 1, M TEMP = TEMP + ABS( A( I, J ) * X( IX ) ) IX = IX + INCX 130 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 140 CONTINUE END IF END IF * RETURN * * End of DAGEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/sascal.f0000640000175000017500000000777510363532303017360 0ustar muammarmuammar SUBROUTINE SASCAL( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N REAL ALPHA * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * SASCAL performs the following operation: * * x := abs( alpha ) * abs( x ), * * where alpha is a scalar and x is an n vector. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the scalar alpha. * * X (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are mutiplied by alpha in absolute value. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SASCAL', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := abs( alpha ) * abs( x ) * IF( INCX.EQ.1 ) $ GO TO 40 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, N X( IX ) = ZERO IX = IX + INCX 10 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 20 I = 1, N X( IX ) = ABS( X( IX ) ) IX = IX + INCX 20 CONTINUE ELSE DO 30 I = 1, N X( IX ) = ABS( ALPHA * X( IX ) ) IX = IX + INCX 30 CONTINUE END IF * RETURN * * code for increment equal to 1 * * clean-up loop * 40 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 80 * IF( ALPHA.EQ.ZERO ) THEN DO 50 I = 1, M X( I ) = ZERO 50 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 60 I = 1, M X( I ) = ABS( X( I ) ) 60 CONTINUE ELSE DO 70 I = 1, M X( I ) = ABS( ALPHA * X( I ) ) 70 CONTINUE END IF * IF( N.LT.4 ) $ RETURN * 80 MP1 = M + 1 * IF( ALPHA.EQ.ZERO ) THEN DO 90 I = MP1, N, 4 X( I ) = ZERO X( I + 1 ) = ZERO X( I + 2 ) = ZERO X( I + 3 ) = ZERO 90 CONTINUE ELSE IF( ALPHA.EQ.ONE ) THEN DO 100 I = MP1, N, 4 X( I ) = ABS( X( I ) ) X( I + 1 ) = ABS( X( I + 1 ) ) X( I + 2 ) = ABS( X( I + 2 ) ) X( I + 3 ) = ABS( X( I + 3 ) ) 100 CONTINUE ELSE DO 110 I = MP1, N, 4 X( I ) = ABS( ALPHA * X( I ) ) X( I + 1 ) = ABS( ALPHA * X( I + 1 ) ) X( I + 2 ) = ABS( ALPHA * X( I + 2 ) ) X( I + 3 ) = ABS( ALPHA * X( I + 3 ) ) 110 CONTINUE END IF * RETURN * * End of SASCAL * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zmmdda.f0000640000175000017500000001053710363532303017354 0ustar muammarmuammar SUBROUTINE ZMMDDA( M, N, ALPHA, A, LDA, BETA, B, LDB ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZMMDDA performs the following operation: * * A := alpha * A + beta * B, * * where alpha, beta are scalars and A and B are m by n matrices. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A and B. M must * be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A and B. * N must be at least zero. * * ALPHA (local input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A need * not be set on input. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). On exit, the * leading m by n part of B has been added to the leading m by n * part of A. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BETA (local input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B need not * be set on input. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension ( LDB, N ). * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least max( 1, M ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSCAL * .. * .. Executable Statements .. * IF( BETA.EQ.ONE ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 20 J = 1, N CALL ZCOPY( M, B( 1, J ), 1, A( 1, J ), 1 ) * DO 10 I = 1, M * A( I, J ) = B( I, J ) * 10 CONTINUE 20 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = B( I, J ) + ALPHA * A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N CALL ZAXPY( M, ONE, B( 1, J ), 1, A( 1, J ), 1 ) * DO 50 I = 1, M * A( I, J ) = B( I, J ) + A( I, J ) * 50 CONTINUE 60 CONTINUE END IF ELSE IF( BETA.NE.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M A( I, J ) = BETA * B( I, J ) 70 CONTINUE 80 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 100 J = 1, N DO 90 I = 1, M A( I, J ) = BETA * B( I, J ) + ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE ELSE DO 120 J = 1, N CALL ZAXPY( M, BETA, B( 1, J ), 1, A( 1, J ), 1 ) * DO 110 I = 1, M * A( I, J ) = BETA * B( I, J ) + A( I, J ) * 110 CONTINUE 120 CONTINUE END IF ELSE IF( ALPHA.EQ.ZERO ) THEN DO 140 J = 1, N DO 130 I = 1, M A( I, J ) = ZERO 130 CONTINUE 140 CONTINUE ELSE IF( ALPHA.NE.ONE ) THEN DO 160 J = 1, N CALL ZSCAL( M, ALPHA, A( 1, J ), 1 ) * DO 150 I = 1, M * A( I, J ) = ALPHA * A( I, J ) * 150 CONTINUE 160 CONTINUE END IF END IF * RETURN * * End of ZMMDDA * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zvvdotc.f0000640000175000017500000000356310363532303017600 0ustar muammarmuammar SUBROUTINE ZVVDOTC( N, DOT, X, INCX, Y, INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX*16 DOT * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * ZVVDOTC computes the following dot product: * * dot = dot + x**H * y, * * where x and y are n vectors. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * DOT (input/output) COMPLEX*16 * On exit, DOT is updated with the dot product of the vectors x * and y. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (input) COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented * array Y must contain the vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. COMPLEX*16 ZDOTC EXTERNAL ZDOTC * .. * .. Executable Statements .. * DOT = DOT + ZDOTC( N, X, INCX, Y, INCY ) * RETURN * * End of ZVVDOTC * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/csyr.f0000640000175000017500000001302510363532303017053 0ustar muammarmuammar SUBROUTINE CSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, LDA, N COMPLEX ALPHA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), X( * ) * .. * * Purpose * ======= * * CSYR performs the symmetric rank 1 operation * * A := alpha*x*x' + A, * * where alpha is a complex scalar, x is an n element vector and A is an * n by n SY matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) COMPLEX * On entry, ALPHA specifies the scalar alpha. * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * A (input/output) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. On exit, the upper triangular part of the array A is * overwritten by the upper triangular part of the updated ma- * trix. When UPLO = 'L' or 'l', the leading n by n part of the * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. On exit, the lower triangular part of the * array A is overwritten by the lower triangular part of the * updated matrix. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * ===================================================================== * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, J, JX, KX COMPLEX TEMP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( INCX.EQ.0 ) THEN INFO = 5 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = 7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CSYR', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) $ RETURN * * Set the start point in X if the increment is not unity. * IF( INCX.LE.0 ) THEN KX = 1 - ( N-1 )*INCX ELSE IF( INCX.NE.1 ) THEN KX = 1 END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * IF( LSAME( UPLO, 'U' ) ) THEN * * Form A when A is stored in upper triangle. * IF( INCX.EQ.1 ) THEN DO 20 J = 1, N IF( X( J ).NE.ZERO ) THEN TEMP = ALPHA*X( J ) DO 10 I = 1, J A( I, J ) = A( I, J ) + X( I )*TEMP 10 CONTINUE END IF 20 CONTINUE ELSE JX = KX DO 40 J = 1, N IF( X( JX ).NE.ZERO ) THEN TEMP = ALPHA*X( JX ) IX = KX DO 30 I = 1, J A( I, J ) = A( I, J ) + X( IX )*TEMP IX = IX + INCX 30 CONTINUE END IF JX = JX + INCX 40 CONTINUE END IF ELSE * * Form A when A is stored in lower triangle. * IF( INCX.EQ.1 ) THEN DO 60 J = 1, N IF( X( J ).NE.ZERO ) THEN TEMP = ALPHA*X( J ) DO 50 I = J, N A( I, J ) = A( I, J ) + X( I )*TEMP 50 CONTINUE END IF 60 CONTINUE ELSE JX = KX DO 80 J = 1, N IF( X( JX ).NE.ZERO ) THEN TEMP = ALPHA*X( JX ) IX = JX DO 70 I = J, N A( I, J ) = A( I, J ) + X( IX )*TEMP IX = IX + INCX 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE END IF END IF * RETURN * * End of CSYR * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/cagemv.f0000640000175000017500000002020610363532303017334 0ustar muammarmuammar SUBROUTINE CAGEMV( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER INCX, INCY, LDA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL Y( * ) COMPLEX A( LDA, * ), X( * ) * .. * * Purpose * ======= * * CAGEMV performs one of the matrix-vector operations * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * or * * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an m by n matrix. * * Arguments * ========= * * TRANS (input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ) * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ) * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + * abs( beta*y ) * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX array of dimension ( LDA, n ). * On entry, A is an array of dimension ( LDA, N ). The leading * m by n part of the array A must contain the matrix of coef- * ficients. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, * the incremented array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at * least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry * with BETA non-zero, the incremented array Y must contain the * vector y. On exit, the incremented array Y is overwritten by * the updated vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY REAL ABSX, TALPHA, TEMP COMPLEX ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL * .. * .. Statement Functions .. REAL CABS1 CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = 1 ELSE IF( M.LT.0 ) THEN INFO = 2 ELSE IF( N.LT.0 ) THEN INFO = 3 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CAGEMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set LENX and LENY, the lengths of the vectors x and y, and set * up the start points in X and Y. * IF( LSAME( TRANS, 'N' ) ) THEN LENX = N LENY = M ELSE LENX = M LENY = N END IF IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( LENX - 1 )*INCX END IF IF( INCY.GT.0 ) THEN KY = 1 ELSE KY = 1 - ( LENY - 1 )*INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through A. * * First form y := abs( beta*y ). * IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, LENY Y( I ) = ZERO 10 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 20, I = 1, LENY Y( I ) = ABS( Y( I ) ) 20 CONTINUE ELSE DO 30, I = 1, LENY Y( I ) = ABS( BETA * Y( I ) ) 30 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 40, I = 1, LENY Y( IY ) = ZERO IY = IY + INCY 40 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 50, I = 1, LENY Y( IY ) = ABS( Y( IY ) ) IY = IY + INCY 50 CONTINUE ELSE DO 60, I = 1, LENY Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 60 CONTINUE END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( TRANS, 'N' ) ) THEN * * Form y := abs( alpha ) * abs( A ) * abs( x ) + y. * JX = KX IF( INCY.EQ.1 ) THEN DO 80, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX DO 70, I = 1, M Y( I ) = Y( I ) + TEMP * CABS1( A( I, J ) ) 70 CONTINUE END IF JX = JX + INCX 80 CONTINUE ELSE DO 100, J = 1, N ABSX = CABS1( X( JX ) ) IF( ABSX.NE.ZERO ) THEN TEMP = TALPHA * ABSX IY = KY DO 90, I = 1, M Y( IY ) = Y( IY ) + TEMP * CABS1( A( I, J ) ) IY = IY + INCY 90 CONTINUE END IF JX = JX + INCX 100 CONTINUE END IF * ELSE * * Form y := abs( alpha ) * abs( A' ) * abs( x ) + y. * JY = KY IF( INCX.EQ.1 ) THEN DO 120, J = 1, N TEMP = ZERO DO 110, I = 1, M TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( I ) ) 110 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 120 CONTINUE ELSE DO 140, J = 1, N TEMP = ZERO IX = KX DO 130, I = 1, M TEMP = TEMP + CABS1( A( I, J ) ) * CABS1( X( IX ) ) IX = IX + INCX 130 CONTINUE Y( JY ) = Y( JY ) + TALPHA * TEMP JY = JY + INCY 140 CONTINUE END IF END IF * RETURN * * End of CAGEMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/dset.f0000640000175000017500000000521510363532303017034 0ustar muammarmuammar SUBROUTINE DSET( N, ALPHA, X, INCX ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER INCX, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * DSET sets the entries of an n vector x to the scalar alpha. * * Arguments * ========= * * N (input) INTEGER * On entry, N specifies the length of the vector x. N must be * at least zero. * * ALPHA (input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. * * X (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. On exit, entries of the * incremented array X are set to alpha. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, INFO, IX, M, MP1 * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( N.LT.0 ) THEN INFO = 1 ELSE IF( INCX.EQ.0 ) THEN INFO = 4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DSET', INFO ) RETURN END IF * * Quick return if possible. * IF( N.LE.0 ) $ RETURN * * Form x := alpha * IF( INCX.EQ.1 ) $ GO TO 20 * * code for increments not equal to 1 * * Set up the start point in X. * IF( INCX.GT.0 ) THEN IX = 1 ELSE IX = 1 - ( N - 1 ) * INCX END IF * DO 10 I = 1, N X( IX ) = ALPHA IX = IX + INCX 10 CONTINUE * RETURN * * code for increment equal to 1 * * clean-up loop * 20 M = MOD( N, 4 ) * IF( M.EQ.0 ) $ GO TO 40 * DO 30 I = 1, M X( I ) = ALPHA 30 CONTINUE IF( N.LT.4 ) $ RETURN * 40 MP1 = M + 1 DO 50 I = MP1, N, 4 X( I ) = ALPHA X( I + 1 ) = ALPHA X( I + 2 ) = ALPHA X( I + 3 ) = ALPHA 50 CONTINUE * RETURN * * End of DSET * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/casymv.f0000640000175000017500000002070610363532303017401 0ustar muammarmuammar SUBROUTINE CASYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, $ INCY ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. CHARACTER*1 UPLO INTEGER INCX, INCY, LDA, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL Y( * ) COMPLEX A( LDA, * ), X( * ) * .. * * Purpose * ======= * * CASYMV performs the following matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * where alpha and beta are real scalars, y is a real vector, x is a * vector and A is an n by n symmetric matrix. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * On entry, UPLO specifies whether the upper or lower triangu- * lar part of the array A is to be referenced as follows: * * UPLO = 'U' or 'u' Only the upper triangular part of A is * to be referenced. * UPLO = 'L' or 'l' Only the lower triangular part of A is * to be referenced. * * N (input) INTEGER * On entry, N specifies the order of the matrix A. N must be at * least zero. * * ALPHA (input) REAL * On entry, ALPHA specifies the real scalar alpha. * * A (input) COMPLEX array * On entry, A is an array of dimension (LDA,N). Before entry * with UPLO = 'U' or 'u', the leading n by n part of the array * A must contain the upper triangular part of the symmetric ma- * trix and the strictly lower triangular part of A is not refe- * renced. When UPLO = 'L' or 'l', the leading n by n part of * the array A must contain the lower triangular part of the * symmetric matrix and the strictly upper trapezoidal part of A * is not referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, N ). * * X (input) COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented * array X must contain the vector x. * * INCX (input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (input) REAL * On entry, BETA specifies the real scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y (input/output) REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). Before entry with BETA non- * zero, the incremented array Y must contain the vector y. On * exit, the incremented array Y is overwritten by the updated * vector y. * * INCY (input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY REAL TALPHA, TEMP0, TEMP1, TEMP2 COMPLEX ZDUM * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, REAL * .. * .. Statement Functions .. REAL CABS1 CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF ( .NOT.LSAME( UPLO, 'U' ).AND. $ .NOT.LSAME( UPLO, 'L' ) )THEN INFO = 1 ELSE IF( N.LT.0 )THEN INFO = 2 ELSE IF( LDA.LT.MAX( 1, N ) )THEN INFO = 5 ELSE IF( INCX.EQ.0 )THEN INFO = 7 ELSE IF( INCY.EQ.0 )THEN INFO = 10 END IF IF( INFO.NE.0 )THEN CALL XERBLA( 'CASYMV', INFO ) RETURN END IF * * Quick return if possible. * IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) $ RETURN * * Set up the start points in X and Y. * IF( INCX.GT.0 ) THEN KX = 1 ELSE KX = 1 - ( N - 1 ) * INCX END IF IF( INCY.GT.0 )THEN KY = 1 ELSE KY = 1 - ( N - 1 ) * INCY END IF * * Start the operations. In this version the elements of A are * accessed sequentially with one pass through the triangular part * of A. * * First form y := abs( beta * y ). * IF( BETA.NE.ONE ) THEN IF( INCY.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN DO 10, I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE DO 20, I = 1, N Y( I ) = ABS( BETA * Y( I ) ) 20 CONTINUE END IF ELSE IY = KY IF( BETA.EQ.ZERO ) THEN DO 30, I = 1, N Y( IY ) = ZERO IY = IY + INCY 30 CONTINUE ELSE DO 40, I = 1, N Y( IY ) = ABS( BETA * Y( IY ) ) IY = IY + INCY 40 CONTINUE END IF END IF END IF * IF( ALPHA.EQ.ZERO ) $ RETURN * TALPHA = ABS( ALPHA ) * IF( LSAME( UPLO, 'U' ) ) THEN * * Form y when A is stored in upper triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN DO 60, J = 1, N TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO DO 50, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) 50 CONTINUE Y( J ) = Y( J ) + TEMP1 * CABS1( A( J, J ) ) + $ ALPHA * TEMP2 * 60 CONTINUE * ELSE * JX = KX JY = KY * DO 80, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO IX = KX IY = KY * DO 70, I = 1, J - 1 TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) IX = IX + INCX IY = IY + INCY 70 CONTINUE Y( JY ) = Y( JY ) + TEMP1 * CABS1( A( J, J ) ) + $ ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 80 CONTINUE * END IF * ELSE * * Form y when A is stored in lower triangle. * IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) ) THEN * DO 100, J = 1, N * TEMP1 = TALPHA * CABS1( X( J ) ) TEMP2 = ZERO Y( J ) = Y( J ) + TEMP1 * CABS1( A( J, J ) ) * DO 90, I = J + 1, N TEMP0 = CABS1( A( I, J ) ) Y( I ) = Y( I ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( I ) ) * 90 CONTINUE * Y( J ) = Y( J ) + ALPHA * TEMP2 * 100 CONTINUE * ELSE * JX = KX JY = KY * DO 120, J = 1, N TEMP1 = TALPHA * CABS1( X( JX ) ) TEMP2 = ZERO Y( JY ) = Y( JY ) + TEMP1 * CABS1( A( J, J ) ) IX = JX IY = JY * DO 110, I = J + 1, N * IX = IX + INCX IY = IY + INCY TEMP0 = CABS1( A( I, J ) ) Y( IY ) = Y( IY ) + TEMP1 * TEMP0 TEMP2 = TEMP2 + TEMP0 * CABS1( X( IX ) ) * 110 CONTINUE * Y( JY ) = Y( JY ) + ALPHA * TEMP2 JX = JX + INCX JY = JY + INCY * 120 CONTINUE * END IF * END IF * RETURN * * End of CASYMV * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zcshft.f0000640000175000017500000000553410363532303017402 0ustar muammarmuammar SUBROUTINE ZCSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZCSHFT shifts columns of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A. M must be at * least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A to be * shifted. N must be at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the columns of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the co- * lumns are shifted to the right. When OFFSET is negative, the * columns of A are shifted to the left. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N+ABS(OFFSET) ). * On exit, A contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Further Details * =============== * * N=3 OFFSET=6 -OFFSET=6 N=3 * ------------------- ------------------- * | 1 2 3 4 5 6 7 8 9 | M | 1 2 3 4 5 6 7 8 9 | * ------------------- ------------------- * V V * ------------------- ------------------- * | 1 2 3 4 5 6 1 2 3 | M | 7 8 9 4 5 6 7 8 9 | * ------------------- ------------------- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = N, 1, -1 DO 10 I = 1, M A( I, J+OFFSET ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I, J-OFFSET ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of ZCSHFT * END scalapack-1.8.0/PBLAS/SRC/PTZBLAS/zrshft.f0000640000175000017500000000617310363532303017421 0ustar muammarmuammar SUBROUTINE ZRSHFT( M, N, OFFSET, A, LDA ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER LDA, M, N, OFFSET * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * ZRSHFT shifts rows of an m by n array A by OFFSET. * * Arguments * ========= * * M (local input) INTEGER * On entry, M specifies the number of rows of A to be shifted. * M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the number of columns of A. N must be * at least zero. * * OFFSET (local input) INTEGER * On entry, OFFSET specifies the offset by which the rows of * A should be shifted. OFFSET can be positive or negative (see * below for further details). When OFFSET is positive, the rows * are shifted to the bottom. When OFFSET is negative, the rows * of A are shifted to the top. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension ( LDA, N ). On exit, A * contains the shifted array. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M+ABS(OFFSET) ). * * Further Details * =============== * * N N N N * --- --- --- --- * | 1 | | 1 | | 1 | | 7 | * | 2 | M = 3 | 2 | | 2 | M = 3 | 8 | * | 3 | | 3 | | 3 | | 9 | * | 4 | | 4 | | 4 | | 4 | * | 5 | becomes | 5 | | 5 | becomes | 5 | * | 6 | | 6 | | 6 | | 6 | * | 7 | | 1 | | 7 | | 7 | * | 8 | OFFSET = 6 | 2 | | 8 | OFFSET = -6 | 8 | * | 9 | | 3 | | 9 | | 9 | * --- --- --- --- * OFFSET >= 0 OFFSET <= 0 * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * IF( ( OFFSET.EQ.0 ).OR.( M.LE.0 ).OR.( N.LE.0 ) ) $ RETURN * IF( OFFSET.GT.0 ) THEN DO 20 J = 1, N DO 10 I = M, 1, -1 A( I+OFFSET, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, M A( I, J ) = A( I-OFFSET, J ) 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of ZRSHFT * END scalapack-1.8.0/PBLAS/SRC/Makefile0000640000175000017500000001464210363532303016253 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0) # # Module: Makefile # # Purpose: SRC Makefile # # Creation date: April 1, 1998 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc ############################################################################ # # This is the makefile to create a library for the PBLAS. # The files are grouped as follows: # # PSBLAS1 -- Single precision real PBLAS1 routines # PCBLAS1 -- Single precision complex PBLAS1 routines # PDBLAS1 -- Double precision real PBLAS1 routines # PZBLAS1 -- Double precision complex PBLAS1 routines # # PSBLAS2 -- Single precision real PBLAS2 routines # PCBLAS2 -- Single precision complex PBLAS2 routines # PDBLAS2 -- Double precision real PBLAS2 routines # PZBLAS2 -- Double precision complex PBLAS2 routines # # PSBLAS3 -- Single precision real PBLAS3 routines # PCBLAS3 -- Single precision complex PBLAS3 routines # PDBLAS3 -- Double precision real PBLAS3 routines # PZBLAS3 -- Double precision complex PBLAS3 routines # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ../../SLmake.inc file # definitions to match your compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # # To remove the object files after the library is created, enter # make clean # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # ############################################################################ all: single double complex complex16 APPBLAS = pilaenv.o #--------------------------------------------------------------------------- # Comment out the next 5 definitions if you already have the Level 1 PBLAS. #--------------------------------------------------------------------------- PIBLAS1 = picopy_.o PSBLAS1 = psswap_.o psscal_.o pscopy_.o psaxpy_.o psdot_.o psnrm2_.o \ psasum_.o psamax_.o PCBLAS1 = pcswap_.o pcscal_.o pcsscal_.o pccopy_.o pcaxpy_.o pcdotu_.o \ pcdotc_.o pscnrm2_.o pscasum_.o pcamax_.o PDBLAS1 = pdswap_.o pdscal_.o pdcopy_.o pdaxpy_.o pddot_.o pdnrm2_.o \ pdasum_.o pdamax_.o PZBLAS1 = pzswap_.o pzscal_.o pzdscal_.o pzcopy_.o pzaxpy_.o pzdotu_.o \ pzdotc_.o pdznrm2_.o pdzasum_.o pzamax_.o #--------------------------------------------------------------------------- # Comment out the next 4 definitions if you already have the Level 2 PBLAS. #--------------------------------------------------------------------------- PSBLAS2 = psgemv_.o psger_.o pssymv_.o pssyr_.o pssyr2_.o pstrmv_.o \ pstrsv_.o psagemv_.o psasymv_.o psatrmv_.o PCBLAS2 = pcgemv_.o pcgerc_.o pcgeru_.o pchemv_.o pcher_.o pcher2_.o \ pctrmv_.o pctrsv_.o pcagemv_.o pcahemv_.o pcatrmv_.o PDBLAS2 = pdgemv_.o pdger_.o pdsymv_.o pdsyr_.o pdsyr2_.o pdtrmv_.o \ pdtrsv_.o pdagemv_.o pdasymv_.o pdatrmv_.o PZBLAS2 = pzgemv_.o pzgerc_.o pzgeru_.o pzhemv_.o pzher_.o pzher2_.o \ pztrmv_.o pztrsv_.o pzagemv_.o pzahemv_.o pzatrmv_.o #--------------------------------------------------------------------------- # Comment out the next 4 definitions if you already have the Level 3 PBLAS. #--------------------------------------------------------------------------- PSBLAS3 = psgeadd_.o psgemm_.o pssymm_.o pssyr2k_.o pssyrk_.o pstradd_.o \ pstran_.o pstrmm_.o pstrsm_.o PCBLAS3 = pcgeadd_.o pcgemm_.o pchemm_.o pcher2k_.o pcherk_.o pcsymm_.o \ pcsyr2k_.o pcsyrk_.o pctradd_.o pctranc_.o pctranu_.o pctrmm_.o \ pctrsm_.o PDBLAS3 = pdgeadd_.o pdgemm_.o pdsymm_.o pdsyr2k_.o pdsyrk_.o pdtradd_.o \ pdtran_.o pdtrmm_.o pdtrsm_.o PZBLAS3 = pzgeadd_.o pzgemm_.o pzhemm_.o pzher2k_.o pzherk_.o pzsymm_.o \ pzsyr2k_.o pzsyrk_.o pztradd_.o pztranc_.o pztranu_.o pztrmm_.o \ pztrsm_.o #--------------------------------------------------------------------------- $(APPBLAS): $(FRC) $(PIBLAS1): $(FRC) $(PSBLAS1): $(FRC) $(PCBLAS1): $(FRC) $(PDBLAS1): $(FRC) $(PZBLAS1): $(FRC) $(PSBLAS2): $(FRC) $(PCBLAS2): $(FRC) $(PDBLAS2): $(FRC) $(PZBLAS2): $(FRC) $(PSBLAS3): $(FRC) $(PCBLAS3): $(FRC) $(PDBLAS3): $(FRC) $(PZBLAS3): $(FRC) PIBLAS = $(PIBLAS1) $(APPBLAS) PSBLAS = $(PSBLAS1) $(PSBLAS2) $(PSBLAS3) $(APPBLAS) PCBLAS = $(PCBLAS1) $(PCBLAS2) $(PCBLAS3) $(APPBLAS) PDBLAS = $(PDBLAS1) $(PDBLAS2) $(PDBLAS3) $(APPBLAS) PZBLAS = $(PZBLAS1) $(PZBLAS2) $(PZBLAS3) $(APPBLAS) integer: $(PIBLAS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PIBLAS) $(RANLIB) $(SCALAPACKLIB) single: integer $(PSBLAS) ( cd PBBLAS; $(MAKE) single ) ( cd PTZBLAS; $(MAKE) single ) ( cd PTOOLS; $(MAKE) single ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PSBLAS) $(RANLIB) $(SCALAPACKLIB) double: integer $(PDBLAS) ( cd PBBLAS; $(MAKE) double ) ( cd PTZBLAS; $(MAKE) double ) ( cd PTOOLS; $(MAKE) double ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PDBLAS) $(RANLIB) $(SCALAPACKLIB) complex: integer $(PCBLAS) ( cd PBBLAS; $(MAKE) complex ) ( cd PTZBLAS; $(MAKE) complex ) ( cd PTOOLS; $(MAKE) complex ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PCBLAS) $(RANLIB) $(SCALAPACKLIB) complex16: integer $(PZBLAS) ( cd PBBLAS; $(MAKE) complex16 ) ( cd PTZBLAS; $(MAKE) complex16 ) ( cd PTOOLS; $(MAKE) complex16 ) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PZBLAS) $(RANLIB) $(SCALAPACKLIB) FRC: @FRC=$(FRC) clean : rm -f *.o ( cd PBBLAS; $(MAKE) clean ) ( cd PTZBLAS; $(MAKE) clean ) ( cd PTOOLS; $(MAKE) clean ) .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c # # To compile without input argument checking replace previous line by # .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) -DNO_ARGCHK $*.c # # Note that when the PBLAS routines have been compiled without input ar- # gument checking, it is impossible to test PBLAS error exits. Trying to # test PBLAS error exits will then cause the tester executable to hang. # scalapack-1.8.0/PBLAS/SRC/pztradd_.c0000640000175000017500000003373410363532303016571 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pztradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZTRADD adds a trapezoidal matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C', * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * upper or lower trapezoidal submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of the * array C containing the upper or lower triangular part of the * triangular submatrix sub( C ) is to be referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * triangular submatrix sub( C ) is to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * triangular submatrix sub( C ) is to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the upper triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly lower tri- * angular part of the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the lower triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly upper tri- * angular part of the submatrix sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAC, TranOp, UploC, ctop, rtop; int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol, nprow, upper; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRADD", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRADD", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PZTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PZTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cztypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cztypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added, but not * transposed. This selection is based on the current setting for the BLACS * broadcast operations. */ rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_Cptradd( PB_Cztypeset(), &DirAC, &UploC, ( notran ? NOTRAN : ( ( TranOp == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PZTRADD */ } scalapack-1.8.0/PBLAS/SRC/pztrsv_.c0000640000175000017500000007615010363532303016470 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztrsv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pztrsv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PZTRSV solves one of the systems of equations * * sub( A )*sub( X ) = b, or sub( A )'*sub( X ) = b, or * * conjg( sub( A )' )*sub( X ) = b, * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * b and sub( X ) are n element subvectors and sub( A ) is an n by n * unit, or non-unit, upper or lower triangular submatrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * sub( X ) = b. * * TRANS = 'T' or 't' sub( A )' * sub( X ) = b. * * TRANS = 'C' or 'c' conjg( sub( A )' ) * sub( X ) = b. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On entry, sub( X ) is the n element right-hand side * b. On exit, sub( X ) is overwritten with the solution subvec- * tor. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, btop, ctop, * negone, * one, * zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, XACapbX, XACfr, XACld, XACsum, XARapbX, XARfr, XARld, XARsum, Xi, Xj, ctxt, info, ione=1, k, kb, kbnext, kbprev, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XACd[DLEN_], XARd[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XAC = NULL, * XAR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PZTRSV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PZTRSV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRSV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); size = type->size; one = type->one; zero = type->zero; negone = type->negone; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the last * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ if( Akp > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow, Arow, nprow ); Akp -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) zgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol ); Czgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) zset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Akp > 0 && Anq0 > 0 ) zgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } else { if( Anq0 > 0 ) zgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Czgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the first * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Anp0 = Anp - Akp ) > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow, nprow ); Anp0 -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) zgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol ); Czgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) zset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Anp0 > 0 && Anq0 > 0 ) zgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp+ktmp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp+ktmp, 0, XACld, size ), &ione ); } else { if( Anq0 > 0 ) zgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Czgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } } else { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAR in process row owning the first row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq0 = Anq - Akq ) > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol, npcol ); Anq0 -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) zgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow ); Czgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) zset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Anq0 > 0 ) zgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq+ktmp, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq+ktmp, XARld, size ), &XARld ); } else { if( Anp0 > 0 ) zgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Czgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process row owning the last row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later * is only locally updated. */ if( Akq > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol, Acol, npcol ); Akq -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) zgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow ); Czgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) zset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Akq > 0 ) zgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } else { if( Anp0 > 0 ) zgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Czgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } } if( XACfr ) free( XAC ); if( XARfr ) free( XAR ); /* * End of PZTRSV */ } scalapack-1.8.0/PBLAS/SRC/pzsyr2k_.c0000640000175000017500000005231110363532303016535 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzsyr2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzsyr2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PZSYR2K performs one of the symmetric rank 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) and sub( B ) are n by k submatrices in the * first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't', K specifies the number of rows * of the submatrices sub( A ) and sub( B ). K must be at least * zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYR2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYR2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PZSYR2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZSYR2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PZSYR2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZSYR2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PZSYR2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PZSYR2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZSYR2K */ } scalapack-1.8.0/PBLAS/SRC/psnrm2_.c0000640000175000017500000003637510363532303016346 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psnrm2_( int * N, float * NORM2, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void psnrm2_( N, NORM2, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * NORM2; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSNRM2 computes the 2-norm of a subvector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * NORM2 (local output) REAL * On exit, NORM2 specifies the 2-norm of the subvector sub( X ) * only in its scope (See below for further details). * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, dst, dist, info, k, mycol, mydist, myrow, npcol, nprow, src; float scale, ssq, temp1, temp2; /* * .. Local Arrays .. */ int Xd[DLEN_]; float * Xptr = NULL, work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSNRM2", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PSNRM2", info ); return; } #endif /* * Initialize NORM2 */ *NORM2 = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute NORM2 */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) *NORM2 = ABS( X[Xii+Xjj*Xd[LLD_]] ); return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; Xptr = X+(Xii+Xjj*Xld); for( k = 0; k < Xnq; k++ ) { if( *Xptr != ZERO ) { temp1 = ABS( *Xptr ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += Xld; } } /* * If Xnq <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process row * 0 if npcol > 1 and Xcol >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Csgesd2d( ctxt, 2, 1, ((char*) work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Csgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, myrow, src ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process row. */ top = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Csgebs2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2, myrow, 0 ); } /* * Compute NORM2 redundantly NORM2 = WORK( 1 ) * SQRT( WORK( 2 ) ) */ sasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ sasqrtb_( &scale, &ssq, NORM2 ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { Xptr = X+(Xii+Xjj*Xd[LLD_]); for( k = 0; k < Xnp; k++ ) { if( *Xptr != ZERO ) { temp1 = ABS( *Xptr ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr++; } } /* * If Xnp <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process * column 0 if nprow > 1 and Xrow >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Csgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Csgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, src, mycol ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process column */ top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Csgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2, 0, mycol ); } /* * Compute NORM2 redundantly NORM2 = WORK[0] * SQRT( WORK[1] ) */ sasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ sasqrtb_( &scale, &ssq, NORM2 ); } } return; } /* * End of PSNRM2 */ } scalapack-1.8.0/PBLAS/SRC/pztranu_.c0000640000175000017500000002270410363532303016617 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztranu_( int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pztranu_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZTRANU transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PZTRANU", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PZTRANU", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRANU", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cztypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PZTRANU */ } scalapack-1.8.0/PBLAS/SRC/pcatrmv_.c0000640000175000017500000006170110363532303016570 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCATRMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) ) * + abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an n by n triangular subma- * trix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'T' or 't' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'C' or 'c' * sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| + * |beta*sub( Y )|. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Yroc, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr, YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper, usiz; float * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCATRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCATRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PCATRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PCATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkvec( ctxt, "PCATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info ); PB_Cchkvec( ctxt, "PCATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PCATRMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); size = type->size; utyp = PB_Cstypeset(); usiz = type->usiz; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Scale YA in the case sub( Y ) has been reused */ if( notran && !( YApbY ) ) { /* * YA resides in (a) process column(s) */ if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Amp > 0 ) sascal_( &Amp, ((char *) tbeta), YA, &ione ); } } else if( !( notran ) && !( YApbY ) ) { /* * YA resides in (a) process row(s) */ if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Anq > 0 ) sascal_( &Anq, ((char *) tbeta), YA, &YAld ); } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, usiz ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cagemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } if( notran ) { PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PCATRMV */ } scalapack-1.8.0/PBLAS/SRC/pcsyr2k_.c0000640000175000017500000005226510363532303016516 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcsyr2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcsyr2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PCSYR2K performs one of the symmetric rank 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) and sub( B ) are n by k submatrices in the * first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't', K specifies the number of rows * of the submatrices sub( A ) and sub( B ). K must be at least * zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYR2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYR2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PCSYR2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCSYR2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PCSYR2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCSYR2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PCSYR2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PCSYR2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCSYR2K */ } scalapack-1.8.0/PBLAS/SRC/pdger_.c0000640000175000017500000002735610363532303016225 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdger_( int * M, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY, double * A, int * IA, int * JA, int * DESCA ) #else void pdger_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDGER performs the rank 1 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PDGER", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PDGER", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PDGER", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PDGER", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { dger_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], ((char *) (A+(Aii+Ajj*Ald))), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PDGER */ } scalapack-1.8.0/PBLAS/SRC/pzhemv_.c0000640000175000017500000005621110363532303016425 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzhemv_( F_CHAR_T UPLO, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzhemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZHEMV performs the matrix-vector operation * * sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, sub( X ) and sub( Y ) are n element sub- * vectors and sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; double * tbeta; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PZHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PZHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PZHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PZHEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * When alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { zset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { zscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { zset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { zscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( type, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) { if( ( tbeta[REAL_PART] == ZERO ) && ( tbeta[IMAG_PART] == ZERO ) ) { zset_( &Anq, ((char *) tbeta), YR, &YRld ); } else { zscal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) { if( ( tbeta[REAL_PART] == ZERO ) && ( tbeta[IMAG_PART] == ZERO ) ) { zset_( &Amp, ((char *) tbeta), YC, &ione ); } else { zscal_( &Amp, ((char *) tbeta), YC, &ione ); } } } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); zgemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzhemv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzhemv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, size ), &ione ); zgemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PZHEMV */ } scalapack-1.8.0/PBLAS/SRC/pcscal_.c0000640000175000017500000002240110602576752016367 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcscal_( int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pcscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PCSCAL multiplies an n element subvector sub( X ) by the scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PCSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PCSCAL", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { if ( ( ALPHA[REAL_PART] == ONE ) && ( ALPHA[IMAG_PART] == ZERO ) ) return; /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { cset_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } else { cscal_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { if ( ( ALPHA[REAL_PART] == ONE ) && ( ALPHA[IMAG_PART] == ZERO ) ) return; /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cctypeset(); if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { cset_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } else { cscal_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } } } return; } /* * End of PCSCAL */ } scalapack-1.8.0/PBLAS/SRC/pdasum_.c0000640000175000017500000002464510363532303016413 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdasum_( int * N, double * ASUM, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdasum_( N, ASUM, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * ASUM; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDASUM returns the sum of absolute values of the entries of a subvec- * tor sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ASUM (local output) DOUBLE PRECISION * On exit, ASUM specifies the sum of absolute values of the * subvector sub( X ) only in its scope (See below for further * details). * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDASUM", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PDASUM", info ); return; } #endif /* * Initialize ASUM */ *ASUM = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute ASUM */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *ASUM = ABS( X[Xii+Xjj*Xd[LLD_]] ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; dvasum_( &Xnq, ((char *) ASUM), ((char *)( X+(Xii+Xjj*Xld) )), &Xld ); } /* * If Xnq <= 0, ASUM is zero (see initialization above) */ if( ( npcol > 1 ) && ( Xcol >= 0 ) ) { /* * Combine the local results if npcol > 1 and Xcol >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { dvasum_( &Xnp, ((char *) ASUM), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } /* * If Xnp <= 0, ASUM is zero (see initialization above) */ if( ( nprow > 1 ) && ( Xrow >= 0 ) ) { /* * Combine the local results if nprow > 1 and Xrow >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } /* * End of PDASUM */ } scalapack-1.8.0/PBLAS/SRC/pdtran_.c0000640000175000017500000002250710363532303016405 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtran_( int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdtran_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PDTRAN transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PDTRAN", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PDTRAN", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRAN", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cdtypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cdtypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cdtypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PDTRAN */ } scalapack-1.8.0/PBLAS/SRC/pdasymv_.c0000640000175000017500000005600310363532303016576 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdasymv_( F_CHAR_T UPLO, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdasymv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDASYMV performs the matrix-vector operation * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a n element real subvec- * tor, sub( X ) is an n element subvector and sub( A ) is an n by n * symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper, usiz; double * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PDASYMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PDASYMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PDASYMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PDASYMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cdtypeset(); size = usiz = type->size; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( utyp, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) dascal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) dascal_( &Amp, ((char *) tbeta), YC, &ione ); } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); dagemv_( C2F_CHAR( TRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzasymv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzasymv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, usiz ), &ione ); dagemv_( C2F_CHAR( TRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || Yrow < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || Ycol < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PDASYMV */ } scalapack-1.8.0/PBLAS/SRC/psswap_.c0000640000175000017500000007617710363532303016446 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psswap_( int * N, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psswap_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PSSWAP swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * one, * top, tran, * zero; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, XisD, XisR, Xinb1D, XinbD, XisRow, Xii, Xj, Xjj, Xld, Xlinc, Xm, XmyprocD, XmyprocR, Xn, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xroc, Xrow, Ycol, Yi, Yii, Yinb1D, YinbD, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, Ym, YmyprocD, YmyprocR, Yn, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, cdst, csrc, ctxt, dst, gcdPQ, info, ione=1, k, l, lcmPQ, lcmb, mycol, myrow, npcol, npq, nprow, p, q, rdst, rsrc, src, size; PBTYP_T * type; PB_VM_T VM; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; char * buf = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PSSWAP", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PSSWAP", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PSSWAP", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_ ]; XnbD = Xd[NB_ ]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_ ]; XnbD = Xd[MB_ ]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_ ]; YnbD = Yd[NB_ ]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_ ]; YnbD = Yd[MB_ ]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is distributed but not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector to * swap sub( X ) with. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cstypeset(); size = type->size; /* * A purely local operation occurs iff the operands start in the same process * and, if either the grid is mono-dimensional or there is a single local block * to be swapped or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { sswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } if( RRorCC && XisR && YisR ) return; } } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to swap the distributed vectors in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( RRorCC && ( XprocR != YprocR ) ) { /* * Both operands are distributed in the same direction, but reside in different * process rows or columns. */ if( XmyprocR == XprocR ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( XisRow ) { Csgesd2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); Csgerv2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); } else { Csgesd2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); Csgerv2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); } } } if( YmyprocR == YprocR ) { YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( YisRow ) { Csgesd2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); Csgerv2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); } else { Csgesd2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); Csgerv2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); } } } } else { /* * General case when just one message needs to be exchanged */ if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) send it to the corresponding * process owning s piece of sub ( Y ). */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( YisRow ) { rdst = YprocR; cdst = dst; } else { rdst = dst; cdst = YprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { sswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( XisRow ) Csgesd2d( ctxt, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); else Csgesd2d( ctxt, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); } } } if( YmyprocR == YprocR ) { /* * The processes owning a piece of sub( Y ) receive the corresponding piece * of sub( X ) and send the piece of sub( Y ) they own to the same process. */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { src = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); src = MPosMod( src, XnprocsD ); if( XisRow ) { rsrc = XprocR; csrc = src; } else { rsrc = src; csrc = XprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( YnpD * size ); if( XisRow ) Csgerv2d( ctxt, 1, YnpD, buf, 1, rsrc, csrc ); else Csgerv2d( ctxt, YnpD, 1, buf, YnpD, rsrc, csrc ); if( YisRow ) Csgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); else Csgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); scopy_( &YnpD, buf, &ione, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); if( buf ) free( buf ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding piece * of sub( Y ). */ if( XnpD > 0 ) { if( ( myrow != rdst ) || ( mycol != cdst ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Csgerv2d( ctxt, 1, XnpD, buf, 1, rdst, cdst ); else Csgerv2d( ctxt, XnpD, 1, buf, XnpD, rdst, cdst ); scopy_( &XnpD, buf, &ione, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc ); if( buf ) free( buf ); } } } } } else if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { /* * General case */ tran = ( RRorCC ? CNOTRAN : CTRAN ); if( XisRow ) { Xscope = CCOLUMN; Xm = 1; rsrc = XprocR; } else { Xscope = CROW; Xn = 1; csrc = XprocR; } if( YisRow ) { Yscope = CCOLUMN; Ym = 1; rdst = YprocR; } else { Yscope = CROW; Yn = 1; cdst = YprocR; } lcmb = PB_Clcm( XnprocsD * XnbD, YnprocsD * YnbD ); one = type->one; zero = type->zero; gcdPQ = PB_Cgcd( XnprocsD, YnprocsD ); lcmPQ = ( XnprocsD / gcdPQ ) * YnprocsD; for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { Xroc = MModAdd( XprocD, p, XnprocsD ); Yroc = MModAdd( YprocD, q, YnprocsD ); if( ( XmyprocD == Xroc ) || ( YmyprocD == Yroc ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, Xroc, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, Yroc, YprocD, YnprocsD ); PB_CVMinit( &VM, 0, XnpD, YnpD, Xinb1D, Yinb1D, XnbD, YnbD, p, q, XnprocsD, YnprocsD, lcmb ); if( npq = PB_CVMnpq( &VM ) ) { if( ( RRorCC && ( Xroc == Yroc ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( Xroc == YprocR ) && ( XprocR == Yroc ) ) ) { /* * If I am at the intersection of the process cross, or simply common to the * process rows or columns owning sub( X ) and sub( Y ) */ if( ( YmyprocD == Yroc ) && ( YmyprocR == YprocR ) ) { PB_CVMswp( type, &VM, ROW, &Xscope, &tran, npq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Ylinc ); } } else { /* * Perform the message exchange: pack the data I own, send it, receive the * remote data, and unpack it. */ if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) ) { if( XisRow ) { Xn = npq; } else { Xm = npq; } if( YisRow ) { Yn = npq; cdst = Yroc; } else { Ym = npq; rdst = Yroc; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, ROW, &Xscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, zero, buf, Xm ); Csgesd2d( ctxt, Xm, Xn, buf, Xm, rdst, cdst ); Csgerv2d( ctxt, Ym, Yn, buf, Ym, rdst, cdst ); PB_CVMpack( type, &VM, ROW, &Xscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, one, buf, Ym ); if( buf ) free ( buf ); } if( ( YmyprocR == YprocR ) && ( YmyprocD == Yroc ) ) { if( XisRow ) { Xn = npq; csrc = Xroc; } else { Xm = npq; rsrc = Xroc; } if( YisRow ) { Yn = npq; } else { Ym = npq; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, COLUMN, &Yscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, zero, buf, Ym ); Csgesd2d( ctxt, Ym, Yn, buf, Ym, rsrc, csrc ); Csgerv2d( ctxt, Xm, Xn, buf, Xm, rsrc, csrc ); PB_CVMpack( type, &VM, COLUMN, &Yscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, one, buf, Xm ); if( buf ) free ( buf ); } } } } p = MModAdd1( p, XnprocsD ); q = MModAdd1( q, YnprocsD ); } } } if( XisR ) { /* * Replicate sub( X ) when necessary */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Csgebs2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Csgebr2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Csgebs2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Csgebr2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XmyprocD, XprocR ); } } } if( YisR ) { /* * Replicate sub( Y ) when necessary */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) Csgebs2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Csgebr2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) Csgebs2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Csgebr2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YmyprocD, YprocR ); } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ PB_CpswapND( PB_Cstypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ PB_CpswapND( PB_Cstypeset(), *N, ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed. */ PB_CpswapNN( PB_Cstypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } /* * End of PSSWAP */ } scalapack-1.8.0/PBLAS/SRC/pcgeadd_.c0000640000175000017500000002725010363532303016504 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcgeadd_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcgeadd_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCGEADD adds a matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C'. * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, DirC, ctop, rtop; int Ai, Aj, Ci, Cj, TrA, ctxt, info, mycol, myrow, npcol, nprow, notran; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ notran = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !notran ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCGEADD", "Illegal TRANS = %c\n", TrA ); info = -1; } if( notran ) PB_Cchkmat( ctxt, "PCGEADD", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); else PB_Cchkmat( ctxt, "PCGEADD", "A", *N, 3, *M, 2, Ai, Aj, Ad, 8, &info ); PB_Cchkmat( ctxt, "PCGEADD", "C", *M, 2, *N, 3, Ci, Cj, Cd, 13, &info ); } if( info ) { PB_Cabort( ctxt, "PCGEADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added. This * selection is based on the current setting for the BLACS broadcast operations. */ if( notran ) { rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) { DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpgeadd( PB_Cctypeset(), &DirA, &DirC, NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else if( TrA == CTRAN ) { PB_Cptran( PB_Cctypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cptran( PB_Cctypeset(), CONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } /* * End of PCGEADD */ } scalapack-1.8.0/PBLAS/SRC/pssyrk_.c0000640000175000017500000004501310363532303016445 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pssyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PSSYRK performs one of the symmetric rank k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't' or 'C' or 'c', K specifies the number of rows of the sub- * matrix sub( A ). K must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYRK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYRK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PSSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PSSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYRK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PSSYRK */ } scalapack-1.8.0/PBLAS/SRC/pzher_.c0000640000175000017500000003451110363532303016243 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzher_( F_CHAR_T UPLO, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * A, int * IA, int * JA, int * DESCA ) #else void pzher_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PZHER performs the Hermitian rank 1 operation * * sub( A ) := alpha*sub( X )*conjg( sub( X )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Alpha is a real scalar, sub( X ) is an n element subvector and * sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X * corresponding to the entries of the subvector sub( X ) need * not be set on input. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; cmplx16 Calpha; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZHER", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PZHER", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkmat( ctxt, "PZHER", "A", *N, 2, *N, 2, Ai, Aj, Ad, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PZHER", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Local rank-1 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); XCld = XCd0[LLD_]; XRld = XRd0[LLD_]; Calpha[REAL_PART] = ALPHA[REAL_PART]; Calpha[IMAG_PART] = ZERO; if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) zgerc_( &Akp, &Anq0, ((char *) Calpha), XC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cpsyr( type, UPPER, kb, 1, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzher ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr( type, LOWER, kb, 1, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzher ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) zgerc_( &Amp0, &Anq0, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); /* * End of PZHER */ } scalapack-1.8.0/PBLAS/SRC/psamax_.c0000640000175000017500000004524010363532303016405 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psamax_( int * N, float * AMAX, int * INDX, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void psamax_( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * INDX, * IX, * JX, * N; float * AMAX; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSAMAX computes the global index of the maximum element in absolute * value of a subvector sub( X ). The global index is returned in INDX * and the value of that element is returned in AMAX, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * AMAX (global output) REAL array * On exit, AMAX specifies the largest entry in absolute value * of the subvector sub( X ) only in its scope (See below for * further details). * * INDX (global output) INTEGER * On exit, INDX specifies the global index of the maximum ele- * ment in absolute value of the subvector sub( X ) only in its * scope (See below for further details). * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char cbtop, cctop, rbtop, rctop; int Xcol, Xgindx, Xi, Xii, Ximb, Xinb, Xj, Xjj, Xlindx, Xld, Xmb, Xnb, Xnp, Xnq, Xrow, Xsrc, ctxt, dist, dst, idumm, info, k, maxpos, mycol, mydist, myrow, npcol, nprow, src; /* * .. Local Arrays .. */ int Xd[DLEN_]; float work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSAMAX", "X", *N, 1, Xi, Xj, Xd, *INCX, 7, &info ); if( info ) { PB_Cabort( ctxt, "PSAMAX", info ); return; } #endif /* * Initialize INDX and AMAX */ *INDX = 0; *AMAX = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *INCX == 1 ) && ( Xd[M_] == 1 ) && ( *N == 1 ) ) { /* * Make sure I own some data and compute INDX and AMAX */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *INDX = *JX; *AMAX = X[Xii+Xjj*Xd[LLD_]]; } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( ( rctop == CTOP_DEFAULT ) || ( rctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { Xld = Xd[LLD_]; Xlindx = Xjj - 1 + isamax_( &Xnq, ((char*)(X+(Xii+Xjj*Xld))), &Xld ); Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); work[0] = X[Xii+Xlindx*Xld]; work[1] = ((float )( Xgindx+1 )); } else { work[0] = ZERO; work[1] = ZERO; } /* * Combine the local results using a 1-tree topology within process column 0 * if npcol > 1 or Xcol >= 0, i.e sub( X ) is distributed. */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Csgesd2d( ctxt, 2, 1, ((char*)work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Csgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2, myrow, src ); if( ABS( work[0] ) < ABS( work[2] ) ) { work[0] = work[2]; work[1] = work[3]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of INDX and AMAX within * their process row. */ rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Csgebs2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2, myrow, 0 ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ *AMAX = work[0]; *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : ( (int)(work[1]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; Xlindx = Xjj - 1 + isamax_( &Xnq, ((char*)(X+(Xii+Xjj*Xld))), &Xld ); *AMAX = X[Xii+Xlindx*Xld]; } else { *AMAX = ZERO; } if( Xcol >= 0 ) { /* * Combine leave on all the local maximum if Xcol >= 0, i.e sub( X ) is * distributed */ Csgamx2d( ctxt, ROW, &rctop, 1, 1, ((char*)AMAX), 1, &idumm, &maxpos, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( *AMAX != ZERO ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == maxpos ) { Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); *INDX = Xgindx + 1; Cigebs2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1, myrow, maxpos ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *JX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : Xlindx + 1 ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ( cctop == CTOP_DEFAULT ) || ( cctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { Xld = Xd[LLD_]; Xlindx = Xii - 1 + isamax_( &Xnp, ((char*)(X+(Xii+Xjj*Xld))), INCX ); Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); work[0] = X[Xlindx+Xjj*Xld]; work[1] = ((float )( Xgindx+1 )); } else { work[0] = ZERO; work[1] = ZERO; } /* * Combine the local results using a 1-tree topology within process row 0 * if nprow > 1 or Xrow >= 0, i.e sub( X ) is distributed. */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Csgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Csgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2, src, mycol ); if( ABS( work[0] ) < ABS( work[2] ) ) { work[0] = work[2]; work[1] = work[3]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process row 0 broadcasts the combined values of INDX and AMAX within their * process column. */ cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Csgebs2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2, 0, mycol ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ *AMAX = work[0]; *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : ( (int)(work[1]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; Xlindx = Xii - 1 + isamax_( &Xnp, ((char*)(X+(Xii+Xjj*Xld))), INCX ); *AMAX = X[Xlindx+Xjj*Xld]; } else { *AMAX = ZERO; } if( Xrow >= 0 ) { /* * Combine leave on all the local maximum if Xrow >= 0, i.e sub( X ) is * distributed. */ Csgamx2d( ctxt, COLUMN, &cctop, 1, 1, ((char*)AMAX), 1, &maxpos, &idumm, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( *AMAX != ZERO ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == maxpos ) { Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); *INDX = Xgindx + 1; Cigebs2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1, maxpos, mycol ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *IX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : Xlindx + 1 ); } } } return; } /* * End of PSAMAX */ } scalapack-1.8.0/PBLAS/SRC/pdswap_.c0000640000175000017500000007623110363532303016416 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdswap_( int * N, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdswap_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PDSWAP swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * one, * top, tran, * zero; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, XisD, XisR, Xinb1D, XinbD, XisRow, Xii, Xj, Xjj, Xld, Xlinc, Xm, XmyprocD, XmyprocR, Xn, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xroc, Xrow, Ycol, Yi, Yii, Yinb1D, YinbD, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, Ym, YmyprocD, YmyprocR, Yn, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, cdst, csrc, ctxt, dst, gcdPQ, info, ione=1, k, l, lcmPQ, lcmb, mycol, myrow, npcol, npq, nprow, p, q, rdst, rsrc, src, size; PBTYP_T * type; PB_VM_T VM; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; char * buf = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PDSWAP", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PDSWAP", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PDSWAP", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_ ]; XnbD = Xd[NB_ ]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_ ]; XnbD = Xd[MB_ ]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_ ]; YnbD = Yd[NB_ ]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_ ]; YnbD = Yd[MB_ ]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is distributed but not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector to * swap sub( X ) with. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cdtypeset(); size = type->size; /* * A purely local operation occurs iff the operands start in the same process * and, if either the grid is mono-dimensional or there is a single local block * to be swapped or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } if( RRorCC && XisR && YisR ) return; } } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to swap the distributed vectors in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( RRorCC && ( XprocR != YprocR ) ) { /* * Both operands are distributed in the same direction, but reside in different * process rows or columns. */ if( XmyprocR == XprocR ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( XisRow ) { Cdgesd2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); Cdgerv2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); } else { Cdgesd2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); Cdgerv2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); } } } if( YmyprocR == YprocR ) { YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( YisRow ) { Cdgesd2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); Cdgerv2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); } else { Cdgesd2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); Cdgerv2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); } } } } else { /* * General case when just one message needs to be exchanged */ if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) send it to the corresponding * process owning s piece of sub ( Y ). */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( YisRow ) { rdst = YprocR; cdst = dst; } else { rdst = dst; cdst = YprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( XisRow ) Cdgesd2d( ctxt, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); else Cdgesd2d( ctxt, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); } } } if( YmyprocR == YprocR ) { /* * The processes owning a piece of sub( Y ) receive the corresponding piece * of sub( X ) and send the piece of sub( Y ) they own to the same process. */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { src = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); src = MPosMod( src, XnprocsD ); if( XisRow ) { rsrc = XprocR; csrc = src; } else { rsrc = src; csrc = XprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( YnpD * size ); if( XisRow ) Cdgerv2d( ctxt, 1, YnpD, buf, 1, rsrc, csrc ); else Cdgerv2d( ctxt, YnpD, 1, buf, YnpD, rsrc, csrc ); if( YisRow ) Cdgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); else Cdgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); dcopy_( &YnpD, buf, &ione, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); if( buf ) free( buf ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding piece * of sub( Y ). */ if( XnpD > 0 ) { if( ( myrow != rdst ) || ( mycol != cdst ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Cdgerv2d( ctxt, 1, XnpD, buf, 1, rdst, cdst ); else Cdgerv2d( ctxt, XnpD, 1, buf, XnpD, rdst, cdst ); dcopy_( &XnpD, buf, &ione, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc ); if( buf ) free( buf ); } } } } } else if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { /* * General case */ tran = ( RRorCC ? CNOTRAN : CTRAN ); if( XisRow ) { Xscope = CCOLUMN; Xm = 1; rsrc = XprocR; } else { Xscope = CROW; Xn = 1; csrc = XprocR; } if( YisRow ) { Yscope = CCOLUMN; Ym = 1; rdst = YprocR; } else { Yscope = CROW; Yn = 1; cdst = YprocR; } lcmb = PB_Clcm( XnprocsD * XnbD, YnprocsD * YnbD ); one = type->one; zero = type->zero; gcdPQ = PB_Cgcd( XnprocsD, YnprocsD ); lcmPQ = ( XnprocsD / gcdPQ ) * YnprocsD; for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { Xroc = MModAdd( XprocD, p, XnprocsD ); Yroc = MModAdd( YprocD, q, YnprocsD ); if( ( XmyprocD == Xroc ) || ( YmyprocD == Yroc ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, Xroc, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, Yroc, YprocD, YnprocsD ); PB_CVMinit( &VM, 0, XnpD, YnpD, Xinb1D, Yinb1D, XnbD, YnbD, p, q, XnprocsD, YnprocsD, lcmb ); if( npq = PB_CVMnpq( &VM ) ) { if( ( RRorCC && ( Xroc == Yroc ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( Xroc == YprocR ) && ( XprocR == Yroc ) ) ) { /* * If I am at the intersection of the process cross, or simply common to the * process rows or columns owning sub( X ) and sub( Y ) */ if( ( YmyprocD == Yroc ) && ( YmyprocR == YprocR ) ) { PB_CVMswp( type, &VM, ROW, &Xscope, &tran, npq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Ylinc ); } } else { /* * Perform the message exchange: pack the data I own, send it, receive the * remote data, and unpack it. */ if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) ) { if( XisRow ) { Xn = npq; } else { Xm = npq; } if( YisRow ) { Yn = npq; cdst = Yroc; } else { Ym = npq; rdst = Yroc; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, ROW, &Xscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, zero, buf, Xm ); Cdgesd2d( ctxt, Xm, Xn, buf, Xm, rdst, cdst ); Cdgerv2d( ctxt, Ym, Yn, buf, Ym, rdst, cdst ); PB_CVMpack( type, &VM, ROW, &Xscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, one, buf, Ym ); if( buf ) free ( buf ); } if( ( YmyprocR == YprocR ) && ( YmyprocD == Yroc ) ) { if( XisRow ) { Xn = npq; csrc = Xroc; } else { Xm = npq; rsrc = Xroc; } if( YisRow ) { Yn = npq; } else { Ym = npq; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, COLUMN, &Yscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, zero, buf, Ym ); Cdgesd2d( ctxt, Ym, Yn, buf, Ym, rsrc, csrc ); Cdgerv2d( ctxt, Xm, Xn, buf, Xm, rsrc, csrc ); PB_CVMpack( type, &VM, COLUMN, &Yscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, one, buf, Xm ); if( buf ) free ( buf ); } } } } p = MModAdd1( p, XnprocsD ); q = MModAdd1( q, YnprocsD ); } } } if( XisR ) { /* * Replicate sub( X ) when necessary */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Cdgebs2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Cdgebr2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Cdgebs2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Cdgebr2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XmyprocD, XprocR ); } } } if( YisR ) { /* * Replicate sub( Y ) when necessary */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) Cdgebs2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Cdgebr2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) Cdgebs2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Cdgebr2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YmyprocD, YprocR ); } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ PB_CpswapND( PB_Cdtypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ PB_CpswapND( PB_Cdtypeset(), *N, ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed. */ PB_CpswapNN( PB_Cdtypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } /* * End of PDSWAP */ } scalapack-1.8.0/PBLAS/SRC/pzdotu_.c0000640000175000017500000006673010363532303016450 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzdotu_( int * N, double * DOT, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzdotu_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; double * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PZDOTU forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) COMPLEX*16 array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZDOTU", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PZDOTU", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PZDOTU", info ); return; } #endif DOT[REAL_PART] = ZERO; DOT[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cztypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cztypeset(); size = type->size; dot = type->Fvvdotu; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Czgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Czgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Czgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Czgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Czgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Czgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Czgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Czgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Czgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Czgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Czgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cztypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cztypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotu ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cztypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } /* * End of PZDOTU */ } scalapack-1.8.0/PBLAS/SRC/pzgemm_.c0000640000175000017500000005071310363532303016414 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzgemm_( F_CHAR_T TRANSA, F_CHAR_T TRANSB, int * M, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzgemm_( TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANSA, TRANSB; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PZGEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * conjg(A(IA:IA+K-1,JA:JA+M-1)') if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * conjg(B(IB:IB+N-1,JB:JB+K-1)') if TRANSB = 'C'. * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * * TRANSB = 'C' or 'c' op( sub( B ) ) = conjg( sub( B )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, DirBC, OpC, OpR, TrA, TrB, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ChooseBC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, nota, notb, npcol, nprow; double ABest, ACest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANSA )[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( F2C_CHAR( TRANSB )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1001 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZGEMM", "Illegal TRANSA = %c\n", TrA ); info = -1; } else if( ( !notb ) && ( TrB != CTRAN ) && ( TrB != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZGEMM", "Illegal TRANSB = %c\n", TrB ); info = -2; } if( nota ) PB_Cchkmat( ctxt, "PZGEMM", "A", *M, 3, *K, 5, Ai, Aj, Ad, 10, &info ); else PB_Cchkmat( ctxt, "PZGEMM", "A", *K, 5, *M, 3, Ai, Aj, Ad, 10, &info ); if( notb ) PB_Cchkmat( ctxt, "PZGEMM", "B", *K, 5, *N, 4, Bi, Bj, Bd, 14, &info ); else PB_Cchkmat( ctxt, "PZGEMM", "B", *N, 4, *K, 5, Bi, Bj, Bd, 14, &info ); PB_Cchkmat( ctxt, "PZGEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PZGEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * If alpha or K is zero, sub( C ) := beta * sub( C ). */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char * ) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABest: both operands sub( A ) and sub( B ) are communicated (M, N >> K) * ACest: both operands sub( A ) and sub( C ) are communicated (K, N >> M) * BCest: both operands sub( B ) and sub( C ) are communicated (M, K >> N) */ ABest = (double)(*K); ACest = (double)(*M); BCest = (double)(*N); if( notb ) { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); ABest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[MB_], nprow ); BCest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ACest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } else { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); BCest *= CBRATIO * ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *K, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } ChooseAB = ( ( ABest <= ( 1.3 * BCest ) ) && ( ABest <= ( 1.3 * ACest ) ) ); ChooseBC = ( ( BCest <= ACest ) && ( ( 1.3 * BCest ) <= ABest ) ); /* * BLACS topologies are enforced iff M, N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) && ( *K > nb ) ); if( ChooseAB ) { OpR = CBCAST; OpC = CBCAST; } else if( ChooseBC ) { if( nota ) { OpR = CCOMBINE; OpC = CBCAST; } else { OpR = CBCAST; OpC = CCOMBINE; } } else { if( notb ) { OpR = CBCAST; OpC = CCOMBINE; } else { OpR = CCOMBINE; OpC = CBCAST; } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_IRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_IRING ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirAB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirBC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); if( ChooseAB ) { PB_CpgemmAB( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else if( ChooseBC ) { PB_CpgemmBC( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { PB_CpgemmAC( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZGEMM */ } scalapack-1.8.0/PBLAS/SRC/pdnrm2_.c0000640000175000017500000003642710363532303016325 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdnrm2_( int * N, double * NORM2, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdnrm2_( N, NORM2, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * NORM2; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDNRM2 computes the 2-norm of a subvector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * NORM2 (local output) DOUBLE PRECISION * On exit, NORM2 specifies the 2-norm of the subvector sub( X ) * only in its scope (See below for further details). * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, dst, dist, info, k, mycol, mydist, myrow, npcol, nprow, src; double scale, ssq, temp1, temp2; /* * .. Local Arrays .. */ int Xd[DLEN_]; double * Xptr = NULL, work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDNRM2", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PDNRM2", info ); return; } #endif /* * Initialize NORM2 */ *NORM2 = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute NORM2 */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) *NORM2 = ABS( X[Xii+Xjj*Xd[LLD_]] ); return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; Xptr = X+(Xii+Xjj*Xld); for( k = 0; k < Xnq; k++ ) { if( *Xptr != ZERO ) { temp1 = ABS( *Xptr ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += Xld; } } /* * If Xnq <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process row * 0 if npcol > 1 and Xcol >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Cdgesd2d( ctxt, 2, 1, ((char*) work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Cdgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, myrow, src ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process row. */ top = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Cdgebs2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2, myrow, 0 ); } /* * Compute NORM2 redundantly NORM2 = WORK( 1 ) * SQRT( WORK( 2 ) ) */ dasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ dasqrtb_( &scale, &ssq, NORM2 ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { Xptr = X+(Xii+Xjj*Xd[LLD_]); for( k = 0; k < Xnp; k++ ) { if( *Xptr != ZERO ) { temp1 = ABS( *Xptr ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr++; } } /* * If Xnp <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process * column 0 if nprow > 1 and Xrow >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Cdgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, src, mycol ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process column */ top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2, 0, mycol ); } /* * Compute NORM2 redundantly NORM2 = WORK[0] * SQRT( WORK[1] ) */ dasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ dasqrtb_( &scale, &ssq, NORM2 ); } } return; } /* * End of PDNRM2 */ } scalapack-1.8.0/PBLAS/SRC/pdtrmv_.c0000640000175000017500000004752510363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdtrmv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PDTRMV performs one of the matrix-vector operations * * sub( X ) := sub( A )*sub( X ) or sub( X ) := sub( A )'*sub( X ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * sub( X ) is an n element subvector and sub( A ) is an n by n unit, * or non-unit, upper or lower triangular submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( X ) := sub( A ) * sub( X ). * * TRANS = 'T' or 't' sub( X ) := sub( A )' * sub( X ). * * TRANS = 'C' or 'c' sub( X ) := sub( A )' * sub( X ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the transfor- * med subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, * one, * tbeta, top, *zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, YAfr, YAld, YApbY, YAsum, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PDTRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PDTRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRMV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); size = type->size; one = type->one; zero = type->zero; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process column(s), then zero it. */ if( Xroc == CCOLUMN ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process columns(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { dset_( &Xnp, zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ), &ione ); } } } } else { /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process row(s), then zero it. */ if( Xroc == CROW ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; dset_( &Xnq, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); } } } } /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, size ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } if( YAfr ) free( YA ); /* * End of PDTRMV */ } scalapack-1.8.0/PBLAS/SRC/pzdscal_.c0000640000175000017500000002211610363532303016551 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzdscal_( int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pzdscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PZDSCAL multiplies an n element subvector sub( X ) by the real scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PZDSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PZDSCAL", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ONE ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); if( ALPHA[REAL_PART] == ZERO ) { zset_( &Xnq, type->zero, Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } else { zdscal_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cztypeset(); if( ALPHA[REAL_PART] == ZERO ) { zset_( &Xnp, type->zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } else { zdscal_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } } } return; } /* * End of PZDSCAL */ } scalapack-1.8.0/PBLAS/SRC/psger_.c0000640000175000017500000002727210363532303016241 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psger_( int * M, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY, float * A, int * IA, int * JA, int * DESCA ) #else void psger_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSGER performs the rank 1 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PSGER", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PSGER", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PSGER", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PSGER", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { sger_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], ((char *) (A+(Aii+Ajj*Ald))), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PSGER */ } scalapack-1.8.0/PBLAS/SRC/psaxpy_.c0000640000175000017500000002247310363532303016443 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psaxpy_( int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psaxpy_( N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PSAXPY adds one subvector to another, * * sub( Y ) := sub( Y ) + alpha * sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER. * On entry, N specifies the length of the subvectors to be * added. N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PSAXPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PSAXPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PSAXPY", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, ROW, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, COLUMN, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PSAXPY */ } scalapack-1.8.0/PBLAS/SRC/pzgemv_.c0000640000175000017500000004361210363532303016425 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzgemv_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzgemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZGEMV performs one of the matrix-vector operations * * sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors * and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ), * * TRANS = 'T' or 't', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ), * * TRANS = 'C' or 'c', * sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) + * beta*sub( Y ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PZGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PZGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PZGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PZGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PZGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PZGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * When alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { zset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { zscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { zset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { zscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PZGEMV */ } scalapack-1.8.0/PBLAS/SRC/pcgeru_.c0000640000175000017500000002742210363532303016403 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcgeru_( int * M, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY, float * A, int * IA, int * JA, int * DESCA ) #else void pcgeru_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCGERU performs the rank 1 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCGERU", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PCGERU", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PCGERU", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PCGERU", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cgeru_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PCGERU */ } scalapack-1.8.0/PBLAS/SRC/pcswap_.c0000640000175000017500000007620510363532303016416 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcswap_( int * N, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcswap_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PCSWAP swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * one, * top, tran, * zero; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, XisD, XisR, Xinb1D, XinbD, XisRow, Xii, Xj, Xjj, Xld, Xlinc, Xm, XmyprocD, XmyprocR, Xn, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xroc, Xrow, Ycol, Yi, Yii, Yinb1D, YinbD, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, Ym, YmyprocD, YmyprocR, Yn, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, cdst, csrc, ctxt, dst, gcdPQ, info, ione=1, k, l, lcmPQ, lcmb, mycol, myrow, npcol, npq, nprow, p, q, rdst, rsrc, src, size; PBTYP_T * type; PB_VM_T VM; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; char * buf = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCSWAP", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PCSWAP", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PCSWAP", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_ ]; XnbD = Xd[NB_ ]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_ ]; XnbD = Xd[MB_ ]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_ ]; YnbD = Yd[NB_ ]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_ ]; YnbD = Yd[MB_ ]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is distributed but not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector to * swap sub( X ) with. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cctypeset(); size = type->size; /* * A purely local operation occurs iff the operands start in the same process * and, if either the grid is mono-dimensional or there is a single local block * to be swapped or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { cswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } if( RRorCC && XisR && YisR ) return; } } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to swap the distributed vectors in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( RRorCC && ( XprocR != YprocR ) ) { /* * Both operands are distributed in the same direction, but reside in different * process rows or columns. */ if( XmyprocR == XprocR ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( XisRow ) { Ccgesd2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); Ccgerv2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); } else { Ccgesd2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); Ccgerv2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); } } } if( YmyprocR == YprocR ) { YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( YisRow ) { Ccgesd2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); Ccgerv2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); } else { Ccgesd2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); Ccgerv2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); } } } } else { /* * General case when just one message needs to be exchanged */ if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) send it to the corresponding * process owning s piece of sub ( Y ). */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( YisRow ) { rdst = YprocR; cdst = dst; } else { rdst = dst; cdst = YprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { cswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( XisRow ) Ccgesd2d( ctxt, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); else Ccgesd2d( ctxt, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); } } } if( YmyprocR == YprocR ) { /* * The processes owning a piece of sub( Y ) receive the corresponding piece * of sub( X ) and send the piece of sub( Y ) they own to the same process. */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { src = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); src = MPosMod( src, XnprocsD ); if( XisRow ) { rsrc = XprocR; csrc = src; } else { rsrc = src; csrc = XprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( YnpD * size ); if( XisRow ) Ccgerv2d( ctxt, 1, YnpD, buf, 1, rsrc, csrc ); else Ccgerv2d( ctxt, YnpD, 1, buf, YnpD, rsrc, csrc ); if( YisRow ) Ccgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); else Ccgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); ccopy_( &YnpD, buf, &ione, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); if( buf ) free( buf ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding piece * of sub( Y ). */ if( XnpD > 0 ) { if( ( myrow != rdst ) || ( mycol != cdst ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Ccgerv2d( ctxt, 1, XnpD, buf, 1, rdst, cdst ); else Ccgerv2d( ctxt, XnpD, 1, buf, XnpD, rdst, cdst ); ccopy_( &XnpD, buf, &ione, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc ); if( buf ) free( buf ); } } } } } else if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { /* * General case */ tran = ( RRorCC ? CNOTRAN : CTRAN ); if( XisRow ) { Xscope = CCOLUMN; Xm = 1; rsrc = XprocR; } else { Xscope = CROW; Xn = 1; csrc = XprocR; } if( YisRow ) { Yscope = CCOLUMN; Ym = 1; rdst = YprocR; } else { Yscope = CROW; Yn = 1; cdst = YprocR; } lcmb = PB_Clcm( XnprocsD * XnbD, YnprocsD * YnbD ); one = type->one; zero = type->zero; gcdPQ = PB_Cgcd( XnprocsD, YnprocsD ); lcmPQ = ( XnprocsD / gcdPQ ) * YnprocsD; for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { Xroc = MModAdd( XprocD, p, XnprocsD ); Yroc = MModAdd( YprocD, q, YnprocsD ); if( ( XmyprocD == Xroc ) || ( YmyprocD == Yroc ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, Xroc, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, Yroc, YprocD, YnprocsD ); PB_CVMinit( &VM, 0, XnpD, YnpD, Xinb1D, Yinb1D, XnbD, YnbD, p, q, XnprocsD, YnprocsD, lcmb ); if( npq = PB_CVMnpq( &VM ) ) { if( ( RRorCC && ( Xroc == Yroc ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( Xroc == YprocR ) && ( XprocR == Yroc ) ) ) { /* * If I am at the intersection of the process cross, or simply common to the * process rows or columns owning sub( X ) and sub( Y ) */ if( ( YmyprocD == Yroc ) && ( YmyprocR == YprocR ) ) { PB_CVMswp( type, &VM, ROW, &Xscope, &tran, npq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Ylinc ); } } else { /* * Perform the message exchange: pack the data I own, send it, receive the * remote data, and unpack it. */ if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) ) { if( XisRow ) { Xn = npq; } else { Xm = npq; } if( YisRow ) { Yn = npq; cdst = Yroc; } else { Ym = npq; rdst = Yroc; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, ROW, &Xscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, zero, buf, Xm ); Ccgesd2d( ctxt, Xm, Xn, buf, Xm, rdst, cdst ); Ccgerv2d( ctxt, Ym, Yn, buf, Ym, rdst, cdst ); PB_CVMpack( type, &VM, ROW, &Xscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, one, buf, Ym ); if( buf ) free ( buf ); } if( ( YmyprocR == YprocR ) && ( YmyprocD == Yroc ) ) { if( XisRow ) { Xn = npq; csrc = Xroc; } else { Xm = npq; rsrc = Xroc; } if( YisRow ) { Yn = npq; } else { Ym = npq; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, COLUMN, &Yscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, zero, buf, Ym ); Ccgesd2d( ctxt, Ym, Yn, buf, Ym, rsrc, csrc ); Ccgerv2d( ctxt, Xm, Xn, buf, Xm, rsrc, csrc ); PB_CVMpack( type, &VM, COLUMN, &Yscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, one, buf, Xm ); if( buf ) free ( buf ); } } } } p = MModAdd1( p, XnprocsD ); q = MModAdd1( q, YnprocsD ); } } } if( XisR ) { /* * Replicate sub( X ) when necessary */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Ccgebr2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Ccgebr2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XmyprocD, XprocR ); } } } if( YisR ) { /* * Replicate sub( Y ) when necessary */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Ccgebr2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) Ccgebs2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Ccgebr2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YmyprocD, YprocR ); } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ PB_CpswapND( PB_Cctypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ PB_CpswapND( PB_Cctypeset(), *N, ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed. */ PB_CpswapNN( PB_Cctypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } /* * End of PCSWAP */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/0000750000175000017500000000000010605234724015630 5ustar muammarmuammarscalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cgcd.c0000640000175000017500000000433210363532303017213 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cgcd( int M, int N ) #else int PB_Cgcd( M, N ) /* * .. Scalar Arguments .. */ int M, N; #endif { /* * Purpose * ======= * * PB_Cgcd computes and returns the Greatest Common Divisor (GCD) of two * positive integers M and N using a binary gcd algorithm. * * Arguments * ========= * * M (input) INTEGER * On entry, M must be at least zero. * * N (input) INTEGER * On entry, N must be at least zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int gcd=1, m_val, n_val, t; /* .. * .. Executable Statements .. * */ if( M > N ) { m_val = N; n_val = M; } else { m_val = M; n_val = N; } while( m_val > 0 ) { while( !( m_val & 1 ) ) { /* * m is even */ m_val >>= 1; /* if n is odd, gcd( m, n ) = gcd( m / 2, n ) */ /* * if n is odd, gcd( m, n ) = gcd( m / 2, n ) */ if( !( n_val & 1 ) ) { /* * otherwise gcd( m, n ) = 2 * gcd( m / 2, n / 2 ) */ n_val >>= 1; gcd <<= 1; } } /* * m is odd now. If n is odd, gcd( m, n ) = gcd( m, ( m - n ) / 2 ). * Otherwise, gcd( m, n ) = gcd ( m, n / 2 ). */ n_val -= ( n_val & 1 ) ? m_val : 0; n_val >>= 1; while( n_val >= m_val ) { /* * If n is odd, gcd( m, n ) = gcd( m, ( m - n ) / 2 ). * Otherwise, gcd( m, n ) = gcd ( m, n / 2 ) */ n_val -= ( n_val & 1 ) ? m_val : 0; n_val >>= 1; } /* * n < m, gcd( m, n ) = gcd( n, m ) */ t = n_val; n_val = m_val; m_val = t; } return ( n_val * gcd ); /* * End of PB_Cgcd */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cbinfo.c0000640000175000017500000002415510363532303017560 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cbinfo( int OFFD, int M, int N, int IMB1, int INB1, int MB, int NB, int MRROW, int MRCOL, int * LCMT00, int * MBLKS, int * NBLKS, int * IMBLOC, int * INBLOC, int * LMBLOC, int * LNBLOC, int * ILOW, int * LOW, int * IUPP, int * UPP ) #else void PB_Cbinfo( OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, LCMT00, MBLKS, NBLKS, IMBLOC, INBLOC, LMBLOC, LNBLOC, ILOW, LOW, IUPP, UPP ) /* * .. Scalar Arguments .. */ int * ILOW, IMB1, * IMBLOC, INB1, * INBLOC, * IUPP, * LCMT00, * LMBLOC, * LNBLOC, * LOW, M, MB, * MBLKS, MRCOL, MRROW, N, NB, * NBLKS, OFFD, * UPP; #endif { /* * Purpose * ======= * * PB_Cbinfo initializes the local information of an m by n local array * owned by the process of relative coordinates ( MRROW, MRCOL ). Note * that if m or n is less or equal than zero, there is no data, in which * case this process does not need the local information computed by * this routine to proceed. * * Arguments * ========= * * OFFD (global input) INTEGER * On entry, OFFD specifies the off-diagonal of the underlying * matrix of interest as follows: * OFFD = 0 specifies the main diagonal, * OFFD > 0 specifies lower subdiagonals, and * OFFD < 0 specifies upper superdiagonals. * * M (local input) INTEGER * On entry, M specifies the local number of rows of the under- * lying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the local number of columns of the un- * derlying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). N must be at least zero. * * IMB1 (global input) INTEGER * On input, IMB1 specifies the global true size of the first * block of rows of the underlying global submatrix. IMB1 must * be at least MIN( 1, M ). * * INB1 (global input) INTEGER * On input, INB1 specifies the global true size of the first * block of columns of the underlying global submatrix. INB1 * must be at least MIN( 1, N ). * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * MRROW (local input) INTEGER * On entry, MRROW specifies the relative row coordinate of the * process that possesses these M rows. MRROW must be least zero * and strictly less than NPROW. * * MRCOL (local input) INTEGER * On entry, MRCOL specifies the relative column coordinate of * the process that possesses these N columns. MRCOL must be * least zero and strictly less than NPCOL. * * LCMT00 (local output) INTEGER * On exit, LCMT00 is the LCM value of the left upper block of * this m by n local block owned by the process of relative co- * ordinates ( MRROW, MRCOL ). * * MBLKS (local output) INTEGER * On exit, MBLKS specifies the local number of blocks of rows * corresponding to M. MBLKS must be at least zero. * * NBLKS (local output) INTEGER * On exit, NBLKS specifies the local number of blocks of co- * lumns corresponding to N. NBLKS must be at least zero. * * IMBLOC (local output) INTEGER * On exit, IMBLOC specifies the number of rows (size) of the * uppest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). IMBLOC is at least * MIN( 1, M ). * * INBLOC (local output) INTEGER * On exit, INBLOC specifies the number of columns (size) of * the leftmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). INBLOC is * at least MIN( 1, N ). * * LMBLOC (local output) INTEGER * On exit, LMBLOC specifies the number of rows (size) of the * lowest blocks of this m by n local array owned by the process * of relative coordinates ( MRROW, MRCOL ). LMBLOC is at least * MIN( 1, M ). * * LNBLOC (local output) INTEGER * On exit, LNBLOC specifies the number of columns (size) of the * rightmost blocks of this m by n local array owned by the * process of relative coordinates ( MRROW, MRCOL ). LNBLOC is * at least MIN( 1, N ). * * ILOW (local output) INTEGER * On exit, ILOW is the lower bound characterizing the first co- * lumn block owning offdiagonals of this m by n array. ILOW * must be less or equal than zero. * * LOW (global output) INTEGER * On exit, LOW is the lower bound characterizing the column * blocks with te exception of the first one (see ILOW) owning * offdiagonals of this m by n array. LOW must be less or equal * than zero. * * IUPP (local output) INTEGER * On exit, IUPP is the upper bound characterizing the first row * block owning offdiagonals of this m by n array. IUPP must be * greater or equal than zero. * * UPP (global output) INTEGER * On exit, UPP is the upper bound characterizing the row * blocks with te exception of the first one (see IUPP) owning * offdiagonals of this m by n array. UPP must be greater or * equal than zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int tmp1; /* .. * .. Executable Statements .. * */ /* * Initialize LOW, ILOW, UPP, IUPP, LMBLOC, LNBLOC, IMBLOC, INBLOC, MBLKS, * NBLKS and LCMT00. */ *LOW = 1 - NB; *UPP = MB - 1; *LCMT00 = OFFD; if( ( M <= 0 ) || ( N <= 0 ) ) { /* * If the local virtual array is empty, then simplify the remaining of the * initialization. */ *IUPP = ( MRROW ? MB - 1 : ( IMB1 > 0 ? IMB1 - 1 : 0 ) ); *IMBLOC = 0; *MBLKS = 0; *LMBLOC = 0; *ILOW = ( MRCOL ? 1 - NB : ( INB1 > 0 ? 1 - INB1 : 0 ) ); *INBLOC = 0; *NBLKS = 0; *LNBLOC = 0; *LCMT00 += ( *LOW - *ILOW + MRCOL * NB ) - ( *IUPP - *UPP + MRROW * MB ); return; } if( MRROW ) { /* * I am not in the first relative process row. Use the first local row block * size MB to initialize the VM structure. */ *IMBLOC = MIN( M, MB ); *IUPP = MB - 1; *LCMT00 -= IMB1 - MB + MRROW * MB; *MBLKS = ( M - 1 ) / MB + 1; *LMBLOC = M - ( M / MB ) * MB; if( !( *LMBLOC ) ) *LMBLOC = MB; if( MRCOL ) { /* * I am not in the first relative process column. Use the first local column * block size NB to initialize the VM structure. */ *INBLOC = MIN( N, NB ); *ILOW = 1 - NB; *LCMT00 += INB1 - NB + MRCOL * NB; *NBLKS = ( N - 1 ) / NB + 1; *LNBLOC = N - ( N / NB ) * NB; if( !( *LNBLOC ) ) *LNBLOC = NB; } else { /* * I am in the first relative process column. Use the first column block size * INB1 to initialize the VM structure. */ *INBLOC = INB1; *ILOW = 1 - INB1; tmp1 = N - INB1; if( tmp1 ) { /* * There is more than one column block. Compute the number of local column * blocks and the size of the last one. */ *NBLKS = ( tmp1 - 1 ) / NB + 2; *LNBLOC = tmp1 - ( tmp1 / NB ) * NB; if( !( *LNBLOC ) ) *LNBLOC = NB; } else { /* * There is only one column block. */ *NBLKS = 1; *LNBLOC = INB1; } } } else { /* * I am in the first relative process row. Use the first row block size IMB1 to * initialize the VM structure. */ *IMBLOC = IMB1; *IUPP = IMB1 - 1; tmp1 = M - IMB1; if( tmp1 ) { /* * There is more than one row block. Compute the number of local row blocks and * the size of the last one. */ *MBLKS = ( tmp1 - 1 ) / MB + 2; *LMBLOC = tmp1 - ( tmp1 / MB ) * MB; if( !( *LMBLOC ) ) *LMBLOC = MB; } else { /* * There is only one row block. */ *MBLKS = 1; *LMBLOC = IMB1; } if( MRCOL ) { /* * I am not in the first relative process column. Use the first local column * block size NB to initialize the VM structure. */ *INBLOC = MIN( N, NB ); *ILOW = 1 - NB; *LCMT00 += INB1 - NB + MRCOL * NB; *NBLKS = ( N - 1 ) / NB + 1; *LNBLOC = N - ( N / NB ) * NB; if( !( *LNBLOC ) ) *LNBLOC = NB; } else { /* * I am in the first relative process column. Use the first column block size * INB1 to initialize the VM structure. */ *INBLOC = INB1; *ILOW = 1 - INB1; tmp1 = N - INB1; if( tmp1 ) { /* * There is more than one column block. Compute the number of local column * blocks and the size of the last one. */ *NBLKS = ( tmp1 - 1 ) / NB + 2; *LNBLOC = tmp1 - ( tmp1 / NB ) * NB; if( !( *LNBLOC ) ) *LNBLOC = NB; } else { /* * There is only one column block. */ *NBLKS = 1; *LNBLOC = INB1; } } } /* * End of PB_Cbinfo */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplapad.c0000640000175000017500000003500110363532303017714 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplapad( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M, int N, char * ALPHA, char * BETA, char * A, int IA, int JA, int * DESCA ) #else void PB_Cplapad( TYPE, UPLO, CONJUG, M, N, ALPHA, BETA, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ char * CONJUG, * UPLO; int IA, JA, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A; #endif { /* * Purpose * ======= * * PB_Cplapad initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) * denoted by sub( A ) to beta on the diagonal or zeros the imaginary * part of those diagonals and set the offdiagonals to alpha. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies the part of the submatrix sub( A ) * to be set: * = 'L' or 'l': Lower triangular part is set; the strictly * upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is set; the strictly * lower triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies what should be done to the diago- * nals as follows. When UPLO is 'L', 'l', 'U' or 'u' and CONJUG * is 'Z' or 'z', the imaginary part of the diagonals is set to * zero. Otherwise, the diagonals are set to beta. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant to which the offdiagonal elements are to be set. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta, i.e., the constant * to which the diagonal elements are to be set. BETA is not re- * ferenced when UPLO is 'L', 'l', 'U' or 'u' and CONJUG is 'Z'. * * A (local input/local output) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be set. On exit, the * leading m by n submatrix sub( A ) is set as follows: * * UPLO = 'L' or 'l', A(IA+i-1,JA+j-1)=ALPHA, j+1<=i<=M, 1<=j<=N * UPLO = 'U' or 'u', A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=j-1, 1<=j<=N * otherwise, A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=M, 1<=j<=N * and IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char type; int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, ctxt, izero=0, k, kb, ktmp, mn, mycol, myrow, nb, npcol, nprow, size; TZPAD_T pad; /* * .. Local Arrays .. */ int Ad0[DLEN_]; char * Aptr = NULL; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Quick return if I don't own any of sub( A ). */ Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; size = TYPE->size; type = TYPE->type; pad = TYPE->Ftzpad; Aptr = Mptr( A, Aii, Ajj, Ald, size ); /* * When the entire sub( A ) needs to be padded and alpha is equal to beta, or * sub( A ) is replicated in all processes, just call the local routine. */ if( type == SREAL ) { if( ( ( Mupcase( UPLO[0] ) == CALL ) && ( ((float*)(ALPHA))[REAL_PART] == ((float*)(BETA ))[REAL_PART] ) ) || ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) { pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, BETA, Aptr, &Ald ); return; } } else if( type == DREAL ) { if( ( ( Mupcase( UPLO[0] ) == CALL ) && ( ((double*)(ALPHA))[REAL_PART] == ((double*)(BETA ))[REAL_PART] ) ) || ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) { pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, BETA, Aptr, &Ald ); return; } } else if( type == SCPLX ) { if( ( ( Mupcase( UPLO[0] ) == CALL ) && ( ((float*)(ALPHA))[REAL_PART] == ((float*)(BETA ))[REAL_PART] ) && ( ((float*)(ALPHA))[IMAG_PART] == ((float*)(BETA ))[IMAG_PART] ) ) || ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) { pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, BETA, Aptr, &Ald ); return; } } else if( type == DCPLX ) { if( ( ( Mupcase( UPLO[0] ) == CALL ) && ( ((double*)(ALPHA))[REAL_PART] == ((double*)(BETA ))[REAL_PART] ) && ( ((double*)(ALPHA))[IMAG_PART] == ((double*)(BETA ))[IMAG_PART] ) ) || ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) { pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, BETA, Aptr, &Ald ); return; } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and two times the least common multiple of nprow * and npcol. */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); mn = MIN( M, N ); if( Mupcase( UPLO[0] ) == CLOWER ) { /* * Lower triangle of sub( A ): proceed by block of columns. For each block of * columns, operate on the logical diagonal block first and then the remaining * rows of that block of columns. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) ); PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 = Amp - Akp ) > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero, ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } else if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Upper triangle of sub( A ): proceed by block of columns. For each block of * columns, operate on the trailing rows and then the logical diagonal block * of that block of columns. When M < N, the last columns of sub( A ) are * handled together. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero, ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 ); } if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } else { /* * All of sub( A ): proceed by block of columns. For each block of columns, * operate on the trailing rows, then the logical diagonal block, and finally * the remaining rows of that block of columns. When M < N, the last columns * of sub( A ) are handled together. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero, ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cplapd2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 ); Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp0 = Amp - Akp ) > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero, ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } /* * End of PB_Cplapad */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_COutV.c0000640000175000017500000003072610363532303017361 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_COutV( PBTYP_T * TYPE, char * ROWCOL, char * ZEROIT, int M, int N, int * DESCA, int K, char * * YAPTR, int * DYA, int * YAFREE, int * YASUM ) #else void PB_COutV( TYPE, ROWCOL, ZEROIT, M, N, DESCA, K, YAPTR, DYA, YAFREE, YASUM ) /* * .. Scalar Arguments .. */ char * ROWCOL, * ZEROIT; int * YAFREE, K, M, N, * YASUM; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DYA; char * * YAPTR; #endif { /* * Purpose * ======= * * PB_COutV returns a pointer to an array that contains a one-dimensio- * nal ouput zero subvector which is replicated over the rows or columns * of a submatrix described by DESCA. On return, the subvector is speci- * fied by a pointer to some data, a descriptor array describing its * layout, a logical value indicating if this local piece of data has * been dynamically allocated by this function, a logical value speci- * fying if sum reduction should occur. This routine is specifically * designed for traditional Level 2 and 3 PBLAS operations using an out- * put only vector such as PxTRMV, or PxTRMM. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * ROWCOL (global input) pointer to CHAR * On entry, ROWCOL specifies if this routine should return a * row or column subvector replicated over the underlying subma- * trix as follows: * = 'R' or 'r': A row subvector is returned, * = 'C' or 'c': A column subvector is returned. * * M (global input) INTEGER * On entry, M specifies the number of rows of the underlying * submatrix described by DESCA. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the underlying * submatrix described by DESCA. N must be at least zero. * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * K (global input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension of the subvector sub( Y ). K must be at least zero. * * YAPTR (local output) pointer to pointer to CHAR * On exit, * YAPTR is an array containing the same data as the * subvector sub( Y ) which is replicated over the rows or co- * lumns of the underlying matrix as specified by ROWCOL and * DESCA. * * DYA (global and local output) INTEGER array * On exit, DYA is a descriptor array of dimension DLEN_ descri- * bing the data layout of the data pointed to by * YAPTR. * * YAFREE (local output) INTEGER * On exit, YAFREE specifies if it was possible to reuse the * subvector sub( Y ), i.e., if some dynamic memory was alloca- * ted for the data pointed to by * YAPTR or not. When YAFREE is * zero, no dynamic memory was allocated. Otherwise, some dyna- * mic memory was allocated by this function that one MUST re- * lease as soon as possible. * * YASUM (global output) INTEGER * On exit, YASUM specifies if a global sum reduction should be * performed to obtain the correct sub( Y ). When YASUM is zero, * no reduction is to be performed, otherwise reduction should * occur. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Aimb, Ainb, Amb, Amp, Anb, Anq, Arow, Yld, ctxt, izero=0, nprow, myrow, npcol, mycol; char * zero; /* .. * .. Executable Statements .. * */ /* * Initialize the output parameters to a default value */ *YAFREE = 0; *YASUM = 0; *YAPTR = NULL; /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) { if( Mupcase( ROWCOL[0] ) == CROW ) { PB_Cdescset( DYA, K, N, 1, DESCA[INB_], 1, DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 ); } else { PB_Cdescset( DYA, M, K, DESCA[IMB_], 1, DESCA[MB_], 1, DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], DESCA[LLD_] ); } return; } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Arow = DESCA[RSRC_]; Acol = DESCA[CSRC_]; if( Mupcase( ROWCOL[0] ) == CROW ) { /* * Want a row vector */ Ainb = DESCA[INB_]; Anb = DESCA[NB_]; Anq = PB_Cnumroc( N, 0, Ainb, Anb, mycol, Acol, npcol ); Yld = MAX( 1, K ); if( ( Arow < 0 ) || ( nprow == 1 ) || ( PB_Cspan( M, 0, DESCA[IMB_], DESCA[MB_], Arow, nprow ) ) ) { /* * A spans all process rows. Y should be reduced iff A is not replicated and * there is more than just one process row in the process grid. */ *YASUM = ( ( Arow >= 0 ) && ( nprow > 1 ) ); /* * Allocate the space for Y in the processes owning at least one column of A, * and initialize it to zero if requested. */ if( Anq > 0 ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; if( Mupcase( ZEROIT[0] ) == CINIT ) { zero = TYPE->zero; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, zero, zero, *YAPTR, &Yld ); } } /* * Describe the newly created operand */ PB_Cdescset( DYA, K, N, K, Ainb, 1, Anb, -1, Acol, ctxt, Yld ); } else { /* * A spans only one process row. There is no need to reduce Y or even to * allocate some space for it outside this process row. */ *YASUM = 0; if( ( myrow == Arow ) && ( Anq > 0 ) ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; if( Mupcase( ZEROIT[0] ) == CINIT ) { zero = TYPE->zero; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, zero, zero, *YAPTR, &Yld ); } } /* * Describe the newly created operand */ PB_Cdescset( DYA, K, N, K, Ainb, 1, Anb, Arow, Acol, ctxt, Yld ); } } else { /* * Want a column vector */ Aimb = DESCA[ IMB_ ]; Amb = DESCA[ MB_ ]; Amp = PB_Cnumroc( M, 0, Aimb, Amb, myrow, Arow, nprow ); Yld = MAX( 1, Amp ); if( ( Acol < 0 ) || ( npcol == 1 ) || ( PB_Cspan( N, 0, DESCA[INB_], DESCA[NB_], Acol, npcol ) ) ) { /* * A spans all process columns. Y should be reduced iff A is not replicated and * there is more than just one process column in the process grid. */ *YASUM = ( ( Acol >= 0 ) && ( npcol > 1 ) ); /* * Allocate the space for Y in the processes owning at least one row of A, and * initialize it to zero if requested. */ if( Amp > 0 ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; if( Mupcase( ZEROIT[0] ) == CINIT ) { zero = TYPE->zero; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, zero, zero, *YAPTR, &Yld ); } } /* * Describe the newly created operand */ PB_Cdescset( DYA, M, K, Aimb, K, Amb, 1, Arow, -1, ctxt, Yld ); } else { /* * A spans only one process column. There is no need to reduce Y or even to * allocate some space for it outside this process column. */ *YASUM = 0; if( ( mycol == Acol ) && ( Amp > 0 ) ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; if( Mupcase( ZEROIT[0] ) == CINIT ) { zero = TYPE->zero; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, zero, zero, *YAPTR, &Yld ); } } /* * Describe the newly created operand */ PB_Cdescset( DYA, M, K, Aimb, K, Amb, 1, Arow, Acol, ctxt, Yld ); } } /* * End of PB_COutV */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsyr2k.c0000640000175000017500000002311610363532303020107 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsyr2k( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * AC, int LDAC, char * BC, int LDBC, char * AR, int LDAR, char * BR, int LDBR, char * C, int LDC ) #else void PB_Ctzsyr2k( TYPE, UPLO, M, N, K, IOFFD, ALPHA, AC, LDAC, BC, LDBC, AR, LDAR, BR, LDBR, C, LDC ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDAC, LDAR, LDBC, LDBR, LDC, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * AC, * AR, * BC, * BR, * C; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctzsyr2k performs the trapezoidal symmetric o Hermitian rank 2k * operation: * * C := alpha * AC * BR + alpha * BC * AR + C, * * where alpha is a scalar, AC and BC are m by k matrices, AR and BR are * n by k matrices and C is an m by n trapezoidal symmetric or Hermitian * matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix C is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of C is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of C is referenced, * * otherwise all of the matrix C is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix C. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix C. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of columns of the matrices * AC and BC, and the number of rows of the matrices AR and BR. * K must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of C as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal C( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal C( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal C( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * AC (input) pointer to CHAR * On entry, AC is an array of dimension (LDAC,K) containing the * m by k matrix AC. * * LDAC (input) INTEGER * On entry, LDAC specifies the leading dimension of the array * AC. LDAC must be at least max( 1, M ). * * BC (input) pointer to CHAR * On entry, BC is an array of dimension (LDBC,K) containing the * m by k matrix BC. * * LDBC (input) INTEGER * On entry, LDBC specifies the leading dimension of the array * BC. LDBC must be at least max( 1, M ). * * AR (input) pointer to CHAR * On entry, AR is an array of dimension (LDAR,N) containing the * k by n matrix AR. * * LDAR (input) INTEGER * On entry, LDAR specifies the leading dimension of the array * AR. LDAR must be at least K. * * BR (input) pointer to CHAR * On entry, BR is an array of dimension (LDBR,N) containing the * k by n matrix BR. * * LDBR (input) INTEGER * On entry, LDBR specifies the leading dimension of the array * BR. LDBR must be at least K. * * C (input/output) pointer to CHAR * On entry, C is an array of dimension (LDC,N) containing the m * by n matrix C. Only the trapezoidal part of C determined by * UPLO and IOFFD is updated. * * LDC (input) INTEGER * On entry, LDC specifies the leading dimension of the array C. * LDC must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, j1, m1, mn, n1, size; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsyr2k( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( BC, i1, 0, LDBC, size ), &LDBC, one, Mptr( C, i1, j1, LDC, size ), &LDC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } TYPE->Fsyr2k( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, m1, 0, LDAC, size ), &LDAC, Mptr( BC, m1, 0, LDBC, size ), &LDBC, one, Mptr( C, m1, j1, LDC, size ), &LDC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, BC, &LDBC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, ALPHA, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } /* * End of PB_Ctzsyr2k */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CGatherV.c0000640000175000017500000006554510363532303020033 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CGatherV( PBTYP_T * TYPE, char * ALLOC, char * DIRECA, int M, int N, char * A, int IA, int JA, int * DESCA, char * AROC, char * * B, int * DESCB, int * BFREE ) #else void PB_CGatherV( TYPE, ALLOC, DIRECA, M, N, A, IA, JA, DESCA, AROC, B, DESCB, BFREE ) /* * .. Scalar Arguments .. */ char * ALLOC, * AROC, * DIRECA; int * BFREE, IA, JA, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * * B; #endif { /* * Purpose * ======= * * PB_CGatherV aggregates a submatrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1) * into a one-dimensional multivector B. The submatrix sub( A ) is spe- * cified on input to this routine that is reused whenever possible. On * return, the one-dimensional multivector is specified by a pointer to * some data, a descriptor array describing its layout and a logical va- * lue indicating if this local piece of data has been dynamically allo- * cated by this function. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * ALLOC (global input) pointer to CHAR * On entry, ALLOC specifies if data should be allocated even * when unnecessary as follows: * ALLOC = 'A' or 'a' data allocation is enforced, * ALLOC = 'R' or 'r' data is reused when possible. * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be aggregated as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and, * Ka is at least Lc( N, JA ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the submatrix * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix, * and a column matrix otherwise. * * B (local output) pointer to pointer to CHAR * On exit, * B is an array containing the aggregated submatrix * sub( A ). * * DESCB (global and local output) INTEGER array * On exit, DESCB is a descriptor array of dimension DLEN_ des- * cribing the data layout of the data pointed to by * B. * * BFREE (local output) INTEGER * On exit, BFREE specifies if it has been possible to reuse * the submatrix sub( A ), i.e., if some dynamic memory was al- * located for the data pointed to by *B or not. When BFREE is * zero, no dynamic memory was allocated. Otherwise, some dyna- * mic memory was allocated by this function that one MUST re- * lease as soon as possible. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one, * zero; int Afwd, AggRow, AiiD, AiiR, Ainb1D, Ainb1R, Ald, AmyprocD, AmyprocR, AnR, AnbD, AnbR, AnnxtL, AnnxtR, AnpD, AnpR, AnpreR, AnprocsR, ArocR, AsrcD, AsrcR, Bld, Bsrc_, ctxt, k, kb, kblks, kn, ktmp, mycol, mydist, mydistnb, myrow, nlen, npcol, nprow, offset, size, srcdist; MMADD_T add; MMSHFT_T shft; /* * .. Local Arrays .. */ char * Aptr = NULL, * Bptr = NULL; /* .. * .. Executable Statements .. * */ /* * Initialize the output parameters to a default value */ *BFREE = 0; *B = NULL; /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) { PB_Cdescset( DESCB, M, N, DESCA[IMB_], DESCA[INB_], DESCA[MB_], DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 ); return; } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( ( AggRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 ) { /* * Accumulate rows of sub( A ) */ AnbR = DESCA[MB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiR, &AiiD, &AsrcR, &AsrcD ); Ainb1D = PB_Cfirstnb( N, JA, DESCA[INB_], AnbD ); AnpD = PB_Cnumroc( N, 0, Ainb1D, AnbD, mycol, AsrcD, npcol ); /* * If sub( A ) is either replicated or spans only one process row, no data needs * to be exchanged by the processes, the operation is purely local. */ if( !( PB_Cspan( M, IA, DESCA[IMB_], AnbR, AsrcR, nprow ) ) ) { if( Mupcase( ALLOC[0] ) == CREUSE ) { /* * sub( A ) can be reused */ if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) ) { /* * If I own some entries of sub( A ), set *B */ Bld = Ald; *B = Mptr( A, AiiR, AiiD, Ald, TYPE->size ); } else { Bld = 1; } } else { /* * sub( A ) cannot be reused, make a copy of it. */ if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) ) { /* * If I own some entries of sub( A ), allocate space for the copy, and copy the * data. */ Bld = M; if( AnpD > 0 ) { size = TYPE->size; *B = PB_Cmalloc( AnpD * M * size ); *BFREE = 1; TYPE->Fmmadd( &M, &AnpD, TYPE->one, Mptr( A, AiiR, AiiD, Ald, size ), &Ald, TYPE->zero, *B, &Bld ); } } else { Bld = 1; } } /* * Describe the resulting operand */ PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt, Bld ); return; } AnR = M; Bsrc_ = RSRC_; AmyprocR = myrow; AmyprocD = mycol; AnprocsR = nprow; Ainb1R = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbR ); AnpR = PB_Cnumroc( M, 0, Ainb1R, AnbR, myrow, AsrcR, nprow ); } else { /* * Accumulate columns of sub( A ) */ AnbD = DESCA[MB_ ]; AnbR = DESCA[NB_ ]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiD, &AiiR, &AsrcD, &AsrcR ); Ainb1D = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbD ); AnpD = PB_Cnumroc( M, 0, Ainb1D, AnbD, myrow, AsrcD, nprow ); /* * If sub( A ) is either replicated or spans only one process column, no data * needs to be exchanged by the processes, the operation is purely local. */ if( !( PB_Cspan( N, JA, DESCA[INB_], AnbR, AsrcR, npcol ) ) ) { if( Mupcase( ALLOC[0] ) == CREUSE ) { /* * sub( A ) can be reused */ Bld = Ald; if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) ) /* * If I own some entries of sub( A ), set *B */ *B = Mptr( A, AiiD, AiiR, Ald, TYPE->size ); } else { /* * sub( A ) cannot be reused, make a copy of it. */ Bld = MAX( 1, AnpD ); if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) ) { /* * If I own some entries of sub( A ), allocate space for the copy, and copy the * data. */ if( AnpD > 0 ) { size = TYPE->size; *B = PB_Cmalloc( AnpD * N * size ); *BFREE = 1; TYPE->Fmmadd( &AnpD, &N, TYPE->one, Mptr( A, AiiD, AiiR, Ald, size ), &Ald, TYPE->zero, *B, &Bld ); } } } /* * Describe the resulting operand */ PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt, Bld ); return; } AnR = N; Bsrc_ = CSRC_; AmyprocR = mycol; AmyprocD = myrow; AnprocsR = npcol; Ainb1R = PB_Cfirstnb( N, JA, DESCA[INB_], AnbR ); AnpR = PB_Cnumroc( N, 0, Ainb1R, AnbR, mycol, AsrcR, npcol ); } /* * sub( A ) is not replicated and spans more than one process row or column. * Forward row (resp. column) accumulation will leave the resulting operand in * the process(es) where the global row IA+M-1 (resp. global column JA+N-1) * resides. */ if( ( Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ) ) != 0 ) { if( ( AnpD > 0 ) && ( AnpR > 0 ) ) { /* * Compute how may rows or columns are before me -> AnpreR */ AnpreR = PB_Cnpreroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR, AnprocsR ); if( AnpreR == 0 ) { /* * If zero rows or columns are before me, I must be the source, so send my piece * to the process after me in the grid. */ if( AggRow ) { TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald, TYPE->size ), Ald, MModAdd1( AmyprocR, AnprocsR ), AmyprocD ); } else { TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR, Ald, TYPE->size ), Ald, AmyprocD, MModAdd1( AmyprocR, AnprocsR ) ); } } else if( AnpreR > 0 ) { /* * Otherwise, allocate some space for the rows or columns I have and the ones * globally preceeding the ones I have, that I am about to receive. */ size = TYPE->size; one = TYPE->one; zero = TYPE->zero; add = TYPE->Fmmadd; *B = Bptr = PB_Cmalloc( ( AnpreR + AnpR ) * AnpD * size ); nlen = AnpreR; mydistnb = MModSub( AmyprocR, AsrcR, AnprocsR ) * AnbR; kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ? ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 ); offset = kblks * AnbR; kn = Ainb1R + mydistnb - AnbR; kn = MIN( kn, AnpreR ) + ( MAX( 1, kblks ) - 1 ) * mydistnb; if( AggRow ) { shft = TYPE->Frshft; Aptr = Mptr( A, AiiR, AiiD, Ald, size ); Bld = AnpreR + AnpR; /* * Receive the rows globally preceeding the ones I have */ TYPE->Cgerv2d( ctxt, AnpreR, AnpD, *B, Bld, MModSub1( AmyprocR, AnprocsR ), AmyprocD ); /* * Sort the received buffer and insert at the correct place the rows of sub( A ) * I own (from bottom to top). */ if( ( ( AnpR - 1 ) / AnbR ) == kblks ) { kb = AnpR - offset; add( &kb, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ), &Ald, zero, Mptr( Bptr, nlen+offset, 0, Bld, size ), &Bld ); } for( k = kblks; k >= 1; k-- ) { kb = nlen - kn; shft( &kb, &AnpD, &offset, Mptr( Bptr, kn, 0, Bld, size ), &Bld ); offset -= AnbR; add( &AnbR, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ), &Ald, zero, Mptr( Bptr, kn+offset, 0, Bld, size ), &Bld ); kn -= mydistnb; nlen -= kb; } if( AnpreR + AnpR != AnR ) { /* * If I am not the last process, i.e I am not supposed to own all of the AnR * rows by the end of the operation, then send the sorted buffer to the next * process and release the dynamically allocated buffer. */ TYPE->Cgesd2d( ctxt, AnpreR+AnpR, AnpD, *B, Bld, MModAdd1( AmyprocR, AnprocsR ), AmyprocD ); if( *B ) free( *B ); } } else { shft = TYPE->Fcshft; Aptr = Mptr( A, AiiD, AiiR, Ald, size ); Bld = MAX( 1, AnpD ); /* * Receive the columns globally preceeding the ones I have */ TYPE->Cgerv2d( ctxt, AnpD, AnpreR, *B, Bld, AmyprocD, MModSub1( AmyprocR, AnprocsR ) ); /* * Sort the received buffer and insert at the correct place the columns of * sub( A ) I own (from right to left). */ if( ( ( AnpR - 1 ) / AnbR ) == kblks ) { kb = AnpR - offset; add( &AnpD, &kb, one, Mptr( Aptr, 0, offset, Ald, size ), &Ald, zero, Mptr( Bptr, 0, nlen+offset, Bld, size ), &Bld ); } for( k = kblks; k >= 1; k-- ) { kb = nlen - kn; shft( &AnpD, &kb, &offset, Mptr( Bptr, 0, kn, Bld, size ), &Bld ); offset -= AnbR; add( &AnpD, &AnbR, one, Mptr( Aptr, 0, offset, Ald, size ), &Ald, zero, Mptr( Bptr, 0, kn + offset, Bld, size ), &Bld ); kn -= mydistnb; nlen -= kb; } if( AnpreR + AnpR != AnR ) { /* * If I am not the last process, i.e I am not supposed to own all of the AnR * columns by the end of the operation, then send the sorted buffer to the next * process and release the dynamically allocated buffer. */ TYPE->Cgesd2d( ctxt, AnpD, AnpreR+AnpR, *B, Bld, AmyprocD, MModAdd1( AmyprocR, AnprocsR ) ); if( *B ) free( *B ); } } } } } else { /* * Backward accumulation, compute the process row or column coordinate ArocR, * that is going to have the resulting operand. */ ArocR = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR, AnprocsR ); if( ( AnpD > 0 ) && ( AnpR > 0 ) ) { /* * Compute how may rows or columns are after me -> AnnxtR */ AnnxtR = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR, AnprocsR ); AnnxtL = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, ArocR, AsrcR, AnprocsR ); if( ( AnnxtR = MModSub( AnnxtR, AnnxtL, AnR ) ) == 0 ) { /* * If zero rows or columns are after me, I must be the source, so send my piece * to the process before me in the grid. */ if( AggRow ) { TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald, TYPE->size ), Ald, MModSub1( AmyprocR, AnprocsR ), AmyprocD ); } else { TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR, Ald, TYPE->size ), Ald, AmyprocD, MModSub1( AmyprocR, AnprocsR ) ); } } else if( AnnxtR > 0 ) { /* * Otherwise, allocate some space for the rows or columns I have and the ones * globally following the ones I have, that I am about to receive. */ size = TYPE->size; one = TYPE->one; zero = TYPE->zero; add = TYPE->Fmmadd; *B = Bptr = PB_Cmalloc( ( AnnxtR + AnpR ) * AnpD * size ); kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ? ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 ); mydist = MModSub( ArocR, AmyprocR, AnprocsR ); mydistnb = mydist * AnbR; srcdist = MModSub( ArocR, AsrcR, AnprocsR ); if( AggRow ) { shft = TYPE->Frshft; Aptr = Mptr( A, AiiR, AiiD, Ald, size ); Bld = AnnxtR + AnpR; /* * Receive the rows globally following the ones I have */ TYPE->Cgerv2d( ctxt, AnnxtR, AnpD, Mptr( *B, AnpR, 0, Bld, size ), Bld, MModAdd1( AmyprocR, AnprocsR ), AmyprocD ); /* * Sort the received buffer and insert at the correct place the rows of sub( A ) * I own (from top to bottom). */ if( mydist > srcdist ) { offset = -AnpR; kb = Ainb1R + srcdist*AnbR; } else if( mydist == srcdist ) { add( &Ainb1R, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, Ainb1R, 0, Ald, size ); Bptr = Mptr( Bptr, Ainb1R, 0, Ald, size ); offset = Ainb1R - AnpR; kb = mydistnb; } else { add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, AnbR, 0, Ald, size ); Bptr = Mptr( Bptr, AnbR, 0, Ald, size ); offset = AnbR - AnpR; kb = mydistnb; } for( k = kblks; k >= 1; k-- ) { shft( &kb, &AnpD, &offset, Bptr, &Bld ); Bptr = Mptr( Bptr, kb, 0, Bld, size ); add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, AnbR, 0, Ald, size ); Bptr = Mptr( Bptr, AnbR, 0, Ald, size ); offset += AnbR; kb = mydistnb; } if( AnnxtR + AnpR != AnR ) { /* * If I am not the last process, i.e I am not supposed to own all of the AnR * rows by the end of the operation, then send the sorted buffer to the previous * process and release the dynamically allocated buffer. */ TYPE->Cgesd2d( ctxt, AnnxtR+AnpR, AnpD, *B, Bld, MModSub1( AmyprocR, AnprocsR ), AmyprocD ); if( *B ) free( *B ); } } else { shft = TYPE->Fcshft; Aptr = Mptr( A, AiiD, AiiR, Ald, size ); Bld = MAX( 1, AnpD ); /* * Receive the columns globally following the ones I have */ TYPE->Cgerv2d( ctxt, AnpD, AnnxtR, Mptr( *B, 0, AnpR, Bld, size ), Bld, AmyprocD, MModAdd1( AmyprocR, AnprocsR ) ); /* * Sort the received buffer and insert at the correct place the columns of * sub( A ) I own (from left to right). */ if( mydist > srcdist ) { offset = -AnpR; kb = Ainb1R + srcdist*AnbR; } else if( mydist == srcdist ) { add( &AnpD, &Ainb1R, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, 0, Ainb1R, Ald, size ); Bptr = Mptr( Bptr, 0, Ainb1R, Bld, size ); offset = Ainb1R - AnpR; kb = mydistnb; } else { add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, 0, AnbR, Ald, size ); Bptr = Mptr( Bptr, 0, AnbR, Bld, size ); offset = AnbR - AnpR; kb = mydistnb; } for( k = kblks; k >= 1; k-- ) { shft( &AnpD, &kb, &offset, Bptr, &Bld ); Bptr = Mptr( Bptr, 0, kb, Bld, size ); add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld ); Aptr = Mptr( Aptr, 0, AnbR, Ald, size ); Bptr = Mptr( Bptr, 0, AnbR, Bld, size ); offset += AnbR; kb = mydistnb; } if( AnnxtR + AnpR != AnR ) { /* * If I am not the last process, i.e I am not supposed to own all of the AnR * columns by the end of the operation, then send the sorted buffer to the * previous process and release the dynamically allocated buffer. */ TYPE->Cgesd2d( ctxt, AnpD, AnnxtR+AnpR, *B, Bld, AmyprocD, MModSub1( AmyprocR, AnprocsR ) ); if( *B ) free( *B ); } } } } } /* * Describe the resulting operand */ if( AggRow ) { PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt, M ); } else { PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt, MAX( 1, AnpD ) ); } /* * Compute globally in which process row or column the resulting operand is * residing and set *BFREE accordingly. */ if( Afwd ) { if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR ) { /* * If sub( A ) is spanning all process rows or columns of the grid, the result * must be in the process row or column preceeding the one owning IA or JA, * don't you think ? */ DESCB[Bsrc_] = MModSub1( AsrcR, AnprocsR ); } else { /* * Otherwise, the result is in the process row or column where the row IA+M-1 * or column JA+N-1 of sub( A ) resides. */ DESCB[Bsrc_] = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR, AnprocsR ); } if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) ) *BFREE = 1; } else { if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR ) { /* * If sub( A ) is spanning all process rows or columns of the grid, the result * must be in the process row or column following the one owning IA+M-1 or * JA+N-1, don't you think ? */ DESCB[Bsrc_] = MModAdd1( ArocR, AnprocsR ); } else { /* * Otherwise, the result is in the process row or column where the row IA or * column JA of sub( A ) resides. */ DESCB[Bsrc_] = AsrcR; } if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) ) *BFREE = 1; } /* * End of PB_CGatherV */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cptran.c0000640000175000017500000005145410363532303017611 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cptran( PBTYP_T * TYPE, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_Cptran( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG; int IA, IC, JA, JC, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCC; char * A, * C; #endif { /* * Purpose * ======= * * PB_Cptran transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, * * op( X ) = X' or op( X ) = conjg( X )'. * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( C ) as follows: * CONJUG = 'N' or 'n': * sub( C ) := beta*sub( C ) + alpha*sub( A )' * otherwise * sub( C ) := beta*sub( C ) + alpha*conjg( sub( A ) )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Aroc, Croc, * one, * talpha, * tbeta, * zero; int ACnD, ACnR, Abufld, AcurrocR, Afr, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff, ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R, CisR, Ckk, Cld, CmyprocD, CmyprocR, CnbD, CnbR, CnpD, CnpR, CnprocsD, CnprocsR, Coff, CrocD, CrocR, CsrcR, ctxt, col2row, gcdPQ, k, kb, kbb, l, lcmPQ, lcmb, maxp, maxq, mycol, myrow, ncpq, npcol, npq, nprow, nrpq, p, q, size; PB_VM_T VM; /* * .. Local Arrays .. */ int DBUFA[DLEN_], DBUFC[DLEN_]; char * Abuf = NULL, * Cbuf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Loop over the rows of sub( C ) when M <= N, and the columns of sub( C ) * otherwise. */ col2row = ( ( M <= N ) || ( nprow == 1 ) || ( DESCA[RSRC_] == -1 ) ); if( col2row ) { AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_]; CinbR = DESCC[IMB_]; CnbR = DESCC[MB_]; CsrcR = DESCC[RSRC_]; /* * If sub( A ) only spans one process column and sub( C ) spans only one process * row, then there is no need to pack the data. */ if( !( PB_Cspan( M, JA, AinbR, AnbR, AsrcR, npcol ) ) && !( PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) ) { PB_Cpaxpby( TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA, COLUMN, BETA, C, IC, JC, DESCC, ROW ); return; } /* * Compute local information for sub( A ) and sub( C ) */ ACnR = M; ACnD = N; AmyprocD = CmyprocR = myrow; AnprocsD = CnprocsR = nprow; AmyprocR = CmyprocD = mycol; CnprocsD = AnprocsR = npcol; AiD = IA; AiR = JA; Aroc = CCOLUMN; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); CiD = JC; CiR = IC; Croc = CROW; CinbD = DESCC[INB_]; CnbD = DESCC[NB_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, CnprocsR, CnprocsD, CmyprocR, CmyprocD, &CiiR, &CiiD, &CrocR, &CrocD ); } else { AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_]; CinbR = DESCC[INB_]; CnbR = DESCC[NB_]; CsrcR = DESCC[CSRC_]; /* * If sub( A ) only spans one process row and sub( C ) spans only one process * column, then there is no need to pack the data. */ if( !( PB_Cspan( N, IA, AinbR, AnbR, AsrcR, nprow ) ) && !( PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) ) { PB_Cpaxpby( TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA, ROW, BETA, C, IC, JC, DESCC, COLUMN ); return; } /* * Compute local information for sub( A ) and sub( C ) */ ACnD = M; ACnR = N; AmyprocR = CmyprocD = myrow; AnprocsR = CnprocsD = nprow; AmyprocD = CmyprocR = mycol; AnprocsD = CnprocsR = npcol; AiD = JA; AiR = IA; Aroc = CROW; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); CiD = IC; CiR = JC; Croc = CCOLUMN; CinbD = DESCC[IMB_]; CnbD = DESCC[MB_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, CnprocsD, CnprocsR, CmyprocD, CmyprocR, &CiiD, &CiiR, &CrocD, &CrocR ); } size = TYPE->size; one = TYPE->one; zero = TYPE->zero; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); Ainb1D = PB_Cfirstnb( ACnD, AiD, AinbD, AnbD ); AnpD = PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD ); Ainb1R = PB_Cfirstnb( ACnR, AiR, AinbR, AnbR ); AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ); Cinb1D = PB_Cfirstnb( ACnD, CiD, CinbD, CnbD ); CnpD = PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, CmyprocD, CrocD, CnprocsD ); Cinb1R = PB_Cfirstnb( ACnR, CiR, CinbR, CnbR ); CisR = ( ( CsrcR < 0 ) || ( CnprocsR == 1 ) ); lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : CnprocsR ) ) * CnbR, ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR ); gcdPQ = PB_Cgcd( maxp, maxq ); lcmPQ = ( maxp / gcdPQ ) * maxq; /* * Loop over the processes of the virtual grid */ for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, AnprocsR ) ); CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, CnprocsR ) ); if( ( AisR || ( AmyprocR == AcurrocR ) ) || ( CisR || ( CmyprocR == CcurrocR ) ) ) { Ckk = CiiR; Akk = AiiR; /* * Initialize local virtual matrix in process (p,q) */ CnpR = PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, CnprocsR ); AnpR = PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q, maxp, maxq, lcmb ); /* * Find how many diagonals in this virtual process */ npq = PB_CVMnpq( &VM ); /* * Re-adjust the number of rows or columns to be (un)packed, in order to * average the message sizes. */ if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 ); if( col2row ) { while( npq ) { kbb = MIN( kbb, npq ); /* * Find out how many columns of sub( A ) and rows of sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < kbb ) ) != 0 ) { /* * If columns of sub( A ) are not contiguous, then allocate the buffer and * pack the kbb columns of sub( A ). */ Abufld = MAX( 1, AnpD ); if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * kbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, kbb, AnpD, one, Mptr( A, AiiD, Akk, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size ); } PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD, AcurrocR, ctxt, Abufld ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * rows of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < kbb ) ) != 0 ) { /* * If rows of sub( C ) are not contiguous, then allocate receiving buffer. */ Cbufld = kbb; talpha = one; tbeta = zero; if( CisR || ( CmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * kbb * size ); } else { /* * Otherwise, re-use sub( C ) directly. */ Cbufld = Cld; talpha = ALPHA; tbeta = BETA; if( CisR || ( CmyprocR == CcurrocR ) ) Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size ); } PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, CcurrocR, CrocD, ctxt, Cbufld ); /* * Transpose the one-dimensional buffer Abuf into Cbuf. */ PB_Cpaxpby( TYPE, CONJUG, ACnD, kbb, talpha, Abuf, 0, 0, DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc ); /* * Release the buffer containing the packed columns of sub( A ) */ if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Unpack the kbb rows of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, &Croc, UNPACKING, NOTRAN, kbb, CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, size ), Cld, ALPHA, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } /* * Update the local column index of sub( A ) and the local row index of sub( C ) */ PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); npq -= kbb; } } else { while( npq ) { kbb = MIN( kbb, npq ); /* * Find out how many rows of sub( A ) and columns of sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); /* * Compute the descriptor DBUFA for the buffer that will contained the packed * rows of sub( A ). */ if( ( Afr = ( ncpq < kbb ) ) != 0 ) { /* * If rows of sub( A ) are not contiguous, then allocate the buffer and pack * the kbb rows of sub( A ). */ Abufld = kbb; if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * kbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, kbb, AnpD, one, Mptr( A, Akk, AiiD, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size ); } PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, AcurrocR, ArocD, ctxt, Abufld ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * columns of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < kbb ) ) != 0 ) { /* * If columns of sub( C ) are not contiguous, then allocate receiving buffer. */ Cbufld = MAX( 1, CnpD ); talpha = one; tbeta = zero; if( CisR || ( CmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * kbb * size ); } else { Cbufld = Cld; talpha = ALPHA; tbeta = BETA; if( CisR || ( CmyprocR == CcurrocR ) ) Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size ); } PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD, CcurrocR, ctxt, Cbufld ); /* * Transpose the one-dimensional buffer Abuf into Cbuf. */ PB_Cpaxpby( TYPE, CONJUG, kbb, ACnD, talpha, Abuf, 0, 0, DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc ); /* * Release the buffer containing the packed rows of sub( A ) */ if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Unpack the kbb columns of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, &Croc, UNPACKING, NOTRAN, kbb, CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, size ), Cld, ALPHA, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } /* * Update the local row index of sub( A ) and the local column index of sub( C ) */ PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); npq -= kbb; } } } p = MModAdd1( p, maxp ); q = MModAdd1( q, maxq ); } } /* * End of PB_Cptran */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplapd2.c0000640000175000017500000003013110363532303017634 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplapd2( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M, int N, char * ALPHA, char * BETA, char * A, int IA, int JA, int * DESCA ) #else void PB_Cplapd2( TYPE, UPLO, CONJUG, M, N, ALPHA, BETA, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ char * CONJUG, * UPLO; int IA, JA, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A; #endif { /* * .. Local Scalars .. */ char UploA, herm; int Acol, Aii, Aimb1, Ainb1, Aoffi, Ajj, Ald, Amb, Amp, Anb, Anq, Aoffj, Arcol, Arow, Arrow, GoEast, GoSouth, iimax, ilow, imbloc, inbloc, ioffd, iupp, izero=0, jjmax, joffd, lcmt, lcmt00, lmbloc, lnbloc, low, lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; TZPAD_T pad; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ). */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Ald = DESCA[LLD_]; PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( ( ( Arow < 0 ) || ( nprow == 1 ) ) ? Amb : nprow * Amb ); qnb = ( ( ( Acol < 0 ) || ( npcol == 1 ) ) ? Anb : npcol * Anb ); size = TYPE->size; pad = TYPE->Ftzpad; UploA = Mupcase( UPLO[0] ); herm = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) ); upper = ( UploA != CLOWER ); lower = ( UploA != CUPPER ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ pad( C2F_CHAR( UPLO ), C2F_CHAR( &herm ), &imbloc, &inbloc, &lcmt00, ALPHA, BETA, Mptr( A, Aii, Ajj, Ald, size ), &Ald ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be set and one is * planning to go south in the table, it is neccessary to take care of the * remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &imbloc, &tmp1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), &Ald ); } Aii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be set and one is * planning to go east in the table, it is neccessary to take care of the * remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &inbloc, &izero, ALPHA, ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), &Ald ); } Ajj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row index in A. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; } /* * Set the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &n1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; pad( C2F_CHAR( UPLO ), C2F_CHAR( &herm ), &mbloc, &inbloc, &lcmt, ALPHA, BETA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; } /* * Set the lower triangular part of sub( A ) when necessary. */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &inbloc, &izero, ALPHA, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; /* * When the upper triangular part of sub( A ) should be set, take care of the * n1 remaining columns of these tmp1 rows immediately. */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &n1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; } /* * Set the lower triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &m1, &tmp1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald ); Ajj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; pad( C2F_CHAR( UPLO ), C2F_CHAR( &herm ), &imbloc, &nbloc, &lcmt, ALPHA, BETA, Mptr( A, Aii, joffd+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; joffd += nbloc; } /* * Set the upper triangular part of sub( A ) when necessary. */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &imbloc, &tmp1, &izero, ALPHA, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), &Ald ); tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; /* * When the lower triangular part of sub( A ) should be set, take care of the * m1 remaining rows of these tmp1 columns immediately. */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &m1, &tmp1, &izero, ALPHA, ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; } /* * Set the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &n1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; pad( C2F_CHAR( UPLO ), C2F_CHAR( &herm ), &mbloc, &nbloc, &lcmt, ALPHA, BETA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; } /* * Set the lower triangular part of sub( A ) when necessary. */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &nbloc, &izero, ALPHA, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; /* * When the upper triangular part of sub( A ) should be set, take care of the * n1 remaining columns of these tmp1 rows immediately. */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( &herm ), &tmp1, &n1, &izero, ALPHA, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } /* * End of PB_Cplapd2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpaxpbyDN.c0000640000175000017500000011146510363532303020211 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpaxpbyDN( PBTYP_T * TYPE, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * AROC, char * BETA, char * B, int IB, int JB, int * DESCB, char * BROC ) #else void PB_CpaxpbyDN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ) /* * .. Scalar Arguments .. */ char * AROC, * BROC, * CONJUG; int IA, IB, JA, JB, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CpaxpbyDN adds one submatrix to another, * * sub( B ) := beta * sub( B ) + alpha * sub( A ), or, * * sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ), * * where sub( A ) is distributed and sub( B ) is not distributed. * * sub( A ) always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or * 'r' sub( A ) resides in a process row, otherwise sub( A ) resides in * a process column. When sub( A ) resides in a process row and BROC is * 'R' or 'r' or sub( A ) resides in a process column and BROC is 'C' or * 'c', then sub( B ) denotes B( IB:IB+M-1, JB:JB+N-1 ), and otherwise * sub( B ) denotes B(IB:IB+N-1,JB:JB+M-1). * otherwise. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( B ) as follows: * CONJUG = 'N' or 'n': * sub( B ) := beta*sub( B ) + alpha*sub( A ), * otherwise * sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A cor- * responding to the entries of the submatrix sub( A ) need not * be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least * Lc( 1, JA+N-1 ). Before entry, this array contains the local * entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the subvector * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector, * and a column vector otherwise. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B corres- * ponding to the entries of the submatrix sub( B ) need not be * set on input. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where LLD_B * is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and * sub( B ) are both distributed along a process column or a * process row. In that case, Kb is at least Lc( 1, JB+N-1 ). * Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and * Kb is at least Lc( 1, JB+M-1 ). Before entry, this array * contains the local entries of the matrix B. On exit, sub( B ) * is overwritten with the updated submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BROC (global input) pointer to CHAR * On entry, BROC specifies the orientation of the subvector * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector, * and a column vector otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top, * zero; int Acol, Aii, Ainb1D, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR, AnD, AnbD, AnpD, AnprocsD, AprocD, AprocR, Aroc, Arow, Bcol, Bii, BisR, BisRow, Bjj, Bld, Bm, BmyprocD, BmyprocR, Bn, BnprocsD, BprocR, Broc, Brow, RRorCC, ctxt, izero=0, k, kbb, kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size; MMADD_T add; TZPAD_T pad; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 ) { AnD = N; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; AprocD = Acol; AprocR = Arow; AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol; AisR = ( ( Arow == -1 ) || ( nprow == 1 ) ); Ainb1D = PB_Cfirstnb( AnD, JA, DESCA[INB_], AnbD ); } else { AnD = M; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; AprocD = Arow; AprocR = Acol; AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow; AisR = ( ( Acol == -1 ) || ( npcol == 1 ) ); Ainb1D = PB_Cfirstnb( AnD, IA, DESCA[IMB_], AnbD ); } /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ... */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 ) { Bld = DESCB[LLD_]; BmyprocD = mycol; BnprocsD = npcol; BprocR = Brow; BmyprocR = myrow; BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) ); } else { Bld = DESCB[LLD_]; BmyprocD = myrow; BnprocsD = nprow; BprocR = Bcol; BmyprocR = mycol; BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) ); } /* * Are sub( A ) and sub( B ) both row or column vectors ? */ RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) ); /* * Select the local add routine accordingly */ size = TYPE->size; /* * sub( A ) is distributed and sub( B ) is not distributed */ if( !( BisR ) ) { /* * sub( B ) is not replicated. Since this operation is local if sub( B ) and * sub( A ) are both row or column vectors, choose AprocR = BprocR when RRorCC, * and AprocR = 0 otherwise. */ if( AisR ) { AprocR = ( ( RRorCC ) ? BprocR : 0 ); } /* * Now, it is just like sub( A ) is not replicated, this information however is * kept in AisR for later use. */ if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) ) { if( RRorCC ) { /* * sub( A ) and sub( B ) are both row or column vectors */ zero = TYPE->zero; if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; pad = TYPE->Ftzpad; AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD ); /* * sub( A ) and sub( B ) are in the same process row or column */ if( AprocR == BprocR ) { /* * In each process, the distributed part of sub( A ) is added to sub( B ). In * the other processes, this replicated of sub( B ) is set to zero for later * reduction. */ if( AnpD > 0 ) { Aroc = AprocD; if( BisRow ) { kk = Ajj; ktmp = JB + N; kn = JB + Ainb1D; } else { kk = Aii; ktmp = IB + M; kn = IB + Ainb1D; } if( AmyprocD == Aroc ) { if( BisRow ) add( &M, &Ainb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Ainb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Ainb1D; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &Ainb1D, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ainb1D, &N, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); for( k = kn; k < ktmp; k += AnbD ) { kbb = ktmp - k; kbb = MIN( kbb, AnbD ); if( AmyprocD == Aroc ) { if( BisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, kk, Ald, size ), &Ald, BETA, Mptr( B, Bii, k, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, kk, Ajj, Ald, size ), &Ald, BETA, Mptr( B, k, Bjj, Bld, size ), &Bld ); kk += kbb; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &kbb, &izero, zero, zero, Mptr( B, Bii, k, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kbb, &N, &izero, zero, zero, Mptr( B, k, Bjj, Bld, size ), &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); } } else { /* * If I don't own any entries of sub( A ), then zero the entire sub( B ) * residing in this process. */ pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &N, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } /* * Replicate locally scattered sub( B ) by reducing it */ scope = ( BisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); TYPE->Cgsum2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld, size ), Bld, -1, 0 ); } else { /* * sub( A ) and sub( B ) are in a different process row or column */ if( AmyprocR == AprocR ) { /* * If I own a piece of sub( A ), then send it to the corresponding process row * or column where sub( B ) resides. */ if( AnpD > 0 ) { if( AisRow ) TYPE->Cgesd2d( ctxt, M, AnpD, Mptr( A, Aii, Ajj, Ald, size ), Ald, BprocR, BmyprocD ); else TYPE->Cgesd2d( ctxt, AnpD, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, BmyprocD, BprocR ); } } if( BmyprocR == BprocR ) { /* * If I own sub( B ), then receive and unpack distributed part of sub( A ) that * should be added to sub( B ). Combine the results. */ if( AnpD > 0 ) { if( BisRow ) { ktmp = JB + N; kn = JB + Ainb1D; buf = PB_Cmalloc( M * AnpD * size ); TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR, AmyprocD ); } else { ktmp = IB + M; kn = IB + Ainb1D; buf = PB_Cmalloc( AnpD * N * size ); TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, AmyprocD, AprocR ); } Aroc = AprocD; kk = 0; if( AmyprocD == Aroc ) { if( BisRow ) add( &M, &Ainb1D, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Ainb1D; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &Ainb1D, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ainb1D, &N, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); for( k = kn; k < ktmp; k += AnbD ) { kbb = ktmp - k; kbb = MIN( kbb, AnbD ); if( AmyprocD == Aroc ) { if( BisRow ) add( &M, &kbb, ALPHA, Mptr( buf, 0, kk, M, size ), &M, BETA, Mptr( B, Bii, k, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( buf, kk, 0, AnpD, size ), &AnpD, BETA, Mptr( B, k, Bjj, Bld, size ), &Bld ); kk += kbb; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &kbb, &izero, zero, zero, Mptr( B, Bii, k, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kbb, &N, &izero, zero, zero, Mptr( B, k, Bjj, Bld, size ), &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); } if( buf ) free( buf ); } else { /* * If I don't own any entries of sub( A ), then zero the entire sub( B ) * residing in this process. */ pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &N, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } /* * Replicate locally scattered sub( B ) by reducing it */ scope = ( BisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); TYPE->Cgsum2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld, size ), Bld, -1, 0 ); } } } else { /* * sub( A ) and sub( B ) are not both row or column vectors */ zero = TYPE->zero; if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; pad = TYPE->Ftzpad; Broc = 0; if( BisRow ) { ktmp = JB + M; kn = JB + Ainb1D; } else { ktmp = IB + N; kn = IB + Ainb1D; } /* * Loop over the processes in which sub( A ) resides, for each process find the * next process Xroc. Exchange and add the data. */ for( p = 0; p < AnprocsD; p++ ) { mydist = MModSub( p, AprocD, AnprocsD ); myproc = MModAdd( AprocD, mydist, AnprocsD ); if( ( BprocR == p ) && ( AprocR == Broc ) ) { if( BmyprocR == p ) { /* * local add at the intersection of the process cross */ AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD, AnprocsD ); if( AnpD > 0 ) { Aroc = AprocD; kk = ( BisRow ? Aii : Ajj ); if( myproc == Aroc ) { if( BmyprocD == Broc ) { if( AisRow ) add( &M, &Ainb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Ainb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Ainb1D; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &N, &Ainb1D, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ainb1D, &M, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } } Aroc = MModAdd1( Aroc, AnprocsD ); for( k = kn; k < ktmp; k += AnbD ) { kbb = ktmp - k; kbb = MIN( kbb, AnbD ); if( myproc == Aroc ) { if( BmyprocD == Broc ) { if( AisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, kk, Ald, size ), &Ald, BETA, Mptr( B, k, Bjj, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, kk, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, k, Bld, size ), &Bld ); kk += kbb; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &N, &kbb, &izero, zero, zero, Mptr( B, Bii, k, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kbb, &M, &izero, zero, zero, Mptr( B, k, Bjj, Bld, size ), &Bld ); } } Aroc = MModAdd1( Aroc, AnprocsD ); } } } } else { /* * Message exchange */ if( ( AmyprocR == AprocR ) && ( AmyprocD == p ) ) { AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD, AnprocsD ); if( AnpD > 0 ) { if( AisRow ) TYPE->Cgesd2d( ctxt, M, AnpD, Mptr( A, Aii, Ajj, Ald, size ), Ald, Broc, BprocR ); else TYPE->Cgesd2d( ctxt, AnpD, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, BprocR, Broc ); } } if( BmyprocR == BprocR ) { AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD, AnprocsD ); if( AnpD > 0 ) { Aroc = AprocD; kk = 0; if( BmyprocD == Broc ) { if( AisRow ) { buf = PB_Cmalloc( M * AnpD * size ); TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR, p ); } else { buf = PB_Cmalloc( AnpD * N * size ); TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, p, AprocR ); } } if( myproc == Aroc ) { if( BmyprocD == Broc ) { if( AisRow ) add( &M, &Ainb1D, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Ainb1D; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &N, &Ainb1D, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ainb1D, &M, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } } Aroc = MModAdd1( Aroc, AnprocsD ); for( k = kn; k < ktmp; k += AnbD ) { kbb = ktmp - k; kbb = MIN( kbb, AnbD ); if( myproc == Aroc ) { if( BmyprocD == Broc ) { if( AisRow ) add( &M, &kbb, ALPHA, Mptr( buf, 0, kk, M, size ), &M, BETA, Mptr( B, k, Bjj, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( buf, kk, 0, AnpD, size ), &AnpD, BETA, Mptr( B, Bii, k, Bld, size ), &Bld ); kk += kbb; } else { if( BisRow ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &N, &kbb, &izero, zero, zero, Mptr( B, Bii, k, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kbb, &M, &izero, zero, zero, Mptr( B, k, Bjj, Bld, size ), &Bld ); } } Aroc = MModAdd1( Aroc, AnprocsD ); } if( ( BmyprocD == Broc ) && ( buf ) ) free( buf ); } } } Broc = MModAdd1( Broc, BnprocsD ); } if( BmyprocR == BprocR ) { /* * Replicate locally scattered sub( B ) by reducing it */ scope = ( BisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); TYPE->Cgsum2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld, size ), Bld, -1, 0 ); } } } if( BisR ) { /* * Replicate sub( B ) */ if( BisRow ) { if( AisRow ) { Bm = M; Bn = N; } else { Bm = N; Bn = M; } top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( BmyprocR == BprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld, BprocR, BmyprocD ); } else { if( AisRow ) { Bm = N; Bn = M; } else { Bm = M; Bn = N; } top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( BmyprocR == BprocR ) TYPE->Cgebs2d( ctxt, ROW, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, ROW, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld, BmyprocD, BprocR ); } } } else { /* * sub( B ) is replicated in every process. Add the data in process row or * column AprocR when sub( A ) is not replicated and in every process otherwise. */ if( AisR || ( AmyprocR == AprocR ) ) { zero = TYPE->zero; if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; } pad = TYPE->Ftzpad; AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD ); if( AnpD > 0 ) { Aroc = AprocD; kk = ( AisRow ? Ajj : Aii ); if( BisRow ) { ktmp = JB + ( RRorCC ? N : M ); kn = JB + Ainb1D; } else { ktmp = IB + ( RRorCC ? M : N ); kn = IB + Ainb1D; } if( AmyprocD == Aroc ) { if( AisRow ) add( &M, &Ainb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Ainb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Ainb1D; } else { if( RRorCC ) { if( AisRow ) { Bm = M; Bn = Ainb1D; } else { Bm = Ainb1D; Bn = N; } } else { if( AisRow ) { Bm = Ainb1D; Bn = M; } else { Bm = N; Bn = Ainb1D; } } pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Bm, &Bn, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); for( k = kn; k < ktmp; k += AnbD ) { kbb = ktmp - k; kbb = MIN( kbb, AnbD ); if( BisRow ) { buf = Mptr( B, Bii, k, Bld, size ); } else { buf = Mptr( B, k, Bjj, Bld, size ); } if( AmyprocD == Aroc ) { if( AisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, kk, Ald, size ), &Ald, BETA, buf, &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, kk, Ajj, Ald, size ), &Ald, BETA, buf, &Bld ); kk += kbb; } else { if( RRorCC ) { if( AisRow ) { Bm = M; Bn = kbb; } else { Bm = kbb; Bn = N; } } else { if( AisRow ) { Bm = kbb; Bn = M; } else { Bm = N; Bn = kbb; } } pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Bm, &Bn, &izero, zero, zero, buf, &Bld ); } Aroc = MModAdd1( Aroc, AnprocsD ); } } else { if( RRorCC ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &M, &N, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &N, &M, &izero, zero, zero, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } /* * Replicate locally scattered sub( B ) by reducing it in the process scope of * sub( A ) */ scope = ( AisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); if( RRorCC ) TYPE->Cgsum2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld, size ), Bld, -1, 0 ); else TYPE->Cgsum2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld, size ), Bld, -1, 0 ); } if( !AisR ) { /* * If sub( A ) is not replicated, then broadcast the result to the other pro- * cesses that own a piece of sub( B ), but were not involved in the above * addition operation. */ if( RRorCC ) { Bm = M; Bn = N; } else { Bm = N; Bn = M; } if( AisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( AmyprocR == AprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld, AprocR, AmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( AmyprocR == AprocR ) TYPE->Cgebs2d( ctxt, ROW, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, ROW, top, Bm, Bn, Mptr( B, Bii, Bjj, Bld, size ), Bld, AmyprocD, AprocR ); } } } /* * End of PB_CpaxpbyDN */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpaxpby.c0000640000175000017500000007136210363532303017770 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpaxpby( PBTYP_T * TYPE, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * AROC, char * BETA, char * B, int IB, int JB, int * DESCB, char * BROC ) #else void PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ) /* * .. Scalar Arguments .. */ char * AROC, * BROC, * CONJUG; int IA, IB, JA, JB, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_Cpaxpby adds one submatrix to another, * * sub( B ) := beta * sub( B ) + alpha * sub( A ), or, * * sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ), * * where both submatrices are distributed along one dimension; sub( A ) * always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or 'r' * sub( A ) is distributed along a process row, otherwise sub( A ) * is distributed along a process column. When sub( A ) is distributed * along a process row and BROC is 'R' or 'r' or sub( A ) is distributed * along a process column and BROC is 'C' or 'c', then sub( B ) denotes * B(IB:IB+M-1,JB:JB+N-1), and B(IB:IB+N-1,JB:JB+M-1) otherwise. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( B ) as follows: * CONJUG = 'N' or 'n': * sub( B ) := beta*sub( B ) + alpha*sub( A ), * otherwise * sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A cor- * responding to the entries of the submatrix sub( A ) need not * be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least * Lc( 1, JA+N-1 ). Before entry, this array contains the local * entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the subvector * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector, * and a column vector otherwise. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B corres- * ponding to the entries of the submatrix sub( B ) need not be * set on input. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where LLD_B * is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and * sub( B ) are both distributed along a process column or a * process row. In that case, Kb is at least Lc( 1, JB+N-1 ). * Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and * Kb is at least Lc( 1, JB+M-1 ). Before entry, this array * contains the local entries of the matrix B. On exit, sub( B ) * is overwritten with the updated submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BROC (global input) pointer to CHAR * On entry, BROC specifies the orientation of the subvector * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector, * and a column vector otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char ascope, bscope, * buf = NULL, * one, * top, tran, * zero; int Acol, Aii, AinbD, Ainb1D, AisD, AisR, AisRow, AiD, Ajj, Ald, AmyprocD, AmyprocR, AnbD, AnD, AnR, AnpD, AnprocsD, AnprocsR, AprocD, AprocR, Aroc, Arow, Bcol, Bii, BinbD, Binb1D, BisD, BisR, BisRow, BiD, Bjj, Bld, BmyprocD, BmyprocR, BnbD, BnD, BnR, BnpD, BnprocsD, BnprocsR, BprocD, BprocR, Broc, Brow, BsrcD, OneBlock, OneDgrid, RRorCC, Square, cdst, csrc, ctxt, dst, gcdPQ, k, l, lcmPQ, lcmb, ma, mb, mycol, myrow, na, nb, npcol, npq, nprow, p, q, rdst, rsrc, size, src; PB_VM_T VM; MMADD_T add; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); /* * Determine if sub( A ) is distributed or not */ if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 ) AisD = ( ( DESCA[CSRC_] >= 0 ) && ( ( AnprocsD = npcol ) > 1 ) ); else AisD = ( ( DESCA[RSRC_] >= 0 ) && ( ( AnprocsD = nprow ) > 1 ) ); /* * Determine if sub( B ) is distributed or not */ if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 ) BisD = ( ( DESCB[CSRC_] >= 0 ) && ( ( BnprocsD = npcol ) > 1 ) ); else BisD = ( ( DESCB[RSRC_] >= 0 ) && ( ( BnprocsD = nprow ) > 1 ) ); /* * AisD && BisD <=> both operands are indeed distributed */ if( AisD && BisD ) { /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); if( AisRow ) { AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; AiD = JA; AnD = N; AnR = M; AprocD = Acol; AmyprocD = mycol; AprocR = Arow; AmyprocR = myrow; AnprocsR = nprow; AisR = ( ( DESCA[ RSRC_ ] == -1 ) || ( AnprocsR == 1 ) ); } else { AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; AiD = IA; AnD = M; AnR = N; AprocD = Arow; AmyprocD = myrow; AprocR = Acol; AmyprocR = mycol; AnprocsR = npcol; AisR = ( ( DESCA[ CSRC_ ] == -1 ) || ( AnprocsR == 1 ) ); } Ainb1D = PB_Cfirstnb( AnD, AiD, AinbD, AnbD ); /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ... */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); if( BisRow ) { BinbD = DESCB[ INB_ ]; BnbD = DESCB[ NB_ ]; BsrcD = DESCB[ CSRC_ ]; Bld = DESCB[ LLD_ ]; BiD = JB; if( AisRow ) { BnD = N; BnR = M; } else { BnD = M; BnR = N; } BprocD = Bcol; BmyprocD = mycol; BprocR = Brow; BmyprocR = myrow; BnprocsR = nprow; BisR = ( ( DESCB[ RSRC_ ] == -1 ) || ( BnprocsR == 1 ) ); } else { BinbD = DESCB[ IMB_ ]; BnbD = DESCB[ MB_ ]; BsrcD = DESCB[ RSRC_ ]; Bld = DESCB[ LLD_ ]; BiD = IB; if( AisRow ) { BnD = N; BnR = M; } else { BnD = M; BnR = N; } BprocD = Brow; BmyprocD = myrow; BprocR = Bcol; BmyprocR = mycol; BnprocsR = npcol; BisR = ( ( DESCB[ CSRC_ ] == -1 ) || ( BnprocsR == 1 ) ); } Binb1D = PB_Cfirstnb( BnD, BiD, BinbD, BnbD ); /* * Are sub( A ) and sub( B ) both row or column vectors ? */ RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) ); /* * Do sub( A ) and sub( B ) span more than one process ? */ OneDgrid = ( ( AnprocsD == 1 ) && ( BnprocsD == 1 ) ); OneBlock = ( ( Ainb1D >= AnD ) && ( Binb1D >= BnD ) ); /* * Are sub( A ) and sub( B ) distributed in the same manner ? */ Square = ( ( Ainb1D == Binb1D ) && ( AnbD == BnbD ) && ( AnprocsD == BnprocsD ) ); if( !( AisR ) ) { /* * sub( A ) is distributed but not replicated */ if( BisR ) { /* * If sub( A ) is not replicated, but sub( B ) is, a process row or column * BprocR need to be selected. It will contain the non-replicated vector to * add sub( A ) to. */ if( RRorCC ) { /* * sub( A ) and sub( B ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) ) { /* * sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD. * Enforce a purely local operation by choosing BprocR to be equal to AprocR. */ BprocR = AprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for BprocR to maximize the number of links, i.e reduce contention. */ BprocR = MModAdd1( AprocR, AnprocsR ); } } else { /* * sub( A ) and sub( B ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ BprocR = AprocD; } } else { /* * Neither sub( A ) nor sub( B ) are replicated. If I am not in process row or * column AprocR and not in process row or column BprocR, then quick return. */ if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) return; } } else { /* * sub( A ) is distributed and replicated (so no quick return possible) */ if( BisR ) { /* * sub( B ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( A ) and sub( B ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) ) { /* * sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD. * Enforce a purely local operation by choosing AprocR and BprocR to be equal * to zero. */ AprocR = BprocR = 0; } else { /* * Otherwise, communication has to occur, so select BprocR to be zero and the * next process row or column for AprocR in order to maximize the number of * used links, i.e reduce contention. */ BprocR = 0; AprocR = MModAdd1( BprocR, BnprocsR ); } } else { /* * sub( A ) and sub( B ) are distributed in orthogonal directions, select the * origin processes. */ AprocR = BprocD; BprocR = AprocD; } } else { /* * sub( B ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( A ) and sub( B ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) ) { /* * sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD. * Enforce a purely local operation by choosing AprocR to be equal to BprocR. */ AprocR = BprocR; if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) return; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for AprocR to maximize the number of links, i.e reduce contention. */ AprocR = MModAdd1( BprocR, BnprocsR ); } } else { /* * sub( A ) and sub( B ) are distributed in orthogonal directions, what is * chosen for AprocR does not really matter. Select the origin process. */ AprocR = BprocD; if( ( OneDgrid || OneBlock || Square ) && ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) return; } } } /* * Even if sub( A ) and/or sub( B ) are replicated, only two process row or * column are active, namely AprocR and BprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ size = TYPE->size; /* * A purely local operation occurs iff the operands start in the same process * and, if either the grid is mono-dimensional or there is a single local block * to be added or if both operands are aligned. */ if( ( ( RRorCC && ( AprocD == BprocD ) && ( AisR || BisR || ( AprocR == BprocR ) ) ) || ( !( RRorCC ) && ( BisR || ( AprocD == BprocR ) ) && ( AisR || ( AprocR == BprocD ) ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !AisR && ( AmyprocR == AprocR ) ) || ( AisR && ( BisR || BmyprocR == BprocR ) ) ) { AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD ); BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD ); if( ( AnpD > 0 ) && ( BnpD > 0 ) ) { /* * Select the local add routine accordingly to RRorCC */ if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; } /* * Local addition */ if( AisRow ) add( &AnR, &AnpD, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &AnpD, &AnR, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } } if( RRorCC && AisR && BisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to add the distributed vectors in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; } if( ( AisR && BisR ) || ( AmyprocR == AprocR ) ) { AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD ); if( AnpD > 0 ) { dst = BprocD + MModSub( AmyprocD, AprocD, AnprocsD ); dst = MPosMod( dst, BnprocsD ); if( AisRow ) { ma = AnR; na = AnpD; } else { ma = AnpD; na = AnR; } if( !( AisR && BisR ) ) { if( BisRow ) { rdst = BprocR; cdst = dst; } else { rdst = dst; cdst = BprocR; } } else { if( BisRow ) { if( !AisRow ) { rdst = AmyprocR; } else { rdst = MModAdd1( BmyprocR, BnprocsR ); } cdst = dst; } else { rdst = dst; if( AisRow ) { cdst = AmyprocR; } else { cdst = MModAdd1( BmyprocR, BnprocsR ); } } } if( ( myrow == rdst ) && ( mycol == cdst ) ) { add( &ma, &na, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } else { TYPE->Cgesd2d( ctxt, ma, na, Mptr( A, Aii, Ajj, Ald, size ), Ald, rdst, cdst ); } } } if( ( AisR && BisR ) || ( BmyprocR == BprocR ) ) { BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD ); if( BnpD > 0 ) { src = AprocD + MModSub( BmyprocD, BprocD, BnprocsD ); src = MPosMod( src, AnprocsD ); if( AisRow ) { ma = BnR; na = BnpD; } else { ma = BnpD; na = BnR; } if( !( AisR && BisR ) ) { if( AisRow ) { rsrc = AprocR; csrc = src; } else { rsrc = src; csrc = AprocR; } } else { if( AisRow ) { if( !BisRow ) { rsrc = BmyprocR; } else { rsrc = MModSub1( AmyprocR, AnprocsR ); } csrc = src; } else { rsrc = src; if( BisRow ) { csrc = BmyprocR; } else { csrc = MModSub1( AmyprocR, AnprocsR ); } } } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( BnpD * BnR * size ); TYPE->Cgerv2d( ctxt, ma, na, buf, ma, rsrc, csrc ); add( &ma, &na, ALPHA, buf, &ma, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); if( buf ) free( buf ); } } } if( AisR && BisR ) return; } else { /* * General case */ if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) tran = CCONJG; else tran = CNOTRAN; } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) tran = CCOTRAN; else tran = CTRAN; } if( AisRow ) { ascope = CCOLUMN; ma = AnR; } else { ascope = CROW; na = AnR; } bscope = ( BisRow ? CCOLUMN : CROW ); lcmb = PB_Clcm( AnprocsD * AnbD, BnprocsD * BnbD ); one = TYPE->one; zero = TYPE->zero; gcdPQ = PB_Cgcd( AnprocsD, BnprocsD ); lcmPQ = ( AnprocsD / gcdPQ ) * BnprocsD; for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { Aroc = MModAdd( AprocD, p, AnprocsD ); Broc = MModAdd( BprocD, q, BnprocsD ); if( ( AmyprocD == Aroc ) || ( BmyprocD == Broc ) ) { AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, Aroc, AprocD, AnprocsD ); BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, Broc, BprocD, BnprocsD ); PB_CVMinit( &VM, 0, AnpD, BnpD, Ainb1D, Binb1D, AnbD, BnbD, p, q, AnprocsD, BnprocsD, lcmb ); if( npq = PB_CVMnpq( &VM ) ) { if( ( RRorCC && ( Aroc == Broc ) && ( AisR || ( AprocR == BprocR ) ) ) || ( !( RRorCC ) && ( Aroc == BprocR ) && ( AisR || ( AprocR == Broc ) ) ) ) { if( ( BmyprocD == Broc ) && ( BmyprocR == BprocR ) ) { PB_CVMloc( TYPE, &VM, ROW, &ascope, PACKING, &tran, npq, AnR, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), Bld ); } } else { if( ( AmyprocR == AprocR ) && ( AmyprocD == Aroc ) ) { if( AisRow ) { na = npq; } else { ma = npq; } buf = PB_Cmalloc( ma * na * size ); PB_CVMpack( TYPE, &VM, ROW, &ascope, PACKING, NOTRAN, npq, AnR, one, Mptr( A, Aii, Ajj, Ald, size ), Ald, zero, buf, ma ); if( BisRow ) { rdst = BprocR; cdst = Broc; } else { rdst = Broc; cdst = BprocR; } TYPE->Cgesd2d( ctxt, ma, na, buf, ma, rdst, cdst ); if( buf ) free ( buf ); } if( ( BmyprocR == BprocR ) && ( BmyprocD == Broc ) ) { if( AisRow ) { na = npq; rsrc = AprocR; csrc = Aroc; } else { ma = npq; rsrc = Aroc; csrc = AprocR; } buf = PB_Cmalloc( ma * na * size ); TYPE->Cgerv2d( ctxt, ma, na, buf, ma, rsrc, csrc ); PB_CVMpack( TYPE, &VM, COLUMN, &bscope, UNPACKING, &tran, npq, AnR, BETA, Mptr( B, Bii, Bjj, Bld, size ), Bld, ALPHA, buf, ma ); if( buf ) free ( buf ); } } } } p = MModAdd1( p, AnprocsD ); q = MModAdd1( q, BnprocsD ); } if( AisR ) AprocR = MModAdd1( AprocR, AnprocsR ); } } if( BisR ) { /* * Replicate sub( B ) */ BnpD = PB_Cnumroc( BnD, BiD, BinbD, BnbD, BmyprocD, BsrcD, BnprocsD ); if( BnpD > 0 ) { if( BisRow ) { bscope = CCOLUMN; mb = BnR; nb = BnpD; rsrc = BprocR; csrc = BmyprocD; } else { bscope = CROW; mb = BnpD; nb = BnR; rsrc = BmyprocD; csrc = BprocR; } top = PB_Ctop( &ctxt, BCAST, &bscope, TOP_GET ); if( BmyprocR == BprocR ) { TYPE->Cgebs2d( ctxt, &bscope, top, mb, nb, Mptr( B, Bii, Bjj, Bld, size ), Bld ); } else { TYPE->Cgebr2d( ctxt, &bscope, top, mb, nb, Mptr( B, Bii, Bjj, Bld, size ), Bld, rsrc, csrc ); } } } } else if( !( AisD ) && BisD ) { /* * sub( A ) is not distributed and sub( B ) is distributed. */ PB_CpaxpbyND( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ); } else if( AisD && !( BisD ) ) { /* * sub( A ) is distributed and sub( B ) is not distributed. */ PB_CpaxpbyDN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ); } else { /* * Neither sub( A ) nor sub( B ) are distributed. */ PB_CpaxpbyNN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ); } /* * End of PB_Cpaxpby */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpsymmAB.c0000640000175000017500000007515010363532303020034 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpsymmAB( PBTYP_T * TYPE, char * DIRECAB, char * CONJUG, char * SIDE, char * UPLO, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpsymmAB( TYPE, DIRECAB, CONJUG, SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECAB, * SIDE, * UPLO; int IA, IB, IC, JA, JB, JC, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_CpsymmAB performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric or Hermitian * submatrix and sub( B ) and sub( C ) are m by n submatrices. * * This is the outer-product algorithm using the logical aggregation * blocking technique. The submatrix operand sub( C ) stays in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECAB (global input) pointer to CHAR * On entry, DIRECAB specifies the direction in which the rows * or columns of sub( A ) and sub( B ) should be looped over as * follows: * DIRECAB = 'F' or 'f' forward or increasing, * DIRECAB = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( A ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( A ) is symmetric, * CONJUG = 'Z' or 'z' sub( A ) is Hermitian. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether the symmetric or Hermitian * submatrix sub( A ) appears on the left or right in the opera- * tion as follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the submatrix sub( A ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( A ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( A ) are referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric or Hermitian submatrix sub( A ), such that when * UPLO = 'U' or 'u', this array contains the local entries of * the upper triangular part of the submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric or Hermitian submatrix sub( A ), and the local * entries of the strictly upper triangular of sub( A ) are not * referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric or Hermitian submatrix sub( A ), such that when * UPLO = 'U' or 'u', this array contains the local entries of * the upper triangular part of the submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric or Hermitian submatrix sub( A ), and the local * entries of the strictly upper triangular of sub( A ) are not * referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char GatherDir, ScatterDir, * one, top, tran, * zero; int Afr, An, Bcol, Bcurcol, Bcurimb1, Bcurinb1, Bcurrow, Bfr, Bii, Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq, Bnq0, Brow, Ccol, Ccurcol, Ccurimb1, Ccurinb1, Ccurrow, Cii, Cimb, Cimb1, Cinb, Cinb1, Cjj, Cld, Cmb, Cmp, Cmp0, Cnb, Cnq, Cnq0, Crow, WABfr, WACfr, WBCfr, WBCsum, conjg, ctxt, fwd, k, kb, kbb, kend, kstart, kstep, ktmp, lside, mycol, myrow, npcol, nprow, size, upper; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Bd0 [DLEN_], Cd0 [DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WABd[DLEN_], WACd[DLEN_], WBCd [DLEN_]; char * Aptr = NULL, * Bptr = NULL, * Bptr0 = NULL, * Cptr0 = NULL, * WAB = NULL, * WAC = NULL, * WBC = NULL; /* .. * .. Executable Statements .. * */ /* * sub( C ) = beta * sub( C ) */ PB_Cplascal( TYPE, ALL, NOCONJG, M, N, BETA, C, IC, JC, DESCC ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); An = ( ( lside = ( Mupcase( SIDE[0] ) == CLEFT ) ) ? M : N ); upper = ( Mupcase( UPLO[0] ) == CUPPER ); tran = ( ( conjg = ( Mupcase( CONJUG[0] ) == CCONJG ) ) ? CCOTRAN : CTRAN ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; /* * Figure out the loop bounds accordingly to DIRECAB */ kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); if( ( fwd = ( Mupcase( DIRECAB[0] ) == CFORWARD ) ) != 0 ) { kstart = 0; kend = ( ( An - 1 ) / kb + 1 ) * kb; kstep = kb; GatherDir = ScatterDir = CFORWARD; } else { kstart = ( ( An - 1 ) / kb ) * kb; kend = kstep = -kb; GatherDir = ScatterDir = CBACKWARD; } /* * Compute local information for sub( B ) and sub( C ) */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; Bimb1 = PB_Cfirstnb( M, IB, Bimb, Bmb ); Bmp0 = PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( N, JB, Binb, Bnb ); Bnq0 = PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Crow, &Ccol ); Cimb = DESCC[IMB_]; Cinb = DESCC[INB_]; Cmb = DESCC[MB_ ]; Cnb = DESCC[NB_ ]; Cld = DESCC[LLD_]; Cimb1 = PB_Cfirstnb( M, IC, Cimb, Cmb ); Cmp0 = PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cinb1 = PB_Cfirstnb( N, JC, Cinb, Cnb ); Cnq0 = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol ); if( ( Cmp0 > 0 ) && ( Cnq0 > 0 ) ) Cptr0 = Mptr( C, Cii, Cjj, Cld, size ); if( lside ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( upper ) { for( k = kstart; k != kend; k += kstep ) { kbb = An - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &GatherDir, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) over * C( IC:IC+k+kbb-1, JC:JC+N-1 ) -> WAC */ PB_Cdescset( Cd0, ktmp, N, Cimb1, Cinb1, Cmb, Cnb, Crow, Ccol, ctxt, Cld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Cd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WAC, WACd, &WACfr ); /* * Zero lower triangle of WAC( k:k+kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJUG, kbb, kbb, zero, zero, WAC, k, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, k+1, 0, WACd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over C( IC:IC+k+kbb-1, JC:JC+N-1 ) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Cd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WBC, WBCd, &WBCfr ); /* * C( IC:IC+k+kbb-1, JC:JC+N-1 ) += ALPHA * WAC * WBC */ Cmp = PB_Cnumroc( ktmp, 0, Cimb1, Cmb, myrow, Crow, nprow ); if( ( Cmp > 0 ) && ( Cnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp, &Cnq0, &kbb, ALPHA, WAC, &WACd[LLD_], WBC, &WBCd[LLD_], one, Cptr0, &Cld ); if( WBCfr ) free( WBC ); if( Bfr ) free( Bptr ); /* * Replicate WAC = A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) over * B( IB:IB+k+kbb-1, JB:JB+N-1 ) -> WAB */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, WAC, 0, 0, WACd, COLUMN, &WAB, WABd, &WABfr ); /* * Zero lower triangle of WAB( k:k+kbb-1, 0:kbb-1 ) */ PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, zero, WAB, k, 0, WABd ); /* * WBC := ALPHA*A(IA:IA+k+kbb-1, JA+k:JA+k+kbb-1)'*B( IB:IB+k+kbb-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); Bmp = PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, ALPHA, WAB, &WABd[LLD_], Bptr0, &Bld, zero, WBC, &WBCd[LLD_] ); if( WABfr ) free( WAB ); if( WACfr ) free( WAC ); if( Afr ) free( Aptr ); if( WBCsum ) { WBCd[RSRC_] = PB_Cindxg2p( ( fwd ? k : k + kbb - 1 ), Cimb1, Cmb, Crow, Crow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WBC, WBCd[LLD_], WBCd[RSRC_], mycol ); } /* * C( IC+k:IC+k+kbb-1, JC:JC+N-1 ) := C( IC+k:IC+k+kbb-1, JC:JC+N-1 ) + WBC */ PB_CScatterV( TYPE, &ScatterDir, kbb, N, WBC, 0, 0, WBCd, ROW, one, C, IC+k, JC, DESCC, ROW ); if( WBCfr ) free( WBC ); } } else { for( k = kstart; k != kend; k += kstep ) { ktmp = An - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &GatherDir, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over C( IC+k:IC+M-1, JC:JC+N-1 ) */ Ccurimb1 = PB_Cfirstnb( ktmp, IC+k, Cimb, Cmb ); Ccurrow = PB_Cindxg2p( k, Cimb1, Cmb, Crow, Crow, nprow ); PB_Cdescset( Cd0, ktmp, N, Ccurimb1, Cinb1, Cmb, Cnb, Ccurrow, Ccol, ctxt, Cld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Cd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WAC, WACd, &WACfr ); /* * Zero upper triangle of WAC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJUG, kbb, kbb, zero, zero, WAC, 0, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 0, 1, WACd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over C( IC+k:IC+M-1, JC:JC+N-1 ) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Cd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WBC, WBCd, &WBCfr ); /* * C( IC+k:IC+M-1, JC:JC+N-1 ) += ALPHA * WAC * WBC */ Cmp = PB_Cnumroc( ktmp, k, Cimb1, Cmb, myrow, Crow, nprow ); if( ( Cmp > 0 ) && ( Cnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp, &Cnq0, &kbb, ALPHA, WAC, &WACd[LLD_], WBC, &WBCd[LLD_], one, Mptr( Cptr0, Cmp0-Cmp, 0, Cld, size ), &Cld ); if( WBCfr ) free( WBC ); if( Bfr ) free( Bptr ); /* * Replicate WAC = A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over * B( IB+k:IB+M-1, JB:JB+N-1 ) -> WAB */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, WAC, 0, 0, WACd, COLUMN, &WAB, WABd, &WABfr ); /* * Zero upper triangle of WAB( 0:kbb-1, 0:kbb-1 ) */ PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, zero, WAB, 0, 0, WABd ); /* * WBC := ALPHA*A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 )'*B( IB+k:IB+M-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); Bmp = PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, ALPHA, WAB, &WABd[LLD_], Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld, zero, WBC, &WBCd[LLD_] ); if( WABfr ) free( WAB ); if( WACfr ) free( WAC ); if( Afr ) free( Aptr ); if( WBCsum ) { WBCd[RSRC_] = PB_Cindxg2p( ( fwd ? k : k + kbb - 1 ), Cimb1, Cmb, Crow, Crow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WBC, WBCd[LLD_], WBCd[RSRC_], mycol ); } /* * C( IC+k:IC+k+kbb-1, JC:JC+N-1 ) := C( IC+k:IC+k+kbb-1, JC:JC+N-1 ) + WBC */ PB_CScatterV( TYPE, &ScatterDir, kbb, N, WBC, 0, 0, WBCd, ROW, one, C, IC+k, JC, DESCC, ROW ); if( WBCfr ) free( WBC ); } } } else { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( upper ) { for( k = kstart; k != kend; k += kstep ) { ktmp = An - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &GatherDir, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over C( IC:IC+M-1, JC+k:JC+N-1 ) */ Ccurinb1 = PB_Cfirstnb( ktmp, JC+k, Cinb, Cnb ); Ccurcol = PB_Cindxg2p( k, Cinb1, Cnb, Ccol, Ccol, npcol ); PB_Cdescset( Cd0, M, ktmp, Cimb1, Ccurinb1, Cmb, Cnb, Crow, Ccurcol, ctxt, Cld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Cd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WAC, WACd, &WACfr ); /* * Zero lower triangle of WAC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJUG, kbb, kbb, zero, zero, WAC, 0, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 1, 0, WACd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over C( IC:IC+M-1, JC+k:JC+N-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Cd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WBC, WBCd, &WBCfr ); /* * C( IC:IC+M-1, JC+k:JC+N-1 ) += ALPHA * WBC * WAC */ Cnq = PB_Cnumroc( ktmp, k, Cinb1, Cnb, mycol, Ccol, npcol ); if( ( Cmp0 > 0 ) && ( Cnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq, &kbb, ALPHA, WBC, &WBCd[LLD_], WAC, &WACd[LLD_], one, Mptr( Cptr0, 0, Cnq0-Cnq, Cld, size ), &Cld ); if( WBCfr ) free( WBC ); if( Bfr ) free( Bptr ); /* * Replicate WAC = A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over * B( IB:IB+M-1, JB+k:JB+N-1 ) -> WAB */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, WAC, 0, 0, WACd, ROW, &WAB, WABd, &WABfr ); /* * Zero lower triangle of WAB( 0:kbb-1, 0:kbb-1 ) */ PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, zero, WAB, 0, 0, WABd ); /* * WBC := ALPHA*B( IB:IB+M-1, JB+k:JB+N-1 )*A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 )' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); Bnq = PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Bmp0, &kbb, &Bnq, ALPHA, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld, WAB, &WABd[LLD_], zero, WBC, &WBCd[LLD_] ); if( WABfr ) free( WAB ); if( WACfr ) free( WAC ); if( Afr ) free( Aptr ); if( WBCsum ) { WBCd[CSRC_] = PB_Cindxg2p( ( fwd ? k : k + kbb - 1 ), Cinb1, Cnb, Ccol, Ccol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WBC, WBCd[LLD_], myrow, WBCd[CSRC_] ); } /* * C( IC:IC+M-1, JC+k:JC+k+kbb-1 ) := C( IC:IC+M-1, JC+k:JC+k+kbb-1 ) + WBC */ PB_CScatterV( TYPE, &ScatterDir, M, kbb, WBC, 0, 0, WBCd, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( WBCfr ) free( WBC ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = An - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &GatherDir, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over * C( IC:IC+M-1, JC:JC+k+kbb-1 ) -> WAC */ PB_Cdescset( Cd0, M, ktmp, Cimb1, Cinb1, Cmb, Cnb, Crow, Ccol, ctxt, Cld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Cd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WAC, WACd, &WACfr ); /* * Zero upper triangle of WAC( 0:kbb-1, k:k+kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJUG, kbb, kbb, zero, zero, WAC, 0, k, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 0, k+1, WACd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over C( IC:IC+M-1, JC:JC+k+kbb-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Cd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WBC, WBCd, &WBCfr ); /* * C( IC:IC+M-1, JC:JC+k+kbb-1 ) += ALPHA * WBC * WAC */ Cnq = PB_Cnumroc( ktmp, 0, Cinb1, Cnb, mycol, Ccol, npcol ); if( ( Cmp0 > 0 ) && ( Cnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq, &kbb, ALPHA, WBC, &WBCd[LLD_], WAC, &WACd[LLD_], one, Cptr0, &Cld ); if( WBCfr ) free( WBC ); if( Bfr ) free( Bptr ); /* * Replicate WAC = A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) -> WAB */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, WAC, 0, 0, WACd, ROW, &WAB, WABd, &WABfr ); /* * Zero upper triangle of WAB( 0:kbb-1, k:k+kbb-1 ) */ PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, zero, WAB, 0, k, WABd ); /* * WBC := ALPHA*B( IB:IB+M-1, JB:JB+k+kbb-1 )*A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 )' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); Bnq = PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Bmp0, &kbb, &Bnq, ALPHA, Bptr0, &Bld, WAB, &WABd[LLD_], zero, WBC, &WBCd[LLD_] ); if( WABfr ) free( WAB ); if( WACfr ) free( WAC ); if( Afr ) free( Aptr ); if( WBCsum ) { WBCd[CSRC_] = PB_Cindxg2p( ( fwd ? k : k + kbb - 1 ), Cinb1, Cnb, Ccol, Ccol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WBC, WBCd[LLD_], myrow, WBCd[CSRC_] ); } /* * C( IC:IC+M-1, JC+k:JC+k+kbb-1 ) := C( IC:IC+M-1, JC+k:JC+k+kbb-1 ) + WBC */ PB_CScatterV( TYPE, &ScatterDir, M, kbb, WBC, 0, 0, WBCd, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( WBCfr ) free( WBC ); } } } /* * End of PB_CpsymmAB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/Makefile0000640000175000017500000001222610363532303017267 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0) # # Module: Makefile # # Purpose: PTOOLS makefile # # Creation date: April 1, 1998 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../../SLmake.inc ############################################################################ # # This is the makefile to create a library for the PBLAS F77 tools. # The files are grouped as follows: # # ALLTOOLS -- Auxiliary routines for Level 1, 2 and 3 PBLAS # # STOOLS -- Single precision real PBLAS Level F77 tools routines # CTOOLS -- Single precision complex PBLAS Level F77 tools routines # DTOOLS -- Double precision real PBLAS Level F77 tools routines # ZTOOLS -- Double precision complex PBLAS Level F77 tools routines # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ../../../SLmake.inc file # definitions to match your compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # # To remove the object files after the library is created, enter # make clean # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # ############################################################################ all: single double complex complex16 #--------------------------------------------------------------------------- # The following line defines auxiliary routines needed by the PBLAS tools #--------------------------------------------------------------------------- ALLCTOOLS = \ PB_CGatherV.o PB_CInV.o PB_CInV2.o PB_CInOutV.o \ PB_CInOutV2.o PB_COutV.o PB_CScatterV.o PB_CVMinit.o \ PB_CVMloc.o PB_CVMnpq.o PB_CVMpack.o PB_CVMswp.o \ PB_CVMupdate.o PB_CVMcontig.o PB_Cabort.o PB_Cainfog2l.o \ PB_Cbinfo.o PB_Cchkmat.o PB_Cchkvec.o PB_Cconjg.o \ PB_Cgetbuf.o PB_Cinfog2l.o PB_Citypeset.o PB_Cgcd.o \ PB_Clcm.o PB_Cmalloc.o PB_Cnumroc.o PB_Cg2lrem.o \ PB_Cindxg2p.o PB_Cnnxtroc.o PB_Cnpreroc.o PB_CpswapNN.o \ PB_CpswapND.o PB_Cpdot11.o PB_CpdotNN.o PB_CpdotND.o \ PB_CpaxpbyNN.o PB_CpaxpbyND.o PB_CpaxpbyDN.o PB_Cpaxpby.o \ PB_CpgemmBC.o PB_CpgemmAC.o PB_CpgemmAB.o PB_Cplaprnt.o \ PB_Cplapad.o PB_Cplapd2.o PB_Cplascal.o PB_Cplasca2.o \ PB_Cplacnjg.o PB_Cpsym.o PB_CpsymmAB.o PB_CpsymmBC.o \ PB_Cpsyr.o PB_CpsyrkA.o PB_CpsyrkAC.o PB_Cpsyr2.o \ PB_Cpsyr2kA.o PB_Cpsyr2kAC.o PB_Cptrm.o PB_Cpgeadd.o \ PB_Cptradd.o PB_Cptran.o PB_CptrmmAB.o PB_CptrmmB.o \ PB_Cptrsm.o PB_CptrsmAB.o PB_CptrsmAB0.o PB_CptrsmAB1.o \ PB_CptrsmB.o PB_Cptrsv.o PB_Ctop.o PB_Ctzahemv.o \ PB_Ctzasymv.o PB_Ctzatrmv.o PB_Ctzhemm.o PB_Ctzhemv.o \ PB_Ctzher.o PB_Ctzherk.o PB_Ctzher2.o PB_Ctzher2k.o \ PB_Ctzsymm.o PB_Ctzsymv.o PB_Ctzsyr.o PB_Ctzsyrk.o \ PB_Ctzsyr2.o PB_Ctzsyr2k.o PB_Ctztrmm.o PB_Ctztrmv.o \ PB_Cwarn.o PB_freebuf_.o PB_topget_.o PB_topset_.o \ PB_Cdescset.o PB_Cdescribe.o PB_CargFtoC.o PB_Cfirstnb.o \ PB_Clastnb.o PB_Cspan.o #--------------------------------------------------------------------------- ALLTOOLS = $(ALLCTOOLS) $(ALLTOOLS): $(FRC) #--------------------------------------------------------------------------- # The C PBLAS tools #--------------------------------------------------------------------------- SCTOOLS = \ PB_Cstypeset.o DCTOOLS = \ PB_Cdtypeset.o CCTOOLS = \ PB_Cctypeset.o ZCTOOLS = \ PB_Cztypeset.o #--------------------------------------------------------------------------- STOOLS = $(SCTOOLS) CTOOLS = $(CCTOOLS) $(SCTOOLS) DTOOLS = $(DCTOOLS) ZTOOLS = $(ZCTOOLS) $(DCTOOLS) $(STOOLS): $(FRC) $(CTOOLS): $(FRC) $(DTOOLS): $(FRC) $(ZTOOLS): $(FRC) #--------------------------------------------------------------------------- SPTOOLS = $(ALLTOOLS) $(STOOLS) CPTOOLS = $(ALLTOOLS) $(CTOOLS) DPTOOLS = $(ALLTOOLS) $(DTOOLS) ZPTOOLS = $(ALLTOOLS) $(ZTOOLS) single: $(SPTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(SPTOOLS) $(RANLIB) $(SCALAPACKLIB) complex: $(CPTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(CPTOOLS) $(RANLIB) $(SCALAPACKLIB) double: $(DPTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(DPTOOLS) $(RANLIB) $(SCALAPACKLIB) complex16: $(ZPTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ZPTOOLS) $(RANLIB) $(SCALAPACKLIB) FRC: @FRC=$(FRC) clean: rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cdescribe.c0000640000175000017500000002270110363532303020236 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cdescribe( int M, int N, int IA, int JA, int * DA, int NPROW, int NPCOL, int MYROW, int MYCOL, int * II, int * JJ, int * LDA, int * IMB, int * INB, int * MB, int * NB, int * PROW, int * PCOL, int * DA0 ) #else void PB_Cdescribe( M, N, IA, JA, DA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, LDA, IMB, INB, MB, NB, PROW, PCOL, DA0 ) /* * .. Scalar Arguments .. */ int IA, *II, * IMB, * INB, JA, * JJ, * LDA, M, * MB, MYCOL, MYROW, N, * NB, NPCOL, NPROW, * PCOL, * PROW; /* * .. Array Arguments .. */ int * DA, * DA0; #endif { /* * Purpose * ======= * * PB_Cdescribe returns the global descriptor of a submatrix. This rou- * tine also computes the starting local index II, JJ corresponding to * the submatrix starting globally at the entry pointed by IA, JA. This * routine returns the coordinates in the grid of the process owning the * matrix entry of global indexes I, J, namely PROW and PCOL. The true * global block sizes IMB, INB, MB and NB are also returned. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows being dealt out * starting from global index IA. M is also the number of rows * of the submatrix of interest. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns being dealt out * starting from global index JA. N is also the number of col- * umns of the submatrix of interest. M must be at least zero. * * IA (global input) INTEGER * On entry, IA specifies the global starting row index of the * submatrix. IA must at least zero. * * JA (global input) INTEGER * On entry, JA specifies the global starting column index of * the submatrix. JA must at least zero. * * DA (global and local input) INTEGER array * On entry, DA is an integer array of dimension DLEN_. This is * the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least zero. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of the * submatrix. On exit, JJ is at least zero. * * LDA (local output) INTEGER * On exit, LDA specifies the local leading dimension of the lo- * cal array containing the distributed matrix entries. LDA must * be at least one. * * IMB (global output) INTEGER * On exit, IMB specifies the true global number of matrix rows * of the first block, if M rows are given out starting from the * global index IA. If M is equal zero, IMB is set to zero. * * INB (global output) INTEGER * On exit, INB specifies the true global number of matrix col- * umns of the first block, if N columns are given out starting * from the global index JA. If N is equal zero, INB is set to * zero. * * MB (global output) INTEGER * On exit, MB specifies the size of the blocks used to parti- * tion the matrix rows. MB is at least one. * * NB (global output) INTEGER * On exit, NB specifies the size of the blocks used to parti- * tion the matrix columns. NB is at least one. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero * and strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least * zero and strictly less than NPCOL otherwise. * * DA0 (global and local output) INTEGER array * On exit, DA0 is an integer array of dimension DLEN_. This is * the array descriptor of the submatrix A(IA:IA+M-1,JA:JA+N-1). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ *MB = DA[MB_]; Mfirstnb( *IMB, M, IA, DA[IMB_], *MB ); *NB = DA[NB_]; Mfirstnb( *INB, N, JA, DA[INB_], *NB ); *LDA = DA[LLD_]; Minfog2l( IA, JA, DA, NPROW, NPCOL, MYROW, MYCOL, *II, *JJ, *PROW, *PCOL ); MDescSet( DA0, M, N, *IMB, *INB, *MB, *NB, *PROW, *PCOL, DA[CTXT_], *LDA ); /* * End of PB_Cdescribe */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrsmAB1.c0000640000175000017500000003177010363532303020115 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrsmAB1( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * TRANSA, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * C, int * DESCC ) #else void PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, C, DESCC ) /* * .. Scalar Arguments .. */ char * DIAG, * SIDE, * TRANSA, * UPLO; int IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * .. Local Scalars .. */ char * negone, * one; int Acol, Acurcol, Acurrow, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj, Ald, Almb1, Alnb1, Amb, Amp0, An, Anb, Anq0, Anxtrow, Anxtcol, Arow, Bcol, Bii, Bimb, Binb, Bjj, Bld, Bmb, Bmp0, Bnb, Bnq0, Brow, Cld, ctxt, k=1, kb, kblks, lside, mycol, myrow, npcol, nprow, size, upper; MMADD_T mmadd; GERV2D_T recv; GESD2D_T send; GEMM_T gemm; TRSM_T trsm; /* * .. Local Arrays .. */ char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL, * Cptr = NULL; /* .. * .. Executable Statements .. * */ size = TYPE->size; lside = ( Mupcase( SIDE[0] ) == CLEFT ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol, Ald */ Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol, Bld ... */ Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); /* * Shorcuts when sub( B ) spans only one process row or column */ if( lside ) { Bnq0 = PB_Cnumroc( N, JB, Binb, Bnb, mycol, DESCB[CSRC_], npcol ); if( Bnq0 <= 0 ) return; Bmp0 = PB_Cnumroc( M, IB, Bimb, Bmb, myrow, DESCB[RSRC_], nprow ); if( !( PB_Cspan( M, IB, Bimb, Bmb, DESCB[RSRC_], nprow ) ) ) { if( Bmp0 > 0 ) { Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); TYPE->Fmmadd( &M, &Bnq0, TYPE->negone, C, &DESCC[LLD_], ALPHA, Bptr0, &Bld ); TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &M, &Bnq0, TYPE->one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, Bptr0, &Bld ); } return; } if( Bmp0 > 0 ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); } else { Bmp0 = PB_Cnumroc( M, IB, Bimb, Bmb, myrow, DESCB[RSRC_], nprow ); if( Bmp0 <= 0 ) return; Bnq0 = PB_Cnumroc( N, JB, Binb, Bnb, mycol, DESCB[CSRC_], npcol ); if( !( PB_Cspan( N, JB, Binb, Bnb, DESCB[CSRC_], npcol ) ) ) { if( Bnq0 > 0 ) { Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); TYPE->Fmmadd( &Bmp0, &N, TYPE->negone, C, &DESCC[LLD_], ALPHA, Bptr0, &Bld ); TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bmp0, &N, TYPE->one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, Bptr0, &Bld ); } return; } if( Bnq0 > 0 ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); } /* * Handle the general case now */ An = ( lside ? M : N ); upper = ( Mupcase( UPLO[0] ) == CUPPER ); negone = TYPE->negone; one = TYPE->one; recv = TYPE->Cgerv2d; send = TYPE->Cgesd2d; mmadd = TYPE->Fmmadd; gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm; /* * Compute more local information for sub( A ) */ Aimb = DESCA[IMB_]; Ainb = DESCA[INB_]; Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Aimb1 = PB_Cfirstnb( An, IA, Aimb, Amb ); Almb1 = PB_Clastnb ( An, IA, Aimb, Amb ); Amp0 = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Ainb1 = PB_Cfirstnb( An, JA, Ainb, Anb ); Alnb1 = PB_Clastnb ( An, JA, Ainb, Anb ); Anq0 = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size ); Cld = DESCC[LLD_]; if( lside ) { kblks = ( An > Aimb1 ? ( An - Aimb1 - 1 ) / Amb + 2 : 1 ); if( upper ) { Acurrow = Arow; Anxtrow = MModAdd1( Acurrow, nprow ); Aptr = Aptr0; Bptr = Bptr0; Cptr = C; while( k <= kblks ) { kb = ( k == 1 ? Aimb1 : ( k == kblks ? Almb1 : Amb ) ); An -= kb; if( myrow == Acurrow ) { /* * Add contribution of previous blocks of rows of sub( B ) to part of the * current block of rows of sub( B ) */ mmadd( &kb, &Bnq0, negone, Cptr, &Cld, ALPHA, Bptr, &Bld ); /* * Solve updated and current part of block of rows of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &kb, &Bnq0, one, Aptr, &Ald, Bptr, &Bld ); /* * Add contribution of part of the current block of rows of sub( B ) to the * remaining of the contribution of previous blocks of rows of sub( B ). Send * this remaining part to next process row. */ if( An > 0 ) { gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &An, &Bnq0, &kb, one, Mptr( Aptr, 0, kb, Ald, size ), &Ald, Bptr, &Bld, one, Mptr( Cptr, kb, 0, Cld, size ), &Cld ); send( ctxt, An, Bnq0, Mptr( Cptr, kb, 0, Cld, size ), Cld, Anxtrow, mycol ); } Aptr = Mptr( Aptr, kb, 0, Ald, size ); Bptr = Mptr( Bptr, kb, 0, Bld, size ); Cptr = C; } else if( myrow == Anxtrow ) { /* * Receive contribution of previous blocks of rows of sub( B ) to be added to * next block of rows of sub( B ) */ if( An > 0 ) recv( ctxt, An, Bnq0, Cptr, Cld, Acurrow, mycol ); } Aptr = Mptr( Aptr, 0, kb, Ald, size ); Acurrow = Anxtrow; Anxtrow = MModAdd1( Acurrow, nprow ); k += 1; } } else { k = kblks; Acurrow = PB_Cindxg2p( An-1, Aimb1, Amb, Arow, Arow, nprow ); Anxtrow = MModSub1( Acurrow, nprow ); while( k > 0 ) { kb = ( k == 1 ? Aimb1 : ( k == kblks ? Almb1 : Amb ) ); An -= kb; if( myrow == Acurrow ) { Aptr = Mptr( Aptr0, Amp0 - kb, 0, Ald, size ); Bptr = Mptr( Bptr0, Bmp0 - kb, 0, Bld, size ); Cptr = Mptr( C, An, 0, Cld, size ); /* * Add contribution of previous blocks of rows of sub( B ) to part of the * current block of rows of sub( B ) */ mmadd( &kb, &Bnq0, negone, Cptr, &Cld, ALPHA, Bptr, &Bld ); /* * Solve updated and current part of block of rows of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &kb, &Bnq0, one, Mptr( Aptr, 0, Anq0-kb, Ald, size ), &Ald, Bptr, &Bld ); /* * Add contribution of part of the current block of rows of sub( B ) to the * remaining of the contribution of previous blocks of rows of sub( B ). Send * this remaining part to next process row. */ if( An > 0 ) { gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &An, &Bnq0, &kb, one, Aptr, &Ald, Bptr, &Bld, one, C, &Cld ); send( ctxt, An, Bnq0, C, Cld, Anxtrow, mycol ); } Amp0 -= kb; Bmp0 -= kb; } else if( myrow == Anxtrow ) { /* * Receive contribution of previous blocks of rows of sub( B ) to be added to * next block of rows of sub( B ) */ if( An > 0 ) recv( ctxt, An, Bnq0, C, Cld, Acurrow, mycol ); } Anq0 -= kb; Acurrow = Anxtrow; Anxtrow = MModSub1( Acurrow, nprow ); k -= 1; } } } else { kblks = ( An > Ainb1 ? ( An - Ainb1 - 1 ) / Anb + 2 : 1 ); if( upper ) { k = kblks; Acurcol = PB_Cindxg2p( An-1, Ainb1, Anb, Acol, Acol, npcol ); Anxtcol = MModSub1( Acurcol, npcol ); while( k > 0 ) { kb = ( k == 1 ? Ainb1 : ( k == kblks ? Alnb1 : Anb ) ); An -= kb; if( mycol == Acurcol ) { Aptr = Mptr( Aptr0, 0, Anq0 - kb, Ald, size ); Bptr = Mptr( Bptr0, 0, Bnq0 - kb, Bld, size ); Cptr = Mptr( C, 0, An, Cld, size ); /* * Add contribution of previous blocks of columns of sub( B ) to part of the * current block of columns of sub( B ) */ mmadd( &Bmp0, &kb, negone, Cptr, &Cld, ALPHA, Bptr, &Bld ); /* * Solve updated and current part of block of columns of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bmp0, &kb, one, Mptr( Aptr, Amp0-kb, 0, Ald, size ), &Ald, Bptr, &Bld ); /* * Add contribution of part of the current block of columns of sub( B ) to the * remaining of the contribution of previous blocks of columns of sub( B ). * Send this remaining part to next process column. */ if( An > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &An, &kb, one, Bptr, &Bld, Aptr, &Ald, one, C, &Cld ); send( ctxt, Bmp0, An, C, Cld, myrow, Anxtcol ); } Anq0 -= kb; Bnq0 -= kb; } else if( mycol == Anxtcol ) { /* * Receive contribution of previous blocks of columns of sub( B ) to be added * to next block of columns of sub( B ) */ if( An > 0 ) recv( ctxt, Bmp0, An, C, Cld, myrow, Acurcol ); } Amp0 -= kb; Acurcol = Anxtcol; Anxtcol = MModSub1( Acurcol, npcol ); k -= 1; } } else { Acurcol = Acol; Anxtcol = MModAdd1( Acurcol, npcol ); Aptr = Aptr0; Bptr = Bptr0; Cptr = C; while( k <= kblks ) { kb = ( k == 1 ? Ainb1 : ( k == kblks ? Alnb1 : Anb ) ); An -= kb; if( mycol == Acurcol ) { /* * Add contribution of previous blocks of columns of sub( B ) to part of the * current block of columns of sub( B ) */ mmadd( &Bmp0, &kb, negone, Cptr, &Cld, ALPHA, Bptr, &Bld ); /* * Solve updated and current part of block of columns of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bmp0, &kb, one, Aptr, &Ald, Bptr, &Bld ); /* * Add contribution of part of the current block of columns of sub( B ) to the * remaining of the contribution of previous blocks of columns of sub( B ). * Send this remaining part to next process column. */ if( An > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &An, &kb, one, Bptr, &Bld, Mptr( Aptr, kb, 0, Ald, size ), &Ald, one, Mptr( Cptr, 0, kb, Cld, size ), &Cld ); send( ctxt, Bmp0, An, Mptr( Cptr, 0, kb, Cld, size ), Cld, myrow, Anxtcol ); } Aptr = Mptr( Aptr, 0, kb, Ald, size ); Bptr = Mptr( Bptr, 0, kb, Bld, size ); Cptr = C; } else if( mycol == Anxtcol ) { /* * Receive contribution of previous blocks of columns of sub( B ) to be added * to next block of columns of sub( B ). */ if( An > 0 ) recv( ctxt, Bmp0, An, Cptr, Cld, myrow, Acurcol ); } Aptr = Mptr( Aptr, kb, 0, Ald, size ); Acurcol = Anxtcol; Anxtcol = MModAdd1( Acurcol, npcol ); k += 1; } } } /* * End of PB_CptrsmAB1 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpsyrkA.c0000640000175000017500000005746110363532303017742 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpsyrkA( PBTYP_T * TYPE, char * DIRECA, char * CONJUG, char * UPLO, char * TRANS, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpsyrkA( TYPE, DIRECA, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECA, * TRANS, * UPLO; int IA, IC, JA, JC, K, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCC; char * A, * C; #endif { /* * Purpose * ======= * * PB_CpsyrkA performs one of the following symmetric or Hermitian rank * k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * or Hermitian submatrix and sub( A ) is an n by k submatrix in the * first case and a k by n submatrix in the second case. * * This is the outer-product algorithm using the logical LCM hybrid * and static blocking techniques. The submatrix operand sub( C ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be looped over as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( C ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( C ) is symmetric, * CONJUG = 'Z' or 'z' sub( C ) is Hermitian. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the submatrix sub( C ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( C ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( C ) are referenced. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't' or 'C' or 'c', K specifies the number of rows of the sub- * matrix sub( A ). K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly lower triangular * of sub( C ) are not referenced. On exit, the upper triangular * part of sub( C ) is overwritten by the upper triangular part * of the updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly upper triangular * of sub( C ) are not referenced. On exit, the lower triangular * part of sub( C ) is overwritten by the lower triangular part * of the updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int AcurrocR, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpR, AnprocsD, AnprocsR, ArocD, ArocR, Arocs, AsrcR, Ccol, Cii, Cimb1, Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp, Cmp0, Cnb, Cnq, Crow, WACfr, WACld, WACsum, WARfr, WARld, WARsum, Wkbb=0, ctxt, k, kb, kbb, l, lb, ltmp, maxp, mycol, myrow, notran, npcol, nprow, p=0, size, tmp, upper; GEMM_T gemm; TZSYR_T tzsyrk; /* * .. Local Arrays .. */ int Cd0[DLEN_], DBUFA[DLEN_], WACd0[DLEN_], WARd0[DLEN_]; char * Aptr = NULL, * Cptr = NULL, * WAC = NULL, * WAR = NULL; /* .. * .. Executable Statements .. * */ /* * sub( C ) := beta * sub( C ) */ PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute descriptor Cd0 for sub( C ) */ PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 ); Cmp = PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol ); if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { Cptr = Mptr( C, Cii, Cjj, Cld, size ); tzsyrk = ( ( Mupcase( CONJUG[0] ) == CNOCONJG ) ? PB_Ctzsyrk : PB_Ctzherk ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ Clcmb = 2 * kb * PB_Clcm( ( Crow >= 0 ? nprow : 1 ), ( Ccol >= 0 ? npcol : 1 ) ); } /* * Retrieve local information for sub( A ) */ if( ( notran = ( Mupcase( TRANS[0] ) == CNOTRAN ) ) != 0 ) { AiR = JA; AnprocsR = npcol; AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_]; } else { AiR = IA; AnprocsR = nprow; AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_]; } /* * If sub( A ) only spans one process row or column, then there is no need to * pack the data. */ if( !( PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) ) { /* * Replicate sub( A ) in process rows and columns spanned by sub( C ): WAC, WAR */ if( notran ) { PB_CInV( TYPE, NOCONJG, COLUMN, N, N, Cd0, K, A, IA, JA, DESCA, COLUMN, &WAC, WACd0, &WACfr ); PB_CInV( TYPE, CONJUG, ROW, N, N, Cd0, K, WAC, 0, 0, WACd0, COLUMN, &WAR, WARd0, &WARfr ); } else { PB_CInV( TYPE, NOCONJG, ROW, N, N, Cd0, K, A, IA, JA, DESCA, ROW, &WAR, WARd0, &WARfr ); PB_CInV( TYPE, CONJUG, COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0, ROW, &WAC, WACd0, &WACfr ); } /* * Perform the local update if I own some data */ if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { WACld = WACd0[LLD_]; WARld = WARd0[LLD_]; if( Mupcase( UPLO[0] ) == CUPPER ) { for( l = 0; l < N; l += Clcmb ) { lb = N - l; lb = MIN( lb, Clcmb ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Clp > 0 && Cnq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &K, ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); PB_Cpsyr( TYPE, UPPER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Cptr, l, l, Cd0, tzsyrk ); } } else { for( l = 0; l < N; l += Clcmb ) { lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); PB_Cpsyr( TYPE, LOWER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Cptr, l, l, Cd0, tzsyrk ); Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cmp0 = Cmp - Clp; Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Cmp0 > 0 && Cnq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); } } } if( WACfr ) free( WAC ); if( WARfr ) free( WAR ); return; } /* * Otherwise sub( A ) spans more than one process row or columns -> LCM hybrid */ Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); if( notran ) { AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow; PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); } else { AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol; PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); } Ainb1D = PB_Cfirstnb( N, AiD, AinbD, AnbD ); Ainb1R = PB_Cfirstnb( K, AiR, AinbR, AnbR ); AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ); /* * When sub( A ) is not replicated and backward pass on sub( A ), find the * virtual process (p,p) owning the last row or column of sub( A ). */ if( !( AisR ) && !( Afwd ) ) { tmp = PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR ); p = MModSub( tmp, ArocR, AnprocsR ); } /* * Allocate work space in process rows and columns spanned by sub( C ) */ PB_COutV( TYPE, COLUMN, NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr, &WACsum ); PB_COutV( TYPE, ROW, NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr, &WARsum ); /* * Loop over the virtual process grid induced by the rows or columns of sub( A ) */ maxp = ( AisR ? 1 : AnprocsR ); AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); for( k = 0; k < K; k += kb ) { kbb = K - k; kbb = MIN( kbb, kb ); while( Wkbb != kbb ) { /* * Ensure that the current virtual process (p,p) has something to contribute to * the replicated buffers WAC and WAR. */ while( AnpR == 0 ) { p = ( Afwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); } /* * Current virtual process (p,p) has something, find out how many rows or * columns could be used: Arocs. */ if( Wkbb == 0 ) { Arocs = ( AnpR < kbb ? AnpR : kbb ); } else { Arocs = kbb - Wkbb; Arocs = MIN( Arocs, AnpR ); } /* * The current virtual process (p,p) has Arocs rows or columns of sub( A ) * to contribute, replicate the data over sub( C ). */ if( notran ) { if( AisR || ( AmyprocR == AcurrocR ) ) { Aptr = Mptr( A, AiiD, AiiR, Ald, size ); AiiR += Arocs; } PB_Cdescset( DBUFA, N, Arocs, Ainb1D, Arocs, AnbD, Arocs, ArocD, AcurrocR, ctxt, Ald ); /* * Replicate Arocs columns of sub( A ) in process columns spanned by sub( C ) */ PB_CInV2( TYPE, NOCONJG, COLUMN, N, N, Cd0, Arocs, Aptr, 0, 0, DBUFA, COLUMN, WAC, Wkbb, WACd0 ); } else { if( AisR || ( AmyprocR == AcurrocR ) ) { Aptr = Mptr( A, AiiR, AiiD, Ald, size ); AiiR += Arocs; } PB_Cdescset( DBUFA, Arocs, N, Arocs, Ainb1D, Arocs, AnbD, AcurrocR, ArocD, ctxt, Ald ); /* * Replicate Arocs rows of sub( A ) in process rows spanned by sub( C ) */ PB_CInV2( TYPE, NOCONJG, ROW, N, N, Cd0, Arocs, Aptr, 0, 0, DBUFA, ROW, WAR, Wkbb, WARd0 ); } /* * Arocs rows or columns of sub( A ) have been replicated over sub( C ), * update the number of diagonals in this virtual process as well as the * number of rows or columns of sub( A ) that are in WAR or WAC. */ AnpR -= Arocs; Wkbb += Arocs; } if( notran ) { /* * WAR := WAC' */ PB_CInV2( TYPE, CONJUG, ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0, COLUMN, WAR, 0, WARd0 ); } else { /* * WAC := WAR' */ PB_CInV2( TYPE, CONJUG, COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0, ROW, WAC, 0, WACd0 ); } /* * Perform the local update if I own some data */ if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { WACld = WACd0[LLD_]; WARld = WARd0[LLD_]; if( upper ) { for( l = 0; l < N; l += Clcmb ) { lb = N - l; lb = MIN( lb, Clcmb ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Clp > 0 && Cnq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &kbb, ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); PB_Cpsyr( TYPE, UPPER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Cptr, l, l, Cd0, tzsyrk ); } } else { for( l = 0; l < N; l += Clcmb ) { lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); PB_Cpsyr( TYPE, LOWER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Cptr, l, l, Cd0, tzsyrk ); Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cmp0 = Cmp - Clp; Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Cmp0 > 0 && Cnq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); } } } Wkbb = 0; } if( WACfr ) free( WAC ); if( WARfr ) free( WAR ); /* * End of PB_CpsyrkA */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cgetbuf.c0000640000175000017500000000472510363532303017740 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ char * PB_Cgetbuf( char * MESS, int LENGTH ) #else char * PB_Cgetbuf( MESS, LENGTH ) /* * .. Scalar Arguments .. */ int LENGTH; /* * .. Array Arguments .. */ char * MESS; #endif { /* * Purpose * ======= * * PB_Cgetbuf allocates a dynamic memory buffer. The routine checks the * size of the already allocated buffer against the value of the formal * parameter LENGTH. If the current buffer is large enough, this a poin- * ter to it is returned. Otherwise, this function tries to allocate it. * In case of failure, the program is stopped by calling Cblacs_abort. * When LENGTH is zero, this function returns a NULL pointer. If the va- * lue of LENGTH is strictly less than zero, the buffer is released. * * Arguments * ========= * * MESS (local input) pointer to CHAR * On entry, MESS is a string containing a message to be printed * in case of allocation failure. * * LENGTH (local input) INTEGER * On entry, LENGTH specifies the length in bytes of the buffer * to be allocated. If LENGTH is less or equal than zero, this * function returns NULL. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static char * pblasbuf = NULL; static int pbbuflen = 0; /* .. * .. Executable Statements .. * */ if( LENGTH >= 0 ) { if( LENGTH > pbbuflen ) { if( pblasbuf ) free( pblasbuf ); pblasbuf = (char *) malloc( (unsigned) LENGTH ); if( !pblasbuf ) { (void) fprintf( stderr, "ERROR: Memory allocation failed\n%s\n", MESS ); Cblacs_abort( -1, -1 ); } pbbuflen = LENGTH; } } else if( pblasbuf ) { free( pblasbuf ); pblasbuf = NULL; pbbuflen = 0; } return( pblasbuf ); /* * End of PB_Cgetbuf */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpaxpbyNN.c0000640000175000017500000004434110363532303020221 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpaxpbyNN( PBTYP_T * TYPE, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * AROC, char * BETA, char * B, int IB, int JB, int * DESCB, char * BROC ) #else void PB_CpaxpbyNN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ) /* * .. Scalar Arguments .. */ char * AROC, * BROC, * CONJUG; int IA, IB, JA, JB, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CpaxpbyNN adds one submatrix to another, * * sub( B ) := beta * sub( B ) + alpha * sub( A ), or, * * sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ), * * where both submatrices are not distributed; sub( A ) always denotes * A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or 'r' sub( A ) resides in * a process row, otherwise sub( A ) resides in a process column. When * sub( A ) resides in a process row and BROC is 'R' or 'r' or * sub( A ) resides in a process column and BROC is 'C' or 'c', then * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1), and B(IB:IB+N-1,JB:JB+M-1) * otherwise. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( B ) as follows: * CONJUG = 'N' or 'n': * sub( B ) := beta*sub( B ) + alpha*sub( A ), * otherwise * sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A cor- * responding to the entries of the submatrix sub( A ) need not * be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least * Lc( 1, JA+N-1 ). Before entry, this array contains the local * entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the subvector * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector, * and a column vector otherwise. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B corres- * ponding to the entries of the submatrix sub( B ) need not be * set on input. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where LLD_B * is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and * sub( B ) are both distributed along a process column or a * process row. In that case, Kb is at least Lc( 1, JB+N-1 ). * Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and * Kb is at least Lc( 1, JB+M-1 ). Before entry, this array * contains the local entries of the matrix B. On exit, sub( B ) * is overwritten with the updated submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BROC (global input) pointer to CHAR * On entry, BROC specifies the orientation of the subvector * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector, * and a column vector otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int Acol, Aii, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR, AnprocsD, AnprocsR, AprocR, Arow, Bcol, Bii, BisR, BisRow, Bjj, Bld, BmyprocD, BmyprocR, BnprocsD, BnprocsR, BprocR, Brow, RRorCC, csrc, ctxt, iroca, mycol, myrow, npcol, nprow, p, rsrc, size; MMADD_T add; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 ) { Ald = DESCA[LLD_]; AmyprocD = mycol; AnprocsD = npcol; AprocR = Arow; AmyprocR = myrow; AnprocsR = nprow; AisR = ( ( Arow == -1 ) || ( AnprocsR == 1 ) ); } else { Ald = DESCA[LLD_]; AmyprocD = myrow; AnprocsD = nprow; AprocR = Acol; AmyprocR = mycol; AnprocsR = npcol; AisR = ( ( Acol == -1 ) || ( AnprocsR == 1 ) ); } /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ... */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 ) { Bld = DESCB[LLD_]; BmyprocD = mycol; BnprocsD = npcol; BprocR = Brow; BmyprocR = myrow; BnprocsR = nprow; BisR = ( ( Brow == -1 ) || ( BnprocsR == 1 ) ); } else { Bld = DESCB[LLD_]; BmyprocD = myrow; BnprocsD = nprow; BprocR = Bcol; BmyprocR = mycol; BnprocsR = npcol; BisR = ( ( Bcol == -1 ) || ( BnprocsR == 1 ) ); } /* * Are sub( A ) and sub( B ) both row or column vectors ? */ RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) ); /* * Neither sub( A ) nor sub( B ) are distributed */ if( !AisR ) { /* * sub( A ) is not replicated */ if( !( BisR ) ) { /* * sub( B ) is not replicated */ if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) /* * If I am not in AprocR or BprocR, then return immediately */ return; size = TYPE->size; if( RRorCC ) { /* * sub( A ) and sub( B ) are both row or column vectors */ if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; if( AprocR == BprocR ) { add( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } else { /* * sub( A ) and sub( B ) are in a different process row or column */ if( AmyprocR == AprocR ) { /* * Send sub( A ) to where sub( B ) resides. */ if( AisRow ) TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, BprocR, AmyprocD ); else TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, AmyprocD, BprocR ); } /* * receive sub( A ) and add it to sub( B ) */ if( BmyprocR == BprocR ) { buf = PB_Cmalloc( M * N * size ); if( BisRow ) TYPE->Cgerv2d( ctxt, M, N, buf, M, AprocR, BmyprocD ); else TYPE->Cgerv2d( ctxt, M, N, buf, M, BmyprocD, AprocR ); add( &M, &N, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); if( buf ) free( buf ); } } } else { /* * sub( A ) and sub( B ) are not both row or column vectors */ if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; iroca = 0; for( p = 0; p < BnprocsD; p++ ) { if( ( AprocR == p ) && ( BprocR == iroca ) ) { if( ( AmyprocR == p ) && ( AmyprocD == iroca ) ) { add( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } } else { if( ( AmyprocR == AprocR ) && ( AmyprocD == iroca ) ) { if( AisRow ) TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, p, BprocR ); else TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ), Ald, BprocR, p ); } if( ( BmyprocR == BprocR ) && ( BmyprocD == p ) ) { buf = PB_Cmalloc( M * N * size ); if( AisRow ) TYPE->Cgerv2d( ctxt, M, N, buf, M, AprocR, iroca ); else TYPE->Cgerv2d( ctxt, M, N, buf, M, iroca, AprocR ); add( &M, &N, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); if( buf ) free( buf ); } } iroca = MModAdd1( iroca, AnprocsD ); } } } else { /* * sub( B ) is replicated */ size = TYPE->size; if( AmyprocR == AprocR ) { if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) TYPE->Fmmcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else TYPE->Fmmadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) TYPE->Fmmtcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else TYPE->Fmmtadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } if( AisRow ) { scope = CCOLUMN; } else { scope = CROW; } top = PB_Ctop( &ctxt, BCAST, &scope, TOP_GET ); if( RRorCC ) TYPE->Cgebs2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebs2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld, size ), Bld ); } else { if( AisRow ) { scope = CCOLUMN; rsrc = AprocR; csrc = AmyprocD; } else { scope = CROW; rsrc = AmyprocD; csrc = AprocR; } top = PB_Ctop( &ctxt, BCAST, &scope, TOP_GET ); if( RRorCC ) TYPE->Cgebr2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld, size ), Bld, rsrc, csrc ); else TYPE->Cgebr2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld, size ), Bld, rsrc, csrc ); } } } else { /* * sub( A ) is replicated */ if( BisR || ( BmyprocR == BprocR ) ) { /* * If I own a piece of sub( B ), then add sub( A ) to it */ size = TYPE->size; if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) TYPE->Fmmcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else TYPE->Fmmadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) TYPE->Fmmtcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else TYPE->Fmmtadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } } } /* * End of PB_CpaxpbyNN */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzahemv.c0000640000175000017500000002260510363532303020137 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzahemv( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR ) #else void PB_Ctzahemv( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Ctzahemv performs the matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( y ), * * where alpha is a real scalar, y is a real vector, x is a vector and A * is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YC (input/output) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. On exit, YC is overwritten by the partially * updated vector y. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * YR (input/output) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. On exit, YR is overwritten by the partially * updated vector y. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, ione=1, j1, m1, mn, n1, size, usiz; AGEMV_T agemv; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; usiz = TYPE->usiz; one = TYPE->one; agemv = TYPE->Fagemv; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { agemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( COTRAN ), &M, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fahemv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YC, i1, 0, LDYC, usiz ), &ione ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; agemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, Mptr( YC, i1, 0, LDYC, usiz ), &ione ); agemv( C2F_CHAR( COTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YR, 0, j1, LDYR, usiz ), &LDYR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; usiz = TYPE->usiz; one = TYPE->one; agemv = TYPE->Fagemv; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { agemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( COTRAN ), &m1, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } TYPE->Fahemv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( XC, m1, 0, LDXC, size ), &ione, one, Mptr( YC, m1, 0, LDYC, usiz ), &ione ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; agemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, YC, &ione ); agemv( C2F_CHAR( COTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, XC, &ione, one, Mptr( YR, 0, j1, LDYR, usiz ), &LDYR ); } } else { one = TYPE->one; agemv = TYPE->Fagemv; agemv( C2F_CHAR( NOTRAN ), &M, &N, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( COTRAN ), &M, &N, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } /* * End of PB_Ctzahemv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzasymv.c0000640000175000017500000002255210363532303020177 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzasymv( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR ) #else void PB_Ctzasymv( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Ctzasymv performs the matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( y ), * * where alpha is a real scalar, y is a real vector, x is a vector and A * is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YC (input/output) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. On exit, YC is overwritten by the partially * updated vector y. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * YR (input/output) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. On exit, YR is overwritten by the partially * updated vector y. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, ione=1, j1, m1, mn, n1, size, usiz; AGEMV_T agemv; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; usiz = TYPE->usiz; one = TYPE->one; agemv = TYPE->Fagemv; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { agemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( TRAN ), &M, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fasymv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YC, i1, 0, LDYC, usiz ), &ione ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; agemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, Mptr( YC, i1, 0, LDYC, usiz ), &ione ); agemv( C2F_CHAR( TRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YR, 0, j1, LDYR, usiz ), &LDYR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; usiz = TYPE->usiz; one = TYPE->one; agemv = TYPE->Fagemv; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { agemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( TRAN ), &m1, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } TYPE->Fasymv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( XC, m1, 0, LDXC, size ), &ione, one, Mptr( YC, m1, 0, LDYC, usiz ), &ione ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; agemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, YC, &ione ); agemv( C2F_CHAR( TRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, XC, &ione, one, Mptr( YR, 0, j1, LDYR, usiz ), &LDYR ); } } else { one = TYPE->one; agemv = TYPE->Fagemv; agemv( C2F_CHAR( NOTRAN ), &M, &N, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); agemv( C2F_CHAR( TRAN ), &M, &N, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } /* * End of PB_Ctzasymv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cainfog2l.c0000640000175000017500000005360610363532303020167 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cainfog2l( int M, int N, int I, int J, int * DESC, int NPROW, int NPCOL, int MYROW, int MYCOL, int * IMB1, int * INB1, int * MP, int * NQ, int * II, int * JJ, int * PROW, int * PCOL, int * RPROW, int * RPCOL ) #else void PB_Cainfog2l( M, N, I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, PCOL, RPROW, RPCOL ) /* * .. Scalar Arguments .. */ int I, * II, * IMB1, * INB1, J, * JJ, M, * MP, MYCOL, MYROW, N, NPCOL, NPROW, * NQ, * PCOL, * PROW, * RPCOL, * RPROW; /* * .. Array Arguments .. */ int * DESC; #endif { /* * Purpose * ======= * * PB_Cainfog2l computes the starting local row and column indexes II, * JJ corresponding to the submatrix starting globally at the entry * pointed by I, J. This routine returns the coordinates in the grid of * the process owning the matrix entry of global indexes I, J, namely * PROW and PCOL. In addition, this routine computes the quantities MP * and NQ, which are respectively the local number of rows and columns * owned by the process of coordinate MYROW, MYCOL corresponding to the * global submatrix A(I:I+M-1,J:J+N-1). Finally, the size of the first * partial block and the relative process coordinates are also returned * respectively in IMB, INB and RPROW, RPCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the global number of rows of the subma- * trix. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of columns of the * submatrix. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least zero. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least zero. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * IMB1 (global output) INTEGER * On exit, IMB1 specifies the number of rows of the upper left * block of the submatrix. On exit, IMB1 is less or equal than * M and greater or equal than MIN( 1, M ). * * INB1 (global output) INTEGER * On exit, INB1 specifies the number of columns of the upper * left block of the submatrix. On exit, INB1 is less or equal * than N and greater or equal than MIN( 1, N ). * * MP (local output) INTEGER * On exit, MP specifies the local number of rows of the subma- * trix, that the processes of row coordinate MYROW own. MP is * at least zero. * * NQ (local output) INTEGER * On exit, NQ specifies the local number of columns of the * submatrix, that the processes of column coordinate MYCOL * own. NQ is at least zero. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least zero. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of * the submatrix. On exit, II is at least zero. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC(RSRC_) is -1 on input, and, at least zero and * strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC(CSRC_) is -1 on input, and, at least zero * and strictly less than NPCOL otherwise. * * RPROW (global output) INTEGER * On exit, RPROW specifies the relative row coordinate of the * process that possesses the first row I of the submatrix. On * exit, RPROW is -1 if DESC(RSRC_) is -1 on input, and, at * least zero and strictly less than NPROW otherwise. * * RPCOL (global output) INTEGER * On exit, RPCOL specifies the relative column coordinate of * the process that possesses the first column J of the subma- * trix. On exit, RPCOL is -1 if DESC(CSRC_) is -1 on input, * and, at least zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ilocblk, j1, mb, mydist, nb, nblocks, csrc, rsrc; /* .. * .. Executable Statements .. * */ /* * Retrieve the row distribution parameters */ mb = DESC[ MB_ ]; rsrc = DESC[ RSRC_ ]; if( ( rsrc == -1 ) || ( NPROW == 1 ) ) { /* * The rows are not distributed, or there is just one process row in the grid. * Therefore, the local and global indexes are the same, as well as the local * and global number of rows. Finally, the relative row process coordinate is * zero, since every process owns all rows. Note that the size of the first * row block can be zero only if M is zero. */ *II = I; if( ( *IMB1 = DESC[IMB_] - I ) <= 0 ) *IMB1 += ( ( -(*IMB1) ) / mb + 1 ) * mb; *IMB1 = MIN( *IMB1, M ); *MP = M; *PROW = rsrc; *RPROW = 0; } else { /* * Figure out PROW, II and IMB1 first. */ *IMB1 = DESC[IMB_]; if( I < *IMB1 ) /* Is I in first block range ? */ { /* * If I is in the first block of rows, then PROW is simply rsrc, II is I in * this process and zero elsewhere, and the size of the first block is the * IMB complement. */ *PROW = rsrc; *II = ( ( MYROW == *PROW ) ? I : 0 ); *IMB1 -= I; } else { /* * The discussion goes as follows: compute my distance from the source process * so that within this process coordinate system, the source row process is the * process such that mydist=0, or equivalently MYROW == rsrc. * * Find out the global coordinate of the block of rows I belongs to (nblocks), * as well as the minimum local number of row blocks that every process has. * * when mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROW, I own ilocblk full blocks * but not I, or I own ilocblk + 1 blocks and the entry I refers to. */ i1 = I - *IMB1; if( MYROW == rsrc ) { /* * I refers to an entry that is not in the first block, find out which process * has it. */ nblocks = i1 / mb + 1; *PROW = rsrc + nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three * possible cases: * * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in * which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot * happen when ilocblk is zero, since nblocks is at least one. * * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which * case I and II can respectively be written as IMB + (nblocks-1)*MB + IL * and IMB+(ilocblk-1) * MB + IL. That is II = I + (ilocblk - nblocks)*MB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note * that when ilocblk is zero, II is just IMB. */ if( nblocks < NPROW ) { *II = *IMB1; } else { ilocblk = nblocks / NPROW; if( ilocblk * NPROW >= nblocks ) { *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb : *IMB1 + ( ilocblk - 1 ) * mb ); } else { *II = *IMB1 + ilocblk * mb; } } } else { /* * I is not in the first block, find out which process has it. */ nblocks = i1 / mb + 1; *PROW = rsrc + nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; /* * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of * size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB. * When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk * full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL * and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB. */ if( nblocks < NPROW ) { mydist -= nblocks; *II = ( ( mydist < 0 ) ? mb : ( ( MYROW == *PROW ) ? i1 + ( 1 - nblocks ) * mb : 0 ) ); } else { ilocblk = nblocks / NPROW; mydist -= nblocks - ilocblk * NPROW; *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : ( ( MYROW == *PROW ) ? ( ilocblk - nblocks + 1 ) * mb + i1 : ilocblk * mb ) ); } } /* * Update the size of first block */ *IMB1 = nblocks * mb - i1; } /* * Now everything is just like M, I=0, IMB1, MB, PROW, NPROW. The discussion * goes as follows: compute my distance from the source process PROW so that * within this process coordinate system, the source process is the process * such that mydist = 0. Figure out MP. */ if( M <= *IMB1 ) { /* * M <= IMB1: if I am the source process, i.e. I own I (mydist = 0), MP is M * and 0 otherwise. */ *MP = ( ( MYROW == *PROW ) ? M : 0 ); } else { /* * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those M entries */ nblocks = ( M - *IMB1 ) / mb + 1; if( MYROW == *PROW ) { /* * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only two * possible cases: * * 1) When mydist = nblocks - ilocblk * NPROW = 0, that is NPROW divides * the global number of full blocks, then the source process PROW owns * one more block than the other processes; and M can be rewritten as * M = IMB1 + (nblocks-1) * NB + LNB with LNB >= 0 size of the last block. * Similarly, the local value MP corresponding to M can be written as * MP = IMB1 + (ilocblk-1) * MB + LMB = M + ( ilocblk-1 - (nblocks-1) )*MB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 2) mydist = 0 < nblocks - ilocblk * NPROW, the source process only owns * full blocks, and therefore MP = IMB1 + ilocblk * MB. Note that when * ilocblk is zero, MP is just IMB1. */ if( nblocks < NPROW ) { *MP = *IMB1; } else { ilocblk = nblocks / NPROW; *MP = ( ( nblocks - ilocblk * NPROW ) ? *IMB1 + ilocblk * mb : M + ( ilocblk - nblocks ) * mb ); } } else { /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYROW - *PROW ) < 0 ) mydist += NPROW; /* * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of * size MB since I am not the source process, * * when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks of * size MB since I am not the source process, * * when mydist = nblocks - ilocblk * NPROW, * either the last block is not full and I own it, in which case * M = IMB1 + (nblocks - 1)*MB + LMB with LNB the size of the last block * such that MB > LMB > 0; the local value MP corresponding to M is given * by MP = ilocblk * MB + LMB = M - IMB1 + ( ilocblk - nblocks + 1 ) * MB; * or the last block is full and I am the first process owning only ilocblk * full blocks of size MB, that is M = IMB + ( nblocks - 1 ) * MB and * MP = ilocblk * MB = M - IMB + ( ilocblk - nblocks + 1 ) * MB. */ if( nblocks < NPROW ) { mydist -= nblocks; *MP = ( ( mydist < 0 ) ? mb : ( ( mydist > 0 ) ? 0 : M - *IMB1 + mb * ( 1 - nblocks ) ) ); } else { ilocblk = nblocks / NPROW; mydist -= nblocks - ilocblk * NPROW; *MP = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : ( ( mydist > 0 ) ? ilocblk * mb : M - *IMB1 + mb * ( ilocblk - nblocks + 1 ) ) ); } } } /* * Finally figure out IMB1 and RPROW. Note that IMB1 can be zero when M = 0. */ *IMB1 = MIN( *IMB1, M ); if( ( *RPROW = MYROW - *PROW ) < 0 ) *RPROW += NPROW; } /* * Idem for the columns */ nb = DESC[ NB_ ]; csrc = DESC[ CSRC_ ]; if( ( csrc == -1 ) || ( NPCOL == 1 ) ) { *JJ = J; if( ( *INB1 = DESC[INB_] - J ) <= 0 ) *INB1 += ( ( -(*INB1) ) / nb + 1 ) * nb; *INB1 = MIN( *INB1, N ); *NQ = N; *PCOL = csrc; *RPCOL = 0; } else { *INB1 = DESC[INB_]; if( J < *INB1 ) { *PCOL = csrc; *JJ = ( ( MYCOL == *PCOL ) ? J : 0 ); *INB1 -= J; } else { j1 = J - *INB1; if( MYCOL == csrc ) { nblocks = j1 / nb + 1; *PCOL = csrc + nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( nblocks < NPCOL ) { *JJ = *INB1; } else { ilocblk = nblocks / NPCOL; if( ilocblk * NPCOL >= nblocks ) { *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb : *INB1 + ( ilocblk - 1 ) * nb ); } else { *JJ = *INB1 + ilocblk * nb; } } } else { nblocks = j1 / nb + 1; *PCOL = csrc + nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; if( nblocks < NPCOL ) { mydist -= nblocks; *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? j1 + ( 1 - nblocks ) * nb : 0 ) ); } else { ilocblk = nblocks / NPCOL; mydist -= nblocks - ilocblk * NPCOL; *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : ( ( MYCOL == *PCOL ) ? ( ilocblk - nblocks + 1 ) * nb + j1 : ilocblk * nb ) ); } } *INB1 = nblocks * nb - j1; } if( N <= *INB1 ) { *NQ = ( ( MYCOL == *PCOL ) ? N : 0 ); } else { nblocks = ( N - *INB1 ) / nb + 1; if( MYCOL == *PCOL ) { if( nblocks < NPCOL ) { *NQ = *INB1; } else { ilocblk = nblocks / NPCOL; *NQ = ( ( nblocks - ilocblk * NPCOL ) ? *INB1 + ilocblk * nb : N + ( ilocblk - nblocks ) * nb ); } } else { if( ( mydist = MYCOL - *PCOL ) < 0 ) mydist += NPCOL; if( nblocks < NPCOL ) { mydist -= nblocks; *NQ = ( ( mydist < 0 ) ? nb : ( ( mydist > 0 ) ? 0 : N - *INB1 + nb * ( 1 - nblocks ) ) ); } else { ilocblk = nblocks / NPCOL; mydist -= nblocks - ilocblk * NPCOL; *NQ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : ( ( mydist > 0 ) ? ilocblk * nb : N - *INB1 + nb * ( ilocblk - nblocks + 1 ) ) ); } } } *INB1 = MIN( *INB1, N ); if( ( *RPCOL = MYCOL - *PCOL ) < 0 ) *RPCOL += NPCOL; } /* * End of PB_Cainfog2l */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMloc.c0000640000175000017500000006053610363532303017506 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_CVMloc( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS, char * UNPA, char * TRANS, int MN, int K, char * ALPHA, char * A, int LDA, char * BETA, char * B, int LDB ) #else int PB_CVMloc( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A, LDA, BETA, B, LDB ) /* * .. Scalar Arguments .. */ int K, LDA, LDB, MN; char * ALPHA, * BETA; /* * .. Array Arguments .. */ char * VROCS, * ROCS, * UNPA, * TRANS; PBTYP_T * TYPE; PB_VM_T * VM; char * A, * B; #endif { /* * Purpose * ======= * * PB_CVMloc packs a one-dimensional distributed array A into another * one-dimensional distributed array B, or unpacks a one-dimensional * distributed array B into a one-dimensional distributed array A. This * operation is triggered by a virtual distributed array. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * VROCS (local input) pointer to CHAR * On entry, VROCS specifies if the rows or columns of the vir- * tual distributed array grid should be used for the packing or * unpacking operation as follows: * VROCS = 'R' or 'r', the rows should be used, * VROCS = 'C' or 'c', the columns should be used. * * ROCS (local input) pointer to CHAR * On entry, ROCS specifies if rows or columns should be packed * or unpacked as follows: * ROCS = 'R' or 'r', rows should be (un)packed, * ROCS = 'C' or 'c', columns should be (un)packed. * * UNPA (local input) pointer to CHAR * On entry, UNPA specifies if the data should be packed or un- * packed as follows: * UNPA = 'P' or 'p', packing (A into B), * UNPA = 'U' or 'u', unpacking (B into A). * * TRANS (local input) pointer to CHAR * On entry, TRANS specifies if conjugation, transposition or * conjugate transposition should occur during the (un)packing * operation as follows: * TRANS = 'N' or 'n', natural (un)packing, * TRANS = 'Z' or 'z', conjugated (un)packing, * TRANS = 'T' or 'T', transposed (un)packing, * TRANS = 'C' or 'c', conjugate transposed (un)packing. * * MN (local input) INTEGER * On entry, MN specifies the number of rows or columns to be * (un)packed. MN must be at least zero. * * K (local input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension to be (un)packed. K must be at least zero. * * ALPHA (local input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) pointer to CHAR * On entry, A points to an array of dimension (LDA, Ka), where * Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c', * Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and * when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This * array contains unpacked data. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and * MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and * VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB ) * when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'. * * BETA (local input) pointer to CHAR * On entry, BETA specifies the scalar beta. * * B (local input/local output) pointer to CHAR * On entry, B points to an array of dimension (LDB, Kb). When * TRANS is 'N', 'n', 'Z' or 'z', Kb is K when ROCS is 'R'or * 'r', and when ROCS is 'C' or 'c', Kb is IMBLOC+(MBLKS-2)*MB+ * LMB when VROCS is 'C' or 'c' and when VROCS is 'R', Kb is * INBLOC+(NBLKS-2)*NB+LNB. When TRANS is 'T', 't', 'C' or 'c', * Kb is K when ROCS is 'C' or 'c' and when ROCS is 'R' or 'r', * Kb is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'C' or 'c' and * when VROCS is 'R' or 'r', Kb is INBLOC+(NBLKS-2)*NB+LNB. This * array contains unpacked data. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb, lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc, * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0, nprow, pmb, qnb, rows, size, tmp1, tmp2, upp; MMADD_T add; char * aptrd, * bptrd; /* .. * .. Executable Statements .. * */ mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks. */ if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 ); /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp; iupp = VM->iupp; nprow = VM->nprow; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low; ilow = VM->ilow; npcol = VM->npcol; if( Mupcase( UNPA[0] ) == CPACKING ) { /* * B is the distributed target, A is the distributed source */ if( Mupcase( TRANS[0] ) == CNOTRAN ) { /* * Add A to B */ notran = 1; add = TYPE->Fmmadd; } else if( Mupcase( TRANS[0] ) == CCONJG ) { /* * Add the conjugate of A to B */ notran = 1; add = TYPE->Fmmcadd; } else if( Mupcase( TRANS[0] ) == CTRAN ) { /* * Add the tranpose of A to B */ notran = 0; add = TYPE->Fmmtadd; } else { /* * Add the conjugate tranpose of A to B */ notran = 0; add = TYPE->Fmmtcadd; } } else { /* * A is the distributed target, B is the distributed source */ if( Mupcase( TRANS[0] ) == CNOTRAN ) { /* * Add B to A */ notran = 1; add = TYPE->Fmmdda; } else if( Mupcase( TRANS[0] ) == CCONJG ) { /* * Add the conjugate of B to A */ notran = 1; add = TYPE->Fmmddac; } else if( Mupcase( TRANS[0] ) == CTRAN ) { /* * Add the tranpose of B to A */ notran = 0; add = TYPE->Fmmddat; } else { /* * Add the conjugate tranpose of B to A */ notran = 0; add = TYPE->Fmmddact; } } size = TYPE->size; rows = ( Mupcase( ROCS[0] ) == CROW ); if( Mupcase( VROCS[0] ) == CROW ) { /* * (un)packing using rows of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A. */ inca = size; incb = ( notran ? size : LDB * size ); m = &tmp2; n = &K; } else { /* * (un)packing columns of k by mn array A */ inca = LDA * size; incb = ( notran ? LDB * size : size ); m = &K; n = &tmp2; } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( mblks < 2 ) ? imbloc : imbloc + ( mblks - 2 ) * mb + lmbloc ); npq = MIN( npq, kb ); if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB ); else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B-lcmt00*incb, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to A. The pointer to B remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to A accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; A += mb * inca; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; aptrd = A; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; aptrd += mbloc * inca; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to B. The pointer to A remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * B accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; B += nb * incb; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; bptrd = B; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, bptrd, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, bptrd-lcmt*incb, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; bptrd += nbloc * incb; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; A += mb * inca; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; B += nb * incb; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; aptrd = A; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; aptrd += mbloc * inca; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; B += nbloc * incb; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } else { /* * (un)packing using columns of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A */ inca = size; incb = ( notran ? size : LDB * size ); m = &tmp2; n = &K; } else { /* * (un)packing columns of k by mn array A */ inca = LDA * size; incb = ( notran ? LDB * size : size ); m = &K; n = &tmp2; } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( nblks < 2 ) ? inbloc : inbloc + ( nblks - 2 ) * nb + lnbloc ); npq = MIN( npq, kb ); if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB ); else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B+lcmt00*incb, &LDB ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to B. The pointer to A remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to B accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; B += mb * incb; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; bptrd = B; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; bptrd += mbloc * incb; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to A. The pointer to B remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * A accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; A += nb * inca; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; aptrd = A; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B+lcmt*incb, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; aptrd += nbloc * inca; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; B += mb * incb; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; A += nb * inca; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; bptrd = B; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; bptrd += mbloc * incb; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; A += nbloc * inca; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } /* * End of PB_CVMloc */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMnpq.c0000640000175000017500000002506510363532303017525 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_CVMnpq( PB_VM_T * VM ) #else int PB_CVMnpq( VM ) /* * .. Array Arguments .. */ PB_VM_T * VM; #endif { /* * Purpose * ======= * * PB_CVMnpq computes the number of diagonal entries in the virtual ma- * specified by VM. * * Arguments * ========= * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, Pmb, Qnb, gcdb, ilow, imbloc, inbloc, iupp, kmax, kmin, k1, k2, k3, lcmb, lcmp, lcmq, lcmt, lcmt00, lmbloc, lnbloc, low, l1, l2, l3, m, mb, mblkd, mblks, mbloc, n, nb, nblkd, nblks, nbloc, nlcmblks, npcol, npq=0, nprow, tmp1, tmp2, upp; /* .. * .. Executable Statements .. * */ m = VM->mp; n = VM->nq; /* * Quick return if I don't own any data. */ if( ( m == 0 ) || ( n == 0 ) ) return( 0 ); /* * The only valuable shortcut is when the virtual grid and the blocks are * square, and the offset is zero or the grid is 1x1. */ mb = VM->mb; nprow = VM->nprow; nb = VM->nb; npcol = VM->npcol; if( ( ( VM->offd == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) return( MIN( m, n ) ); else return( 0 ); } /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; mblks = VM->mblks; imbloc = VM->imbloc; lmbloc = VM->lmbloc; iupp = VM->iupp; upp = VM->upp; nblks = VM->nblks; inbloc = VM->inbloc; lnbloc = VM->lnbloc; ilow = VM->ilow; low = VM->low; lcmb = VM->lcmb; Pmb = nprow * mb; Qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ npq += ( lcmt00 >= 0 ? ( ( tmp2 = ( tmp1 = imbloc - lcmt00 ) > 0 ? tmp1 : 0 ) < inbloc ? tmp2 : inbloc ) : ( ( tmp2 = ( tmp1 = inbloc + lcmt00 ) > 0 ? tmp1 : 0 ) > imbloc ? imbloc : tmp2 ) ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + Pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value. */ lcmt00 -= iupp - upp + Pmb; mblks--; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= Pmb; mblks--; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; mbloc = mb; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; npq += ( lcmt >= 0 ? ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < inbloc ? tmp2 : inbloc ) : ( ( tmp2 = ( tmp1 = inbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ? mbloc : tmp2 ) ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt00 = lcmt; lcmt -= Pmb; mblks = mblkd--; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + Qnb; nblks--; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value. */ lcmt00 += low - ilow + Qnb; nblks--; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += Qnb; nblks--; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; nbloc = nb; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; npq += ( lcmt >= 0 ? ( ( tmp2 = ( tmp1 = imbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ? tmp2 : nbloc ) : ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > imbloc ? imbloc : tmp2 ) ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt00 = lcmt; lcmt += Qnb; nblks = nblkd--; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + Pmb; mblks--; } /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( lcmt00 < low || lcmt00 > upp ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= Pmb; mblks--; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += Qnb; nblks--; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * Figure out how many "full" lcm blocks are remaining. */ gcdb = ( Pmb * Qnb ) / lcmb; if( lcmt00 > 0 ) { kmin = - ( lcmb / gcdb ); kmax = ( lcmb - Qnb ) / gcdb; tmp1 = ( mblks - 1 ) / ( lcmp = lcmb / Pmb ); tmp2 = nblks / ( lcmq = lcmb / Qnb ); } else if( lcmt00 < 0 ) { kmin = - ( ( lcmb - Pmb ) / gcdb ); kmax = lcmb / gcdb; tmp1 = mblks / ( lcmp = lcmb / Pmb ); tmp2 = ( nblks - 1 ) / ( lcmq = lcmb / Qnb ); } else { kmin = - ( ( lcmb - Pmb ) / gcdb ); kmax = ( lcmb - Qnb ) / gcdb; tmp1 = mblks / ( lcmp = lcmb / Pmb ); tmp2 = nblks / ( lcmq = lcmb / Qnb ); } /* * The last block, even if it is an lcm block will be handled separately */ nlcmblks = MIN( tmp1, tmp2 ); if( nlcmblks ) nlcmblks--; /* * Compute the lcm block part, update mblks and nblks */ if( nlcmblks ) { tmp2 = 0; k1 = -lcmt00; k1 = ICEIL( k1, gcdb ); l1 = k1 - 1; l1 = MIN( l1, kmax ); k1 = MAX( k1, kmin ); k3 = upp - lcmt00; k3 = FLOOR( k3, gcdb ); k3 = MIN( k3, kmax ); l3 = low - lcmt00; l3 = ICEIL( l3, gcdb ); l3 = MAX( l3, kmin ); if( k1 <= k3 ) { k2 = mb - nb - lcmt00; k2 = ICEIL( k2, gcdb ); if( k2 < k1 ) { /* * k2 < k1 */ tmp1 = k3 - k1 + 1; tmp2 = tmp1 * ( mb - lcmt00 ); tmp1 *= ( k3 + k1 )*gcdb; tmp2 += ( tmp1 > 0 ? -( tmp1 / 2 ) : (-tmp1) / 2 ); } else if( k2 > k3 ) { /* * k2 = k3 + 1 */ tmp2 = ( k3 - k1 + 1 ) * nb; } else { /* * k1 <= k2 <= k3 */ tmp1 = k3 - k2 + 1; tmp2 = ( k2 - k1 ) * nb + tmp1 * ( mb - lcmt00 ); tmp1 *= ( k3 + k2 ) * gcdb; tmp2 += ( tmp1 > 0 ? -( tmp1 / 2 ) : (-tmp1) / 2 ); } } if( l3 <= l1 ) { l2 = mb - nb - lcmt00; l2 = FLOOR( l2, gcdb ); if( l2 > l1 ) { /* * l2 > l1 */ tmp1 = l1 - l3 + 1; tmp2 += tmp1 * ( nb + lcmt00 ); tmp1 *= ( l3 + l1 ) * gcdb; tmp2 += ( tmp1 > 0 ? ( tmp1 / 2 ) : -( (-tmp1) / 2 ) ); } else if( l2 < l3 ) { /* * l2 = l3 - 1 */ tmp2 += ( l1 - l3 + 1 ) * mb; } else { /* * l3 <= l2 <= l1 */ tmp1 = l2 - l3 + 1; tmp2 += ( l1 - l2 ) * mb + tmp1 * ( nb + lcmt00 ); tmp1 *= ( l3 + l2 ) * gcdb; tmp2 += ( tmp1 > 0 ? ( tmp1 / 2 ) : -( (-tmp1) / 2 ) ); } } npq += nlcmblks * tmp2; mblks -= nlcmblks * lcmp; nblks -= nlcmblks * lcmq; } /* * Handle last partial (lcm) block separately */ nbloc = nb; while( nblks ) { /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ if( nblks == 1 ) nbloc = lnbloc; while( mblks && lcmt00 > upp ) { lcmt00 -= Pmb; mblks--; } if( mblks <= 0 ) return( npq ); lcmt = lcmt00; mblkd = mblks; mbloc = mb; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; npq += ( lcmt >= 0 ? ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ? tmp2 : nbloc ) : ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ? mbloc : tmp2 ) ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt00 = lcmt; lcmt -= Pmb; mblks = mblkd--; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += Qnb; nblks--; /* * ... until there are no more columns. */ } /* * Return the number of diagonals found. */ return( npq ); /* * End of PB_CVMnpq */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cptradd.c0000640000175000017500000004672210363532303017745 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cptradd( PBTYP_T * TYPE, char * DIRECAC, char * UPLO, char * TRANS, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_Cptradd( TYPE, DIRECAC, UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * DIRECAC, * TRANS, * UPLO; int IA, IC, JA, JC, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCC; char * A, * C; #endif { /* * Purpose * ======= * * PB_Cptradd adds a trapezoidal matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C', * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * upper or lower trapezoidal submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECAC (global input) pointer to CHAR * On entry, DIRECAC specifies the direction in which the rows * or columns of sub( A ) and sub( C ) should be looped over as * follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of the * array C containing the upper or lower triangular part of the * triangular submatrix sub( C ) is to be referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * triangular submatrix sub( C ) is to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * triangular submatrix sub( C ) is to be * referenced. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrices * sub( A ) and sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatri- * ces sub( A ) and sub( C ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Dir, * one, * zero; int Afr, conjg, k, kb, kbb, kend, kstart, kstep, ktmp; /* * .. Local Arrays .. */ int DBUFA[DLEN_]; char * Aptr = NULL; /* .. * .. Executable Statements .. * */ /* * sub( C ) := beta * sub( C ) */ PB_Cplascal( TYPE, UPLO, NOCONJG, M, N, BETA, C, IC, JC, DESCC ); one = TYPE->one; zero = TYPE->zero; kb = pilaenv_( &DESCC[CTXT_], C2F_CHAR( &TYPE->type ) ); if( Mupcase( DIRECAC[0] ) == CFORWARD ) { Dir = CFORWARD; kstart = 0; kend = ( ( MIN( M, N ) - 1 ) / kb + 1 ) * kb; kstep = kb; } else { Dir = CBACKWARD; kstart = ( ( MIN( M, N ) - 1 ) / kb ) * kb; kend = kstep = -kb; } if( Mupcase( TRANS[0] ) == CNOTRAN ) { if( Mupcase( UPLO [0] ) == CUPPER ) { if( M >= N ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Scale A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) by ALPHA */ PB_Cplascal( TYPE, ALL, NOCONJG, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero lower triangle of A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, k+1, 0, DBUFA ); /* * C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) += A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CScatterV( TYPE, &Dir, ktmp, kbb, Aptr, 0, 0, DBUFA, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( Afr ) free( Aptr ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = N - k; /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Scale A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) by ALPHA */ PB_Cplascal( TYPE, ALL, NOCONJG, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero lower triangle of A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 1, 0, DBUFA ); /* * C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 ) += A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CScatterV( TYPE, &Dir, kbb, ktmp, Aptr, 0, 0, DBUFA, ROW, one, C, IC+k, JC+k, DESCC, ROW ); if( Afr ) free( Aptr ); } } } else { if( M >= N ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = M - k; /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Scale A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) by ALPHA */ PB_Cplascal( TYPE, ALL, NOCONJG, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero upper triangle of A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 0, 1, DBUFA ); /* * C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) += A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CScatterV( TYPE, &Dir, ktmp, kbb, Aptr, 0, 0, DBUFA, COLUMN, one, C, IC+k, JC+k, DESCC, COLUMN ); if( Afr ) free( Aptr ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Scale A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) by ALPHA */ PB_Cplascal( TYPE, ALL, NOCONJG, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero upper triangle of A( IA+k:IA+k+kbb-1, JA+k:JA:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 0, k+1, DBUFA ); /* * C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 ) += A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CScatterV( TYPE, &Dir, kbb, ktmp, Aptr, 0, 0, DBUFA, ROW, one, C, IC+k, JC, DESCC, ROW ); if( Afr ) free( Aptr ); } } } } else { conjg = ( Mupcase( TRANS[0] ) == CCOTRAN ); if( Mupcase( UPLO [0] ) == CUPPER ) { if( M >= N ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Scale A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) by ALPHA */ if( conjg ) PB_Cplacnjg( TYPE, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); else PB_Cplascal( TYPE, ALL, NOCONJG, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero upper triangle of A( IA+k:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 0, k+1, DBUFA ); /* * C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) += A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 )' */ PB_CScatterV( TYPE, &Dir, kbb, ktmp, Aptr, 0, 0, DBUFA, ROW, one, C, IC, JC+k, DESCC, COLUMN ); if( Afr ) free( Aptr ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = N - k; /* * Accumulate A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Scale A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 ) by ALPHA */ if( conjg ) PB_Cplacnjg( TYPE, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); else PB_Cplascal( TYPE, ALL, NOCONJG, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero upper triangle of A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 0, 1, DBUFA ); /* * C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 ) += A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 )' */ PB_CScatterV( TYPE, &Dir, ktmp, kbb, Aptr, 0, 0, DBUFA, COLUMN, one, C, IC+k, JC+k, DESCC, ROW ); if( Afr ) free( Aptr ); } } } else { if( M >= N ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = M - k; /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Scale A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 ) by ALPHA */ if( conjg ) PB_Cplacnjg( TYPE, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); else PB_Cplascal( TYPE, ALL, NOCONJG, kbb, ktmp, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero lower triangle of A( IA+k:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, 1, 0, DBUFA ); /* * C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) += A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 )' */ PB_CScatterV( TYPE, &Dir, kbb, ktmp, Aptr, 0, 0, DBUFA, ROW, one, C, IC+k, JC+k, DESCC, COLUMN ); if( Afr ) free( Aptr ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, &Dir, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Scale A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) by ALPHA */ if( conjg ) PB_Cplacnjg( TYPE, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); else PB_Cplascal( TYPE, ALL, NOCONJG, ktmp, kbb, ALPHA, Aptr, 0, 0, DBUFA ); /* * Zero lower triangle of A( IA+k:IA+k+kbb-1, JA+k:JA:JA+k+kbb-1 ) */ if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, Aptr, k+1, 0, DBUFA ); /* * C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 ) += A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 )' */ PB_CScatterV( TYPE, &Dir, ktmp, kbb, Aptr, 0, 0, DBUFA, COLUMN, one, C, IC+k, JC, DESCC, ROW ); if( Afr ) free( Aptr ); } } } } /* * End of PB_Cptradd */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CInOutV.c0000640000175000017500000007647110363532303017657 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CInOutV( PBTYP_T * TYPE, char * ROWCOL, int M, int N, int * DESCA, int K, char * BETA, char * Y, int IY, int JY, int * DESCY, char * YROC, char * * TBETA, char * * YAPTR, int * DYA, int * YAFREE, int * YASUM, int * YAPBY ) #else void PB_CInOutV( TYPE, ROWCOL, M, N, DESCA, K, BETA, Y, IY, JY, DESCY, YROC, TBETA, YAPTR, DYA, YAFREE, YASUM, YAPBY ) /* * .. Scalar Arguments .. */ char * BETA, * ROWCOL, * * TBETA, * YROC; int * YAPBY, * YAFREE, IY, JY, K, M, N, * YASUM; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCY, * DYA; char * Y, * * YAPTR; #endif { /* * Purpose * ======= * * PB_CInOutV returns a pointer to an array that contains a one-dimen- * sional input/output subvector which is replicated over the rows or * columns of a submatrix described by DESCA. A subvector is specified * on input to this routine that is reused whenever possible. On return, * the subvector is specified by a pointer to some data, a descriptor * array describing its layout, a logical value indicating if this local * piece of data has been dynamically allocated by this function, a lo- * gical value specifying if sum reduction should occur, and finally a * logical value specifying if it is necessary to copy back the alloca- * ted data to the original data. This routine is specifically designed * for traditional Level 2 like PBLAS operations using an input/output * vector such as PxGEMV, PxSYMV ... * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * ROWCOL (global input) pointer to CHAR * On entry, ROWCOL specifies if this routine should return a * row or column subvector replicated over the underlying subma- * trix as follows: * = 'R' or 'r': A row subvector is returned, * = 'C' or 'c': A column subvector is returned. * * M (global input) INTEGER * On entry, M specifies the number of rows of the underlying * submatrix described by DESCA. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the underlying * submatrix described by DESCA. N must be at least zero. * * DESCA (global and local input/output) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. EXCEPTIONALLY, THIS * INTERNAL ROUTINE MAY MODIFY DESCA IN ORDER TO MINIMIZE THE * AMOUNT OF DATA TO BE MOVED FOR THE VECTOR Y. SEE PxGEMV FOR * AN EXAMPLE. * * K (global input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension of the subvector sub( Y ). K must be at least zero. * * BETA (global input) pointer to CHAR * On entry, BETA is a scalar the input subvector sub( Y ) must * be scaled by. * * Y (local input) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( K, IY ) ) when YROC is 'R' or 'r' * and MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at * least Lc( 1, JY+Ly-1 ) when YROC is 'R' or 'r' and * Lc( K, JY ) otherwise. Ly is N when ROWCOL is 'R' or 'r' and * M otherwise. Before entry, this array contains the local * entries of the matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * YROC (global input) pointer to CHAR * On entry, YROC specifies the orientation of the subvector * sub( Y ). When YROC is 'R' or 'r', sub( Y ) is a row vector, * and a column vector otherwise. * * TBETA (local output) pointer to pointer to CHAR * On exit, * TBETA is a scalar to be used locally to scale the * data pointed to by * YAPTR, in order to obtain the correct * result in the original data sub( Y ). * * YAPTR (local output) pointer to pointer to CHAR * On exit, * YAPTR is an array containing the same data as the * subvector sub( Y ) which is replicated over the rows or co- * lumns of the underlying matrix as specified by ROWCOL and * DESCA. * * DYA (global and local output) INTEGER array * On exit, DYA is a descriptor array of dimension DLEN_ descri- * bing the data layout of the data pointed to by * YAPTR. * * YAFREE (local output) INTEGER * On exit, YAFREE specifies if it was possible to reuse the * subvector sub( Y ), i.e., if some dynamic memory was alloca- * ted for the data pointed to by * YAPTR or not. When YAFREE is * zero, no dynamic memory was allocated. Otherwise, some dyna- * mic memory was allocated by this function that one MUST re- * lease as soon as possible. * * YASUM (global output) INTEGER * On exit, YASUM specifies if a global sum reduction should be * performed to obtain the correct sub( Y ). When YASUM is zero, * no reduction is to be performed, otherwise reduction should * occur. * * YAPBY (global output) INTEGER * On exit, YAPBY specifies if the data pointed to by * YAPTR * must be move back onto sub( Y ) to obtain the correct result. * When YAPBY is zero, no supplementary data movement is neces- * sary, otherwise a data redistribution should occur. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Aimb, Ainb, AisD, AisR, Amb, Amp, Anb, Anq, Arow, Ycol, Yii, Yimb, Yimb1, Yinb, Yinb1, YisD, YisR, YisRow, Yjj, Yld, Ymb, Ymp, Ynb, Ynq, Yrow, ctxt, izero=0, nprow, myrow, npcol, mycol; /* .. * .. Executable Statements .. * */ /* * Initialize the output parameters to a default value */ *YAFREE = 0; *YASUM = 0; *YAPBY = 0; *YAPTR = NULL; *TBETA = BETA; /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) { if( Mupcase( ROWCOL[0] ) == CROW ) { PB_Cdescset( DYA, K, N, 1, DESCA[INB_], 1, DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 ); } else { PB_Cdescset( DYA, M, K, DESCA[IMB_], 1, DESCA[MB_], 1, DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], DESCA[LLD_] ); } return; } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCY[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ Minfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, Yii, Yjj, Yrow, Ycol ); /* * Is sub( Y ) distributed or not, replicated or not ? */ if( ( YisRow = ( Mupcase( YROC[0] ) == CROW ) ) != 0 ) { YisD = ( ( Ycol >= 0 ) && ( npcol > 1 ) ); YisR = ( ( Yrow == -1 ) || ( nprow == 1 ) ); } else { YisD = ( ( Yrow >= 0 ) && ( nprow > 1 ) ); YisR = ( ( Ycol == -1 ) || ( npcol == 1 ) ); } Aimb = DESCA[IMB_ ]; Ainb = DESCA[INB_ ]; Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Arow = DESCA[RSRC_]; Acol = DESCA[CSRC_]; if( Mupcase( ROWCOL[0] ) == CROW ) { /* * Want a row vector */ AisR = ( ( Arow < 0 ) || ( nprow == 1 ) ); if( YisRow ) { /* * It is possible to reuse sub( Y ) iff sub( Y ) is already a row vector. */ AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) ); Yinb = DESCY[INB_]; Ynb = DESCY[NB_]; Mfirstnb( Yinb1, N, JY, Yinb, Ynb ); /* * sub( Y ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * column and either N is smaller than the first blocksize of sub( Y ) and A, * or their column blocking factors match. */ if( ( !AisD && !YisD ) || ( ( AisD && YisD ) && ( ( Acol == Ycol ) && ( ( ( Ainb >= N ) && ( Yinb1 >= N ) ) || ( ( Ainb == Yinb1 ) && ( Anb == Ynb ) ) ) ) ) ) { Mnumroc( Ynq, N, 0, Yinb1, Ynb, mycol, Ycol, npcol ); Ymp = ( YisR ? K : ( ( myrow == Yrow ) ? K : 0 ) ); if( YisR ) { /* * If sub( Y ) is replicated, there is no need to move sub( Y ) after the * operation (*YAPBY = 0), and it can be reused. */ *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( Ynq > 0 ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( AisR ) { /* * If A is replicated as well, use BETA in every process row, and do not combine * the local results. */ *TBETA = BETA; *YASUM = 0; } else { /* * Otherwise, use BETA in process row Arow and zero elsewhere. Reduce the local * result if there is more than one row in the process grid. */ *TBETA = ( ( myrow == Arow ) ? BETA : TYPE->zero ); *YASUM = ( nprow > 1 ); /* * If some process rows do not own any entries of A, better set sub( Y ) to zero * in those processes. */ Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow ); if( Amp <= 0 ) TYPE->Ftzscal( C2F_CHAR( ALL ), &K, &Ynq, &izero, *TBETA, *YAPTR, &Yld ); } } else { /* * sub( Y ) is not replicated, the descriptor of A may need to be modified ... */ if( AisR ) { /* * If A is replicated, use only the copy in the process row where sub( Y ) * resides -> modify DESCA !!! */ *TBETA = BETA; *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; DESCA[ IMB_ ] = M; DESCA[ RSRC_ ] = Yrow; if( ( Ynq > 0 ) && ( Ymp > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { if( Mspan( M, 0, Aimb, Amb, Arow, nprow ) ) { /* * Otherwise, A is not replicated, let assume in addition that it spans more * than one process row */ *YASUM = ( nprow > 1 ); *YAPBY = 0; if( myrow == Yrow ) { /* * Reuse sub( Y ). If there is no entries of A in the process row where sub( Y ) * resides, better scale it by BETA immediately. */ *TBETA = BETA; Yld = DESCY[ LLD_ ]; if( Ynq > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow ); if( Amp <= 0 ) TYPE->Ftzscal( C2F_CHAR( ALL ), &K, &Ynq, &izero, *TBETA, *YAPTR, &Yld ); } } else { /* * Allocate space in the other process rows and initialize to zero. */ *TBETA = TYPE->zero; Yld = MAX( 1, K ); if( Ynq > 0 ) { *YAPTR = PB_Cmalloc( K * Ynq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Ynq, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } } else { /* * A spans only one process row */ if( Yrow == Arow ) { /* * A and sub( Y ) resides in the same process row */ *TBETA = BETA; *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( ( myrow == Yrow ) && ( Ynq > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { /* * If sub( Y ) resides in another process row, then allocate zero-data in * process row where A resides, and set *YAPBY to 1, so that this data will be * added (moved) after the local operation has been performed. */ *TBETA = TYPE->zero; *YASUM = 0; *YAPBY = 1; Yrow = Arow; Yld = MAX( 1, K ); if( myrow == Arow ) { if( Ynq > 0 ) { *YAPTR = PB_Cmalloc( K * Ynq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Ynq, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } } } } } /* * Describe the resulting operand. Note that when reduction should occur, Yrow * contains the destination row. Assuming every process row needs the result, * Yrow is then -1. */ MDescSet( DYA, K, N, K, Yinb1, 1, Ynb, Yrow, Ycol, ctxt, Yld ); return; } } /* * sub( Y ) cannot be reused, set TBETA to zero for the local operation, and * force YAPBY to 1 for the later update of sub( Y ). */ *TBETA = TYPE->zero; *YAPBY = 1; Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol ); Yld = MAX( 1, K ); if( YisR ) { /* * If sub( Y ) is replicated, allocate space in every process row owning some * columns of A and initialize it to zero. There may be some wasted space * (suppose A was residing in just one row), however, it is hoped that moving * back this data to sub( Y ) will then be cheaper ... */ *YASUM = ( AisR ? 0 : ( nprow > 1 ) ); Arow = -1; if( Anq > 0 ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } else { /* * sub( Y ) resides within only one process row */ if( AisR ) { /* * If A is replicated, then modify sub( A ) so that only one process row will * compute the result before moving it back to sub( Y ). */ *YASUM = 0; DESCA[ IMB_ ] = M; if( YisRow ) { /* * Choose different process row than Yrow for better performance (more links) * of the later move-back phase. */ DESCA[RSRC_] = Arow = MModSub1( Yrow, nprow ); } else { DESCA[RSRC_] = Arow = 0; } if( ( myrow == Arow ) && ( Anq > 0 ) ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } else { if( Mspan( M, 0, Aimb, Amb, Arow, nprow ) ) { /* * If A is not replicated, and spans more than just one process row, then * allocate space in every process row and zero it. */ *YASUM = ( nprow > 1 ); if( Anq > 0 ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, *TBETA, *TBETA, *YAPTR, &K ); } } else { /* * If A is not replicated, and spans only one process row, then allocate space * within that process row and zero it. */ *YASUM = 0; if( ( myrow == Arow ) && ( Anq > 0 ) ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, *TBETA, *TBETA, *YAPTR, &K ); } } } } /* * Describe the resulting operand. Note that when reduction should occur, Arow * contains the destination row. Assuming every process row needs the result, * Arow is then -1. */ MDescSet( DYA, K, N, K, Ainb, 1, Anb, Arow, Acol, ctxt, Yld ); } else { /* * Want a column vector */ AisR = ( ( Acol < 0 ) || ( npcol == 1 ) ); if( !YisRow ) { /* * It is possible to reuse sub( Y ) iff sub( Y ) is already a column vector. */ AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) ); Yimb = DESCY[IMB_]; Ymb = DESCY[MB_]; Mfirstnb( Yimb1, M, IY, Yimb, Ymb ); /* * sub( Y ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * row and either M is smaller than the first blocksize of sub( Y ) and A, or * their row blocking factors match. */ if( ( !AisD && !YisD ) || ( ( AisD && YisD ) && ( ( Arow == Yrow ) && ( ( ( Aimb >= M ) && ( Yimb1 >= M ) ) || ( ( Aimb == Yimb1 ) && ( Amb == Ymb ) ) ) ) ) ) { Mnumroc( Ymp, M, 0, Yimb1, Ymb, myrow, Yrow, nprow ); Ynq = ( YisR ? K : ( ( mycol == Ycol ) ? K : 0 ) ); if( YisR ) { /* * If sub( Y ) is replicated, there is no need to move sub( Y ) after the * operation (*YAPBY = 0), and it can be reused. */ *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( Ymp > 0 ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( AisR ) { /* * If A is replicated as well, use BETA in every process column, and do not * combine the local results. */ *TBETA = BETA; *YASUM = 0; } else { /* * Otherwise, use BETA in process column Acol and zero elsewhere. Reduce the * local result if there is more than one column in the process grid. */ *TBETA = ( ( mycol == Acol ) ? BETA : TYPE->zero ); *YASUM = ( npcol > 1 ); /* * If some process columns do not own any entries of A, better set sub( Y ) to * zero in those processes. */ Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol ); if( Anq <= 0 ) TYPE->Ftzscal( C2F_CHAR( ALL ), &Ymp, &K, &izero, *TBETA, *YAPTR, &Yld ); } } else { /* * sub( Y ) is not replicated, the descriptor of A may need to be modified ... */ if( AisR ) { /* * If A is replicated, use only the copy in the process column where sub( Y ) * resides -> modify DESCA !!! */ *TBETA = BETA; *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; DESCA[ INB_ ] = N; DESCA[ CSRC_ ] = Ycol; if( ( Ymp > 0 ) && ( Ynq > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { if( Mspan( N, 0, Ainb, Anb, Acol, npcol ) ) { /* * Otherwise, A is not replicated, let assume in addition that it spans more * than one process column */ *YASUM = ( npcol > 1 ); *YAPBY = 0; if( mycol == Ycol ) { /* * Reuse sub( Y ). If there is no entries of A in the process column where * sub( Y ) resides, better scale it by BETA immediately. */ *TBETA = BETA; Yld = DESCY[ LLD_ ]; if( Ymp > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol ); if( Anq <= 0 ) TYPE->Ftzscal( C2F_CHAR( ALL ), &Ymp, &K, &izero, *TBETA, *YAPTR, &Yld ); } } else { /* * Allocate space in the other process columns and initialize to zero. */ *TBETA = TYPE->zero; Yld = MAX( 1, Ymp ); if( Ymp > 0 ) { *YAPTR = PB_Cmalloc( Ymp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ymp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } } else { /* * A spans only one process column */ if( Ycol == Acol ) { /* * A and sub( Y ) resides in the same process column */ *TBETA = BETA; *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( ( mycol == Ycol ) && ( Ymp > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { /* * If sub( Y ) resides in another process column, then allocate zero-data in * process column where A resides, and set *YAPBY to 1, so that this data will * be added (moved) after the local operation has been performed. */ *TBETA = TYPE->zero; *YASUM = 0; *YAPBY = 1; Ycol = Acol; Yld = MAX( 1, Ymp ) ; if( mycol == Acol ) { if( Ymp > 0 ) { *YAPTR = PB_Cmalloc( Ymp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ymp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } } } } } /* * Describe the resulting operand. Note that when reduction should occur, Ycol * contains the destination column. Assuming every process column needs the * result, Ycol is then -1. */ MDescSet( DYA, M, K, Yimb1, K, Ymb, 1, Yrow, Ycol, ctxt, Yld ); return; } } /* * sub( Y ) cannot be reused, set TBETA to zero for the local operation, and * force YAPBY to 1 for the later update of sub( Y ). */ *TBETA = TYPE->zero; *YAPBY = 1; Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow ); Yld = MAX( 1, Amp ); if( YisR ) { /* * If sub( Y ) is replicated, allocate space in every process column owning some * rows of A and initialize it to zero. There may be some wasted space (suppose * A was residing in just one column), however, it is hoped that moving back * this data to sub( Y ) will then be cheaper ... */ *YASUM = ( AisR ? 0 : ( npcol > 1 ) ); Acol = -1; if( Amp > 0 ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } else { /* * sub( Y ) resides within only one process column */ if( AisR ) { /* * If A is replicated, then modify sub( A ) so that only one process column will * compute the result before moving it back to sub( Y ). */ *YASUM = 0; DESCA[ INB_ ] = N; if( YisRow ) { DESCA[ CSRC_ ] = Acol = 0; } else { /* * Choose different process column than Ycol for better performance (more links) * of the later move-back phase. */ DESCA[ CSRC_ ] = Acol = MModSub1( Ycol, npcol ); } if( ( mycol == Acol ) && ( Amp > 0 ) ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } else { if( Mspan( N, 0, Ainb, Anb, Acol, npcol ) ) { /* * If A is not replicated, and spans more than just one process column, then * allocate space in every process column and zero it. */ *YASUM = ( npcol > 1 ); if( Amp > 0 ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } else { /* * If A is not replicated, and spans only one process column, then allocate * space within that process column and zero it. */ *YASUM = 0; if( ( mycol == Acol ) && ( Amp > 0 ) ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, *TBETA, *TBETA, *YAPTR, &Yld ); } } } } /* * Describe the resulting operand. Note that when reduction should occur, Acol * contains the destination column. Assuming every process column needs the * result, Acol is then -1. */ MDescSet( DYA, M, K, Aimb, K, Amb, 1, Arow, Acol, ctxt, Yld ); } /* * End of PB_CInOutV */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzhemm.c0000640000175000017500000003315410363532303017766 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzhemm( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * BC, int LDBC, char * BR, int LDBR, char * CC, int LDCC, char * CR, int LDCR ) #else void PB_Ctzhemm( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, BC, LDBC, BR, LDBR, CC, LDCC, CR, LDCR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDBC, LDBR, LDCC, LDCR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * BC, * BR, * CC, * CR; #endif { /* * Purpose * ======= * * PB_Ctzhemm performs the matrix-matrix operation * * C := alpha * A * B + C, * * or * * C := alpha * B * A + C, * * where alpha is a scalar, B and C are m by k and k by n matrices and A * is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (input) pointer to CHAR * On entry, SIDE specifies whether op( A ) multiplies B from * the left or right as follows: * * SIDE = 'L' or 'l' C := alpha * A * B + C, * * SIDE = 'R' or 'r' C := alpha * B * A + C. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of rows of the matrices BR * and CR and the number of columns of the matrices BC and CC. K * must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BC (input) pointer to CHAR * On entry, BC is an array of dimension (LDBC,K) containing the * m by k matrix BC. * * LDBC (input) INTEGER * On entry, LDBC specifies the leading dimension of the array * BC. LDBC must be at least max( 1, M ). * * BR (input) pointer to CHAR * On entry, BR is an array of dimension (LDBR,N) containing the * k by n matrix BR. * * LDBR (input) INTEGER * On entry, LDBR specifies the leading dimension of the array * BR. LDBR must be at least K. * * CC (input/output) pointer to CHAR * On entry, CC is an array of dimension (LDCC,K) containing the * m by k matrix CC. On exit, CC is overwritten by the partially * updated matric CC. * * LDCC (input) INTEGER * On entry, LDCC specifies the leading dimension of the array * CC. LDCC must be at least max( 1, M ). * * CR (input/output) pointer to CHAR * On entry, CR is an array of dimension (LDCR,N) containing the * k by n matrix CR. On exit, CR is overwritten by the partially * updated matrix CR. * * LDCR (input) INTEGER * On entry, LDCR specifies the leading dimension of the array * CR. LDCR must be at least K. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Calph, * one, type; int i1, j1, m1, mn, n1, size; cmplx Calph8; cmplx16 Calph16; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; type = TYPE->type; if( type == SCPLX ) { Calph = ( (char *)(Calph8 ) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else if( type == DCPLX ) { Calph = ( (char *)(Calph16) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else { Calph = ALPHA; } if( Mupcase( SIDE[0] ) == CLEFT ) { if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( COTRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, Calph, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fhemm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &n1, &K, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); gemm( C2F_CHAR( COTRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, Calph, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( A, i1, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( COTRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, Calph, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } TYPE->Fhemm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &n1, &K, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( BC, m1, 0, LDBC, size ), &LDBC, one, Mptr( CC, m1, 0, LDCC, size ), &LDCC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( COTRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, Calph, BC, &LDBC, Mptr( A, 0, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &N, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( COTRAN ), C2F_CHAR( NOTRAN ), &K, &N, &M, Calph, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } } else { if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( COTRAN ), &M, &K, &n1, Calph, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fhemm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &K, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( COTRAN ), &m1, &K, &n1, Calph, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( A, i1, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( COTRAN ), &m1, &K, &n1, Calph, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } TYPE->Fhemm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &K, &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( COTRAN ), &M, &K, &n1, Calph, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, Mptr( A, 0, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( COTRAN ), &M, &K, &N, Calph, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &N, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } } /* * End of PB_Ctzhemm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cinfog2l.c0000640000175000017500000003205210363532303020016 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cinfog2l( int I, int J, int * DESC, int NPROW, int NPCOL, int MYROW, int MYCOL, int * II, int * JJ, int * PROW, int * PCOL ) #else void PB_Cinfog2l( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ, PROW, PCOL ) int I, * II, J, * JJ, MYCOL, MYROW, NPCOL, NPROW, * PCOL, * PROW; /* * .. Scalar Arguments .. */ /* * .. Array Arguments .. */ int * DESC; #endif { /* * Purpose * ======= * * PB_Cinfog2l computes the starting local index II, JJ corresponding to * the submatrix starting globally at the entry pointed by I, J. This * routine returns the coordinates in the grid of the process owning the * matrix entry of global indexes I, J, namely PROW and PCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least zero. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least zero. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least zero. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of the * submatrix. On exit, JJ is at least zero. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero * and strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least * zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; /* .. * .. Executable Statements .. * */ /* * Retrieve the row distribution parameters */ imb = DESC[IMB_ ]; *PROW = DESC[RSRC_]; if( ( *PROW == -1 ) || ( NPROW == 1 ) ) { /* * The data is not distributed, or there is just one process row in the grid. */ *II = I; } else if( I < imb ) { /* * I refers to an entry in the first block of rows */ *II = ( MYROW == *PROW ? I : 0 ); } else { mb = DESC[MB_]; rsrc = *PROW; /* * The discussion goes as follows: compute my distance from the source process * so that within this process coordinate system, the source process is the * process such that mydist = 0, or equivalently MYROW == rsrc. * * Find out the global coordinate of the block I belongs to (nblocks), as well * as the minimum local number of blocks that every process has. * * when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROCS, I own ilocblk full blocks * but not I, or I own ilocblk + 1 blocks and the entry I refers to. */ if( MYROW == rsrc ) { /* * I refers to an entry that is not in the first block, find out which process * has it. */ nblocks = ( I - imb ) / mb + 1; *PROW += nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three * possible cases: * * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in * which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot * happen when ilocblk is zero, since nblocks is at least one. * * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which * case I and II can respectively be written as IMB + (nblocks-1)*NB + IL * and IMB + (ilocblk-1) * MB + IL. That is II = I + (ilocblk-nblocks)*MB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note * that when ilocblk is zero, II is just IMB. */ if( nblocks < NPROW ) { *II = imb; } else { ilocblk = nblocks / NPROW; if( ilocblk * NPROW >= nblocks ) { *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb : imb + ( ilocblk - 1 ) * mb ); } else { *II = imb + ilocblk * mb; } } } else { /* * I refers to an entry that is not in the first block, find out which process * has it. */ nblocks = ( I -= imb ) / mb + 1; *PROW += nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; /* * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of * size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB. * When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk * full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL * and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB. */ if( nblocks < NPROW ) { mydist -= nblocks; *II = ( ( mydist < 0 ) ? mb : ( ( MYROW == *PROW ) ? I + ( 1 - nblocks ) * mb : 0 ) ); } else { ilocblk = nblocks / NPROW; mydist -= nblocks - ilocblk * NPROW; *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : ( ( MYROW == *PROW ) ? ( ilocblk - nblocks + 1 ) * mb + I : ilocblk * mb ) ); } } } /* * Idem for the columns */ inb = DESC[INB_ ]; *PCOL = DESC[CSRC_]; if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) { *JJ = J; } else if( J < inb ) { *JJ = ( MYCOL == *PCOL ? J : 0 ); } else { nb = DESC[NB_]; csrc = *PCOL; if( MYCOL == csrc ) { nblocks = ( J - inb ) / nb + 1; *PCOL += nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( nblocks < NPCOL ) { *JJ = inb; } else { ilocblk = nblocks / NPCOL; if( ilocblk * NPCOL >= nblocks ) { *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb : inb + ( ilocblk - 1 ) * nb ); } else { *JJ = inb + ilocblk * nb; } } } else { nblocks = ( J -= inb ) / nb + 1; *PCOL += nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; if( nblocks < NPCOL ) { mydist -= nblocks; *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? J + ( 1 - nblocks )*nb : 0 ) ); } else { ilocblk = nblocks / NPCOL; mydist -= nblocks - ilocblk * NPCOL; *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : ( ( MYCOL == *PCOL ) ? ( ilocblk - nblocks + 1 ) * nb + J : ilocblk * nb ) ); } } } /* * End of PB_Cinfog2l */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplasca2.c0000640000175000017500000002742310363532303020011 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplasca2( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA ) #else void PB_Cplasca2( TYPE, UPLO, CONJUG, M, N, ALPHA, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ char * CONJUG, * UPLO; int IA, JA, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A; #endif { /* * .. Local Scalars .. */ char UploA, herm; int Acol, Arow, Aii, iimax, ilow, imbloc, Aimb1, inbloc, Ainb1, Aoffi, GoEast, GoSouth, ioffd, iupp, izero=0, Ajj, jjmax, Aoffj, joffd, lcmt, lcmt00, Ald, lmbloc, lnbloc, low, lower, m1, Amb, mbloc, mblkd, mblks, Amp, Arcol, Arrow, mycol, myrow, n1, Anb, nbloc, nblkd, nblks, npcol, nprow, Anq, pmb, qnb, size, tmp1, upp, upper; TZSCAL_T scal; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ). */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Ald = DESCA[LLD_]; PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( ( ( Arow < 0 ) || ( nprow == 1 ) ) ? Amb : nprow * Amb ); qnb = ( ( ( Acol < 0 ) || ( npcol == 1 ) ) ? Anb : npcol * Anb ); UploA = Mupcase( UPLO[0] ); upper = ( UploA != CLOWER ); lower = ( UploA != CUPPER ); herm = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) ); size = TYPE->size; scal = ( herm == CCONJG ? TYPE->Fhescal : TYPE->Ftzscal ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ scal( C2F_CHAR( UPLO ), &imbloc, &inbloc, &lcmt00, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be scaled and one is * planning to go south in the table, it is neccessary to take care of the * remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; scal( C2F_CHAR( ALL ), &imbloc, &tmp1, &izero, ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), &Ald ); } Aii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be scaled and one is * planning to go east in the table, it is neccessary to take care of the * remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; scal( C2F_CHAR( ALL ), &tmp1, &inbloc, &izero, ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), &Ald ); } Ajj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row index in A. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; } /* * Scale the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { scal( C2F_CHAR( ALL ), &tmp1, &n1, &izero, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; scal( C2F_CHAR( UPLO ), &mbloc, &inbloc, &lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; } /* * Scale the lower triangular part of sub( A ) when necessary. */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) scal( C2F_CHAR( ALL ), &tmp1, &inbloc, &izero, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; /* * When the upper triangular part of sub( A ) should be scaled, take care of the * n1 remaining columns of these tmp1 rows immediately. */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) scal( C2F_CHAR( ALL ), &tmp1, &n1, &izero, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; } /* * Scale the lower triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { scal( C2F_CHAR( ALL ), &m1, &tmp1, &izero, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald ); Ajj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; scal( C2F_CHAR( UPLO ), &imbloc, &nbloc, &lcmt, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; joffd += nbloc; } /* * Scale the upper triangular part of sub( A ) when necessary. */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) scal( C2F_CHAR( ALL ), &imbloc, &tmp1, &izero, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), &Ald ); tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; /* * When the lower triangular part of sub( A ) should be scaled, take care of the * m1 remaining rows of these tmp1 columns immediately. */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) scal( C2F_CHAR( ALL ), &m1, &tmp1, &izero, ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; } /* * Scale the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { scal( C2F_CHAR( ALL ), &tmp1, &n1, &izero, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; scal( C2F_CHAR( UPLO ), &mbloc, &nbloc, &lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; } /* * Scale the lower triangular part of sub( A ) when necessary. */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) scal( C2F_CHAR( ALL ), &tmp1, &nbloc, &izero, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), &Ald ); tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; /* * When the upper triangular part of sub( A ) should be scaled, take care of the * n1 remaining columns of these tmp1 rows immediately. */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) scal( C2F_CHAR( ALL ), &tmp1, &n1, &izero, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), &Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; } /* * End of PB_Cplasca2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMinit.c0000640000175000017500000002040510363532303017663 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CVMinit( PB_VM_T * VM, int OFFD, int M, int N, int IMB1, int INB1, int MB, int NB, int MRROW, int MRCOL, int NPROW, int NPCOL, int LCMB ) #else void PB_CVMinit( VM, OFFD, M, N, IMB1, INB1, MB, NB, MRROW, MRCOL, NPROW, NPCOL, LCMB ) /* * .. Scalar Arguments .. */ int IMB1, INB1, LCMB, M, MB, MRCOL, MRROW, N, NB, NPCOL, NPROW, OFFD; /* * .. Array Arguments .. */ PB_VM_T * VM; #endif { /* * Purpose * ======= * * PB_CVMinit initializes a virtual matrix with the information of an m * by n local array owned by the process of relative coordinates * ( MRROW, MRCOL ). * * Arguments * ========= * * VM (local output) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T. * On exit, VM points to the initialized structure containing * the virtual matrix information (see pblas.h). * * OFFD (global input) INTEGER * On entry, OFFD specifies the off-diagonal of the underlying * matrix of interest as follows: * OFFD = 0 specifies the main diagonal, * OFFD > 0 specifies lower subdiagonals, and * OFFD < 0 specifies upper superdiagonals. * * M (local input) INTEGER * On entry, M specifies the local number of rows of the under- * lying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). M must be at least zero. * * N (local input) INTEGER * On entry, N specifies the local number of columns of the un- * derlying matrix owned by the process of relative coordinates * ( MRROW, MRCOL ). N must be at least zero. * * IMB1 (global input) INTEGER * On input, IMB1 specifies the global true size of the first * block of rows of the underlying global submatrix. IMB1 must * be at least MIN( 1, M ). * * INB1 (global input) INTEGER * On input, INB1 specifies the global true size of the first * block of columns of the underlying global submatrix. INB1 * must be at least MIN( 1, N ). * * MB (global input) INTEGER * On entry, MB specifies the blocking factor used to partition * the rows of the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the blocking factor used to partition * the the columns of the matrix. NB must be at least one. * * MRROW (local input) INTEGER * On entry, MRROW specifies the relative row coordinate of the * process that possesses these M rows. MRROW must be least zero * and strictly less than NPROW. * * MRCOL (local input) INTEGER * On entry, MRCOL specifies the relative column coordinate of * the process that possesses these N columns. MRCOL must be * least zero and strictly less than NPCOL. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process col- * umns over which the matrix is distributed. NPCOL must be at * least one. * * LCMB (global input) INTEGER * On entry, LCMB specifies the least common multiple of * NPROW * MB and NPCOL * NB. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int tmp1; /* .. * .. Executable Statements .. * */ /* * Initialize the fields of the VM structure */ VM->offd = OFFD; VM->lcmt00 = OFFD; VM->mp = M; VM->imb1 = IMB1; VM->mb = MB; VM->upp = MB - 1; VM->prow = MRROW; VM->nprow = NPROW; VM->nq = N; VM->inb1 = INB1; VM->nb = NB; VM->low = 1 - NB; VM->pcol = MRCOL; VM->npcol = NPCOL; VM->lcmb = LCMB; if( ( M <= 0 ) || ( N <= 0 ) ) { /* * If the local virtual array is empty, then simplify the remaining of the * initialization. */ VM->imbloc = 0; VM->lmbloc = 0; VM->mblks = 0; VM->iupp = ( MRROW ? MB - 1 : ( IMB1 > 0 ? IMB1 - 1 : 0 ) ); VM->inbloc = 0; VM->lnbloc = 0; VM->nblks = 0; VM->ilow = ( MRCOL ? 1 - NB : ( INB1 > 0 ? 1 - INB1 : 0 ) ); VM->lcmt00 += ( VM->low - VM->ilow + MRCOL * NB ) - ( VM->iupp - VM->upp + MRROW * MB ); return; } if( MRROW ) { /* * I am not in the first relative process row. Use the first local row block * size MB to initialize the VM structure. */ VM->lcmt00 -= IMB1 - MB + MRROW * MB; VM->imbloc = MIN( M, MB ); VM->mblks = ( M - 1 ) / MB + 1; VM->iupp = MB - 1; VM->lmbloc = M - ( M / MB ) * MB; if( !( VM->lmbloc ) ) VM->lmbloc = MB; if( MRCOL ) { /* * I am not in the first relative process column. Use the first local column * block size NB to initialize the VM structure. */ VM->inbloc = MIN( N, NB ); VM->ilow = 1 - NB; VM->lcmt00 += INB1 - NB + MRCOL * NB; VM->nblks = ( N - 1 ) / NB + 1; VM->lnbloc = N - ( N / NB ) * NB; if( !( VM->lnbloc ) ) VM->lnbloc = NB; } else { /* * I am in the first relative process column. Use the first column block size * INB1 to initialize the VM structure. */ VM->inbloc = INB1; VM->ilow = 1 - INB1; tmp1 = N - INB1; if( tmp1 ) { /* * There is more than one column block. Compute the number of local column * blocks and the size of the last one. */ VM->nblks = ( tmp1 - 1 ) / NB + 2; VM->lnbloc = tmp1 - ( tmp1 / NB ) * NB; if( !( VM->lnbloc ) ) VM->lnbloc = NB; } else { /* * There is only one column block. */ VM->nblks = 1; VM->lnbloc = INB1; } } } else { /* * I am in the first relative process row. Use the first row block size IMB1 to * initialize the VM structure. */ VM->imbloc = IMB1; VM->iupp = IMB1 - 1; tmp1 = M - IMB1; if( tmp1 ) { /* * There is more than one row block. Compute the number of local row blocks and * the size of the last one. */ VM->mblks = ( tmp1 - 1 ) / MB + 2; VM->lmbloc = tmp1 - ( tmp1 / MB ) * MB; if( !( VM->lmbloc ) ) VM->lmbloc = MB; } else { /* * There is only one row block. */ VM->mblks = 1; VM->lmbloc = IMB1; } if( MRCOL ) { /* * I am not in the first relative process column. Use the first local column * block size NB to initialize the VM structure. */ VM->inbloc = MIN( N, NB ); VM->ilow = 1 - NB; VM->lcmt00 += INB1 - NB + MRCOL * NB; VM->nblks = ( N - 1 ) / NB + 1; VM->lnbloc = N - ( N / NB ) * NB; if( !( VM->lnbloc ) ) VM->lnbloc = NB; } else { /* * I am in the first relative process column. Use the first column block size * INB1 to initialize the VM structure. */ VM->inbloc = INB1; VM->ilow = 1 - INB1; tmp1 = N - INB1; if( tmp1 ) { /* * There is more than one column block. Compute the number of local column * blocks and the size of the last one. */ VM->nblks = ( tmp1 - 1 ) / NB + 2; VM->lnbloc = tmp1 - ( tmp1 / NB ) * NB; if( !( VM->lnbloc ) ) VM->lnbloc = NB; } else { /* * There is only one column block. */ VM->nblks = 1; VM->lnbloc = INB1; } } } /* * End of PB_CVMinit */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Citypeset.c0000640000175000017500000000606610363532303020332 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" PBTYP_T * PB_Citypeset() { /* * Purpose * ======= * * PB_Citypeset on the first call initializes a static structure contai- * ning typed information and returns a pointer to it. The other calls * to this routine just returns this pointer. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static int setup=0; static PBTYP_T TypeStruct; static int zero, one, negone; /* .. * .. Executable Statements .. * */ if( setup ) return( &TypeStruct ); setup = 1; TypeStruct.type = INT; TypeStruct.usiz = sizeof( int ); TypeStruct.size = sizeof( int ); zero = 0; one = 1; negone = -1; TypeStruct.zero = (char *) (&zero); TypeStruct.one = (char *) (&one); TypeStruct.negone = (char *) (&negone); TypeStruct.Cgesd2d = Cigesd2d; TypeStruct.Cgerv2d = Cigerv2d; TypeStruct.Cgebs2d = Cigebs2d; TypeStruct.Cgebr2d = Cigebr2d; TypeStruct.Cgsum2d = Cigsum2d; TypeStruct.Fmmadd = immadd_; TypeStruct.Fmmcadd = immadd_; TypeStruct.Fmmtadd = immtadd_; TypeStruct.Fmmtcadd = immtadd_; TypeStruct.Fmmdda = immdda_; TypeStruct.Fmmddac = immdda_; TypeStruct.Fmmddat = immddat_; TypeStruct.Fmmddact = immddat_; TypeStruct.Fcshft = NULL; TypeStruct.Frshft = NULL; TypeStruct.Fvvdotu = NULL; TypeStruct.Fvvdotc = NULL; TypeStruct.Fset = NULL; TypeStruct.Ftzpad = NULL; TypeStruct.Ftzpadcpy = NULL; TypeStruct.Ftzscal = NULL; TypeStruct.Fhescal = NULL; TypeStruct.Ftzcnjg = NULL; TypeStruct.Faxpy = NULL; TypeStruct.Fcopy = NULL; TypeStruct.Fswap = NULL; TypeStruct.Fgemv = NULL; TypeStruct.Fsymv = NULL; TypeStruct.Fhemv = NULL; TypeStruct.Ftrmv = NULL; TypeStruct.Ftrsv = NULL; TypeStruct.Fagemv = NULL; TypeStruct.Fasymv = NULL; TypeStruct.Fahemv = NULL; TypeStruct.Fatrmv = NULL; TypeStruct.Fgerc = NULL; TypeStruct.Fgeru = NULL; TypeStruct.Fsyr = NULL; TypeStruct.Fher = NULL; TypeStruct.Fsyr2 = NULL; TypeStruct.Fher2 = NULL; TypeStruct.Fgemm = NULL; TypeStruct.Fsymm = NULL; TypeStruct.Fhemm = NULL; TypeStruct.Fsyrk = NULL; TypeStruct.Fherk = NULL; TypeStruct.Fsyr2k = NULL; TypeStruct.Fher2k = NULL; TypeStruct.Ftrmm = NULL; TypeStruct.Ftrsm = NULL; return( &TypeStruct ); /* * End of PB_Citypeset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cspan.c0000640000175000017500000000502310363532303017415 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cspan( int N, int I, int INB, int NB, int SRCPROC, int NPROCS ) #else int PB_Cspan( N, I, INB, NB, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int I, INB, N, NB, NPROCS, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cspan returns 1 if the rows (resp. columns) I:I+N-1 spans more * than one process row (resp. column) and 0 otherwise. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix distribution. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * SRCPROC (global input) INTEGER * On entry, if SRCPROC = -1, the data is not distributed but * replicated, in which case this routine return 0. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ /* * If the data is replicated or if there is only one process in this dimension * of the process grid, the data does not span multiple processes. Finally, the * case where I belongs to the first block is handled separately. */ return( ( SRCPROC >= 0 ) && ( ( NPROCS > 1 ) && ( ( I < INB ) ? ( I + N > INB ) : ( I + N > INB + ( ( I - INB ) / NB + 1 ) * NB ) ) ) ); /* * End of PB_Cspan */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplaprnt.c0000640000175000017500000004314410363532303020142 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplaprnt( PBTYP_T * TYPE, int M, int N, char * A, int IA, int JA, int * DESCA, int IRPRNT, int ICPRNT, char * CMATNM ) #else void PB_Cplaprnt( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM ) /* * .. Scalar Arguments .. */ int IA, ICPRNT, IRPRNT, JA, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A, * CMATNM; #endif { /* * Purpose * ======= * * PB_Cplaprnt prints to the standard output the submatrix sub( A ) de- * noting A(IA:IA+M-1,JA:JA+N-1). The local pieces of sub( A ) are sent * and printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * IRPRNT (global input) INTEGER * On entry, IRPRNT specifies the row index of the printing pro- * cess. * * ICPRNT (global input) INTEGER * On entry, ICPRNT specifies the column index of the printing * process. * * CMATNM (global input) pointer to CHAR * On entry, CMATNM is the name of the matrix to be printed. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int mycol, myrow, npcol, nprow, pcol, prow; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * When sub( A ) is replicated, each copy is printed for debugging purposes. */ if( DESCA[ RSRC_ ] >= 0 ) { /* * sub( A ) is distributed onto the process rows of the grid */ if( DESCA[ CSRC_ ] >= 0 ) { /* * sub( A ) is distributed onto the process columns of the grid */ PB_Cplaprn2( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, DESCA[ RSRC_ ], DESCA[ CSRC_ ] ); } else { /* * sub( A ) is replicated in every process column of the grid */ for( pcol = 0; pcol < npcol; pcol++ ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) (void) fprintf( stdout, "Colum-replicated array -- copy in process column: %d\n", pcol ); PB_Cplaprn2( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, DESCA[ RSRC_ ], pcol ); } } } else { /* * sub( A ) is replicated in every process row of the grid */ if( DESCA[ CSRC_ ] >= 0 ) { /* * sub( A ) is distributed onto the process columns of the grid */ for( prow = 0; prow < nprow; prow++ ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) (void) fprintf( stdout, "Row-replicated array -- copy in process row: %d\n", prow ); PB_Cplaprn2( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, prow, DESCA[ CSRC_ ] ); } } else { /* * sub( A ) is replicated in every process column of the grid */ for( prow = 0; prow < nprow; prow++ ) { for( pcol = 0; pcol < npcol; pcol++ ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) (void) fprintf( stdout, "Replicated array -- copy in process (%d,%d)\n", prow, pcol ); PB_Cplaprn2( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, prow, pcol ); } } } } /* * End of PB_Cplaprnt */ } #ifdef __STDC__ void PB_Cplaprn2( PBTYP_T * TYPE, int M, int N, char * A, int IA, int JA, int * DESCA, int IRPRNT, int ICPRNT, char * CMATNM, int PROW, int PCOL ) #else void PB_Cplaprn2( TYPE, M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, CMATNM, PROW, PCOL ) /* * .. Scalar Arguments .. */ int IA, ICPRNT, IRPRNT, JA, M, N, PCOL, PROW; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A, * CMATNM; #endif { /* * .. Local Scalars .. */ char type; int Acol, Aii, AisColRep, AisRowRep, Ajj, Ald, Arow, ctxt, h, i, ib, icurcol, icurrow, ii, in, j, jb, jj, jn, ldw, mycol, myrow, npcol, nprow, size, usiz; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); /* * Save the local first index of each row and column sub( A ) */ ii = Aii; jj = Ajj; /* * When sub( A ) is row-replicated, print the copy in process row PROW. * Otherwise, print the distributed matrix rows starting in process row Arow. */ if( Arow < 0 ) { AisRowRep = 1; icurrow = Arow = PROW; } else { AisRowRep = 0; icurrow = Arow; } /* * When sub( A ) is column-replicated, print the copy in process column PCOL. * Otherwise, print the distributed matrix columns starting in process column * Acol. */ if( Acol < 0 ) { AisColRep = 1; icurcol = Acol = PCOL; } else { AisColRep = 0; icurcol = Acol; } type = TYPE->type; usiz = TYPE->usiz; size = TYPE->size; /* * Allocate buffer in printing process */ ldw = MAX( DESCA[ IMB_ ], DESCA[ MB_ ] ); if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) buf = PB_Cmalloc( ldw * size ); /* * Handle the first block of column separately */ jb = PB_Cfirstnb( N, JA, DESCA[INB_], DESCA[NB_] ); jn = JA + jb - 1; for( h = 0; h < jb; h++ ) { ib = PB_Cfirstnb( M, IA, DESCA[IMB_], DESCA[MB_] ); in = IA + ib - 1; if( ( icurrow == IRPRNT ) && ( icurcol == ICPRNT ) ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { PB_Cprnt( type, size, usiz, ib, Mptr( A, ii, jj+h, Ald, size ), IA+1, JA+h+1, CMATNM ); } } else { if( ( myrow == icurrow ) && ( mycol == icurcol ) ) { TYPE->Cgesd2d( ctxt, ib, 1, Mptr( A, ii, jj+h, Ald, size ), Ald, IRPRNT, ICPRNT ); } else if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { TYPE->Cgerv2d( ctxt, ib, 1, buf, ldw, icurrow, icurcol ); PB_Cprnt( type, size, usiz, ib, buf, IA+1, JA+h+1, CMATNM ); } } /* * Go to next block of rows */ if( myrow == icurrow ) ii += ib; if( !( AisRowRep ) ) icurrow = MModAdd1( icurrow, nprow ); Cblacs_barrier( ctxt, ALL ); /* * Loop over remaining block of rows */ for( i = in+1; i <= IA+M-1; i += DESCA[MB_] ) { ib = MIN( DESCA[MB_], IA+M-i ); if( ( icurrow == IRPRNT ) && ( icurcol == ICPRNT ) ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { PB_Cprnt( type, size, usiz, ib, Mptr( A, ii, jj+h, Ald, size ), i+1, JA+h+1, CMATNM ); } } else { if( ( myrow == icurrow ) && ( mycol == icurcol ) ) { TYPE->Cgesd2d( ctxt, ib, 1, Mptr( A, ii, jj+h, Ald, size ), Ald, IRPRNT, ICPRNT ); } else if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { TYPE->Cgerv2d( ctxt, ib, 1, buf, ldw, icurrow, icurcol ); PB_Cprnt( type, size, usiz, ib, buf, i+1, JA+h+1, CMATNM); } } /* * Go to next block of rows */ if( myrow == icurrow ) ii += ib; if( !( AisRowRep ) ) icurrow = MModAdd1( icurrow, nprow ); Cblacs_barrier( ctxt, ALL ); } /* * Restart at the first row to be printed */ ii = Aii; icurrow = Arow; } /* * Go to next block of columns */ if( mycol == icurcol ) jj += jb; if( !( AisColRep ) ) icurcol = MModAdd1( icurcol, npcol ); Cblacs_barrier( ctxt, ALL ); /* * Loop over remaining column blocks */ for( j = jn+1; j <= JA+N-1; j += DESCA[NB_] ) { jb = MIN( DESCA[NB_], JA+N-j ); for( h = 0; h < jb; h++ ) { ib = PB_Cfirstnb( M, IA, DESCA[IMB_], DESCA[MB_] ); in = IA + ib - 1; if( ( icurrow == IRPRNT ) && ( icurcol == ICPRNT ) ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { PB_Cprnt( type, size, usiz, ib, Mptr( A, ii, jj+h, Ald, size ), IA+1, j+h+1, CMATNM ); } } else { if( ( myrow == icurrow ) && ( mycol == icurcol ) ) { TYPE->Cgesd2d( ctxt, ib, 1, Mptr( A, ii, jj+h, Ald, size ), Ald, IRPRNT, ICPRNT ); } else if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { TYPE->Cgerv2d( ctxt, ib, 1, buf, ldw, icurrow, icurcol ); PB_Cprnt( type, size, usiz, ib, buf, IA+1, j+h+1, CMATNM ); } } /* * Go to next block of rows */ if( myrow == icurrow ) ii += ib; if( !( AisRowRep ) ) icurrow = MModAdd1( icurrow, nprow ); Cblacs_barrier( ctxt, ALL ); /* * Loop over remaining block of rows */ for( i = in+1; i <= IA+M-1; i += DESCA[MB_] ) { ib = MIN( DESCA[MB_], IA+M-i ); if( ( icurrow == IRPRNT ) && ( icurcol == ICPRNT ) ) { if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { PB_Cprnt( type, size, usiz, ib, Mptr( A, ii, jj+h, Ald, size ), i+1, j+h+1, CMATNM ); } } else { if( ( myrow == icurrow ) && ( mycol == icurcol ) ) { TYPE->Cgesd2d( ctxt, ib, 1, Mptr( A, ii, jj+h, Ald, size ), Ald, IRPRNT, ICPRNT ); } else if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) ) { TYPE->Cgerv2d( ctxt, ib, 1, buf, ldw, icurrow, icurcol ); PB_Cprnt( type, size, usiz, ib, buf, i+1, j+h+1, CMATNM ); } } /* * Go to next block of rows */ if( myrow == icurrow ) ii += ib; if( !( AisRowRep ) ) icurrow = MModAdd1( icurrow, nprow ); Cblacs_barrier( ctxt, ALL ); } /* * Restart at the first row to be printed */ ii = Aii; icurrow = Arow; } /* * Go to next block of columns */ if( mycol == icurcol ) jj += jb; if( !( AisColRep ) ) icurcol = MModAdd1( icurcol, npcol ); Cblacs_barrier( ctxt, ALL ); } if( ( myrow == IRPRNT ) && ( mycol == ICPRNT ) && ( buf ) ) free( buf ); /* * End of PB_Cplaprn2 */ } #ifdef __STDC__ void PB_Cprnt( char TYPE, int SIZE, int USIZ, int N, char * A, int IA, int JA, char * CMATNM ) #else void PB_Cprnt( TYPE, SIZE, USIZ, N, A, IA, JA, CMATNM ) /* * .. Scalar Arguments .. */ int IA, JA, N, SIZE, TYPE, USIZ; /* * .. Array Arguments .. */ char * A, * CMATNM; #endif { /* * .. Local Scalars .. */ int k; /* .. * .. Executable Statements .. * */ if( TYPE == INT ) for( k = 0; k < N; k++ ) (void) fprintf( stdout, "%s(%6d,%6d)=%8d\n", CMATNM, IA+k, JA, *((int *)(&A[k*SIZE])) ); else if( TYPE == SREAL ) for( k = 0; k < N; k++ ) (void) fprintf( stdout, "%s(%6d,%6d)=%16.8f\n", CMATNM, IA+k, JA, *((float *)(&A[k*SIZE])) ); else if( TYPE == DREAL ) for( k = 0; k < N; k++ ) (void) fprintf( stdout, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+k, JA, *((double *)(&A[k*SIZE])) ); else if( TYPE == SCPLX ) for( k = 0; k < N; k++ ) (void) fprintf( stdout, "%s(%6d,%6d)=%16.8f+i*(%16.8f)\n", CMATNM, IA+k, JA, *((float *)(&A[k*SIZE])), *((float *)(&A[k*SIZE+USIZ])) ); else if( TYPE == DCPLX ) for( k = 0; k < N; k++ ) (void) fprintf( stdout, "%s(%6d,%6d)=%30.18f+i*(%30.18f)\n", CMATNM, IA+k, JA, *((double *)(&A[k*SIZE])), *((double *)(&A[k*SIZE+USIZ])) ); /* * End of PB_Cprnt */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cchkvec.c0000640000175000017500000003142710363532303017726 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cchkvec( int ICTXT, char * ROUT, char * VNAME, int N, int NPOS0, int IX, int JX, int * DESCX, int INCX, int DPOS0, int * INFO ) #else void PB_Cchkvec( ICTXT, ROUT, VNAME, N, NPOS0, IX, JX, DESCX, INCX, DPOS0, INFO ) /* * .. Scalar Arguments .. */ int DPOS0, ICTXT, IX, * INFO, INCX, JX, N, NPOS0; /* * .. Array Arguments .. */ char * ROUT, * VNAME; int * DESCX; #endif { /* * Purpose * ======= * * PB_Cchkvec checks the validity of a descriptor vector DESCX, the * related global indexes IX, JX and the global increment INCX. If an * inconsistency is found among its parameters IX, JX, DESCX and INCX, * the routine returns an error code in INFO. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * input error checking routine. * * VNAME (global input) pointer to CHAR * On entry, VNAME specifies the name of the formal array argu- * ment in the calling routine. * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * * NPOS0 (global input) INTEGER * On entry, NPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter N appears. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * DPOS0 (global input) INTEGER * On entry, DPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter DESCX ap- * pears. Note that it is assumed that IX and JX are respecti- * vely 2 and 1 entries behind DESCX, and INCX is 1 entry after * DESCX. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had an * illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int dpos, icpos, ixpos, jxpos, mycol, myrow, np, npcol, npos, nprow, nq; /* .. * .. Executable Statements .. * */ /* * Want to find errors with MIN(), so if no error, set it to a big number. If * there already is an error, multiply by the the descriptor multiplier. */ if( *INFO >= 0 ) *INFO = BIGNUM; else if( *INFO < -DESCMULT ) *INFO = -(*INFO); else *INFO = -(*INFO) * DESCMULT; /* * Figure where in parameter list each parameter was, factoring in descriptor * multiplier */ npos = NPOS0 * DESCMULT; ixpos = ( DPOS0 - 2 ) * DESCMULT; jxpos = ( DPOS0 - 1 ) * DESCMULT; icpos = ( DPOS0 + 1 ) * DESCMULT; dpos = DPOS0 * DESCMULT + 1; /* * Get process grid information */ Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); /* * Are N, IX, JX, DESCX and INCX legal inputs ? */ if( N < 0 ) { /* * N must be at least zero */ *INFO = MIN( *INFO, npos ); PB_Cwarn( ICTXT, -1, ROUT, "%s sub( %s ) = %d, it must be at least 0", "Illegal length of", VNAME, N ); } if( IX < 0 ) { /* * IX must be at least zero */ *INFO = MIN( *INFO, ixpos ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal I%s = %d, I%s must be at least 1", VNAME, IX+1, VNAME ); } if( JX < 0 ) { /* * JX must be at least zero */ *INFO = MIN( *INFO, jxpos ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal J%s = %d, J%s must be at least 1", VNAME, JX+1, VNAME ); } if( DESCX[DTYPE_] != BLOCK_CYCLIC_2D_INB ) { /* * Internally, only the descriptor type BLOCK_CYCLIC_2D_INB is supported. */ *INFO = MIN( *INFO, dpos + DTYPE_ ); PB_Cwarn( ICTXT, -1, ROUT, "%s %d for matrix %s. PBLAS accepts: %d or %d", "Illegal descriptor type", DESCX[DTYPE_], VNAME, BLOCK_CYCLIC_2D, BLOCK_CYCLIC_2D_INB ); if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * No need to go any further ... */ return; } if( DESCX[CTXT_] != ICTXT ) { /* * Check if the context of X match the other contexts. Only intra-context * operations are supported. */ *INFO = MIN( *INFO, dpos + CTXT_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[CTXT_] = %d %s= %d", VNAME, DESCX[CTXT_], "does not match other operand's context ", ICTXT ); if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * No need to go any further ... */ return; } if( DESCX[IMB_] < 1 ) { /* * DESCX[IMB_] must be at least one */ *INFO = MIN( *INFO, dpos + IMB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[IMB_] = %d, DESC%s[IMB_] %s", VNAME, DESCX[IMB_], VNAME, "must be at least 1" ); } if( DESCX[INB_] < 1 ) { /* * DESCX[INB_] must be at least one */ *INFO = MIN( *INFO, dpos + INB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[INB_] = %d, DESC%s[INB_] %s", VNAME, DESCX[INB_], VNAME, "must be at least 1" ); } if( DESCX[MB_] < 1 ) { /* * DESCX[MB_] must be at least one */ *INFO = MIN( *INFO, dpos + MB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[MB_] = %d, DESC%s[MB_] %s", VNAME, DESCX[MB_], VNAME, "must be at least 1" ); } if( DESCX[NB_] < 1 ) { /* * DESCX[NB_] must be at least one */ *INFO = MIN( *INFO, dpos + NB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[NB_] = %d, DESC%s[NB_] %s", VNAME, DESCX[NB_], VNAME, "must be at least 1" ); } if( ( DESCX[RSRC_] < -1 ) || ( DESCX[RSRC_] >= nprow ) ) { /* * DESCX[RSRC_] must be either -1 (replication) or in the interval [0 .. nprow) */ *INFO = MIN( *INFO, dpos + RSRC_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[RSRC_] = %d, DESC%s[RSRC_] %s%d", VNAME, DESCX[RSRC_], VNAME, "must be either -1, or >= 0 and < ", nprow ); } if( ( DESCX[CSRC_] < -1 ) || ( DESCX[CSRC_] >= npcol ) ) { /* * DESCX[CSRC_] must be either -1 (replication) or in the interval [0 .. npcol) */ *INFO = MIN( *INFO, dpos + CSRC_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[CSRC_] = %d, DESC%s[CSRC_] %s%d", VNAME, DESCX[CSRC_], VNAME, "must be either -1, or >= 0 and < ", npcol ); } if( INCX != 1 && INCX != DESCX[M_] ) { /* * INCX must be either 1 or DESCX[M_] */ *INFO = MIN( *INFO, icpos ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal INC%s = %d, INC%s should be either 1 or %d", VNAME, DESCX[M_], VNAME ); } if( N == 0 ) { /* * NULL vector, relax some checks */ if( DESCX[M_] < 0 ) { /* * DESCX[M_] must be at least 0 */ *INFO = MIN( *INFO, dpos + M_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[M_] = %d, it must be at least 0", VNAME, DESCX[M_] ); } if( DESCX[N_] < 0 ) { /* * DESCX[N_] must be at least 0 */ *INFO = MIN( *INFO, dpos + N_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[N_] = %d, it must be at least 0", VNAME, DESCX[N_] ); } if( DESCX[LLD_] < 1 ) { /* * DESCX[LLD_] must be at least 1 */ *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least 1", VNAME, DESCX[LLD_] ); } } else { /* * more rigorous checks for non-degenerate vector */ if( DESCX[M_] < 1 ) { /* * DESCX[M_] must be at least 1 */ *INFO = MIN( *INFO, dpos + M_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[M_] = %d, it must be at least 1", VNAME, DESCX[M_]); } if( DESCX[N_] < 1 ) { /* * DESCX[N_] must be at least 1 */ *INFO = MIN( *INFO, dpos + N_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[N_] = %d, it must be at least 1", VNAME, DESCX[N_]); } if( ( DESCX[M_] >= 1 ) && ( DESCX[N_] >= 1 ) ) { if( INCX == DESCX[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( IX >= DESCX[M_] ) { /* * IX must be in [ 0 ... DESCX[M_]-1 ] */ *INFO = MIN( *INFO, ixpos ); PB_Cwarn( ICTXT, -1, ROUT, "%s I%s = %d, DESC%s[M_] = %d", "Array subscript out of bounds:", VNAME, IX+1, VNAME, DESCX[M_]); } if( JX+N > DESCX[N_] ) { /* * JX + N must be in [ 0 ... DESCX[N_]-1 ] */ *INFO = MIN( *INFO, jxpos ); PB_Cwarn( ICTXT, -1, ROUT, "%s N = %d, J%s = %d, DESC%s[N_] = %d", "Operation out of bounds:", N, VNAME, JX+1, VNAME, DESCX[N_]); } } else { /* * sub( X ) resides in (a) process column(s) */ if( JX >= DESCX[N_] ) { /* * JX must be in [ 0 ... DESCX[N_] ] */ *INFO = MIN( *INFO, jxpos ); PB_Cwarn( ICTXT, -1, ROUT, "%s J%s = %d, DESC%s[N_] = %d", "Array subscript out of bounds:", VNAME, JX+1, VNAME, DESCX[N_]); } if( IX+N > DESCX[M_] ) { /* * IX + N must be in [ 0 ... DESCX[M_] ] */ *INFO = MIN( *INFO, ixpos ); PB_Cwarn( ICTXT, -1, ROUT, "%s N = %d, I%s = %d, DESC%s[M_] = %d", "Operation out of bounds:", N, VNAME, IX+1, VNAME, DESCX[M_]); } } } /* * *INFO == BIGNUM => No errors have been found so far */ if( *INFO == BIGNUM ) { Mnumroc( np, DESCX[M_], 0, DESCX[IMB_], DESCX[MB_], myrow, DESCX[RSRC_], nprow ); if( DESCX[LLD_] < MAX( 1, np ) ) { Mnumroc( nq, DESCX[N_], 0, DESCX[INB_], DESCX[NB_], mycol, DESCX[CSRC_], npcol ); /* * DESCX[LLD_] must be at least 1 in order to be legal and this is enough if no * columns of X reside in this process */ if( DESCX[LLD_] < 1 ) { *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least 1", VNAME, DESCX[LLD_] ); } else if( nq > 0 ) { /* * Some columns of X reside in this process, DESCX[LLD_] must be at least * MAX( 1, np ) */ *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least %d", VNAME, DESCX[LLD_], MAX( 1, np ) ); } } } } /* * Prepare output: set INFO = 0 if no error, and divide by DESCMULT if error is * not in a descriptor entry. */ if( *INFO == BIGNUM ) *INFO = 0; else if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * End of PB_Cchkvec */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cnnxtroc.c0000640000175000017500000001610110363532303020146 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cnnxtroc( int N, int I, int INB, int NB, int PROC, int SRCPROC, int NPROCS ) #else int PB_Cnnxtroc( N, I, INB, NB, PROC, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int I, INB, N, NB, NPROCS, PROC, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cnnxtroc computes the number of next rows or columns of a subma- * trix that are possessed by processes closer to SRCPROC1 than PROC * where SRCPROC1 is the process owning the row or column globally in- * dexed by I. The submatrix is defined by giving out N rows or columns * starting from global index I. Therefore, if SRCPROC=0 and PROC=1, * then PB_Cnnxtroc returns the number of matrix rows or columns owned * by processes 2, 3 ... NPROCS-1. * * In fact, if the same exact parameters N, I, INB, NB, SRCPROC and * NPROCS are passed to PB_Cnpreroc, PB_Cnumroc and PB_Cnnxtroc produ- * cing respectively npre, np and nnxt, then npre + np + nnxt = N in * every process PROC. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix distribution. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local input) INTEGER * On entry, PROC specifies the coordinate of the process whose * local portion is determined. PROC must be at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ilocblk, mydist, nblocks; /* .. * .. Executable Statements .. * */ if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * The data is not distributed, or there is just one process in this dimension * of the grid. */ return( 0 ); /* * Compute coordinate of process owning I and corresponding INB */ if( ( INB -= I ) <= 0 ) { /* * I is not in first block, find out which process has it and update size of * first block */ nblocks = ( -INB ) / NB + 1; SRCPROC += nblocks; SRCPROC -= ( SRCPROC / NPROCS ) * NPROCS; INB += nblocks * NB; } /* * Now everything is just like N, I=0, INB, NB, SRCPROC, NPROCS. If the source * process owns the N rows or columns, nothing follows me ... */ if( N <= INB ) return( 0 ); /* * The discussion goes as follows: compute my distance from the source process * so that within this process coordinate system, the source process is the * process such that mydist = 0, or equivalently PROC == SRCPROC. * * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those N entries. Then remark that * * when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROCS, either the last block is not full * and I own it, or the last block is full and I am the first process owning * only ilocblk full blocks. */ nblocks = ( N - INB ) / NB + 1; if( PROC == SRCPROC ) { /* * First note that I cannot be the source and the last process because mydist=0 * and NPROCS > 1. Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there * are only two possible cases: * * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS divides * the global number of full blocks, then the source process SRCPROC owns * one more block than the other processes; Thus, N can be rewritten as * N = INB + (nblocks-1) * NB + LNB with LNB >= 0 size of the last block. * Similarly, the local value Np corresponding to the local number of rows * and columns owned by the source process is INB + (ilocblk-1)*NB + LNB, * that is N + ( ilocblk-1 - (nblocks-1) )*NB. Therefore, there must be * ( nblocks - ilocblk ) * NB rows or columns following me. Note that this * case cannot happen when ilocblk is zero, since nblocks is at least one. * * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only owns * full blocks, and therefore locally INB + ilocblk * NB rows or columns. * Thus, N - INB - ilocblk * NB rows or columns follow me. Note that when * ilocblk is zero, this becomes simply N - INB. */ if( nblocks < NPROCS ) return( N - INB ); ilocblk = nblocks / NPROCS; return( ( ( nblocks - ilocblk * NPROCS ) ? N - INB - ilocblk * NB : ( nblocks - ilocblk ) * NB ) ); } else { /* * I am not the source process. Compute my distance from the source process. */ if( ( mydist = PROC - SRCPROC ) < 0 ) mydist += NPROCS; /* * If I am the last process i.e. mydist = NPROCS - 1, nothing follows me. */ if( mydist == NPROCS - 1 ) return( 0 ); /* * Otherwise, when mydist >= nblocks - ilocblk * NPROCS, there are exactly * NB * ilocblk * ( NPROCS - mydist ) rows or columns after me including mine, * i.e NB * ilocblk * ( NPROCS - 1 - mydist ) rows or columns following me. * Finally, when 0 < mydist < nblocks - ilocblk * NPROCS, the number of rows * or columns preceeding me is INB + ilocblk * NB + mydist*( ilocblk+1 )*NB * including mine, therefore there are N-INB-NB*( ilocblk+mydist*(ilocblk+1) ) * rows or columns following me. */ if( nblocks < NPROCS ) return( ( ( mydist < nblocks ) ? N - mydist * NB - INB : 0 ) ); ilocblk = nblocks / NPROCS; return( ( ( mydist >= ( nblocks - ilocblk * NPROCS ) ) ? ( NPROCS - 1 - mydist ) * ilocblk * NB : N - INB - ( ilocblk * mydist + ilocblk + mydist )*NB ) ); } /* * End of PB_Cnnxtroc */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CInOutV2.c0000640000175000017500000010164610363532303017732 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CInOutV2( PBTYP_T * TYPE, char * CONJUG, char * ROWCOL, int M, int N, int KA, int * DESCA, int K, char * Y, int IY, int JY, int * DESCY, char * YROC, char * * YAPTR, int * DYA, int * YAFREE, int * YASUM, int * YAPBY ) #else void PB_CInOutV2( TYPE, CONJUG, ROWCOL, M, N, KA, DESCA, K, Y, IY, JY, DESCY, YROC, YAPTR, DYA, YAFREE, YASUM, YAPBY ) /* * .. Scalar Arguments .. */ char * CONJUG, * ROWCOL, * YROC; int * YAPBY, * YAFREE, IY, JY, K, KA, M, N, * YASUM; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCY, * DYA; char * Y, * * YAPTR; #endif { /* * Purpose * ======= * * PB_CInOutV2 returns a pointer to an array that contains a one-dimen- * sional input/output subvector which is replicated over the rows or * columns of a submatrix described by DESCA. A subvector is specified * on input to this routine that is reused whenever possible. On return, * the subvector is specified by a pointer to some data, a descriptor * array describing its layout, a logical value indicating if this local * piece of data has been dynamically allocated by this function, a lo- * gical value specifying if sum reduction should occur, and finally a * logical value specifying if it is necessary to copy back the alloca- * ted data to the original data. This routine is specifically designed * for traditional Level 2 like PBLAS operations using an input/output * vector such as PxTRSV. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies if this routine should return * the conjugate subvector as follows: * = 'N' or 'n': The initial subvector is returned, * = 'Z' or 'z': The conjugate subvector is returned. * * ROWCOL (global input) pointer to CHAR * On entry, ROWCOL specifies if this routine should return a * row or column subvector replicated over the underlying subma- * trix as follows: * = 'R' or 'r': A row subvector is returned, * = 'C' or 'c': A column subvector is returned. * * M (global input) INTEGER * On entry, M specifies the number of rows of the underlying * submatrix described by DESCA. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the underlying * submatrix described by DESCA. N must be at least zero. * * KA (global input) INTEGER * On entry, KA specifies a global row index when ROWCOL is 'R' * or 'r' and a global column index otherwise. This index deter- * mines a process row or column in which the output subvector * contains a copy of the input subvector. * * DESCA (global and local input/output) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. EXCEPTIONALLY, THIS * INTERNAL ROUTINE MAY MODIFY DESCA IN ORDER TO MINIMIZE THE * AMOUNT OF DATA TO BE MOVED FOR THE VECTOR Y. SEE PxGEMV FOR * AN EXAMPLE. * * K (global input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension of the subvector sub( Y ). K must be at least zero. * * Y (local input) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( K, IY ) ) when YROC is 'R' or 'r' * and MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at * least Lc( 1, JY+Ly-1 ) when YROC is 'R' or 'r' and * Lc( K, JY ) otherwise. Ly is N when ROWCOL is 'R' or 'r' and * M otherwise. Before entry, this array contains the local * entries of the matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * YROC (global input) pointer to CHAR * On entry, YROC specifies the orientation of the subvector * sub( Y ). When YROC is 'R' or 'r', sub( Y ) is a row vector, * and a column vector otherwise. * * YAPTR (local output) pointer to pointer to CHAR * On exit, * YAPTR is an array containing the same data as the * subvector sub( Y ) which is replicated over the rows or co- * lumns of the underlying matrix as specified by ROWCOL and * DESCA. * * DYA (global and local output) INTEGER array * On exit, DYA is a descriptor array of dimension DLEN_ descri- * bing the data layout of the data pointed to by * YAPTR. * * YAFREE (local output) INTEGER * On exit, YAFREE specifies if it was possible to reuse the * subvector sub( Y ), i.e., if some dynamic memory was alloca- * ted for the data pointed to by * YAPTR or not. When YAFREE is * zero, no dynamic memory was allocated. Otherwise, some dyna- * mic memory was allocated by this function that one MUST re- * lease as soon as possible. * * YASUM (global output) INTEGER * On exit, YASUM specifies if a global sum reduction should be * performed to obtain the correct sub( Y ). When YASUM is zero, * no reduction is to be performed, otherwise reduction should * occur. * * YAPBY (global output) INTEGER * On exit, YAPBY specifies if the data pointed to by * YAPTR * must be move back onto sub( Y ) to obtain the correct result. * When YAPBY is zero, no supplementary data movement is neces- * sary, otherwise a data redistribution should occur. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Acoldst, Aimb, Ainb, AisD, AisR, Amb, Amp, Anb, Anq, Arow, Arowdst, Ycol, Yii, Yimb, Yimb1, Yinb, Yinb1, YisD, YisR, YisRow, Yjj, Yld, Ymb, Ymp, Ynb, Ynq, Yrow, ctxt, izero=0, nprow, myrow, npcol, mycol; /* .. * .. Executable Statements .. * */ /* * Initialize the output parameters to a default value */ *YAFREE = 0; *YASUM = 0; *YAPBY = 0; *YAPTR = NULL; /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) { if( Mupcase( ROWCOL[0] ) == CROW ) { PB_Cdescset( DYA, K, N, 1, DESCA[INB_], 1, DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 ); } else { PB_Cdescset( DYA, M, K, DESCA[IMB_], 1, DESCA[MB_], 1, DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], DESCA[LLD_] ); } return; } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCY[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ Minfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, Yii, Yjj, Yrow, Ycol ); /* * Is sub( Y ) distributed or not, replicated or not ? */ if( ( YisRow = ( Mupcase( YROC[0] ) == CROW ) ) != 0 ) { YisD = ( ( Ycol >= 0 ) && ( npcol > 1 ) ); YisR = ( ( Yrow == -1 ) || ( nprow == 1 ) ); } else { YisD = ( ( Yrow >= 0 ) && ( nprow > 1 ) ); YisR = ( ( Ycol == -1 ) || ( npcol == 1 ) ); } Aimb = DESCA[ IMB_ ]; Ainb = DESCA[ INB_ ]; Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Arow = DESCA[ RSRC_ ]; Acol = DESCA[ CSRC_ ]; if( Mupcase( ROWCOL[0] ) == CROW ) { /* * Want a row vector */ AisR = ( ( Arow < 0 ) || ( nprow == 1 ) ); /* * Figure out in which process row sub( Y ) or a copy of it should be found */ Arowdst = PB_Cindxg2p( KA, Aimb, Amb, Arow, Arow, nprow ); if( YisRow && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { /* * It is possible to reuse sub( Y ) iff sub( Y ) is already a row vector and * the data does not need to be conjugated. */ AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) ); Yinb = DESCY[INB_]; Ynb = DESCY[NB_]; Yinb1 = PB_Cfirstnb( N, JY, Yinb, Ynb ); /* * sub( Y ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * column and either N is smaller than the first blocksize of sub( Y ) and A, * or their column blocking factors match. */ if( ( !AisD && !YisD ) || ( ( AisD && YisD ) && ( ( Acol == Ycol ) && ( ( ( Ainb >= N ) && ( Yinb1 >= N ) ) || ( ( Ainb == Yinb1 ) && ( Anb == Ynb ) ) ) ) ) ) { Ynq = PB_Cnumroc( N, 0, Yinb1, Ynb, mycol, Ycol, npcol ); Ymp = ( YisR ? K : ( ( myrow == Yrow ) ? K : 0 ) ); Yld = MAX( 1, K ); if( YisR ) { /* * If sub( Y ) is replicated, there is no need to move sub( Y ) after the * operation (*YAPBY = 0), and it can be reused where needed and zeroed out * elsewhere. */ *YASUM = ( AisR ? 0 : ( nprow > 1 ) ); *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( Ynq > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( !AisR && ( myrow != Arowdst ) ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Ynq, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * sub( Y ) is not replicated, the descriptor of A may need to be modified ... */ if( AisR ) { /* * If A is replicated, use only the copy in the process row where sub( Y ) * resides -> modify DESCA !!! */ *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; DESCA[ IMB_ ] = M; DESCA[ RSRC_ ] = Yrow; if( ( Ynq > 0 ) && ( Ymp > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { if( PB_Cspan( M, 0, Aimb, Amb, Arow, nprow ) ) { /* * Otherwise, A is not replicated, let assume in addition that it spans more * than one process row. */ *YASUM = ( nprow > 1 ); *YAPBY = 0; if( myrow == Yrow ) { /* * If sub( Y ) is not in the desired process row, send it there and zero it. * Otherwise, reuse it. */ Yld = DESCY[ LLD_ ]; if( Ynq > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( Yrow != Arowdst ) { TYPE->Cgesd2d( ctxt, K, Ynq, *YAPTR, Yld, Arowdst, mycol ); TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Ynq, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } } else { /* * Allocate space in the other process rows and initialize to zero. If sub( Y ) * was not in the desired process row, receive it. */ Yld = MAX( 1, K ); if( Ynq > 0 ) { *YAPTR = PB_Cmalloc( K * Ynq * TYPE->size ); *YAFREE = 1; if( ( Yrow != Arowdst ) && ( myrow == Arowdst ) ) TYPE->Cgerv2d( ctxt, K, Ynq, *YAPTR, Yld, Yrow, mycol ); else TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Ynq, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } } else { /* * A spans only one process row */ if( Yrow == Arow ) { /* * If A and sub( Y ) resides in the same process row, things are easy. */ *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( ( myrow == Yrow ) && ( Ynq > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { /* * Otherwise, sub( Y ) resides in another process row, thus allocate zero-data * in process row where a copy of sub( Y ) is desired, and receive it. Set * *YAPBY to 1, so that this data will be added (moved) after the local * operation has been performed. */ *YASUM = 0; *YAPBY = 1; if( Ynq > 0 ) { if( myrow == Yrow ) { Yld = DESCY[ LLD_ ]; TYPE->Cgesd2d( ctxt, K, Ynq, Mptr( Y, Yii, Yjj, Yld, TYPE->size ), Yld, Arowdst, mycol ); } else if( myrow == Arowdst ) { Yld = MAX( 1, K ); *YAPTR = PB_Cmalloc( K*Ynq*TYPE->size ); *YAFREE = 1; TYPE->Cgerv2d( ctxt, K, Ynq, *YAPTR, Yld, Yrow, mycol ); } } Yrow = Arowdst; } } } } /* * Describe the resulting operand. Note that when reduction should occur, Yrow * contains the destination row. Assuming every process row needs the result, * Yrow is then -1. */ PB_Cdescset( DYA, K, N, K, Yinb1, 1, Ynb, Yrow, Ycol, ctxt, Yld ); return; } } /* * sub( Y ) cannot be reused, force YAPBY to 1 for the later update of sub( Y ). */ *YAPBY = 1; Anq = PB_Cnumroc( N, 0, Ainb, Anb, mycol, Acol, npcol ); Yld = MAX( 1, K ); if( YisR ) { /* * If sub( Y ) is replicated, allocate space in every process row owning some * columns of A and initialize it to zero only where needed. There may be some * wasted space (suppose A was residing in just one row), however, it is hoped * that moving back this data to sub( Y ) will then be cheaper ... */ *YASUM = ( AisR ? 0 : ( nprow > 1 ) ); if( Anq > 0 ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; if( ( Arowdst >= 0 ) && ( myrow != Arowdst ) ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * sub( Y ) resides in only one process row */ if( AisR ) { /* * If A is replicated, then modify sub( A ) so that only one process row will * compute the result before moving it back to sub( Y ). */ *YASUM = 0; DESCA[ IMB_ ] = M; if( YisRow ) { /* * Choose a different process row than Yrow for better performance (more links) * in the later move-back phase. */ DESCA[RSRC_] = MModSub1( Yrow, nprow ); } else { DESCA[RSRC_] = 0; } if( ( myrow == ( Arowdst = DESCA[RSRC_] ) ) && ( Anq > 0 ) ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; } } else { if( PB_Cspan( M, 0, Aimb, Amb, Arow, nprow ) ) { /* * If A is not replicated, and spans more than just one process row, then * allocate space in every process row and zero it where needed. */ *YASUM = ( nprow > 1 ); if( Anq > 0 ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; if( myrow != Arowdst ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * If A is not replicated, and spans only one process row, then allocate space * within that process row. */ *YASUM = 0; if( ( myrow == Arowdst ) && ( Anq > 0 ) ) { *YAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *YAFREE = 1; } } } } /* * Describe the resulting operand. Note that when reduction should occur, * Arowdst contains the destination row. Assuming every process row needs the * result, Arowdst is then -1. */ PB_Cdescset( DYA, K, N, K, Ainb, 1, Anb, Arowdst, Acol, ctxt, Yld ); /* * Move sub( Y ) in the desired processes and with the correct layout */ if( YisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, N, TYPE->one, Y, IY, JY, DESCY, ROW, TYPE->zero, *YAPTR, 0, 0, DYA, ROW ); } else { PB_Cpaxpby( TYPE, CONJUG, N, K, TYPE->one, Y, IY, JY, DESCY, COLUMN, TYPE->zero, *YAPTR, 0, 0, DYA, ROW ); } } else { /* * Want a column vector with original data in col KA */ AisR = ( ( Acol < 0 ) || ( npcol == 1 ) ); /* * Figure out in which process column sub( Y ) or a copy of it should be found. */ Acoldst = PB_Cindxg2p( KA, Ainb, Anb, Acol, Acol, npcol ); if( !( YisRow ) && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { /* * It is possible to reuse sub( Y ) iff sub( Y ) is already a column vector and * the data does not need to be conjugated. */ AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) ); Yimb = DESCY[IMB_]; Ymb = DESCY[MB_]; Yimb1 = PB_Cfirstnb( M, IY, Yimb, Ymb ); /* * sub( Y ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * row and either M is smaller than the first blocksize of sub( Y ) and A, or * their row blocking factors match. */ if( ( !AisD && !YisD ) || ( ( AisD && YisD ) && ( ( Arow == Yrow ) && ( ( ( Aimb >= M ) && ( Yimb1 >= M ) ) || ( ( Aimb == Yimb1 ) && ( Amb == Ymb ) ) ) ) ) ) { Ymp = PB_Cnumroc( M, 0, Yimb1, Ymb, myrow, Yrow, nprow ); Ynq = ( YisR ? K : ( ( mycol == Ycol ) ? K : 0 ) ); Yld = MAX( 1, Ymp ); if( YisR ) { /* * If sub( Y ) is replicated, there is no need to move sub( Y ) after the * operation (*YAPBY = 0), and it can be reused where needed and zeroed out * elsewhere. */ *YASUM = ( AisR ? 0 : ( npcol > 1 ) ); *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( Ymp > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( !AisR && ( mycol != Acoldst ) ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ymp, &K, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * sub( Y ) is not replicated, the descriptor of A may need to be modified ... */ if( AisR ) { /* * If A is replicated, use only the copy in the process column where sub( Y ) * resides -> modify DESCA !!! */ *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; DESCA[ INB_ ] = N; DESCA[ CSRC_ ] = Ycol; if( ( Ymp > 0 ) && ( Ynq > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { if( PB_Cspan( N, 0, Ainb, Anb, Acol, npcol ) ) { /* * Otherwise, A is not replicated, let assume in addition that it spans more * than one process column. */ *YASUM = ( npcol > 1 ); *YAPBY = 0; if( mycol == Ycol ) { /* * If sub( Y ) is not in the desired process column, send it there and zero it. * Otherwise, reuse it. */ Yld = DESCY[ LLD_ ]; if( Ymp > 0 ) { *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); if( Ycol != Acoldst ) { TYPE->Cgesd2d( ctxt, Ymp, K, *YAPTR, Yld, myrow, Acoldst ); TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ymp, &K, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } } else { /* * Allocate space in the other process columns and initialize to zero. If * sub( Y ) was not in the desired process column, receive it. */ Yld = MAX( 1, Ymp ); if( Ymp > 0 ) { *YAPTR = PB_Cmalloc( Ymp * K * TYPE->size ); *YAFREE = 1; if( ( Ycol != Acoldst ) && ( mycol == Acoldst ) ) TYPE->Cgerv2d( ctxt, Ymp, K, *YAPTR, Yld, myrow, Ycol ); else TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Ymp, &K, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } } else { /* * A spans only one process column */ if( Ycol == Acol ) { /* * If A and sub( Y ) resides in the same process column, things are easy. */ *YASUM = 0; *YAPBY = 0; Yld = DESCY[ LLD_ ]; if( ( mycol == Ycol ) && ( Ymp > 0 ) ) *YAPTR = Mptr( Y, Yii, Yjj, Yld, TYPE->size ); } else { /* * Otherwise, sub( Y ) resides in another process column, thus allocate * zero-data in process column where a copy of sub( Y ) is desired, and receive * it. Set *YAPBY to 1, so that this data will be added (moved) after the local * operation has been performed. */ *YASUM = 0; *YAPBY = 1; if( Ymp > 0 ) { if( mycol == Ycol ) { Yld = DESCY[ LLD_ ]; TYPE->Cgesd2d( ctxt, Ymp, K, Mptr( Y, Yii, Yjj, Yld, TYPE->size ), Yld, myrow, Acoldst ); } else if( mycol == Acoldst ) { Yld = MAX( 1, Ymp ) ; *YAPTR = PB_Cmalloc( Ymp * K * TYPE->size ); *YAFREE = 1; TYPE->Cgerv2d( ctxt, Ymp, K, *YAPTR, Yld, myrow, Ycol ); } } Ycol = Acoldst; } } } } /* * Describe the resulting operand. Note that when reduction should occur, Ycol * contains the destination column. Assuming every process column needs the * result, Ycol is then -1. */ PB_Cdescset( DYA, M, K, Yimb1, K, Ymb, 1, Yrow, Ycol, ctxt, Yld ); return; } } /* * sub( Y ) cannot be reused, force YAPBY to 1 for the later update of sub( Y ). */ *YAPBY = 1; Amp = PB_Cnumroc( M, 0, Aimb, Amb, myrow, Arow, nprow ); Yld = MAX( 1, Amp ); if( YisR ) { /* * If sub( Y ) is replicated, allocate space in every process column owning some * columns of A and initialize it to zero only where needed. There may be some * wasted space (suppose A was residing in just one column), however, it is * hoped that moving back this data to sub( Y ) will then be cheaper ... */ *YASUM = ( AisR ? 0 : ( npcol > 1 ) ); if( Amp > 0 ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; if( ( Acoldst >= 0 ) && ( mycol != Acoldst ) ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * sub( Y ) resides in only one process column */ if( AisR ) { /* * If A is replicated, then modify sub( A ) so that only one process column will * compute the result before moving it back to sub( Y ). */ *YASUM = 0; DESCA[ INB_ ] = N; if( YisRow ) { DESCA[ CSRC_ ] = 0; } else { /* * Choose a different process column than Ycol for better performance (more * links) in the later move-back phase. */ DESCA[ CSRC_ ] = MModSub1( Ycol, npcol ); } if( ( mycol == ( Acoldst = DESCA[CSRC_] ) ) && ( Amp > 0 ) ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; } } else { if( PB_Cspan( N, 0, Ainb, Anb, Acol, npcol ) ) { /* * If A is not replicated, and spans more than just one process column, then * allocate space in every process column and zero it where needed. */ *YASUM = ( npcol > 1 ); if( Amp > 0 ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; if( mycol != Acoldst ) TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K, &izero, TYPE->zero, TYPE->zero, *YAPTR, &Yld ); } } else { /* * If A is not replicated, and spans only one process column, then allocate * space within that process column. */ *YASUM = 0; if( ( mycol == Acoldst ) && ( Amp > 0 ) ) { *YAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *YAFREE = 1; } } } } /* * Describe the resulting operand. Note that when reduction should occur, * Acoldst contains the destination column. Assuming every process column needs * the result, Acoldst is then -1. */ PB_Cdescset( DYA, M, K, Aimb, K, Amb, 1, Arow, Acoldst, ctxt, Yld ); /* * Move sub( Y ) in the desired processes and with the correct layout */ if( YisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, M, TYPE->one, Y, IY, JY, DESCY, ROW, TYPE->zero, *YAPTR, 0, 0, DYA, COLUMN ); } else { PB_Cpaxpby( TYPE, CONJUG, M, K, TYPE->one, Y, IY, JY, DESCY, COLUMN, TYPE->zero, *YAPTR, 0, 0, DYA, COLUMN ); } } /* * End of PB_CInOutV2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpdot11.c0000640000175000017500000005224110363532303017570 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpdot11( PBTYP_T * TYPE, int N, char * DOT, char * X, int IX, int JX, int * DESCX, int INCX, char * Y, int IY, int JY, int * DESCY, int INCY, VVDOT_T FDOT ) #else void PB_Cpdot11( TYPE, N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, FDOT ) /* * .. Scalar Arguments .. */ int INCX, INCY, IX, IY, JX, JY, N; char * DOT; PBTYP_T * TYPE; VVDOT_T FDOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; char * X, * Y; #endif { /* * Purpose * ======= * * PB_Cpdot11 forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ) or DOT := sub( X )**H * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * One subvector at least is assumed to be degenerated. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) pointer to CHAR * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * FDOT (local input) pointer to a function of type VVDOT * On entry, FDOT points to a subroutine that computes the local * dot product of two vectors. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * top; int RRorCC, Xcol, Xii, XisD, XisOne, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XprocD, XprocR, Xrow, Ycol, Yii, YisD, YisOne, YisR, YisRow, Yjj, YmyprocD, YmyprocR, YprocD, YprocR, Yrow, cdst, ctxt, ione=1, mycol, myrow, npcol, nprow, rdst; /* * .. Local Arrays .. */ int dbuf[DLEN_]; char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( ( XisRow = ( INCX == DESCX[ M_ ] ) ) != 0 ) { Xld = DESCX[ LLD_ ]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XisR = ( ( Xrow == -1 ) || ( nprow == 1 ) ); XisD = ( ( Xcol >= 0 ) && ( npcol > 1 ) ); } else { Xld = DESCX[ LLD_ ]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XisR = ( ( Xcol == -1 ) || ( npcol == 1 ) ); XisD = ( ( Xrow >= 0 ) && ( nprow > 1 ) ); } XisOne = ( ( N == 1 ) && ( DESCX[ M_ ] == 1 ) ); /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( ( YisRow = ( INCY == DESCY[ M_ ] ) ) != 0 ) { YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YisR = ( ( Yrow == -1 ) || ( nprow == 1 ) ); YisD = ( ( Ycol >= 0 ) && ( npcol > 1 ) ); } else { YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YisR = ( ( Ycol == -1 ) || ( npcol == 1 ) ); YisD = ( ( Yrow >= 0 ) && ( nprow > 1 ) ); } YisOne = ( ( N == 1 ) && ( DESCY[ M_ ] == 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * Copy sub( Y ) in sub( X )'s scope */ PB_Cdescset( dbuf, 1, 1, 1, 1, 1, 1, Xrow, Xcol, ctxt, 1 ); buf = PB_Cmalloc( TYPE->size ); PB_Cpaxpby( TYPE, NOCONJG, 1, 1, TYPE->one, Y, IY, JY, DESCY, ( YisRow ? ROW : COLUMN ), TYPE->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); /* * Compute the dot product in sub( X )'s scope */ if( XisR || ( XmyprocR == XprocR ) ) { if( ( XisD && ( XmyprocD == XprocD ) ) || ( !XisD ) ) FDOT( &ione, DOT, Mptr( X, Xii, Xjj, Xld, TYPE->size ), &Xlinc, buf, &ione ); if( XisD && !XisOne ) { Xscope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &Xscope, TOP_GET ); TYPE->Cgsum2d( ctxt, &Xscope, top, 1, 1, DOT, 1, -1, 0 ); } } if( buf ) free( buf ); /* * sub( X ) or sub( Y ) is a degenerated vector */ if( XisD && XisOne ) { /* * Since XisOne, sub( X ) must be a row vector */ if( XisR ) { /* * sub( X ) resides in one process column ( *, XprocD ) */ if( RRorCC ) { /* * sub( Y ) is a row vector as well */ if( YisR || YmyprocR == YprocR ) { /* * I am a process row owning sub( Y ) */ if( YisD && YisOne ) { /* * sub( Y ) resides in a process column ( *, YprocD ) */ if( XprocD != YprocD ) { if( XmyprocD == XprocD ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, XmyprocR, YprocD ); else if( YmyprocD == YprocD ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocR, XprocD ); } } else { /* * Every process in those rows needs the answer */ top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocD == XprocD ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocR, XprocD ); } } } else { /* * sub( Y ) is a column vector */ if( YisR ) { /* * sub( Y ) resides in every process column */ top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocD == XprocD ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocR, XprocD ); } else if( XprocD != YprocR ) { /* * sub( Y ) resides in process column YprocR */ if( XmyprocD == XprocD ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, XmyprocR, YprocR ); if( YmyprocR == YprocR ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocR, XprocD ); } } } else { /* * sub( X ) resides in one process ( XprocR, XprocD ) */ if( YisD && YisOne ) { /* * sub( Y ) resides in one process ( YprocR, YprocD ) if it is not replicated, * and in one process column ( *, YprocD ) otherwise */ if( ( XprocD != YprocD ) || ( !YisR && ( XprocR != YprocR ) ) ) { /* * ( XprocR, XprocD ) sends DOT to ( YprocR, YprocD ) if sub( Y ) is not repli- * cated, and to ( XprocR, YprocD ) otherwise */ rdst = ( YisR ? XprocR : YprocR ); if( ( XmyprocR == XprocR ) && ( XmyprocD == XprocD ) ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, rdst, YprocD ); if( ( YmyprocR == rdst ) && ( YmyprocD == YprocD ) ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XprocD ); } if( YisR && ( YmyprocD == YprocD ) ) { /* * Broadcast DOT within process column owning sub( Y ) */ top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, YprocD ); } } else if( !YisR ) { /* * sub( Y ) resides in one process row or column */ if( YisRow ) { Yscope = CROW; rdst = YprocR; cdst = XprocD; } else { Yscope = CCOLUMN; rdst = XprocR; cdst = YprocR; } /* * ( XprocR, XprocD ) sends DOT to ( YprocR, XprocD ) if sub( Y ) is a row * vector and to ( XprocR, YprocR ) otherwise. If RRorCC, then YisRow and the * send occurs iff XprocR != YprocR; Otherwise !YisRow, and the send occurs * iff XprocD is not YprocR. */ if( ( RRorCC && ( XprocR != YprocR ) ) || ( !( RRorCC ) && ( XprocD != YprocR ) ) ) { if( ( XmyprocR == XprocR ) && ( XmyprocD == XprocD ) ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, rdst, cdst ); if( ( myrow == rdst ) && ( mycol == cdst ) ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XprocD ); } /* * Broadcast the result in sub( Y )'s scope */ if( ( myrow == rdst ) && ( mycol == cdst ) ) { top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET ); TYPE->Cgebs2d( ctxt, &Yscope, top, 1, 1, DOT, 1 ); } else if( ( YisRow && ( myrow == rdst ) ) || ( !( YisRow ) && ( mycol == cdst ) ) ) { top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET ); TYPE->Cgebr2d( ctxt, &Yscope, top, 1, 1, DOT, 1, rdst, cdst ); } } else { /* * Every process in the grid needs the answer */ top = PB_Ctop( &ctxt, BCAST, ALL, TOP_GET ); if( ( XmyprocR == XprocR ) && ( XmyprocD == XprocD ) ) { TYPE->Cgebs2d( ctxt, ALL, top, 1, 1, DOT, 1 ); } else { TYPE->Cgebr2d( ctxt, ALL, top, 1, 1, DOT, 1, XprocR, XprocD ); } } } } else { /* * If XisR, then the result has already been sent in every process of the grid */ if( XisR ) return; if( RRorCC ) { /* * If YisD && YisOne => YisRow => XisRow, communication orthogonal to sub( X )'s * direction: only process column YprocD is involved. */ if( YisD && YisOne && ( YmyprocD != YprocD ) ) return; if( YisR ) { /* * YisR and sub( Y ) is // to sub( X ) => bcast orthogonal to sub( X ) direction */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocD, XprocR ); } } else if( XprocR != YprocR ) { /* * Send from one column/row to another if they differ */ if( XisRow ) { if( XmyprocR == XprocR ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YprocR, YmyprocD ); if( YmyprocR == YprocR ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { if( XmyprocR == XprocR ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YmyprocD, YprocR ); if( YmyprocR == YprocR ) TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocD, XprocR ); } } } else { /* * If XisRow then !YisRow and thus bcast result in all rows if YisR or in * process row YprocR otherwise. If !YisD || ( YisD && !YisOne ), then result * should be sent in the same processes because they span a row or a column of * the grid. */ if( XisRow || !( YisD ) || ( YisD && !( YisOne ) ) ) { if( YisR || YmyprocR == YprocR ) { if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocD, XprocR ); } } } else if( XprocR != YprocD ) { /* * YisD && YisOne => YisRow => !XisRow, so the column of processes owning * sub( X ) send the result to the column YprocD. The process rows involved * in the operation depend on YisR. */ if( YisR || YmyprocR == YprocR ) { if( XmyprocR == XprocR ) { TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YmyprocR, YprocD ); } if( YmyprocD == YprocD ) { TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, YmyprocR, XprocR ); } } } } } /* * End of PB_Cpdot11 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_topset_.c0000640000175000017500000000441410363532303020031 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_topset_( int * ICTXT, F_CHAR_T OP, F_CHAR_T SCOPE, F_CHAR_T TOP ) #else void PB_topset_( ICTXT, OP, SCOPE, TOP ) /* * .. Scalar Arguments .. */ int * ICTXT; /* * .. Array Arguments .. */ F_CHAR_T OP, SCOPE, TOP; #endif { /* * Purpose * ======= * * PB_topset_ initializes the row-, column- or all- broadcast and combi- * ne topologies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * OP (global input) pointer to CHAR * On input, OP specifies the BLACS operation defined as fol- * lows: * OP = 'B' or 'b', BLACS broadcast operation, * OP = 'C' or 'c', BLACS combine operation. * * SCOPE (global input) pointer to CHAR * On entry, SCOPE specifies the scope of the BLACS operation as * follows: * SCOPE = 'R' or 'r', rowwise broadcast or combine, * SCOPE = 'C' or 'c', column broadcast or combine, * SCOPE = 'A' or 'a', all broadcast or combine. * * TOP (global input) pointer to CHAR * On entry, TOP is a character string specifying the BLACS to- * pology to be used i.e. to be set for the given operation * specified by OP and SCOPE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ if( * F2C_CHAR( TOP ) != '!' ) (void) PB_Ctop( ICTXT, F2C_CHAR( OP ), F2C_CHAR( SCOPE ), F2C_CHAR( TOP ) ); /* * End of PB_topset_ */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpgemmAC.c0000640000175000017500000006247610363532303020004 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpgemmAC( PBTYP_T * TYPE, char * DIRECA, char * DIRECC, char * TRANSA, char * TRANSB, int M, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpgemmAC( TYPE, DIRECA, DIRECC, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * DIRECA, * DIRECC, * TRANSA, * TRANSB; int IA, IB, IC, JA, JB, JC, K, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_CpgemmAC performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * conjg(A(IA:IA+K-1,JA:JA+M-1)') if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * conjg(B(IB:IB+N-1,JB:JB+K-1)') if TRANSB = 'C'. * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * This is the inner-product algorithm using the logical LCM algorithmic * blocking technique. The submatrix operand sub( B ) stays in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be looped over as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * DIRECC (global input) pointer to CHAR * On entry, DIRECC specifies the direction in which the rows * or columns of sub( C ) should be looped over as follows: * DIRECC = 'F' or 'f' forward or increasing, * DIRECC = 'B' or 'b' backward or decreasing. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * TRANSB (global input) pointer to CHAR * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * TRANSB = 'C' or 'c' op( sub( B ) ) = conjg( sub( B )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Aroc, GemmTa, GemmTb, TrA, TrB, * one, * talpha, * tbeta, top, * zero; int Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff, ArocD, ArocR, AsrcR, Asrc_, Bcol, Bii, Bimb1, Binb1, Bjj, Bld, Bm, Bmb, Bmp, Bn, Bnb, Bnq, Brow, Cbufld, Ccol, Ccurrow, Cfr, Cfwd, Cii, Cimb, Cimb1, Cinb, Cinb1, CisR, Cjj, Ckk, Cld, Cmb, Cmp, Cnb, Cnq, Coff, Crow, Csrc, WAfr, WCfr, WCsum, ctxt, lcmb, m, maxp, maxpm1, maxq, mb, mbb, mycol, myrow, ncpq, nota, notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ PB_VM_T VM; int Bd0[DLEN_], DBUFA[DLEN_], DBUFC[DLEN_], WAd[DLEN_], WCd[DLEN_]; char * Abuf = NULL, * Bptr = NULL, * Cbuf = NULL, * WA = NULL, * WC = NULL; /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); Cfwd = ( Mupcase( DIRECC[0] ) == CFORWARD ); nota = ( ( TrA = Mupcase( TRANSA[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( TRANSB[0] ) ) == CNOTRAN ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; mb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ), sub( B ) and sub( C ) */ if( nota ) { AiD = JA; AiR = IA; Asrc_ = RSRC_; Aroc = CROW; AinbR = DESCA[IMB_ ]; AinbD = DESCA[INB_]; AnbR = DESCA[MB_ ]; AnbD = DESCA[NB_ ]; AsrcR = DESCA[Asrc_]; Ald = DESCA[LLD_]; AmyprocD = mycol; AnprocsD = npcol; AmyprocR = myrow; AnprocsR = nprow; PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); } else { AiD = IA; AiR = JA; Asrc_ = CSRC_; Aroc = CCOLUMN; AinbD = DESCA[IMB_ ]; AinbR = DESCA[INB_]; AnbD = DESCA[MB_ ]; AnbR = DESCA[NB_ ]; AsrcR = DESCA[Asrc_]; Ald = DESCA[LLD_]; AmyprocD = myrow; AnprocsD = nprow; AmyprocR = mycol; AnprocsR = npcol; PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); } Ainb1D = PB_Cfirstnb( K, AiD, AinbD, AnbD ); AnpD = PB_Cnumroc( K, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD ); Ainb1R = PB_Cfirstnb( M, AiR, AinbR, AnbR ); Cimb = DESCC[IMB_ ]; Cinb = DESCC[INB_]; Cmb = DESCC[MB_ ]; Cnb = DESCC[NB_ ]; Csrc = DESCC[RSRC_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Crow, &Ccol ); Cimb1 = PB_Cfirstnb( M, IC, Cimb, Cmb ); Cinb1 = PB_Cfirstnb( N, JC, Cinb, Cnb ); Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol ); /* * Retrieve the BLACS combine topology, compute conjugate of alpha for the * conjugate transpose case and set the transpose parameters to be passed to * the BLAS matrix multiply routine. */ if( notb ) { Bm = K; Bn = N; top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); talpha = ALPHA; GemmTa = ( nota ? CTRAN : TrA ); GemmTb = CNOTRAN; } else { Bm = N; Bn = K; top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( TrB == CCOTRAN ) { talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); GemmTb = ( ( TrA == CCOTRAN ) ? CTRAN : CCOTRAN ); } else { talpha = ALPHA; GemmTb = ( ( TrA == CCOTRAN ) ? CCOTRAN : CTRAN ); } GemmTa = CNOTRAN; } /* * Compute descriptor Bd0 for sub( B ) */ PB_Cdescribe( Bm, Bn, IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Bld, &Bimb1, &Binb1, &Bmb, &Bnb, &Brow, &Bcol, Bd0 ); Bmp = PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow ); Bnq = PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr = Mptr( B, Bii, Bjj, Bld, size ); /* * When sub( A ) is not replicated and backward pass on sub( A ), find the * virtual process q owning the last row or column of sub( A ). */ if( !( AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ) ) && !Afwd ) { tmp = PB_Cindxg2p( M - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR ); q = MModSub( tmp, ArocR, AnprocsR ); } /* * When sub( C ) is not replicated and backward pass on sub( C ), find the * virtual process p owning the last row or column of sub( C ). */ if( !( CisR = ( ( Crow < 0 ) || ( nprow == 1 ) ) ) && !Cfwd ) { tmp = PB_Cindxg2p( M - 1, Cimb1, Cmb, Crow, Crow, nprow ); p = MModSub( tmp, Crow, nprow ); } /* * Loop over the virtual process grid induced by the rows or columns of * sub( A ) and sub( C ). */ lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : nprow ) ) * Cmb, ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR ); m = M; maxpm1 = maxp - 1; while( m > 0 ) { /* * Initialize local virtual matrix in process (p,q) */ AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, AnprocsR ) ); Akk = PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, AnprocsR ); AnpR = PB_Cnumroc( M, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); Ccurrow = ( CisR ? -1 : MModAdd( Crow, p, nprow ) ); Ckk = PB_Cg2lrem( IC, Cimb, Cmb, Ccurrow, Csrc, nprow ); Cmp = PB_Cnumroc( M, 0, Cimb1, Cmb, Ccurrow, Crow, nprow ); PB_CVMinit( &VM, 0, Cmp, AnpR, Cimb1, Ainb1R, Cmb, AnbR, p, q, maxp, maxq, lcmb ); /* * Find how many diagonals in this virtual process */ npq = PB_CVMnpq( &VM ); m -= npq; /* * Re-adjust the number of rows or columns to be (un)packed, in order to * average the message sizes. */ if( npq ) mbb = npq / ( ( npq - 1 ) / mb + 1 ); while( npq ) { mbb = MIN( mbb, npq ); /* * Find out how many rows or columns of sub( A ) and sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); if( nota ) { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < mbb ) ) != 0 ) { /* * If rows of sub( A ) are not contiguous, then allocate the buffer and * pack the mbb rows of sub( A ). */ Abufld = mbb; if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * mbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, mbb, AnpD, one, Mptr( A, Akk, AiiD, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size ); } PB_Cdescset( DBUFA, mbb, K, mbb, Ainb1D, mbb, AnbD, AcurrocR, ArocD, ctxt, Abufld ); } else { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < mbb ) ) != 0 ) { /* * If columns of sub( A ) are not contiguous, then allocate the buffer and pack * the mbb columns of sub( A ). */ Abufld = MAX( 1, AnpD ); if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * mbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, mbb, AnpD, one, Mptr( A, AiiD, Akk, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size ); } PB_Cdescset( DBUFA, K, mbb, Ainb1D, mbb, AnbD, mbb, ArocD, AcurrocR, ctxt, Abufld ); } if( notb ) { /* * Replicate this panel of rows or columns of sub( A ) over sub( B ) -> WA */ PB_CInV( TYPE, NOCONJG, COLUMN, Bm, Bn, Bd0, mbb, Abuf, 0, 0, DBUFA, &Aroc, &WA, WAd, &WAfr ); /* * Allocate space for temporary results in scope of sub( B ) -> WC */ PB_COutV( TYPE, ROW, INIT, Bm, Bn, Bd0, mbb, &WC, WCd, &WCfr, &WCsum ); /* * Local matrix-matrix multiply iff I own some data */ if( Bmp > 0 && Bnq > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &mbb, &Bnq, &Bmp, talpha, WA, &WAd[LLD_], Bptr, &Bld, zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Accumulate the intermediate results in WC */ if( WCsum ) { WCd[RSRC_] = Ccurrow; if( Bnq > 0 ) gsum2d( ctxt, COLUMN, &top, mbb, Bnq, WC, WCd[LLD_], WCd[RSRC_], mycol ); } /* * Compute the descriptor DBUFC for the buffer that will contained the packed * rows of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < mbb ) ) != 0 ) { /* * If rows of sub( C ) are not contiguous, then allocate the buffer */ Cbufld = mbb; tbeta = zero; if( CisR || ( myrow == Ccurrow ) ) Cbuf = PB_Cmalloc( Cnq * mbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( myrow == Ccurrow ) ) Cbuf = Mptr( C, Ckk+Coff, Cjj, Cld, size ); } PB_Cdescset( DBUFC, mbb, N, mbb, Cinb1, mbb, Cnb, Ccurrow, Ccol, ctxt, Cbufld ); /* * Cbuf := Cbuf + WC */ PB_Cpaxpby( TYPE, NOCONJG, mbb, N, one, WC, 0, 0, WCd, ROW, tbeta, Cbuf, 0, 0, DBUFC, ROW ); /* * Unpack the mbb rows of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( myrow == Ccurrow ) ) ) { PB_CVMpack( TYPE, &VM, ROW, ROW, UNPACKING, NOTRAN, mbb, Cnq, BETA, Mptr( C, Ckk, Cjj, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } if( WCfr ) free( WC ); } else { /* * Replicate this panel of rows or columns of sub( A ) over sub( B ) -> WA */ PB_CInV( TYPE, NOCONJG, ROW, Bm, Bn, Bd0, mbb, Abuf, 0, 0, DBUFA, &Aroc, &WA, WAd, &WAfr ); /* * Allocate space for temporary results in scope of sub( A ) -> WC */ PB_COutV( TYPE, COLUMN, INIT, Bm, Bn, Bd0, mbb, &WC, WCd, &WCfr, &WCsum ); /* * Local matrix-matrix multiply iff I own some data */ if( Bmp > 0 && Bnq > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Bmp, &mbb, &Bnq, talpha, Bptr, &Bld, WA, &WAd[LLD_], zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Accumulate the intermediate results in WC */ if( WCsum ) { WCd[CSRC_] = 0; if( Bmp > 0 ) gsum2d( ctxt, ROW, &top, Bmp, mbb, WC, WCd[LLD_], myrow, WCd[CSRC_] ); } /* * Compute the descriptor DBUFC for the buffer that will contained the packed * rows of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < mbb ) ) != 0 ) { /* * If rows of sub( C ) are not contiguous, then allocate the buffer */ Cbufld = mbb; tbeta = zero; if( CisR || ( myrow == Ccurrow ) ) Cbuf = PB_Cmalloc( Cnq * mbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( myrow == Ccurrow ) ) Cbuf = Mptr( C, Ckk+Coff, Cjj, Cld, size ); } PB_Cdescset( DBUFC, mbb, N, mbb, Cinb1, mbb, Cnb, Ccurrow, Ccol, ctxt, Cbufld ); /* * Cbuf := Cbuf + WC' */ PB_Cpaxpby( TYPE, ( TrB == CCOTRAN ? CONJG : NOCONJG ), N, mbb, one, WC, 0, 0, WCd, COLUMN, tbeta, Cbuf, 0, 0, DBUFC, ROW ); /* * Unpack the mbb rows of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( myrow == Ccurrow ) ) ) { PB_CVMpack( TYPE, &VM, ROW, ROW, UNPACKING, NOTRAN, mbb, Cnq, BETA, Mptr( C, Ckk, Cjj, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } if( WCfr ) free( WC ); } /* * Update the local indexes of sub( B ) and sub( C ) */ PB_CVMupdate( &VM, mbb, &Ckk, &Akk ); npq -= mbb; } /* * Go to next or previous virtual process row or column */ if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) ) q = ( Afwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( Cfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); } if( TrB == CCOTRAN ) free( talpha ); /* * End of PB_CpgemmAC */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsymv.c0000640000175000017500000002233410363532303020034 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsymv( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR ) #else void PB_Ctzsymv( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Ctzsymv performs the matrix-vector operation * * y := alpha * A * x + y, * * where alpha is a scalar, x and y are n element vectors and A is an m * by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YC (input/output) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. On exit, YC is overwritten by the partially * updated vector y. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * YR (input/output) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. On exit, YR is overwritten by the partially * updated vector y. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, ione=1, j1, m1, mn, n1, size; GEMV_T gemv; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemv = TYPE->Fgemv; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( TRAN ), &M, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsymv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YC, i1, 0, LDYC, size ), &ione ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, Mptr( YC, i1, 0, LDYC, size ), &ione ); gemv( C2F_CHAR( TRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YR, 0, j1, LDYR, size ), &LDYR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemv = TYPE->Fgemv; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( TRAN ), &m1, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } TYPE->Fsymv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( XC, m1, 0, LDXC, size ), &ione, one, Mptr( YC, m1, 0, LDYC, size ), &ione ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, YC, &ione ); gemv( C2F_CHAR( TRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, XC, &ione, one, Mptr( YR, 0, j1, LDYR, size ), &LDYR ); } } else { one = TYPE->one; gemv = TYPE->Fgemv; gemv( C2F_CHAR( NOTRAN ), &M, &N, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( TRAN ), &M, &N, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } /* * End of PB_Ctzsymv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzher2k.c0000640000175000017500000002231510363532303020050 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzher2k( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * AC, int LDAC, char * BC, int LDBC, char * AR, int LDAR, char * BR, int LDBR, char * C, int LDC ) #else void PB_Ctzher2k( TYPE, UPLO, M, N, K, IOFFD, ALPHA, AC, LDAC, BC, LDBC, AR, LDAR, BR, LDBR, C, LDC ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDAC, LDAR, LDBC, LDBR, LDC, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * AC, * AR, * BC, * BR, * C; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctzher2k performs the trapezoidal symmetric or Hermitian rank k * operation: * * C := alpha * AC * AR + C, * * where alpha is a scalar, AC is an m by k matrix, AR is an k by n ma- * trix and C is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix C is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of C is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of C is referenced, * * otherwise all of the matrix C is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix C. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix C. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of columns of the matrix AC, * and the number of rows of the matrix AR. K must be at least * zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of C as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal C( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal C( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal C( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * AC (input) pointer to CHAR * On entry, AC is an array of dimension (LDAC,K) containing the * m by k matrix AC. * * LDAC (input) INTEGER * On entry, LDAC specifies the leading dimension of the array * AC. LDAC must be at least max( 1, M ). * * AR (input) pointer to CHAR * On entry, AR is an array of dimension (LDAR,N) containing the * k by n matrix AR. * * LDAR (input) INTEGER * On entry, LDAR specifies the leading dimension of the array * AR. LDAR must be at least K. * * C (input/output) pointer to CHAR * On entry, C is an array of dimension (LDC,N) containing the m * by n matrix A. Only the trapezoidal part of C determined by * UPLO and IOFFD is updated. * * LDC (input) INTEGER * On entry, LDC specifies the leading dimension of the array C. * LDC must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Calph, * one, type; int i1, j1, m1, mn, n1, size; cmplx Calph8; cmplx16 Calph16; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; type = TYPE->type; if( type == SCPLX ) { Calph = ( (char *)(Calph8 ) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else if( type == DCPLX ) { Calph = ( (char *)(Calph16) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else { Calph = ALPHA; } if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, Calph, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fher2k( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( BC, i1, 0, LDBC, size ), &LDBC, one, Mptr( C, i1, j1, LDC, size ), &LDC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, Calph, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, Calph, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } TYPE->Fher2k( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, m1, 0, LDAC, size ), &LDAC, Mptr( BC, m1, 0, LDBC, size ), &LDBC, one, Mptr( C, m1, j1, LDC, size ), &LDC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, Calph, BC, &LDBC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, ALPHA, AC, &LDAC, BR, &LDBR, one, C, &LDC ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, Calph, BC, &LDBC, AR, &LDAR, one, C, &LDC ); } /* * End of PB_Ctzher2k */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpdotND.c0000640000175000017500000006751010363532303017655 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpdotND( PBTYP_T * TYPE, int N, char * DOT, char * X, int IX, int JX, int * DESCX, int INCX, char * Y, int IY, int JY, int * DESCY, int INCY, VVDOT_T FDOT ) #else void PB_CpdotND( TYPE, N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, FDOT ) /* * .. Scalar Arguments .. */ int INCX, INCY, IX, IY, JX, JY, N; char * DOT; PBTYP_T * TYPE; VVDOT_T FDOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CpdotND forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ) or DOT := sub( X )**H * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * sub( X ) is assumed to be not distributed, and sub( Y ) is assumed to * be distributed. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) pointer to CHAR * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * FDOT (local input) pointer to a function of type VVDOT * On entry, FDOT points to a subroutine that computes the local * dot product of two vectors. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * top; int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnprocsD, XnprocsR, XprocR, Xroc, Xrow, Ycol, Yii, Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size; /* * .. Local Arrays .. */ char * Xptr = NULL, * Yptr = NULL, * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 ) { Xld = DESCX[LLD_]; Xlinc = Xld; XmyprocD = mycol; XnprocsD = npcol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); } else { Xld = DESCX[LLD_]; Xlinc = 1; XmyprocD = myrow; XnprocsD = nprow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 ) { YnbD = DESCY[NB_]; Yld = DESCY[LLD_]; Ylinc = Yld; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol; Yinb1D = PB_Cfirstnb( N, JY, DESCY[INB_], YnbD ); } else { YnbD = DESCY[MB_]; Yld = DESCY[LLD_]; Ylinc = 1; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow; Yinb1D = PB_Cfirstnb( N, IY, DESCY[IMB_], YnbD ); } YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * sub( X ) is not distributed and sub( Y ) is distributed */ if( !( XisR ) ) { /* * sub( X ) is not replicated. Since this operation is local if sub( X ) and * sub( Y ) are both row or column vectors, choose YprocR = XprocR when RRorCC, * and YprocR = 0 otherwise. */ if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); } /* * Now, it is just like sub( Y ) is not replicated, this information however is * kept in YisR for later use. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( XprocR == YprocR ) { /* * sub( X ) and sub( Y ) are in the same process row or column */ if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { size = TYPE->size; YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); /* * In a given process, the dot product is computed with sub( Y ) and the cor- * responding non distributed part of sub( X ). In the other processes, this * part of sub( X ) is simply ignored. */ if( YnpD > 0 ) { Yroc = YprocD; if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; } else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { FDOT( &Yinb1D, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( XisRow ) FDOT( &kbb, DOT, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); else FDOT( &kbb, DOT, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); kk += kbb; } Yroc = MModAdd1( Yroc, YnprocsD ); } } /* * Replicate locally scattered dot product by reducing it */ if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, 1, DOT, 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, 1, 1, DOT, 1, -1, 0 ); } } } else { /* * sub( X ) and sub( Y ) are in a different process row or column */ if( YmyprocR == YprocR ) { size = TYPE->size; YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); /* * If I own a piece of sub( Y ), then send it to the process row or column where * sub( X ) resides and receive the dot product when sub( Y ) is not replicated. */ if( YisRow ) { if( YnpD > 0 ) TYPE->Cgesd2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { if( YnpD > 0 ) TYPE->Cgesd2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocD, XprocR ); } } if( XmyprocR == XprocR ) { size = TYPE->size; YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); /* * If I own sub( X ), then receive the distributed part of sub( Y ) owned by * the process where sub( Y ) resides in my row or column. Compute the partial * dot product as if sub( Y ) would reside in the same process row or column as * sub( X ). Combine the local results. */ if( YnpD > 0 ) { buf = PB_Cmalloc( YnpD * size ); if( YisRow ) TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, XmyprocD ); else TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD, YprocR ); Yroc = YprocD; kk = 0; if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { FDOT( &Yinb1D, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); kk += Yinb1D; } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( XisRow ) FDOT( &kbb, DOT, Mptr( X, Xii, k, Xld, size ), &Xlinc, buf+kk*size, &ione ); else FDOT( &kbb, DOT, Mptr( X, k, Xjj, Xld, size ), &Xlinc, buf+kk*size, &ione ); kk += kbb; } Yroc = MModAdd1( Yroc, YnprocsD ); } if( buf ) free( buf ); } /* * Combine the local results within the process row or column XprocR and * send the result to the process row or column YprocR when sub( Y ) is not * replicated. */ if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, 1, DOT, 1, -1, 0 ); if( !YisR ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, 1, 1, DOT, 1, -1, 0 ); if( !YisR ) TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YmyprocD, YprocR ); } } } if( YisR ) { /* * If sub( Y ) is replicated, then bcast the result */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocD, XprocR ); } } } else { /* * sub( X ) and sub( Y ) are not both row or column vectors */ if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { size = TYPE->size; Xroc = 0; if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } /* * Loop over the processes in which sub( Y ) resides, for each process find the * next process Xroc and compute the dot product. After this, it will be needed * to reduce the local dot produsts as above. */ for( p = 0; p < YnprocsD; p++ ) { mydist = MModSub( p, YprocD, YnprocsD ); myproc = MModAdd( YprocD, mydist, YnprocsD ); if( ( XprocR == p ) && ( YprocR == Xroc ) ) { /* * Compute locally the partial dot product at the intersection of the process * cross. */ if( ( XmyprocR == p ) && ( XmyprocD == Xroc ) ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { Yroc = YprocD; kk = ( XisRow ? Yii : Yjj ); if( myproc == Yroc ) { FDOT( &Yinb1D, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( myproc == Yroc ) { if( XisRow ) FDOT( &kbb, DOT, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); else FDOT( &kbb, DOT, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); kk += kbb; } Yroc = MModAdd1( Yroc, YnprocsD ); } } } } else { /* * Message exchange */ if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { if( XisRow ) TYPE->Cgesd2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, Xroc ); else TYPE->Cgesd2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, Xroc, XprocR ); } } if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { buf = PB_Cmalloc( YnpD * size ); Yroc = YprocD; kk = 0; /* * Receive the piece of sub( Y ) that I should handle */ if( XisRow ) TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, p, YprocR ); else TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, p ); if( myproc == Yroc ) { FDOT( &Yinb1D, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); kk += Yinb1D; } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( myproc == Yroc ) { if( XisRow ) FDOT( &kbb, DOT, Mptr( X, Xii, k, Xld, size ), &Xlinc, buf+kk*size, &ione ); else FDOT( &kbb, DOT, Mptr( X, k, Xjj, Xld, size ), &Xlinc, buf+kk*size, &ione ); kk += kbb; } Yroc = MModAdd1( Yroc, YnprocsD ); } if( buf ) free( buf ); } } } Xroc = MModAdd1( Xroc, XnprocsD ); } /* * Combine the local results in sub( X )'s scope */ if( XmyprocR == XprocR ) { if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, 1, DOT, 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, 1, 1, DOT, 1, -1, 0 ); } } } /* * Broadcast the result in sub( Y )'s scope */ if( YisR || ( YmyprocR == YprocR ) ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, YmyprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, YmyprocR ); } } } } else { /* * sub( X ) is replicated in every process. Compute the local dot product in * process row or column YprocR when sub( Y ) is not replicated and in every * process otherwise. */ if( YisR || ( YmyprocR == YprocR ) ) { size = TYPE->size; Yroc = YprocD; kk = ( YisRow ? Yjj : Yii ); if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { FDOT( &Yinb1D, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( XisRow ) { Xptr = Mptr( X, Xii, k, Xld, size ); } else { Xptr = Mptr( X, k, Xjj, Xld, size ); } if( YisRow ) { Yptr = Mptr( Y, Yii, kk, Yld, size ); } else { Yptr = Mptr( Y, kk, Yjj, Yld, size ); } FDOT( &kbb, DOT, Xptr, &Xlinc, Yptr, &Ylinc ); kk += kbb; } Yroc = MModAdd1( Yroc, YnprocsD ); } } if( YisR ) { /* * sub( Y ) is replicated, combine the results in each process row or column. */ if( YisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, 1, DOT, 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, 1, 1, DOT, 1, -1, 0 ); } } else { /* * sub( Y ) is not replicated, combine the results in the entire grid at once. */ top = PB_Ctop( &ctxt, COMBINE, ALL, TOP_GET ); TYPE->Cgsum2d( ctxt, ALL, top, 1, 1, DOT, 1, -1, 0 ); } } /* * End of PB_CpdotND */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplascal.c0000640000175000017500000003452110363532303020100 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplascal( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA ) #else void PB_Cplascal( TYPE, UPLO, CONJUG, M, N, ALPHA, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ char * CONJUG, * UPLO; int IA, JA, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A; #endif { /* * Purpose * ======= * * PB_Cplascal scales by alpha an m by n submatrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies the part of the submatrix sub( A ) * to be scaled as follows: * = 'L' or 'l': Lower triangular part is scaled; the * strictly upper triangular part of sub( A ) is not changed; * = 'U' or 'u': Upper triangular part is scaled; the * strictly lower triangular part of sub( A ) is not changed; * Otherwise: All of the submatrix sub( A ) is scaled. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies what kind of scaling should be * done as follows: when UPLO is 'L', 'l', 'U' or 'u' and CONJUG * is 'Z' or 'z', alpha is assumed to be real and the imaginary * part of the diagonals are set to zero. Otherwise, alpha is of * the same type as the entries of sub( A ) and nothing particu- * lar is done to the diagonals of sub( A ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant with which the matrix elements are to be scaled. * * A (local input/local output) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be scaled. On exit, the * local entries of this array corresponding to the to the en- * tries of the submatrix sub( A ) are overwritten by the local * entries of the m by n scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, herm, type; int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, ctxt, izero=0, k, kb, ktmp, mn, mycol, myrow, nb, npcol, nprow, size; TZSCAL_T scal; /* * .. Local Arrays .. */ int Ad0[DLEN_]; char * Aptr = NULL; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * If alpha is zero, then call PB_Cplapad instead. */ type = TYPE->type; UploA = Mupcase( UPLO[0] ); herm = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) ); if( type == SREAL ) { if( ((float*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return; } else if( type == DREAL ) { if( ((double*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return; } else if( type == SCPLX ) { if( herm == CCONJG ) { if( ((float*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } } else { if( ((float*)(ALPHA))[IMAG_PART] == ZERO ) { if( ((float*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return; } } } else if( type == DCPLX ) { if( herm == CCONJG ) { if( ((double*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } } else { if( ((double*)(ALPHA))[IMAG_PART] == ZERO ) { if( ((double*)(ALPHA))[REAL_PART] == ZERO ) { PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA, JA, DESCA ); return; } else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return; } } } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Quick return if I don't own any of sub( A ). */ Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; size = TYPE->size; scal = ( herm == CCONJG ? TYPE->Fhescal : TYPE->Ftzscal ); Aptr = Mptr( A, Aii, Ajj, Ald, size ); /* * When the entire sub( A ) needs to be scaled or when sub( A ) is replicated in * all processes, just call the local routine. */ if( ( Mupcase( UPLO[0] ) == CALL ) || ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) { scal( C2F_CHAR( UPLO ), &Amp, &Anq, &izero, ALPHA, Aptr, &Ald ); return; } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and two times the least common multiple of nprow * and npcol. */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); mn = MIN( M, N ); if( Mupcase( UPLO[0] ) == CLOWER ) { /* * Lower triangle of sub( A ): proceed by block of columns. For each block of * columns, operate on the logical diagonal block first and then the remaining * rows of that block of columns. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) ); PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 = Amp - Akp ) > 0 ) scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } else if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Upper triangle of sub( A ): proceed by block of columns. For each block of * columns, operate on the trailing rows and then the logical diagonal block * of that block of columns. When M < N, the last columns of sub( A ) are * handled together. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 ) scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 ); } if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } else { /* * All of sub( A ): proceed by block of columns. For each block of columns, * operate on the trailing rows, then the logical diagonal block, and finally * the remaining rows of that block of columns. When M < N, the last columns * of sub( A ) are handled together. */ for( k = 0; k < mn; k += nb ) { kb = mn - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 ) scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cplasca2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, Aptr, k, k, Ad0 ); Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp0 = Amp - Akp ) > 0 ) scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } /* * End of PB_Cplascal */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMpack.c0000640000175000017500000005737710363532303017660 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_CVMpack( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS, char * UNPA, char * TRANS, int MN, int K, char * ALPHA, char * A, int LDA, char * BETA, char * B, int LDB ) #else int PB_CVMpack( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A, LDA, BETA, B, LDB ) /* * .. Scalar Arguments .. */ int K, LDA, LDB, MN; char * ALPHA, * BETA; /* * .. Array Arguments .. */ char * VROCS, * ROCS, * UNPA, * TRANS; PBTYP_T * TYPE; PB_VM_T * VM; char * A, * B; #endif { /* * Purpose * ======= * * PB_CVMpack packs a one-dimensional distributed array A into B, or * unpacks an array B into a one-dimensional distributed array A. This * operation is triggered by a virtual distributed array. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (see pblas.h). * * VM (local input) pointer to a VM structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * VROCS (local input) pointer to CHAR * On entry, VROCS specifies if the rows or columns of the vir- * tual distributed array grid should be used for the packing or * unpacking operation as follows: * VROCS = 'R' or 'r', the rows should be used, * VROCS = 'C' or 'c', the columns should be used. * * ROCS (local input) pointer to CHAR * On entry, ROCS specifies if rows or columns should be used * packed or unpacked as follows: * ROCS = 'R' or 'r', rows should be (un)packed, * ROCS = 'C' or 'c', columns should be (un)packed. * * UNPA (local input) pointer to CHAR * On entry, UNPA specifies if the data should be packed or un- * packed as follows: * UNPA = 'P' or 'p', packing, * UNPA = 'U' or 'u', unpacking. * * TRANS (local input) pointer to CHAR * On entry, TRANS specifies if conjugation, transposition or * conjugate transposition should occur during the (un)packing * operation as follows: * TRANS = 'N' or 'n', natural (un)packing, * TRANS = 'Z' or 'z', conjugated (un)packing, * TRANS = 'T' or 't', transposed (un)packing, * TRANS = 'C' or 'c', conjugate transposed (un)packing. * * MN (local input) INTEGER * On entry, MN specifies the length of the distributed dimen- * sion to be (un)packed. MN must be at least zero. * * K (local input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension to be (un)packed. K must be at least zero. * * ALPHA (local input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (local input/local output) pointer to CHAR * On entry, A points to an array of dimension (LDA, Ka), where * Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c', * Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and * when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This * array contains unpacked data. * * LDA (local input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and * MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and * VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB ) * when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'. * * BETA (local input) pointer to CHAR * On entry, BETA specifies the scalar beta. * * B (local input/local output) pointer to CHAR * On entry, B points to an array of dimension (LDB,*). When * ROCS is 'C' or 'c', and TRANS is 'N', 'n', 'Z' or 'Z', B * points to an K by MN array. When TRANS is 'T', 't', 'C' or * 'c', B points to an MN by K array. When ROCS is 'R' or 'r', * and TRANS is 'T', 't', 'C' or 'c', B points to an K by MN ar- * ray. When TRANS is 'N', 'n', 'Z' or 'z', B points to an MN by * K array. This array contains the packed data. * * LDB (local input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb, lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc, * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0, nprow, pmb, qnb, rows, size, tmp1, tmp2, upp; char * aptrd; MMADD_T add; /* .. * .. Executable Statements .. * */ mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks. */ if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 ); /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp; iupp = VM->iupp; nprow = VM->nprow; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low; ilow = VM->ilow; npcol = VM->npcol; if( Mupcase( UNPA[0] ) == CPACKING ) { /* * B is the target packed buffer, A is the distributed source */ if( Mupcase( TRANS[0] ) == CNOTRAN ) { /* * Add A to B */ notran = 1; add = TYPE->Fmmadd; } else if( Mupcase( TRANS[0] ) == CCONJG ) { /* * Add the conjugate of A to B */ notran = 1; add = TYPE->Fmmcadd; } else if( Mupcase( TRANS[0] ) == CTRAN ) { /* * Add the tranpose of A to B */ notran = 0; add = TYPE->Fmmtadd; } else { /* * Add the conjugate tranpose of A to B */ notran = 0; add = TYPE->Fmmtcadd; } } else { /* * B is the source packed buffer, A is the distributed target */ if( Mupcase( TRANS[0] ) == CNOTRAN ) { /* * Add B to A */ notran = 1; add = TYPE->Fmmdda; } else if( Mupcase( TRANS[0] ) == CCONJG ) { /* * Add the conjugate of B to A */ notran = 1; add = TYPE->Fmmddac; } else if( Mupcase( TRANS[0] ) == CTRAN ) { /* * Add the tranpose of B to A */ notran = 0; add = TYPE->Fmmddat; } else { /* * Add the conjugate tranpose of B to A */ notran = 0; add = TYPE->Fmmddact; } } size = TYPE->size; rows = ( Mupcase( ROCS[0] ) == CROW ); if( Mupcase( VROCS[0] ) == CROW ) { /* * (un)packing using rows of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A. */ inca = size; incb = ( notran ? size : LDB * size ); m = &tmp2; n = &K; } else { /* * (un)packing columns of k by mn array A */ inca = LDA * size; incb = ( notran ? LDB * size : size ); m = &K; n = &tmp2; } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( mblks < 2 ) ? imbloc : imbloc + ( mblks - 2 ) * mb + lmbloc ); npq = MIN( npq, kb ); if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB ); else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); B += tmp2 * incb; /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to A. The pointer to B remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to A accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; A += mb * inca; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; aptrd = A; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to B. The pointer to A remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * B accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; B += tmp2 * incb; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; A += mb*inca; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; aptrd = A; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } else { /* * (un)packing using columns of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A */ inca = size; incb = ( notran ? size : LDB * size ); m = &tmp2; n = &K; } else { /* * (un)packing columns of k by mn array A */ inca = LDA * size; incb = ( notran ? LDB * size : size ); m = &K; n = &tmp2; } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( nblks < 2 ) ? inbloc : inbloc + ( nblks - 2 ) * nb + lnbloc ); npq = MIN( npq, kb ); if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB ); else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); B += tmp2 * incb; /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to B. The pointer to A remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to B accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B, &LDB ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; B += tmp2 * incb; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to A. The pointer to B remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * A accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; A += nb * inca; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; aptrd = A; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; A += nb*inca; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A, &LDA, BETA, B, &LDB ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; B += tmp2 * incb; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; A += nbloc * inca; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } /* * End of PB_CVMpack */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cztypeset.c0000640000175000017500000000643510363532303020353 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" PBTYP_T * PB_Cztypeset() { /* * Purpose * ======= * * PB_Cztypeset on the first call initializes a static structure contai- * ning typed information and returns a pointer to it. The other calls * to this routine just returns this pointer. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static int setup=0; static PBTYP_T TypeStruct; static cmplx16 zero, one, negone; /* .. * .. Executable Statements .. * */ if( setup ) return( &TypeStruct ); setup = 1; TypeStruct.type = DCPLX; TypeStruct.usiz = sizeof( double ); TypeStruct.size = sizeof( cmplx16 ); zero [REAL_PART] = ZERO; zero [IMAG_PART] = ZERO; one [REAL_PART] = ONE; one [IMAG_PART] = ZERO; negone[REAL_PART] = -ONE; negone[IMAG_PART] = ZERO; TypeStruct.zero = ((char *) zero); TypeStruct.one = ((char *) one); TypeStruct.negone = ((char *) negone); TypeStruct.Cgesd2d = Czgesd2d; TypeStruct.Cgerv2d = Czgerv2d; TypeStruct.Cgebs2d = Czgebs2d; TypeStruct.Cgebr2d = Czgebr2d; TypeStruct.Cgsum2d = Czgsum2d; TypeStruct.Fmmadd = zmmadd_; TypeStruct.Fmmcadd = zmmcadd_; TypeStruct.Fmmtadd = zmmtadd_; TypeStruct.Fmmtcadd = zmmtcadd_; TypeStruct.Fmmdda = zmmdda_; TypeStruct.Fmmddac = zmmddac_; TypeStruct.Fmmddat = zmmddat_; TypeStruct.Fmmddact = zmmddact_; TypeStruct.Fcshft = zcshft_; TypeStruct.Frshft = zrshft_; TypeStruct.Fvvdotu = zvvdotu_; TypeStruct.Fvvdotc = zvvdotc_; TypeStruct.Fset = zset_; TypeStruct.Ftzpad = ztzpad_; TypeStruct.Ftzpadcpy = ztzpadcpy_; TypeStruct.Ftzscal = ztzscal_; TypeStruct.Fhescal = zhescal_; TypeStruct.Ftzcnjg = ztzcnjg_; TypeStruct.Faxpy = zaxpy_; TypeStruct.Fcopy = zcopy_; TypeStruct.Fswap = zswap_; TypeStruct.Fgemv = zgemv_; TypeStruct.Fsymv = zsymv_; TypeStruct.Fhemv = zhemv_; TypeStruct.Ftrmv = ztrmv_; TypeStruct.Ftrsv = ztrsv_; TypeStruct.Fagemv = zagemv_; TypeStruct.Fasymv = zasymv_; TypeStruct.Fahemv = zahemv_; TypeStruct.Fatrmv = zatrmv_; TypeStruct.Fgerc = zgerc_; TypeStruct.Fgeru = zgeru_; TypeStruct.Fsyr = zsyr_; TypeStruct.Fher = zher_; TypeStruct.Fsyr2 = zsyr2_; TypeStruct.Fher2 = zher2_; TypeStruct.Fgemm = zgemm_; TypeStruct.Fsymm = zsymm_; TypeStruct.Fhemm = zhemm_; TypeStruct.Fsyrk = zsyrk_; TypeStruct.Fherk = zherk_; TypeStruct.Fsyr2k = zsyr2k_; TypeStruct.Fher2k = zher2k_; TypeStruct.Ftrmm = ztrmm_; TypeStruct.Ftrsm = ztrsm_; return( &TypeStruct ); /* * End of PB_Cztypeset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpgeadd.c0000640000175000017500000005341710363532303017712 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpgeadd( PBTYP_T * TYPE, char * DIRECA, char * DIRECC, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_Cpgeadd( TYPE, DIRECA, DIRECC, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECA, * DIRECC; int IA, IC, JA, JC, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCC; char * A, * C; #endif { /* * Purpose * ======= * * PB_Cpgeadd adds a matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = conjg( X ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if CONJUG = 'N', * conjg(A(IA:IA+N-1,JA:JA+M-1)) if CONJUG = 'C'. * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be looped over as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * DIRECC (global input) pointer to CHAR * On entry, DIRECC specifies the direction in which the rows * or columns of sub( C ) should be looped over as follows: * DIRECC = 'F' or 'f' forward or increasing, * DIRECC = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( C ) as follows: * CONJUG = 'N' or 'n': * sub( C ) := beta*sub( C ) + alpha*sub( A )' * otherwise * sub( C ) := beta*sub( C ) + alpha*conjg( sub( A ) )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrices * sub( A ) and sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatri- * ces sub( A ) and sub( C ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char ACroc, * one, * talpha, * tbeta, * zero; int ACmyprocD, ACmyprocR, ACnD, ACnR, ACnprocsD, ACnprocsR, Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AnbD, AnbR, AnpD, AnpR, Aoff, ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr, Cfwd, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R, CisR, Ckk, Cld, CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR, CsrcR, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxpq, maxq, mycol, myrow, npcol, npq, nprow, ncpq, nrpq, p=0, q=0, row2row, size, tmp; PB_VM_T VM; /* * .. Local Arrays .. */ int DBUFA[DLEN_], DBUFC[DLEN_]; char * Abuf = NULL, * Cbuf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Loop over the rows of sub( C ) when M <= N, and the columns of sub( C ) * otherwise. */ row2row = ( ( M <= N ) || ( npcol == 1 ) || ( DESCA[CSRC_] == -1 ) ); if( row2row ) { AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_]; CinbR = DESCC[IMB_]; CnbR = DESCC[MB_]; CsrcR = DESCC[RSRC_]; /* * If sub( A ) and sub( C ) span only one process row, then there is no need * to pack the data. */ if( !( PB_Cspan( M, IA, AinbR, AnbR, AsrcR, nprow ) ) && !( PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) ) { PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, ROW, BETA, C, IC, JC, DESCC, ROW ); return; } /* * Compute local information for sub( A ) and sub( C ) */ ACnR = M; ACnD = N; ACmyprocR = myrow; ACnprocsR = nprow; ACmyprocD = mycol; ACnprocsD = npcol; ACroc = CROW; AiR = IA; AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); CiR = IC; CiD = JC; CinbD = DESCC[INB_]; CnbD = DESCC[NB_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD, &CiiR, &CiiD, &CrocR, &CrocD ); } else { AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_]; CinbR = DESCC[INB_]; CnbR = DESCC[NB_]; CsrcR = DESCC[CSRC_]; /* * If sub( A ) and sub( C ) span only one process column, then there is no need * to pack the data. */ if( !( PB_Cspan( N, JA, AinbR, AnbR, AsrcR, npcol ) ) && !( PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) ) { PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, COLUMN, BETA, C, IC, JC, DESCC, COLUMN ); return; } /* * Compute local information for sub( A ) and sub( C ) */ ACnR = N; ACnD = M; ACmyprocR = mycol; ACnprocsR = npcol; ACmyprocD = myrow; ACnprocsD = nprow; ACroc = CCOLUMN; AiR = JA; AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); CiR = JC; CiD = IC; CinbD = DESCC[IMB_]; CnbD = DESCC[MB_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR, &CiiD, &CiiR, &CrocD, &CrocR ); } size = TYPE->size; one = TYPE->one; zero = TYPE->zero; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); Ainb1D = PB_Cfirstnb( ACnD, AiD, AinbD, AnbD ); AnpD = PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, ACmyprocD, ArocD, ACnprocsD ); Ainb1R = PB_Cfirstnb( ACnR, AiR, AinbR, AnbR ); AisR = ( ( AsrcR < 0 ) || ( ACnprocsR == 1 ) ); Cinb1D = PB_Cfirstnb( ACnD, CiD, CinbD, CnbD ); CnpD = PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, ACmyprocD, CrocD, ACnprocsD ); Cinb1R = PB_Cfirstnb( ACnR, CiR, CinbR, CnbR ); CisR = ( ( CsrcR < 0 ) || ( ACnprocsR == 1 ) ); lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : ACnprocsR ) ) * CnbR, ( maxq = ( AisR ? 1 : ACnprocsR ) ) * AnbR ); Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); Cfwd = ( Mupcase( DIRECC[0] ) == CFORWARD ); /* * When sub( A ) is not replicated and backward pass on sub( A ), find the * virtual process q owning the last row or column of sub( A ). */ if( !( AisR ) && !( Afwd ) ) { tmp = PB_Cindxg2p( ACnR-1, Ainb1R, AnbR, ArocR, ArocR, ACnprocsR ); q = MModSub( tmp, ArocR, ACnprocsR ); } /* * When sub( C ) is not replicated and backward pass on sub( C ), find the * virtual process p owning the last row or column of sub( C ). */ if( !( CisR ) && !( Cfwd ) ) { tmp = PB_Cindxg2p( ACnR-1, Cinb1R, CnbR, CrocR, CrocR, ACnprocsR ); p = MModSub( tmp, CrocR, ACnprocsR ); } /* * Loop over the processes of the virtual grid */ maxpm1 = maxp - 1; maxpq = maxp * maxq; for( k = 0; k < maxpq; k++ ) { AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, ACnprocsR ) ); CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, ACnprocsR ) ); if( ( AisR || ( ACmyprocR == AcurrocR ) ) || ( CisR || ( ACmyprocR == CcurrocR ) ) ) { Ckk = CiiR; Akk = AiiR; /* * Initialize local virtual matrix in process (p,q) */ AnpR = PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, ACnprocsR ); CnpR = PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, ACnprocsR ); PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q, maxp, maxq, lcmb ); /* * Figure out how many diagonal entries in this new virtual process (npq). */ npq = PB_CVMnpq( &VM ); /* * Re-adjust the number of rows or columns to be (un)packed, in order to average * the message sizes. */ if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 ); if( row2row ) { while( npq ) { kbb = MIN( kbb, npq ); /* * Find out how many rows of sub( A ) and sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); /* * Compute the descriptor DBUFA for the buffer that will contained the packed * rows of sub( A ). */ if( ( Afr = ( ncpq < kbb ) ) != 0 ) { /* * If rows of sub( A ) are not contiguous, then allocate the buffer and pack * the kbb rows of sub( A ). */ Abufld = kbb; if( AisR || ( ACmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * kbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN, kbb, AnpD, one, Mptr( A, Akk, AiiD, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( ACmyprocR == AcurrocR ) ) Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size ); } PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, AcurrocR, ArocD, ctxt, Abufld ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * rows of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < kbb ) ) != 0 ) { /* * If rows of sub( C ) are not contiguous, then allocate receiving buffer. */ Cbufld = kbb; talpha = one; tbeta = zero; if( CisR || ( ACmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * kbb * size ); } else { /* * Otherwise, re-use sub( C ) directly. */ Cbufld = Cld; talpha = ALPHA; tbeta = BETA; if( CisR || ( ACmyprocR == CcurrocR ) ) Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size ); } PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, CcurrocR, CrocD, ctxt, Cbufld ); /* * Add the one-dimensional buffer Abuf into Cbuf. */ PB_Cpaxpby( TYPE, CONJUG, kbb, ACnD, talpha, Abuf, 0, 0, DBUFA, &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc ); /* * Release the buffer containing the packed rows of sub( A ) */ if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Unpack the kbb rows of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb, CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, size ), Cld, ALPHA, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } /* * Update the local row indexes of sub( A ) and sub( C ) */ PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); npq -= kbb; } } else { while( npq ) { kbb = MIN( kbb, npq ); /* * Find out how many columns of sub( A ) and sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < kbb ) ) != 0 ) { /* * If columns of sub( A ) are not contiguous, then allocate the buffer and * pack the kbb columns of sub( A ). */ Abufld = MAX( 1, AnpD ); if( AisR || ( ACmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * kbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN, kbb, AnpD, one, Mptr( A, AiiD, Akk, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( ACmyprocR == AcurrocR ) ) Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size ); } PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD, AcurrocR, ctxt, Abufld ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * columns of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < kbb ) ) != 0 ) { /* * If columns of sub( C ) are not contiguous, then allocate receiving buffer. */ Cbufld = MAX( 1, CnpD ); talpha = one; tbeta = zero; if( CisR || ( ACmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * kbb * size ); } else { Cbufld = Cld; talpha = ALPHA; tbeta = BETA; if( CisR || ( ACmyprocR == CcurrocR ) ) Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size ); } PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD, CcurrocR, ctxt, Cbufld ); /* * Add the one-dimensional buffer Abuf into Cbuf. */ PB_Cpaxpby( TYPE, CONJUG, ACnD, kbb, talpha, Abuf, 0, 0, DBUFA, &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc ); /* * Release the buffer containing the packed columns of sub( A ) */ if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Unpack the kbb columns of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb, CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, size ), Cld, ALPHA, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } /* * Update the local row index of sub( A ) and the local column index of sub( C ) */ PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); npq -= kbb; } } } /* * Go to the next virtual process (p,q) */ if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) ) q = ( Afwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( Cfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); } /* * End of PB_Cpgeadd */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cptrsv.c0000640000175000017500000007760310363532303017647 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cptrsv( PBTYP_T * TYPE, int FBCAST, char * UPLO, char * TRANS, char * DIAG, int N, char * A, int IA, int JA, int * DESCA, char * XC, int INCXC, char * XR, int INCXR ) #else void PB_Cptrsv( TYPE, FBCAST, UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, XC, INCXC, XR, INCXR ) /* * .. Scalar Arguments .. */ char * DIAG, * TRANS, * UPLO; int FBCAST, IA, INCXC, INCXR, JA, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A, * XC, * XR; #endif { /* * Purpose * ======= * * PB_Cptrsv solves one of the systems of equations * * sub( A )*X = b, or sub( A )'*X = b, or conjg( sub( A )' )*X = b, * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). * * b and X are n element subvectors and sub( A ) is an n by n unit, or * non-unit, upper or lower triangular submatrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * FBCAST (global input) INTEGER * On entry, FBCAST specifies whether the transposed of the vec- * tor solution should be broadcast or not when there is a pos- * sible ambiguity, i.e. when sub( A ) is just one block. When * FBCAST is zero, the solution vector is not broadcast, and the * the solution vector is broadcast otherwise. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * X = b, * * TRANS = 'T' or 't' sub( A )' * X = b, * * TRANS = 'C' or 'c' conjg( sub( A )' ) * X = b. * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 0, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * XC (local input/local output) pointer to CHAR * On entry, XC is an array of dimension (LLD_X,Kx), where Kx is * at least 1 and LLD_X is at least Lr( IA, N ). Before entry, * when TRANS is 'N' or 'n' this array contains the local en- * tries of the right-hand-side vector b. When TRANS is not 'N' * or 'n', the entries of XC should be zero. On exit, this array * contains the partial solution vector x. * * INCXC (local input) INTEGER * On entry, INCXC specifies the local increment of the vector * XC. * * XR (local input/local output) pointer to CHAR * On entry, XR is an array of dimension (LLD_X,Kx), where Kx is * least Lc( JA, N ) and LLD_X at least 1. Before entry, when * TRANS is 'N' or 'n' the entries of XR should be zero. Other- * wise this array contains the local entries of the right-hand- * side vector b. On exit, this array contains the partial so- * lution vector x. * * INCXR (local input) INTEGER * On entry, INCXR specifies the local increment of the vector * XR. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char btop, * negone, * one, * zero; int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep, AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp, Anq, Arow, Asrc, ChangeRoc=0, bcst, ctxt, ione=1, k=0, kb, kbprev=0, kbsize, mb1, mycol, myrow, n1, n1last, n1p, n1pprev=0, nb1, nlast, npcol, nprow, rocprev, size, tmp1, tmp2; AXPY_T axpy; COPY_T copy; VVSET_T set; GEMV_T gemv; TRSV_T trsv; GESD2D_T send; GERV2D_T recv; GEBS2D_T bsend; GEBR2D_T brecv; /* * .. Local Arrays .. */ char * Aprev = NULL, * Xd = NULL, * Xdprev = NULL, * Xprev = NULL, * work = NULL; /* .. * .. Executable Statements .. * */ if( N <= 0 ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); /* * Determine if sub( A ) spans more than one process row, and/or more than one * process column. */ Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ]; Aimb1 = PB_Cfirstnb( N, IA, DESCA[IMB_], Amb ); Anp = PB_Cnumroc( N, 0, Aimb1, Amb, myrow, Arow, nprow ); Ais1Row = !( PB_Cspan( N, 0, Aimb1, Amb, Arow, nprow ) ); Ainb1 = PB_Cfirstnb( N, JA, DESCA[INB_], Anb ); Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol ); Ais1Col = !( PB_Cspan( N, 0, Ainb1, Anb, Acol, npcol ) ); /* * When sub( A ) spans only one process, solve the system locally and return. */ if( Ais1Row && Ais1Col ) { if( Mupcase( TRANS[0] ) == CNOTRAN ) { if( Anq > 0 ) { if( Anp > 0 ) { TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald, TYPE->size ), &Ald, XC, &INCXC ); TYPE->Fcopy( &Anp, XC, &INCXC, XR, &INCXR ); } if( ( Arow >= 0 ) && FBCAST ) { btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == Arow ) TYPE->Cgebs2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR ); else TYPE->Cgebr2d( ctxt, COLUMN, &btop, 1, Anq, XR, INCXR, Arow, mycol ); } } } else { if( Anp > 0 ) { if( Anq > 0 ) { TYPE->Ftrsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &N, Mptr( A, Aii, Ajj, Ald, TYPE->size ), &Ald, XR, &INCXR ); TYPE->Fcopy( &Anq, XR, &INCXR, XC, &INCXC ); } if( Acol >= 0 && FBCAST ) { btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == Acol ) TYPE->Cgebs2d( ctxt, ROW, &btop, Anp, 1, XC, Anp ); else TYPE->Cgebr2d( ctxt, ROW, &btop, Anp, 1, XC, Anp, myrow, Acol ); } } } return; } /* * Retrieve from TYPE structure useful BLAS and BLACS functions. */ size = TYPE->size; negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero; axpy = TYPE->Faxpy; copy = TYPE->Fcopy; set = TYPE->Fset; gemv = TYPE->Fgemv; trsv = TYPE->Ftrsv; send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d; bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d; if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size ); if( Mupcase( TRANS[0] ) == CNOTRAN ) { if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) && ( myrow != Arow ) ) ) ) return; btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) ); AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) ); if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Initiate lookahead */ nlast = ( npcol - 1 ) * Anb; n1 = MAX( nlast, Anb ); nlast += Ainb1; n1last = n1 - Anb + MAX( Ainb1, Anb ); work = PB_Cmalloc( MIN( n1last, Anp ) * size ); tmp1 = N-1; Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow ); Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol ); rocprev = Alcol; Anpprev = Anp; Xprev = XC; Xdprev = XR; Aprev = A = Mptr( A, 0, Anq, Ald, size ); mb1 = PB_Clastnb( N, 0, Aimb1, Amb ); nb1 = PB_Clastnb( N, 0, Ainb1, Anb ); tmp1 = N - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); Asrc = Arow; n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc, nprow ); while( N > 0 ) { kbsize = kb * size; if( Ais1Col || ( mycol == Alcol ) ) { A -= Ald * kbsize; Anq -= kb; Xd = Mptr( XR, 0, Anq, INCXR, size ); } if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; } /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) { tmp1 = ( Anpprev - n1pprev ) * size; gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone, Aprev+tmp1, &Ald, Xdprev, &INCXR, one, Xprev+tmp1, &INCXC ); } /* * Send partial updated result to current column */ if( !( Ais1Col ) && ChangeRoc ) { if( mycol == rocprev ) { send( ctxt, n1pprev, 1, Xprev+(Anpprev-n1pprev)*size, n1pprev, myrow, Alcol ); } else if( mycol == Alcol ) { recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev ); axpy( &n1pprev, one, work, &ione, Mptr( Xprev, Anpprev-n1pprev, 0, INCXC, size ), &INCXC ); } } } /* * Solve current diagonal block */ if( Ais1Col || ( mycol == Alcol ) ) { if( AisRowRep || ( myrow == Alrow ) ) { trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &kb, Mptr( A, Anp, 0, Ald, size ), &Ald, Mptr( XC, Anp, 0, INCXC, size ), &INCXC ); copy( &kb, Mptr( XC, Anp, 0, INCXC, size ), &INCXC, Mptr( XR, 0, Anq, INCXR, size ), &INCXR ); } if( bcst ) { if( myrow == Alrow ) bsend( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR, size ), INCXR ); else brecv( ctxt, COLUMN, &btop, 1, kb, Mptr( XR, 0, Anq, INCXR, size ), INCXR, Alrow, mycol ); } } else { if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) ) set( &kb, zero, Mptr( XC, Anp, 0, INCXC, size ), &ione ); } /* * Finish previous update */ if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev, &Ald, Xdprev, &INCXR, one, Xprev, &INCXC ); /* * Save info of current step and update info for the next step */ if( Ais1Col || ( mycol == Alcol ) ) { Xdprev = Xd; Aprev = A; } if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; } n1pprev = n1p; rocprev = Alcol; kbprev = kb; k += kb; N -= kb; mb1 -= kb; if( mb1 == 0 ) { if( !( Ais1Row ) && ( Alrow >= 0 ) ) Alrow = MModSub1( Alrow, nprow ); mb1 = ( N > Aimb1 ? Amb : Aimb1 ); } nb1 -= kb; ChangeRoc = ( nb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Col ) && ( Alcol >= 0 ) ) Alcol = MModSub1( Alcol, npcol ); nb1 = ( N > Ainb1 ? Anb : Ainb1 ); } tmp1 = N - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc, nprow ); } } else { /* * Initiate lookahead */ n1 = ( MAX( npcol, 2 ) - 1 ) * Anb; work = PB_Cmalloc( MIN( n1, Anp ) * size ); Aprev = A; Xprev = XC; Xdprev = XR; Anpprev = Anp; mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol; tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb; Asrc = Arow; n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc, nprow ); while( kb > 0 ) { kbsize = kb * size; /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) gemv( C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone, Aprev, &Ald, Xdprev, &INCXR, one, Xprev, &INCXC ); /* * Send partial updated result to current column */ if( !( Ais1Col ) && ChangeRoc ) { if( mycol == rocprev ) { send( ctxt, n1pprev, 1, Xprev, n1pprev, myrow, Acol ); } else if( mycol == Acol ) { recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev ); axpy( &n1pprev, one, work, &ione, Xprev, &INCXC ); } } } /* * Solve current diagonal block */ if( Ais1Col || ( mycol == Acol ) ) { if( AisRowRep || ( myrow == Arow ) ) { trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &kb, A, &Ald, XC, &INCXC ); copy( &kb, XC, &INCXC, XR, &INCXR ); } if( bcst ) { if( myrow == Arow ) bsend( ctxt, COLUMN, &btop, 1, kb, XR, INCXR ); else brecv( ctxt, COLUMN, &btop, 1, kb, XR, INCXR, Arow, mycol ); } } else { if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) ) set( &kb, zero, XC, &INCXC ); } /* * Finish previous update */ if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) { tmp2 = n1pprev * size; gemv( C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev+tmp2, &Ald, Xdprev, &INCXR, one, Xprev+tmp2, &INCXC ); } Aprev += Ald * kbprev * size; } /* * Save info of current step and update info for the next step */ if( Ais1Col || ( mycol == Acol ) ) { A += Ald*kbsize; Xdprev = Xd = XR; XR += INCXR*kbsize; } if( AisRowRep || ( myrow == Arow ) ) { Xprev = ( XC += kbsize ); A += kbsize; Aprev += kbsize; Anpprev = ( Anp -= kb ); } n1pprev = n1p; rocprev = Acol; kbprev = kb; k += kb; N -= kb; mb1 -= kb; if( mb1 == 0 ) { if( !( Ais1Row ) && ( Arow >= 0 ) ) Arow = MModAdd1( Arow, nprow ); mb1 = MIN( Amb, N ); } nb1 -= kb; ChangeRoc = ( nb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Col ) && ( Acol >= 0 ) ) Acol = MModAdd1( Acol, npcol ); nb1 = MIN( Anb, N ); } tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb; n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Aimb1, Amb, myrow, Asrc, nprow ); } } } else { if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) && ( mycol != Acol ) ) ) ) return; btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) ); AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) ); if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Initiate lookahead */ n1 = ( MAX( nprow, 2 ) - 1 ) * Amb; work = PB_Cmalloc( MIN( n1, Anq ) * size ); Aprev = A; Xprev = XR; Xdprev = XC; Anpprev = Anq; mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow; tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb; Asrc = Acol; n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc, npcol ); while( kb > 0 ) { kbsize = kb * size; /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone, Aprev, &Ald, Xdprev, &INCXC, one, Xprev, &INCXR ); /* * Send partial updated result to current row */ if( !( Ais1Row ) && ChangeRoc ) { if( myrow == rocprev ) { send( ctxt, 1, n1pprev, Xprev, INCXR, Arow, mycol ); } else if( myrow == Arow ) { recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol ); axpy( &n1pprev, one, work, &ione, Xprev, &INCXR ); } } } /* * Solve current diagonal block */ if( Ais1Row || ( myrow == Arow ) ) { if( AisColRep || ( mycol == Acol ) ) { trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &kb, A, &Ald, XR, &INCXR ); copy( &kb, XR, &INCXR, XC, &INCXC ); } if( bcst ) { if( mycol == Acol ) bsend( ctxt, ROW, &btop, kb, 1, XC, kb ); else brecv( ctxt, ROW, &btop, kb, 1, XC, kb, myrow, Acol ); } } else { if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) ) set( &kb, zero, XR, &INCXR ); } /* * Finish previous update */ if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) { tmp2 = n1pprev * size; gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone, Aprev+Ald*tmp2, &Ald, Xdprev, &INCXC, one, Xprev+INCXR*tmp2, &INCXR ); } Aprev += kbprev * size; } /* * Save info of current step and update info for the next step */ if( Ais1Row || ( myrow == Arow ) ) { A += kbsize; Xdprev = Xd = XC; XC += kbsize; } if( AisColRep || ( mycol == Acol ) ) { Xprev = ( XR += INCXR * kbsize ); A += Ald * kbsize; Anpprev = ( Anq -= kb ); Aprev += Ald * kbsize; } n1pprev = n1p; rocprev = Arow; kbprev = kb; k += kb; N -= kb; nb1 -= kb; if( nb1 == 0 ) { if( !( Ais1Col ) && ( Acol >= 0 ) ) Acol = MModAdd1( Acol, npcol ); nb1 = MIN( Anb, N ); } mb1 -= kb; ChangeRoc = ( mb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Row ) && ( Arow >= 0 ) ) Arow = MModAdd1( Arow, nprow ); mb1 = MIN( Amb, N ); } tmp1 = N - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb; n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k+kb, Ainb1, Anb, mycol, Asrc, npcol ); } } else { /* * Initiate lookahead */ nlast = ( nprow - 1 ) * Amb; n1 = MAX( nlast, Amb ); nlast += Aimb1; n1last = n1 - Amb + MAX( Aimb1, Amb ); work = PB_Cmalloc( MIN( n1last, Anq ) * size ); tmp1 = N-1; Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow ); Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol ); rocprev = Alrow; Anpprev = Anq; Xprev = XR; Xdprev = XC; Aprev = A = Mptr( A, Anp, 0, Ald, size ); mb1 = PB_Clastnb( N, 0, Aimb1, Amb ); nb1 = PB_Clastnb( N, 0, Ainb1, Anb ); tmp1 = N - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); Asrc = Acol; n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc, npcol ); while( N > 0 ) { kbsize = kb * size; if( Ais1Row || ( myrow == Alrow ) ) { A -= kbsize; Anp -= kb; Xd = Mptr( XC, Anp, 0, INCXC, size ); } if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; } /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) { tmp1 = ( Anpprev - n1pprev ) * size; gemv( C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone, Aprev+Ald*tmp1, &Ald, Xdprev, &INCXC, one, Xprev+INCXR*tmp1, &INCXR ); } /* * Send partial updated result to current row */ if( !( Ais1Row ) && ChangeRoc ) { if( myrow == rocprev ) { send( ctxt, 1, n1pprev, Mptr( Xprev, 0, Anpprev-n1pprev, INCXR, size ), INCXR, Alrow, mycol ); } else if( myrow == Alrow ) { recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol ); axpy( &n1pprev, one, work, &ione, Mptr( Xprev, 0, Anpprev-n1pprev, INCXR, size ), &INCXR ); } } } /* * Solve current diagonal block */ if( Ais1Row || ( myrow == Alrow ) ) { if( AisColRep || ( mycol == Alcol ) ) { trsv( C2F_CHAR( UPLO ), C2F_CHAR( TRANS ), C2F_CHAR( DIAG ), &kb, Mptr( A, 0, Anq, Ald, size ), &Ald, Mptr( XR, 0, Anq, INCXR, size ), &INCXR ); copy( &kb, Mptr( XR, 0, Anq, INCXR, size ), &INCXR, Mptr( XC, 0, Anp, INCXC, size ), &INCXC ); } if( bcst ) { if( mycol == Alcol ) bsend( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC, size ), kb ); else brecv( ctxt, ROW, &btop, kb, 1, Mptr( XC, 0, Anp, INCXC, size ), kb, myrow, Alcol ); } } else { if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) ) set( &kb, zero, Mptr( XR, 0, Anq, INCXR, size ), &INCXR ); } /* * Finish previous update */ if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) gemv( C2F_CHAR( TRANS ), &kbprev, &tmp1, negone, Aprev, &Ald, Xdprev, &INCXC, one, Xprev, &INCXR ); /* * Save info of current step and update info for the next step */ if( Ais1Row || ( myrow == Alrow ) ) { Xdprev = Xd; Aprev = A; } if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; } n1pprev = n1p; rocprev = Alrow; kbprev = kb; k += kb; N -= kb; nb1 -= kb; if( nb1 == 0 ) { if( !( Ais1Col ) && ( Alcol >= 0 ) ) Alcol = MModSub1( Alcol, npcol ); nb1 = ( N > Ainb1 ? Anb : Ainb1 ); } mb1 -= kb; ChangeRoc = ( mb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Row ) && ( Alrow >= 0 ) ) Alrow = MModSub1( Alrow, nprow ); mb1 = ( N > Aimb1 ? Amb : Aimb1 ); } tmp1 = N - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc, npcol ); } } } if( work ) free( work ); /* * End of PB_Cptrsv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cconjg.c0000640000175000017500000000416010363532303017555 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cconjg( PBTYP_T * TYPE, char * ALPHA, char * CALPHA ) #else void PB_Cconjg( TYPE, ALPHA, CALPHA ) /* * .. Scalar Arguments .. */ char * ALPHA, * CALPHA; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Cconjg conjugates of the scalar alpha. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * ALPHA (local input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * CALPHA (local output) pointer to CHAR * On exit, CALPHA contains the conjugate of the scalar alpha. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ switch( TYPE->type ) { case SCPLX: ((float*)(CALPHA))[REAL_PART] = ((float*)(ALPHA))[REAL_PART]; ((float*)(CALPHA))[IMAG_PART] = -((float*)(ALPHA))[IMAG_PART]; break; case DCPLX: ((double*)(CALPHA))[REAL_PART] = ((double*)(ALPHA))[REAL_PART]; ((double*)(CALPHA))[IMAG_PART] = -((double*)(ALPHA))[IMAG_PART]; break; case SREAL: ((float*)(CALPHA))[REAL_PART] = ((float*)(ALPHA))[REAL_PART]; break; case DREAL: ((double*)(CALPHA))[REAL_PART] = ((double*)(ALPHA))[REAL_PART]; break; case INT: *((int*)(CALPHA)) = *((int*)(ALPHA)); break; default: ; } /* * End of PB_Cconjg */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpsymmBC.c0000640000175000017500000007701110363532303020034 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpsymmBC( PBTYP_T * TYPE, char * DIRECAB, char * CONJUG, char * SIDE, char * UPLO, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpsymmBC( TYPE, DIRECAB, CONJUG, SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECAB, * SIDE, * UPLO; int IA, IB, IC, JA, JB, JC, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_CpsymmBC performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric or Hermitian * submatrix and sub( B ) and sub( C ) are m by n submatrices. * * This is the inner-product algorithm using the logical LCM hybrid * and static blocking techniques. The submatrix operand sub( A ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECAB (global input) pointer to CHAR * On entry, DIRECAB specifies the direction in which the rows * or columns of sub( B ) should be looped over as follows: * DIRECAB = 'F' or 'f' forward or increasing, * DIRECAB = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( A ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( A ) is symmetric, * CONJUG = 'Z' or 'z' sub( A ) is Hermitian. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether the symmetric or Hermitian * submatrix sub( A ) appears on the left or right in the opera- * tion as follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the submatrix sub( A ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( A ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( A ) are referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric or Hermitian submatrix sub( A ), such that when * UPLO = 'U' or 'u', this array contains the local entries of * the upper triangular part of the submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric or Hermitian submatrix sub( A ), and the local * entries of the strictly upper triangular of sub( A ) are not * referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric or Hermitian submatrix sub( A ), such that when * UPLO = 'U' or 'u', this array contains the local entries of * the upper triangular part of the submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric or Hermitian submatrix sub( A ), and the local * entries of the strictly upper triangular of sub( A ) are not * referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char GemmTa, GemmTb, cctop, * one, rctop, * talphaCR, * talphaRC, * tbeta, * zero; int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq, Alq0, Amb, Amp, An, Anb, Anq, Arow, BCfwd, BCmyprocD, BCmyprocR, BCnD, BCnR, BCnprocsD, BCnprocsR, Bbufld, BcurrocR, Bfr, BiD, BiR, BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, Bkk, Bld, BnbD, BnbR, BnpD, BnpR, Boff, BrocD, BrocR, BsrcR, Cfr, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R, Ckk, CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR, CsrcR, Cbufld, CcurrocR, CisR, Cld, WBCfr, WBCld, WBRfr, WBRld, WCCfr, WCCld, WCCsum, WCRfr, WCRld, WCRsum, conjg, ctxt, l, lb, lcmb, lside, ltmp, maxp, maxpm1, maxq, mycol, myrow, n, nb, nbb, ncpq, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp, upper; TZSYM_T tzsymm; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ PB_VM_T VM; int Ad0 [DLEN_], DBUFB[DLEN_], DBUFC[DLEN_], WBCd[DLEN_], WBRd[DLEN_], WCCd [DLEN_], WCRd [DLEN_]; char * Aptr = NULL, * Bbuf = NULL, * Cbuf = NULL, * WBC = NULL, * WBR = NULL, * WCC = NULL, * WCR = NULL; /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); BCfwd = ( Mupcase( DIRECAB[0] ) == CFORWARD ); conjg = ( Mupcase( CONJUG [0] ) == CCONJG ); lside = ( Mupcase( SIDE [0] ) == CLEFT ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ), sub( B ) and sub( C ) */ if( lside ) { BCnD = An = M; BCnR = N; BCmyprocD = myrow; BCnprocsD = nprow; BCmyprocR = mycol; BCnprocsR = npcol; BiD = IB; BiR = JB; BinbD = DESCB[IMB_ ]; BinbR = DESCB[INB_]; BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ]; BsrcR = DESCB[CSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BCnprocsD, BCnprocsR, BCmyprocD, BCmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); CiD = IC; CiR = JC; CinbD = DESCC[IMB_ ]; CinbR = DESCC[INB_]; CnbD = DESCC[MB_ ]; CnbR = DESCC[NB_ ]; CsrcR = DESCC[CSRC_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, BCnprocsD, BCnprocsR, BCmyprocD, BCmyprocR, &CiiD, &CiiR, &CrocD, &CrocR ); } else { BCnD = An = N; BCnR = M; BCmyprocD = mycol; BCnprocsD = npcol; BCmyprocR = myrow; BCnprocsR = nprow; BiD = JB; BiR = IB; BinbR = DESCB[IMB_ ]; BinbD = DESCB[INB_]; BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ]; BsrcR = DESCB[RSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BCnprocsR, BCnprocsD, BCmyprocR, BCmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); CiD = JC; CiR = IC; CinbR = DESCC[IMB_ ]; CinbD = DESCC[INB_]; CnbR = DESCC[MB_ ]; CnbD = DESCC[NB_ ]; CsrcR = DESCC[RSRC_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, BCnprocsR, BCnprocsD, BCmyprocR, BCmyprocD, &CiiR, &CiiD, &CrocR, &CrocD ); } Binb1D = PB_Cfirstnb( BCnD, BiD, BinbD, BnbD ); BnpD = PB_Cnumroc( BCnD, 0, Binb1D, BnbD, BCmyprocD, BrocD, BCnprocsD ); Binb1R = PB_Cfirstnb( BCnR, BiR, BinbR, BnbR ); BisR = ( ( BsrcR < 0 ) || ( BCnprocsR == 1 ) ); Cinb1D = PB_Cfirstnb( BCnD, CiD, CinbD, CnbD ); CnpD = PB_Cnumroc( BCnD, 0, Cinb1D, CnbD, BCmyprocD, CrocD, BCnprocsD ); Cinb1R = PB_Cfirstnb( BCnR, CiR, CinbR, CnbR ); CisR = ( ( CsrcR < 0 ) || ( BCnprocsR == 1 ) ); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size ); /* * Retrieve the BLACS combine topologies, compute conjugate of alpha for the * Hermitian case and set the transpose parameters to be passed to the BLAS * matrix multiply routine. */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( conjg ) { tzsymm = PB_Ctzhemm; if( lside ) { talphaRC = ALPHA; GemmTa = CCOTRAN; GemmTb = CTRAN; talphaCR = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talphaCR ); } else { talphaCR = ALPHA; GemmTa = CTRAN; GemmTb = CCOTRAN; talphaRC = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talphaRC ); } } else { tzsymm = PB_Ctzsymm; talphaCR = talphaRC = ALPHA; GemmTa = CTRAN; GemmTb = CTRAN; } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ Alcmb = 2 * nb * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process q owning the last row or column of sub( B ). */ if( !( BisR ) && !( BCfwd ) ) { tmp = PB_Cindxg2p( BCnR - 1, Binb1R, BnbR, BrocR, BrocR, BCnprocsR ); q = MModSub( tmp, BrocR, BCnprocsR ); } /* * When sub( C ) is not replicated and backward pass on sub( C ), find the * virtual process p owning the last row or column of sub( C ). */ if( !( CisR ) && !( BCfwd ) ) { tmp = PB_Cindxg2p( BCnR - 1, Cinb1R, CnbR, CrocR, CrocR, BCnprocsR ); p = MModSub( tmp, CrocR, BCnprocsR ); } /* * Loop over the virtual process grid induced by the rows or columns of * sub( B ) and sub( C ). */ lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : BCnprocsR ) ) * CnbR, ( maxq = ( BisR ? 1 : BCnprocsR ) ) * BnbR ); n = BCnR; maxpm1 = maxp - 1; while( n > 0 ) { /* * Initialize local virtual matrix in process (p,q) */ BcurrocR = ( BisR ? -1 : MModAdd( BrocR, q, BCnprocsR ) ); Bkk = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BCnprocsR ); BnpR = PB_Cnumroc( BCnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BCnprocsR ); CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, BCnprocsR ) ); Ckk = PB_Cg2lrem( CiR, CinbR, CnbR, CcurrocR, CsrcR, BCnprocsR ); CnpR = PB_Cnumroc( BCnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, BCnprocsR ); PB_CVMinit( &VM, 0, CnpR, BnpR, Cinb1R, Binb1R, CnbR, BnbR, p, q, maxp, maxq, lcmb ); /* * Find how many diagonals in this virtual process */ npq = PB_CVMnpq( &VM ); n -= npq; /* * Re-adjust the number of rows or columns to be (un)packed, in order to * average the message sizes. */ if( npq ) nbb = npq / ( ( npq - 1 ) / nb + 1 ); while( npq ) { nbb = MIN( nbb, npq ); /* * Find out how many rows or columns of sub( B ) and sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Boff ); if( lside ) { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * columns of sub( B ). */ if( ( Bfr = ( ncpq < nbb ) ) != 0 ) { /* * If columns of sub( B ) are not contiguous, then allocate the buffer and * pack the kbb columns of sub( B ). */ Bbufld = MAX( 1, BnpD ); if( BisR || ( BCmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * nbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, COLUMN, PACKING, NOTRAN, nbb, BnpD, one, Mptr( B, BiiD, Bkk, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BCmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BiiD, Bkk+Boff, Bld, size ); } PB_Cdescset( DBUFB, BCnD, nbb, Binb1D, nbb, BnbD, nbb, BrocD, BcurrocR, ctxt, Bbufld ); /* * Replicate this panel of columns of sub( B ) as well as its transposed * over sub( A ) -> WBC, WBR */ PB_CInV( TYPE, NOCONJG, COLUMN, An, An, Ad0, nbb, Bbuf, 0, 0, DBUFB, COLUMN, &WBC, WBCd, &WBCfr ); PB_CInV( TYPE, NOCONJG, ROW, An, An, Ad0, nbb, WBC, 0, 0, WBCd, COLUMN, &WBR, WBRd, &WBRfr ); } else { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * rows of sub( B ). */ if( ( Bfr = ( ncpq < nbb ) ) != 0 ) { /* * If rows of sub( B ) are not contiguous, then allocate the buffer and pack * the kbb rows of sub( B ). */ Bbufld = nbb; if( BisR || ( BCmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * nbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, ROW, PACKING, NOTRAN, nbb, BnpD, one, Mptr( B, Bkk, BiiD, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BCmyprocR == BcurrocR ) ) Bbuf = Mptr( B, Bkk+Boff, BiiD, Bld, size ); } PB_Cdescset( DBUFB, nbb, BCnD, nbb, Binb1D, nbb, BnbD, BcurrocR, BrocD, ctxt, Bbufld ); /* * Replicate this panel of rows of sub( B ) as well as its transposed * over sub( A ) -> WBR, WBC */ PB_CInV( TYPE, NOCONJG, ROW, An, An, Ad0, nbb, Bbuf, 0, 0, DBUFB, ROW, &WBR, WBRd, &WBRfr ); PB_CInV( TYPE, NOCONJG, COLUMN, An, An, Ad0, nbb, WBR, 0, 0, WBRd, ROW, &WBC, WBCd, &WBCfr ); } /* * Allocate space for temporary results in scope of sub( A ) -> WCC, WCR */ PB_COutV( TYPE, COLUMN, INIT, An, An, Ad0, nbb, &WCC, WCCd, &WCCfr, &WCCsum ); PB_COutV( TYPE, ROW, INIT, An, An, Ad0, nbb, &WCR, WCRd, &WCRfr, &WCRsum ); /* * Local matrix-matrix multiply iff I own some data */ WBCld = WBCd[LLD_]; WBRld = WBRd[LLD_]; WCCld = WCCd[LLD_]; WCRld = WCRd[LLD_]; if( ( Amp > 0 ) && ( Anq > 0 ) ) { if( upper ) { /* * sub( A ) is upper triangular */ for( l = 0; l < An; l += Alcmb ) { lb = An - l; lb = MIN( lb, Alcmb ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); if( Alp > 0 && Alq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &GemmTb ), &Alp, &nbb, &Alq0, talphaRC, Mptr( Aptr, 0, Alq, Ald, size ), &Ald, Mptr( WBR, 0, Alq, WBRld, size ), &WBRld, one, WCC, &WCCld ); gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( NOTRAN ), &nbb, &Alq0, &Alp, talphaCR, WBC, &WBCld, Mptr( Aptr, 0, Alq, Ald, size ), &Ald, one, Mptr( WCR, 0, Alq, WCRld, size ), &WCRld ); } PB_Cpsym( TYPE, TYPE, SIDE, UPPER, lb, nbb, ALPHA, Aptr, l, l, Ad0, Mptr( WBC, Alp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Alq, WBRld, size ), WBRld, Mptr( WCC, Alp, 0, WCCld, size ), WCCld, Mptr( WCR, 0, Alq, WCRld, size ), WCRld, tzsymm ); } } else { /* * sub( A ) is lower triangular */ for( l = 0; l < An; l += Alcmb ) { lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( TYPE, TYPE, SIDE, LOWER, lb, nbb, ALPHA, Aptr, l, l, Ad0, Mptr( WBC, Alp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Alq, WBRld, size ), WBRld, Mptr( WCC, Alp, 0, WCCld, size ), WCCld, Mptr( WCR, 0, Alq, WCRld, size ), WCRld, tzsymm ); Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Alp0 = Amp - Alp; Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); if( Alp0 > 0 && Alq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &GemmTb ), &Alp0, &nbb, &Alq0, talphaRC, Mptr( Aptr, Alp, Alq, Ald, size ), &Ald, Mptr( WBR, 0, Alq, WBRld, size ), &WBRld, one, Mptr( WCC, Alp, 0, WCCld, size ), &WCCld ); gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( NOTRAN ), &nbb, &Alq0, &Alp0, talphaCR, Mptr( WBC, Alp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Alp, Alq, Ald, size ), &Ald, one, Mptr( WCR, 0, Alq, WCRld, size ), &WCRld ); } } } } if( WBCfr ) free( WBC ); if( WBRfr ) free( WBR ); if( Bfr && ( BisR || ( BCmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); if( lside ) { /* * Accumulate the intermediate results in WCC and WCR */ if( WCCsum ) { WCCd[CSRC_] = CcurrocR; if( Amp > 0 ) gsum2d( ctxt, ROW, &rctop, Amp, nbb, WCC, WCCld, myrow, WCCd[CSRC_] ); } if( WCRsum ) { WCRd[RSRC_] = 0; if( Anq > 0 ) gsum2d( ctxt, COLUMN, &cctop, nbb, Anq, WCR, WCRld, WCRd[RSRC_], mycol ); } /* * WCC := WCC + WCR' */ PB_Cpaxpby( TYPE, CONJUG, nbb, An, one, WCR, 0, 0, WCRd, ROW, one, WCC, 0, 0, WCCd, COLUMN ); if( WCRfr ) free( WCR ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * columns of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < nbb ) ) != 0 ) { /* * If columns of sub( C ) are not contiguous, then allocate the buffer */ Cbufld = MAX( 1, CnpD ); tbeta = zero; if( CisR || ( BCmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * nbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( BCmyprocR == CcurrocR ) ) Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size ); } PB_Cdescset( DBUFC, BCnD, nbb, Cinb1D, nbb, CnbD, nbb, CrocD, CcurrocR, ctxt, Cbufld ); /* * sub( C ) := beta * sub( C ) + WCC */ PB_Cpaxpby( TYPE, NOCONJG, An, nbb, one, WCC, 0, 0, WCCd, COLUMN, tbeta, Cbuf, 0, 0, DBUFC, COLUMN ); if( WCCfr ) free( WCC ); /* * Unpack the kbb columns of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( BCmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, COLUMN, UNPACKING, NOTRAN, nbb, CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } } else { /* * Accumulate the intermediate results in WCC and WCR */ if( WCCsum ) { WCCd[CSRC_] = 0; if( Amp > 0 ) gsum2d( ctxt, ROW, &rctop, Amp, nbb, WCC, WCCld, myrow, WCCd[CSRC_] ); } if( WCRsum ) { WCRd[RSRC_] = CcurrocR; if( Anq > 0 ) gsum2d( ctxt, COLUMN, &cctop, nbb, Anq, WCR, WCRld, WCRd[RSRC_], mycol ); } /* * WCR := WCR + WCC' */ PB_Cpaxpby( TYPE, CONJUG, An, nbb, one, WCC, 0, 0, WCCd, COLUMN, one, WCR, 0, 0, WCRd, ROW ); if( WCCfr ) free( WCC ); /* * Compute the descriptor DBUFC for the buffer that will contained the packed * rows of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < nbb ) ) != 0 ) { /* * If rows of sub( C ) are not contiguous, then allocate receiving buffer. */ Cbufld = nbb; tbeta = zero; if( CisR || ( BCmyprocR == CcurrocR ) ) Cbuf = PB_Cmalloc( CnpD * nbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( BCmyprocR == CcurrocR ) ) Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size ); } PB_Cdescset( DBUFC, nbb, BCnD, nbb, Cinb1D, nbb, CnbD, CcurrocR, CrocD, ctxt, Cbufld ); /* * sub( C ) := beta * sub( C ) + WCR */ PB_Cpaxpby( TYPE, NOCONJG, nbb, An, one, WCR, 0, 0, WCRd, ROW, tbeta, Cbuf, 0, 0, DBUFC, ROW ); if( WCRfr ) free( WCR ); /* * Unpack the kbb rows of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( BCmyprocR == CcurrocR ) ) ) { PB_CVMpack( TYPE, &VM, ROW, ROW, UNPACKING, NOTRAN, nbb, CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } } /* * Update the local indexes of sub( B ) and sub( C ) */ PB_CVMupdate( &VM, nbb, &Ckk, &Bkk ); npq -= nbb; } /* * Go to next or previous virtual process row or column */ if( ( BCfwd && ( p == maxpm1 ) ) || ( !( BCfwd ) && ( p == 0 ) ) ) q = ( BCfwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( BCfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); } if( conjg ) free( ( lside ? talphaCR : talphaRC ) ); /* * End of PB_CpsymmBC */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrmmAB.c0000640000175000017500000012242710363532303020026 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrmmAB( PBTYP_T * TYPE, char * VARIANT, char * SIDE, char * UPLO, char * TRANSA, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB ) #else void PB_CptrmmAB( TYPE, VARIANT, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ char * DIAG, * SIDE, * TRANSA, * UPLO, * VARIANT; int IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CptrmmAB performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * This is the outer-product algorithm using the logical aggregation * blocking technique. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * VARIANT (global input) pointer to CHAR * On entry, VARIANT specifies whether the left- or right-loo- * king variant of the algorithm should be used for the transpo- * se cases only, that is TRANSA is not 'N' or 'n'. When VARIANT * is 'L' or 'l', the left-looking variant is used, otherwise * the right-looking algorithm is selected. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char conjg, * one, top, * zero; int Afr, Bcol, Bcurcol, Bcurimb1, Bcurinb1, Bcurrow, Bfr, Bii, Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq, Bnq0, Brow, WAfr, WBfr, WBsum, ctxt, k, kb, kbb, ktmp, lside, mycol, myrow, notran, npcol, nprow, size, unit, upper; /* * .. Local Arrays .. */ int Bd0[DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WAd[DLEN_], WBd[DLEN_]; char * Aptr = NULL, * Bptr = NULL, * Bptr0 = NULL, * WA = NULL, * WB = NULL; GEMM_T gemm; GSUM2D_T gsum2d; /* .. * .. Executable Statements .. * */ lside = ( Mupcase( SIDE [0] ) == CLEFT ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( Mupcase( TRANSA[0] ) == CNOTRAN ); unit = ( Mupcase( DIAG [0] ) == CUNIT ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gsum2d = TYPE->Cgsum2d; gemm = TYPE->Fgemm; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol, Bld ... */ Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); Bimb1 = PB_Cfirstnb( M, IB, Bimb, Bmb ); Bmp0 = PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( N, JB, Binb, Bnb ); Bnq0 = PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); if( notran ) { if( lside ) { if( upper ) { for( k = 0; k < M; k += kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) over * B( IB:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( k:k+kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, k, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, k+1, 0, WAd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over * B( IB:IB+k+kbb-1, JB:JB+N-1) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Zero B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, kbb, N, zero, zero, B, IB+k, JB, DESCB ); /* * B( IB:IB+k+kbb-1, JB:JB+N-1 ) := ALPHA * WA * WB */ Bmp = PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, ALPHA, WA, &WAd[LLD_], WB, &WBd[LLD_], one, Bptr0, &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb ) { ktmp = M - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over * B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, 1, WAd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over * B( IB+k:IB+M-1, JB:JB+N-1 ) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Zero B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, kbb, N, zero, zero, B, IB+k, JB, DESCB ); /* * B( IB+k:IB+M-1, JB:JB+N-1 ) := ALPHA * WA * WB */ Bmp = PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, ALPHA, WA, &WAd[LLD_], WB, &WBd[LLD_], one, Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } else { if( upper ) { for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over * B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 1, 0, WAd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over * B( IB:IB+M-1, JB+k:JB+N-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Zero B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, M, kbb, zero, zero, B, IB, JB+k, DESCB ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := ALPHA * WB * WA */ Bnq = PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, ALPHA, WB, &WBd[LLD_], WA, &WAd[LLD_], one, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { for( k = 0; k < N; k += kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, k:k+kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, k, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, k+1, WAd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Zero B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, M, kbb, zero, zero, B, IB, JB+k, DESCB ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := ALPHA * WB * WA */ Bnq = PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, ALPHA, WB, &WBd[LLD_], WA, &WAd[LLD_], one, Bptr0, &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } } else { if( Mupcase( VARIANT[0] ) == CRIGHT ) { /* * Right looking variant for the transpose cases */ conjg = ( ( Mupcase( TRANSA[0] ) == CCOTRAN ) ? CCONJG : CNOCONJG ); if( lside ) { if( !upper ) { for( k = 0; k < M; k += kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 )' over * B( IB:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( k:k+kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, k, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, k+1, 0, WAd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over * B( IB:IB+k+kbb-1, JB:JB+N-1) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Zero B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, kbb, N, zero, zero, B, IB+k, JB, DESCB ); /* * B( IB:IB+k+kbb-1, JB:JB+N-1 ) := ALPHA * WA * WB */ Bmp = PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, ALPHA, WA, &WAd[LLD_], WB, &WBd[LLD_], one, Bptr0, &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb ) { ktmp = M - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 )' over * B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, 1, WAd ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, kbb, N, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) over * B( IB+k:IB+M-1, JB:JB+N-1 ) */ PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Zero B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, kbb, N, zero, zero, B, IB+k, JB, DESCB ); /* * B( IB+k:IB+M-1, JB:JB+N-1 ) := ALPHA * WA * WB */ Bmp = PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, ALPHA, WA, &WAd[LLD_], WB, &WBd[LLD_], one, Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } else { if( !upper ) { for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 )' over * B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 1, 0, WAd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over * B( IB:IB+M-1, JB+k:JB+N-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Zero B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, M, kbb, zero, zero, B, IB, JB+k, DESCB ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := ALPHA * WB * WA */ Bnq = PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, ALPHA, WB, &WBd[LLD_], WA, &WAd[LLD_], one, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { for( k = 0; k < N; k += kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 )' over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, k:k+kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, k, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, k+1, WAd ); /* * Accumulate B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, M, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate WB := B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) */ PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Zero B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) */ PB_Cplapad( TYPE, ALL, NOCONJG, M, kbb, zero, zero, B, IB, JB+k, DESCB ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := ALPHA * WB * WA */ Bnq = PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, ALPHA, WB, &WBd[LLD_], WA, &WAd[LLD_], one, Bptr0, &Bld ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } } else { /* * Left looking variant for the transpose cases */ if( lside ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( upper ) { for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) over * B( IB:IB+k+kbb-1, JB:JB+N-1 ) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( k:k+kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, k, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, k+1, 0, WAd ); /* * WB := A( IA:IA+k+kbb-1, JA+k:JA+k+kbb-1 )' * B( IB:IB+k+kbb-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bmp = PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, ALPHA, WA, &WAd[LLD_], Bptr0, &Bld, zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[RSRC_] = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WB, WBd[LLD_], WBd[RSRC_], mycol ); } if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) := WB */ PB_CScatterV( TYPE, FORWARD, kbb, N, WB, 0, 0, WBd, ROW, zero, B, IB+k, JB, DESCB, ROW ); if( WBfr ) free( WB ); } } else { for( k = 0; k < M; k += kb ) { ktmp = M - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over * B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, 1, WAd ); /* * WB := A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 )' * B( IB+k:IB+M-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bmp = PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, ALPHA, WA, &WAd[LLD_], Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld, zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[RSRC_] = PB_Cindxg2p( k + kbb - 1, Bimb1, Bmb, Brow, Brow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WB, WBd[LLD_], WBd[RSRC_], mycol ); } if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) := WB */ PB_CScatterV( TYPE, BACKWARD, kbb, N, WB, 0, 0, WBd, ROW, zero, B, IB+k, JB, DESCB, ROW ); if( WBfr ) free( WB ); } } } else { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( upper ) { for( k = 0; k < N; k += kb ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, ALLOCATE, FORWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over * B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero lower triangle of WA( 0:kbb-1, 0:kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb, kbb, zero, one, WA, 0, 0, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 1, 0, WAd ); /* * WB := B( IB:IB+M-1, JB+k:JB+N-1 ) * A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 )' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bnq = PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &kbb, &Bnq, ALPHA, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld, WA, &WAd[LLD_], zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[CSRC_] = PB_Cindxg2p( k + kbb - 1, Binb1, Bnb, Bcol, Bcol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WB, WBd[LLD_], myrow, WBd[CSRC_] ); } if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := WB */ PB_CScatterV( TYPE, BACKWARD, M, kbb, WB, 0, 0, WBd, COLUMN, zero, B, IB, JB+k, DESCB, COLUMN ); if( WBfr ) free( WB ); } } else { for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, ALLOCATE, BACKWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate WA := A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 ) over * B( IB:IB+M-1, JB:JB+k+kbb-1 ) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Zero upper triangle of WA( 0:kbb-1, k:k+kbb-1 ) */ if( unit ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb, kbb, zero, one, WA, 0, k, WAd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WA, 0, k+1, WAd ); /* * WB := B( IB:IB+M-1, JB:JB+k+kbb-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+k+kbb-1 )' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bnq = PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &kbb, &Bnq, ALPHA, Bptr0, &Bld, WA, &WAd[LLD_], zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[CSRC_] = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WB, WBd[LLD_], myrow, WBd[CSRC_] ); } if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) := WB */ PB_CScatterV( TYPE, FORWARD, M, kbb, WB, 0, 0, WBd, COLUMN, zero, B, IB, JB+k, DESCB, COLUMN ); if( WBfr ) free( WB ); } } } } } /* * End of PB_CptrmmAB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzherk.c0000640000175000017500000001744210363532303017773 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzherk( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * AC, int LDAC, char * AR, int LDAR, char * C, int LDC ) #else void PB_Ctzherk( TYPE, UPLO, M, N, K, IOFFD, ALPHA, AC, LDAC, AR, LDAR, C, LDC ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDAC, LDAR, LDC, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * AC, * AR, * C; #endif { /* * Purpose * ======= * * PB_Ctzherk performs the trapezoidal symmetric or Hermitian rank k * operation: * * C := alpha * AC * AR + C, * * where alpha is a scalar, AC is an m by k matrix, AR is an k by n ma- * trix and C is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix C is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of C is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of C is referenced, * * otherwise all of the matrix C is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix C. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix C. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of columns of the matrix AC, * and the number of rows of the matrix AR. K must be at least * zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of C as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal C( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal C( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal C( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * AC (input) pointer to CHAR * On entry, AC is an array of dimension (LDAC,K) containing the * m by k matrix AC. * * LDAC (input) INTEGER * On entry, LDAC specifies the leading dimension of the array * AC. LDAC must be at least max( 1, M ). * * AR (input) pointer to CHAR * On entry, AR is an array of dimension (LDAR,N) containing the * k by n matrix AR. * * LDAR (input) INTEGER * On entry, LDAR specifies the leading dimension of the array * AR. LDAR must be at least K. * * C (input/output) pointer to CHAR * On entry, C is an array of dimension (LDC,N) containing the m * by n matrix A. Only the trapezoidal part of C determined by * UPLO and IOFFD is updated. * * LDC (input) INTEGER * On entry, LDC specifies the leading dimension of the array C. * LDC must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, j1, m1, mn, n1, size; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, AR, &LDAR, one, C, &LDC ); if( ( n1 = MIN( M-IOFFD, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fherk( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, one, Mptr( C, i1, j1, LDC, size ), &LDC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, AC, &LDAC, AR, &LDAR, one, C, &LDC ); TYPE->Fherk( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, m1, 0, LDAC, size ), &LDAC, one, Mptr( C, m1, j1, LDC, size ), &LDC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); } } else { TYPE->Fgemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, ALPHA, AC, &LDAC, AR, &LDAR, TYPE->one, C, &LDC ); } /* * End of PB_Ctzherk */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cindxg2p.c0000640000175000017500000000540610363532303020034 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cindxg2p( int IG, int INB, int NB, int PROC, int SRCPROC, int NPROCS ) #else int PB_Cindxg2p( IG, INB, NB, PROC, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int IG, INB, NB, NPROCS, PROC, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cindxg2p computes the process coordinate which posseses the entry * of a matrix specified by a global index IG. * * Arguments * ========= * * IG (global input) INTEGER * On entry, IG specifies the global index of the matrix entry. * IG must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local dummy) INTEGER * On entry, PROC is a dummy argument in this case in order to * unify the calling sequence of the tool-routines. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * IG belongs to the first block, or the data is not distributed, or there is * just one process in this dimension of the grid. */ return( SRCPROC ); /* * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and * take the NPROCS modulo (definition of the block-cyclic data distribution). */ PROC = SRCPROC + 1 + ( IG - INB ) / NB; return( MPosMod( PROC, NPROCS ) ); /* * End of PB_Cindxg2p */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cplacnjg.c0000640000175000017500000001673710363532303020110 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cplacnjg( PBTYP_T * TYPE, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA ) #else void PB_Cplacnjg( TYPE, M, N, ALPHA, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int IA, JA, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A; #endif { /* * Purpose * ======= * * PB_Cplacnjg conjugates and scales by alpha and an m by n submatrix * sub( A ) denoting A(IA:IA+M-1,JA:JA+N-1). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha, i.e., the cons- * tant with which the matrix elements are to be scaled. * * A (local input/local output) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A to be scaled. On exit, the * local entries of this array corresponding to the to the en- * tries of the submatrix sub( A ) are overwritten by the local * entries of the m by n conjugated and scaled submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amb, Amp, Anb, Anq, izero=0, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad0[DLEN_]; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Quick return if I don't own any of sub( A ). */ Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; /* * Conjugate and scale local data */ TYPE->Ftzcnjg( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( A, Aii, Ajj, Ald, TYPE->size ), &Ald ); /* * End of PB_Cplacnjg */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CargFtoC.c0000640000175000017500000002530010363532303020001 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CargFtoC( int IF, int JF, int * DESCIN, int * IC, int * JC, int * DESCOUT ) #else void PB_CargFtoC( IF, JF, DESCIN, IC, JC, DESCOUT ) /* * .. Scalar Arguments .. */ int IF, JF, * IC, * JC; /* * .. Array Arguments .. */ int * DESCIN, * DESCOUT; #endif { /* * Purpose * ======= * * PB_CargFtoC converts a descriptor DESCIN of type BLOCK_CYCLIC_2D * or BLOCK_CYCLIC_INB_2D into a descriptor DESCOUT of type * BLOCK_CYCLIC_INB_2D. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ----------------------------------- * DTYPE_A (global) DESCA( DTYPE1_ ) The descriptor type. * CTXT_A (global) DESCA( CTXT1_ ) The BLACS context handle indicating * the NPROW x NPCOL BLACS process * grid A is distributed over. The * context itself is global, but the * handle (the integer value) may * vary. * M_A (global) DESCA( M1_ ) The number of rows in the distri- * buted matrix A, M_A >= 0. * N_A (global) DESCA( N1_ ) The number of columns in the dis- * tributed matrix A, N_A >= 0. * MB_A (global) DESCA( MB1_ ) The blocking factor used to distri- * bute the rows of A, MB_A > 0. * NB_A (global) DESCA( NB1_ ) The blocking factor used to distri- * bute the columns of A, NB_A > 0. * RSRC_A (global) DESCA( RSRC1_ ) The process row over which the * first row of the matrix A is dis- * tributed, NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA( CSRC1_ ) The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA( LLD1_ ) The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, MB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, NB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * IF (global input) INTEGER * On entry, IF specifies the global row Fortran index of the * distributed subarray described by DESCIN. IF must be at least * one. * * JF (global input) INTEGER * On entry, JF specifies the global column Fortran index of * the distributed subarray described by DESCIN. JF must be at * least one. * * DESCIN (global and local input) INTEGER array * On entry, DESCIN is an array of dimension DLEN1_ or DLEN_ as * specified by its first entry DESCIN( DTYPE_ ). DESCIN is the * source array descriptor of type BLOCK_CYCLIC_2D or of type * BLOCK_CYCLIC_2D_INB. * * IC (global output) INTEGER * On exit, IC specifies the global row C index of the distribu- * ted subarray described by DESCOUT. IC = IF - 1, i.e IC is at * least zero. * * JC (global output) INTEGER * On entry, JC specifies the global column Fortran index of * the distributed subarray described by DESCOUT. JC = JF - 1, * i.e JC is at least zero. * * DESCOUT (global and local output) INTEGER array * On entry, DESCOUT is an array of dimension DLEN_. DESCOUT is * the target array descriptor of type BLOCK_CYCLIC_2D_INB. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ *IC = IF - 1; *JC = JF - 1; if ( DESCIN[DTYPE_] == BLOCK_CYCLIC_2D ) { DESCOUT[DTYPE_] = BLOCK_CYCLIC_2D_INB; DESCOUT[M_ ] = DESCIN[M1_ ]; DESCOUT[N_ ] = DESCIN[N1_ ]; DESCOUT[IMB_ ] = DESCIN[MB1_ ]; DESCOUT[INB_ ] = DESCIN[NB1_ ]; DESCOUT[MB_ ] = DESCIN[MB1_ ]; DESCOUT[NB_ ] = DESCIN[NB1_ ]; DESCOUT[RSRC_ ] = DESCIN[RSRC1_ ]; DESCOUT[CSRC_ ] = DESCIN[CSRC1_ ]; DESCOUT[CTXT_ ] = DESCIN[CTXT1_ ]; DESCOUT[LLD_ ] = DESCIN[LLD1_ ]; } else if ( DESCIN[DTYPE_] == BLOCK_CYCLIC_2D_INB ) { DESCOUT[DTYPE_] = BLOCK_CYCLIC_2D_INB; DESCOUT[M_ ] = DESCIN[M_ ]; DESCOUT[N_ ] = DESCIN[N_ ]; DESCOUT[IMB_ ] = DESCIN[IMB_ ]; DESCOUT[INB_ ] = DESCIN[INB_ ]; DESCOUT[MB_ ] = DESCIN[MB_ ]; DESCOUT[NB_ ] = DESCIN[NB_ ]; DESCOUT[RSRC_ ] = DESCIN[RSRC_ ]; DESCOUT[CSRC_ ] = DESCIN[CSRC_ ]; DESCOUT[CTXT_ ] = DESCIN[CTXT_ ]; DESCOUT[LLD_ ] = DESCIN[LLD_ ]; } else { DESCOUT[DTYPE_] = DESCIN[0]; DESCOUT[CTXT_ ] = DESCIN[1]; DESCOUT[M_ ] = 0; DESCOUT[N_ ] = 0; DESCOUT[IMB_ ] = 1; DESCOUT[INB_ ] = 1; DESCOUT[MB_ ] = 1; DESCOUT[NB_ ] = 1; DESCOUT[RSRC_ ] = 0; DESCOUT[CSRC_ ] = 0; DESCOUT[LLD_ ] = 1; } /* * End of PB_CargFtoC */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrmmB.c0000640000175000017500000005752610363532303017734 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrmmB( PBTYP_T * TYPE, char * DIRECB, char * SIDE, char * UPLO, char * TRANSA, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB ) #else void PB_CptrmmB( TYPE, DIRECB, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ char * DIAG, * DIRECB, * SIDE, * TRANSA, * UPLO; int IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CptrmmB performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * This is the inner-product algorithm using the logical LCM hybrid * and static blocking techniques. The submatrix operand sub( A ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECB (global input) pointer to CHAR * On entry, DIRECB specifies the direction in which the rows * or columns of sub( B ) should be looped over as follows: * DIRECB = 'F' or 'f' forward or increasing, * DIRECB = 'B' or 'b' backward or decreasing. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Broc, GemmTa, GemmTb, TranOp, WBroc, WCroc, conjg, * one, * talpha, * tbeta, top, * zero; int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq, Alq0, Amb, Amp, An, Anb, Anq, Arow, BcurrocR, Bfwd, BiiD, BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD, BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR, BsrcR, LNorRT, WBfr, WBld, WCfr, WCld, WCpbY, WCsum, ctxt, l, lb, lside, ltmp, mycol, myrow, n, nb, nbb, notran, npcol, nprow, p=0, size, tmp, upper; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Ad0[DLEN_], DBUFB[DLEN_], WCd[DLEN_], WBd[DLEN_]; char * Aptr = NULL, * Bptr = NULL, * WB = NULL, * WC = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD ); lside = ( Mupcase( SIDE [0] ) == CLEFT ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( ( TranOp = Mupcase( TRANSA[0] ) ) == CNOTRAN ); LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ) and sub( B ) */ if( lside ) { BnD = An = M; BnR = N; Broc = CCOLUMN; BmyprocD = myrow; BnprocsD = nprow; BmyprocR = mycol; BnprocsR = npcol; BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ]; BsrcR = DESCB[CSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD ); Binb1R = PB_Cfirstnb( BnR, JB, DESCB[INB_], BnbR ); } else { BnD = An = N; BnR = M; Broc = CROW; BmyprocD = mycol; BnprocsD = npcol; BmyprocR = myrow; BnprocsR = nprow; BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ]; BsrcR = DESCB[RSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD ); Binb1R = PB_Cfirstnb( BnR, IB, DESCB[IMB_], BnbR ); } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size ); /* * Compute conjugate of alpha for the conjugate transpose cases */ if( TranOp == CCOTRAN ) { conjg = CCONJG; talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); } else { conjg = CNOCONJG; talpha = ALPHA; } /* * Retrieve BLACS combine topology, set the transpose parameters to be passed * to the BLAS matrix multiply routine and finally describe the form of the * input and output operands. */ if( LNorRT ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); GemmTa = CNOTRAN; GemmTb = ( lside ? CTRAN : TranOp ); WCroc = CCOLUMN; WBroc = CROW; } else { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); GemmTb = CNOTRAN; GemmTa = ( lside ? TranOp : CTRAN ); WCroc = CROW; WBroc = CCOLUMN; } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ Alcmb = 2 * nb * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process p owning the last row or column of sub( B ). */ if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd ) { tmp = PB_Cindxg2p( BnR-1, Binb1R, BnbR, BrocR, BrocR, BnprocsR ); p = MModSub( tmp, BrocR, BnprocsR ); } /* * Loop over the processes rows or columns owning the BnR rows or columns of * sub( B ) to be processed. */ n = BnR; while( n > 0 ) { /* * Find out who is the active process row or column as well as the number of * rows or columns of sub( B ) it owns. */ BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) ); BnpR = PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR ); n -= BnpR; /* * Re-adjust the number of rows or columns to be handled at each step, in order * to average the message sizes and the computational granularity. */ if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 ); while( BnpR ) { nbb = MIN( nbb, BnpR ); /* * Describe the local contiguous panel of sub( B ) */ if( lside ) { PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD, BcurrocR, ctxt, Bld ); if( BisR || ( BmyprocR == BcurrocR ) ) Bptr = Mptr( B, BiiD, BiiR, Bld, size ); } else { PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR, BrocD, ctxt, Bld ); if( BisR || ( BmyprocR == BcurrocR ) ) Bptr = Mptr( B, BiiR, BiiD, Bld, size ); } /* * Replicate this panel in the process rows or columns spanned by sub( A ): WB */ PB_CInV( TYPE, NOCONJG, &WBroc, An, An, Ad0, nbb, Bptr, 0, 0, DBUFB, &Broc, &WB, WBd, &WBfr ); /* * Reuse sub( B ) and/or create vector WC in process columns or rows spanned by * sub( A ) */ PB_CInOutV( TYPE, &WCroc, An, An, Ad0, nbb, one, Bptr, 0, 0, DBUFB, &Broc, &tbeta, &WC, WCd, &WCfr, &WCsum, &WCpbY ); /* * When the input data is first transposed, zero it now for later accumulation */ if( notran ) PB_Cplapad( TYPE, ALL, NOCONJG, DBUFB[M_], DBUFB[N_], zero, zero, Bptr, 0, 0, DBUFB ); /* * Local matrix-matrix multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); WCld = WCd[LLD_]; if( ( Amp > 0 ) && ( Anq > 0 ) ) { WBld = WBd[LLD_]; if( upper ) { /* * sub( A ) is upper triangular */ if( LNorRT ) { for( l = 0; l < An; l += Alcmb ) { lb = An - l; lb = MIN( lb, Alcmb ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); if( Alp > 0 ) { Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp, &nbb, &Alq0, talpha, Mptr( Aptr, 0, Alq, Ald, size ), &Ald, Mptr( WB, 0, Alq, WBld, size ), &WBld, one, WC, &WCld ); } PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb, talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld, size ), WBld, Mptr( WC, Alp, 0, WCld, size ), WCld, PB_Ctztrmm ); } } else { for( l = 0; l < An; l += Alcmb ) { lb = An - l; lb = MIN( lb, Alcmb ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); if( Alq0 > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb, &Alq0, &Alp, talpha, WB, &WBld, Mptr( Aptr, 0, Alq, Ald, size ), &Ald, one, Mptr( WC, 0, Alq, WCld, size ), &WCld ); PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb, talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld, size ), WBld, Mptr( WC, 0, Alq, WCld, size ), WCld, PB_Ctztrmm ); } } } else { /* * sub( A ) is lower triangular */ if( LNorRT ) { for( l = 0; l < An; l += Alcmb ) { lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb, talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld, size ), WBld, Mptr( WC, Alp, 0, WCld, size ), WCld, PB_Ctztrmm ); Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Alp0 = Amp - Alp; Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); if( Alp0 > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp0, &nbb, &Alq0, talpha, Mptr( Aptr, Alp, Alq, Ald, size ), &Ald, Mptr( WB, 0, Alq, WBld, size ), &WBld, one, Mptr( WC, Alp, 0, WCld, size ), &WCld ); } } else { for( l = 0; l < An; l += Alcmb ) { lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) ); Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow ); Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb, talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld, size ), WBld, Mptr( WC, 0, Alq, WCld, size ), WCld, PB_Ctztrmm ); Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Alp0 = Amp - Alp; Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol ); if( Alq0 > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb, &Alq0, &Alp0, talpha, Mptr( WB, Alp, 0, WBld, size ), &WBld, Mptr( Aptr, Alp, Alq, Ald, size ), &Ald, one, Mptr( WC, 0, Alq, WCld, size ), &WCld ); } } } } if( WBfr ) free( WB ); if( LNorRT ) { /* * Combine the partial column results into WC */ if( WCsum && ( Amp > 0 ) ) gsum2d( ctxt, ROW, &top, Amp, nbb, WC, WCld, myrow, WCd[CSRC_] ); /* * sub( B ) := WC (if necessary) */ if( WCpbY ) PB_Cpaxpby( TYPE, &conjg, An, nbb, one, WC, 0, 0, WCd, &WCroc, zero, Bptr, 0, 0, DBUFB, &Broc ); } else { /* * Combine the partial row results into WC */ if( WCsum && ( Anq > 0 ) ) gsum2d( ctxt, COLUMN, &top, nbb, Anq, WC, WCld, WCd[RSRC_], mycol ); /* * sub( B ) := WC (if necessary) */ if( WCpbY ) PB_Cpaxpby( TYPE, &conjg, nbb, An, one, WC, 0, 0, WCd, &WCroc, zero, Bptr, 0, 0, DBUFB, &Broc ); } if( WCfr ) free( WC ); /* * Go to the next contiguous panel if any residing in this process row or column */ BnpR -= nbb; if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb; } /* * Go to next or previous process row or column owning some of sub( B ) */ if( !BisR ) p = ( Bfwd ? MModAdd1( p, BnprocsR ) : MModSub1( p, BnprocsR ) ); } if( TranOp == CCOTRAN ) free( talpha ); /* * End of PB_CptrmmB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cptrm.c0000640000175000017500000010504110363532303017437 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cptrm( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE, char * UPLO, char * TRANS, char * DIAG, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * X, int LDX, char * Y, int LDY, TZTRM_T TRM ) #else void PB_Cptrm( TYPE, UTYP, SIDE, UPLO, TRANS, DIAG, N, K, ALPHA, A, IA, JA, DESCA, X, LDX, Y, LDY, TRM ) /* * .. Scalar Arguments .. */ char * DIAG, * SIDE, * TRANS, * UPLO; int IA, JA, K, LDX, LDY, N; char * ALPHA; PBTYP_T * TYPE, * UTYP; TZTRM_T TRM; /* * .. Array Arguments .. */ int * DESCA; char * A, * X, * Y; #endif { /* * Purpose * ======= * * PB_Cptrm performs a triangular matrix-matrix or matrix-vector multi- * plication. In the following, sub( A ) denotes the triangular subma- * trix operand A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UTYP (local input) pointer to a PBTYP_T structure * On entry, UTYP is a pointer to a structure of type PBTYP_T, * that contains type information for the Y's (See pblas.h). * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) multiplies * its operand X from the left or right as follows: * * SIDE = 'L' or 'l' Y := alpha*op( sub( A ) )*X + Y, * * SIDE = 'R' or 'r' Y := alpha*X*op( sub( A ) ) + Y. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' Y := alpha * sub( A ) * X + Y, * * TRANS = 'T' or 't' Y := alpha * sub( A )' * X + Y, * * TRANS = 'C' or 'c' Y := alpha * sub( A )' * X + Y, or * Y := alpha * conjg(sub( A )') * X + Y. * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the local number of columns of the lo- * cal array X and the local number of rows of the local array * Y when SIDE is 'L' or 'l' and TRANS is 'N' or 'n', or SIDE is * 'R' or 'r' and TRANS is 'T', 't', 'C' or 'c'. Otherwise, K * specifies the local number of rows of the local array X and * the local number of columns of the local array Y. K mut be at * least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LDX,Kx), where Kx is * at least Lc( JA, N ) when SIDE is 'L' or 'l' and TRANS is 'N' * or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C' or * 'c', and K otherwise. Before entry, this array contains the * local entries of the matrix X. * * LDX (local input) INTEGER * On entry, LDX specifies the leading dimension of the array X. * LDX must be at least K when SIDE is 'L' or 'l' and TRANS is * 'N' or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C' * or 'c', and max( 1, Lp( IA, N ) ) otherwise. * * Y (local input/local output) pointer to CHAR * On entry, Y is an array of dimension ( LDY, Ky ), where Ky is * at least max( 1, K ) when SIDE is 'L' or 'l' and TRANS is 'N' * or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C' or * 'c', and max( 1, Lc( JA, N ) ) otherwise. Before entry, this * array contains the local entries of the matrix Y. On exit, * this array contains the updated vector Y. * * LDY (local input) INTEGER * On entry, LDY specifies the leading dimension of the array Y. * LDY must be at least max( 1, Lr( IA, N ) ) when SIDE is 'L' * or 'l' and TRANS is 'N' or 'n', or SIDE is 'R' or 'r' and * TRANS is 'T', 't', 'C' or 'c', and max( 1, K ) otherwise. * * TRM (local input) pointer to function of type TZTRM_T * On entry, TRM specifies the function performing the triangu- * lar multiplication of a single block. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq, Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl, IsRowRepl, Xinc, Yinc, XYii=0, XYjj=0, XYoffi=-1, XYoffj=-1, XisRow, iimax, ilow, imbloc, inbloc, ioffd, ioffxy, iupp, jjmax, joffd, joffxy, lcmt, lcmt00, lmbloc, lnbloc, low, lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( N <= 0 ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in * all processes. */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) ); IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) ); Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ]; size = TYPE->size; if( IsRowRepl && IsColRepl ) { TRM( TYPE, SIDE, UPLO, TRANS, DIAG, Amp, Anq, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, X, LDX, Y, LDY ); return; } if( Mupcase( SIDE[0] ) == CLEFT ) { if( Mupcase( TRANS[0] ) == CNOTRAN ) { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; } else { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; } } else { if( Mupcase( TRANS[0] ) == CNOTRAN ) { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; } else { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; } } upper = ( Mupcase( UPLO[0] ) == CUPPER ); lower = ( Mupcase( UPLO[0] ) == CLOWER ); /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( IsRowRepl ? Amb : nprow * Amb ); qnb = ( IsColRepl ? Anb : npcol * Anb ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( XisRow ) { /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX, Y+XYii*Yinc, LDY ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be operated with and * one is planning to go south in the table, it is neccessary to take care * of the remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, X+(XYjj+inbloc)*Xinc, LDX, Y+XYii*Yinc, LDY ); } Aii += imbloc; XYii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be operated with and * one is planning to go east in the table, it is neccessary to take care * of the remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX, Y+(XYii+imbloc)*Yinc, LDY ); } Ajj += inbloc; XYjj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row indexes in A and XC. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; XYoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row indexes in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; } /* * Operate with the upper triangular part of sub( A ) we just skipped when * necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); Aii += tmp1; XYii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; XYoffi = ioffxy; ioffd += mbloc; ioffxy += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY ); } tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; XYoffj += inbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A and XR. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; XYoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A and XR. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX, Y+XYii*Yinc, LDY ); Ajj += tmp1; XYjj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+(joffxy+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; XYoffj = joffxy; joffd += nbloc; joffxy += nbloc; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+(joffxy+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); } tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; XYoffi += imbloc; /* * Operate with the lower triangular part of sub( A ). */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX, Y+(XYoffi+1)*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); Aii += tmp1; XYii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; XYoffi = ioffxy; ioffd += mbloc; ioffxy += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY ); } tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; XYoffj += nbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc, LDX, Y+XYii*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } } else { /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+XYjj*Yinc, LDY ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be operated with and * one is planning to go south in the table, it is neccessary to take care * of the remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(XYjj+inbloc)*Yinc, LDY ); } Aii += imbloc; XYii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be operated with and * one is planning to go east in the table, it is neccessary to take care * of the remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, X+(XYii+imbloc)*Xinc, LDX, Y+XYjj*Yinc, LDY ); } Ajj += inbloc; XYjj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row indexes in A and XC. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; XYoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row indexes in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; } /* * Operate with the upper triangular part of sub( A ) we just skipped when * necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); Aii += tmp1; XYii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; XYoffi = ioffxy; ioffd += mbloc; ioffxy += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); } tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; XYoffj += inbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A and XR. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; XYoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A and XR. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+XYjj*Yinc, LDY ); Ajj += tmp1; XYjj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(joffxy+1)*Yinc, LDY ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; XYoffj = joffxy; joffd += nbloc; joffxy += nbloc; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(joffxy+1)*Yinc, LDY ); } tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; XYoffi += imbloc; /* * Operate with the lower triangular part of sub( A ). */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+(XYoffi+1)*Xinc, LDX, Y+XYjj*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); Aii += tmp1; XYii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; XYoffi = ioffxy; ioffd += mbloc; ioffxy += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); } tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; XYoffj += nbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) { TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY ); } Aii = Aoffi + 1; Ajj = Aoffj + 1; XYii = XYoffi + 1; XYjj = XYoffj + 1; } } /* * End of PB_Cptrm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctztrmm.c0000640000175000017500000002367010363532303020021 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctztrmm( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * TRANS, char * DIAG, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * B, int LDB, char * C, int LDC ) #else void PB_Ctztrmm( TYPE, SIDE, UPLO, TRANS, DIAG, M, N, K, IOFFD, ALPHA, A, LDA, B, LDB, C, LDC ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO, * TRANS, * DIAG; int IOFFD, K, LDA, LDB, LDC, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_Ctztrmm performs the matrix-matrix operation * * C := alpha * op( A ) * B, * * or * * C := alpha * B * op( A ), * * where alpha is a scalar, A is an m by n trapezoidal triangular ma- * trix, B is an k by n matrix when TRANS is 'N' or 'n' and an m by k * matrix otherwise, and op( A ) is one of * * op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (input) pointer to CHAR * On entry, SIDE specifies whether op( A ) multiplies B from * the left or right as follows: * * SIDE = 'L' or 'l' C := alpha * op( A ) * B, * * SIDE = 'R' or 'r' C := alpha * B * op( A ). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * TRANS (input) pointer to CHAR * On entry, TRANS specifies the form of op( A ) to be used as * follows: * * TRANS = 'N' or 'n': op( A ) = A, * * TRANS = 'T' or 't': op( A ) = A', * * TRANS = 'C' or 'c': op( A ) = A' or conjg( A' ). * * DIAG (input) pointer to CHAR * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of rows of the matrix B when * TRANS is 'N' or 'n', and the number of columns of the matrix * B otherwise. K must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. When DIAG = 'U' or 'u', the * diagonal elements of A are not referenced either, but are * assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * B (input) pointer to CHAR * On entry, B is an array of dimension (LDB,Kb). Before entry, * with TRANS = 'N' or 'n', the array B must contain the k by n * matrix B corresponding to the columns of A. Otherwise, the * array B must contain the m by k matrix B corresponding to the * rows of A. When TRANS is 'N' or 'n', LDB is at least K, and * Kb is at least N. Otherwise, LDB is at least max(1,M), and Kb * is at least K. * * LDB (input) INTEGER * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least K when TRANS is 'N' or 'n' and * max( 1, M ) otherwise. * * C (input/output) pointer to CHAR * On entry, C is an array of dimension (LDC,Kc). On exit, with * TRANS = 'N' or 'n', the array C contains the m by k matrix C * corresponding to the rows of A. Otherwise, the array C con- * tains the k by n matrix C corresponding to the columns of A. * When TRANS is 'N' or 'n', LDC is at least max( 1, M ), and Kc * is at least K. Otherwise, LDC is at least K, and Kc is at * least N. On exit, C is overwritten by the partial updated * matrix C. * * LDC (input) INTEGER * On entry, LDC specifies the leading dimension of the array C. * LDC must be at least max( 1, M ) when TRANS is 'N' or * 'n' and 1 otherwise. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Aptr = NULL; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( ( Mupcase( UPLO[0] ) == CLOWER ) || ( Mupcase( UPLO[0] ) == CUPPER ) ) { Aptr = PB_Cmalloc( M * N * TYPE->size ); TYPE->Ftzpadcpy( C2F_CHAR( UPLO ), C2F_CHAR( DIAG ), &M, &N, &IOFFD, A, &LDA, Aptr, &M ); if( Mupcase( SIDE[0] ) == CLEFT ) { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemm( C2F_CHAR( TRANS ), C2F_CHAR( TRAN ), &M, &K, &N, ALPHA, Aptr, &M, B, &LDB, TYPE->one, C, &LDC ); } else { TYPE->Fgemm( C2F_CHAR( TRANS ), C2F_CHAR( NOTRAN ), &K, &N, &M, ALPHA, B, &LDB, Aptr, &M, TYPE->one, C, &LDC ); } } else { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemm( C2F_CHAR( TRAN ), C2F_CHAR( TRANS ), &K, &N, &M, ALPHA, B, &LDB, Aptr, &M, TYPE->one, C, &LDC ); } else { TYPE->Fgemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANS ), &M, &K, &N, ALPHA, Aptr, &M, B, &LDB, TYPE->one, C, &LDC ); } } if( Aptr ) free( Aptr ); } else { if( Mupcase( SIDE[0] ) == CLEFT ) { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemm( C2F_CHAR( TRANS ), C2F_CHAR( TRAN ), &M, &K, &N, ALPHA, A, &LDA, B, &LDB, TYPE->one, C, &LDC ); } else { TYPE->Fgemm( C2F_CHAR( TRANS ), C2F_CHAR( NOTRAN ), &K, &N, &M, ALPHA, B, &LDB, A, &LDA, TYPE->one, C, &LDC ); } } else { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemm( C2F_CHAR( TRAN ), C2F_CHAR( TRANS ), &K, &N, &M, ALPHA, B, &LDB, A, &LDA, TYPE->one, C, &LDC ); } else { TYPE->Fgemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANS ), &M, &K, &N, ALPHA, A, &LDA, B, &LDB, TYPE->one, C, &LDC ); } } } /* * End of PB_Ctztrmm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpsyr2kA.c0000640000175000017500000011174510363532303020020 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpsyr2kA( PBTYP_T * TYPE, char * DIRECAB, char * CONJUG, char * UPLO, char * TRANS, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_Cpsyr2kA( TYPE, DIRECAB, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECAB, * TRANS, * UPLO; int IA, IB, IC, JA, JB, JC, K, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_Cpsyr2kA performs one of the following symmetric or Hermitian rank * 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( B ) )' + * conjg( alpha )*sub( B )*conjg( sub( A ) )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric or * Hermitian submatrix and sub( A ) and sub( B ) are n by k submatrices * in the first case and k by n submatrices in the second case. * * This is the outer-product algorithm using the logical LCM hybrid * and static blocking technique. The submatrix operand sub( C ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECAB (global input) pointer to CHAR * On entry, DIRECAB specifies the direction in which the rows * or columns of sub( A ) and sub( B ) should be looped over as * follows: * DIRECAB = 'F' or 'f' forward or increasing, * DIRECAB = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( C ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( C ) is symmetric, * CONJUG = 'Z' or 'z' sub( C ) is Hermitian. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the submatrix sub( C ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( C ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( C ) are referenced. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( B )' ) + * conjg( alpha )*sub( B )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * or * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't' or 'C' or 'c', K specifies the * number of rows of the submatrices sub( A ) and sub( B ). * K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly lower triangular * of sub( C ) are not referenced. On exit, the upper triangular * part of sub( C ) is overwritten by the upper triangular part * of the updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly upper triangular * of sub( C ) are not referenced. On exit, the lower triangular * part of sub( C ) is overwritten by the lower triangular part * of the updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one, * talpha, * zero; int ABfwd, ABmyprocD, ABmyprocR, ABnprocsD, ABnprocsR, ABrocs, Abufld, AcurrocR, Afr, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, AkkR, Ald, AnbD, AnbR, AnpD, AnpR, Aoff, ArocD, ArocR, AsrcR, Bbufld, BcurrocR, Bfr, BiD, BiR, BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, BkkR, Bld, BnbD, BnbR, BnpD, BnpR, Boff, BrocD, BrocR, BsrcR, Ccol, Cii, Cimb1, Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp, Cmp0, Cnb, Cnq, Crow, WACfr, WACld, WACsum, WARfr, WARld, WARsum, WBCfr, WBCld, WBCsum, WBRfr, WBRld, WBRsum, Wkbb=0, conjg, ctxt, k, kb, kbb, l, lb, lcmb, ltmp, maxp, maxpm1, maxq, mycol, myrow, ncpq, notran, npcol, npq, nprow, nrpq, p=0, q=0, size, tmp, upper; GEMM_T gemm; TZSYR2_T tzsyr2k; /* * .. Local Arrays .. */ PB_VM_T VM; int Cd0 [DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WACd0[DLEN_], WARd0[DLEN_], WBCd0[DLEN_], WBRd0[DLEN_]; char * Abuf = NULL, * Bbuf = NULL, * Cptr = NULL, * WAC = NULL, * WAR = NULL, * WBC = NULL, * WBR = NULL; /* .. * .. Executable Statements .. * */ /* * sub( C ) := beta * sub( C ) */ PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); conjg = ( Mupcase( CONJUG [0] ) == CCONJG ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute descriptor Cd0 for sub( C ) */ PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 ); Cmp = PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol ); if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { Cptr = Mptr( C, Cii, Cjj, Cld, size ); if( conjg ) { talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); tzsyr2k = PB_Ctzher2k; } else { talpha = ALPHA; tzsyr2k = PB_Ctzsyr2k; } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ Clcmb = 2 * kb * PB_Clcm( ( Crow >= 0 ? nprow : 1 ), ( Ccol >= 0 ? npcol : 1 ) ); } /* * Retrieve local information for sub( A ) and sub( B ) */ if( ( notran = ( Mupcase( TRANS[0] ) == CNOTRAN ) ) != 0 ) { ABnprocsR = npcol; AiR = JA; AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_]; BiR = JB; BinbR = DESCB[INB_]; BnbR = DESCB[NB_]; BsrcR = DESCB[CSRC_]; } else { ABnprocsR = nprow; AiR = IA; AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_]; BiR = IB; BinbR = DESCB[IMB_]; BnbR = DESCB[MB_]; BsrcR = DESCB[RSRC_]; } /* * If sub( A ) and sub( B ) only spans one process row or column, then there is * no need to pack the data. */ if( !( PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, ABnprocsR ) ) && !( PB_Cspan( K, BiR, BinbR, BnbR, BsrcR, ABnprocsR ) ) ) { /* * Replicate sub( A ) in process rows and columns spanned by sub( C ): WAR, WAC * Replicate sub( B ) in process rows and columns spanned by sub( C ): WBR, WBC */ if( notran ) { PB_CInV( TYPE, NOCONJG, COLUMN, N, N, Cd0, K, A, IA, JA, DESCA, COLUMN, &WAC, WACd0, &WACfr ); PB_CInV( TYPE, CONJUG, ROW, N, N, Cd0, K, WAC, 0, 0, WACd0, COLUMN, &WAR, WARd0, &WARfr ); PB_CInV( TYPE, NOCONJG, COLUMN, N, N, Cd0, K, B, IB, JB, DESCB, COLUMN, &WBC, WBCd0, &WBCfr ); PB_CInV( TYPE, CONJUG, ROW, N, N, Cd0, K, WBC, 0, 0, WBCd0, COLUMN, &WBR, WBRd0, &WBRfr ); } else { PB_CInV( TYPE, NOCONJG, ROW, N, N, Cd0, K, A, IA, JA, DESCA, ROW, &WAR, WARd0, &WARfr ); PB_CInV( TYPE, CONJUG, COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0, ROW, &WAC, WACd0, &WACfr ); PB_CInV( TYPE, NOCONJG, ROW, N, N, Cd0, K, B, IB, JB, DESCB, ROW, &WBR, WBRd0, &WBRfr ); PB_CInV( TYPE, CONJUG, COLUMN, N, N, Cd0, K, WBR, 0, 0, WBRd0, ROW, &WBC, WBCd0, &WBCfr ); } /* * Perform the local update if I own some data */ if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { WACld = WACd0[LLD_]; WBCld = WBCd0[LLD_]; WARld = WARd0[LLD_]; WBRld = WBRd0[LLD_]; if( Mupcase( UPLO[0] ) == CUPPER ) { for( l = 0; l < N; l += Clcmb ) { lb = N - l; lb = MIN( lb, Clcmb ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Clp > 0 && Cnq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &K, ALPHA, WAC, &WACld, Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &K, talpha, WBC, &WBCld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); } PB_Cpsyr2( TYPE, UPPER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Mptr( WBC, Clp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l, Cd0, tzsyr2k ); } } else { for( l = 0; l < N; l += Clcmb ) { lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); PB_Cpsyr2( TYPE, LOWER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Mptr( WBC, Clp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l, Cd0, tzsyr2k ); Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cmp0 = Cmp - Clp; Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Cmp0 > 0 && Cnq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld, Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &K, talpha, Mptr( WBC, Clp, 0, WBCld, size ), &WBCld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); } } } if( conjg ) free( talpha ); } if( WACfr ) free( WAC ); if( WARfr ) free( WAR ); if( WBCfr ) free( WBC ); if( WBRfr ) free( WBR ); return; } /* * Otherwise sub( A ) and sub( B ) spans more than one process row or columns */ ABfwd = ( Mupcase( DIRECAB[0] ) == CFORWARD ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); if( notran ) { ABmyprocD = myrow; ABmyprocR = mycol; ABnprocsD = nprow; AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; BiD = IB; BinbD = DESCB[IMB_]; BnbD = DESCB[MB_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IA, JA, DESCA, ABnprocsD, ABnprocsR, ABmyprocD, ABmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); PB_Cinfog2l( IB, JB, DESCB, ABnprocsD, ABnprocsR, ABmyprocD, ABmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); } else { ABmyprocD = mycol; ABmyprocR = myrow; ABnprocsD = npcol; AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; BiD = JB; BinbD = DESCB[INB_]; BnbD = DESCB[NB_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IA, JA, DESCA, ABnprocsR, ABnprocsD, ABmyprocR, ABmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); PB_Cinfog2l( IB, JB, DESCB, ABnprocsR, ABnprocsD, ABmyprocR, ABmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); } Ainb1D = PB_Cfirstnb( N, AiD, AinbD, AnbD ); AnpD = PB_Cnumroc( N, 0, Ainb1D, AnbD, ABmyprocD, ArocD, ABnprocsD ); Ainb1R = PB_Cfirstnb( K, AiR, AinbR, AnbR ); AisR = ( ( AsrcR < 0 ) || ( ABnprocsR == 1 ) ); Binb1D = PB_Cfirstnb( N, BiD, BinbD, BnbD ); BnpD = PB_Cnumroc( N, 0, Binb1D, BnbD, ABmyprocD, BrocD, ABnprocsD ); Binb1R = PB_Cfirstnb( K, BiR, BinbR, BnbR ); BisR = ( ( BsrcR < 0 ) || ( ABnprocsR == 1 ) ); /* * When sub( A ) is not replicated and backward pass on sub( A ), find the * virtual process q owning the last row or column of sub( A ). */ if( !( AisR ) && !( ABfwd ) ) { tmp = PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, ABnprocsR ); q = MModSub( tmp, ArocR, ABnprocsR ); } /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process p owning the last row or column of sub( B ). */ if( !( BisR ) && !( ABfwd ) ) { tmp = PB_Cindxg2p( K - 1, Binb1R, BnbR, BrocR, BrocR, ABnprocsR ); p = MModSub( tmp, BrocR, ABnprocsR ); } /* * Allocate work space in process rows and columns spanned by sub( C ) */ PB_COutV( TYPE, COLUMN, NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr, &WACsum ); PB_COutV( TYPE, ROW, NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr, &WARsum ); PB_COutV( TYPE, COLUMN, NOINIT, N, N, Cd0, kb, &WBC, WBCd0, &WBCfr, &WBCsum ); PB_COutV( TYPE, ROW, NOINIT, N, N, Cd0, kb, &WBR, WBRd0, &WBRfr, &WBRsum ); /* * Loop over the virtual process grid induced by the rows or columns of * sub( A ) and sub( B ) */ lcmb = PB_Clcm( ( maxp = ( BisR ? 1 : ABnprocsR ) ) * BnbR, ( maxq = ( AisR ? 1 : ABnprocsR ) ) * AnbR ); maxpm1 = maxp - 1; /* * Find out process coordinates corresponding to first virtual process (p,q) */ AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, ABnprocsR ) ); AkkR = PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, ABnprocsR ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, ABnprocsR ); BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, ABnprocsR ) ); BkkR = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, ABnprocsR ); BnpR = PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, ABnprocsR ); /* * Find out how many diagonals this virtual process (p,q) has */ PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q, maxp, maxq, lcmb ); npq = PB_CVMnpq( &VM ); for( k = 0; k < K; k += kb ) { kbb = K - k; kbb = MIN( kbb, kb ); while( Wkbb != kbb ) { /* * Ensure that the current virtual process (p,q) has something to contribute * to the replicated buffers WA and WB. */ while( npq == 0 ) { if( ( ABfwd && ( p == maxpm1 ) ) || ( !( ABfwd ) && ( p == 0 ) ) ) q = ( ABfwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( ABfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, ABnprocsR ) ); AkkR = PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, ABnprocsR ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, ABnprocsR ); BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, ABnprocsR ) ); BkkR = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, ABnprocsR ); BnpR = PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, ABnprocsR ); PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q, maxp, maxq, lcmb ); npq = PB_CVMnpq( &VM ); } /* * Current virtual process (p,q) has something, find out how many rows or * columns could be used: ABrocs. */ if( Wkbb == 0 ) { ABrocs = ( npq < kbb ? npq : kbb ); } else { ABrocs = kbb - Wkbb; ABrocs = MIN( ABrocs, npq ); } /* * Find out how many rows or columns of sub( A ) and sub( B ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Boff, &Aoff ); if( notran ) { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < ABrocs ) ) != 0 ) { /* * If columns of sub( A ) are not contiguous, then allocate the buffer and * pack the ABrocs columns of sub( A ). */ Abufld = MAX( 1, AnpD ); if( AisR || ( ABmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, COLUMN, COLUMN, PACKING, NOTRAN, ABrocs, AnpD, one, Mptr( A, AiiD, AkkR, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( ABmyprocR == AcurrocR ) ) Abuf = Mptr( A, AiiD, AkkR + Aoff, Ald, size ); } PB_Cdescset( DBUFA, N, ABrocs, Ainb1D, ABrocs, AnbD, ABrocs, ArocD, AcurrocR, ctxt, Abufld ); /* * Replicate panels of columns of sub( A ) over sub( C ) */ PB_CInV2( TYPE, NOCONJG, COLUMN, N, N, Cd0, ABrocs, Abuf, 0, 0, DBUFA, COLUMN, WAC, Wkbb, WACd0 ); if( Afr & ( AisR || ( ABmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Compute the descriptor DBUFB for the buffer that will contained the packed * columns of sub( B ). */ if( ( Bfr = ( nrpq < ABrocs ) ) != 0 ) { /* * If columns of sub( B ) are not contiguous, then allocate the buffer and * pack the ABrocs columns of sub( B ). */ Bbufld = MAX( 1, BnpD ); if( BisR || ( ABmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, ROW, COLUMN, PACKING, NOTRAN, ABrocs, BnpD, one, Mptr( B, BiiD, BkkR, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( ABmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BiiD, BkkR + Boff, Bld, size ); } PB_Cdescset( DBUFB, N, ABrocs, Binb1D, ABrocs, BnbD, ABrocs, BrocD, BcurrocR, ctxt, Bbufld ); /* * Replicate panels of columns of sub( A ) over sub( C ) */ PB_CInV2( TYPE, NOCONJG, COLUMN, N, N, Cd0, ABrocs, Bbuf, 0, 0, DBUFB, COLUMN, WBC, Wkbb, WBCd0 ); if( Bfr & ( BisR || ( ABmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); } else { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * rows of sub( A ). */ if( ( Afr = ( ncpq < ABrocs ) ) != 0 ) { /* * If rows of sub( A ) are not contiguous, then allocate the buffer and * pack the ABrocs rows of sub( A ). */ Abufld = ABrocs; if( AisR || ( ABmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, COLUMN, ROW, PACKING, NOTRAN, ABrocs, AnpD, one, Mptr( A, AkkR, AiiD, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( ABmyprocR == AcurrocR ) ) Abuf = Mptr( A, AkkR + Aoff, AiiD, Ald, size ); } PB_Cdescset( DBUFA, ABrocs, N, ABrocs, Ainb1D, ABrocs, AnbD, AcurrocR, ArocD, ctxt, Abufld ); /* * Replicate panels of rows of sub( A ) over sub( C ) */ PB_CInV2( TYPE, NOCONJG, ROW, N, N, Cd0, ABrocs, Abuf, 0, 0, DBUFA, ROW, WAR, Wkbb, WARd0 ); if( Afr & ( AisR || ( ABmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); /* * Compute the descriptor DBUFB for the buffer that will contained the packed * rows of sub( B ). */ if( ( Bfr = ( nrpq < ABrocs ) ) != 0 ) { /* * If rows of sub( B ) are not contiguous, then allocate the buffer and * pack the ABrocs rows of sub( B ). */ Bbufld = ABrocs; if( BisR || ( ABmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, ROW, ROW, PACKING, NOTRAN, ABrocs, BnpD, one, Mptr( B, BkkR, BiiD, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( ABmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BkkR + Boff, BiiD, Bld, size ); } PB_Cdescset( DBUFB, ABrocs, N, ABrocs, Binb1D, ABrocs, BnbD, BcurrocR, BrocD, ctxt, Bbufld ); /* * Replicate panels of rows of sub( B ) over sub( C ) */ PB_CInV2( TYPE, NOCONJG, ROW, N, N, Cd0, ABrocs, Bbuf, 0, 0, DBUFB, ROW, WBR, Wkbb, WBRd0 ); if( Bfr & ( BisR || ( ABmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); } /* * Update the local indexes of sub( A ) and sub( B ) */ PB_CVMupdate( &VM, ABrocs, &BkkR, &AkkR ); /* * ABrocs rows or columns of sub( A ) and sub( B ) have been replicated, * update the number of diagonals in this virtual process as well as the * number of rows or columns of sub( A ) and sub( B ) that are in WA, WB. */ npq -= ABrocs; Wkbb += ABrocs; } if( notran ) { /* * WAR := WAC' */ PB_CInV2( TYPE, CONJUG, ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0, COLUMN, WAR, 0, WARd0 ); /* * WBR := WBC' */ PB_CInV2( TYPE, CONJUG, ROW, N, N, Cd0, kbb, WBC, 0, 0, WBCd0, COLUMN, WBR, 0, WBRd0 ); } else { /* * WAC := WAR' */ PB_CInV2( TYPE, CONJUG, COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0, ROW, WAC, 0, WACd0 ); /* * WBC := WBR' */ PB_CInV2( TYPE, CONJUG, COLUMN, N, N, Cd0, kbb, WBR, 0, 0, WBRd0, ROW, WBC, 0, WBCd0 ); } /* * Perform the local update if I own some data */ if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { WACld = WACd0[LLD_]; WBCld = WBCd0[LLD_]; WARld = WARd0[LLD_]; WBRld = WBRd0[LLD_]; if( upper ) { for( l = 0; l < N; l += Clcmb ) { lb = N - l; lb = MIN( lb, Clcmb ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Clp > 0 && Cnq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &kbb, ALPHA, WAC, &WACld, Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &kbb, talpha, WBC, &WBCld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld ); } PB_Cpsyr2( TYPE, UPPER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Mptr( WBC, Clp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l, Cd0, tzsyr2k ); } } else { for( l = 0; l < N; l += Clcmb ) { lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) ); Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow ); Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol ); PB_Cpsyr2( TYPE, LOWER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld, Mptr( WBC, Clp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l, Cd0, tzsyr2k ); Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cmp0 = Cmp - Clp; Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol ); if( Cmp0 > 0 && Cnq0 > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld, Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0, &kbb, talpha, Mptr( WBC, Clp, 0, WBCld, size ), &WBCld, Mptr( WAR, 0, Clq, WARld, size ), &WARld, one, Mptr( Cptr, Clp, Clq, Cld, size ), &Cld ); } } } } Wkbb = 0; } if( WACfr ) free( WAC ); if( WARfr ) free( WAR ); if( WBCfr ) free( WBC ); if( WBRfr ) free( WBR ); if( conjg && ( Cmp > 0 ) && ( Cnq > 0 ) ) free( talpha ); /* * End of PB_Cpsyr2kA */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMswp.c0000640000175000017500000005264610363532303017545 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_CVMswp( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS, char * TRANS, int MN, char * X, int INCX, char * Y, int INCY ) #else int PB_CVMswp( TYPE, VM, VROCS, ROCS, TRANS, MN, X, INCX, Y, INCY ) /* * .. Scalar Arguments .. */ int INCX, INCY, MN; /* * .. Array Arguments .. */ char * VROCS, * ROCS, * TRANS; PBTYP_T * TYPE; PB_VM_T * VM; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CVMswp swaps a one-dimensional distributed vector X with another * one-dimensional distributed vector Y. This operation is triggered by * a virtual distributed array. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * VROCS (local input) pointer to CHAR * On entry, VROCS specifies if the rows or columns of the vir- * tual distributed array grid should be used for the swapping * operation as follows: * VROCS = 'R' or 'r', the rows should be used, * VROCS = 'C' or 'c', the columns should be used. * * ROCS (local input) pointer to CHAR * On entry, ROCS specifies if rows or columns should be swap- * ped as follows: * ROCS = 'R' or 'r', rows should be swapped, * ROCS = 'C' or 'c', columns should be swapped. * * TRANS (local input) pointer to CHAR * On entry, TRANS specifies if transposition should occur du- * ring the swapping operation as follows: * TRANS = 'N' or 'n', natural swapping, * otherwise, transposed swapping. * * MN (local input) INTEGER * On entry, MN specifies the number of rows or columns to be * swapped. MN must be at least zero. * * X (local input/local output) pointer to CHAR * On entry, X points to an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) where n is IMBLOC+(MBLKS-2)*MB+ * LMB when VROCS is 'R' or 'r', and INBLOC+(NBLKS-2)*NB+LNB * otherwise. Before entry, the incremented array X must contain * the vector x. On exit, the entries of the incremented array X * are exchanged with the entries of the incremented array Y. * * INCX (local input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (local input/local output) pointer to CHAR * On entry, Y points to an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ) where n is IMBLOC+(MBLKS-2)*MB+ * LMB when VROCS is 'C' or 'c', and INBLOC+(NBLKS-2)*NB+LNB * otherwise. Before entry, the incremented array Y must contain * the vector y. On exit, the entries of the incremented array Y * are exchanged with the entries of the incremented array X. * * INCY (local input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, Xinc, Yinc, ilow, imbloc, inbloc, iupp, kb, lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc, nb, nblkd, nblks, nbloc, notran, npcol, npq=0, nprow, pmb, qnb, rows, size, tmp1, tmp2, upp; char * Xptrd, * Yptrd; /* .. * .. Executable Statements .. * */ mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks. */ if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 ); /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp; iupp = VM->iupp; nprow = VM->nprow; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low; ilow = VM->ilow; npcol = VM->npcol; notran = ( Mupcase( TRANS[0] ) == CNOTRAN ); size = TYPE->size; rows = ( Mupcase( ROCS[0] ) == CROW ); if( Mupcase( VROCS[0] ) == CROW ) { /* * (un)packing using rows of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A. */ Xinc = size; Yinc = ( notran ? size : INCY * size ); } else { /* * (un)packing columns of k by mn array A */ Xinc = INCX * size; Yinc = ( notran ? INCY * size : size ); } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( mblks < 2 ) ? imbloc : imbloc + ( mblks - 2 ) * mb + lmbloc ); npq = MIN( npq, kb ); if( rows ) TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); else TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X+lcmt00*Xinc, &INCX, Y, &INCY ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Y-lcmt00*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to X. The pointer to Y remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to X accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; X += mb * Xinc; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; Xptrd = X; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to Y. The pointer to X remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * Y accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Y += nb * Yinc; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; Yptrd = Y; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X+lcmt*Xinc, &INCX, Yptrd, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; Yptrd += nbloc * Yinc; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; X += mb * Xinc; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Y += nb * Yinc; } if( lcmt00 <= upp ) break; } } if( !mblks || !nblks ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; Xptrd = X; while( mblkd && lcmt >= low ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; Y += nbloc * Yinc; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } else { /* * (un)packing using columns of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A */ Xinc = size; Yinc = ( notran ? size : INCY * size ); } else { /* * (un)packing columns of k by mn array A */ Xinc = INCX * size; Yinc = ( notran ? INCY * size : size ); } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( nblks < 2 ) ? inbloc : inbloc + ( nblks - 2 ) * nb + lnbloc ); npq = MIN( npq, kb ); if( rows ) TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); else TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Y+lcmt00*Yinc, &INCY ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt00*Xinc, &INCX, Y, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to Y. The pointer to X remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to Y accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Y += mb * Yinc; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; Yptrd = Y; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to X. The pointer to Y remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * X accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; X += nb * Xinc; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; Xptrd = X; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y+lcmt*Yinc, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd-lcmt*Xinc, &INCX, Y, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; Xptrd += nbloc * Xinc; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Y += mb * Yinc; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; X += nb * Xinc; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; Yptrd = Y; /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ while( mblkd && lcmt >= low ) { mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; X += nbloc * Xinc; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } /* * End of PB_CVMswp */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cptrsm.c0000640000175000017500000010664010363532303017630 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cptrsm( PBTYP_T * TYPE, int FBCAST, char * SIDE, char * UPLO, char * TRANS, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BC, int LDBC, char * BR, int LDBR ) #else void PB_Cptrsm( TYPE, FBCAST, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, BC, LDBC, BR, LDBR ) /* * .. Scalar Arguments .. */ char * ALPHA, * DIAG, * SIDE, * TRANS, * UPLO; int FBCAST, IA, JA, LDBC, LDBR, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA; char * A, * BC, * BR; #endif { /* * Purpose * ======= * * PB_Cptrsm solves one of the matrix equations * * op( sub( A ) ) * X = B, or X * op( sub( A ) ) = alpha * B, * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R'. * * X and B are m by n submatrices, sub( A ) is a unit, or non-unit, * upper or lower triangular submatrix and op( Y ) is one of * * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ). * * The submatrix X is overwritten on B. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * FBCAST (global input) INTEGER * On entry, FBCAST specifies whether the transposed of the vec- * tor solution should be broadcast or not when there is a pos- * sible ambiguity, i.e. when sub( A ) is just one block. When * FBCAST is zero, the solution vector is not broadcast, and the * the solution vector is broadcast otherwise. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) ) * X = B, * * SIDE = 'R' or 'r' X * op( sub( A ) ) = B. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * X = B, * * TRANS = 'T' or 't' sub( A )' * X = B, * * TRANS = 'C' or 'c' conjg( sub( A )' ) * X = B. * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix B. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * B. N must be at least zero. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 0, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 0, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BC (local input/local output) pointer to CHAR * On entry, BC is an array of dimension (LDBC,Kbc), where Kbc * is at least N when SIDE is 'L' or 'l' and at least M other- * wise. Before entry, when SIDE is 'L' or 'l' and TRANS is 'N' * or 'n' or SIDE is 'R' or 'r' and TRANS is not 'N' or 'n', * this array contains the local entries of the right-hand-side * matrix B. Otherwise, the entries of BC should be zero. On * exit, this array contains the partial solution matrix X. * * LDBC (local input) INTEGER * On entry, LDBC specifies the leading dimension of the array * BC. LDBC must be at least MAX( 1, Lr( IA, M ) ) when SIDE * is 'L' or 'l' and at least MAX( 1, Lr( IA, N ) ) otherwise. * * BR (local input/local output) pointer to CHAR * On entry, BR is an array of dimension (LDBR,Kbr), where Kbr * is at least Lc( JA, M ) when SIDE is 'L' or 'l' and at least * Lc( JA, N ) otherwise. Before entry, when SIDE is 'L' or 'l' * and TRANS is 'N' or 'n' or SIDE is 'R' or 'r' and TRANS is * not 'N' or 'n', the entries of BR should be zero. Otherwise, * this array contains the local entries of the right-hand-side * matrix B. On exit, this array contains the partial solution * matrix X. * * LDBR (local input) INTEGER * On entry, LDBR specifies the leading dimension of the array * BR. LDBR must be at least MAX( 1, N ) when SIDE is 'L' or 'l' * and at least MAX( 1, M ) otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char btop, * negone, * one, * talpha1, * talpha2, * zero; int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep, AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp, Anq, Arow, Asrc, ChangeRoc=0, LNorRT, Na, Nb, bcst, ctxt, izero=0, k=0, kb, kbprev=0, kbsize, lside, mb1, mycol, myrow, n1, n1last, n1p, n1pprev=0, nb1, nlast, notran, npcol, nprow, rocprev, size, tmp1, tmp2; MMADD_T add, tadd; TZPAD_T pad; GEMM_T gemm; TRSM_T trsm; GESD2D_T send; GERV2D_T recv; GEBS2D_T bsend; GEBR2D_T brecv; /* * .. Local Arrays .. */ char * Aprev = NULL, * Bd = NULL, * Bdprev = NULL, * Bprev = NULL, * work = NULL; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); lside = ( Mupcase( SIDE [0] ) == CLEFT ); notran = ( Mupcase( TRANS[0] ) == CNOTRAN ); LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) ); if( LNorRT ) { Na = M; Nb = N; } else { Na = N; Nb = M; } /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); /* * Determine if sub( A ) spans more than one process row, and/or more than one * process column. */ Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ]; Aimb1 = PB_Cfirstnb( Na, IA, DESCA[IMB_], Amb ); Anp = PB_Cnumroc( Na, 0, Aimb1, Amb, myrow, Arow, nprow ); Ais1Row = !( PB_Cspan( Na, 0, Aimb1, Amb, Arow, nprow ) ); Ainb1 = PB_Cfirstnb( Na, JA, DESCA[INB_], Anb ); Anq = PB_Cnumroc( Na, 0, Ainb1, Anb, mycol, Acol, npcol ); Ais1Col = !( PB_Cspan( Na, 0, Ainb1, Anb, Acol, npcol ) ); /* * When sub( A ) spans only one process, solve the system locally and return. */ if( Ais1Row && Ais1Col ) { if( LNorRT ) { if( Anq > 0 ) { if( Anp > 0 ) { TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT : LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, TYPE->size ), &Ald, BC, &LDBC ); TYPE->Fmmtadd( &M, &N, TYPE->one, BC, &LDBC, TYPE->zero, BR, &LDBR ); } if( ( Arow >= 0 ) && FBCAST ) { btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == Arow ) TYPE->Cgebs2d( ctxt, COLUMN, &btop, N, M, BR, LDBR ); else TYPE->Cgebr2d( ctxt, COLUMN, &btop, N, M, BR, LDBR, Arow, mycol ); } } } else { if( Anp > 0 ) { if( Anq > 0 ) { TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT : LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, TYPE->size ), &Ald, BR, &LDBR ); TYPE->Fmmtadd( &M, &N, TYPE->one, BR, &LDBR, TYPE->zero, BC, &LDBC ); } if( ( Acol >= 0 ) && FBCAST ) { btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == Acol ) TYPE->Cgebs2d( ctxt, ROW, &btop, N, M, BC, LDBC ); else TYPE->Cgebr2d( ctxt, ROW, &btop, N, M, BC, LDBC, myrow, Acol ); } } } return; } /* * Retrieve from TYPE structure useful BLAS and BLACS functions. */ size = TYPE->size; negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero; add = TYPE->Fmmadd; tadd = TYPE->Fmmtadd; pad = TYPE->Ftzpad; gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm; send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d; bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d; if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size ); if( LNorRT ) { /* * Left - No tran or Right - (co)Trans */ if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) && ( myrow != Arow ) ) ) ) return; btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) ); AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) ); if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Initiate lookahead */ nlast = ( npcol - 1 ) * Anb; n1 = MAX( nlast, Anb ); nlast += Ainb1; n1last = n1 - Anb + MAX( Ainb1, Anb ); work = PB_Cmalloc( Nb * MIN( n1last, Anp ) * size ); tmp1 = Na-1; Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow ); Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol ); rocprev = Alcol; Anpprev = Anp; Bprev = BC; Bdprev = BR; Aprev = A = Mptr( A, 0, Anq, Ald, size ); mb1 = PB_Clastnb( Na, 0, Aimb1, Amb ); nb1 = PB_Clastnb( Na, 0, Ainb1, Anb ); tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Col || ( Na - nb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); Asrc = Arow; n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc, nprow ); talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Alcol ) ) ? ALPHA : one ); while( Na > 0 ) { kbsize = kb * size; if( Ais1Col || ( mycol == Alcol ) ) { A -= Ald*kbsize; Anq -= kb; Bd = Mptr( BR, 0, Anq, LDBR, size ); } if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; } /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) { tmp1 = ( Anpprev - n1pprev ) * size; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb, &kbprev, negone, Aprev+tmp1, &Ald, Bdprev, &LDBR, talpha1, Bprev+tmp1, &LDBC ); } /* * Send partial updated result to current column */ if( !( Ais1Col ) && ChangeRoc ) { if( mycol == rocprev ) { send( ctxt, n1pprev, Nb, Mptr( Bprev, Anpprev-n1pprev, 0, LDBC, size ), LDBC, myrow, Alcol ); } else if( mycol == Alcol ) { recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev ); add( &n1pprev, &Nb, one, work, &n1pprev, one, Mptr( Bprev, Anpprev-n1pprev, 0, LDBC, size ), &LDBC ); } } } /* * Solve current diagonal block */ if( Ais1Col || ( mycol == Alcol ) ) { if( AisRowRep || ( myrow == Alrow ) ) { trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Nb, talpha2, Mptr( A, Anp, 0, Ald, size ), &Ald, Mptr( BC, Anp, 0, LDBC, size ), &LDBC ); tadd( &kb, &Nb, one, Mptr( BC, Anp, 0, LDBC, size ), &LDBC, zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR ); } if( bcst ) { if( myrow == Alrow ) bsend( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR, size ), LDBR ); else brecv( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR, size ), LDBR, Alrow, mycol ); } talpha2 = one; } else { if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero, zero, zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC ); } /* * Finish previous update */ if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb, &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1, Bprev, &LDBC ); talpha1 = one; } /* * Save info of current step and update info for the next step */ if( Ais1Col || ( mycol == Alcol ) ) { Bdprev = Bd; Aprev = A; } if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; } n1pprev = n1p; rocprev = Alcol; kbprev = kb; k += kb; Na -= kb; mb1 -= kb; if( mb1 == 0 ) { if( !( Ais1Row ) && ( Alrow >= 0 ) ) Alrow = MModSub1( Alrow, nprow ); mb1 = ( Na > Aimb1 ? Amb : Aimb1 ); } nb1 -= kb; ChangeRoc = ( nb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Col ) && ( Alcol >= 0 ) ) Alcol = MModSub1( Alcol, npcol ); nb1 = ( Na > Ainb1 ? Anb : Ainb1 ); } tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Col || ( Na-nb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc, nprow ); } } else { /* * Initiate lookahead */ n1 = ( MAX( npcol, 2 ) - 1 ) * Anb; work = PB_Cmalloc( Nb*MIN( n1, Anp )*size ); Aprev = A; Bprev = BC, Bdprev = BR; Anpprev = Anp; mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol; tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb; Asrc = Arow; n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc, nprow ); talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Acol ) ) ? ALPHA : one ); while( kb > 0 ) { kbsize = kb * size; /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb, &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1, Bprev, &LDBC ); /* * Send partial updated result to current column */ if( !( Ais1Col ) && ChangeRoc ) { if( mycol == rocprev ) { send( ctxt, n1pprev, Nb, Bprev, LDBC, myrow, Acol ); } else if( mycol == Acol ) { recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev ); add( &n1pprev, &Nb, one, work, &n1pprev, one, Bprev, &LDBC ); } } } /* * Solve current diagonal block */ if( Ais1Col || ( mycol == Acol ) ) { if( AisRowRep || ( myrow == Arow ) ) { trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Nb, talpha2, A, &Ald, BC, &LDBC ); tadd( &kb, &Nb, one, BC, &LDBC, zero, BR, &LDBR ); } if( bcst ) { if( myrow == Arow ) bsend( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR ); else brecv( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR, Arow, mycol ); } talpha2 = one; } else { if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero, zero, zero, BC, &LDBC ); } /* * Finish previous update */ if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) { tmp2 = n1pprev * size; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb, &kbprev, negone, Aprev+tmp2, &Ald, Bdprev, &LDBR, talpha1, Bprev+tmp2, &LDBC ); } Aprev += Ald * kbprev * size; talpha1 = one; } /* * Save info of current step and update info for the next step */ if( Ais1Col || ( mycol == Acol ) ) { A += Ald*kbsize; Bdprev = Bd = BR; BR += LDBR*kbsize; } if( AisRowRep || ( myrow == Arow ) ) { Bprev = ( BC += kbsize ); A += kbsize; Aprev += kbsize; Anpprev = ( Anp -= kb ); } n1pprev = n1p; rocprev = Acol; kbprev = kb; k += kb; Na -= kb; mb1 -= kb; if( mb1 == 0 ) { if( !( Ais1Row ) && ( Arow >= 0 ) ) Arow = MModAdd1( Arow, nprow ); mb1 = MIN( Amb, Na ); } nb1 -= kb; ChangeRoc = ( nb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Col ) && ( Acol >= 0 ) ) Acol = MModAdd1( Acol, npcol ); nb1 = MIN( Anb, Na ); } tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb; n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Aimb1, Amb, myrow, Asrc, nprow ); } } } else { /* * Right - No tran or Left - (co)Trans */ if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) && ( mycol != Acol ) ) ) ) return; btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) ); AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) ); if( Mupcase( UPLO[0] ) == CUPPER ) { /* * Initiate lookahead */ n1 = ( MAX( nprow, 2 ) - 1 ) * Amb; work = PB_Cmalloc( Nb*MIN( n1, Anq )*size ); Aprev = A; Bprev = BR, Bdprev = BC; Anpprev = Anq; mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow; tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb; Asrc = Acol; n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc, npcol ); talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Arow ) ) ? ALPHA : one ); while( kb > 0 ) { kbsize = kb * size; /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &n1pprev, &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1, Bprev, &LDBR ); /* * Send partial updated result to current row */ if( !( Ais1Row ) && ChangeRoc ) { if( myrow == rocprev ) { send( ctxt, Nb, n1pprev, Bprev, LDBR, Arow, mycol ); } else if( myrow == Arow ) { recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol ); add( &Nb, &n1pprev, one, work, &Nb, one, Bprev, &LDBR ); } } } /* * Solve current diagonal block */ if( Ais1Row || ( myrow == Arow ) ) { if( AisColRep || ( mycol == Acol ) ) { trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Nb, &kb, talpha2, A, &Ald, BR, &LDBR ); tadd( &Nb, &kb, one, BR, &LDBR, zero, BC, &LDBC ); } if( bcst ) { if( mycol == Acol ) bsend( ctxt, ROW, &btop, kb, Nb, BC, LDBC ); else brecv( ctxt, ROW, &btop, kb, Nb, BC, LDBC, myrow, Acol ); } talpha2 = one; } else { if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero, zero, zero, BR, &LDBR ); } /* * Finish previous update */ if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) { tmp2 = n1pprev * size; gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1, &kbprev, negone, Bdprev, &LDBC, Aprev+Ald*tmp2, &Ald, talpha1, Bprev+LDBR*tmp2, &LDBR ); } Aprev += kbprev * size; talpha1 = one; } /* * Save info of current step and update info for the next step */ if( Ais1Row || ( myrow == Arow ) ) { A += kbsize; Bdprev = Bd = BC; BC += kbsize; } if( AisColRep || ( mycol == Acol ) ) { Bprev = ( BR += LDBR * kbsize ); A += Ald * kbsize; Anpprev = ( Anq -= kb ); Aprev += Ald * kbsize; } n1pprev = n1p; rocprev = Arow; kbprev = kb; k += kb; Na -= kb; nb1 -= kb; if( nb1 == 0 ) { if( !( Ais1Col ) && ( Acol >= 0 ) ) Acol = MModAdd1( Acol, npcol ); nb1 = MIN( Anb, Na ); } mb1 -= kb; ChangeRoc = ( mb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Row ) && ( Arow >= 0 ) ) Arow = MModAdd1( Arow, nprow ); mb1 = MIN( Amb, Na ); } tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb; n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Ainb1, Anb, mycol, Asrc, npcol ); } } else { /* * Initiate lookahead */ nlast = ( nprow - 1 ) * Amb; n1 = MAX( nlast, Amb ); nlast += Aimb1; n1last = n1 - Amb + MAX( Aimb1, Amb ); work = PB_Cmalloc( Nb * MIN( n1last, Anq ) * size ); tmp1 = Na-1; Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow ); Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol ); rocprev = Alrow; Anpprev = Anq; Bprev = BR; Bdprev = BC; Aprev = A = Mptr( A, Anp, 0, Ald, size ); mb1 = PB_Clastnb( Na, 0, Aimb1, Amb ); nb1 = PB_Clastnb( Na, 0, Ainb1, Anb ); tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); Asrc = Acol; n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc, npcol ); talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Alrow ) ) ? ALPHA : one ); while( Na > 0 ) { kbsize = kb * size; if( Ais1Row || ( myrow == Alrow ) ) { A -= kbsize; Anp -= kb; Bd = Mptr( BC, Anp, 0, LDBC, size ); } if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; } /* * Partial update of previous block */ if( n1pprev > 0 ) { if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) { tmp1 = ( Anpprev - n1pprev ) * size; TYPE->Fgemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &n1pprev, &kbprev, negone, Bdprev, &LDBC, Aprev+Ald*tmp1, &Ald, talpha1, Bprev+LDBR*tmp1, &LDBR ); } /* * Send partial updated result to current row */ if( !( Ais1Row ) && ChangeRoc ) { if( myrow == rocprev ) { send( ctxt, Nb, n1pprev, Mptr( Bprev, 0, Anpprev-n1pprev, LDBR, size ), LDBR, Alrow, mycol ); } else if( myrow == Alrow ) { recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol ); add( &Nb, &n1pprev, one, work, &Nb, one, Mptr( Bprev, 0, Anpprev-n1pprev, LDBR, size ), &LDBR ); } } } /* * Solve current diagonal block */ if( Ais1Row || ( myrow == Alrow ) ) { if( AisColRep || ( mycol == Alcol ) ) { trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Nb, &kb, talpha2, Mptr( A, 0, Anq, Ald, size ), &Ald, Mptr( BR, 0, Anq, LDBR, size ), &LDBR ); tadd( &Nb, &kb, one, Mptr( BR, 0, Anq, LDBR, size ), &LDBR, zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC ); } if( bcst ) { if( mycol == Alcol ) bsend( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC, size ), LDBC ); else brecv( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC, size ), LDBC, myrow, Alcol ); } talpha2 = one; } else { if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero, zero, zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR ); } /* * Finish previous update */ if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) ) { if( ( tmp1 = Anpprev - n1pprev ) > 0 ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1, &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1, Bprev, &LDBR ); talpha1 = one; } /* * Save info of current step and update info for the next step */ if( Ais1Row || ( myrow == Alrow ) ) { Bdprev = Bd; Aprev = A; } if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; } n1pprev = n1p; rocprev = Alrow; kbprev = kb; k += kb; Na -= kb; nb1 -= kb; if( nb1 == 0 ) { if( !( Ais1Col ) && ( Alcol >= 0 ) ) Alcol = MModSub1( Alcol, npcol ); nb1 = ( Na > Ainb1 ? Anb : Ainb1 ); } mb1 -= kb; ChangeRoc = ( mb1 == 0 ); if( ChangeRoc ) { if( !( Ais1Row ) && ( Alrow >= 0 ) ) Alrow = MModSub1( Alrow, nprow ); mb1 = ( Na > Aimb1 ? Amb : Aimb1 ); } tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 ); tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) ); n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc, npcol ); } } } if( work ) free( work ); /* * End of PB_Cptrsm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsymm.c0000640000175000017500000003243610363532303020027 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsymm( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * BC, int LDBC, char * BR, int LDBR, char * CC, int LDCC, char * CR, int LDCR ) #else void PB_Ctzsymm( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, BC, LDBC, BR, LDBR, CC, LDCC, CR, LDCR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDBC, LDBR, LDCC, LDCR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * BC, * BR, * CC, * CR; #endif { /* * Purpose * ======= * * PB_Ctzsymm performs the matrix-matrix operation * * C := alpha * A * B + C, * * or * * C := alpha * B * A + C, * * where alpha is a scalar, B and C are m by k and k by n matrices and A * is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (input) pointer to CHAR * On entry, SIDE specifies whether op( A ) multiplies B from * the left or right as follows: * * SIDE = 'L' or 'l' C := alpha * A * B + C, * * SIDE = 'R' or 'r' C := alpha * B * A + C. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of rows of the matrices BR * and CR and the number of columns of the matrices BC and CC. K * must be at least zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * BC (input) pointer to CHAR * On entry, BC is an array of dimension (LDBC,K) containing the * m by k matrix BC. * * LDBC (input) INTEGER * On entry, LDBC specifies the leading dimension of the array * BC. LDBC must be at least max( 1, M ). * * BR (input) pointer to CHAR * On entry, BR is an array of dimension (LDBR,N) containing the * k by n matrix BR. * * LDBR (input) INTEGER * On entry, LDBR specifies the leading dimension of the array * BR. LDBR must be at least K. * * CC (input/output) pointer to CHAR * On entry, CC is an array of dimension (LDCC,K) containing the * m by k matrix CC. On exit, CC is overwritten by the partially * updated matric CC. * * LDCC (input) INTEGER * On entry, LDCC specifies the leading dimension of the array * CC. LDCC must be at least max( 1, M ). * * CR (input/output) pointer to CHAR * On entry, CR is an array of dimension (LDCR,N) containing the * k by n matrix CR. On exit, CR is overwritten by the partially * updated matrix CR. * * LDCR (input) INTEGER * On entry, LDCR specifies the leading dimension of the array * CR. LDCR must be at least K. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, j1, m1, mn, n1, size; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( SIDE[0] ) == CLEFT ) { if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsymm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &n1, &K, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( A, i1, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } TYPE->Fsymm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &n1, &K, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( BC, m1, 0, LDBC, size ), &LDBC, one, Mptr( CC, m1, 0, LDCC, size ), &LDCC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, Mptr( A, 0, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &N, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &N, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } } else { if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsymm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &K, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CC, i1, 0, LDCC, size ), &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, Mptr( BC, i1, 0, LDBC, size ), &LDBC, Mptr( A, i1, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &m1, &K, &n1, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &m1, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } TYPE->Fsymm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), &K, &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( BR, 0, j1, LDBR, size ), &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &n1, &M, ALPHA, BC, &LDBC, Mptr( A, 0, j1, LDA, size ), &LDA, one, Mptr( CR, 0, j1, LDCR, size ), &LDCR ); } } else { one = TYPE->one; gemm = TYPE->Fgemm; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &M, &K, &N, ALPHA, A, &LDA, BR, &LDBR, one, CC, &LDCC ); gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &K, &N, &M, ALPHA, BC, &LDBC, A, &LDA, one, CR, &LDCR ); } } /* * End of PB_Ctzsymm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cabort.c0000640000175000017500000001024710363532303017567 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cabort( int ICTXT, char * ROUT, int INFO ) #else void PB_Cabort( ICTXT, ROUT, INFO ) /* * .. Scalar Arguments .. */ int ICTXT, INFO; /* * .. Array Arguments .. */ char * ROUT; #endif { /* * Purpose * ======= * * PB_Cabort is an error handler for the PBLAS routines. This routine * displays an error message on stderr by calling PB_Cwarn, and halts * execution by calling Cblacs_abort(). * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * INFO (local input) INTEGER * The error code computed by the calling PBLAS routine. * = 0: no error found * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int mycol, myrow, npcol, nprow; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &INFO ) ) return; #endif if( INFO < 0 ) { /* * Display an error message */ if( INFO < DESCMULT ) PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d had an illegal value", -INFO ); else PB_Cwarn( ICTXT, -1, ROUT, "Parameter number %d, entry number %d had an illegal value", (-INFO) / DESCMULT, (-INFO) % DESCMULT ); } else { /* * Error code is incorrect, it should be negative */ PB_Cwarn( ICTXT, -1, ROUT, "Positive error code %d returned by %s!!!", INFO ); } Cblacs_abort( ICTXT, INFO ); /* * End of PB_Cabort */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cwarn.c0000640000175000017500000001125110363532303017423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #ifdef TestingPblas #include "../SRC/pblas.h" #include "../SRC/PBpblas.h" #include "../SRC/PBtools.h" #include "../SRC/PBblacs.h" #include "../SRC/PBblas.h" #else #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #if (_F2C_CALL_ == _F2C_ADD_ ) #define PB_NoAbort pb_noabort_ #endif #if (_F2C_CALL_ == _F2C_UPCASE ) #define PB_NoAbort PB_NOABORT #endif #if (_F2C_CALL_ == _F2C_NOCHANGE ) #define PB_NoAbort pb_noabort #endif #if (_F2C_CALL_ == _F2C_F77ISF2C ) #define PB_NoAbort pb_noabort__ #endif #ifdef __STDC__ void PB_Cwarn( int ICTXT, int LINE, char * ROUT, char * FORM, ... ) #else void PB_Cwarn( va_alist ) va_dcl #endif { /* * Purpose * ======= * * PB_Cwarn is an error handler for the PBLAS routines. This routine * displays an error message on stderr. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * LINE (local input) INTEGER * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a valid line number, * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * error handler. * * FORM (local input) pointer to CHAR * On entry, FORM is a control string specifying the format * conversion of its following arguments. * * ... (local input) * On entry, FORM is a control string specifying the format * On entry, the expressions that are to be evaluated and con- * verted according to the formats in the control string FORM * and then placed in the output stream. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ va_list argptr; int iam, mycol, myrow, npcol, nprow; char cline[100]; /* .. * .. External Functions .. */ #ifdef TestingPblas #ifdef __STDC__ int PB_NoAbort( int * ); #else int PB_NoAbort(); #endif #endif #ifdef __STDC__ va_start( argptr, FORM ); #else char * ROUT, * FORM; int ICTXT, LINE; /* .. * .. Executable Statements .. * */ va_start( argptr ); ICTXT = va_arg( argptr, int ); LINE = va_arg( argptr, int ); ROUT = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif #ifdef TestingPblas /* * For testing purpose only, the error is reported, but the program execution * is not terminated */ if( PB_NoAbort( &ICTXT ) ) return; #endif vsprintf( cline, FORM, argptr ); va_end( argptr ); Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); if( nprow != -1 ) iam = Cblacs_pnum( ICTXT, myrow, mycol ); else iam = -1; /* * Display an error message */ if( LINE <= 0 ) (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", in routine ", ROUT ); else (void) fprintf( stderr, "%s'%s'\n%s{%d,%d}, %s%d, %s%d%s%d%s'%s'.\n\n", "PBLAS ERROR ", cline, "from ", myrow, mycol, "pnum=", iam, "Contxt=", ICTXT, ", on line ", LINE, " of routine ", ROUT ); /* * End of PB_Cwarn */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsyr2.c0000640000175000017500000002132310363532303017732 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsyr2( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * XC, int LDXC, char * YC, int LDYC, char * XR, int LDXR, char * YR, int LDYR, char * A, int LDA ) #else void PB_Ctzsyr2( TYPE, UPLO, M, N, K, IOFFD, ALPHA, XC, LDXC, YC, LDYC, XR, LDXR, YR, LDYR, A, LDA ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * A, * XC, * XR, * YC, * YR; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctzsyr2 performs the trapezoidal symmetric or Hermitian rank 2 * operation: * * A := alpha * XC * YR + alpha * YC * XR + A, or * * A := alpha*XC*conjg( YR ) + conjg( alpha )*YC*conjg( XR ) + A, * * where alpha is a scalar, XC and YC are m element vectors, XR and YR * are n element vectors and A is an m by n trapezoidal symmetric * or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (see pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * YC (input) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YR (input) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * A (input/output) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is updated. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ione=1, j1, m1, mn, n1, size; GERU_T geru; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; geru = TYPE->Fgeru; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { geru( &M, &n1, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); geru( &M, &n1, ALPHA, YC, &ione, XR, &LDXR, A, &LDA ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsyr2( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, i1, j1, LDA, size ), &LDA ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; geru( &m1, &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, i1, j1, LDA, size ), &LDA ); geru( &m1, &n1, ALPHA, Mptr( YC, i1, 0, LDYC, size ), &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, i1, j1, LDA, size ), &LDA ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; geru = TYPE->Fgeru; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { geru( &m1, &n1, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); geru( &m1, &n1, ALPHA, YC, &ione, XR, &LDXR, A, &LDA ); } TYPE->Fsyr2( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, m1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, m1, j1, LDA, size ), &LDA ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; geru( &M, &n1, ALPHA, XC, &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, 0, j1, LDA, size ), &LDA ); geru( &M, &n1, ALPHA, YC, &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, 0, j1, LDA, size ), &LDA ); } } else { geru = TYPE->Fgeru; geru( &M, &N, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); geru( &M, &N, ALPHA, YC, &ione, XR, &LDXR, A, &LDA ); } /* * End of PB_Ctzsyr2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrsmAB.c0000640000175000017500000013321010363532303020024 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrsmAB( PBTYP_T * TYPE, char * VARIANT, char * SIDE, char * UPLO, char * TRANSA, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB ) #else void PB_CptrsmAB( TYPE, VARIANT, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ char * DIAG, * SIDE, * TRANSA, * UPLO, * VARIANT; int IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CptrsmAB solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ). * * The submatrix X is overwritten on sub( B ). * * This is the outer-product algorithm using the logical aggregation * blocking technique. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * VARIANT (global input) pointer to CHAR * On entry, VARIANT specifies whether the left- or right-loo- * king variant of the algorithm should be used for the transpo- * se cases only, that is TRANSA is not 'N' or 'n'. When VARIANT * is 'L' or 'l', the left-looking variant is used, otherwise * the right-looking algorithm is selected. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char conjg, * negone, * one, * talph, top, * zero; int Afr, Bcol, Bcurcol, Bcurimb1, Bcurinb1, Bcurrow, Bfr, Bii, Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq, Bnq0, Brow, WAfr, WAoff, WBfr, WBsum, ctxt, k, kb, kbb, kmax, ktmp, lside, mn, mycol, myrow, notran, npcol, nprow, size, upper; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Bd0[DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WAd[DLEN_], WBd[DLEN_]; char * Aptr = NULL, * Bptr = NULL, * Bptr0 = NULL, * WA = NULL, * WB = NULL; /* .. * .. Executable Statements .. * */ lside = ( Mupcase( SIDE [0] ) == CLEFT ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( Mupcase( TRANSA[0] ) == CNOTRAN ); size = TYPE->size; negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero; gsum2d = TYPE->Cgsum2d; gemm = TYPE->Fgemm; talph = ALPHA; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol, Bld ... */ Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); Bimb1 = PB_Cfirstnb( M, IB, Bimb, Bmb ); Bmp0 = PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( N, JB, Binb, Bnb ); Bnq0 = PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); if( notran ) { if( lside ) { if( upper ) { kmax = ( ( M - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+ktmp-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A(IA:IA+ktmp-1, JA+k:JA+k+kbb-1) over B(IB:IB+ktmp-1, JB:JB+N-1) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB+k:IB+ktmp-1, JB:JB+N-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPLO, DIAG, kbb, N, talph, WA, k, 0, WAd, B, IB+k, JB, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+k-1, JB:JB+N-1 ) */ if( k > 0 ) { /* * Replicate B( IB+k:IB+ktmp-1, JB:JB+N-1 ) over B( IB:IB+k-1, JB:JB+N-1 ) */ PB_Cdescset( Bd0, k, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, k, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Local update */ Bmp = PB_Cnumroc( k, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, negone, WA, &WAd[LLD_], WB, &WBd[LLD_], talph, Bptr0, &Bld ); if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { for( k = 0; k < M; k += kb ) { ktmp = M - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPLO, DIAG, kbb, N, talph, WA, 0, 0, WAd, B, IB+k, JB, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB+k+kbb:IB+M-1, JB:JB+N-1 ) */ if( ( ktmp = ktmp - kbb ) > 0 ) { /* * Replicate B(IB+k:IB+k+kbb-1, JB:JB+N-1) over B(IB+k+kbb:IB+M-1, JB:JB+N-1) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k+kbb, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k+kbb, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Local update */ Bmp = PB_Cnumroc( ktmp, k+kbb, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[IMB_], WAd[MB_], myrow, WAd[RSRC_], nprow ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, negone, Mptr( WA, WAoff, 0, WAd[LLD_], size ), &WAd[LLD_], WB, &WBd[LLD_], talph, Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld ); } if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } else { if( upper ) { for( k = 0; k < N; k += kb ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPLO, DIAG, M, kbb, talph, WA, 0, 0, WAd, B, IB, JB+k, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+M-1, JB+k+kbb:JB+N-1 ) */ if( ( ktmp = ktmp - kbb ) > 0 ) { /* * Replicate B(IB:IB+M-1, JB+k:JB+k+kbb-1) over B(IB:IB+M-1, JB+k+kbb:JB+N-1) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k+kbb, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k+kbb, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Local update */ Bnq = PB_Cnumroc( ktmp, k+kbb, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[INB_], WAd[NB_], mycol, WAd[CSRC_], npcol ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, negone, WB, &WBd[LLD_], Mptr( WA, 0, WAoff, WAd[LLD_], size ), &WAd[LLD_], talph, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld ); } if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { kmax = ( ( N - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+ktmp-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+ktmp-1 ) over B(IB:IB+M-1, JB:JB+ktmp-1) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB+k:JB+ktmp-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPLO, DIAG, M, kbb, talph, WA, 0, k, WAd, B, IB, JB+k, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+M-1, JB:JB+k-1 ) */ if( k > 0 ) { /* * Replicate B( IB:IB+M-1, JB+k:JB+ktmp-1 ) over B( IB:IB+M-1, JB:JB+k-1 ) */ PB_Cdescset( Bd0, M, k, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, M, k, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Local update */ Bnq = PB_Cnumroc( k, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, negone, WB, &WBd[LLD_], WA, &WAd[LLD_], talph, Bptr0, &Bld ); if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } } else { if( Mupcase( VARIANT[0] ) == CRIGHT ) { /* * Right looking variant for the transpose cases */ conjg = ( ( Mupcase( TRANSA[0] ) == CCOTRAN ) ? CCONJG : CNOCONJG ); if( lside ) { if( !upper ) { /* * Left Lower (Conjugate) Transpose */ kmax = ( ( M - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+ktmp-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+ktmp-1 )' over B(IB:IB+ktmp-1, JB:JB+N-1) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB+k:IB+ktmp-1, JB:JB+N-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPPER, DIAG, kbb, N, talph, WA, k, 0, WAd, B, IB+k, JB, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+k-1, JB:JB+N-1 ) */ if( k > 0 ) { /* * Replicate B( IB+k:IB+ktmp-1, JB:JB+N-1 ) over B( IB:IB+k-1, JB:JB+N-1 ) */ PB_Cdescset( Bd0, k, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, k, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Local update */ Bmp = PB_Cnumroc( k, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, negone, WA, &WAd[LLD_], WB, &WBd[LLD_], talph, Bptr0, &Bld ); if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { /* * Left Upper (Conjugate) Transpose */ for( k = 0; k < M; k += kb ) { ktmp = M - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA+k:JA+M-1 )' over B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, LOWER, DIAG, kbb, N, talph, WA, 0, 0, WAd, B, IB+k, JB, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB+k+kbb:IB+M-1, JB:JB+N-1 ) */ if( ( ktmp = ktmp - kbb ) > 0 ) { /* * Replicate B(IB+k:IB+k+kbb-1, JB:JB+N-1) over B(IB+k+kbb:IB+M-1, JB:JB+N-1) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k+kbb, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k+kbb, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * Local update */ Bmp = PB_Cnumroc( ktmp, k+kbb, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[IMB_], WAd[MB_], myrow, WAd[RSRC_], nprow ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp, &Bnq0, &kbb, negone, Mptr( WA, WAoff, 0, WAd[LLD_], size ), &WAd[LLD_], WB, &WBd[LLD_], talph, Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld ); } if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } else { if( !upper ) { /* * Right Lower (Conjugate) Transpose */ for( k = 0; k < N; k += kb ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+N-1, JA+k:JA+k+kbb-1 )' over B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, UPPER, DIAG, M, kbb, talph, WA, 0, 0, WAd, B, IB, JB+k, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+M-1, JB+k+kbb:JB+N-1 ) */ if( ( ktmp = ktmp - kbb ) > 0 ) { /* * Replicate B(IB:IB+M-1, JB+k:JB+k+kbb-1) over B(IB:IB+M-1, JB+k+kbb:JB+N-1) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k+kbb, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k+kbb, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Local update */ Bnq = PB_Cnumroc( ktmp, k+kbb, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[INB_], WAd[NB_], mycol, WAd[CSRC_], npcol ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, negone, WB, &WBd[LLD_], Mptr( WA, 0, WAoff, WAd[LLD_], size ), &WAd[LLD_], talph, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld ); } if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } else { /* * Right Upper (Conjugate) Transpose */ kmax = ( ( N - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+ktmp-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+ktmp-1, JA+k:JA+k+kbb-1 )' over B(IB:IB+M-1, JB:JB+ktmp-1) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, &conjg, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB+k:JB+ktmp-1 ) with talph */ PB_CptrsmAB0( TYPE, SIDE, LOWER, DIAG, M, kbb, talph, WA, 0, k, WAd, B, IB, JB+k, DESCB, &Bptr, DBUFB, &Bfr ); /* * Update B( IB:IB+M-1, JB:JB+k-1 ) */ if( k > 0 ) { /* * Replicate B( IB:IB+M-1, JB+k:JB+ktmp-1 ) over B( IB:IB+M-1, JB:JB+k-1 ) */ PB_Cdescset( Bd0, M, k, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, M, k, Bd0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * Local update */ Bnq = PB_Cnumroc( k, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Bnq, &kbb, negone, WB, &WBd[LLD_], WA, &WAd[LLD_], talph, Bptr0, &Bld ); if( WBfr ) free( WB ); talph = one; } if( WAfr ) free( WA ); if( Bfr ) free( Bptr ); if( Afr ) free( Aptr ); } } } } else { /* * Left looking variant for the transpose cases */ if( lside ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( upper ) { /* * Accumulate A( IA:IA+Bimb1-1, JA:JA+Bimb1-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, Bimb1, Bimb1, A, IA, JA, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+Bimb1-1, JA:JA+Bimb1-1 ) over B(IB:IB+Bimb1-1, JB:JB+N-1) */ PB_Cdescset( Bd0, Bimb1, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, Bimb1, N, Bd0, Bimb1, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+Bimb1-1, JB:JB+N-1 ) */ if( ( ( Brow < 0 ) || ( myrow == Brow ) ) && ( Bnq0 > 0 ) ) TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bimb1, &Bnq0, ALPHA, WA, &WAd[LLD_], Bptr0, &Bld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Update and solve remaining rows of sub( B ) */ for( k = Bimb1; k < M; k += kb ) { kbb = M - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+ktmp-1, JA+k:JA+ktmp-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, ktmp, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+ktmp-1, JA+k:JA+ktmp-1 ) over B(IB:IB+ktmp-1, JB:JB+N-1) */ PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WB := A( IA:IA+k-1, JA+k:JA+ktmp-1 )' * B( IB:IB+k-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bmp = PB_Cnumroc( k, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, one, WA, &WAd[LLD_], Bptr0, &Bld, zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[RSRC_] = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WB, WBd[LLD_], WBd[RSRC_], mycol ); } /* * Add WB to B( IB+k:IB+ktmp-1, JB:JB+N-1 ) and solve it with * A( IA+k:IA+ktmp-1, JA+k:JA+ktmp-1 ) */ PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, kbb, N, ALPHA, WA, k, 0, WAd, B, IB+k, JB, DESCB, WB, WBd ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); } } else { /* * Solve last block of rows of sub( B ) */ Bcurimb1 = PB_Clastnb( M, IB, Bimb, Bmb ); k = M - Bcurimb1; /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+M-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, Bcurimb1, Bcurimb1, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+M-1, JA+k:JA+M-1 ) over B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, Bcurimb1, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, Bcurimb1, N, Bd0, Bcurimb1, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * Solve B( IB+k:IB+M-1, JB:JB+N-1 ) */ if( ( ( Brow < 0 ) || ( myrow == Bcurrow ) ) && ( Bnq0 > 0 ) ) TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bcurimb1, &Bnq0, ALPHA, WA, &WAd[LLD_], Mptr( Bptr0, Bmp0-Bcurimb1, 0, Bld, size ), &Bld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( ( mn = M - Bcurimb1 ) <= 0 ) return; /* * Update and solve remaining rows of sub( B ) */ kmax = ( ( mn - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { ktmp = M - k; kbb = mn - k; kbb = MIN( kbb, kb ); /* * Accumulate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, ktmp, kbb, A, IA+k, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+M-1, JA+k:JA+k+kbb-1 ) over B( IB+k:IB+M-1, JB:JB+N-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WB := A( IA+k+kbb:IA+M-1, JA+k:JA+k+kbb-1 )'* B( IB+k+kbb:IB+M-1, JB:JB+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bmp = PB_Cnumroc( ktmp-kbb, k+kbb, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bnq0 > 0 ) && ( Bmp > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[IMB_], WAd[MB_], myrow, WAd[RSRC_], nprow ); gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &kbb, &Bnq0, &Bmp, one, Mptr( WA, WAoff, 0, WAd[LLD_], size ), &WAd[LLD_], Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld, zero, WB, &WBd[LLD_] ); } if( WBsum ) { WBd[RSRC_] = PB_Cindxg2p( k + kbb - 1, Bimb1, Bmb, Brow, Brow, nprow ); if( Bnq0 > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq0, WB, WBd[LLD_], WBd[RSRC_], mycol ); } /* * Add WB to B( IB+k:IB+k+kbb-1, JB:JB+N-1 ) and solve it with * A( IA+k:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, kbb, N, ALPHA, WA, 0, 0, WAd, B, IB+k, JB, DESCB, WB, WBd ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); } } } else { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( upper ) { /* * Solve last block of columns of sub( B ) */ Bcurinb1 = PB_Clastnb( N, JB, Binb, Bnb ); k = N - Bcurinb1; /* * Accumulate A( IA+k:IA+N-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, Bcurinb1, Bcurinb1, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+N-1, JA+k:JA+N-1 ) over B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, Bcurinb1, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, Bcurinb1, Bd0, Bcurinb1, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB+k:JB+N-1 ) */ if( ( ( Bcol < 0 ) || ( mycol == Bcurcol ) ) && ( Bmp0 > 0 ) ) TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bmp0, &Bcurinb1, ALPHA, WA, &WAd[LLD_], Mptr( Bptr0, 0, Bnq0-Bcurinb1, Bld, size ), &Bld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( ( mn = N - Bcurinb1 ) <= 0 ) return; /* * Update and solve remaining columns of sub( B ) */ kmax = ( ( mn - 1 ) / kb ) * kb; for( k = kmax; k >= 0; k -= kb ) { ktmp = N - k; kbb = mn - k; kbb = MIN( kbb, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) */ PB_CGatherV( TYPE, REUSE, BACKWARD, kbb, ktmp, A, IA+k, JA+k, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA+k:JA+N-1 ) over B( IB:IB+M-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WB := B( IB:IB+M-1, JB+k+kbb:JB+N-1 ) * A(IA+k:IA+k+kbb-1, JA+k+kbb:JA+N-1)' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bnq = PB_Cnumroc( ktmp-kbb, k+kbb, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) { WAoff = PB_Cnumroc( kbb, 0, WAd[INB_], WAd[NB_], mycol, WAd[CSRC_], npcol ); gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &kbb, &Bnq, one, Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld, Mptr( WA, 0, WAoff, WAd[LLD_], size ), &WAd[LLD_], zero, WB, &WBd[LLD_] ); } if( WBsum ) { WBd[CSRC_] = PB_Cindxg2p( k + kbb - 1, Binb1, Bnb, Bcol, Bcol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WB, WBd[LLD_], myrow, WBd[CSRC_] ); } /* * Add WB to B( IB:IB+M-1, JB+k:JB+k+kbb-1 ) and solve it with * A( IA+k:IA+k+kbb-1, JA+k:JA+k+kbb-1 ) */ PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, M, kbb, ALPHA, WA, 0, 0, WAd, B, IB, JB+k, DESCB, WB, WBd ); if( WBfr ) free( WB ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); } } else { /* * Accumulate A( IA:IA+Binb1-1, JA:JA+Binb1-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, Binb1, Binb1, A, IA, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+Binb1-1, JA:JA+Binb1-1 ) over B(IB:IB+M-1, JB:JB+Binb1-1) */ PB_Cdescset( Bd0, M, Binb1, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, Binb1, Bd0, Binb1, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * Solve B( IB:IB+M-1, JB:JB+Binb1-1 ) */ if( ( ( Bcol < 0 ) || ( mycol == Bcol ) ) && ( Bmp0 > 0 ) ) TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ), C2F_CHAR( DIAG ), &Bmp0, &Binb1, ALPHA, WA, &WAd[LLD_], Bptr0, &Bld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Update and solve remaining columns of sub( B ) */ for( k = Binb1; k < N; k += kb ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+ktmp-1, JA:JA+ktmp-1 ) */ PB_CGatherV( TYPE, REUSE, FORWARD, kbb, ktmp, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+ktmp-1, JA:JA+ktmp-1 ) over B( IB:IB+M-1, JB:JB+ktmp-1 ) */ PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WB := B( IB:IB+M-1, JB:JB+k-1 ) * A( IA+k:IA+ktmp-1, JA:JA+k-1 )' */ PB_COutV( TYPE, COLUMN, INIT, M, ktmp, Bd0, kbb, &WB, WBd, &WBfr, &WBsum ); Bnq = PB_Cnumroc( k, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &kbb, &Bnq, one, Bptr0, &Bld, WA, &WAd[LLD_], zero, WB, &WBd[LLD_] ); if( WBsum ) { WBd[CSRC_] = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); if( Bmp0 > 0 ) gsum2d( ctxt, ROW, &top, Bmp0, kbb, WB, WBd[LLD_], myrow, WBd[CSRC_] ); } /* * Add WB to B( IB:IB+M-1, JB+k:JB+ktmp-1 ) and solve it with * A( IA+k:IA+ktmp-1, JA+k:JA+ktmp-1 ) */ PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, M, kbb, ALPHA, WA, 0, k, WAd, B, IB, JB+k, DESCB, WB, WBd ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( WBfr ) free( WB ); } } } } } /* * End of PB_CptrsmAB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpsyrkAC.c0000640000175000017500000005662610363532303020047 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpsyrkAC( PBTYP_T * TYPE, char * DIRECA, char * CONJUG, char * UPLO, char * TRANS, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpsyrkAC( TYPE, DIRECA, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECA, * TRANS, * UPLO; int IA, IC, JA, JC, K, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCC; char * A, * C; #endif { /* * Purpose * ======= * * PB_CpsyrkAC performs one of the following symmetric or Hermitian rank * k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * or Hermitian submatrix and sub( A ) is an n by k submatrix in the * first case and a k by n submatrix in the second case. * * This is the outer-product algorithm using the logical aggregation * blocking technique. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) and sub( C ) should be looped over as * follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( C ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( C ) is symmetric, * CONJUG = 'Z' or 'z' sub( C ) is Hermitian. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the submatrix sub( C ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( C ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( C ) are referenced. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't' or 'C' or 'c', K specifies the number of rows of the sub- * matrix sub( A ). K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly lower triangular * of sub( C ) are not referenced. On exit, the upper triangular * part of sub( C ) is overwritten by the upper triangular part * of the updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly upper triangular * of sub( C ) are not referenced. On exit, the lower triangular * part of sub( C ) is overwritten by the lower triangular part * of the updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char GatherDir, ScatterDir, * one, top, tran, * zero; int Acol, Acurcol, Acurimb1, Acurinb1, Acurrow, Afr, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj, Ald, Am, Amb, Amp, Amp0, An, Anb, Anq, Anq0, Arow, Ccsrc, Cimb, Cinb, Cmb, Cnb, Crsrc, WAfr, WCfr, WCsum, conjg, ctxt, fwd, k, kb, kbb, kend, kstart, kstep, ktmp, mycol, myrow, notran, npcol, nprow, size, upper; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Ad0[DLEN_], DBUFA[DLEN_], WAd[DLEN_], WCd[DLEN_]; char * Aptr = NULL, * Aptr0 = NULL, * WA = NULL, * WC = NULL; /* .. * .. Executable Statements .. * */ /* * sub( C ) = beta * sub( C ) */ PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); fwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); conjg = ( Mupcase( CONJUG[0] ) == CCONJG ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( Mupcase( TRANS [0] ) == CNOTRAN ); tran = ( conjg ? CCOTRAN : CTRAN ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gsum2d = TYPE->Cgsum2d; gemm = TYPE->Fgemm; /* * Figure out the loop bounds accordingly to DIRECA */ kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); if( fwd ) { kstart = 0; kend = ( ( N - 1 ) / kb + 1 ) * kb; kstep = kb; GatherDir = CFORWARD; ScatterDir = CBACKWARD; } else { kstart = ( ( N - 1 ) / kb ) * kb; kend = kstep = -kb; GatherDir = CBACKWARD; ScatterDir = CFORWARD; } /* * Compute local information for A */ if( notran ) { Am = N; An = K; } else { Am = K; An = N; } PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); Aimb = DESCA[IMB_]; Ainb = DESCA[INB_]; Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Ald = DESCA[LLD_]; Aimb1 = PB_Cfirstnb( Am, IA, Aimb, Amb ); Amp0 = PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow ); Ainb1 = PB_Cfirstnb( An, JA, Ainb, Anb ); Anq0 = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size ); if( notran ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cinb = DESCC[INB_]; Cnb = DESCC[NB_]; Ccsrc = DESCC[CSRC_]; if( upper ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, An, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over A( IA:IA+k+kbb-1, JA:JA+K-1 ) */ PB_Cdescset( Ad0, ktmp, An, Aimb1, Ainb1, Amb, Anb, Arow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WC := A( IA:IA+k+kbb-1, JA:JA+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )' */ PB_COutV( TYPE, COLUMN, INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr, &WCsum ); Amp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0, ALPHA, Aptr0, &Ald, WA, &WAd[LLD_], zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( WCsum ) { WCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : ktmp - 1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); if( Amp > 0 ) gsum2d( ctxt, ROW, &top, Amp, kbb, WC, WCd[LLD_], myrow, WCd[CSRC_] ); } /* * Zero lower triangle of WC( k:k+kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WC, k, 0, WCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WC, k+1, 0, WCd ); /* * Add WC to C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WC, 0, 0, WCd, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( WCfr ) free( WC ); } } else { for( k = kstart; k != kend; k += kstep ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, An, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over A( IA+k:IA+N-1, JA:JA+K-1 ) */ Acurimb1 = PB_Cfirstnb( ktmp, IA+k, Aimb, Amb ); Acurrow = PB_Cindxg2p( k, Aimb1, Amb, Arow, Arow, nprow ); PB_Cdescset( Ad0, ktmp, An, Acurimb1, Ainb1, Amb, Anb, Acurrow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WC := A( IA+k:IA+N-1, JA:JA+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )' */ PB_COutV( TYPE, COLUMN, INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr, &WCsum ); Amp = PB_Cnumroc( ktmp, k, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0, ALPHA, Mptr( Aptr0, Amp0-Amp, 0, Ald, size ), &Ald, WA, &WAd[LLD_], zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( WCsum ) { WCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : k + kbb - 1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); if( Amp > 0 ) gsum2d( ctxt, ROW, &top, Amp, kbb, WC, WCd[LLD_], myrow, WCd[CSRC_] ); } /* * Zero upper triangle of WC */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WC, 0, 0, WCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WC, 0, 1, WCd ); /* * Add WC to C( IC+k:IC+N-1, JC+k:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WC, 0, 0, WCd, COLUMN, one, C, IC+k, JC+k, DESCC, COLUMN ); if( WCfr ) free( WC ); } } } else { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cimb = DESCC[IMB_]; Cmb = DESCC[MB_]; Crsrc = DESCC[RSRC_]; if( upper ) { for( k = kstart; k != kend; k += kstep ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, Am, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over A( IA:IA+K-1, JA+k:JA+N-1 ) */ Acurinb1 = PB_Cfirstnb( ktmp, JA+k, Ainb, Anb ); Acurcol = PB_Cindxg2p( k, Ainb1, Anb, Acol, Acol, npcol ); PB_Cdescset( Ad0, Am, ktmp, Aimb1, Acurinb1, Amb, Anb, Arow, Acurcol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * A( IA:IA+K-1,JA+k:JA+N-1 ) */ PB_COutV( TYPE, ROW, INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr, &WCsum ); Anq = PB_Cnumroc( ktmp, k, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq > 0 ) && ( Amp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0, ALPHA, WA, &WAd[LLD_], Mptr( Aptr0, 0, Anq0-Anq, Ald, size ), &Ald, zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( WCsum ) { WCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : k + kbb - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); if( Anq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Anq, WC, WCd[LLD_], WCd[RSRC_], mycol ); } /* * Zero lower triangle of WC */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WC, 0, 0, WCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WC, 1, 0, WCd ); /* * Add WC to C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 ) */ PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd, ROW, one, C, IC+k, JC+k, DESCC, ROW ); if( WCfr ) free( WC ); } } else { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, Am, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over A( IA:IA+K-1, JA:JA+k+kbb-1 ) */ PB_Cdescset( Ad0, Am, ktmp, Aimb1, Ainb1, Amb, Anb, Arow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * A( IA:IA+K-1, JA:JA+k+kbb-1 ) */ PB_COutV( TYPE, ROW, INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr, &WCsum ); Anq = PB_Cnumroc( ktmp, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq > 0 ) && ( Amp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0, ALPHA, WA, &WAd[LLD_], Aptr0, &Ald, zero, WC, &WCd[LLD_] ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); if( WCsum ) { WCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : ktmp - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); if( Anq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Anq, WC, WCd[LLD_], WCd[RSRC_], mycol ); } /* * Zero upper triangle of WC( 0:kbb-1, k:k+kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WC, 0, k, WCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WC, 0, k+1, WCd ); /* * Add WC to C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd, ROW, one, C, IC+k, JC, DESCC, ROW ); if( WCfr ) free( WC ); } } } /* * End of PB_CpsyrkAC */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpsyr.c0000640000175000017500000005135310363532303017460 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpsyr( PBTYP_T * TYPE, char * UPLO, int N, int K, char * ALPHA, char * XC, int LDXC, char * XR, int LDXR, char * A, int IA, int JA, int * DESCA, TZSYR_T SYR ) #else void PB_Cpsyr( TYPE, UPLO, N, K, ALPHA, XC, LDXC, XR, LDXR, A, IA, JA, DESCA, SYR ) /* * .. Scalar Arguments .. */ char * UPLO; int IA, JA, K, LDXC, LDXR, N; char * ALPHA; PBTYP_T * TYPE; TZSYR_T SYR; /* * .. Array Arguments .. */ int * DESCA; char * A, * XC, * XR; #endif { /* * Purpose * ======= * * PB_Cpsyr performs a symmetric or Hermitian rank-k update of the sub- * matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric or Hermitian submatrix sub( A ) are to be * referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the local number of columns of the lo- * cal array XC and the local number of rows of the local array * XR. K mut be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (local input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,K). Before entry, * this array contains the local entries of the matrix XC. * * LDXC (local input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, Lp( IA, N ) ). * * XR (local input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,Kx), where Kx is * at least Lc( JA, N ). Before entry, this array contains the * local entries of the matrix XR. * * LDXR (local input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least max( 1, K ). * * A (local input/local output) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * SYR (local input) pointer to function of type TZSYR_T * On entry, SYR specifies the function performing the update of * a single block. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq, Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl, IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp, jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( N <= 0 ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in * all processes. */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[LLD_]; IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) ); IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) ); size = TYPE->size; if( IsRowRepl && IsColRepl ) { SYR( TYPE, UPLO, Amp, Anq, K, 0, ALPHA, XC, LDXC, XR, LDXR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); return; } XCinc = size; XRinc = LDXR * size; upper = ( Mupcase( UPLO[0] ) == CUPPER ); lower = ( Mupcase( UPLO[0] ) == CLOWER ); /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( IsRowRepl ? Amb : nprow * Amb ); qnb = ( IsColRepl ? Anb : npcol * Anb ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ SYR( TYPE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be updated and one is * planning to go south in the table, it is neccessary to take care of the * remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; SYR( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(Xjj+inbloc)*XRinc, LDXR, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald ); } Aii += imbloc; Xii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be updated and one is * planning to go east in the table, it is neccessary to take care of the * remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; SYR( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA, XC+(Xii+imbloc)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald ); } Ajj += inbloc; Xjj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row indexes in A and XC. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row indexes in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Update the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYR( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYR( TYPE, UPLO, mbloc, inbloc, K, lcmt, ALPHA, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; Xoffi = ioffx; ioffx += mbloc; } /* * Update the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYR( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * Update the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYR( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A and XR. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A and XR. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; } /* * Update the lower triangular part of sub( A ). */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { SYR( TYPE, ALL, m1, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); Ajj += tmp1; Xjj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; SYR( TYPE, UPLO, imbloc, nbloc, K, lcmt, ALPHA, XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, Mptr( A, Aii, joffd+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; joffd += nbloc; Xoffj = joffx; joffx += nbloc; } /* * Update the upper triangular part of sub( A ). */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) SYR( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, Mptr( A, Aii, (joffd+1), Ald, size ), Ald ); tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * Update the lower triangular part of sub( A ). */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) SYR( TYPE, ALL, m1, tmp1, K, 0, ALPHA, XC+(Xoffi+1)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Update the upper triangular part of sub( A ). */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYR( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYR( TYPE, UPLO, mbloc, nbloc, K, lcmt, ALPHA, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; Xoffi = ioffx; ioffd += mbloc; ioffx += mbloc; } /* * Update the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYR( TYPE, ALL, tmp1, nbloc, K, 0, ALPHA, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; Xoffj += nbloc; /* * Update the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYR( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * End of PB_Cpsyr */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cnpreroc.c0000640000175000017500000001216110363532303020125 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cnpreroc( int N, int I, int INB, int NB, int PROC, int SRCPROC, int NPROCS ) #else int PB_Cnpreroc( N, I, INB, NB, PROC, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int I, INB, N, NB, NPROCS, PROC, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cnpreroc computes the number of preceeding rows or columns of a * submatrix that are possessed by processes closer to SRCPROC1 than * PROC where SRCPROC1 is the process owning the row or column globally * indexed by I. The submatrix is defined by giving out N rows/columns * starting from global index I. Therefore, if SRCPROC=0 and PROC=4, * then PB_Cnpreroc returns the number of matrix rows or columns owned * by processes 0, 1, 2, and 3. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix distribution. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local input) INTEGER * On entry, PROC specifies the coordinate of the process whose * local portion is determined. PROC must be at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ilocblk, mydist, nblocks; /* .. * .. Executable Statements .. * */ if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * The data is not distributed, or there is just one process in this dimension * of the grid. */ return( 0 ); /* * Compute coordinate of process owning I and corresponding INB */ if( ( INB -= I ) <= 0 ) { /* * I is not in first block, find out which process has it and update size of * first block */ nblocks = ( -INB ) / NB + 1; SRCPROC += nblocks; SRCPROC -= ( SRCPROC / NPROCS ) * NPROCS; INB += nblocks * NB; } /* * Now everything is just like N, I=0, INB, NB, SRCPROC, NPROCS. If I am the * source process, nothing preceeds me ... */ if( PROC == SRCPROC ) return( 0 ); /* * If SRCPROC owns the N rows or columns, then return N since I cannot be the * source process anymore. */ if( N <= INB ) return( N ); /* * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those N entries. */ nblocks = ( N - INB ) / NB + 1; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = PROC - SRCPROC ) < 0 ) mydist += NPROCS; /* * When mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROCS, either the last block is not full * and I own it, or the last block is full and I am the first process owning * only ilocblk full blocks. * * Therefore, when 0 < mydist <= nblocks - ilocblk * NPROCS, the number of rows * or columns preceeding me is INB + ilocblk*NB + (mydist-1)*(ilocblk+1)*NB, * i.e. INB - NB + ( ilocblk+1 ) * NB * mydist. Otherwise, there are exactly * NB * ilocblk * ( NPROCS - mydist ) rows or columns after me including mine, * i.e N + NB * ilocblk * ( mydist - NPROCS ) rows or columns preceeding me. */ if( nblocks < NPROCS ) return( ( ( mydist <= nblocks ) ? INB + NB * ( mydist - 1 ) : N ) ); ilocblk = nblocks / NPROCS; return( ( ( mydist <= ( nblocks - ilocblk * NPROCS ) ) ? INB - NB + ( ilocblk + 1 ) * NB * mydist : N + NB * ilocblk * ( mydist - NPROCS ) ) ); /* * End of PB_Cnpreroc */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Clcm.c0000640000175000017500000000440610363532303017233 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Clcm( int M, int N ) #else int PB_Clcm( M, N ) /* * .. Scalar Arguments .. */ int M, N; #endif { /* * Purpose * ======= * * PB_Clcm computes and returns the Least Common Multiple (LCM) of two * positive integers M and N. In fact, the routine computes the Greatest * Common Divisor (GCD) and use the property that M*N = GCD*LCM. * * Arguments * ========= * * M (input) INTEGER * On entry, M must be at least zero. * * N (input) INTEGER * On entry, N must be at least zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int gcd=1, m_val, n_val, t; /* .. * .. Executable Statements .. * */ if( M > N ) { m_val = N; n_val = M; } else { m_val = M; n_val = N; } while( m_val > 0 ) { while( !( m_val & 1 ) ) { /* * m is even */ m_val >>= 1; /* * if n is odd, gcd( m, n ) = gcd( m / 2, n ) */ if( !( n_val & 1 ) ) { /* * otherwise gcd( m, n ) = 2 * gcd( m / 2, n / 2 ) */ n_val >>= 1; gcd <<= 1; } } /* * m is odd now. If n is odd, gcd( m, n ) = gcd( m, ( m - n ) / 2 ). * Otherwise, gcd( m, n ) = gcd ( m, n / 2 ). */ n_val -= ( n_val & 1 ) ? m_val : 0; n_val >>= 1; while( n_val >= m_val ) { /* * If n is odd, gcd( m, n ) = gcd( m, ( m - n ) / 2 ). * Otherwise, gcd( m, n ) = gcd ( m, n / 2 ) */ n_val -= ( n_val & 1 ) ? m_val : 0; n_val >>= 1; } /* * n < m, gcd( m, n ) = gcd( n, m ) */ t = n_val; n_val = m_val; m_val = t; } return ( ( M * N ) / ( n_val * gcd ) ); /* * End of PB_Clcm */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrsmB.c0000640000175000017500000010233410363532303017726 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrsmB( PBTYP_T * TYPE, char * DIRECB, char * SIDE, char * UPLO, char * TRANSA, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB ) #else void PB_CptrsmB( TYPE, DIRECB, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ char * DIAG, * DIRECB, * SIDE, * TRANSA, * UPLO; int IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CptrsmB solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ). * * The submatrix X is overwritten on sub( B ). * * This is the inner-product algorithm using the logical LCM hybrid * and static blocking techniques. The submatrix operand sub( A ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECB (global input) pointer to CHAR * On entry, DIRECB specifies the direction in which the rows * or columns of sub( B ) should be looped over as follows: * DIRECB = 'F' or 'f' forward or increasing, * DIRECB = 'B' or 'b' backward or decreasing. * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) pointer to CHAR * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Broc, TranOp, conjg, * negone, * one, * talpha, * talph0, top, * zero; int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Alcmb, Ald, Amb, An, Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, Astart, BcurrocR, Bfwd, BiiD, BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD, BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR, BsrcR, LNorRT, WBCfr, WBCld, WBCapbX, WBCsum, WBRfr, WBRld, WBRapbX, WBRsum, ctxt, izero=0, k, kb, kbnext, kbprev, ktmp, lside, mycol, myrow, n, nb, nbb, notran, npcol, nprow, p=0, size, tmp, upper; TZPAD_T pad; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Ad0[DLEN_], DBUFB[DLEN_], WBCd[DLEN_], WBRd[DLEN_]; char * Aptr = NULL, * Bptr = NULL, * WBC = NULL, * WBR = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD ); lside = ( Mupcase( SIDE [0] ) == CLEFT ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( ( TranOp = Mupcase( TRANSA[0] ) ) == CNOTRAN ); LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; negone = TYPE->negone; pad = TYPE->Ftzpad; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ) and sub( B ) */ if( lside ) { BnD = An = M; BnR = N; Broc = CCOLUMN; BmyprocD = myrow; BnprocsD = nprow; BmyprocR = mycol; BnprocsR = npcol; BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ]; BsrcR = DESCB[CSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD ); Binb1R = PB_Cfirstnb( BnR, JB, DESCB[INB_], BnbR ); } else { BnD = An = N; BnR = M; Broc = CROW; BmyprocD = mycol; BnprocsD = npcol; BmyprocR = myrow; BnprocsR = nprow; BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ]; BsrcR = DESCB[RSRC_]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD ); Binb1R = PB_Cfirstnb( BnR, IB, DESCB[IMB_], BnbR ); } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Compute conjugate of alpha for the conjugate transpose cases */ if( TranOp == CCOTRAN ) { conjg = CCONJG; talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); } else { conjg = CNOCONJG; talpha = ALPHA; } /* * Retrieve BLACS combine topology, select backward ot forward substitution. */ if( LNorRT ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Astart = ( upper ? An - 1 : 0 ); } else { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Astart = ( upper ? 0 : An - 1 ); } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ Alcmb = 2 * nb * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process p owning the last row or column of sub( B ). */ if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !( Bfwd ) ) { tmp = PB_Cindxg2p( BnR - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR ); p = MModSub( tmp, BrocR, BnprocsR ); } /* * Loop over the processes rows or columns owning the BnR rows or columns of * sub( B ) to be processed. */ n = BnR; while( n > 0 ) { /* * Find out who is the active process row or column as well as the number of * rows or columns of sub( B ) it owns. */ BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) ); BnpR = PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR ); n -= BnpR; /* * Re-adjust the number of rows or columns to be handled at each step, in order * to average the message sizes and the computational granularity. */ if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 ); while( BnpR ) { nbb = MIN( nbb, BnpR ); /* * Describe the local contiguous panel of sub( B ) */ if( lside ) { PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD, BcurrocR, ctxt, Bld ); if( BisR || ( BmyprocR == BcurrocR ) ) Bptr = Mptr( B, BiiD, BiiR, Bld, size ); } else { PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR, BrocD, ctxt, Bld ); if( BisR || ( BmyprocR == BcurrocR ) ) Bptr = Mptr( B, BiiR, BiiD, Bld, size ); } talph0 = talpha; if( LNorRT ) { /* * Reuse sub( B ) and/or create vector WBC in process column owning the first * or last column of sub( A ) */ PB_CInOutV2( TYPE, &conjg, COLUMN, An, An, Astart, Ad0, nbb, Bptr, 0, 0, DBUFB, &Broc, &WBC, WBCd, &WBCfr, &WBCsum, &WBCapbX ); /* * Create WBR in process rows spanned by sub( A ) */ PB_COutV( TYPE, ROW, INIT, An, An, Ad0, nbb, &WBR, WBRd, &WBRfr, &WBRsum ); /* * Retrieve local quantities related to sub( A ) -> Ad0 */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size ); WBCld = WBCd[LLD_]; WBRld = WBRd[LLD_]; if( upper ) { /* * sub( A ) is upper triangular */ for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb ) { ktmp = An - k; kb = MIN( ktmp, Alcmb ); /* * Solve logical diagonal block, WBC contains the solution scattered in multiple * process columns and WBR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsm( TYPE, WBRsum, SIDE, UPLO, TRANSA, DIAG, kb, nbb, talph0, Aptr, k, k, Ad0, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Akq, WBRld, size ), WBRld ); /* * Update: only the part of sub( B ) to be solved at the next step is locally * updated and combined, the remaining part of the matrix to be solved later * is only locally updated. */ if( Akp > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( WBCsum ) { kbprev = MIN( k, Alcmb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow, Arow, nprow ); Akp -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &ktmp, &nbb, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld ); Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol ); gsum2d( ctxt, ROW, &top, ktmp, nbb, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, myrow, Asrc ); if( mycol != Asrc ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &ktmp, &nbb, &izero, zero, zero, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld ); } if( ( Akp > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &Akp, &nbb, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, WBC, &WBCld ); } else { if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &Akp, &nbb, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, WBC, &WBCld ); } } talph0 = one; } } else { /* * sub( A ) is lower triangular */ for( k = 0; k < An; k += Alcmb ) { ktmp = An - k; kb = MIN( ktmp, Alcmb ); /* * Solve logical diagonal block, WBC contains the solution scattered in multiple * process columns and WBR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsm( TYPE, WBRsum, SIDE, UPLO, TRANSA, DIAG, kb, nbb, talph0, Aptr, k, k, Ad0, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Akq, WBRld, size ), WBRld ); /* * Update: only the part of sub( B ) to be solved at the next step is locally * updated and combined, the remaining part of the matrix to be solved later is * only locally updated. */ Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Anp0 = Anp - Akp ) > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( WBCsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, Alcmb ); ktmp = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow, nprow ); Anp0 -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &ktmp, &nbb, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld ); Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol ); gsum2d( ctxt, ROW, &top, ktmp, nbb, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, myrow, Asrc ); if( mycol != Asrc ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &ktmp, &nbb, &izero, zero, zero, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld ); } if( ( Anp0 > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &Anp0, &nbb, &Anq0, negone, Mptr( Aptr, Akp+ktmp, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, Mptr( WBC, Akp+ktmp, 0, WBCld, size ), &WBCld ); } else { if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &Anp0, &nbb, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld, talph0, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld ); } } talph0 = one; } } /* * Combine the scattered resulting matrix WBC */ if( WBCsum && ( Anp > 0 ) ) gsum2d( ctxt, ROW, &top, Anp, nbb, WBC, WBCld, myrow, WBCd[CSRC_] ); /* * sub( B ) := WBC (if necessary) */ if( WBCapbX ) PB_Cpaxpby( TYPE, &conjg, An, nbb, one, WBC, 0, 0, WBCd, COLUMN, zero, Bptr, 0, 0, DBUFB, &Broc ); } else { /* * Reuse sub( B ) and/or create vector WBR in process row owning the first or * last row of sub( A ) */ PB_CInOutV2( TYPE, &conjg, ROW, An, An, Astart, Ad0, nbb, Bptr, 0, 0, DBUFB, &Broc, &WBR, WBRd, &WBRfr, &WBRsum, &WBRapbX ); /* * Create WBC in process columns spanned by sub( A ) */ PB_COutV( TYPE, COLUMN, INIT, An, An, Ad0, nbb, &WBC, WBCd, &WBCfr, &WBCsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size ); WBCld = WBCd[LLD_]; WBRld = WBRd[LLD_]; if( upper ) { /* * sub( A ) is upper triangular */ for( k = 0; k < An; k += Alcmb ) { ktmp = An - k; kb = MIN( ktmp, Alcmb ); /* * Solve logical diagonal block, WBR contains the solution scattered in multiple * process rows and WBC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsm( TYPE, WBCsum, SIDE, UPLO, TRANSA, DIAG, nbb, kb, talph0, Aptr, k, k, Ad0, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Akq, WBRld, size ), WBRld ); /* * Update: only the part of sub( B ) to be solved at the next step is locally * updated and combined, the remaining part of the matrix to be solved later is * only locally updated. */ Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq0 = Anq - Akq ) > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( WBRsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, Alcmb ); ktmp = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol, npcol ); Anq0 -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &ktmp, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, talph0, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld ); Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow ); gsum2d( ctxt, COLUMN, &top, nbb, ktmp, Mptr( WBR, 0, Akq, WBRld, size ), WBRld, Asrc, mycol ); if( myrow != Asrc ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &nbb, &ktmp, &izero, zero, zero, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld ); } if( ( Anp0 > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &Anq0, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, Akq+ktmp, Ald, size ), &Ald, talph0, Mptr( WBR, 0, Akq+ktmp, WBRld, size ), &WBRld ); } else { if( Anp0 > 0 ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &Anq0, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, talph0, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld ); } } talph0 = one; } } else { /* * sub( A ) is lower triangular */ for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb ) { ktmp = An - k; kb = MIN( ktmp, Alcmb ); /* * Solve logical diagonal block, WBR contains the solution scattered in multiple * process rows and WBC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsm( TYPE, WBCsum, SIDE, UPLO, TRANSA, DIAG, nbb, kb, talph0, Aptr, k, k, Ad0, Mptr( WBC, Akp, 0, WBCld, size ), WBCld, Mptr( WBR, 0, Akq, WBRld, size ), WBRld ); /* * Update: only the part of sub( B ) to be solved at the next step is locally * updated and combined, the remaining part of the matrix to be solved later * is only locally updated. */ if( Akq > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( WBRsum ) { kbprev = MIN( k, Alcmb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol, Acol, npcol ); Akq -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &ktmp, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, talph0, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld ); Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow ); gsum2d( ctxt, COLUMN, &top, nbb, ktmp, Mptr( WBR, 0, Akq, WBRld, size ), WBRld, Asrc, mycol ); if( myrow != Asrc ) pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &nbb, &ktmp, &izero, zero, zero, Mptr( WBR, 0, Akq, WBRld, size ), &WBRld ); } if( ( Anp0 > 0 ) && ( Akq > 0 ) ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &Akq, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, talph0, WBR, &WBRld ); } else { if( Anp0 > 0 ) gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &nbb, &Akq, &Anp0, negone, Mptr( WBC, Akp, 0, WBCld, size ), &WBCld, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, talph0, WBR, &WBRld ); } } talph0 = one; } } /* * Combine the scattered resulting matrix WBR */ if( WBRsum && ( Anq > 0 ) ) gsum2d( ctxt, COLUMN, &top, nbb, Anq, WBR, WBRld, WBRd[RSRC_], mycol ); /* * sub( B ) := WBR (if necessary) */ if( WBRapbX ) PB_Cpaxpby( TYPE, &conjg, nbb, An, one, WBR, 0, 0, WBRd, ROW, zero, Bptr, 0, 0, DBUFB, &Broc ); } if( WBCfr ) free( WBC ); if( WBRfr ) free( WBR ); /* * Go to the next contiguous panel if any residing in this process row or column */ BnpR -= nbb; if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb; } /* * Go to next or previous process row or column owning some of sub( B ) */ if( !( BisR ) ) p = ( Bfwd ? MModAdd1( p, BnprocsR ) : MModSub1( p, BnprocsR ) ); } if( TranOp == CCOTRAN ) free( talpha ); /* * End of PB_CptrsmB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpdotNN.c0000640000175000017500000004346510363532303017672 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpdotNN( PBTYP_T * TYPE, int N, char * DOT, char * X, int IX, int JX, int * DESCX, int INCX, char * Y, int IY, int JY, int * DESCY, int INCY, VVDOT_T FDOT ) #else void PB_CpdotNN( TYPE, N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, FDOT ) /* * .. Scalar Arguments .. */ int INCX, INCY, IX, IY, JX, JY, N; char * DOT; PBTYP_T * TYPE; VVDOT_T FDOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CpdotNN forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ) or DOT := sub( X )**H * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Both subvectors are assumed to be not distributed. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) pointer to CHAR * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * FDOT (local input) pointer to a function of type VVDOT * On entry, FDOT points to a subroutine that computes the local * dot product of two vectors. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * top; int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnprocsR, XprocR, Xrow, Ycol, Yii, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnprocsR, YprocR, Yrow, csrc, ctxt, ione=1, mycol, myrow, npcol, nprow, rsrc, size; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 ) { Xld = DESCX[LLD_]; Xlinc = Xld; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); } else { Xld = DESCX[LLD_]; Xlinc = 1; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 ) { Yld = DESCY[LLD_]; Ylinc = Yld; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); } else { Yld = DESCY[LLD_]; Ylinc = 1; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); } /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * Neither sub( X ) nor sub( Y ) are distributed */ if( !XisR ) { /* * sub( X ) is not replicated */ if( !( YisR ) ) { /* * sub( Y ) is not replicated */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) /* * If I am not in XprocR or YprocR, then return immediately */ return; size = TYPE->size; if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( XprocR == YprocR ) { /* * sub( X ) and sub( Y ) are in the same process row or column */ FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); } else { /* * sub( X ) and sub( Y ) are in a different process row or column */ if( XmyprocR == XprocR ) { buf = PB_Cmalloc( N * size ); /* * Send sub( X ) to where sub( Y ) resides, and receive sub( Y ) from the same * location. */ if( XisRow ) { TYPE->Cgesd2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, YprocR, XmyprocD ); TYPE->Cgerv2d( ctxt, 1, N, buf, 1, YprocR, XmyprocD ); } else { TYPE->Cgesd2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, XmyprocD, YprocR ); TYPE->Cgerv2d( ctxt, N, 1, buf, N, XmyprocD, YprocR ); } FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( YmyprocR == YprocR ) { buf = PB_Cmalloc( N * size ); /* * Send sub( Y ) to where sub( X ) resides, and receive sub( X ) from the same * location. */ if( YisRow ) { TYPE->Cgesd2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); TYPE->Cgerv2d( ctxt, 1, N, buf, 1, XprocR, YmyprocD ); } else { TYPE->Cgesd2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); TYPE->Cgerv2d( ctxt, N, 1, buf, N, YmyprocD, XprocR ); } FDOT( &N, DOT, buf, &ione, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); if( buf ) free( buf ); } } } else { /* * sub( X ) and sub( Y ) are not both row or column vectors */ if( ( XmyprocR == XprocR ) && ( YmyprocR == YprocR ) ) { /* * If I am at the intersection of the process row and column, then compute the * dot product and broadcast it in my process row and column. */ FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); } else if( XmyprocR == XprocR ) { if( XisRow ) { Xscope = CROW; rsrc = XprocR; csrc = YprocR; } else { Xscope = CCOLUMN; rsrc = YprocR; csrc = XprocR; } top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET ); TYPE->Cgebr2d( ctxt, &Xscope, top, 1, 1, DOT, 1, rsrc, csrc ); } else if( YmyprocR == YprocR ) { if( YisRow ) { Yscope = CROW; rsrc = YprocR; csrc = XprocR; } else { Yscope = CCOLUMN; rsrc = XprocR; csrc = YprocR; } top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET ); TYPE->Cgebr2d( ctxt, &Yscope, top, 1, 1, DOT, 1, rsrc, csrc ); } } } else { /* * sub( Y ) is replicated */ if( XmyprocR == XprocR ) { /* * If I am in the process row (resp. column) owning sub( X ), then compute the * dot product and broadcast in my column (resp. row). */ size = TYPE->size; FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); } } else { /* * Otherwise, receive the dot product */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocD, XprocR ); } } } } else { /* * sub( X ) is replicated */ if( YisR || ( YmyprocR == YprocR ) ) { /* * If I own a piece of sub( Y ), then compute the dot product */ size = TYPE->size; FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); } if( !YisR ) { /* * If sub( Y ) is not replicated, then broadcast the result to the other * processes that own a piece of sub( X ), but were not involved in the above * dot-product computation. */ if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 ); else TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, YmyprocD, YprocR ); } } } /* * End of PB_CpdotNN */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpsyr2.c0000640000175000017500000005541210363532303017542 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpsyr2( PBTYP_T * TYPE, char * UPLO, int N, int K, char * ALPHA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR, char * A, int IA, int JA, int * DESCA, TZSYR2_T SYR2 ) #else void PB_Cpsyr2( TYPE, UPLO, N, K, ALPHA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR, A, IA, JA, DESCA, SYR2 ) /* * .. Scalar Arguments .. */ char * UPLO; int IA, JA, K, LDXC, LDXR, LDYC, LDYR, N; char * ALPHA; PBTYP_T * TYPE; TZSYR2_T SYR2; /* * .. Array Arguments .. */ int * DESCA; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Cpsyr2 performs a symmetric or Hermitian rank-2 update of the sub- * matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric or Hermitian submatrix sub( A ) are to be * referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the local number of columns of the lo- * cal array XC and the local number of rows of the local array * XR. K mut be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (local input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,K). Before entry, * this array contains the local entries of the matrix XC. * * LDXC (local input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, Lp( IA, N ) ). * * YC (local input) pointer to CHAR * On entry, YC is an array of dimension (LDYC,K). Before entry, * this array contains the local entries of the matrix YC. * * LDYC (local input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, Lp( IA, N ) ). * * XR (local input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,Kx), where Kx is * at least Lc( JA, N ). Before entry, this array contains the * local entries of the matrix XR. * * LDXR (local input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least max( 1, K ). * * YR (local input) pointer to CHAR * On entry, YR is an array of dimension (LDYR,Ky), where Ky is * at least Lc( JA, N ). Before entry, this array contains the * local entries of the matrix YR. * * LDYR (local input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least max( 1, K ). * * A (local input/local output) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * SYR2 (local input) pointer to function of type TZSYR2_T * On entry, SYR2 specifies the function performing the update * of a single block. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq, Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl, IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1, YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp, jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( N <= 0 ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in * all processes. */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) ); IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) ); Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[LLD_]; size = TYPE->size; if( IsRowRepl && IsColRepl ) { SYR2( TYPE, UPLO, Amp, Anq, K, 0, ALPHA, XC, LDXC, YC, LDYC, XR, LDXR, YR, LDYR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); return; } XCinc = size; XRinc = LDXR * size; YCinc = size; YRinc = LDYR * size; upper = ( Mupcase( UPLO[0] ) == CUPPER ); lower = ( Mupcase( UPLO[0] ) == CLOWER ); /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( IsRowRepl ? Amb : nprow * Amb ); qnb = ( IsColRepl ? Anb : npcol * Anb ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ SYR2( TYPE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be updated and one is * planning to go south in the table, it is neccessary to take care of the * remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; SYR2( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(Xjj+inbloc)*XRinc, LDXR, YR+(Xjj+inbloc)*YRinc, LDYR, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald ); } Aii += imbloc; Xii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be updated and one is * planning to go east in the table, it is neccessary to take care of the * remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; SYR2( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA, XC+(Xii+imbloc)*XCinc, LDXC, YC+(Xii+imbloc)*YCinc, LDYC, XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald ); } Ajj += inbloc; Xjj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row indexes in A and XC. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row indexes in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Update the upper triangular part of sub( A ) we just skipped when necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYR2( TYPE, UPLO, mbloc, inbloc, K, lcmt, ALPHA, XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; Xoffi = ioffx; ioffx += mbloc; } /* * Update the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYR2( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA, XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * Update the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A and XR. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A and XR. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; } /* * Update the lower triangular part of sub( A ). */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { SYR2( TYPE, ALL, m1, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR, Mptr( A, Aii, Ajj, Ald, size ), Ald ); Ajj += tmp1; Xjj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; SYR2( TYPE, UPLO, imbloc, nbloc, K, lcmt, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR, Mptr( A, Aii, joffd+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; joffd += nbloc; Xoffj = joffx; joffx += nbloc; } /* * Update the upper triangular part of sub( A ). */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) SYR2( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR, Mptr( A, Aii, joffd+1, Ald, size ), Ald ); tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * Update the lower triangular part of sub( A ). */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) SYR2( TYPE, ALL, m1, tmp1, K, 0, ALPHA, XC+(Xoffi+1)*XCinc, LDXC, YC+(Xoffi+1)*YCinc, LDYC, XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Update the upper triangular part of sub( A ). */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYR2( TYPE, UPLO, mbloc, nbloc, K, lcmt, ALPHA, XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; Xoffi = ioffx; ioffd += mbloc; ioffx += mbloc; } /* * Update the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYR2( TYPE, ALL, tmp1, nbloc, K, 0, ALPHA, XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald ); tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; Xoffj += nbloc; /* * Update the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA, XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC, XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * End of PB_Cpsyr2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMcontig.c0000640000175000017500000003456110363532303020213 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CVMcontig( PB_VM_T * VM, int * NRPQ, int * NCPQ, int * IOFF, int * JOFF ) #else void PB_CVMcontig( VM, NRPQ, NCPQ, IOFF, JOFF ) /* * .. Scalar Arguments .. */ int * IOFF, * JOFF, * NCPQ, * NRPQ; PB_VM_T * VM; #endif { /* * Purpose * ======= * * PB_CVMcontig computes the maximum number of contiguous rows and * columns corresponding to the first diagonals of the local virtual * matrix VM. This routine also returns the row and column offset of the * first diagonal entry. * * Arguments * ========= * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * NRPQ (local output) INTEGER * On exit, NRPQ specifies the number of contiguous rows corres- * ponding to the first diagonals of the local virtual matrix * VM. On exit, NRPQ is at least zero. * * NCPQ (local output) INTEGER * On exit, NCPQ specifies the number of contiguous columns cor- * responding to the first diagonals of the local virtual matrix * VM. On exit, NRPQ is at least zero. * * IOFF (local output) INTEGER * On exit, IOFF is the local row offset of the first row cor- * responding to a diagonal entry of the Virtual matrix VM. If * no diagonals are found, the value zero is returned. On exit, * IOFF is at least zero. * * JOFF (local output) INTEGER * On exit, JOFF is the local column offset of the first column * corresponding to a diagonal entry of the Virtual matrix VM. * If no diagonals are found, the value zero is returned. On * exit, JOFF is at least zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ColCont=1, FirstD=0, GoSouth, GoEast, RowCont=1, ilow, imbloc, inbloc, iupp, lcmt, lcmtnn=0, lcmt00, lmbloc, lnbloc, low, mb, mblks, mbloc, mcur=0, mcurd, md=0, nb, nblks, nbloc, ncur=0, ncurd, nd=0, npq=0, pmb, qnb, tmp1, tmp2, upp; /* .. * .. Executable Statements .. * */ *NRPQ = 0; *NCPQ = 0; *IOFF = 0; *JOFF = 0; mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks. */ if( ( mblks == 0 ) || ( nblks == 0 ) ) return; /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; iupp = VM->iupp; upp = VM->upp; pmb = VM->nprow * mb; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; ilow = VM->ilow; low = VM->low; qnb = VM->npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the coordinates of the * current entry in the LCM table (mcur,ncur). */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp * Compute the number of diagonals in this block as well as lcm value (lcntnn) * that its neighbor should have to preserve continuity. */ if( lcmt00 >= 0 ) { tmp2 = ( ( tmp1 = imbloc - lcmt00 ) > 0 ? tmp1 : 0 ); if( tmp2 < inbloc ) { npq = tmp2; lcmtnn = -npq; } else if ( tmp2 == inbloc ) { npq = inbloc; lcmtnn = 0; } else { npq = inbloc; lcmtnn = lcmt00 + npq; } *IOFF += lcmt00; } else { tmp2 = ( ( tmp1 = inbloc + lcmt00 ) > 0 ? tmp1 : 0 ); if( tmp2 < imbloc ) { npq = tmp2; lcmtnn = npq; } else if ( tmp2 == imbloc ) { npq = tmp2; lcmtnn = 0; } else { npq = imbloc; lcmtnn = lcmt00 - npq; } *JOFF -= lcmt00; } /* * Save coordinates of last block owning diagonals. Set FirstD to one, since * a block owning diagonals has been found. */ md = 0; nd = 0; FirstD = 1; /* * Those rows and columns are obviously contiguous */ *NRPQ = *NCPQ = npq; /* * Decide whether one should go south or east in the table: Go east if the * block below the current one only owns lower entries. If this block, however, * owns diagonals, then go south. */ GoSouth = !( GoEast = ( lcmt00 - iupp + upp - pmb < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value. */ lcmt00 -= iupp - upp + pmb; mcur++; if( !FirstD ) *IOFF += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value accordingly. */ while( ( mcur < mblks ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mcur++; if( !FirstD ) *IOFF += mb; } /* * Return if no more row in the LCM table. */ if( mcur >= mblks ) goto l_end; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mbloc = mb; mcurd = mcur; while( ( mcurd < mblks ) && ( lcmt >= ilow ) ) { if( mcurd == mblks-1 ) mbloc = lmbloc; /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. * If this is not the first one, update the booleans telling if the rows * and/or columns are contiguous. */ if( FirstD ) { RowCont = RowCont && ( ( ( mcurd == md+1 ) && ( lcmtnn <= 0 ) && ( lcmt <= 0 ) ) || ( ( mcurd == md ) && ( ncur == nd+1 ) && ( lcmtnn == lcmt ) ) ); ColCont = ColCont && ( ( ( ncur == nd+1 ) && ( lcmtnn >= 0 ) && ( lcmt >= 0 ) ) || ( ( ncur == nd ) && ( mcurd == md+1 ) && ( lcmtnn == lcmt ) ) ); } /* * Compute the number of diagonals in this block as well as lcm value (lcntnn) * that its neighbor should have to preserve continuity. */ if( lcmt >= 0 ) { tmp2 = ( ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < inbloc ) { npq = tmp2; lcmtnn = -npq; } else if ( tmp2 == inbloc ) { npq = inbloc; lcmtnn = 0; } else { npq = inbloc; lcmtnn = lcmt + npq; } if( !FirstD ) *IOFF += lcmt; } else { tmp2 = ( ( tmp1 = inbloc + lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < mbloc ) { npq = tmp2; lcmtnn = npq; } else if ( tmp2 == mbloc ) { npq = tmp2; lcmtnn = 0; } else { npq = mbloc; lcmtnn = lcmt - npq; } if( !FirstD ) *JOFF -= lcmt; } /* * Save coordinates of last block owning diagonals. Set FirstD to one, since * a block owning diagonals has been found. */ md = mcurd; nd = ncur; FirstD = 1; /* * If rows (resp columns) are still contiguous, add those npq rows (resp. * columns). */ if( RowCont ) *NRPQ += npq; if( ColCont ) *NCPQ += npq; /* * Keep going south until there are no more blocks owning diagonals */ lcmt00 = lcmt; lcmt -= pmb; mcur = mcurd++; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; ncur++; if( !FirstD ) *JOFF += inbloc; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value. */ lcmt00 += low - ilow + qnb; ncur++; if( !FirstD ) *JOFF += inbloc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value. */ while( ( ncur < nblks ) && ( lcmt00 < low ) ) { lcmt00 += qnb; ncur++; if( !FirstD ) *JOFF += nb; } /* * Return if no more column in the LCM table. */ if( ncur >= nblks ) goto l_end; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nbloc = nb; ncurd = ncur; while( ( ncurd < nblks ) && ( lcmt <= iupp ) ) { if( ncurd == nblks-1 ) nbloc = lnbloc; /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. * If this is not the first one, update the booleans telling if the rows * and/or columns are contiguous. */ if( FirstD ) { RowCont = RowCont && ( ( ( mcur == md+1 ) && ( lcmtnn <= 0 ) && ( lcmt <= 0 ) ) || ( ( mcur == md ) && ( ncurd == nd+1 ) && ( lcmtnn == lcmt ) ) ); ColCont = ColCont && ( ( ( ncurd == nd+1 ) && ( lcmtnn >= 0 ) && ( lcmt >= 0 ) ) || ( ( ncurd == nd ) && ( mcur == md+1 ) && ( lcmtnn == lcmt ) ) ); } /* * Compute the number of diagonals in this block as well as lcm value (lcntnn) * that its neighbor should have to preserve continuity. */ if( lcmt >= 0 ) { tmp2 = ( ( tmp1 = imbloc - lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < nbloc ) { npq = tmp2; lcmtnn = -npq; } else if ( tmp2 == nbloc ) { npq = nbloc; lcmtnn = 0; } else { npq = nbloc; lcmtnn = lcmt + npq; } if( !FirstD ) *IOFF += lcmt; } else { tmp2 = ( ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < imbloc ) { npq = tmp2; lcmtnn = npq; } else if ( tmp2 == imbloc ) { npq = tmp2; lcmtnn = 0; } else { npq = imbloc; lcmtnn = lcmt - npq; } if( !FirstD ) *JOFF -= lcmt; } /* * Save coordinates of last block owning diagonals. Set FirstD to one, since * a block owning diagonals has been found. */ md = mcur; nd = ncurd; FirstD = 1; /* * If rows (resp columns) are still contiguous, add those npq rows (resp. * columns). */ if( RowCont ) *NRPQ += npq; if( ColCont ) *NCPQ += npq; /* * Keep going east until there are no more blocks owning diagonals. */ lcmt00 = lcmt; lcmt += qnb; ncur = ncurd++; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mcur++; if( !FirstD ) *IOFF += imbloc; } /* * Loop over the remaining columns of the LCM table. */ nbloc = nb; while( ( RowCont || ColCont ) && ( ncur < nblks ) ) { if( ncur == nblks-1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value accordingly. */ while( ( mcur < mblks ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mcur++; if( !FirstD ) *IOFF += mb; } /* * Return if no more row in the LCM table. */ if( mcur >= mblks ) goto l_end; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mbloc = mb; mcurd = mcur; while( ( mcurd < mblks ) && ( lcmt >= low ) ) { if( mcurd == mblks-1 ) mbloc = lmbloc; /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. * If this is not the first one, update the booleans telling if the rows * and/or columns are contiguous. */ if( FirstD ) { RowCont = RowCont && ( ( ( mcurd == md+1 ) && ( lcmtnn <= 0 ) && ( lcmt <= 0 ) ) || ( ( mcurd == md ) && ( ncur == nd+1 ) && ( lcmtnn == lcmt ) ) ); ColCont = ColCont && ( ( ( ncur == nd+1 ) && ( lcmtnn >= 0 ) && ( lcmt >= 0 ) ) || ( ( ncur == nd ) && ( mcurd == md+1 ) && ( lcmtnn == lcmt ) ) ); } /* * Compute the number of diagonals in this block as well as lcm value (lcntnn) * that its neighbor should have to preserve continuity. */ if( lcmt >= 0 ) { tmp2 = ( ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < nbloc ) { npq = tmp2; lcmtnn = -npq; } else if ( tmp2 == nbloc ) { npq = nbloc; lcmtnn = 0; } else { npq = nbloc; lcmtnn = lcmt + npq; } if( !FirstD ) *IOFF += lcmt; } else { tmp2 = ( ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ); if( tmp2 < mbloc ) { npq = tmp2; lcmtnn = npq; } else if ( tmp2 == mbloc ) { npq = tmp2; lcmtnn = 0; } else { npq = mbloc; lcmtnn = lcmt - npq; } if( !FirstD ) *JOFF -= lcmt; } /* * Save coordinates of last block owning diagonals. Set FirstD to one, since * a block owning diagonals has been found. */ md = mcurd; nd = ncur; FirstD = 1; /* * If rows (resp columns) are still contiguous, add those npq rows (resp. * columns). */ if( RowCont ) *NRPQ += npq; if( ColCont ) *NCPQ += npq; /* * Keep going south until there are no more blocks owning diagonals */ lcmt00 = lcmt; lcmt -= pmb; mcur = mcurd++; } /* * I am done with this column of the LCM table. Go to the next column until * there are no more column in the table. */ lcmt00 += qnb; ncur++; if( !FirstD ) *JOFF += nb; } l_end: /* * If no diagonals were found, reset IOFF and JOFF to zero. */ if( !FirstD ) { *IOFF = 0; *JOFF = 0; } /* * End of PB_CVMcontig */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cmalloc.c0000640000175000017500000000313310363532303017723 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ char * PB_Cmalloc( int LENGTH ) #else char * PB_Cmalloc( LENGTH ) /* * .. Scalar Arguments .. */ int LENGTH; #endif { /* * Purpose * ======= * * PB_Cmalloc allocates a dynamic memory buffer. In case of failure, the * program is stopped by calling Cblacs_abort. * * Arguments * ========= * * LENGTH (local input) INTEGER * On entry, LENGTH specifies the length in bytes of the buffer * to be allocated. If LENGTH is less or equal than zero, this * function returns NULL. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * bufptr = NULL; /* .. * .. Executable Statements .. * */ if( LENGTH > 0 ) { if( !( bufptr = (char *) malloc( (unsigned)LENGTH ) ) ) { (void) fprintf( stderr, "Not enough memory on line %d of file %s!!\n", __LINE__, __FILE__ ); Cblacs_abort( -1, -1 ); } } return( bufptr ); /* * End of PB_Cmalloc */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctztrmv.c0000640000175000017500000002067310363532303020032 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctztrmv( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * TRANS, char * DIAG, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * X, int LDX, char * Y, int LDY ) #else void PB_Ctztrmv( TYPE, SIDE, UPLO, TRANS, DIAG, M, N, K, IOFFD, ALPHA, A, LDA, X, LDX, Y, LDY ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO, * TRANS, * DIAG; int IOFFD, K, LDA, LDX, LDY, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * A, * X, * Y; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctztrmv performs the matrix-vector operation * * y := A * x, or y := A' * x, or y := conjg( A' ) * x, * * where alpha and beta are scalars, x and y are vectors, and A is an * m by n trapezoidal triangular matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * TRANS (input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': y := A*x, * * TRANS = 'T' or 't': y := A'*x, * * TRANS = 'C' or 'c': y := A'*x or y := conjg( A' )*x. * * DIAG (input) pointer to CHAR * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (dummy) pointer to CHAR * In this routine, ALPHA is a dummy (unused) argument. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. When DIAG = 'U' or 'u', the * diagonal elements of A are not referenced either, but are * assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) pointer to CHAR * On entry, X is an array of dimension (LDX,Kx). Before entry, * with TRANS = 'N' or 'n', the array X must contain the n ele- * ment vector x corresponding to the columns of A. Otherwise, * the array X must contain the m element vector x corresponding * to the rows of A. When TRANS is 'N' or 'n', LDX is at least * 1, and Kx is at least N. Otherwise, LDX is at least max(1,M), * and Kx is at least 1. * * LDX (input) INTEGER * On entry, LDX specifies the leading dimension of the array X. * LDX must be at least 1 when TRANS is 'N' or 'n' and * max( 1, M ) otherwise. * * Y (input/output) pointer to CHAR * On entry, Y is an array of dimension (LDY,Ky). On exit, with * TRANS = 'N' or 'n', the array Y contains the m element vector * y corresponding to the rows of A. Otherwise, the array Y con- * tains the n element vector y corresponding to the columns of * A. When TRANS is 'N' or 'n', LDY is at least max( 1, M ), and * Ky is at least 1. Otherwise, LDY is at least 1, and Ky is at * least N. On exit, Y is overwritten by the partial updated * vector y. * * LDY (input) INTEGER * On entry, LDY specifies the leading dimension of the array Y. * LDY must be at least max( 1, M ) when TRANS is 'N' or * 'n' and 1 otherwise. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ione = 1; char * Aptr = NULL; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( ( Mupcase( UPLO[0] ) == CLOWER ) || ( Mupcase( UPLO[0] ) == CUPPER ) ) { Aptr = PB_Cmalloc( M * N * TYPE->size ); TYPE->Ftzpadcpy( C2F_CHAR( UPLO ), C2F_CHAR( DIAG ), &M, &N, &IOFFD, A, &LDA, Aptr, &M ); if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, Aptr, &M, X, &LDX, TYPE->one, Y, &ione ); } else { TYPE->Fgemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, Aptr, &M, X, &ione, TYPE->one, Y, &LDY ); } if( Aptr ) free( Aptr ); } else { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fgemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, A, &LDA, X, &LDX, TYPE->one, Y, &ione ); } else { TYPE->Fgemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, A, &LDA, X, &ione, TYPE->one, Y, &LDY ); } } /* * End of PB_Ctztrmv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsyrk.c0000640000175000017500000001744110363532303020031 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsyrk( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * AC, int LDAC, char * AR, int LDAR, char * C, int LDC ) #else void PB_Ctzsyrk( TYPE, UPLO, M, N, K, IOFFD, ALPHA, AC, LDAC, AR, LDAR, C, LDC ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDAC, LDAR, LDC, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * AC, * AR, * C; #endif { /* * Purpose * ======= * * PB_Ctzsyrk performs the trapezoidal symmetric or Hermitian rank k * operation: * * C := alpha * AC * AR + C, * * where alpha is a scalar, AC is an m by k matrix, AR is an k by n ma- * trix and C is an m by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix C is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of C is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of C is referenced, * * otherwise all of the matrix C is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix C. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix C. * N must be at least zero. * * K (input) INTEGER * On entry, K specifies the number of columns of the matrix AC, * and the number of rows of the matrix AR. K must be at least * zero. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of C as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal C( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal C( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal C( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * AC (input) pointer to CHAR * On entry, AC is an array of dimension (LDAC,K) containing the * m by k matrix AC. * * LDAC (input) INTEGER * On entry, LDAC specifies the leading dimension of the array * AC. LDAC must be at least max( 1, M ). * * AR (input) pointer to CHAR * On entry, AR is an array of dimension (LDAR,N) containing the * k by n matrix AR. * * LDAR (input) INTEGER * On entry, LDAR specifies the leading dimension of the array * AR. LDAR must be at least K. * * C (input/output) pointer to CHAR * On entry, C is an array of dimension (LDC,N) containing the m * by n matrix A. Only the trapezoidal part of C determined by * UPLO and IOFFD is updated. * * LDC (input) INTEGER * On entry, LDC specifies the leading dimension of the array C. * LDC must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, j1, m1, mn, n1, size; GEMM_T gemm; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, AR, &LDAR, one, C, &LDC ); if( ( n1 = MIN( M-IOFFD, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsyrk( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, one, Mptr( C, i1, j1, LDC, size ), &LDC ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, Mptr( AC, i1, 0, LDAC, size ), &LDAC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, i1, j1, LDC, size ), &LDC ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm; mn = MIN( M - IOFFD, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &m1, &n1, &K, ALPHA, AC, &LDAC, AR, &LDAR, one, C, &LDC ); TYPE->Fsyrk( C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), &n1, &K, ALPHA, Mptr( AC, m1, 0, LDAC, size ), &LDAC, one, Mptr( C, m1, j1, LDC, size ), &LDC ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &n1, &K, ALPHA, AC, &LDAC, Mptr( AR, 0, j1, LDAR, size ), &LDAR, one, Mptr( C, 0, j1, LDC, size ), &LDC ); } } else { TYPE->Fgemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &M, &N, &K, ALPHA, AC, &LDAC, AR, &LDAR, TYPE->one, C, &LDC ); } /* * End of PB_Ctzsyrk */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cg2lrem.c0000640000175000017500000001447010363532303017652 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cg2lrem( int IG, int INB, int NB, int MYPROC, int SRCPROC, int NPROCS ) #else int PB_Cg2lrem( IG, INB, NB, MYPROC, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int IG, INB, NB, NPROCS, MYPROC, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cg2lrem computes the local index of a matrix entry pointed to by * the global index IG. Note that when MYPROC is not the process owning * this entry, this routine returns the closest larger local index cor- * responding to IG just like the routine PB_Cinfog2l. * * Arguments * ========= * * IG (global input) INTEGER * On entry, IG specifies the global index of the matrix entry. * IG must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * MYPROC (global input) INTEGER * On entry, MYPROC specifies the process number in which the * value of the local index is to be computed. MYPROC must be at * least zero and strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, if SRCPROC = -1, the data is not distributed but * replicated, in which case this routine returns IG in all * processes. Otherwise, the value of SRCPROC is ignored. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ilocblk, mydist, nblocks, proc; /* .. * .. Executable Statements .. * */ /* * The data is not distributed, or there is just one process in this dimension * of the grid. */ if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) return( IG ); /* * IG refers to an entry in the first block */ if( IG < INB ) return( ( MYPROC == SRCPROC ? IG : 0 ) ); /* * The discussion goes as follows: compute my distance from the source process * so that within this process coordinate system, the source process is the * process such that mydist = 0, or equivalently MYROC == SRCPROC. * * Find out the global coordinate of the block IG belongs to (nblocks), as well * as the minimum local number of blocks that every process has. * * when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROCS, I own ilocblk full blocks * but not IG, or I own ilocblk + 1 blocks and the entry IG refers to. */ if( MYPROC == SRCPROC ) { /* * If I am the source process and there are less than NPROCS blocks, then * the local index in that process is INB. */ nblocks = ( IG - INB ) / NB + 1; if( nblocks < NPROCS ) return( INB ); /* * IG refers to an entry that is not in the first block, find out which process * has it. */ proc = SRCPROC + nblocks; proc -= ( proc / NPROCS ) * NPROCS; /* * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only three * possible cases: * * 1) When 0 = mydist = nblocks - ilocblk * NPROCS = 0 and I don't own IG, in * which case II = INB + ( ilocblk - 1 ) * NB. Note that this case cannot * happen when ilocblk is zero, since nblocks is at least one. * * 2) When 0 = mydist = nblocks - ilocblk * NPROCS = 0 and I own IG, in which * case IG and II can respectively be written as INB + (nblocks-1)*NB + IL, * INB + (ilocblk-1) * NB + IL. That is II = IG + ( ilocblk - nblocks )*NB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 3) mydist = 0 < nblocks - ilocblk * NPROCS, the source process owns * ilocblk+1 full blocks, and therefore II = INB + ilocblk * NB. Note * that when ilocblk is zero, II is just INB. */ ilocblk = nblocks / NPROCS; if( ilocblk * NPROCS >= nblocks ) return( ( ( MYPROC == proc ) ? IG + ( ilocblk - nblocks ) * NB : INB + ( ilocblk - 1 ) * NB ) ); else return( INB + ilocblk * NB ); } else { /* * IG refers to an entry that is not in the first block, find out which process * has it. */ nblocks = ( IG -= INB ) / NB + 1; proc = SRCPROC + nblocks; proc -= ( proc / NPROCS ) * NPROCS; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYPROC - SRCPROC ) < 0 ) mydist += NPROCS; /* * When mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks of * size NB since I am not the source process, i.e. II = ( ilocblk + 1 ) * NB. * When mydist >= nblocks - ilocblk * NPROCS and I don't own IG, I own ilocblk * full blocks of size NB, i.e. II = ilocblk * NB, otherwise I own ilocblk * blocks and IG, in which case IG can be written as INB + (nblocks-1)*NB + IL * and II = ilocblk*NB + IL = IG - INB + ( ilocblk - nblocks + 1 )*NB. */ if( nblocks < NPROCS ) { mydist -= nblocks; return( ( ( mydist < 0 ) ? NB : ( ( MYPROC == proc ) ? IG + ( 1 - nblocks ) * NB : 0 ) ) ); } else { ilocblk = nblocks / NPROCS; mydist -= nblocks - ilocblk * NPROCS; return( ( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : ( ( MYPROC == proc ) ? ( ilocblk - nblocks + 1 ) * NB + IG : ilocblk * NB ) ) ); } } /* * End of PB_Cg2lrem */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CptrsmAB0.c0000640000175000017500000004661610363532303020121 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CptrsmAB0( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * DIAG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * * C, int * DESCC, int * CFREE ) #else void PB_CptrsmAB0( TYPE, SIDE, UPLO, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, C, DESCC, CFREE ) /* * .. Scalar Arguments .. */ char * DIAG, * SIDE, * UPLO; int * CFREE, IA, IB, JA, JB, M, N; char * ALPHA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * * C; #endif { /* * .. Local Scalars .. */ char btop, * negone, * one, * talpha, * zero; int Acol, Acurcol, Acurrow, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj, Ald, Almb1, Alnb1, Amb, Amp0, Anq0, An, Anb, Arow, Bcol, Bii, Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp0, Bnb, Bnq0, Brow, Cld, ctxt, k=1, kb, kblks, kbprev, ktmp, lside, mycol, myrow, npcol, nprow, size, upper; char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL, * Cptr = NULL; MMADD_T mmadd; GEBR2D_T brecv; GEBS2D_T bsend; GEMM_T gemm; TRSM_T trsm; /* .. * .. Executable Statements .. * */ size = TYPE->size; lside = ( Mupcase( SIDE[0] ) == CLEFT ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol, Ald */ Ald = DESCA[LLD_]; PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol, Bld ... */ Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); /* * Shorcuts when sub( B ) spans only one process row or column */ if( lside ) { if( !( PB_Cspan( M, IB, Bimb, Bmb, DESCB[RSRC_], nprow ) ) ) { *CFREE = 0; Binb1 = PB_Cfirstnb( N, JB, Binb, Bnb ); PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); Bnq0 = PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bnq0 > 0 ) && ( ( ( Brow >= 0 ) && ( myrow == Brow ) ) || ( Brow < 0 ) ) ) { *C = Mptr( B, Bii, Bjj, Bld, size ); TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &M, &Bnq0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, *C, &Bld ); } return; } } else { if( !( PB_Cspan( N, JB, Binb, Bnb, DESCB[CSRC_], npcol ) ) ) { *CFREE = 0; Bimb1 = PB_Cfirstnb( M, IB, Bimb, Bmb ); PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); Bmp0 = PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp0 > 0 ) && ( ( ( Bcol >= 0 ) && ( mycol == Bcol ) ) || ( Bcol < 0 ) ) ) { *C = Mptr( B, Bii, Bjj, Bld, size ); TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Bmp0, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, *C, &Bld ); } return; } } /* * Handle the general case now */ An = ( lside ? M : N ); upper = ( Mupcase( UPLO[0] ) == CUPPER ); talpha = ALPHA; negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero; brecv = TYPE->Cgebr2d; bsend = TYPE->Cgebs2d; mmadd = TYPE->Fmmadd; gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm; /* * Compute more local information for sub( A ) and sub( B ) */ Aimb = DESCA[IMB_]; Ainb = DESCA[INB_]; Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Aimb1 = PB_Cfirstnb( An, IA, Aimb, Amb ); Almb1 = PB_Clastnb ( An, IA, Aimb, Amb ); Amp0 = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow ); Ainb1 = PB_Cfirstnb( An, JA, Ainb, Anb ); Alnb1 = PB_Clastnb ( An, JA, Ainb, Anb ); Anq0 = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size ); Bimb1 = PB_Cfirstnb( M, IB, Bimb, Bmb ); Bmp0 = PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( N, JB, Binb, Bnb ); Bnq0 = PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); if( lside ) { Cld = M; PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, -1, Bcol, ctxt, Cld ); if( Bnq0 > 0 ) { Cptr = *C = PB_Cmalloc( M * Bnq0 * size ); *CFREE = 1; } else { *C = NULL; *CFREE = 0; return; } kblks = ( An > Aimb1 ? ( An - Aimb1 - 1 ) / Amb + 2 : 1 ); btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( upper ) { Acurrow = PB_Cindxg2p( An-1, Aimb1, Amb, Arow, Arow, nprow ); kb = Almb1; Bptr = Mptr( Bptr0, Bmp0 - kb, 0, Bld, size ); Cptr = Mptr( *C, An - kb, 0, Cld, size ); /* * Solve last block of rows of sub( B ) and broadcast it vertically to update * the rest of sub( B ) */ if( myrow == Acurrow ) { trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA, Mptr( Aptr0, Amp0-kb, Anq0-kb, Ald, size ), &Ald, Bptr, &Bld ); bsend( ctxt, COLUMN, &btop, kb, Bnq0, Bptr, Bld ); mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero, Cptr, &Cld ); Amp0 -= kb; Bmp0 -= kb; } else { brecv( ctxt, COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol ); } Acurrow = MModSub1( Acurrow, nprow ); An -= ( kbprev = kb ); Anq0 -= kb; kblks -= 1; /* * Lookahead */ while( kblks > 0 ) { kb = ( kblks == 1 ? Aimb1 : Amb ); Aptr = Mptr( Aptr0, 0, Anq0, Ald, size ); Bptr = Mptr( Bptr0, Bmp0 - kb, 0, Bld, size ); Cptr = Mptr( *C, An, 0, Cld, size ); if( myrow == Acurrow ) { /* * Update the current block of rows of sub( B ) with block of rows of sub( B ) * of previous step */ gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &kb, &Bnq0, &kbprev, negone, Mptr( Aptr, Amp0-kb, 0, Ald, size ), &Ald, Cptr, &Cld, talpha, Bptr, &Bld ); /* * Solve the current block of rows of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Bnq0, one, Mptr( Aptr, Amp0-kb, -kb, Ald, size ), &Ald, Bptr, &Bld ); /* * Broadcast the current block of rows of sub( B ) for next update */ bsend( ctxt, COLUMN, &btop, kb, Bnq0, Bptr, Bld ); mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero, Mptr( Cptr, -kb, 0, Cld, size ), &Cld ); /* * Finish update of the remaining blocks of rows of sub( B ) with block of rows * of sub( B ) of previous step */ if( ( ktmp = Amp0 - kb ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &ktmp, &Bnq0, &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0, &Bld ); Amp0 -= kb; Bmp0 -= kb; } else { /* * Update the remaining rows of sub( B ) with block of rows of sub( B ) of * previous step */ if( Amp0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Amp0, &Bnq0, &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0, &Bld ); /* * Receive the current block of rows of sub( B ) for next update */ brecv( ctxt, COLUMN, &btop, kb, Bnq0, Mptr( Cptr, -kb, 0, Cld, size ), Cld, Acurrow, mycol ); } Acurrow = MModSub1( Acurrow, nprow ); An -= ( kbprev = kb ); Anq0 -= kb; talpha = one; kblks -= 1; } } else { Acurrow = Arow; kb = Aimb1; /* * Solve first block of rows of sub( B ) and broadcast it vertically to update * the rest of sub( B ) */ if( myrow == Acurrow ) { trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA, Aptr0, &Ald, Bptr0, &Bld ); bsend( ctxt, COLUMN, &btop, kb, Bnq0, Bptr0, Bld ); mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld ); Amp0 -= kb; Aptr0 = Mptr( Aptr0, kb, 0, Ald, size ); Bptr0 = Mptr( Bptr0, kb, 0, Bld, size ); } else { brecv( ctxt, COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol ); } Acurrow = MModAdd1( Acurrow, nprow ); kbprev = kb; Cptr = Mptr( Cptr, kb, 0, Cld, size ); Aptr0 = Mptr( Aptr0, 0, kb, Ald, size ); k += 1; /* * Lookahead */ while( k <= kblks ) { kb = ( k == kblks ? Almb1 : Amb ); if( myrow == Acurrow ) { /* * Update the current block of rows of sub( B ) with block of rows of sub( B ) * of previous step */ gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &kb, &Bnq0, &kbprev, negone, Mptr( Aptr0, 0, -kbprev, Ald, size ), &Ald, Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha, Bptr0, &Bld ); /* * Solve the current block of rows of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &kb, &Bnq0, one, Aptr0, &Ald, Bptr0, &Bld ); /* * Broadcast the current block of rows of sub( B ) for next update */ bsend( ctxt, COLUMN, &btop, kb, Bnq0, Bptr0, Bld ); mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld ); /* * Finish update of the remaining blocks of rows of sub( B ) with block of rows * of sub( B ) of previous step */ if( ( ktmp = Amp0 - kb ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &ktmp, &Bnq0, &kbprev, negone, Mptr( Aptr0, kb, -kbprev, Ald, size ), &Ald, Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha, Mptr( Bptr0, kb, 0, Bld, size ), &Bld ); Amp0 -= kb; Aptr0 = Mptr( Aptr0, kb, 0, Ald, size ); Bptr0 = Mptr( Bptr0, kb, 0, Bld, size ); } else { /* * Update the remaining rows of sub( B ) with block of rows of sub( B ) of * previous step */ if( Amp0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Amp0, &Bnq0, &kbprev, negone, Mptr( Aptr0, 0, -kbprev, Ald, size ), &Ald, Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha, Bptr0, &Bld ); /* * Receive the current block of rows of sub( B ) for next update */ brecv( ctxt, COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol ); } Acurrow = MModAdd1( Acurrow, nprow ); kbprev = kb; Cptr = Mptr( Cptr, kb, 0, Cld, size ); Aptr0 = Mptr( Aptr0, 0, kb, Ald, size ); talpha = one; k += 1; } } } else { Cld = MAX( 1, Bmp0 ); PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, -1, ctxt, Cld ); if( Bmp0 > 0 ) { Cptr = *C = PB_Cmalloc( Bmp0 * N * size ); *CFREE = 1; } else { *C = NULL; *CFREE = 0; return; } kblks = ( An > Ainb1 ? ( An - Ainb1 - 1 ) / Anb + 2 : 1 ); btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( upper ) { Acurcol = Acol; kb = Ainb1; /* * Solve first block of columns of sub( B ) and broadcast it horizontally to * update the rest of sub( B ) */ if( mycol == Acurcol ) { trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA, Aptr0, &Ald, Bptr0, &Bld ); bsend( ctxt, ROW, &btop, Bmp0, kb, Bptr0, Bld ); mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld ); Anq0 -= kb; Aptr0 = Mptr( Aptr0, 0, kb, Ald, size ); Bptr0 = Mptr( Bptr0, 0, kb, Bld, size ); } else { brecv( ctxt, ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol ); } Acurcol = MModAdd1( Acurcol, npcol ); kbprev = kb; k += 1; Cptr = Mptr( Cptr, 0, kb, Cld, size ); Aptr0 = Mptr( Aptr0, kb, 0, Ald, size ); /* * Lookahead */ while( k <= kblks ) { kb = ( k == kblks ? Alnb1 : Anb ); if( mycol == Acurcol ) { /* * Update the current block of columns of sub( B ) with block of columns of * sub( B ) of previous step */ gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &kb, &kbprev, negone, Mptr( Cptr, 0, -kbprev, Cld, size ), &Cld, Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald, talpha, Bptr0, &Bld ); /* * Solve the current block of columns of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Bmp0, &kb, one, Aptr0, &Ald, Bptr0, &Bld ); /* * Broadcast the current block of columns of sub( B ) for next update */ bsend( ctxt, ROW, &btop, Bmp0, kb, Bptr0, Bld ); mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld ); /* * Finish update of the remaining blocks of columns of sub( B ) with block of * columns of sub( B ) of previous step */ if( ( ktmp = Anq0 - kb ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &ktmp, &kbprev, negone, Mptr( Cptr, 0, -kbprev, Cld, size ), &Cld, Mptr( Aptr0, -kbprev, kb, Ald, size ), &Ald, talpha, Mptr( Bptr0, 0, kb, Bld, size ), &Bld ); Anq0 -= kb; Aptr0 = Mptr( Aptr0, 0, kb, Ald, size ); Bptr0 = Mptr( Bptr0, 0, kb, Bld, size ); } else { /* * Update the remaining columns of sub( B ) with block of columns of sub( B ) * of previous step */ if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Anq0, &kbprev, negone, Mptr( Cptr, 0, -kbprev, Cld, size ), &Cld, Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald, talpha, Bptr0, &Bld ); /* * Receive the current block of columns of sub( B ) for next update */ brecv( ctxt, ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol ); } Acurcol = MModAdd1( Acurcol, npcol ); kbprev = kb; Cptr = Mptr( Cptr, 0, kb, Cld, size ); Aptr0 = Mptr( Aptr0, kb, 0, Ald, size ); talpha = one; k += 1; } } else { Acurcol = PB_Cindxg2p( An-1, Ainb1, Anb, Acol, Acol, npcol ); kb = Alnb1; Bptr = Mptr( Bptr0, 0, Bnq0 - kb, Bld, size ); Cptr = Mptr( *C, 0, An - kb, Cld, size ); /* * Solve last block of columns of sub( B ) and broadcast it horizontally to * update the rest of sub( B ) */ if( mycol == Acurcol ) { trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA, Mptr( Aptr0, Amp0-kb, Anq0-kb, Ald, size ), &Ald, Bptr, &Bld ); bsend( ctxt, ROW, &btop, Bmp0, kb, Bptr, Bld ); mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero, Cptr, &Cld ); Anq0 -= kb; Bnq0 -= kb; } else { brecv( ctxt, ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol ); } Acurcol = MModSub1( Acurcol, npcol ); An -= ( kbprev = kb ); Amp0 -= kb; kblks -= 1; /* * Lookahead */ while( kblks > 0 ) { kb = ( kblks == 1 ? Ainb1 : Anb ); Aptr = Mptr( Aptr0, Amp0, 0, Ald, size ); Bptr = Mptr( Bptr0, 0, Bnq0 - kb, Bld, size ); Cptr = Mptr( *C, 0, An, Cld, size ); if( mycol == Acurcol ) { /* * Update the current block of columns of sub( B ) with block of columns of * sub( B ) of previous step */ gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &kb, &kbprev, negone, Cptr, &Cld, Mptr( Aptr, 0, Anq0-kb, Ald, size ), &Ald, talpha, Bptr, &Bld ); /* * Solve the current block of columns of sub( B ) */ trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ), C2F_CHAR( DIAG ), &Bmp0, &kb, one, Mptr( Aptr, -kb, Anq0-kb, Ald, size ), &Ald, Bptr, &Bld ); /* * Broadcast the current block of columns of sub( B ) for next update */ bsend( ctxt, ROW, &btop, Bmp0, kb, Bptr, Bld ); mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero, Mptr( Cptr, 0, -kb, Cld, size ), &Cld ); /* * Finish update of the remaining blocks of columns of sub( B ) with block of * columns of sub( B ) of previous step */ if( ( ktmp = Anq0 - kb ) > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &ktmp, &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0, &Bld ); Anq0 -= kb; Bnq0 -= kb; } else { /* * Update the remaining columns of sub( B ) with block of columns of sub( B ) * of previous step */ if( Anq0 > 0 ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Bmp0, &Anq0, &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0, &Bld ); /* * Receive the current block of columns of sub( B ) for next update */ brecv( ctxt, ROW, &btop, Bmp0, kb, Mptr( Cptr, 0, -kb, Cld, size ), Cld, myrow, Acurcol ); } Acurcol = MModSub1( Acurcol, npcol ); An -= ( kbprev = kb ); Amp0 -= kb; talpha = one; kblks -= 1; } } } /* * End of PB_CptrsmAB0 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpgemmAB.c0000640000175000017500000006377610363532303020007 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpgemmAB( PBTYP_T * TYPE, char * DIRECA, char * DIRECB, char * TRANSA, char * TRANSB, int M, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpgemmAB( TYPE, DIRECA, DIRECB, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * DIRECA, * DIRECB, * TRANSA, * TRANSB; int IA, IB, IC, JA, JB, JC, K, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_CpgemmAB performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * conjg(A(IA:IA+K-1,JA:JA+M-1)') if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * conjg(B(IB:IB+N-1,JB:JB+K-1)') if TRANSB = 'C'. * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * This is the outer-product algorithm using the logical LCM hybrid * algorithmic blocking technique. The submatrix operand sub( C ) stays * in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be looped over as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * DIRECB (global input) pointer to CHAR * On entry, DIRECB specifies the direction in which the rows * or columns of sub( B ) should be looped over as follows: * DIRECB = 'F' or 'f' forward or increasing, * DIRECB = 'B' or 'b' backward or decreasing. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * TRANSB (global input) pointer to CHAR * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * TRANSB = 'C' or 'c' op( sub( B ) ) = conjg( sub( B )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Aroc, Broc, TrA, TrB, * one, * tbeta, * zero; int ABrocs, Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D, Ainb1R, AisR, AkkR, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff, ArocD, ArocR, AsrcR, Bbufld, BcurrocR, Bfr, Bfwd, BiD, BiR, BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, BkkR, Bld, BmyprocD, BmyprocR, BnbD, BnbR, BnpD, BnpR, BnprocsD, BnprocsR, Boff, BrocD, BrocR, BsrcR, Ccol, Cii, Cimb1, Cinb1, Cjj, Cld, Cmb, Cmp, Cnb, Cnq, Crow, WAfr, WAsum, WBfr, WBsum, Wkbb=0, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxq, mycol, myrow, ncpq, nota, notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp; GEMM_T gemm; /* * .. Local Arrays .. */ PB_VM_T VM; int Cd0[DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WAd0[DLEN_], WBd0[DLEN_]; char * Abuf = NULL, * Bbuf = NULL, * Cptr = NULL, * WA = NULL, * WB = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); nota = ( ( TrA = Mupcase( TRANSA[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( TRANSB[0] ) ) == CNOTRAN ); TrA = ( ( TrA == CCOTRAN ) ? CCONJG : CNOCONJG ); TrB = ( ( TrB == CCOTRAN ) ? CCONJG : CNOCONJG ); size = TYPE->size; /* * Retrieve local information for sub( A ), sub( B ) and sub( C ) */ if( nota ) { AiR = JA; Aroc = CCOLUMN; AnprocsR = npcol; AinbR = DESCA[INB_]; AnbR = DESCA[NB_ ]; AsrcR = DESCA[CSRC_]; } else { AiR = IA; Aroc = CROW; AnprocsR = nprow; AinbR = DESCA[IMB_]; AnbR = DESCA[MB_ ]; AsrcR = DESCA[RSRC_]; } if( notb ) { BiR = IB; Broc = CROW; BnprocsR = nprow; BinbR = DESCB[IMB_]; BnbR = DESCB[MB_ ]; BsrcR = DESCB[RSRC_]; } else { BiR = JB; Broc = CCOLUMN; BnprocsR = npcol; BinbR = DESCB[INB_]; BnbR = DESCB[NB_ ]; BsrcR = DESCB[CSRC_]; } /* * Retrieve sub( C )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cdescribe( M, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 ); Cmp = PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol ); /* * When sub( A ) and sub( B ) do not span more than one process row or column, * there is no need to pack the data. */ if( !( PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) && !( PB_Cspan( K, BiR, BinbR, BnbR, BsrcR, BnprocsR ) ) ) { PB_CInV( TYPE, &TrA, COLUMN, M, N, Cd0, K, A, IA, JA, DESCA, &Aroc, &WA, WAd0, &WAfr ); PB_CInV( TYPE, &TrB, ROW, M, N, Cd0, K, B, IB, JB, DESCB, &Broc, &WB, WBd0, &WBfr ); if( ( Cmp > 0 ) && ( Cnq > 0 ) ) { /* * Perform the local update if I own some of sub( C ) */ TYPE->Fgemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp, &Cnq, &K, ALPHA, WA, &WAd0[LLD_], WB, &WBd0[LLD_], BETA, Mptr( C, Cii, Cjj, Cld, size ), &Cld ); } if( WAfr ) free( WA ); if( WBfr ) free( WB ); return; } /* * sub( A ) and sub( B ) span more than one process row or column. */ Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD ); one = TYPE->one; zero = TYPE->zero; tbeta = BETA; gemm = TYPE->Fgemm; kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ) and sub( B ) */ if( nota ) { AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow; PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR, &AiiD, &AiiR, &ArocD, &ArocR ); } else { AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol; PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD, &AiiR, &AiiD, &ArocR, &ArocD ); } Ainb1D = PB_Cfirstnb( M, AiD, AinbD, AnbD ); AnpD = PB_Cnumroc( M, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD ); Ainb1R = PB_Cfirstnb( K, AiR, AinbR, AnbR ); AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ); if( notb ) { BiD = JB; BinbD = DESCB[INB_]; BnbD = DESCB[NB_]; Bld = DESCB[LLD_]; BmyprocD = mycol; BmyprocR = myrow; BnprocsD = npcol; PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); } else { BiD = IB; BinbD = DESCB[IMB_]; BnbD = DESCB[MB_]; Bld = DESCB[LLD_]; BmyprocD = myrow; BmyprocR = mycol; BnprocsD = nprow; PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); } Binb1D = PB_Cfirstnb( N, BiD, BinbD, BnbD ); BnpD = PB_Cnumroc( N, 0, Binb1D, BnbD, BmyprocD, BrocD, BnprocsD ); Binb1R = PB_Cfirstnb( K, BiR, BinbR, BnbR ); BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ); /* * When sub( A ) is not replicated and backward pass on sub( A ), find the * virtual process q owning the last row or column of sub( A ). */ if( !( AisR ) && !( Afwd ) ) { tmp = PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR ); q = MModSub( tmp, ArocR, AnprocsR ); } /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process p owning the last row or column of sub( B ). */ if( !( BisR ) && !( Bfwd ) ) { tmp = PB_Cindxg2p( K - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR ); p = MModSub( tmp, BrocR, BnprocsR ); } if( Cmp > 0 && Cnq > 0 ) Cptr = Mptr( C, Cii, Cjj, Cld, size ); /* * Allocate work space in process rows and columns spanned by sub( C ) */ PB_COutV( TYPE, COLUMN, NOINIT, M, N, Cd0, kb, &WA, WAd0, &WAfr, &WAsum ); PB_COutV( TYPE, ROW, NOINIT, M, N, Cd0, kb, &WB, WBd0, &WBfr, &WBsum ); /* * Loop over the virtual process grid induced by the sub( A ) and sub( B ) */ lcmb = PB_Clcm( ( maxp = ( BisR ? 1 : BnprocsR ) ) * BnbR, ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR ); maxpm1 = maxp - 1; /* * Find out process coordinates corresponding to first virtual process (p,q) */ AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, AnprocsR ) ); AkkR = PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, AnprocsR ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) ); BkkR = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR ); BnpR = PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR ); /* * Find out how many diagonals this virtual process (p,q) has */ PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q, maxp, maxq, lcmb ); npq = PB_CVMnpq( &VM ); for( k = 0; k < K; k += kb ) { kbb = K - k; kbb = MIN( kbb, kb ); while( Wkbb != kbb ) { /* * Ensure that the current virtual process (p,q) has something to contribute * to the replicated buffers WA and WB. */ while( npq == 0 ) { if( ( Bfwd && ( p == maxpm1 ) ) || ( !( Bfwd ) && ( p == 0 ) ) ) q = ( Afwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( Bfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, AnprocsR ) ); AkkR = PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, AnprocsR ); AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR ); BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) ); BkkR = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR ); BnpR = PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR ); PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q, maxp, maxq, lcmb ); npq = PB_CVMnpq( &VM ); } /* * Current virtual process (p,q) has something, find out how many rows or * columns could be used: ABrocs. */ if( Wkbb == 0 ) { ABrocs = ( npq < kbb ? npq : kbb ); } else { ABrocs = kbb - Wkbb; ABrocs = MIN( ABrocs, npq ); } /* * Find out how many rows or columns of sub( A ) and sub( B ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Boff, &Aoff ); if( nota ) { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * columns of sub( A ). */ if( ( Afr = ( ncpq < ABrocs ) ) != 0 ) { /* * If columns of sub( A ) are not contiguous, then allocate the buffer and * pack the ABrocs columns of sub( A ). */ Abufld = MAX( 1, AnpD ); if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, ABrocs, AnpD, one, Mptr( A, AiiD, AkkR, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, AiiD, AkkR + Aoff, Ald, size ); } PB_Cdescset( DBUFA, M, ABrocs, Ainb1D, ABrocs, AnbD, ABrocs, ArocD, AcurrocR, ctxt, Abufld ); } else { /* * Compute the descriptor DBUFA for the buffer that will contained the packed * rows of sub( A ). */ if( ( Afr = ( ncpq < ABrocs ) ) != 0 ) { /* * If rows of sub( A ) are not contiguous, then allocate the buffer and * pack the ABrocs rows of sub( A ). */ Abufld = ABrocs; if( AisR || ( AmyprocR == AcurrocR ) ) { Abuf = PB_Cmalloc( AnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, ABrocs, AnpD, one, Mptr( A, AkkR, AiiD, Ald, size ), Ald, zero, Abuf, Abufld ); } } else { /* * Otherwise, re-use sub( A ) directly. */ Abufld = Ald; if( AisR || ( AmyprocR == AcurrocR ) ) Abuf = Mptr( A, AkkR + Aoff, AiiD, Ald, size ); } PB_Cdescset( DBUFA, ABrocs, M, ABrocs, Ainb1D, ABrocs, AnbD, AcurrocR, ArocD, ctxt, Abufld ); } if( notb ) { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * rows of sub( B ). */ if( ( Bfr = ( nrpq < ABrocs ) ) != 0 ) { /* * If rows of sub( B ) are not contiguous, then allocate the buffer and * pack the ABrocs rows of sub( B ). */ Bbufld = ABrocs; if( BisR || ( BmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, ROW, &Broc, PACKING, NOTRAN, ABrocs, BnpD, one, Mptr( B, BkkR, BiiD, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BkkR + Boff, BiiD, Bld, size ); } PB_Cdescset( DBUFB, ABrocs, N, ABrocs, Binb1D, ABrocs, BnbD, BcurrocR, BrocD, ctxt, Bbufld ); } else { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * columns of sub( B ). */ if( ( Bfr = ( nrpq < ABrocs ) ) != 0 ) { /* * If columns of sub( B ) are not contiguous, then allocate the buffer and * pack the ABrocs columns of sub( B ). */ Bbufld = MAX( 1, BnpD ); if( BisR || ( BmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * ABrocs * size ); PB_CVMpack( TYPE, &VM, ROW, &Broc, PACKING, NOTRAN, ABrocs, BnpD, one, Mptr( B, BiiD, BkkR, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BiiD, BkkR + Boff, Bld, size ); } PB_Cdescset( DBUFB, N, ABrocs, Binb1D, ABrocs, BnbD, ABrocs, BrocD, BcurrocR, ctxt, Bbufld ); } /* * Update the local indexes of sub( A ) and sub( B ) */ PB_CVMupdate( &VM, ABrocs, &BkkR, &AkkR ); /* * Replicate panels of rows or columns of sub( A ) and sub( B ) over sub( C ) * -> WA, WB */ PB_CInV2( TYPE, &TrA, COLUMN, M, N, Cd0, ABrocs, Abuf, 0, 0, DBUFA, &Aroc, WA, Wkbb, WAd0 ); PB_CInV2( TYPE, &TrB, ROW, M, N, Cd0, ABrocs, Bbuf, 0, 0, DBUFB, &Broc, WB, Wkbb, WBd0 ); if( Afr & ( AisR || ( AmyprocR == AcurrocR ) ) ) if( Abuf ) free( Abuf ); if( Bfr & ( BisR || ( BmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); /* * ABrocs rows or columns of sub( A ) and sub( B ) have been replicated, * update the number of diagonals in this virtual process as well as the * number of rows or columns of sub( A ) and sub( B ) that are in WA, WB. */ npq -= ABrocs; Wkbb += ABrocs; } /* * Perform local update */ if( Cmp > 0 && Cnq > 0 ) { gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp, &Cnq, &kbb, ALPHA, WA, &WAd0[LLD_], WB, &WBd0[LLD_], tbeta, Cptr, &Cld ); tbeta = one; } Wkbb = 0; } if( WAfr ) free( WA ); if( WBfr ) free( WB ); /* * End of PB_CpgemmAB */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctop.c0000640000175000017500000000766410363532303017273 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ char * PB_Ctop( int * ICTXT, char * OP, char * SCOPE, char * TOP ) #else char * PB_Ctop( ICTXT, OP, SCOPE, TOP ) /* * .. Scalar Arguments .. */ int * ICTXT; /* * .. Array Arguments .. */ char * OP, * SCOPE, * TOP; #endif { /* * Purpose * ======= * * PB_Ctop returns or initializes the row-, column- or all- broadcast * or combine topologies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * OP (global input) pointer to CHAR * On input, OP specifies the BLACS operation defined as fol- * lows: * OP = 'B' or 'b', BLACS broadcast operation, * OP = 'C' or 'c', BLACS combine operation. * * SCOPE (global input) pointer to CHAR * On entry, SCOPE specifies the scope of the BLACS operation as * follows: * SCOPE = 'R' or 'r', rowwise broadcast or combine, * SCOPE = 'C' or 'c', column broadcast or combine, * SCOPE = 'A' or 'a', all broadcast or combine. * * TOP (global input) pointer to CHAR * On entry, TOP is a character string specifying the BLACS to- * pology to be used i.e. to be set for the given operation spe- * cified by OP and SCOPE. If TOP = TOP_GET, the routine instead * returns the current topology in use for the given operation * specified by OP and SCOPE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static char rbtop = CTOP_DEFAULT; static char cbtop = CTOP_DEFAULT; static char abtop = CTOP_DEFAULT; static char rctop = CTOP_DEFAULT; static char cctop = CTOP_DEFAULT; static char actop = CTOP_DEFAULT; /* .. * .. Executable Statements .. * */ /* * This BLACS topology information should be cached within a BLACS context. * This will be corrected in the near future. Sorry. */ if( *OP == CBCAST ) { /* * BLACS broadcast operations */ if( *TOP == CTOP_GET ) { /* * retrieve the current topology in SCOPE */ if( *SCOPE == CROW ) { return( &rbtop ); } else if( *SCOPE == CCOLUMN ) { return( &cbtop ); } else { return( &abtop ); } } else { /* * set the topology to be used from now on in SCOPE */ if( *SCOPE == CROW ) { rbtop = *TOP; return( &rbtop ); } else if( *SCOPE == CCOLUMN ) { cbtop = *TOP; return( &cbtop ); } else { abtop = *TOP; return( &abtop ); } } } else { /* * BLACS combine operations */ if( *TOP == CTOP_GET ) { /* * retrieve the current topology in SCOPE */ if( *SCOPE == CROW ) { return( &rctop ); } else if( *SCOPE == CCOLUMN ) { return( &cctop ); } else { return( &actop ); } } else { /* * set the topology to be used from now on in SCOPE */ if( *SCOPE == CROW ) { rctop = *TOP; return( &rctop ); } else if( *SCOPE == CCOLUMN ) { cctop = *TOP; return( &cctop ); } else { actop = *TOP; return( &actop ); } } } /* * End of PB_Ctop */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cnumroc.c0000640000175000017500000001563210363532303017766 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cnumroc( int N, int I, int INB, int NB, int PROC, int SRCPROC, int NPROCS ) #else int PB_Cnumroc( N, I, INB, NB, PROC, SRCPROC, NPROCS ) /* * .. Scalar Arguments .. */ int I, INB, N, NB, NPROCS, PROC, SRCPROC; #endif { /* * Purpose * ======= * * PB_Cnumroc returns the local number of matrix rows/columns process * PROC will get if we give out N rows/columns starting from global in- * dex I. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * PROC (local input) INTEGER * On entry, PROC specifies the coordinate of the process whose * local portion is determined. PROC must be at least zero and * strictly less than NPROCS. * * SRCPROC (global input) INTEGER * On entry, SRCPROC specifies the coordinate of the process * that possesses the first row or column of the matrix. When * SRCPROC = -1, the data is not distributed but replicated, * otherwise SRCPROC must be at least zero and strictly less * than NPROCS. * * NPROCS (global input) INTEGER * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ilocblk, mydist, nblocks; /* .. * .. Executable Statements .. * */ if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * The data is not distributed, or there is just one process in this dimension * of the grid. */ return( N ); /* * Compute coordinate of process owning I and corresponding INB */ if( ( INB -= I ) <= 0 ) { /* * I is not in the first block, find out which process has it and update the * size of first block */ nblocks = (-INB) / NB + 1; SRCPROC += nblocks; SRCPROC -= ( SRCPROC / NPROCS ) * NPROCS; INB += nblocks * NB; } /* * Now everything is just like N, I=0, INB, NB, SRCPROC, NPROCS. The discussion * goes as follows: compute my distance from the source process so that within * this process coordinate system, the source process is the process such that * mydist = 0, or equivalently PROC == SRCPROC. * * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those N entries. Then remark that * * when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROCS, either the last block is not full * and I own it, or the last block is full and I am the first process owning * only ilocblk full blocks. */ if( PROC == SRCPROC ) { /* * I am the source process, i.e. I own I (mydist = 0). When N <= INB, the * answer is simply N. */ if( N <= INB ) return( N ); /* * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those N entries. */ nblocks = ( N - INB ) / NB + 1; /* * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only two * possible cases: * * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS divides * the global number of full blocks, then the source process SRCPROC owns * one more block than the other processes; and N can be rewritten as * N = INB + (nblocks-1) * NB + LNB with LNB >= 0 size of the last block. * Similarly, the local value Np corresponding to N can be written as * Np = INB + (ilocblk-1) * NB + LNB = N + ( ilocblk-1 - (nblocks-1) )*NB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only owns * full blocks, and therefore Np = INB + ilocblk * NB. Note that when * ilocblk is zero, Np is just INB. */ if( nblocks < NPROCS ) return( INB ); ilocblk = nblocks / NPROCS; return( ( nblocks - ilocblk * NPROCS ) ? INB + ilocblk * NB : N + ( ilocblk - nblocks ) * NB ); } else { /* * I am not the source process. When N <= INB, the answer is simply 0. */ if( N <= INB ) return( 0 ); /* * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those N entries */ nblocks = ( N - INB ) / NB + 1; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = PROC - SRCPROC ) < 0 ) mydist += NPROCS; /* * When mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks of * size NB since I am not the source process, * * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks of * size NB since I am not the source process, * * when mydist = nblocks - ilocblk * NPROCS, * either the last block is not full and I own it, in which case * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last block * such that NB > LNB > 0; the local value Np corresponding to N is given * by Np = ilocblk * NB + LNB = N - INB + ( ilocblk - nblocks + 1 ) * NB; * or the last block is full and I am the first process owning only ilocblk * full blocks of size NB, that is N = INB + ( nblocks - 1 ) * NB and * Np = ilocblk * NB = N - INB + ( ilocblk - nblocks + 1 ) * NB. */ if( nblocks < NPROCS ) return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : N - INB + NB * ( 1 - nblocks ) ) ); ilocblk = nblocks / NPROCS; mydist -= nblocks - ilocblk * NPROCS; return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : ( ( mydist > 0 ) ? ilocblk * NB : N - INB + NB * ( ilocblk - nblocks + 1 ) ) ); } /* * End of PB_Cnumroc */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpswapND.c0000640000175000017500000010226710363532303020040 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpswapND( PBTYP_T * TYPE, int N, char * X, int IX, int JX, int * DESCX, int INCX, char * Y, int IY, int JY, int * DESCY, int INCY ) #else void PB_CpswapND( TYPE, N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int INCX, INCY, IX, IY, JX, JY, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCX, * DESCY; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CpswapND swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * sub( X ) is assumed to be not distributed, and sub( Y ) is assumed to * be distributed. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top, * zero; int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, Xm, XmyprocD, XmyprocR, Xn, XnprocsD, XnprocsR, XprocR, Xroc, Xrow, Ycol, Yii, Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 ) { Xld = DESCX[LLD_]; Xlinc = Xld; XmyprocD = mycol; XnprocsD = npcol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); } else { Xld = DESCX[LLD_]; Xlinc = 1; XmyprocD = myrow; XnprocsD = nprow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 ) { YnbD = DESCY[NB_]; Yld = DESCY[LLD_]; Ylinc = Yld; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol; Yinb1D = PB_Cfirstnb( N, JY, DESCY[INB_], YnbD ); } else { YnbD = DESCY[MB_]; Yld = DESCY[LLD_]; Ylinc = 1; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow; Yinb1D = PB_Cfirstnb( N, IY, DESCY[IMB_], YnbD ); } YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * sub( X ) is not distributed and sub( Y ) is distributed */ size = TYPE->size; zero = TYPE->zero; if( !( XisR ) ) { /* * sub( X ) is not replicated. Since this operation is local if sub( X ) and * sub( Y ) are both row or column vectors, choose YprocR = XprocR when RRorCC, * and YprocR = 0 otherwise. */ if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); } /* * Now, it is just like sub( Y ) is not replicated, this information however is * kept in YisR for later use. */ if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( RRorCC ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); /* * sub( X ) and sub( Y ) are in the same process row or column */ if( XprocR == YprocR ) { /* * In a given process, sub( Y ) is swapped with the corresponding non distribu- * ted part of sub( X ). In the other processes, this part of sub( X ) is set * to zero for later reduction. */ if( YnpD > 0 ) { Yroc = YprocD; if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; } else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { TYPE->Fswap( &Yinb1D, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } else { TYPE->Fset( &Yinb1D, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); kk += kbb; } else { if( XisRow ) TYPE->Fset( &kbb, zero, Mptr( X, Xii, k, Xld, size ), &Xlinc ); else TYPE->Fset( &kbb, zero, Mptr( X, k, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); } } else { /* * If I don't own any entries of sub( Y ), then zero the entire sub( X ) * residing in this process. */ TYPE->Fset( &N, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } /* * Replicate locally scattered sub( X ) by reducing it */ if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } } else { /* * sub( X ) and sub( Y ) are in a different process row or column */ if( YmyprocR == YprocR ) { /* * If I own a piece of sub( Y ), then send it to the process row or column where * sub( X ) resides and receive back the sub( X ) data from the same process. */ if( YnpD > 0 ) { if( YisRow ) { TYPE->Cgesd2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); TYPE->Cgerv2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); } else { TYPE->Cgesd2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); TYPE->Cgerv2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); } } } if( XmyprocR == XprocR ) { /* * If I own a sub( X ), then receive the distributed part of sub( Y ) owned by * the process where sub( Y ) resides in my row or column. Perform a local swap * as if sub( Y ) would reside in the same process row or column as sub( X ). * Send the result back and finally perform the reduction to replicate sub( X ). */ if( YnpD > 0 ) { buf = PB_Cmalloc( YnpD * size ); if( YisRow ) TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, XmyprocD ); else TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD, YprocR ); Yroc = YprocD; kk = 0; if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { TYPE->Fswap( &Yinb1D, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); kk += Yinb1D; } else { TYPE->Fset( &Yinb1D, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, buf+kk*size, &ione ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, buf+kk*size, &ione ); kk += kbb; } else { if( XisRow ) TYPE->Fset( &kbb, zero, Mptr( X, Xii, k, Xld, size ), &Xlinc ); else TYPE->Fset( &kbb, zero, Mptr( X, k, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); } if( YisRow ) TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1, YprocR, XmyprocD ); else TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD, YprocR ); if( buf ) free( buf ); } else { TYPE->Fset( &N, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } /* * Replicate locally scattered sub( X ) by reducing it */ if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } } } } else { /* * sub( X ) and sub( Y ) are not both row or column vectors */ Xroc = 0; if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } /* * Loop over the processes in which sub( Y ) resides, for each process find the * next process Xroc and swap the data. After this, it will be needed to reduce * sub( X ) as above. */ for( p = 0; p < YnprocsD; p++ ) { mydist = MModSub( p, YprocD, YnprocsD ); myproc = MModAdd( YprocD, mydist, YnprocsD ); if( ( XprocR == p ) && ( YprocR == Xroc ) ) { /* * Swap locally at the intersection of the process cross */ if( XmyprocR == p ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { Yroc = YprocD; kk = ( XisRow ? Yii : Yjj ); if( myproc == Yroc ) { if( XmyprocD == Xroc ) { TYPE->Fswap( &Yinb1D, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } else { TYPE->Fset( &Yinb1D, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( myproc == Yroc ) { if( XmyprocD == Xroc ) { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); kk += kbb; } else { if( XisRow ) TYPE->Fset( &kbb, zero, Mptr( X, Xii, k, Xld, size ), &Xlinc ); else TYPE->Fset( &kbb, zero, Mptr( X, k, Xjj, Xld, size ), &Xlinc ); } } Yroc = MModAdd1( Yroc, YnprocsD ); } } } } else { /* * Message exchange */ if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { if( XisRow ) { TYPE->Cgesd2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, Xroc ); TYPE->Cgerv2d( ctxt, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, Xroc ); } else { TYPE->Cgesd2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, Xroc, XprocR ); TYPE->Cgerv2d( ctxt, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, Xroc, XprocR ); } } } if( XmyprocR == XprocR ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD, YnprocsD ); if( YnpD > 0 ) { Yroc = YprocD; kk = 0; /* * Receive the piece of sub( Y ) that I should handle */ if( XmyprocD == Xroc ) { buf = PB_Cmalloc( YnpD * size ); if( XisRow ) TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, p, YprocR ); else TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, p ); } if( myproc == Yroc ) { if( XmyprocD == Xroc ) { TYPE->Fswap( &Yinb1D, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); kk += Yinb1D; } else { TYPE->Fset( &Yinb1D, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( myproc == Yroc ) { if( XmyprocD == Xroc ) { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, buf+kk*size, &ione ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, buf+kk*size, &ione ); kk += kbb; } else { if( XisRow ) TYPE->Fset( &kbb, zero, Mptr( X, Xii, k, Xld, size ), &Xlinc ); else TYPE->Fset( &kbb, zero, Mptr( X, k, Xjj, Xld, size ), &Xlinc ); } } Yroc = MModAdd1( Yroc, YnprocsD ); } if( XmyprocD == Xroc ) { if( XisRow ) TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD, p, YprocR ); else TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1, YprocR, p ); if( buf ) free( buf ); } } } } Xroc = MModAdd1( Xroc, XnprocsD ); } /* * Replicate locally scattered sub( X ) by reducing it */ if( XmyprocR == XprocR ) { if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); TYPE->Cgsum2d( ctxt, ROW, top, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); TYPE->Cgsum2d( ctxt, COLUMN, top, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } } } } if( YisR ) { /* * Replicate sub( Y ) */ YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, 1, YnpD, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, ROW, top, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld ); else TYPE->Cgebr2d( ctxt, ROW, top, YnpD, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, YprocR ); } } } } else { /* * sub( X ) is replicated in every process. Swap the data in process row or * column YprocR when sub( Y ) is not replicated and in every process otherwise. */ if( YisR || ( YmyprocR == YprocR ) ) { YnpD = PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { Yroc = YprocD; kk = ( YisRow ? Yjj : Yii ); if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; } else { ktmp = IX + N; kn = IX + Yinb1D; } if( YmyprocD == Yroc ) { TYPE->Fswap( &Yinb1D, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); kk += Yinb1D; } else { TYPE->Fset( &Yinb1D, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); for( k = kn; k < ktmp; k += YnbD ) { kbb = ktmp - k; kbb = MIN( kbb, YnbD ); if( YmyprocD == Yroc ) { if( YisRow ) { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, kk, Yld, size ), &Ylinc ); } else { if( XisRow ) TYPE->Fswap( &kbb, Mptr( X, Xii, k, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); else TYPE->Fswap( &kbb, Mptr( X, k, Xjj, Xld, size ), &Xlinc, Mptr( Y, kk, Yjj, Yld, size ), &Ylinc ); } kk += kbb; } else { if( XisRow ) TYPE->Fset( &kbb, zero, Mptr( X, Xii, k, Xld, size ), &Xlinc ); else TYPE->Fset( &kbb, zero, Mptr( X, k, Xjj, Xld, size ), &Xlinc ); } Yroc = MModAdd1( Yroc, YnprocsD ); } } else { /* * If I don't own any of sub( Y ), then just zero sub( X ) */ TYPE->Fset( &N, zero, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc ); } /* * Replicate locally scattered sub( X ) by reducing it in the process scope of * sub( Y ) */ scope = ( YisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); if( XisRow ) TYPE->Cgsum2d( ctxt, &scope, top, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); else TYPE->Cgsum2d( ctxt, &scope, top, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, -1, 0 ); } if( !YisR ) { /* * If sub( Y ) is not replicated, then broadcast the result to the other pro- * cesses that own a piece of sub( X ), but were not involved in the above swap * operation. */ if( XisRow ) { Xm = 1; Xn = N; } else { Xm = N; Xn = 1; } if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, Xm, Xn, Mptr( X, Xii, Xjj, Xld, size ), Xld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, Xm, Xn, Mptr( X, Xii, Xjj, Xld, size ), Xld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, ROW, top, Xm, Xn, Mptr( X, Xii, Xjj, Xld, size ), Xld ); else TYPE->Cgebr2d( ctxt, ROW, top, Xm, Xn, Mptr( X, Xii, Xjj, Xld, size ), Xld, YmyprocD, YprocR ); } } } /* * End of PB_CpswapND */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CInV2.c0000640000175000017500000004474710363532303017252 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CInV2( PBTYP_T * TYPE, char * CONJUG, char * ROWCOL, int M, int N, int * DESCA, int K, char * X, int IX, int JX, int * DESCX, char * XROC, char * XAPTR, int IJXA, int * DXA ) #else void PB_CInV2( TYPE, CONJUG, ROWCOL, M, N, DESCA, K, X, IX, JX, DESCX, XROC, XAPTR, IJXA, DXA ) /* * .. Scalar Arguments .. */ char * CONJUG, * ROWCOL, * XROC; int IJXA, IX, JX, K, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DXA; char * X, * XAPTR; #endif { /* * Purpose * ======= * * PB_CInV2 adds data to an array that contains a one-dimensional input * only subvector which is replicated over the rows or columns of a sub- * matrix described by DESCA. A subvector is specified on input to this * routine that is added to the replicated buffer. This routine is spe- * cifically designed for LCM hybrid variants. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies if this routine should conjugate * the subvector as follows: * = 'N' or 'n': The initial subvector is copied, * = 'Z' or 'z': The conjugate subvector is copied. * * ROWCOL (global input) pointer to CHAR * On entry, ROWCOL specifies if the existing buffer pointed to * XAPTR is a row or column subvector replicated over the under- * lying submatrix as follows: * = 'R' or 'r': XAPTR is a row subvector, * = 'C' or 'c': XAPTR is a column subvector. * * M (global input) INTEGER * On entry, M specifies the number of rows of the underlying * submatrix described by DESCA. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the underlying * submatrix described by DESCA. N must be at least zero. * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * K (global input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension of the subvector sub( X ). K must be at least zero. * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( K, IX ) ) when XROC is 'R' or 'r' * and MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( K, JX ) otherwise. * Lx is N when ROWCOL = 'R' or 'r' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * XROC (global input) pointer to CHAR * On entry, XROC specifies the orientation of the subvector * sub( X ). When XROC is 'R' or 'r', sub( X ) is a row vector, * and a column vector otherwise. * * XAPTR (local input/local output) pointer to CHAR * On entry, XAPTR is an array containing some initial data. On * exit, the subvector sub( X ) is copied into this array which * is replicated over the rows or columns of the underlying ma- * trix as specified by ROWCOL and DESCA. * * IJXA (global input) INTEGER * On entry, IJXA specifies XA global row or column index depen- * ding on ROWCOL in the array pointed to by XAPTR, where the * subvector sub( X ) should copied. * * DXA (global and local input) INTEGER array * On entry, DXA is a descriptor array of dimension DLEN_ des- * cribing the data layout of the data pointed to by XAPTR. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Xptr = NULL, * top; int AColSpan, ARowSpan, Acol, Aimb, Ainb, AisD, Amb, Amp, Anb, Anq, Arow, XAld, Xcol, Xii, Ximb1, Xinb1, XisD, XisR, XisRow, Xjj, Xld=1, Xmb, Xnb, Xrow, ctxt, mycol, myrow, npcol, nprow, size; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Ycol */ Minfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, Xii, Xjj, Xrow, Xcol ); /* * Is sub( X ) distributed or not, replicated or not ? */ if( ( XisRow = ( Mupcase( XROC[0] ) == CROW ) ) != 0 ) { XisD = ( ( Xcol >= 0 ) && ( npcol > 1 ) ); XisR = ( ( Xrow == -1 ) || ( nprow == 1 ) ); } else { XisD = ( ( Xrow >= 0 ) && ( nprow > 1 ) ); XisR = ( ( Xcol == -1 ) || ( npcol == 1 ) ); } Arow = DESCA[ RSRC_ ]; Acol = DESCA[ CSRC_ ]; if( Mupcase( ROWCOL[0] ) == CROW ) { /* * Want a row vector. It is possible to reuse sub( X ) iff sub( X ) is already * a row vector and the data does not need to be conjugated. */ if( XisRow && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) ); Ainb = DESCA[INB_]; Anb = DESCA[NB_]; Xnb = DESCX[NB_]; Mfirstnb( Xinb1, N, JX, DESCX[INB_], Xnb ); /* * sub( X ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * column and either N is smaller than the first blocksize of sub( X ) and A, * or their column blocking factors match. */ if( ( !AisD && !XisD ) || ( ( AisD && XisD ) && ( ( Acol == Xcol ) && ( ( ( Ainb >= N ) && ( Xinb1 >= N ) ) || ( ( Ainb == Xinb1 ) && ( Anb == Xnb ) ) ) ) ) ) { /* * sub( X ) is aligned with A. Does A spans multiples process rows ? It does * if Arow < 0. */ ARowSpan = ( Arow < 0 ) || Mspan( M, 0, DESCA[IMB_], DESCA[MB_], Arow, nprow ); Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol ); if( XisR || ( !ARowSpan && ( Arow == Xrow ) ) ) { /* * If sub( X ) is replicated, or, A spans only one process row and either * sub( X ) is replicated or resides in the same process row than A, then * sub( X ) is already at the correct place. */ if( ( Anq > 0 ) && ( ARowSpan || ( myrow == Arow ) ) ) { size = TYPE->size; Xld = DESCX[ LLD_ ]; XAld = DXA[LLD_]; TYPE->Fmmadd( &K, &Anq, TYPE->one, Mptr( X, Xii, Xjj, Xld, size ), &Xld, TYPE->zero, Mptr( XAPTR, IJXA, 0, XAld, size ), &XAld ); } } else if( ARowSpan ) { /* * Otherwise, we know that sub( X ) cannot be replicated, let suppose in * addition that A spans all process rows. sub( X ) need simply to be broadcast * over A. */ if( myrow == Xrow ) { if( Anq > 0 ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); size = TYPE->size; Xld = DESCX[LLD_]; XAld = DXA[LLD_]; Xptr = Mptr( XAPTR, IJXA, 0, XAld, size ); TYPE->Fmmadd( &K, &Anq, TYPE->one, Mptr( X, Xii, Xjj, Xld, size ), &Xld, TYPE->zero, Xptr, &XAld ); TYPE->Cgebs2d( ctxt, COLUMN, top, K, Anq, Xptr, XAld ); } } else { if( Anq > 0 ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); XAld = DXA[LLD_]; TYPE->Cgebr2d( ctxt, COLUMN, top, K, Anq, Mptr( XAPTR, IJXA, 0, XAld, TYPE->size ), XAld, Xrow, mycol ); } } } else { /* * Finally, sub( X ) is not replicated and A spans only one process row. There * is no need to broadcast, a send/recv is sufficient. */ if( myrow == Xrow ) { if( Anq > 0 ) { Xld = DESCX[LLD_]; TYPE->Cgesd2d( ctxt, K, Anq, Mptr( X, Xii, Xjj, Xld, TYPE->size ), Xld, Arow, mycol ); } } else if( myrow == Arow ) { if( Anq > 0 ) { XAld = DXA[LLD_]; TYPE->Cgerv2d( ctxt, K, Anq, Mptr( XAPTR, IJXA, 0, XAld, TYPE->size ), XAld, Xrow, mycol ); } } } return; } } /* * sub( X ) cannot be reused, too bad ... redistribute */ if( XisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, N, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, XAPTR, IJXA, 0, DXA, ROW ); } else { PB_Cpaxpby( TYPE, CONJUG, N, K, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, XAPTR, IJXA, 0, DXA, ROW ); } } else { /* * Want a column vector. It is possible to reuse sub( X ) iff sub( X ) is * already a column vector and the data does not need to be conjugated */ if( !( XisRow ) && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) ); Aimb = DESCA[IMB_]; Amb = DESCA[MB_]; Xmb = DESCX[MB_]; Mfirstnb( Ximb1, M, IX, DESCX[IMB_], Xmb ); /* * sub( X ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * row and either M is smaller than the first blocksize of sub( X ) and A, or * their row blocking factors match. */ if( ( !AisD && !XisD ) || ( ( AisD && XisD ) && ( ( Arow == Xrow ) && ( ( ( Aimb >= M ) && ( Ximb1 >= M ) ) || ( ( Aimb == Ximb1 ) && ( Amb == Xmb ) ) ) ) ) ) { /* * sub( X ) is aligned with A. Does A spans multiples process columns ? It * does if Acol < 0. */ AColSpan = ( Acol < 0 ) || Mspan( N, 0, DESCA[INB_], DESCA[NB_], Acol, npcol ); Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow ); if( XisR || ( !AColSpan && ( Acol == Xcol ) ) ) { /* * If sub( X ) is replicated, or, A spans only one process column and either * sub( X ) is replicated or resides in the same process columns than A, then * sub( X ) is already at the correct place. */ if( ( Amp > 0 ) && ( AColSpan || ( mycol == Acol ) ) ) { size = TYPE->size; Xld = DESCX[ LLD_ ]; XAld = DXA[LLD_]; TYPE->Fmmadd( &Amp, &K, TYPE->one, Mptr( X, Xii, Xjj, Xld, size ), &Xld, TYPE->zero, Mptr( XAPTR, 0, IJXA, XAld, size ), &XAld ); } } else if( AColSpan ) { /* * Otherwise, we know that sub( X ) is not be replicated, let suppose in * addition that A spans all process columns. sub( X ) need simply to be * broadcast over A. */ if( mycol == Xcol ) { if( Amp > 0 ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); size = TYPE->size; Xld = DESCX[LLD_]; XAld = DXA[LLD_]; Xptr = Mptr( XAPTR, 0, IJXA, XAld, size ); TYPE->Fmmadd( &Amp, &K, TYPE->one, Mptr( X, Xii, Xjj, Xld, size ), &Xld, TYPE->zero, Xptr, &XAld ); TYPE->Cgebs2d( ctxt, ROW, top, Amp, K, Xptr, XAld ); } } else { if( Amp > 0 ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); XAld = DXA[LLD_]; TYPE->Cgebr2d( ctxt, ROW, top, Amp, K, Mptr( XAPTR, 0, IJXA, XAld, TYPE->size ), XAld, myrow, Xcol ); } } } else { /* * Finally, sub( X ) is not replicated and A spans only one process column. * There is no need to broadcast, a send/recv is sufficient. */ if( mycol == Xcol ) { if( Amp > 0 ) { Xld = DESCX[LLD_]; TYPE->Cgesd2d( ctxt, Amp, K, Mptr( X, Xii, Xjj, Xld, TYPE->size ), Xld, myrow, Acol ); } } else if( mycol == Acol ) { if( Amp > 0 ) { XAld = DXA[LLD_]; TYPE->Cgerv2d( ctxt, Amp, K, Mptr( XAPTR, 0, IJXA, XAld, TYPE->size ), XAld, myrow, Xcol ); } } } return; } } /* * sub( X ) cannot be reused, too bad ... redistribute */ if( XisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, M, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, XAPTR, 0, IJXA, DXA, COLUMN ); } else { PB_Cpaxpby( TYPE, CONJUG, M, K, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, XAPTR, 0, IJXA, DXA, COLUMN ); } } /* * End of PB_CInV2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cchkmat.c0000640000175000017500000002752310363532303017734 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cchkmat( int ICTXT, char * ROUT, char * MNAME, int M, int MPOS0, int N, int NPOS0, int IA, int JA, int * DESCA, int DPOS0, int * INFO ) #else void PB_Cchkmat( ICTXT, ROUT, MNAME, M, MPOS0, N, NPOS0, IA, JA, DESCA, DPOS0, INFO ) /* * .. Scalar Arguments .. */ int DPOS0, IA, ICTXT, * INFO, JA, M, MPOS0, N, NPOS0; /* * .. Array Arguments .. */ char * MNAME, * ROUT; int * DESCA; #endif { /* * Purpose * ======= * * PB_Cchkmat checks the validity of a descriptor vector DESCA, the * related global indexes IA, JA from a local view point. If an incon- * sistency is found among its parameters IA, JA and DESCA, the routine * returns an error code in INFO. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * ROUT (global input) pointer to CHAR * On entry, ROUT specifies the name of the routine calling this * input error checking routine. * * MNAME (global input) pointer to CHAR * On entry, MNAME specifies the name of the formal array argu- * ment in the calling routine. * * M (global input) INTEGER * On entry, M specifies the number of rows the submatrix * sub( A ). * * MPOS0 (global input) INTEGER * On entry, MPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter M appears. * * N (global input) INTEGER * On entry, N specifies the number of columns the submatrix * sub( A ). * * NPOS0 (global input) INTEGER * On entry, NPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter N appears. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * DPOS0 (global input) INTEGER * On entry, DPOS0 specifies the position in the calling rou- * tine's parameter list where the formal parameter DESCA ap- * pears. Note that it is assumed that IA and JA are respecti- * vely 2 and 1 entries behind DESCA. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had an * illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int dpos, iapos, japos, mpos, mycol, myrow, np, npcol, nprow, npos, nq; /* .. * .. Executable Statements .. * */ /* * Want to find errors with MIN( ), so if no error, set it to a big number. If * there already is an error, multiply by the the descriptor multiplier. */ if( *INFO >= 0 ) *INFO = BIGNUM; else if( *INFO < -DESCMULT ) *INFO = -(*INFO); else *INFO = -(*INFO) * DESCMULT; /* * Figure where in parameter list each parameter was, factoring in descriptor * multiplier */ mpos = MPOS0 * DESCMULT; npos = NPOS0 * DESCMULT; iapos = ( DPOS0 - 2 ) * DESCMULT; japos = ( DPOS0 - 1 ) * DESCMULT; dpos = DPOS0 * DESCMULT + 1; /* * Get process grid information */ Cblacs_gridinfo( ICTXT, &nprow, &npcol, &myrow, &mycol ); /* * Are M, N, IA, JA and DESCA legal inputs ? */ if( M < 0 ) { /* * M must be at least zero */ *INFO = MIN( *INFO, mpos ); PB_Cwarn( ICTXT, -1, ROUT, "%s sub( %s ) = %d, it must be at least 0", "Illegal number of rows of", MNAME, M ); } if( N < 0 ) { /* * N must be at least zero */ *INFO = MIN( *INFO, npos ); PB_Cwarn( ICTXT, -1, ROUT, "%s sub( %s ) = %d, it must be at least 0", "Illegal number of columns of", MNAME, N ); } if( IA < 0 ) { /* * IA must be at least zero */ *INFO = MIN( *INFO, iapos ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal I%s = %d, I%s must be at least 1", MNAME, IA+1, MNAME ); } if( JA < 0 ) { /* * JA must be at least zero */ *INFO = MIN( *INFO, japos ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal J%s = %d, I%s must be at least 1", MNAME, IA+1, MNAME ); } if( DESCA[DTYPE_] != BLOCK_CYCLIC_2D_INB ) { /* * Internally, only the descriptor type BLOCK_CYCLIC_2D_INB is supported */ *INFO = MIN( *INFO, dpos + DTYPE_ ); PB_Cwarn( ICTXT, -1, ROUT, "%s %d for matrix %s. PBLAS accepts: %d or %d", "Illegal descriptor type", DESCA[DTYPE_], MNAME, BLOCK_CYCLIC_2D, BLOCK_CYCLIC_2D_INB ); if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * No need to go any further ... */ return; } if( DESCA[CTXT_] != ICTXT ) { /* * Check if the context of X match the other contexts. Only intra-context * operations are supported. */ *INFO = MIN( *INFO, dpos + CTXT_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[CTXT_] = %d %s= %d", MNAME, DESCA[CTXT_], "does not match other operand's context ", ICTXT ); if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * No need to go any further ... */ return; } if( DESCA[IMB_] < 1 ) { /* * DESCA[IMB_] must be at least one */ *INFO = MIN( *INFO, dpos + IMB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[IMB_] = %d, DESC%s[IMB_] %s", MNAME, DESCA[IMB_], MNAME, "must be at least 1" ); } if( DESCA[INB_] < 1 ) { /* * DESCA[INB_] must be at least one */ *INFO = MIN( *INFO, dpos + INB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[INB_] = %d, DESC%s[INB_] %s", MNAME, DESCA[INB_], MNAME, "must be at least 1" ); } if( DESCA[MB_] < 1 ) { /* * DESCA[MB_] must be at least one */ *INFO = MIN( *INFO, dpos + MB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[MB_] = %d, DESC%s[MB_] %s", MNAME, DESCA[MB_], MNAME, "must be at least 1" ); } if( DESCA[NB_] < 1 ) { /* * DESCA[NB_] must be at least one */ *INFO = MIN( *INFO, dpos + NB_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[NB_] = %d, DESC%s[NB_] %s", MNAME, DESCA[NB_], MNAME, "must be at least 1" ); } if( ( DESCA[RSRC_] < -1 ) || ( DESCA[RSRC_] >= nprow ) ) { /* * DESCA[RSRC_] must be either -1 (replication) or in the interval [0 .. nprow) */ *INFO = MIN( *INFO, dpos + RSRC_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[RSRC_] = %d, DESC%s[RSRC_] %s%d", MNAME, DESCA[RSRC_], MNAME, "must be either -1, or >= 0 and < ", nprow ); } if( ( DESCA[CSRC_] < -1 ) || ( DESCA[CSRC_] >= npcol ) ) { /* * DESCX[CSRC_] must be either -1 (replication) or in the interval [0 .. npcol) */ *INFO = MIN( *INFO, dpos + CSRC_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[CSRC_] = %d, DESC%s[CSRC_] %s%d", MNAME, DESCA[CSRC_], MNAME, "must be either -1, or >= 0 and < ", npcol ); } if( M == 0 || N == 0 ) { /* * NULL matrix, relax some checks */ if( DESCA[M_] < 0 ) { /* * DESCX[M_] must be at least 0 */ *INFO = MIN( *INFO, dpos + M_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[M_] = %d, it must be at least 0", MNAME, DESCA[M_] ); } if( DESCA[N_] < 0 ) { /* * DESCX[N_] must be at least 0 */ *INFO = MIN( *INFO, dpos + N_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[N_] = %d, it must be at least 0", MNAME, DESCA[N_] ); } if( DESCA[LLD_] < 1 ) { /* * DESCA[LLD_] must be at least 1 */ *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least 1", MNAME, DESCA[LLD_] ); } } else { /* * more rigorous checks for non-degenerate matrix */ if( DESCA[M_] < 1 ) { /* * DESCA[M_] must be at least 1 */ *INFO = MIN( *INFO, dpos + M_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[M_] = %d, it must be at least 1", MNAME, DESCA[M_]); } if( DESCA[N_] < 1 ) { /* * DESCA[N_] must be at least 1 */ *INFO = MIN( *INFO, dpos + N_ ); PB_Cwarn( ICTXT, -1, ROUT, "Illegal DESC%s[N_] = %d, it must be at least 1", MNAME, DESCA[N_]); } if( ( DESCA[M_] >= 1 ) && ( DESCA[N_] >= 1 ) ) { if( IA+M > DESCA[M_] ) { /* * IA + M must be in [ 0 ... DESCA[M_] ] */ *INFO = MIN( *INFO, iapos ); PB_Cwarn( ICTXT, -1, ROUT, "%s M = %d, I%s = %d, DESC%s[M_] = %d", "Operation out of bounds:", M, MNAME, IA+1, MNAME, DESCA[M_]); } if( JA+N > DESCA[N_] ) { /* * JA + N must be in [ 0 ... DESCA[N_] ] */ *INFO = MIN( *INFO, japos ); PB_Cwarn( ICTXT, -1, ROUT, "%s N = %d, J%s = %d, DESC%s[N_] = %d", "Operation out of bounds:", N, MNAME, JA+1, MNAME, DESCA[N_]); } } /* * *INFO == BIGNUM => No errors have been found so far */ if( *INFO == BIGNUM ) { Mnumroc( np, DESCA[M_], 0, DESCA[IMB_], DESCA[MB_], myrow, DESCA[RSRC_], nprow ); if( DESCA[LLD_] < MAX( 1, np ) ) { Mnumroc( nq, DESCA[N_], 0, DESCA[INB_], DESCA[NB_], mycol, DESCA[CSRC_], npcol ); /* * DESCA[LLD_] must be at least 1 in order to be legal and this is enough if no * columns of A reside in this process. */ if( DESCA[LLD_] < 1 ) { *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least 1", MNAME, DESCA[LLD_] ); } else if( nq > 0 ) { /* * Some columns of A reside in this process, DESCA[LLD_] must be at least * MAX( 1, np ). */ *INFO = MIN( *INFO, dpos + LLD_ ); PB_Cwarn( ICTXT, -1, ROUT, "DESC%s[LLD_] = %d, it must be at least %d", MNAME, DESCA[LLD_], MAX( 1, np ) ); } } } } /* * Prepare output: set info = 0 if no error, and divide by DESCMULT if error is * not in a descriptor entry. */ if( *INFO == BIGNUM ) *INFO = 0; else if( *INFO % DESCMULT == 0 ) *INFO = -( (*INFO) / DESCMULT ); else *INFO = -(*INFO); /* * End of PB_Cchkmat */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CVMupdate.c0000640000175000017500000003162210363532303020205 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CVMupdate( PB_VM_T * VM, int K, int * II, int * JJ ) #else void PB_CVMupdate( VM, K, II, JJ ) /* * .. Scalar Arguments .. */ int * II, * JJ, K; PB_VM_T * VM; #endif { /* * Purpose * ======= * * PB_CVMupdate updates the local information of an m by n local array * owned by the process of relative coordinates ( MRROW, MRCOL ). Note * that if m or n is less or equal than zero, there is no data, in which * case this process does not need the local information computed by * this routine to proceed. * * Arguments * ========= * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * K (global input) INTEGER * On entry, K specifies the number of diagonal elements that * have been used so far. K must be at least zero. * * II (local input/local output) INTEGER * On entry, II specifies the local row index to be updated. On * exit, II points to the local row owning the K+1 diagonal of * this local block. On entry and on exit, II is at least zero. * * JJ (local input/local output) INTEGER * On entry, JJ specifies the local column index to be updated. * On exit, JJ points to the local column owning the K+1 * diagonal of this local block. On entry and on exit, JJ is at * least zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, ilow, imbloc, inbloc, ioff, ioffd, iupp, joff, joffd, lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc, nb, nblkd, nblks, nbloc, npq=0, pmb, qnb, tmp1, tmp2, upp; /* .. * .. Executable Statements .. * */ mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks or if no diagonals were found. */ if( ( K <= 0 ) || ( mblks == 0 ) || ( nblks == 0 ) ) return; /* * Handle the first block of rows or columns separately */ ioff = *II; joff = *JJ; /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; iupp = VM->iupp; upp = VM->upp; pmb = VM->nprow * mb; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; ilow = VM->ilow; low = VM->low; qnb = VM->npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { npq = ( ( tmp2 = ( tmp1 = imbloc - lcmt00 ) > 0 ? tmp1 : 0 ) < inbloc ? tmp2 : inbloc ); if( K < npq ) { tmp1 = lcmt00 + K; *II += tmp1; iupp = ( imbloc -= tmp1 ) - 1; if( mblks == 1 ) lmbloc = imbloc; *JJ += K; ilow = 1 - ( inbloc -= K ); if( nblks == 1 ) lnbloc = inbloc; lcmt00 = 0; goto l_end; } } else { npq = ( ( tmp2 = ( tmp1 = inbloc + lcmt00 ) > 0 ? tmp1 : 0 ) > imbloc ? imbloc : tmp2 ); if( K < npq ) { tmp1 = lcmt00 - K; *JJ -= tmp1; ilow = 1 - ( inbloc += tmp1 ); if( nblks == 1 ) lnbloc = inbloc; *II += K; iupp = ( imbloc -= K ) - 1; if( mblks == 1 ) lmbloc = imbloc; lcmt00 = 0; goto l_end; } } K -= npq; /* * Decide whether one should go south or east in the table: Go east if the * block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp-upp+pmb ) ) < ilow ) ); /* * Update the local indexes II and JJ */ if( GoSouth ) *II += imbloc; else *JJ += inbloc; } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value. */ lcmt00 -= iupp - upp + pmb; mblks--; ioff += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; ioff += mb; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) goto l_end; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; mbloc = mb; ioffd = ioff; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; if( lcmt >= 0 ) { npq = ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < inbloc ? tmp2 : inbloc ); if( K < npq ) { tmp1 = lcmt + K; *II = ioffd + tmp1; iupp = ( imbloc = mbloc - tmp1 ) - 1; if( mblks == 1 ) lmbloc = imbloc; *JJ = joff + K; ilow = 1 - ( inbloc -= K ); if( nblks == 1 ) lnbloc = inbloc; lcmt00 = 0; goto l_end; } } else { npq = ( ( tmp2 = ( tmp1 = inbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ? mbloc : tmp2 ); if( K < npq ) { tmp1 = lcmt - K; *JJ = joff - tmp1; ilow = 1 - ( inbloc += tmp1 ); if( nblks == 1 ) lnbloc = inbloc; *II = ioffd + K; iupp = ( imbloc = mbloc - K ) - 1; if( mblks == 1 ) lmbloc = imbloc; lcmt00 = 0; goto l_end; } } /* * Keep going south until there are no more blocks owning diagonals */ K -= npq; lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd--; ioff = ioffd; ioffd += mbloc; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; joff += inbloc; /* * Update the local indexes II and JJ */ *II = ioff; *JJ = joff; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value. */ lcmt00 += low - ilow + qnb; nblks--; joff += inbloc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; joff += nb; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) goto l_end; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; nbloc = nb; joffd = joff; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; if( lcmt >= 0 ) { npq = ( ( tmp2 = ( tmp1 = imbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ? tmp2 : nbloc ); if( K < npq ) { tmp1 = lcmt + K; *II = ioff + tmp1; iupp = ( imbloc -= tmp1 ) - 1; if( mblks == 1 ) lmbloc = imbloc; *JJ = joffd + K; ilow = 1 - ( inbloc = nbloc - K ); if( nblks == 1 ) lnbloc = inbloc; lcmt00 = 0; goto l_end; } } else { npq = ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > imbloc ? imbloc : tmp2 ); if( K < npq ) { tmp1 = lcmt - K; *JJ = joffd - tmp1; ilow = 1 - ( inbloc = nbloc + tmp1 ); if( nblks == 1 ) lnbloc = inbloc; *II = ioff + K; iupp = ( imbloc -= K ) - 1; if( mblks == 1 ) lmbloc = imbloc; lcmt00 = 0; goto l_end; } } /* * Keep going east until there are no more blocks owning diagonals. */ K -= npq; lcmt00 = lcmt; lcmt += qnb; nblks = nblkd--; joff = joffd; joffd += nbloc; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; ioff += imbloc; /* * Update the local indexes II and JJ */ *II = ioff; *JJ = joff; } /* * Loop over the remaining columns of the LCM table. */ nbloc = nb; while( nblks ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; ioff += mb; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) goto l_end; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; mbloc = mb; ioffd = ioff; while( mblkd && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; if( lcmt >= 0 ) { npq = ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ? tmp2 : nbloc ); if( K < npq ) { tmp1 = lcmt + K; *II = ioffd + tmp1; iupp = ( imbloc = mbloc - tmp1 ) - 1; if( mblks == 1 ) lmbloc = imbloc; *JJ = joff + K; ilow = 1 - ( inbloc = nbloc - K ); if( nblks == 1 ) lnbloc = inbloc; lcmt00 = 0; goto l_end; } } else { npq = ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ? mbloc : tmp2 ); if( K < npq ) { tmp1 = lcmt - K; *JJ = joff - tmp1; ilow = 1 - ( inbloc = nbloc + tmp1 ); if( nblks == 1 ) lnbloc = inbloc; *II = ioffd + K; iupp = ( imbloc = mbloc - K ) - 1; if( mblks == 1 ) lmbloc = imbloc; lcmt00 = 0; goto l_end; } } /* * Keep going south until there are no more blocks owning diagonals */ K -= npq; lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd--; ioff = ioffd; ioffd += mbloc; } /* * I am done with this column of the LCM table. Go to the next column until * there are no more column in the table. */ lcmt00 += qnb; nblks--; joff += nbloc; /* * Update the local indexes II and JJ */ *II = ioff; *JJ = joff; } l_end: /* * Update the fields of the VM structure */ VM->lcmt00 = lcmt00; VM->mp = ( mblks >= 2 ? imbloc + ( mblks - 2 ) * mb + lmbloc : ( mblks == 1 ? imbloc : 0 ) ); VM->imbloc = imbloc; VM->lmbloc = lmbloc; VM->mblks = mblks; VM->iupp = iupp; VM->nq = ( nblks >= 2 ? inbloc + ( nblks - 2 ) * nb + lnbloc : ( nblks == 1 ? inbloc : 0 ) ); VM->inbloc = inbloc; VM->lnbloc = lnbloc; VM->nblks = nblks; VM->ilow = ilow; /* * End of PB_CVMupdate */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzher2.c0000640000175000017500000002202110363532303017667 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzher2( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * XC, int LDXC, char * YC, int LDYC, char * XR, int LDXR, char * YR, int LDYR, char * A, int LDA ) #else void PB_Ctzher2( TYPE, UPLO, M, N, K, IOFFD, ALPHA, XC, LDXC, YC, LDYC, XR, LDXR, YR, LDYR, A, LDA ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * A, * XC, * XR, * YC, * YR; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctzher2 performs the trapezoidal symmetric or Hermitian rank 2 * operation: * * A := alpha * XC * YR + alpha * YC * XR + A, or * * A := alpha*XC*conjg( YR ) + conjg( alpha )*YC*conjg( XR ) + A, * * where alpha is a scalar, XC and YC are m element vectors, XR and YR * are n element vectors and A is an m by n trapezoidal symmetric * or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (see pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * YC (input) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YR (input) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * A (input/output) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is updated. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ione=1, j1, m1, mn, n1, size; char * Calph, type; cmplx Calph8; cmplx16 Calph16; GERC_T gerc; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; type = TYPE->type; if( type == SCPLX ) { Calph = ( (char *)(Calph8) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else if( type == DCPLX ) { Calph = ( (char *)(Calph16) ); PB_Cconjg( TYPE, ALPHA, Calph ); } else { Calph = ALPHA; } if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; gerc = TYPE->Fgerc; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gerc( &M, &n1, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); gerc( &M, &n1, Calph, YC, &ione, XR, &LDXR, A, &LDA ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fher2( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, i1, j1, LDA, size ), &LDA ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gerc( &m1, &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, i1, j1, LDA, size ), &LDA ); gerc( &m1, &n1, Calph, Mptr( YC, i1, 0, LDYC, size ), &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, i1, j1, LDA, size ), &LDA ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; gerc = TYPE->Fgerc; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gerc( &m1, &n1, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); gerc( &m1, &n1, Calph, YC, &ione, XR, &LDXR, A, &LDA ); } TYPE->Fher2( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, m1, 0, LDXC, size ), &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, m1, j1, LDA, size ), &LDA ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gerc( &M, &n1, ALPHA, XC, &ione, Mptr( YR, 0, j1, LDYR, size ), &LDYR, Mptr( A, 0, j1, LDA, size ), &LDA ); gerc( &M, &n1, Calph, YC, &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, 0, j1, LDA, size ), &LDA ); } } else { gerc = TYPE->Fgerc; gerc( &M, &N, ALPHA, XC, &ione, YR, &LDYR, A, &LDA ); gerc( &M, &N, Calph, YC, &ione, XR, &LDXR, A, &LDA ); } /* * End of PB_Ctzher2 */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Clastnb.c0000640000175000017500000000376510363532303017752 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Clastnb( int N, int I, int INB, int NB ) #else int PB_Clastnb( N, I, INB, NB ) /* * .. Scalar Arguments .. */ int I, INB, N, NB; #endif { /* * Purpose * ======= * * PB_Clastnb returns the global number of matrix rows or columns of the * last block, if N rows or columns are given out starting from the glo- * bal index I. Note that if N is equal 0, this routine returns 0. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix distribution. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int lnbt; /* .. * .. Executable Statements .. * */ if( ( lnbt = I + N - INB ) > 0 ) { lnbt = lnbt - NB * ( ( NB + lnbt - 1 ) / NB - 1 ); return( MIN( lnbt, N ) ); } else { return( N ); } /* * End of PB_Clastnb */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_topget_.c0000640000175000017500000000444510363532303020021 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_topget_( int * ICTXT, F_CHAR_T OP, F_CHAR_T SCOPE, F_CHAR_T TOP ) #else void PB_topget_( ICTXT, OP, SCOPE, TOP ) /* * .. Scalar Arguments .. */ int * ICTXT; /* * .. Array Arguments .. */ F_CHAR_T OP, SCOPE, TOP; #endif { /* * Purpose * ======= * * PB_topget_ returns the row, column or all broadcast and combine topo- * logies. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * OP (global input) pointer to CHAR * On input, OP specifies the BLACS operation defined as fol- * lows: * OP = 'B' or 'b', BLACS broadcast operation, * OP = 'C' or 'c', BLACS combine operation. * * SCOPE (global input) pointer to CHAR * On entry, SCOPE specifies the scope of the BLACS operation as * follows: * SCOPE = 'R' or 'r', rowwise broadcast or combine, * SCOPE = 'C' or 'c', column broadcast or combine, * SCOPE = 'A' or 'a', all broadcast or combine. * * TOP (global output) pointer to CHAR * On exit, TOP is a character string specifying the BLACS to- * pology current in use for the given operation specified by OP * and SCOPE. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * topc; /* .. * .. Executable Statements .. * */ topc = F2C_CHAR( TOP ); *topc = *PB_Ctop( ICTXT, F2C_CHAR( OP ), F2C_CHAR( SCOPE ), TOP_GET ); TOP = C2F_CHAR( topc ); /* * End of PB_topget_ */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CInV.c0000640000175000017500000005070110363532303017153 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CInV( PBTYP_T * TYPE, char * CONJUG, char * ROWCOL, int M, int N, int * DESCA, int K, char * X, int IX, int JX, int * DESCX, char * XROC, char * * XAPTR, int * DXA, int * XAFREE ) #else void PB_CInV( TYPE, CONJUG, ROWCOL, M, N, DESCA, K, X, IX, JX, DESCX, XROC, XAPTR, DXA, XAFREE ) /* * .. Scalar Arguments .. */ char * CONJUG, * ROWCOL, * XROC; int * XAFREE, IX, JX, K, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DXA; char * X, * * XAPTR; #endif { /* * Purpose * ======= * * PB_CInV returns a pointer to an array that contains a one-dimensional * input only subvector which is replicated over the rows or columns of * a submatrix described by DESCA. A subvector is specified on input to * this routine that is reused whenever possible. On return, the subvec- * tor is specified by a pointer to some data, a descriptor array des- * cribing its layout and a logical value indicating if this local piece * of data has been dynamically allocated by this function. This routine * is specifically designed for traditional Level 2 like PBLAS opera- * tions using an input only vector such as PxGER, PxSYR ... * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies if this routine should return * the conjugate subvector as follows: * = 'N' or 'n': The initial subvector is returned, * = 'Z' or 'z': The conjugate subvector is returned. * * ROWCOL (global input) pointer to CHAR * On entry, ROWCOL specifies if this routine should return a * row or column subvector replicated over the underlying subma- * trix as follows: * = 'R' or 'r': A row subvector is returned, * = 'C' or 'c': A column subvector is returned. * * M (global input) INTEGER * On entry, M specifies the number of rows of the underlying * submatrix described by DESCA. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the underlying * submatrix described by DESCA. N must be at least zero. * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * K (global input) INTEGER * On entry, K specifies the length of the non-distributed di- * mension of the subvector sub( X ). K must be at least zero. * * X (local input) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( K, IX ) ) when XROC is 'R' or 'r' * and MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( K, JX ) otherwise. * Lx is N when ROWCOL = 'R' or 'r' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * XROC (global input) pointer to CHAR * On entry, XROC specifies the orientation of the subvector * sub( X ). When XROC is 'R' or 'r', sub( X ) is a row vector, * and a column vector otherwise. * * XAPTR (local output) pointer to pointer to CHAR * On exit, * XAPTR is an array containing the same data as the * subvector sub( X ) which is replicated over the rows or co- * lumns of the underlying matrix as specified by ROWCOL and * DESCA. * * DXA (global and local output) INTEGER array * On exit, DXA is a descriptor array of dimension DLEN_ descri- * bing the data layout of the data pointed to by * XAPTR. * * XAFREE (local output) INTEGER * On exit, XAFREE specifies if it has been possible to reuse * the subvector sub( X ), i.e., if some dynamic memory was al- * located for the data pointed to by *XAPTR or not. When XAFREE * is zero, no dynamic memory was allocated. Otherwise, some dy- * namic memory was allocated by this function that one MUST re- * lease as soon as possible. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * top; int AColSpan, ARowSpan, Acol, Aimb, Ainb, AisD, Amb, Amp, Anb, Anq, Arow, Xcol, Xii, Ximb, Ximb1, Xinb, Xinb1, XisD, XisR, XisRow, Xjj, Xld=1, Xmb, Xmp, Xnb, Xnq, Xrow, ctxt, mycol, myrow, npcol, nprow; /* .. * .. Executable Statements .. * */ /* * Initialize the output parameters to a default value */ *XAFREE = 0; *XAPTR = NULL; /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) { if( Mupcase( ROWCOL[0] ) == CROW ) { PB_Cdescset( DXA, K, N, 1, DESCA[INB_], 1, DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 ); } else { PB_Cdescset( DXA, M, K, DESCA[IMB_], 1, DESCA[MB_], 1, DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], DESCA[LLD_] ); } return; } /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Ycol */ Minfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, Xii, Xjj, Xrow, Xcol ); /* * Is sub( X ) distributed or not, replicated or not ? */ if( ( XisRow = ( Mupcase( XROC[0] ) == CROW ) ) != 0 ) { XisD = ( ( Xcol >= 0 ) && ( npcol > 1 ) ); XisR = ( ( Xrow == -1 ) || ( nprow == 1 ) ); } else { XisD = ( ( Xrow >= 0 ) && ( nprow > 1 ) ); XisR = ( ( Xcol == -1 ) || ( npcol == 1 ) ); } Arow = DESCA[ RSRC_ ]; Acol = DESCA[ CSRC_ ]; if( Mupcase( ROWCOL[0] ) == CROW ) { /* * Want a row vector */ Ainb = DESCA[ INB_ ]; Anb = DESCA[ NB_ ]; Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol ); /* * Does A spans multiples process rows ? It does if Arow < 0. */ ARowSpan = ( Arow < 0 ) || Mspan( M, 0, DESCA[IMB_], DESCA[MB_], Arow, nprow ); if( XisRow && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { /* * It is possible to reuse sub( X ) iff sub( X ) is already a row vector and * the data does not need to be conjugated. */ AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) ); Xinb = DESCX[ INB_ ]; Xnb = DESCX[ NB_ ]; Mfirstnb( Xinb1, N, JX, Xinb, Xnb ); /* * sub( X ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * column and either N is smaller than the first blocksize of sub( X ) and A, * or their column blocking factors match. */ if( ( !AisD && !XisD ) || ( ( AisD && XisD ) && ( ( Acol == Xcol ) && ( ( ( Ainb >= N ) && ( Xinb1 >= N ) ) || ( ( Ainb == Xinb1 ) && ( Anb == Xnb ) ) ) ) ) ) { /* * sub( X ) is aligned with A */ Ximb = DESCX[ IMB_ ]; Xmb = DESCX[ MB_ ]; Mfirstnb( Ximb1, K, IX, Ximb, Xmb ); if( XisR || ( !ARowSpan && ( Arow == Xrow ) ) ) { /* * If sub( X ) is replicated, or, A spans only one process row and either * sub( X ) is replicated or resides in the same process row than A, then * sub( X ) is already at the correct place. */ if( Anq > 0 ) { Xld = DESCX[ LLD_ ]; if( ARowSpan || ( myrow == Arow ) ) *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); } else { Xld = 1; } MDescSet( DXA, K, N, K, Xinb1, 1, Xnb, ( ARowSpan ? -1 : Arow ), Xcol, ctxt, Xld ); } else if( ARowSpan ) { /* * Otherwise, we know that sub( X ) cannot be replicated, let suppose in * addition that A spans all process rows. sub( X ) need simply to be broadcast * over A. */ if( myrow == Xrow ) { Xld = DESCX[ LLD_ ]; if( Anq > 0 ) { *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebs2d( ctxt, COLUMN, top, K, Anq, *XAPTR, Xld ); } } else { Xld = MAX( 1, K ); if( Anq > 0 ) { *XAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *XAFREE = 1; top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebr2d( ctxt, COLUMN, top, K, Anq, *XAPTR, Xld, Xrow, mycol ); } } PB_Cdescset( DXA, K, N, K, Xinb1, 1, Xnb, -1, Xcol, ctxt, Xld ); } else { /* * Finally, sub( X ) is not replicated and A spans only one process row. There * is no need to broadcast, a send/recv is sufficient. */ if( myrow == Xrow ) { Xld = DESCX[ LLD_ ]; if( Anq > 0 ) { *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); TYPE->Cgesd2d( ctxt, K, Anq, *XAPTR, Xld, Arow, mycol ); } } else if( myrow == Arow ) { Xld = MAX( 1, K ); if( Anq > 0 ) { *XAPTR = PB_Cmalloc( K * Anq * TYPE->size ); *XAFREE = 1; TYPE->Cgerv2d( ctxt, K, Anq, *XAPTR, Xld, Xrow, mycol ); } } PB_Cdescset( DXA, K, N, K, Xinb1, 1, Xnb, Arow, Xcol, ctxt, Xld ); } return; } } /* * sub( X ) cannot be reused, too bad ... redistribute */ PB_Cdescset( DXA, K, N, K, Ainb, 1, Anb, ( ARowSpan ? -1 : Arow ), Acol, ctxt, K ); Xmp = ( ARowSpan ? K : ( ( myrow == Arow ) ? K : 0 ) ); if( Xmp > 0 && Anq > 0 ) { *XAPTR = PB_Cmalloc( Anq * Xmp * TYPE->size ); *XAFREE = 1; } if( XisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, N, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, *XAPTR, 0, 0, DXA, ROW ); } else { PB_Cpaxpby( TYPE, CONJUG, N, K, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, *XAPTR, 0, 0, DXA, ROW ); } } else { /* * Want a column vector */ Aimb = DESCA[IMB_]; Amb = DESCA[MB_]; Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow ); /* * Does A spans multiples process columns ? It does if Acol < 0. */ AColSpan = ( Acol < 0 ) || Mspan( N, 0, DESCA[INB_], DESCA[NB_], Acol, npcol ); if( !( XisRow ) && ( Mupcase( CONJUG[0] ) == CNOCONJG ) ) { /* * It is possible to reuse sub( X ) iff sub( X ) is already a column vector and * the data does not need to be conjugated */ AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) ); Ximb = DESCX[ IMB_ ]; Xmb = DESCX[ MB_ ]; Mfirstnb( Ximb1, M, IX, Ximb, Xmb ); /* * sub( X ) is aligned with A (reuse condition) iff both operands are not * distributed, or both of them are distributed and start in the same process * row and either M is smaller than the first blocksize of sub( X ) and A, or * their row blocking factors match. */ if( ( !AisD && !XisD ) || ( ( AisD && XisD ) && ( ( Arow == Xrow ) && ( ( ( Aimb >= M ) && ( Ximb1 >= M ) ) || ( ( Aimb == Ximb1 ) && ( Amb == Xmb ) ) ) ) ) ) { /* * sub( X ) is aligned with A */ Xinb = DESCX[ INB_ ]; Xnb = DESCX[ NB_ ]; Mfirstnb( Xinb1, K, JX, Xinb, Xnb ); if( XisR || ( !AColSpan && ( Acol == Xcol ) ) ) { /* * If sub( X ) is replicated, or, A spans only one process column and either * sub( X ) is replicated or resides in the same process columns than A, then * sub( X ) is already at the correct place. */ if( Amp > 0 ) { Xld = DESCX[ LLD_ ]; if( AColSpan || ( mycol == Acol ) ) *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); } else { Xld = 1; } MDescSet( DXA, M, K, Ximb1, K, Xmb, 1, Xrow, ( AColSpan ? -1 : Acol ), ctxt, Xld ); } else if( AColSpan ) { /* * Otherwise, we know that sub( X ) is not be replicated, let suppose in * addition that A spans all process columns. sub( X ) need simply to be * broadcast over A. */ if( mycol == Xcol ) { Xld = DESCX[ LLD_ ]; if( Amp > 0 ) { *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebs2d( ctxt, ROW, top, Amp, K, *XAPTR, Xld ); } } else { Xld = MAX( 1, Amp ); if( Amp > 0 ) { *XAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *XAFREE = 1; top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebr2d( ctxt, ROW, top, Amp, K, *XAPTR, Xld, myrow, Xcol ); } } PB_Cdescset( DXA, M, K, Ximb1, K, Xmb, 1, Xrow, -1, ctxt, Xld ); } else { /* * Finally, sub( X ) is not replicated and A spans only one process column. * There is no need to broadcast, a send/recv is sufficient. */ if( mycol == Xcol ) { Xld = DESCX[ LLD_ ]; if( Amp > 0 ) { *XAPTR = Mptr( X, Xii, Xjj, Xld, TYPE->size ); TYPE->Cgesd2d( ctxt, Amp, K, *XAPTR, Xld, myrow, Acol ); } } else if( mycol == Acol ) { Xld = MAX( 1, Amp ); if( Amp > 0 ) { *XAPTR = PB_Cmalloc( Amp * K * TYPE->size ); *XAFREE = 1; TYPE->Cgerv2d( ctxt, Amp, K, *XAPTR, Xld, myrow, Xcol ); } } PB_Cdescset( DXA, M, K, Ximb1, K, Xmb, 1, Xrow, Acol, ctxt, Xld ); } return; } } /* * sub( X ) cannot be reused, too bad ... redistribute */ PB_Cdescset( DXA, M, K, Aimb, K, Amb, 1, Arow, ( AColSpan ? -1 : Acol ), ctxt, MAX( 1, Amp ) ); Xnq = ( AColSpan ? K : ( ( mycol == Acol ) ? K : 0 ) ); if( Xnq > 0 && Amp > 0 ) { *XAPTR = PB_Cmalloc( Amp * Xnq * TYPE->size ); *XAFREE = 1; } if( XisRow ) { PB_Cpaxpby( TYPE, CONJUG, K, M, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, *XAPTR, 0, 0, DXA, COLUMN ); } else { PB_Cpaxpby( TYPE, CONJUG, M, K, TYPE->one, X, IX, JX, DESCX, XROC, TYPE->zero, *XAPTR, 0, 0, DXA, COLUMN ); } } /* * End of PB_CInV */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cstypeset.c0000640000175000017500000000624010363532303020336 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" PBTYP_T * PB_Cstypeset() { /* * Purpose * ======= * * PB_Cstypeset on the first call initializes a static structure contai- * ning typed information and returns a pointer to it. The other calls * to this routine just returns this pointer. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static int setup=0; static PBTYP_T TypeStruct; static float zero, one, negone; /* .. * .. Executable Statements .. * */ if( setup ) return( &TypeStruct ); setup = 1; TypeStruct.type = SREAL; TypeStruct.usiz = sizeof( float ); TypeStruct.size = sizeof( float ); zero = ZERO; one = ONE; negone = -ONE; TypeStruct.zero = (char *) (&zero); TypeStruct.one = (char *) (&one); TypeStruct.negone = (char *) (&negone); TypeStruct.Cgesd2d = Csgesd2d; TypeStruct.Cgerv2d = Csgerv2d; TypeStruct.Cgebs2d = Csgebs2d; TypeStruct.Cgebr2d = Csgebr2d; TypeStruct.Cgsum2d = Csgsum2d; TypeStruct.Fmmadd = smmadd_; TypeStruct.Fmmcadd = smmcadd_; TypeStruct.Fmmtadd = smmtadd_; TypeStruct.Fmmtcadd = smmtcadd_; TypeStruct.Fmmdda = smmdda_; TypeStruct.Fmmddac = smmddac_; TypeStruct.Fmmddat = smmddat_; TypeStruct.Fmmddact = smmddact_; TypeStruct.Fcshft = scshft_; TypeStruct.Frshft = srshft_; TypeStruct.Fvvdotu = svvdot_; TypeStruct.Fvvdotc = svvdot_; TypeStruct.Fset = sset_; TypeStruct.Ftzpad = stzpad_; TypeStruct.Ftzpadcpy = stzpadcpy_; TypeStruct.Ftzscal = stzscal_; TypeStruct.Fhescal = stzscal_; TypeStruct.Ftzcnjg = stzscal_; TypeStruct.Faxpy = saxpy_; TypeStruct.Fcopy = scopy_; TypeStruct.Fswap = sswap_; TypeStruct.Fgemv = sgemv_; TypeStruct.Fsymv = ssymv_; TypeStruct.Fhemv = ssymv_; TypeStruct.Ftrmv = strmv_; TypeStruct.Ftrsv = strsv_; TypeStruct.Fagemv = sagemv_; TypeStruct.Fasymv = sasymv_; TypeStruct.Fahemv = sasymv_; TypeStruct.Fatrmv = satrmv_; TypeStruct.Fgerc = sger_; TypeStruct.Fgeru = sger_; TypeStruct.Fsyr = ssyr_; TypeStruct.Fher = ssyr_; TypeStruct.Fsyr2 = ssyr2_; TypeStruct.Fher2 = ssyr2_; TypeStruct.Fgemm = sgemm_; TypeStruct.Fsymm = ssymm_; TypeStruct.Fhemm = ssymm_; TypeStruct.Fsyrk = ssyrk_; TypeStruct.Fherk = ssyrk_; TypeStruct.Fsyr2k = ssyr2k_; TypeStruct.Fher2k = ssyr2k_; TypeStruct.Ftrmm = strmm_; TypeStruct.Ftrsm = strsm_; return( &TypeStruct ); /* * End of PB_Cstypeset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_freebuf_.c0000640000175000017500000000155310363532303020132 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" void PB_freebuf_() { /* * Purpose * ======= * * PB_freebuf_ disposes the dynamic memory allocated by PB_Cgetbuf. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ (void) PB_Cgetbuf( " ", -1 ); /* * End of PB_freebuf_ */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cfirstnb.c0000640000175000017500000000366310363532303020133 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_Cfirstnb( int N, int I, int INB, int NB ) #else int PB_Cfirstnb( N, I, INB, NB ) /* * .. Scalar Arguments .. */ int I, INB, N, NB; #endif { /* * Purpose * ======= * * PB_Cfirstnb returns the global number of matrix rows or columns of the * first block, if N rows or columns are given out starting from the * global index I. Note that if N is equal 0, this routine returns 0. * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the number of rows/columns being dealt * out. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global index of the matrix entry. * I must be at least zero. * * INB (global input) INTEGER * On entry, INB specifies the size of the first block of the * global matrix distribution. INB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the size of the blocks used to parti- * tion the matrix. NB must be at least one. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int inbt; /* .. * .. Executable Statements .. * */ inbt = ( ( INB -= I ) <= 0 ? ( (-INB) / NB + 1 ) * NB + INB : INB ); return( MIN( inbt, N ) ); /* * End of PB_Cfirstnb */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cdtypeset.c0000640000175000017500000000624210363532303020321 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" PBTYP_T * PB_Cdtypeset() { /* * Purpose * ======= * * PB_Cdtypeset on the first call initializes a static structure contai- * ning typed information and returns a pointer to it. The other calls * to this routine just returns this pointer. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static int setup=0; static PBTYP_T TypeStruct; static double zero, one, negone; /* .. * .. Executable Statements .. * */ if( setup ) return( &TypeStruct ); setup = 1; TypeStruct.type = DREAL; TypeStruct.usiz = sizeof( double ); TypeStruct.size = sizeof( double ); zero = ZERO; one = ONE; negone = -ONE; TypeStruct.zero = (char *) (&zero); TypeStruct.one = (char *) (&one); TypeStruct.negone = (char *) (&negone); TypeStruct.Cgesd2d = Cdgesd2d; TypeStruct.Cgerv2d = Cdgerv2d; TypeStruct.Cgebs2d = Cdgebs2d; TypeStruct.Cgebr2d = Cdgebr2d; TypeStruct.Cgsum2d = Cdgsum2d; TypeStruct.Fmmadd = dmmadd_; TypeStruct.Fmmcadd = dmmcadd_; TypeStruct.Fmmtadd = dmmtadd_; TypeStruct.Fmmtcadd = dmmtcadd_; TypeStruct.Fmmdda = dmmdda_; TypeStruct.Fmmddac = dmmddac_; TypeStruct.Fmmddat = dmmddat_; TypeStruct.Fmmddact = dmmddact_; TypeStruct.Fcshft = dcshft_; TypeStruct.Frshft = drshft_; TypeStruct.Fvvdotu = dvvdot_; TypeStruct.Fvvdotc = dvvdot_; TypeStruct.Fset = dset_; TypeStruct.Ftzpad = dtzpad_; TypeStruct.Ftzpadcpy = dtzpadcpy_; TypeStruct.Ftzscal = dtzscal_; TypeStruct.Fhescal = dtzscal_; TypeStruct.Ftzcnjg = dtzscal_; TypeStruct.Faxpy = daxpy_; TypeStruct.Fcopy = dcopy_; TypeStruct.Fswap = dswap_; TypeStruct.Fgemv = dgemv_; TypeStruct.Fsymv = dsymv_; TypeStruct.Fhemv = dsymv_; TypeStruct.Ftrmv = dtrmv_; TypeStruct.Ftrsv = dtrsv_; TypeStruct.Fagemv = dagemv_; TypeStruct.Fasymv = dasymv_; TypeStruct.Fahemv = dasymv_; TypeStruct.Fatrmv = datrmv_; TypeStruct.Fgerc = dger_; TypeStruct.Fgeru = dger_; TypeStruct.Fsyr = dsyr_; TypeStruct.Fher = dsyr_; TypeStruct.Fsyr2 = dsyr2_; TypeStruct.Fher2 = dsyr2_; TypeStruct.Fgemm = dgemm_; TypeStruct.Fsymm = dsymm_; TypeStruct.Fhemm = dsymm_; TypeStruct.Fsyrk = dsyrk_; TypeStruct.Fherk = dsyrk_; TypeStruct.Fsyr2k = dsyr2k_; TypeStruct.Fher2k = dsyr2k_; TypeStruct.Ftrmm = dtrmm_; TypeStruct.Ftrsm = dtrsm_; return( &TypeStruct ); /* * End of PB_Cdtypeset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cdescset.c0000640000175000017500000001652610363532303020120 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cdescset( int * DESC, int M, int N, int IMB, int INB, int MB, int NB, int RSRC, int CSRC, int CTXT, int LLD ) #else void PB_Cdescset( DESC, M, N, IMB, INB, MB, NB, RSRC, CSRC, CTXT, LLD ) /* * .. Scalar Arguments .. */ int CSRC, CTXT, IMB, INB, LLD, M, MB, N, NB, RSRC; /* * .. Array Arguments .. */ int * DESC; #endif { /* * Purpose * ======= * * PB_Cdescset uses its 10 input arguments M, N, IMB, INB, MB, NB, * RSRC, CSRC, CTXT and LLD to initialize a descriptor vector of type * BLOCK_CYCLIC_2D_INB. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * DESC (global and local output) INTEGER array * On entry, DESC is an array of dimension DLEN_. DESC is the * array descriptor to be set. * * M (global input) INTEGER * On entry, M specifies the number of rows of the matrix. * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the matrix. * N must be at least zero. * * IMB (global input) INTEGER * On entry, IMB specifies the row size of the first block of * the global matrix distribution. IMB must be at least one. * * INB (global input) INTEGER * On entry, INB specifies the column size of the first block * of the global matrix distribution. INB must be at least one. * * MB (global input) INTEGER * On entry, MB specifies the row size of the blocks used to * partition the matrix. MB must be at least one. * * NB (global input) INTEGER * On entry, NB specifies the column size of the blocks used to * partition the matrix. NB must be at least one. * * RSRC (global input) INTEGER * On entry, RSRC specifies the row coordinate of the process * that possesses the first row of the matrix. When RSRC = -1, * the data is not distributed but replicated, otherwise RSRC * must be at least zero and strictly less than NPROW. * * CSRC (global input) INTEGER * On entry, CSRC specifies the column coordinate of the pro- * cess that possesses the first column of the matrix. When * CSRC = -1, the data is not distributed but replicated, other- * wise CSRC must be at least zero and strictly less than NPCOL. * * CTXT (local input) INTEGER * On entry, CTXT specifies the BLACS context handle, indicating * the global communication context. The value of the context * itself is local. * * LLD (local input) INTEGER * On entry, LLD specifies the leading dimension of the local * array storing the local entries of the matrix. LLD must be at * least MAX( 1, Lr(1,M) ). * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ DESC[DTYPE_] = BLOCK_CYCLIC_2D_INB; DESC[CTXT_ ] = CTXT; DESC[M_ ] = M; DESC[N_ ] = N; DESC[IMB_ ] = IMB; DESC[INB_ ] = INB; DESC[MB_ ] = MB; DESC[NB_ ] = NB; DESC[RSRC_ ] = RSRC; DESC[CSRC_ ] = CSRC; DESC[LLD_ ] = LLD; /* * End of PB_Cdescset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzhemv.c0000640000175000017500000002233410363532303017775 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzhemv( PBTYP_T * TYPE, char * SIDE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR ) #else void PB_Ctzhemv( TYPE, SIDE, UPLO, M, N, K, IOFFD, ALPHA, A, LDA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IOFFD, K, LDA, LDXC, LDXR, LDYC, LDYR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Ctzhemv performs the matrix-vector operation * * y := alpha * A * x + y, * * where alpha is a scalar, x and y are n element vectors and A is an m * by n trapezoidal symmetric or Hermitian matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * YC (input/output) pointer to CHAR * On entry, YC is an array of dimension (LDYC,1) containing the * m by 1 vector YC. On exit, YC is overwritten by the partially * updated vector y. * * LDYC (input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, M ). * * YR (input/output) pointer to CHAR * On entry, YR is an array of dimension (LDYR,N) containing the * 1 by n vector YR. On exit, YR is overwritten by the partially * updated vector y. * * LDYR (input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least 1. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int i1, ione=1, j1, m1, mn, n1, size; GEMV_T gemv; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; one = TYPE->one; gemv = TYPE->Fgemv; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) { gemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( COTRAN ), &M, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fhemv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YC, i1, 0, LDYC, size ), &ione ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, Mptr( YC, i1, 0, LDYC, size ), &ione ); gemv( C2F_CHAR( COTRAN ), &m1, &n1, ALPHA, Mptr( A, i1, j1, LDA, size ), &LDA, Mptr( XC, i1, 0, LDXC, size ), &ione, one, Mptr( YR, 0, j1, LDYR, size ), &LDYR ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; one = TYPE->one; gemv = TYPE->Fgemv; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) { gemv( C2F_CHAR( NOTRAN ), &m1, &n1, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( COTRAN ), &m1, &n1, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } TYPE->Fhemv( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( A, m1, j1, LDA, size ), &LDA, Mptr( XC, m1, 0, LDXC, size ), &ione, one, Mptr( YC, m1, 0, LDYC, size ), &ione ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gemv( C2F_CHAR( NOTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, Mptr( XR, 0, j1, LDXR, size ), &LDXR, one, YC, &ione ); gemv( C2F_CHAR( COTRAN ), &M, &n1, ALPHA, Mptr( A, 0, j1, LDA, size ), &LDA, XC, &ione, one, Mptr( YR, 0, j1, LDYR, size ), &LDYR ); } } else { one = TYPE->one; gemv = TYPE->Fgemv; gemv( C2F_CHAR( NOTRAN ), &M, &N, ALPHA, A, &LDA, XR, &LDXR, one, YC, &ione ); gemv( C2F_CHAR( COTRAN ), &M, &N, ALPHA, A, &LDA, XC, &ione, one, YR, &LDYR ); } /* * End of PB_Ctzhemv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CScatterV.c0000640000175000017500000005664710363532303020231 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CScatterV( PBTYP_T * TYPE, char * DIRECA, int M, int N, char * A, int IA, int JA, int * DESCA, char * AROC, char * ALPHA, char * B, int IB, int JB, int * DESCB, char * BROC ) #else void PB_CScatterV( TYPE, DIRECA, M, N, A, IA, JA, DESCA, AROC, ALPHA, B, IB, JB, DESCB, BROC ) /* * .. Scalar Arguments .. */ char * ALPHA, * AROC, * BROC, * DIRECA; int IA, IB, JA, JB, M, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CScatterV disaggregates the one-dimensional submatrix sub( A ) de- * noting A( IA:IA+M-1, JA:JA+N-1 ) into a two-dimensional submatrix * sub( B ) denoting B( IB:IB+M-1, JB:JB+N-1 ) when AROC is equal to * BROC and B( IB:IB+N-1, JB:JB+M-1 ) otherwise: * * sub( B ) := alpha * sub( B ) + sub( A ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECA (global input) pointer to CHAR * On entry, DIRECA specifies the direction in which the rows * or columns of sub( A ) should be disaggregated as follows: * DIRECA = 'F' or 'f' forward or increasing, * DIRECA = 'B' or 'b' backward or decreasing. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and, * Ka is at least Lc( N, JA ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the submatrix * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix, * and a column matrix otherwise. * * ALPHA (local input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * B (local output) pointer to CHAR * On entry, A is an array of dimension (LLD_B, Kb), where LLD_B * is DESCB[LLD_], i.e. at least MAX( 1, Lr( M, IB ) ) when AROC * and BROC are equal, and MAX( 1, Lr( N, IB ) ) otherwise, and, * Kb is at least Lc( N, JB ) when AROC and BROC are equal, and * Lc( M, JB ) otherwise. On exit, this array contains the local * entries of the disaggregated submatrix sub( A ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BROC (global input) pointer to CHAR * On entry, BROC specifies the orientation of the submatrix * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row matrix, * and a column matrix otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one; int Afwd, Bbufld, Bcol, Bcurcol, Bcurrow, Bii, Bimb, Bimb1, Binb, Binb1, BisRow, Bjj, Bld, Bm, Bmb, Bmp, Bn, Bnb, Bnnxt, BnnxtL, Bnpre, Bnq, Brow, WAfr, ctxt, kb, mycol, mydist, mydistnb, myrow, nlen, npcol, nprow, offset, size, srcdist, stride, stridenb, tmp; MMADD_T add; MMSHFT_T shft; /* * .. Local Arrays .. */ int Bd0[DLEN_], WAd[DLEN_]; char * Bptr = NULL, * Bbuf = NULL, * Bbufptr = NULL, * WA = NULL; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ); BisRow = ( Mupcase( BROC [0] ) == CROW ); if( Mupcase( AROC[0] ) == Mupcase( BROC[0] ) ) { Bm = M; Bn = N; } else { Bm = N; Bn = M; } /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ... */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_]; Bnb = DESCB[NB_]; Bimb1 = PB_Cfirstnb( Bm, IB, Bimb, Bmb ); Bmp = PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( Bn, JB, Binb, Bnb ); Bnq = PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol ); Bld = DESCB[LLD_]; size = TYPE->size; one = TYPE->one; if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr = Mptr( B, Bii, Bjj, Bld, size ); if( BisRow ) { /* * Compute descriptor Bd0 for sub( B ). */ if( Afwd ) { Bcurrow = Brow; } else { Bcurrow = PB_Cindxg2p( Bm-1, Bimb1, Bmb, Brow, Brow, nprow ); } PB_Cdescset( Bd0, Bm, Bn, Bm, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); /* * Align sub( A ) with sub( B ) */ PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bm, A, IA, JA, DESCA, AROC, &WA, WAd, &WAfr ); /* * Disaggregate WA = sub( A ) */ if( ( Brow == -1 ) || ( nprow == 1 ) ) { /* * sub( B ) is replicated */ if( Bnq > 0 ) TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld ); if( WAfr ) free( WA ); return; } if( !( PB_Cspan( Bm, 0, Bimb1, Bmb, Brow, nprow ) ) ) { /* * sub( B ) spans only one process row */ if( ( myrow == Brow ) && ( Bnq > 0 ) ) TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld ); if( WAfr ) free( WA ); return; } /* * sub( B ) spans more than one process row */ if( Afwd ) { /* * sub( B ) is not replicated and spans more than one process row. Forward row * dissagregation starts in the process row where the global row IB resides. */ if( ( Bmp > 0 ) && ( Bnq > 0 ) ) { /* * Compute how may rows are before and after me (Bnpre and Bnnxt). */ Bnpre = PB_Cnpreroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow ); Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow ); nlen = Bmp + Bnnxt; if( Bnpre > 0 ) { /* * If I don't own the row IB, then allocate and receive a buffer of length * ( Bmp + Bnnxt ) * Bnq from the previous process row. */ Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size ); Bbufld = nlen; TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModSub1( myrow, nprow ), mycol ); kb = Bmb; } else { /* * Otherwise, reuse WA. */ Bbufptr = Bbuf = WA; Bbufld = WAd[LLD_]; kb = Bimb1; } /* * Unpack the received data */ if( Bnnxt > 0 ) { /* * If some rows reside in the process row following mine, then unpack my piece, * sort the buffer and send those Bnnxt rows to the next process row. */ add = TYPE->Fmmadd; shft = TYPE->Frshft; mydistnb = ( nprow - MModSub( myrow, Brow, nprow ) - 1 ); stride = ( mydistnb *= Bmb ) * size; do { kb = MIN( kb, nlen ); add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); nlen -= kb; offset = -kb; shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld ); Bptr += kb*size; Bbufptr += stride; nlen -= mydistnb; kb = Bmb; } while( nlen > 0 ); /* * send buffer of length Bnnxt * Bnq to the next process row. */ TYPE->Cgesd2d( ctxt, Bnnxt, Bnq, Bbuf, Bbufld, MModAdd1( myrow, nprow ), mycol ); } else { /* * Otherwise, I must be the last process involved in the operation, so no * unpacking is necessary. */ TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); } /* * If I don't own the row IB, then release the dynamically allocated buffer. */ if( Bnpre > 0 ) free( Bbuf ); } if( WAfr ) free( WA ); } else { if( ( Bmp > 0 ) && ( Bnq > 0 ) ) { /* * Compute how may rows are before and after me (Bnpre, Bnnxt). */ Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow ); BnnxtL = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, Bcurrow, Brow, nprow ); Bnnxt = MModSub( Bnnxt, BnnxtL, Bm ); Bnpre = ( nlen = Bm - Bnnxt ) - Bmp; if( Bnnxt > 0 ) { /* * If I don't own the row IB+M-1, then allocate and receive a buffer of length * ( Bm - Bnnxt ) * Bnq from the next process row. */ Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size ); Bbufld = nlen; TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModAdd1( myrow, nprow ), mycol ); } else { /* * Otherwise, reuse WA. */ Bbufptr = Bbuf = WA; Bbufld = WAd[LLD_]; } /* * Unpack the received data */ if( Bnpre > 0 ) { /* * If some rows reside in the process row preceeding mine, then unpack my piece, * sort the buffer and send those Bnpre rows to the previous process row. */ add = TYPE->Fmmadd; shft = TYPE->Frshft; mydist = MModSub( Bcurrow, myrow, nprow ); srcdist = MModSub( Bcurrow, Brow, nprow ); stridenb = ( nprow - mydist - 1 ) * Bmb; if( mydist < srcdist ) { tmp = ( Bimb1 + ( srcdist - mydist - 1 ) * Bmb ); Bbufptr += tmp * size; nlen -= tmp; kb = Bmb; } else if( mydist == srcdist ) { kb = Bimb1; } else { Bbufptr += stridenb * size; nlen -= stridenb; kb = Bmb; } do { kb = MIN( kb, nlen ); add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); nlen -= kb; offset = -kb; shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld ); Bptr += kb*size; Bbufptr += stridenb*size; nlen -= stridenb; kb = Bmb; } while( nlen > 0 ); /* * send buffer of length Bnpre * Bnq to the previous process row. */ TYPE->Cgesd2d( ctxt, Bnpre, Bnq, Bbuf, Bbufld, MModSub1( myrow, nprow ), mycol ); } else { /* * Otherwise, I must be the last process involved in the operation, so no * unpacking is necessary. */ TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); } /* * If I don't own the row IB+M-1, then release the dynamically allocated buffer. */ if( Bnnxt > 0 ) free( Bbuf ); } if( WAfr ) free( WA ); } } else { /* * Compute descriptor Bd0 for sub( B ). */ if( Afwd ) { Bcurcol = Bcol; } else { Bcurcol = PB_Cindxg2p( Bn-1, Binb1, Bnb, Bcol, Bcol, npcol ); } PB_Cdescset( Bd0, Bm, Bn, Bimb1, Bn, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); /* * Align sub( A ) with sub( B ) */ PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bn, A, IA, JA, DESCA, AROC, &WA, WAd, &WAfr ); /* * Disaggregate WA = sub( A ) */ if( ( Bcol == -1 ) || ( npcol == 1 ) ) { /* * sub( B ) is replicated */ if( Bmp > 0 ) TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld ); if( WAfr ) free( WA ); return; } if( !( PB_Cspan( Bn, 0, Binb1, Bnb, Bcol, npcol ) ) ) { /* * sub( B ) spans only one process column */ if( ( mycol == Bcol ) && ( Bmp > 0 ) ) TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld ); if( WAfr ) free( WA ); return; } /* * sub( B ) spans more than one process column */ if( Afwd ) { /* * sub( B ) is not replicated and spans more than one process column. Forward * column dissagregation starts in the process column where the global column * JB resides. */ if( ( Bmp > 0 ) && ( Bnq > 0 ) ) { /* * Compute how may columns are before and after me (Bnpre and Bnnxt). */ Bnpre = PB_Cnpreroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol ); Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol ); nlen = Bnq + Bnnxt; if( Bnpre > 0 ) { /* * If I don't own the column JB, then allocate and receive a buffer of length * Bmp * ( Bnq + Bnnxt ) from the previous process column. */ Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size ); Bbufld = Bmp; TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow, MModSub1( mycol, npcol ) ); kb = Bnb; } else { /* * Otherwise, reuse WA. */ Bbufptr = Bbuf = WA; Bbufld = WAd[LLD_]; kb = Binb1; } /* * Unpack the received data */ if( Bnnxt > 0 ) { /* * If some columns reside in the process column following mine, then unpack my * piece, sort the buffer and send those Bnnxt columns to the next process * column. */ add = TYPE->Fmmadd; shft = TYPE->Fcshft; mydistnb = ( npcol - MModSub( mycol, Bcol, npcol ) - 1 ); stride = ( mydistnb *= Bnb ) * Bbufld * size; do { kb = MIN( kb, nlen ); add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); nlen -= kb; offset = -kb; shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld ); Bptr += kb*Bld*size; Bbufptr += stride; nlen -= mydistnb; kb = Bnb; } while( nlen > 0 ); /* * send buffer of length Bmp * Bnnxt to the next process column. */ TYPE->Cgesd2d( ctxt, Bmp, Bnnxt, Bbuf, Bbufld, myrow, MModAdd1( mycol, npcol ) ); } else { /* * Otherwise, I must be the last process involved in the operation, so no * unpacking is necessary. */ TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); } /* * If I don't own the column JB, then release the dynamically allocated buffer. */ if( Bnpre > 0 ) free( Bbuf ); } if( WAfr ) free( WA ); } else { if( ( Bmp > 0 ) && ( Bnq > 0 ) ) { /* * Compute how may rows are before and after me (Bnpre, Bnnxt). */ Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol ); BnnxtL = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, Bcurcol, Bcol, npcol ); Bnnxt = MModSub( Bnnxt, BnnxtL, Bn ); Bnpre = ( nlen = Bn - Bnnxt ) - Bnq; if( Bnnxt > 0 ) { /* * If I don't own the column JB+N-1, then allocate and receive a buffer of * length Bmp * ( Bn - Bnnxt ) from the next process column. */ Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size ); Bbufld = Bmp; TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow, MModAdd1( mycol, npcol ) ); } else { /* * Otherwise, reuse WA. */ Bbufptr = Bbuf = WA; Bbufld = WAd[LLD_]; } /* * Unpack the received data */ if( Bnpre > 0 ) { /* * If some columns reside in the process column preceeding mine, then unpack my * piece, sort the buffer and send those Bnpre columns to the previous process * column. */ add = TYPE->Fmmadd; shft = TYPE->Fcshft; mydist = MModSub( Bcurcol, mycol, npcol ); srcdist = MModSub( Bcurcol, Bcol, npcol ); stridenb = ( npcol - mydist - 1 ) * Bnb; if( mydist < srcdist ) { tmp = ( Binb1 + ( srcdist - mydist - 1 ) * Bnb ); Bbufptr += tmp * Bbufld * size; nlen -= tmp; kb = Bnb; } else if( mydist == srcdist ) { kb = Binb1; } else { Bbufptr += stridenb * Bbufld * size; nlen -= stridenb; kb = Bnb; } do { kb = MIN( kb, nlen ); add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); nlen -= kb; offset = -kb; shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld ); Bptr += kb * Bld * size; Bbufptr += stridenb * Bbufld * size; nlen -= stridenb; kb = Bnb; } while( nlen > 0 ); /* * send buffer of length Bmp * Bnpre to the previous process column. */ TYPE->Cgesd2d( ctxt, Bmp, Bnpre, Bbuf, Bbufld, myrow, MModSub1( mycol, npcol ) ); } else { /* * Otherwise, I must be the last process involved in the operation, so no * unpacking is necessary. */ TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld ); } /* * If I don't own the column JB+N-1, then release the dynamically allocated * buffer. */ if( Bnnxt > 0 ) free( Bbuf ); } if( WAfr ) free( WA ); } } /* * End of PB_CScatterV */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpsym.c0000640000175000017500000005650310363532303017455 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpsym( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE, char * UPLO, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * XC, int LDXC, char * XR, int LDXR, char * YC, int LDYC, char * YR, int LDYR, TZSYM_T SYM ) #else void PB_Cpsym( TYPE, UTYP, SIDE, UPLO, N, K, ALPHA, A, IA, JA, DESCA, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR, SYM ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO; int IA, JA, K, LDXC, LDXR, LDYC, LDYR, N; char * ALPHA; PBTYP_T * TYPE, * UTYP; TZSYM_T SYM; /* * .. Array Arguments .. */ int * DESCA; char * A, * XC, * XR, * YC, * YR; #endif { /* * Purpose * ======= * * PB_Cpsym performs a symmetric or Hermitian matrix-matrix or matrix- * vector multiplication. In the following, sub( A ) denotes the symme- * tric or Hermitian submatrix operand A( IA:IA+N-1, JA:JA+N-1 ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * UTYP (local input) pointer to a PBTYP_T structure * On entry, UTYP is a pointer to a structure of type PBTYP_T, * that contains type information for the Y's (See pblas.h). * * SIDE (global input) pointer to CHAR * On entry, SIDE specifies whether op( sub( A ) ) multiplies * its operand X from the left or right as follows: * * SIDE = 'L' or 'l' Y := alpha*op( sub( A ) )*X + Y, * * SIDE = 'R' or 'r' Y := alpha*X*op( sub( A ) ) + Y. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric or Hermitian submatrix sub( A ) are to be * referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the sym- * metric or Hermitian submatrix sub( A ) * are to be referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the local number of columns of the lo- * cal array XC and the local number of rows of the local array * XR. K mut be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the @(syhec) submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * XC (local input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,K). Before entry, * this array contains the local entries of the matrix XC. * * LDXC (local input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, Lp( IA, N ) ). * * XR (local input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,Kx), where Kx is * at least Lc( JA, N ). Before entry, this array contains the * local entries of the matrix XR. * * LDXR (local input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least max( 1, K ). * * YC (local input/local output) pointer to CHAR * On entry, YC is an array of dimension (LDYC,K). Before entry, * this array contains the local entries of the matrix YC. On * exit, this array contains the updated vector YC. * * LDYC (local input) INTEGER * On entry, LDYC specifies the leading dimension of the array * YC. LDYC must be at least max( 1, Lp( IA, N ) ). * * YR (local input/local output) pointer to CHAR * On entry, YR is an array of dimension (LDYR,Ky), where Ky is * at least Lc( JA, N ). Before entry, this array contains the * local entries of the matrix YR. On exit, this array contains * the updated vector YR. * * LDYR (local input) INTEGER * On entry, LDYR specifies the leading dimension of the array * YR. LDYR must be at least max( 1, K ). * * SYM (local input) pointer to function of type TZSYM_T * On entry, SYM specifies the function performing the symmetric * or Hermitian matrix-vector or matrix-matrix multiply of a * single block. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq, Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl, IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1, YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp, jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; /* .. * .. Executable Statements .. * */ /* * Quick return if possible */ if( N <= 0 ) return; /* * Retrieve process grid information */ Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); /* * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in * all processes. */ if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) ); IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) ); Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ]; size = TYPE->size; if( IsRowRepl && IsColRepl ) { SYM( TYPE, SIDE, UPLO, Amp, Anq, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ); return; } XCinc = size; XRinc = LDXR * size; YCinc = UTYP->size; YRinc = LDYR * UTYP->size; upper = ( Mupcase( UPLO[0] ) == CUPPER ); lower = ( Mupcase( UPLO[0] ) == CLOWER ); /* * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, * iupp, and upp. */ PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, &iupp, &upp ); iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); pmb = ( IsRowRepl ? Amb : nprow * Amb ); qnb = ( IsColRepl ? Anb : npcol * Anb ); /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); /* * Go through the table looking for blocks owning diagonal entries. */ if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ SYM( TYPE, SIDE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); /* * Decide whether one should go south or east in the table: Go east if the * block below the current one only owns lower entries. If this block, however, * owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); if( GoSouth ) { /* * When the upper triangular part of sub( A ) should be operated with and * one is planning to go south in the table, it is neccessary to take care * of the remaining columns of these imbloc rows immediately. */ if( upper && ( Anq > inbloc ) ) { tmp1 = Anq - inbloc; SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(Xjj+inbloc)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(Xjj+inbloc)*YRinc, LDYR ); } Aii += imbloc; Xii += imbloc; m1 -= imbloc; } else { /* * When the lower triangular part of sub( A ) should be operated with and * one is planning to go east in the table, it is neccessary to take care * of the remaining rows of these inbloc columns immediately. */ if( lower && ( Amp > imbloc ) ) { tmp1 = Amp - imbloc; SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0, ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, XC+(Xii+imbloc)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, YC+(Xii+imbloc)*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); } Ajj += inbloc; Xjj += inbloc; n1 -= inbloc; } } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the local row indexes in A and XC. */ lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row indexes in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Operate with the upper triangular part of sub( A ) we just skipped when * necessary. */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYM( TYPE, SIDE, UPLO, mbloc, inbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; ioffd += mbloc; Xoffi = ioffx; ioffx += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); tmp1 = Aoffi - Aii + 1; m1 -= tmp1; n1 -= inbloc; lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as well as * the local column index in A and XR. */ lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc; /* * While there are blocks remaining that own lower entries, keep going east. * Adjust the current LCM value as well as the local column index in A and XR. */ while( ( nblks > 0 ) && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; if( lower && ( tmp1 > 0 ) ) { SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); Ajj += tmp1; Xjj += tmp1; n1 -= tmp1; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return; /* * lcmt00 >= low. The current block owns either diagonals or upper entries. * Save the current position in the LCM table. After this row has been * completely taken care of, re-start from this column and the next row of * the LCM table. */ lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj; nbloc = Anb; while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ if( nblkd == 1 ) nbloc = lnbloc; SYM( TYPE, SIDE, UPLO, imbloc, nbloc, K, lcmt, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR ); lcmt00 = lcmt; lcmt += qnb; nblks = nblkd; nblkd--; Aoffj = joffd; joffd += nbloc; Xoffj = joffx; joffx += nbloc; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = n1 - joffd + Ajj - 1; if( upper && ( tmp1 > 0 ) ) SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0, ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR ); tmp1 = Aoffj - Ajj + 1; m1 -= imbloc; n1 -= tmp1; lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc; /* * Operate with the lower triangular part of sub( A ). */ if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0, ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, XC+(Xoffi+1)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, YC+(Xoffi+1)*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * Loop over the remaining columns of the LCM table. */ nbloc = Anb; while( nblks > 0 ) { if( nblks == 1 ) nbloc = lnbloc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the local row index in A and XC. */ while( ( mblks > 0 ) && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } /* * Operate with the upper triangular part of sub( A ). */ tmp1 = MIN( Aoffi, iimax ) - Aii + 1; if( upper && ( tmp1 > 0 ) ) { SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); Aii += tmp1; Xii += tmp1; m1 -= tmp1; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return; /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; mbloc = Amb; while( ( mblkd > 0 ) && ( lcmt >= low ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ if( mblkd == 1 ) mbloc = lmbloc; SYM( TYPE, SIDE, UPLO, mbloc, nbloc, K, lcmt, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); lcmt00 = lcmt; lcmt -= pmb; mblks = mblkd; mblkd--; Aoffi = ioffd; Xoffi = ioffx; ioffd += mbloc; ioffx += mbloc; } /* * Operate with the lower triangular part of sub( A ). */ tmp1 = m1 - ioffd + Aii - 1; if( lower && ( tmp1 > 0 ) ) SYM( TYPE, SIDE, ALL, tmp1, nbloc, K, 0, ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); tmp1 = MIN( Aoffi, iimax ) - Aii + 1; m1 -= tmp1; n1 -= nbloc; lcmt00 += qnb; nblks--; Aoffj += nbloc; Xoffj += nbloc; /* * Operate with the upper triangular part of sub( A ). */ if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); Aii = Aoffi + 1; Ajj = Aoffj + 1; Xii = Xoffi + 1; Xjj = Xoffj + 1; } /* * End of PB_Cpsym */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpswapNN.c0000640000175000017500000004220010363532303020040 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpswapNN( PBTYP_T * TYPE, int N, char * X, int IX, int JX, int * DESCX, int INCX, char * Y, int IY, int JY, int * DESCY, int INCY ) #else void PB_CpswapNN( TYPE, N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int INCX, INCY, IX, IY, JX, JY, N; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCX, * DESCY; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CpswapNN swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Both subvectors are assumed to be not distributed. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) pointer to CHAR * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) pointer to CHAR * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * top; int RRorCC, XYm, XYn, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnprocsR, XprocR, Xrow, Ycol, Yii, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnprocsR, YprocR, Yrow, csrc, ctxt, mycol, myrow, npcol, nprow, rsrc, size; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 ) { Xld = DESCX[LLD_]; Xlinc = Xld; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); } else { Xld = DESCX[LLD_]; Xlinc = 1; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 ) { Yld = DESCY[LLD_]; Ylinc = Yld; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); } else { Yld = DESCY[LLD_]; Ylinc = 1; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); } /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * Neither sub( X ) nor sub( Y ) are distributed */ if( !XisR ) { /* * sub( X ) is not replicated */ if( !( YisR ) ) { /* * sub( Y ) is not replicated */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) /* * If I am not in XprocR or YprocR, then return immediately */ return; size = TYPE->size; if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( XprocR == YprocR ) { /* * sub( X ) and sub( Y ) are in the same process row or column */ TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); } else { /* * sub( X ) and sub( Y ) are in a different process row or column */ if( XmyprocR == XprocR ) { /* * Send sub( X ) to where sub( Y ) resides, and receive sub( Y ) from the same * location. */ if( XisRow ) { TYPE->Cgesd2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, YprocR, XmyprocD ); TYPE->Cgerv2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ), Xld, YprocR, XmyprocD ); } else { TYPE->Cgesd2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, XmyprocD, YprocR ); TYPE->Cgerv2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ), Xld, XmyprocD, YprocR ); } } if( YmyprocR == YprocR ) { /* * Send sub( Y ) to where sub( X ) resides, and receive sub( X ) from the same * location. */ if( YisRow ) { TYPE->Cgesd2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); TYPE->Cgerv2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, YmyprocD ); } else { TYPE->Cgesd2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); TYPE->Cgerv2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ), Yld, YmyprocD, XprocR ); } } } } else { /* * sub( X ) and sub( Y ) are not both row or column vectors */ if( XisRow ) { XYm = 1; XYn = N; Xscope = CROW; Yscope = CCOLUMN; rsrc = XprocR; csrc = YprocR; } else { XYm = N; XYn = 1; Xscope = CCOLUMN; Yscope = CROW; rsrc = YprocR; csrc = XprocR; } if( ( XmyprocR == XprocR ) && ( YmyprocR == YprocR ) ) { /* * If I am at the intersection of the process row and column, then swap and * broadcast sub( X ) and sub( Y ) in their respective process scope. */ TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET ); TYPE->Cgebs2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld ); top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET ); TYPE->Cgebs2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj, Yld, size ), Yld ); } else if( XmyprocR == XprocR ) { top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET ); TYPE->Cgebr2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld, rsrc, csrc ); } else if( YmyprocR == YprocR ) { top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET ); TYPE->Cgebr2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); } } } else { /* * sub( Y ) is replicated */ size = TYPE->size; if( YisRow ) { XYm = 1; XYn = N; } else { XYm = N; XYn = 1; } if( XmyprocR == XprocR ) { /* * If I am in the process row (resp. column) owning sub( X ), then swap and * broadcast sub( Y ) in my column (resp. row). */ TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj, Yld, size ), Yld ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj, Yld, size ), Yld ); } } else { /* * Otherwise, receive sub( Y ) */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj, Yld, size ), Yld, XmyprocD, XprocR ); } } } } else { /* * sub( X ) is replicated */ size = TYPE->size; if( YisR || ( YmyprocR == YprocR ) ) { /* * If I own a piece of sub( Y ), then swap */ TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc ); } if( !YisR ) { /* * If sub( Y ) is not replicated, then broadcast the result to the other * processes that own a piece of sub( X ), but were not involved in the * above swap operation. */ if( XisRow ) { XYm = 1; XYn = N; } else { XYm = N; XYn = 1; } if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld ); else TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj, Xld, size ), Xld, YmyprocD, YprocR ); } } } /* * End of PB_CpswapNN */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzsyr.c0000640000175000017500000001644510363532303017661 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzsyr( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * XC, int LDXC, char * XR, int LDXR, char * A, int LDA ) #else void PB_Ctzsyr( TYPE, UPLO, M, N, K, IOFFD, ALPHA, XC, LDXC, XR, LDXR, A, LDA ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDA, LDXC, LDXR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR; #endif { /* * Purpose * ======= * * PB_Ctzsyr performs the trapezoidal symmetric or Hermitian rank 1 ope- * ration: * * A := alpha * XC * XR + A or A := alpha * XC * conjg( XR ) + A, * * where alpha is a scalar, XC is an m element vector, XR is an n ele- * ment vector and A is an m by n trapezoidal symmetric or Hermitian ma- * trix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (see pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * A (input/output) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is updated. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ione=1, j1, m1, mn, n1, size; GERU_T geru; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; geru = TYPE->Fgeru; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) geru( &M, &n1, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fsyr( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( A, i1, j1, LDA, size ), &LDA ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; geru( &m1, &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, i1, j1, LDA, size ), &LDA ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; geru = TYPE->Fgeru; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) geru( &m1, &n1, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); TYPE->Fsyr( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, m1, 0, LDXC, size ), &ione, Mptr( A, m1, j1, LDA, size ), &LDA ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; geru( &M, &n1, ALPHA, XC, &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, 0, j1, LDA, size ), &LDA ); } } else { TYPE->Fgeru( &M, &N, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); } /* * End of PB_Ctzsyr */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpaxpbyND.c0000640000175000017500000006662010363532303020213 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpaxpbyND( PBTYP_T * TYPE, char * CONJUG, int M, int N, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * AROC, char * BETA, char * B, int IB, int JB, int * DESCB, char * BROC ) #else void PB_CpaxpbyND( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B, IB, JB, DESCB, BROC ) /* * .. Scalar Arguments .. */ char * AROC, * BROC, * CONJUG; int IA, IB, JA, JB, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB; char * A, * B; #endif { /* * Purpose * ======= * * PB_CpaxpbyND adds one submatrix to another, * * sub( B ) := beta * sub( B ) + alpha * sub( A ), or, * * sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ), * * where sub( A ) is not distributed and sub( B ) is distributed. * * sub( A ) always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or * 'r' sub( A ) resides in a process row, otherwise sub( A ) resides in * a process column. When sub( A ) resides in a process row and BROC is * 'R' or 'r' or sub( A ) resides in a process column and BROC is 'C' or * 'c', then sub( B ) denotes B( IB:IB+M-1, JB:JB+N-1 ), and otherwise * sub( B ) denotes B(IB:IB+N-1,JB:JB+M-1). * otherwise. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether conjg( sub( A ) ) or * sub( A ) should be added to sub( B ) as follows: * CONJUG = 'N' or 'n': * sub( B ) := beta*sub( B ) + alpha*sub( A ), * otherwise * sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A cor- * responding to the entries of the submatrix sub( A ) need not * be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A * is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least * Lc( 1, JA+N-1 ). Before entry, this array contains the local * entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * AROC (global input) pointer to CHAR * On entry, AROC specifies the orientation of the subvector * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector, * and a column vector otherwise. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is sup- * plied as zero then the local entries of the array B corres- * ponding to the entries of the submatrix sub( B ) need not be * set on input. * * B (local input/local output) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where LLD_B * is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and * sub( B ) are both distributed along a process column or a * process row. In that case, Kb is at least Lc( 1, JB+N-1 ). * Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and * Kb is at least Lc( 1, JB+M-1 ). Before entry, this array * contains the local entries of the matrix B. On exit, sub( B ) * is overwritten with the updated submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BROC (global input) pointer to CHAR * On entry, BROC specifies the orientation of the subvector * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector, * and a column vector otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * one, * top, * zero; int Acol, Aii, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR, AnprocsD, AprocR, Aroc, Arow, Bcol, Bii, Binb1D, BisR, BisRow, Bjj, Bld, BmyprocD, BmyprocR, BnD, BnbD, BnpD, BnprocsD, BprocD, BprocR, Broc, Brow, RRorCC, ctxt, k, kbb, kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size; MMADD_T add; /* * .. Local Arrays .. */ char * buf = NULL; /* .. * .. Executable Statements .. * */ /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... */ PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 ) { BnD = N; Ald = DESCA[LLD_]; AmyprocD = mycol; AnprocsD = npcol; AmyprocR = myrow; AprocR = Arow; AisR = ( ( Arow == -1 ) || ( nprow == 1 ) ); } else { BnD = M; Ald = DESCA[LLD_]; AmyprocD = myrow; AnprocsD = nprow; AmyprocR = mycol; AprocR = Acol; AisR = ( ( Acol == -1 ) || ( npcol == 1 ) ); } /* * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ... */ PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 ) { BnbD = DESCB[NB_]; Bld = DESCB[LLD_]; BprocD = Bcol; BmyprocD = mycol; BnprocsD = npcol; BprocR = Brow; BmyprocR = myrow; BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) ); Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD ); } else { BnbD = DESCB[MB_]; Bld = DESCB[LLD_]; BprocD = Brow; BmyprocD = myrow; BnprocsD = nprow; BprocR = Bcol; BmyprocR = mycol; BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) ); Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD ); } /* * Are sub( A ) and sub( B ) both row or column vectors ? */ RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) ); /* * sub( A ) is not distributed and sub( B ) is distributed */ if( !( AisR ) ) { /* * sub( A ) is not replicated. Since this operation is local if sub( A ) and * sub( B ) are both row or column vectors, choose BprocR = AprocR when RRorCC, * and BprocR = 0 otherwise. */ if( BisR ) { BprocR = ( ( RRorCC ) ? AprocR : 0 ); } /* * Now, it is just like sub( B ) is not replicated, this information however is * kept in BisR for later use. */ size = TYPE->size; if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) ) { zero = TYPE->zero; one = TYPE->one; /* * sub( A ) and sub( B ) are both row or column vectors */ if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD ); /* * sub( A ) and sub( B ) are in the same process row or column */ if( AprocR == BprocR ) { /* * In each process, the non distributed part of sub( A ) is added to sub( B ). */ if( BnpD > 0 ) { Broc = BprocD; if( AisRow ) { kk = Bjj; ktmp = JA + N; kn = JA + Binb1D; } else { kk = Bii; ktmp = IA + M; kn = IA + Binb1D; } if( BmyprocD == Broc ) { if( AisRow ) add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Binb1D; } Broc = MModAdd1( Broc, BnprocsD ); for( k = kn; k < ktmp; k += BnbD ) { kbb = ktmp - k; kbb = MIN( kbb, BnbD ); if( BmyprocD == Broc ) { if( AisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ), &Ald, BETA, Mptr( B, Bii, kk, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ), &Ald, BETA, Mptr( B, kk, Bjj, Bld, size ), &Bld ); kk += kbb; } Broc = MModAdd1( Broc, BnprocsD ); } } } else { /* * sub( A ) and sub( B ) are in a different process row or column */ if( BmyprocR == BprocR ) { /* * If I own a piece of sub( B ), then receive the relevant piece of sub( A ) * from the corresponding process row or column where it resides. */ if( BnpD > 0 ) { if( BisRow ) { buf = PB_Cmalloc( M * BnpD * size ); TYPE->Cgerv2d( ctxt, M, BnpD, buf, M, AprocR, BmyprocD ); add( &M, &BnpD, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); if( buf ) free( buf ); } else { buf = PB_Cmalloc( BnpD * N * size ); TYPE->Cgerv2d( ctxt, BnpD, N, buf, BnpD, BmyprocD, AprocR ); add( &BnpD, &N, ALPHA, buf, &BnpD, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); if( buf ) free( buf ); } } } if( AmyprocR == AprocR ) { /* * If I own sub( A ), then pack and send the distributed part that should be * added to the distributed part of sub( B ) that resides in my row or column. */ if( BnpD > 0 ) { if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; buf = PB_Cmalloc( M * BnpD * size ); } else { ktmp = IA + M; kn = IA + Binb1D; buf = PB_Cmalloc( BnpD * N * size ); } Broc = BprocD; kk = 0; if( BmyprocD == Broc ) { if( AisRow ) add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, zero, buf, &M ); else add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, zero, buf, &BnpD ); kk += Binb1D; } Broc = MModAdd1( Broc, BnprocsD ); for( k = kn; k < ktmp; k += BnbD ) { kbb = ktmp - k; kbb = MIN( kbb, BnbD ); if( BmyprocD == Broc ) { if( AisRow ) add( &M, &kbb, one, Mptr( A, Aii, k, Ald, size ), &Ald, zero, Mptr( buf, 0, kk, M, size ), &M ); else add( &kbb, &N, one, Mptr( A, k, Ajj, Ald, size ), &Ald, zero, Mptr( buf, kk, 0, BnpD, size ), &BnpD ); kk += kbb; } Broc = MModAdd1( Broc, BnprocsD ); } if( AisRow ) TYPE->Cgesd2d( ctxt, M, BnpD, buf, M, BprocR, AmyprocD ); else TYPE->Cgesd2d( ctxt, BnpD, N, buf, BnpD, AmyprocD, BprocR ); if( buf ) free( buf ); } } } } else { /* * sub( A ) and sub( B ) are not both row or column vectors */ if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; Aroc = 0; if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; } else { ktmp = IA + M; kn = IA + Binb1D; } /* * Loop over the processes in which sub( B ) resides, for each process find the * next process Xroc. Exchange and add the data. */ for( p = 0; p < BnprocsD; p++ ) { mydist = MModSub( p, BprocD, BnprocsD ); myproc = MModAdd( BprocD, mydist, BnprocsD ); if( ( AprocR == p ) && ( BprocR == Aroc ) ) { if( ( AmyprocR == p ) && ( AmyprocD == Aroc ) ) { /* * local add at the intersection of the process cross */ BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD, BnprocsD ); if( BnpD > 0 ) { Broc = BprocD; kk = ( AisRow ? Bii : Bjj ); if( myproc == Broc ) { if( AisRow ) add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Binb1D; } Broc = MModAdd1( Broc, BnprocsD ); for( k = kn; k < ktmp; k += BnbD ) { kbb = ktmp - k; kbb = MIN( kbb, BnbD ); if( myproc == Broc ) { if( AisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ), &Ald, BETA, Mptr( B, kk, Bjj, Bld, size ), &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, kk, Bld, size ), &Bld ); kk += kbb; } Broc = MModAdd1( Broc, BnprocsD ); } } } } else { /* * Message exchange */ if( ( BmyprocR == BprocR ) && ( BmyprocD == p ) ) { BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD, BnprocsD ); if( BnpD > 0 ) { if( AisRow ) { buf = PB_Cmalloc( M * BnpD * size ); TYPE->Cgerv2d( ctxt, BnpD, M, buf, BnpD, AprocR, Aroc ); TYPE->Fmmadd( &BnpD, &M, ALPHA, buf, &BnpD, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } else { buf = PB_Cmalloc( BnpD * N * size ); TYPE->Cgerv2d( ctxt, N, BnpD, buf, N, Aroc, AprocR ); TYPE->Fmmadd( &N, &BnpD, ALPHA, buf, &N, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); } if( buf ) free( buf ); } } if( ( AmyprocR == AprocR ) && ( AmyprocD == Aroc ) ) { BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD, BnprocsD ); if( BnpD > 0 ) { if( AisRow ) buf = PB_Cmalloc( M * BnpD * size ); else buf = PB_Cmalloc( BnpD * N * size ); Broc = BprocD; kk = 0; if( myproc == Broc ) { if( AisRow ) add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, zero, buf, &BnpD ); else add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald, size ), &Ald, zero, buf, &N ); kk += Binb1D; } Broc = MModAdd1( Broc, BnprocsD ); for( k = kn; k < ktmp; k += BnbD ) { kbb = ktmp - k; kbb = MIN( kbb, BnbD ); if( myproc == Broc ) { if( AisRow ) add( &M, &kbb, one, Mptr( A, Aii, k, Ald, size ), &Ald, zero, Mptr( buf, kk, 0, BnpD, size ), &BnpD ); else add( &kbb, &N, one, Mptr( A, k, Ajj, Ald, size ), &Ald, zero, Mptr( buf, 0, kk, N, size ), &N ); kk += kbb; } Broc = MModAdd1( Broc, BnprocsD ); } if( AisRow ) TYPE->Cgesd2d( ctxt, BnpD, M, buf, BnpD, p, BprocR ); else TYPE->Cgesd2d( ctxt, N, BnpD, buf, N, BprocR, p ); if( buf ) free( buf ); } } } Aroc = MModAdd1( Aroc, AnprocsD ); } } } if( BisR ) { /* * Replicate sub( B ) */ BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD ); if( BnpD > 0 ) { if( BisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( BmyprocR == BprocR ) TYPE->Cgebs2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD, Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD, Mptr( B, Bii, Bjj, Bld, size ), Bld, BprocR, BmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( BmyprocR == BprocR ) TYPE->Cgebs2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ), Mptr( B, Bii, Bjj, Bld, size ), Bld ); else TYPE->Cgebr2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ), Mptr( B, Bii, Bjj, Bld, size ), Bld, BmyprocD, BprocR ); } } } } else { /* * sub( A ) is replicated in every process. Add the data in process row or * column BprocR when sub( B ) is not replicated and in every process otherwise. */ if( !( BisR ) && ( BmyprocR != BprocR ) ) return; size = TYPE->size; if( RRorCC ) { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd; else add = TYPE->Fmmadd; } else { if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd; else add = TYPE->Fmmtadd; } Broc = BprocD; kk = ( BisRow ? Bjj : Bii ); if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; } else { ktmp = IA + M; kn = IA + Binb1D; } if( BmyprocD == Broc ) { if( AisRow ) add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); else add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld ); kk += Binb1D; } Broc = MModAdd1( Broc, BnprocsD ); for( k = kn; k < ktmp; k += BnbD ) { kbb = ktmp - k; kbb = MIN( kbb, BnbD ); if( BmyprocD == Broc ) { if( BisRow ) { buf = Mptr( B, Bii, kk, Bld, size ); } else { buf = Mptr( B, kk, Bjj, Bld, size ); } if( AisRow ) add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ), &Ald, BETA, buf, &Bld ); else add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ), &Ald, BETA, buf, &Bld ); kk += kbb; } Broc = MModAdd1( Broc, BnprocsD ); } } /* * End of PB_CpaxpbyND */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzher.c0000640000175000017500000001644210363532303017617 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzher( PBTYP_T * TYPE, char * UPLO, int M, int N, int K, int IOFFD, char * ALPHA, char * XC, int LDXC, char * XR, int LDXR, char * A, int LDA ) #else void PB_Ctzher( TYPE, UPLO, M, N, K, IOFFD, ALPHA, XC, LDXC, XR, LDXR, A, LDA ) /* * .. Scalar Arguments .. */ char * UPLO; int IOFFD, K, LDA, LDXC, LDXR, M, N; char * ALPHA; /* * .. Array Arguments .. */ PBTYP_T * TYPE; char * A, * XC, * XR; #endif { /* * Purpose * ======= * * PB_Ctzher performs the trapezoidal symmetric or Hermitian rank 1 ope- * ration: * * A := alpha * XC * XR + A or A := alpha * XC * conjg( XR ) + A, * * where alpha is a scalar, XC is an m element vector, XR is an n ele- * ment vector and A is an m by n trapezoidal symmetric or Hermitian ma- * trix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (see pblas.h). * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * XC (input) pointer to CHAR * On entry, XC is an array of dimension (LDXC,1) containing the * m by 1 vector XC. * * LDXC (input) INTEGER * On entry, LDXC specifies the leading dimension of the array * XC. LDXC must be at least max( 1, M ). * * XR (input) pointer to CHAR * On entry, XR is an array of dimension (LDXR,N) containing the * 1 by n vector XR. * * LDXR (input) INTEGER * On entry, LDXR specifies the leading dimension of the array * XR. LDXR must be at least 1. * * A (input/output) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is updated. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ione=1, j1, m1, mn, n1, size; GERC_T gerc; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( Mupcase( UPLO[0] ) == CLOWER ) { size = TYPE->size; gerc = TYPE->Fgerc; mn = MAX( 0, -IOFFD ); if( ( n1 = MIN( mn, N ) ) > 0 ) gerc( &M, &n1, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); n1 = M - IOFFD; if( ( n1 = MIN( n1, N ) - mn ) > 0 ) { i1 = ( j1 = mn ) + IOFFD; TYPE->Fher( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( A, i1, j1, LDA, size ), &LDA ); if( ( m1 = M - mn - n1 - IOFFD ) > 0 ) { i1 += n1; gerc( &m1, &n1, ALPHA, Mptr( XC, i1, 0, LDXC, size ), &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, i1, j1, LDA, size ), &LDA ); } } } else if( Mupcase( UPLO[0] ) == CUPPER ) { size = TYPE->size; gerc = TYPE->Fgerc; mn = M - IOFFD; mn = MIN( mn, N ); if( ( n1 = mn - MAX( 0, -IOFFD ) ) > 0 ) { j1 = mn - n1; if( ( m1 = MAX( 0, IOFFD ) ) > 0 ) gerc( &m1, &n1, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); TYPE->Fher( C2F_CHAR( UPLO ), &n1, ALPHA, Mptr( XC, m1, 0, LDXC, size ), &ione, Mptr( A, m1, j1, LDA, size ), &LDA ); } if( ( n1 = N - MAX( 0, mn ) ) > 0 ) { j1 = N - n1; gerc( &M, &n1, ALPHA, XC, &ione, Mptr( XR, 0, j1, LDXR, size ), &LDXR, Mptr( A, 0, j1, LDA, size ), &LDA ); } } else { TYPE->Fgerc( &M, &N, ALPHA, XC, &ione, XR, &LDXR, A, &LDA ); } /* * End of PB_Ctzher */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cpsyr2kAC.c0000640000175000017500000012137210363532303020120 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cpsyr2kAC( PBTYP_T * TYPE, char * DIRECAB, char * CONJUG, char * UPLO, char * TRANS, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_Cpsyr2kAC( TYPE, DIRECAB, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * CONJUG, * DIRECAB, * TRANS, * UPLO; int IA, IB, IC, JA, JB, JC, K, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_Cpsyr2kAC performs one of the following symmetric or Hermitian * rank 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( B ) )' + * conjg( alpha )*sub( B )*conjg( sub( A ) )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric or * Hermitian submatrix and sub( A ) and sub( B ) are n by k submatrices * in the first case and k by n submatrices in the second case. * * This is the outer-product algorithm using the logical aggregation * blocking technique. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECAB (global input) pointer to CHAR * On entry, DIRECAB specifies the direction in which the rows * or columns of sub( A ), sub( B ) and sub( C ) should be * looped over as follows: * DIRECAB = 'F' or 'f' forward or increasing, * DIRECAB = 'B' or 'b' backward or decreasing. * * CONJUG (global input) pointer to CHAR * On entry, CONJUG specifies whether sub( C ) is a symmetric or * Hermitian submatrix operand as follows: * CONJUG = 'N' or 'n' sub( C ) is symmetric, * CONJUG = 'Z' or 'z' sub( C ) is Hermitian. * * UPLO (global input) pointer to CHAR * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the submatrix sub( C ) are to be referenced as follows: * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * submatrix sub( C ) are referenced, * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * submatrix sub( C ) are referenced. * * TRANS (global input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * or * sub( C ) := alpha*sub( A )*conjg( sub( B )' ) + * conjg( alpha )*sub( B )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * or * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * or * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't' or 'C' or 'c', K specifies the * number of rows of the submatrices sub( A ) and sub( B ). * K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly lower triangular * of sub( C ) are not referenced. On exit, the upper triangular * part of sub( C ) is overwritten by the upper triangular part * of the updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric or Hermitian submatrix sub( C ), and the * local entries corresponding to the strictly upper triangular * of sub( C ) are not referenced. On exit, the lower triangular * part of sub( C ) is overwritten by the lower triangular part * of the updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char GatherDir, ScatterDir, * one, top, * talpha, * tbeta, tran, * zero; int ABm, ABn, Acol, Acurcol, Acurrow, Acurimb1, Acurinb1, Afr, Aii, Aimb, Aimb1, Ainb, Ainb1, AisD, AisR, Ajj, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, Aspan, Bcol, Bcurcol, Bcurrow, Bcurimb1, Bcurinb1, Bfr, Bii, Bimb, Bimb1, Binb, Binb1, BisD, BisR, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq, Bnq0, Brow, Bspan, Ccsrc, Cimb, Cinb, Cmb, Cnb, Crsrc, WAfr, WACfr, WACld, WACreuse, WACsum, WBfr, WBCfr, WBCld, WBCsum, conjg, ctxt, fwd, k, kb, kbb, kend, kstart, kstep, ktmp, mycol, myrow, notran, npcol, nprow, size, upper; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL, * WA = NULL, * WB = NULL, * WAC = NULL, *WBC = NULL; int Ad0[DLEN_], Bd0[DLEN_], DBUFA[DLEN_], DBUFB[DLEN_], WAd[DLEN_], WBd[DLEN_], WACd [DLEN_], WBCd [DLEN_]; /* .. * .. Executable Statements .. * */ /* * sub( C ) = beta * sub( C ) */ PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC ); /* * Retrieve process grid information */ Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); fwd = ( Mupcase( DIRECAB[0] ) == CFORWARD ); conjg = ( Mupcase( CONJUG [0] ) == CCONJG ); upper = ( Mupcase( UPLO [0] ) == CUPPER ); notran = ( Mupcase( TRANS [0] ) == CNOTRAN ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gsum2d = TYPE->Cgsum2d; gemm = TYPE->Fgemm; /* * Figure out the loop bounds accordingly to DIRECAB */ kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); if( fwd ) { kstart = 0; kend = ( ( N - 1 ) / kb + 1 ) * kb; kstep = kb; GatherDir = CFORWARD; ScatterDir = CBACKWARD; } else { kstart = ( ( N - 1 ) / kb ) * kb; kend = kstep = -kb; GatherDir = CBACKWARD; ScatterDir = CFORWARD; } /* * Compute conjg( ALPHA ) and transpose parameter for Hermitian case */ if( conjg ) { tran = CCOTRAN; talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); } else { tran = CTRAN; talpha = ALPHA; } /* * Compute local information for sub( A ) and sub( B ) */ if( notran ) { ABm = N; ABn = K; } else { ABm = K; ABn = N; } PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow, &Acol ); Aimb = DESCA[IMB_]; Ainb = DESCA[INB_]; Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Ald = DESCA[LLD_]; Aimb1 = PB_Cfirstnb( ABm, IA, Aimb, Amb ); Amp0 = PB_Cnumroc( ABm, 0, Aimb1, Amb, myrow, Arow, nprow ); Ainb1 = PB_Cfirstnb( ABn, JA, Ainb, Anb ); Anq0 = PB_Cnumroc( ABn, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size ); PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow, &Bcol ); Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_]; Bimb1 = PB_Cfirstnb( ABm, IB, Bimb, Bmb ); Bmp0 = PB_Cnumroc( ABm, 0, Bimb1, Bmb, myrow, Brow, nprow ); Binb1 = PB_Cfirstnb( ABn, JB, Binb, Bnb ); Bnq0 = PB_Cnumroc( ABn, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size ); if( notran ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cinb = DESCC[INB_]; Cnb = DESCC[NB_]; Ccsrc = DESCC[CSRC_]; /* * Determine if one can reuse the WAC buffer for the intermediate local products * sub( A ) * sub( B )' and sub( B ) * sub( A )'. */ AisR = ( ( Acol < 0 ) || ( npcol == 1 ) ); BisR = ( ( Bcol < 0 ) || ( npcol == 1 ) ); if( !( AisR ) && !( BisR ) ) { /* * When neither sub( A ) nor sub( B ) are replicated, WAC can be reused if * either sub( A ) spans more than one process column, or, neither sub( A ) * nor sub( B ) span more than one process column, and both operands reside * in the same process column. */ Aspan = PB_Cspan( ABn, 0, Ainb1, Anb, Acol, npcol ); Bspan = PB_Cspan( ABn, 0, Binb1, Bnb, Bcol, npcol ); WACreuse = ( Aspan || ( !( Aspan ) && !( Bspan ) && ( Acol == Bcol ) ) ); } else { /* * Otherwise, WAC can be reused when both operands sub( A ) and sub( B ) are * replicated. */ WACreuse = ( AisR && BisR ); } /* * Furthermore, the ability to reuse WAC requires sub( A ) and sub( B ) to be * either both not row-distributed, or, both row-distributed and aligned. */ AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) ); BisD = ( ( Brow >= 0 ) && ( nprow > 1 ) ); WACreuse = ( WACreuse && ( ( !AisD && !BisD ) || ( ( AisD && BisD ) && ( ( Arow == Brow ) && ( ( ( Aimb1 >= ABm ) && ( Bimb1 >= ABm ) ) || ( ( Aimb1 == Bimb1 ) && ( Amb == Bmb ) ) ) ) ) ) ); tbeta = ( WACreuse ? one : zero ); if( upper ) { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, ABn, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB+k:IB+k+kbb-1, JB:JB+K-1 ) over A( IA:IA+k+kbb-1, JA:JA+K-1 ) */ PB_Cdescset( Ad0, ktmp, ABn, Aimb1, Ainb1, Amb, Anb, Arow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, ABn, Ad0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * WAC := A( IA:IA+k+kbb-1, JA:JA+K-1 ) * B( IB+k:IB+k+kbb-1, JB:JB+K-1 )' */ PB_COutV( TYPE, COLUMN, INIT, ktmp, ABn, Ad0, kbb, &WAC, WACd, &WACfr, &WACsum ); WACld = WACd[LLD_]; Amp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0, ALPHA, Aptr0, &Ald, WB, &WBd[LLD_], zero, WAC, &WACld ); if( WBfr ) free( WB ); if( Bfr ) free( Bptr ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, ABn, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over B( IB:IB+k+kbb-1, JB:JB+K-1 ) */ PB_Cdescset( Bd0, ktmp, ABn, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, ABn, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WBC := B( IB:IB+k+kbb-1, JB:JB+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )' */ if( WACreuse ) { WBC = WAC; MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; } else { PB_COutV( TYPE, COLUMN, INIT, ktmp, ABn, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); } WBCld = WBCd[LLD_]; Bmp = PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Bmp, &kbb, &Bnq0, talpha, Bptr0, &Bld, WA, &WAd[LLD_], tbeta, WBC, &WBCld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Combine the local copies of WAC when necessary */ if( WACsum ) { WACd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : ktmp - 1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); if( Amp > 0 ) gsum2d( ctxt, ROW, &top, Amp, kbb, WAC, WACld, myrow, WACd[CSRC_] ); } /* * Zero lower triangle of WAC( k:k+kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WAC, k, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, k+1, 0, WACd ); /* * Combine the local copies of WBC when necessary */ if( !( WACreuse ) ) { if( WBCsum ) { if( WACsum ) { WBCd[CSRC_] = WACd[CSRC_]; } else { WBCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : ktmp - 1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); } if( Bmp > 0 ) gsum2d( ctxt, ROW, &top, Bmp, kbb, WBC, WBCld, myrow, WBCd[CSRC_] ); } /* * Zero lower triangle of WBC( k:k+kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WBC, k, 0, WBCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WBC, k+1, 0, WBCd ); } /* * Add WAC to C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WAC, 0, 0, WACd, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( WACfr ) free( WAC ); /* * Add WBC to C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 ) */ if( !( WACreuse ) ) { PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WBC, 0, 0, WBCd, COLUMN, one, C, IC, JC+k, DESCC, COLUMN ); if( WBCfr ) free( WBC ); } } } else { for( k = kstart; k != kend; k += kstep ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate B( IB+k:IB+k+kbb-1, JB:JB+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, ABn, B, IB+k, JB, DESCB, ROW, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB+k:IB+k+kbb-1, JB:JB+K-1 ) over A( IA+k:IA+N-1, JA:JA+K-1 ) */ Acurimb1 = PB_Cfirstnb( ktmp, IA+k, Aimb, Amb ); Acurrow = PB_Cindxg2p( k, Aimb1, Amb, Arow, Arow, nprow ); PB_Cdescset( Ad0, ktmp, ABn, Acurimb1, Ainb1, Amb, Anb, Acurrow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, ABn, Ad0, kbb, Bptr, 0, 0, DBUFB, ROW, &WB, WBd, &WBfr ); /* * WAC := A( IA+k:IA+N-1, JA:JA+K-1 ) * B( IB+k:IB+k+kbb-1, JB:JB+K-1 )' */ PB_COutV( TYPE, COLUMN, INIT, ktmp, ABn, Ad0, kbb, &WAC, WACd, &WACfr, &WACsum ); WACld = WACd[LLD_]; Amp = PB_Cnumroc( ktmp, k, Aimb1, Amb, myrow, Arow, nprow ); if( ( Amp > 0 ) && ( Anq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0, ALPHA, Mptr( Aptr0, Amp0-Amp, 0, Ald, size ), &Ald, WB, &WBd[LLD_], zero, WAC, &WACld ); if( WBfr ) free( WB ); if( Bfr ) free( Bptr ); /* * Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, ABn, A, IA+k, JA, DESCA, ROW, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over B( IB+k:IB+N-1, JB:JB+K-1 ) */ Bcurimb1 = PB_Cfirstnb( ktmp, IB+k, Bimb, Bmb ); Bcurrow = PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow ); PB_Cdescset( Bd0, ktmp, ABn, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, ROW, ktmp, ABn, Bd0, kbb, Aptr, 0, 0, DBUFA, ROW, &WA, WAd, &WAfr ); /* * WBC := B( IB+k:IB+N-1, JB:JB+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )' */ if( WACreuse ) { WBC = WAC; MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; } else { PB_COutV( TYPE, COLUMN, INIT, ktmp, ABn, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); } WBCld = WBCd[LLD_]; Bmp = PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow ); if( ( Bmp > 0 ) && ( Bnq0 > 0 ) ) gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Bmp, &kbb, &Bnq0, talpha, Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld, WA, &WAd[LLD_], tbeta, WBC, &WBCld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Combine the local copies of WAC when necessary */ if( WACsum ) { WACd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : k + kbb - 1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); if( Amp > 0 ) gsum2d( ctxt, ROW, &top, Amp, kbb, WAC, WACld, myrow, WACd[CSRC_] ); } /* * Zero upper triangle of WAC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WAC, 0, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 0, 1, WACd ); /* * Combine the local copies of WBC when necessary */ if( !( WACreuse ) ) { if( WBCsum ) { if( WACsum ) { WBCd[CSRC_] = WACd[CSRC_]; } else { WBCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : k+kbb-1 ), Cinb, Cnb, Ccsrc, Ccsrc, npcol ); } if( Bmp > 0 ) gsum2d( ctxt, ROW, &top, Bmp, kbb, WBC, WBCld, myrow, WBCd[CSRC_] ); } /* * Zero upper triangle of WBC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WBC, 0, 0, WBCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WBC, 0, 1, WBCd ); } /* * Add WAC to C( IC+k:IC+N-1, JC+k:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WAC, 0, 0, WACd, COLUMN, one, C, IC+k, JC+k, DESCC, COLUMN ); if( WACfr ) free( WAC ); /* * Add WBC to C( IC+k:IC+N-1, JC+k:JC+k+kbb-1 ) */ if( !( WACreuse ) ) { PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WBC, 0, 0, WBCd, COLUMN, one, C, IC+k, JC+k, DESCC, COLUMN ); if( WBCfr ) free( WBC ); } } } } else { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cimb = DESCC[IMB_]; Cmb = DESCC[MB_]; Crsrc = DESCC[RSRC_]; /* * Determine if one can reuse the WAC buffer for the intermediate local products * sub( A )' * sub( B ) and sub( B )' * sub( A ). */ AisR = ( ( Arow < 0 ) || ( nprow == 1 ) ); BisR = ( ( Brow < 0 ) || ( nprow == 1 ) ); /* * When neither sub( A ) nor sub( B ) are replicated, WAC can be reused if * either sub( A ) spans more than one process row, or, neither sub( A ) nor * sub( B ) span more than one process row, and both operands reside in the * same process row. */ if( !( AisR ) && !( BisR ) ) { Aspan = PB_Cspan( ABm, 0, Aimb1, Amb, Arow, nprow ); Bspan = PB_Cspan( ABm, 0, Bimb1, Bmb, Brow, nprow ); WACreuse = ( Aspan || ( !( Aspan ) && !( Bspan ) && ( Arow == Brow ) ) ); } else { /* * Otherwise, WAC can be reused when both operands sub( A ) and sub( B ) are * replicated. */ WACreuse = ( AisR && BisR ); } /* * Furthermore, the ability to reuse WAC requires sub( A ) and sub( B ) to be * either both not column-distributed, or, both column-distributed and aligned. */ AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) ); BisD = ( ( Bcol >= 0 ) && ( npcol > 1 ) ); WACreuse = ( WACreuse && ( ( !AisD && !BisD ) || ( ( AisD && BisD ) && ( ( Acol == Bcol ) && ( ( ( Ainb1 >= ABn ) && ( Binb1 >= ABn ) ) || ( ( Ainb1 == Binb1 ) && ( Anb == Bnb ) ) ) ) ) ) ); tbeta = ( WACreuse ? one : zero ); if( upper ) { for( k = kstart; k != kend; k += kstep ) { ktmp = N - k; kbb = MIN( ktmp, kb ); /* * Accumulate B( IB:IB+K-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, ABm, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB:IB+K-1, JB+k:JB+k+kbb-1 ) over A( IA:IA+K-1, JA+k:JA+N-1 ) */ Acurinb1 = PB_Cfirstnb( ktmp, JA+k, Ainb, Anb ); Acurcol = PB_Cindxg2p( k, Ainb1, Anb, Acol, Acol, npcol ); PB_Cdescset( Ad0, ABm, ktmp, Aimb1, Acurinb1, Amb, Anb, Arow, Acurcol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, COLUMN, ABm, ktmp, Ad0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * WAC := B( IB:IB+K-1, JB+k:JB+k+kbb-1 )' * A( IA:IA+K-1, JA+k:JA+N-1 ) */ PB_COutV( TYPE, ROW, INIT, ABm, ktmp, Ad0, kbb, &WAC, WACd, &WACfr, &WACsum ); WACld = WACd[LLD_]; Anq = PB_Cnumroc( ktmp, k, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq > 0 ) && ( Amp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0, talpha, WB, &WBd[LLD_], Mptr( Aptr0, 0, Anq0-Anq, Ald, size ), &Ald, zero, WAC, &WACld ); if( WBfr ) free( WB ); if( Bfr ) free( Bptr ); /* * Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, ABm, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over B( IB:IB+K-1, JB+k:JB+N-1 ) */ Bcurinb1 = PB_Cfirstnb( ktmp, JB+k, Binb, Bnb ); Bcurcol = PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol ); PB_Cdescset( Bd0, ABm, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ABm, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WBC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * B( IB:IB+K-1, JB+k:JB+N-1 ) */ if( WACreuse ) { WBC = WAC; MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; } else { PB_COutV( TYPE, ROW, INIT, ABm, ktmp, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); } WBCld = WBCd[LLD_]; Bnq = PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bnq > 0 ) && ( Bmp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Bnq, &Bmp0, ALPHA, WA, &WAd[LLD_], Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld, tbeta, WBC, &WBCld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Combine the local copies of WAC when necessary */ if( WACsum ) { WACd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : k + kbb - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); if( Anq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Anq, WAC, WACld, WACd[RSRC_], mycol ); } /* * Zero lower triangle of WBC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WAC, 0, 0, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 1, 0, WACd ); /* * Combine the local copies of WBC when necessary */ if( !( WACreuse ) ) { if( WBCsum ) { if( WACsum ) { WBCd[RSRC_] = WACd[RSRC_]; } else { WBCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : k + kbb - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); } if( Bnq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq, WBC, WBCld, WBCd[RSRC_], mycol ); } /* * Zero lower triangle of WBC( 0:kbb-1, 0:kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WBC, 0, 0, WBCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WBC, 1, 0, WBCd ); } /* * Add WAC to C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 ) */ PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WAC, 0, 0, WACd, ROW, one, C, IC+k, JC+k, DESCC, ROW ); if( WACfr ) free( WAC ); /* * Add WBC to C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 ) */ if( !( WACreuse ) ) { PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WBC, 0, 0, WBCd, ROW, one, C, IC+k, JC+k, DESCC, ROW ); if( WBCfr ) free( WBC ); } } } else { for( k = kstart; k != kend; k += kstep ) { kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb; /* * Accumulate B( IB:IB+K-1, JB+k:JB+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, ABm, kbb, B, IB, JB+k, DESCB, COLUMN, &Bptr, DBUFB, &Bfr ); /* * Replicate B( IB:IB+K-1, JB+k:JB+k+kbb-1 ) over A( IA:IA+K-1, JA:JA+k+kbb-1 ) */ PB_Cdescset( Ad0, ABm, ktmp, Aimb1, Ainb1, Amb, Anb, Arow, Acol, ctxt, Ald ); PB_CInV( TYPE, NOCONJG, COLUMN, ABm, ktmp, Ad0, kbb, Bptr, 0, 0, DBUFB, COLUMN, &WB, WBd, &WBfr ); /* * WAC := B( IB:IB+K-1, JB+k:JB+k+kbb-1 )' * A( IA:IA+K-1, JA:JA+k+kbb-1 ) */ PB_COutV( TYPE, ROW, INIT, ABm, ktmp, Ad0, kbb, &WAC, WACd, &WACfr, &WACsum ); WACld = WACd[LLD_]; Anq = PB_Cnumroc( ktmp, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq > 0 ) && ( Amp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0, talpha, WB, &WBd[LLD_], Aptr0, &Ald, zero, WAC, &WACld ); if( WBfr ) free( WB ); if( Bfr ) free( Bptr ); /* * Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) */ PB_CGatherV( TYPE, REUSE, &GatherDir, ABm, kbb, A, IA, JA+k, DESCA, COLUMN, &Aptr, DBUFA, &Afr ); /* * Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over B( IB:IB+K-1, JB:JB+k+kbb-1 ) */ PB_Cdescset( Bd0, ABm, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld ); PB_CInV( TYPE, NOCONJG, COLUMN, ABm, ktmp, Bd0, kbb, Aptr, 0, 0, DBUFA, COLUMN, &WA, WAd, &WAfr ); /* * WBC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * B( IB:IB+K-1, JB:JB+k+kbb-1 ) */ if( WACreuse ) { WBC = WAC; MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; } else { PB_COutV( TYPE, ROW, INIT, ABm, ktmp, Bd0, kbb, &WBC, WBCd, &WBCfr, &WBCsum ); } WBCld = WBCd[LLD_]; Bnq = PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol ); if( ( Bnq > 0 ) && ( Bmp0 > 0 ) ) gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Bnq, &Bmp0, ALPHA, WA, &WAd[LLD_], Bptr0, &Bld, tbeta, WBC, &WBCld ); if( WAfr ) free( WA ); if( Afr ) free( Aptr ); /* * Combine the local copies of WAC when necessary */ if( WACsum ) { WACd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : ktmp - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); if( Anq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Anq, WAC, WACld, WACd[RSRC_], mycol ); } /* * Zero upper triangle of WBC( 0:kbb-1, k:k+kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WAC, 0, k, WACd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WAC, 0, k+1, WACd ); /* * Combine the local copies of WBC when necessary */ if( !( WACreuse ) ) { if( WBCsum ) { if( WACsum ) { WBCd[RSRC_] = WACd[RSRC_]; } else { WBCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : ktmp - 1 ), Cimb, Cmb, Crsrc, Crsrc, nprow ); } if( Bnq > 0 ) gsum2d( ctxt, COLUMN, &top, kbb, Bnq, WBC, WBCld, WBCd[RSRC_], mycol ); } /* * Zero upper triangle of WBC( 0:kbb-1, k:k+kbb-1 ) */ if( conjg ) PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WBC, 0, k, WBCd ); else if( kbb > 1 ) PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WBC, 0, k+1, WBCd ); } /* * Add WAC to C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 ) */ PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WAC, 0, 0, WACd, ROW, one, C, IC+k, JC, DESCC, ROW ); if( WACfr ) free( WAC ); /* * Add WBC to C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 ) */ if( !( WACreuse ) ) { PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WBC, 0, 0, WBCd, ROW, one, C, IC+k, JC, DESCC, ROW ); if( WBCfr ) free( WBC ); } } } } if( conjg ) free( talpha ); /* * End of PB_Cpsyr2kAC */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Cctypeset.c0000640000175000017500000000643110363532303020320 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" PBTYP_T * PB_Cctypeset() { /* * Purpose * ======= * * PB_Cctypeset on the first call initializes a static structure contai- * ning typed information and returns a pointer to it. The other calls * to this routine just returns this pointer. * * -- Written on April 1, 1998 by * R. Clint Whaley, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ static int setup=0; static PBTYP_T TypeStruct; static cmplx zero, one, negone; /* .. * .. Executable Statements .. * */ if( setup ) return( &TypeStruct ); setup = 1; TypeStruct.type = SCPLX; TypeStruct.usiz = sizeof( float ); TypeStruct.size = sizeof( cmplx ); zero [REAL_PART] = ZERO; zero [IMAG_PART] = ZERO; one [REAL_PART] = ONE; one [IMAG_PART] = ZERO; negone[REAL_PART] = -ONE; negone[IMAG_PART] = ZERO; TypeStruct.zero = ((char *) zero); TypeStruct.one = ((char *) one); TypeStruct.negone = ((char *) negone); TypeStruct.Cgesd2d = Ccgesd2d; TypeStruct.Cgerv2d = Ccgerv2d; TypeStruct.Cgebs2d = Ccgebs2d; TypeStruct.Cgebr2d = Ccgebr2d; TypeStruct.Cgsum2d = Ccgsum2d; TypeStruct.Fmmadd = cmmadd_; TypeStruct.Fmmcadd = cmmcadd_; TypeStruct.Fmmtadd = cmmtadd_; TypeStruct.Fmmtcadd = cmmtcadd_; TypeStruct.Fmmdda = cmmdda_; TypeStruct.Fmmddac = cmmddac_; TypeStruct.Fmmddat = cmmddat_; TypeStruct.Fmmddact = cmmddact_; TypeStruct.Fcshft = ccshft_; TypeStruct.Frshft = crshft_; TypeStruct.Fvvdotu = cvvdotu_; TypeStruct.Fvvdotc = cvvdotc_; TypeStruct.Fset = cset_; TypeStruct.Ftzpad = ctzpad_; TypeStruct.Ftzpadcpy = ctzpadcpy_; TypeStruct.Ftzscal = ctzscal_; TypeStruct.Fhescal = chescal_; TypeStruct.Ftzcnjg = ctzcnjg_; TypeStruct.Faxpy = caxpy_; TypeStruct.Fcopy = ccopy_; TypeStruct.Fswap = cswap_; TypeStruct.Fgemv = cgemv_; TypeStruct.Fsymv = csymv_; TypeStruct.Fhemv = chemv_; TypeStruct.Ftrmv = ctrmv_; TypeStruct.Ftrsv = ctrsv_; TypeStruct.Fagemv = cagemv_; TypeStruct.Fasymv = casymv_; TypeStruct.Fahemv = cahemv_; TypeStruct.Fatrmv = catrmv_; TypeStruct.Fgerc = cgerc_; TypeStruct.Fgeru = cgeru_; TypeStruct.Fsyr = csyr_; TypeStruct.Fher = cher_; TypeStruct.Fsyr2 = csyr2_; TypeStruct.Fher2 = cher2_; TypeStruct.Fgemm = cgemm_; TypeStruct.Fsymm = csymm_; TypeStruct.Fhemm = chemm_; TypeStruct.Fsyrk = csyrk_; TypeStruct.Fherk = cherk_; TypeStruct.Fsyr2k = csyr2k_; TypeStruct.Fher2k = cher2k_; TypeStruct.Ftrmm = ctrmm_; TypeStruct.Ftrsm = ctrsm_; return( &TypeStruct ); /* * End of PB_Cctypeset */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_Ctzatrmv.c0000640000175000017500000002145510363532303020172 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Ctzatrmv( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * TRANS, char * DIAG, int M, int N, int K, int IOFFD, char * ALPHA, char * A, int LDA, char * X, int LDX, char * Y, int LDY ) #else void PB_Ctzatrmv( TYPE, SIDE, UPLO, TRANS, DIAG, M, N, K, IOFFD, ALPHA, A, LDA, X, LDX, Y, LDY ) /* * .. Scalar Arguments .. */ char * SIDE, * UPLO, * TRANS, * DIAG; int IOFFD, K, LDA, LDX, LDY, M, N; char * ALPHA; /* * .. Array Arguments .. */ char * A, * X, * Y; PBTYP_T * TYPE; #endif { /* * Purpose * ======= * * PB_Ctzatrmv performs the matrix-vector operation * * y := abs( alpha )*abs( A )*abs( x )+ abs( y ), * * or * * y := abs( alpha )*abs( A' )*abs( x ) + abs( y ), * * or * * y := abs( alpha )*abs( conjg( A' ) )*abs( x ) + abs( y ), * * where alpha is a real scalar, y is a real vector, x is a vector and A * is an m by n trapezoidal triangular matrix. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * SIDE (dummy) pointer to CHAR * In this routine, SIDE is a dummy (unused) argument. * * UPLO (input) pointer to CHAR * On entry, UPLO specifies which part of the matrix A is to be * referenced as follows: * * UPLO = 'L' or 'l' the lower trapezoid of A is referenced, * * UPLO = 'U' or 'u' the upper trapezoid of A is referenced, * * otherwise all of the matrix A is referenced. * * TRANS (input) pointer to CHAR * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n': * y := abs( alpha )*abs( A )*abs( x )+ abs( beta*y ), * * TRANS = 'T' or 't': * y := abs( alpha )*abs( A' )*abs( x ) + abs( beta*y ), * * TRANS = 'C' or 'c': * y := abs( alpha )*abs( A' )*abs( x ) + abs( y ) or * y := abs( alpha )*abs( conjg(A') )*abs( x ) + abs( y ). * * DIAG (input) pointer to CHAR * On entry, DIAG specifies whether or not A is unit triangular * as follows: * * DIAG = 'U' or 'u' A is assumed to be unit triangular. * * DIAG = 'N' or 'n' A is not assumed to be unit triangular. * * M (input) INTEGER * On entry, M specifies the number of rows of the matrix A. M * must be at least zero. * * N (input) INTEGER * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * K (dummy) INTEGER * In this routine, K is a dummy (unused) argument. * * IOFFD (input) INTEGER * On entry, IOFFD specifies the position of the offdiagonal de- * limiting the upper and lower trapezoidal part of A as follows * (see the notes below): * * IOFFD = 0 specifies the main diagonal A( i, i ), * with i = 1 ... MIN( M, N ), * IOFFD > 0 specifies the subdiagonal A( i+IOFFD, i ), * with i = 1 ... MIN( M-IOFFD, N ), * IOFFD < 0 specifies the superdiagonal A( i, i-IOFFD ), * with i = 1 ... MIN( M, N+IOFFD ). * * ALPHA (input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. * * A (input) pointer to CHAR * On entry, A is an array of dimension (LDA,N) containing the m * by n matrix A. Only the trapezoidal part of A determined by * UPLO and IOFFD is referenced. When DIAG = 'U' or 'u', the * diagonal elements of A are not referenced either, but are * assumed to be unity. * * LDA (input) INTEGER * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max( 1, M ). * * X (input) pointer to CHAR * On entry, X is an array of dimension (LDX,Kx). Before entry, * with TRANS = 'N' or 'n', the array X must contain the n ele- * ment vector x corresponding to the columns of A. Otherwise, * the array X must contain the m element vector x corresponding * to the rows of A. When TRANS is 'N' or 'n', LDX is at least * 1, and Kx is at least N. Otherwise, LDX is at least max(1,M), * and Kx is at least 1. * * LDX (input) INTEGER * On entry, LDX specifies the leading dimension of the array X. * LDX must be at least 1 when TRANS is 'N' or 'n' and * max( 1, M ) otherwise. * * Y (input/output) pointer to CHAR * On entry, Y is an array of dimension (LDY,Ky). On exit, with * TRANS = 'N' or 'n', the array Y contains the m element vector * y corresponding to the rows of A. Otherwise, the array Y con- * tains the n element vector y corresponding to the columns of * A. When TRANS is 'N' or 'n', LDY is at least max( 1, M ), and * Ky is at least 1. Otherwise, LDY is at least 1, and Ky is at * least N. On exit, Y is overwritten by the partial updated * vector y. * * LDY (input) INTEGER * On entry, LDY specifies the leading dimension of the array Y. * LDY must be at least max( 1, M ) when TRANS is 'N' or * 'n' and 1 otherwise. * * Notes * ===== * N N * ---------------------------- ----------- * | d | | | * M | d Upper | | Upper | * | Lower d | |d | * | d | M | d | * ---------------------------- | d | * | d | * IOFFD < 0 | Lower d | * | d| * N | | * ----------- ----------- * | d Upper| * | d | IOFFD > 0 * M | d | * | d| N * | Lower | ---------------------------- * | | | Upper | * | | |d | * | | | d | * | | | d | * | | |Lower d | * ----------- ---------------------------- * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int ione = 1; char * Aptr = NULL; /* .. * .. Executable Statements .. * */ if( ( M <= 0 ) || ( N <= 0 ) ) return; if( ( Mupcase( UPLO[0] ) == CLOWER ) || ( Mupcase( UPLO[0] ) == CUPPER ) ) { Aptr = PB_Cmalloc( M * N * TYPE->size ); TYPE->Ftzpadcpy( C2F_CHAR( UPLO ), C2F_CHAR( DIAG ), &M, &N, &IOFFD, A, &LDA, Aptr, &M ); if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fagemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, Aptr, &M, X, &LDX, TYPE->one, Y, &ione ); } else { TYPE->Fagemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, Aptr, &M, X, &ione, TYPE->one, Y, &LDY ); } if( Aptr ) free( Aptr ); } else { if( Mupcase( TRANS[0] ) == CNOTRAN ) { TYPE->Fagemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, A, &LDA, X, &LDX, TYPE->one, Y, &ione ); } else { TYPE->Fagemv( C2F_CHAR( TRANS ), &M, &N, ALPHA, A, &LDA, X, &ione, TYPE->one, Y, &LDY ); } } /* * End of PB_Ctzatrmv */ } scalapack-1.8.0/PBLAS/SRC/PTOOLS/PB_CpgemmBC.c0000640000175000017500000006266510363532303020005 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_CpgemmBC( PBTYP_T * TYPE, char * DIRECB, char * DIRECC, char * TRANSA, char * TRANSB, int M, int N, int K, char * ALPHA, char * A, int IA, int JA, int * DESCA, char * B, int IB, int JB, int * DESCB, char * BETA, char * C, int IC, int JC, int * DESCC ) #else void PB_CpgemmBC( TYPE, DIRECB, DIRECC, TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ char * DIRECB, * DIRECC, * TRANSA, * TRANSB; int IA, IB, IC, JA, JB, JC, K, M, N; char * ALPHA, * BETA; PBTYP_T * TYPE; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; char * A, * B, * C; #endif { /* * Purpose * ======= * * PB_CpgemmBC performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * conjg(A(IA:IA+K-1,JA:JA+M-1)') if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * conjg(B(IB:IB+N-1,JB:JB+K-1)') if TRANSB = 'C'. * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * This is the inner-product algorithm using the logical LCM algorithmic * blocking technique. The submatrix operand sub( A ) stays in place. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * DIRECB (global input) pointer to CHAR * On entry, DIRECB specifies the direction in which the rows * or columns of sub( B ) should be looped over as follows: * DIRECB = 'F' or 'f' forward or increasing, * DIRECB = 'B' or 'b' backward or decreasing. * * DIRECC (global input) pointer to CHAR * On entry, DIRECC specifies the direction in which the rows * or columns of sub( C ) should be looped over as follows: * DIRECC = 'F' or 'f' forward or increasing, * DIRECC = 'B' or 'b' backward or decreasing. * * TRANSA (global input) pointer to CHAR * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * TRANSB (global input) pointer to CHAR * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * TRANSB = 'C' or 'c' op( sub( B ) ) = conjg( sub( B )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) pointer to CHAR * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) pointer to CHAR * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) pointer to CHAR * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) pointer to CHAR * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) pointer to CHAR * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Broc, GemmTa, GemmTb, TrA, TrB, * one, * talpha, * tbeta, top, * zero; int Acol, Aii, Aimb1, Ainb1, Ajj, Ald, Am, Amb, Amp, An, Anb, Anq, Arow, Bbufld, BcurrocR, Bfr, Bfwd, BiD, BiR, BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, Bkk, Bld, BmyprocD, BmyprocR, BnbD, BnbR, BnpD, BnpR, BnprocsD, BnprocsR, Boff, BrocD, BrocR, BsrcR, Bsrc_, Cbufld, Ccol, Ccurcol, Cfr, Cfwd, Cii, Cimb, Cimb1, Cinb, Cinb1, CisR, Cjj, Ckk, Cld, Cmb, Cmp, Cnb, Cnq, Coff, Crow, Csrc, WBfr, WCfr, WCsum, ctxt, lcmb, maxp, maxpm1, maxq, mycol, myrow, n, nb, nbb, ncpq, nota, notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp; GEMM_T gemm; GSUM2D_T gsum2d; /* * .. Local Arrays .. */ int Ad0[DLEN_], DBUFB[DLEN_], DBUFC[DLEN_], WBd[DLEN_], WCd[DLEN_]; PB_VM_T VM; char * Aptr = NULL, * Bbuf = NULL, * Cbuf = NULL, * WB = NULL, * WC = NULL; /* .. * .. Executable Statements .. * */ Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD ); Cfwd = ( Mupcase( DIRECC[0] ) == CFORWARD ); nota = ( ( TrA = Mupcase( TRANSA[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( TRANSB[0] ) ) == CNOTRAN ); size = TYPE->size; one = TYPE->one; zero = TYPE->zero; gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d; nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); /* * Compute local information for sub( A ), sub( B ) and sub( C ) */ if( notb ) { BiD = IB; BiR = JB; Bsrc_ = CSRC_; Broc = CCOLUMN; BinbD = DESCB[IMB_ ]; BinbR = DESCB[INB_]; BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ]; BsrcR = DESCB[Bsrc_]; Bld = DESCB[LLD_]; BmyprocD = myrow; BnprocsD = nprow; BmyprocR = mycol; BnprocsR = npcol; PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR, &BiiD, &BiiR, &BrocD, &BrocR ); } else { BiD = JB; BiR = IB; Bsrc_ = RSRC_; Broc = CROW; BinbR = DESCB[IMB_ ]; BinbD = DESCB[INB_]; BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ]; BsrcR = DESCB[Bsrc_]; Bld = DESCB[LLD_]; BmyprocD = mycol; BnprocsD = npcol; BmyprocR = myrow; BnprocsR = nprow; PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD, &BiiR, &BiiD, &BrocR, &BrocD ); } Binb1D = PB_Cfirstnb( K, BiD, BinbD, BnbD ); BnpD = PB_Cnumroc( K, 0, Binb1D, BnbD, BmyprocD, BrocD, BnprocsD ); Binb1R = PB_Cfirstnb( N, BiR, BinbR, BnbR ); Cimb = DESCC[IMB_ ]; Cinb = DESCC[INB_]; Cmb = DESCC[MB_ ]; Cnb = DESCC[NB_ ]; Csrc = DESCC[CSRC_]; Cld = DESCC[LLD_]; PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj, &Crow, &Ccol ); Cimb1 = PB_Cfirstnb( M, IC, Cimb, Cmb ); Cmp = PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow ); Cinb1 = PB_Cfirstnb( N, JC, Cinb, Cnb ); /* * Retrieve the BLACS combine topology, compute conjugate of alpha for the * conjugate transpose case and set the transpose parameters to be passed to * the BLAS matrix multiply routine. */ if( nota ) { Am = M; An = K; top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); talpha = ALPHA; GemmTa = CNOTRAN; GemmTb = ( notb ? CTRAN : TrB ); } else { Am = K; An = M; top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( TrA == CCOTRAN ) { talpha = PB_Cmalloc( size ); PB_Cconjg( TYPE, ALPHA, talpha ); GemmTa = ( ( TrB == CCOTRAN ) ? CTRAN : CCOTRAN ); } else { talpha = ALPHA; GemmTa = ( ( TrB == CCOTRAN ) ? CCOTRAN : CTRAN ); } GemmTb = CNOTRAN; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( Am, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Amp = PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( A, Aii, Ajj, Ald, size ); } /* * When sub( B ) is not replicated and backward pass on sub( B ), find the * virtual process q owning the last row or column of sub( B ). */ if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd ) { tmp = PB_Cindxg2p( N - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR ); q = MModSub( tmp, BrocR, BnprocsR ); } /* * When sub( C ) is not replicated and backward pass on sub( C ), find the * virtual process p owning the last row or column of sub( C ). */ if( !( CisR = ( ( Ccol < 0 ) || ( npcol == 1 ) ) ) && !Cfwd ) { tmp = PB_Cindxg2p( N - 1, Cinb1, Cnb, Ccol, Ccol, npcol ); p = MModSub( tmp, Ccol, npcol ); } /* * Loop over the virtual process grid induced by the rows or columns of * sub( B ) and sub( C ). */ lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : npcol ) ) * Cnb, ( maxq = ( BisR ? 1 : BnprocsR ) ) * BnbR ); n = N; maxpm1 = maxp - 1; while( n > 0 ) { /* * Initialize local virtual matrix in process (p,q) */ BcurrocR = ( BisR ? -1 : MModAdd( BrocR, q, BnprocsR ) ); Bkk = PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR ); BnpR = PB_Cnumroc( N, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR ); Ccurcol = ( CisR ? -1 : MModAdd( Ccol, p, npcol ) ); Ckk = PB_Cg2lrem( JC, Cinb, Cnb, Ccurcol, Csrc, npcol ); Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, Ccurcol, Ccol, npcol ); PB_CVMinit( &VM, 0, Cnq, BnpR, Cinb1, Binb1R, Cnb, BnbR, p, q, maxp, maxq, lcmb ); /* * Find how many diagonals in this virtual process */ npq = PB_CVMnpq( &VM ); n -= npq; /* * Re-adjust the number of rows or columns to be (un)packed, in order to * average the message sizes. */ if( npq ) nbb = npq / ( ( npq - 1 ) / nb + 1 ); while( npq ) { nbb = MIN( nbb, npq ); /* * Find out how many rows or columns of sub( B ) and sub( C ) are contiguous */ PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Boff ); if( notb ) { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * columns of sub( B ). */ if( ( Bfr = ( ncpq < nbb ) ) != 0 ) { /* * If columns of sub( B ) are not contiguous, then allocate the buffer and * pack the nbb columns of sub( B ). */ Bbufld = MAX( 1, BnpD ); if( BisR || ( BmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * nbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Broc, PACKING, NOTRAN, nbb, BnpD, one, Mptr( B, BiiD, Bkk, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BmyprocR == BcurrocR ) ) Bbuf = Mptr( B, BiiD, Bkk+Boff, Bld, size ); } PB_Cdescset( DBUFB, K, nbb, Binb1D, nbb, BnbD, nbb, BrocD, BcurrocR, ctxt, Bbufld ); } else { /* * Compute the descriptor DBUFB for the buffer that will contained the packed * rows of sub( B ). */ if( ( Bfr = ( ncpq < nbb ) ) != 0 ) { /* * If rows of sub( B ) are not contiguous, then allocate the buffer and pack * the nbb rows of sub( B ). */ Bbufld = nbb; if( BisR || ( BmyprocR == BcurrocR ) ) { Bbuf = PB_Cmalloc( BnpD * nbb * size ); PB_CVMpack( TYPE, &VM, COLUMN, &Broc, PACKING, NOTRAN, nbb, BnpD, one, Mptr( B, Bkk, BiiD, Bld, size ), Bld, zero, Bbuf, Bbufld ); } } else { /* * Otherwise, re-use sub( B ) directly. */ Bbufld = Bld; if( BisR || ( BmyprocR == BcurrocR ) ) Bbuf = Mptr( B, Bkk+Boff, BiiD, Bld, size ); } PB_Cdescset( DBUFB, nbb, K, nbb, Binb1D, nbb, BnbD, BcurrocR, BrocD, ctxt, Bbufld ); } if( nota ) { /* * Replicate this panel of rows or columns of sub( B ) over sub( A ) -> WB */ PB_CInV( TYPE, NOCONJG, ROW, Am, An, Ad0, nbb, Bbuf, 0, 0, DBUFB, &Broc, &WB, WBd, &WBfr ); /* * Allocate space for temporary results in scope of sub( A ) -> WC */ PB_COutV( TYPE, COLUMN, INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr, &WCsum ); /* * Local matrix-matrix multiply iff I own some data */ if( Amp > 0 && Anq > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Amp, &nbb, &Anq, talpha, Aptr, &Ald, WB, &WBd[LLD_], zero, WC, &WCd[LLD_] ); if( WBfr ) free( WB ); if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); /* * Accumulate the intermediate results in WC */ if( WCsum ) { WCd[CSRC_] = Ccurcol; if( Amp > 0 ) gsum2d( ctxt, ROW, &top, Amp, nbb, WC, WCd[LLD_], myrow, WCd[CSRC_] ); } /* * Compute the descriptor DBUFC for the buffer that will contained the packed * columns of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < nbb ) ) != 0 ) { /* * If columns of sub( C ) are not contiguous, then allocate the buffer */ Cbufld = MAX( 1, Cmp ); tbeta = zero; if( CisR || ( mycol == Ccurcol ) ) Cbuf = PB_Cmalloc( Cmp * nbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( mycol == Ccurcol ) ) Cbuf = Mptr( C, Cii, Ckk+Coff, Cld, size ); } PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol, ctxt, Cbufld ); /* * Cbuf := Cbuf + WC */ PB_Cpaxpby( TYPE, NOCONJG, M, nbb, one, WC, 0, 0, WCd, COLUMN, tbeta, Cbuf, 0, 0, DBUFC, COLUMN ); /* * Unpack the nbb columns of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( mycol == Ccurcol ) ) ) { PB_CVMpack( TYPE, &VM, ROW, COLUMN, UNPACKING, NOTRAN, nbb, Cmp, BETA, Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } if( WCfr ) free( WC ); } else { /* * Replicate this panel of rows or columns of sub( B ) over sub( A ) -> WB */ PB_CInV( TYPE, NOCONJG, COLUMN, Am, An, Ad0, nbb, Bbuf, 0, 0, DBUFB, &Broc, &WB, WBd, &WBfr ); /* * Allocate space for temporary results in scope of sub( A ) -> WC */ PB_COutV( TYPE, ROW, INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr, &WCsum ); /* * Local matrix-matrix multiply iff I own some data */ if( Amp > 0 && Anq > 0 ) gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb, &Anq, &Amp, talpha, WB, &WBd[LLD_], Aptr, &Ald, zero, WC, &WCd[LLD_] ); if( WBfr ) free( WB ); if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) ) if( Bbuf ) free( Bbuf ); /* * Accumulate the intermediate results in WC */ if( WCsum ) { WCd[RSRC_] = 0; if( Anq > 0 ) gsum2d( ctxt, COLUMN, &top, nbb, Anq, WC, WCd[LLD_], WCd[RSRC_], mycol ); } /* * Compute the descriptor DBUFC for the buffer that will contained the packed * columns of sub( C ). Allocate it. */ if( ( Cfr = ( nrpq < nbb ) ) != 0 ) { /* * If columns of sub( C ) are not contiguous, then allocate the buffer */ Cbufld = MAX( 1, Cmp ); tbeta = zero; if( CisR || ( mycol == Ccurcol ) ) Cbuf = PB_Cmalloc( Cmp * nbb * size ); } else { /* * Otherwise re-use sub( C ) */ Cbufld = Cld; tbeta = BETA; if( CisR || ( mycol == Ccurcol ) ) Cbuf = Mptr( C, Cii, Ckk+Coff, Cld, size ); } PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol, ctxt, Cbufld ); /* * Cbuf := Cbuf + WC' */ PB_Cpaxpby( TYPE, ( TrA == CCOTRAN ? CONJG : NOCONJG ), nbb, M, one, WC, 0, 0, WCd, ROW, tbeta, Cbuf, 0, 0, DBUFC, COLUMN ); /* * Unpack the nbb columns of sub( C ) and release the buffer containing them. */ if( Cfr && ( CisR || ( mycol == Ccurcol ) ) ) { PB_CVMpack( TYPE, &VM, ROW, COLUMN, UNPACKING, NOTRAN, nbb, Cmp, BETA, Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf, Cbufld ); if( Cbuf ) free( Cbuf ); } if( WCfr ) free( WC ); } /* * Update the local indexes of sub( B ) and sub( C ) */ PB_CVMupdate( &VM, nbb, &Ckk, &Bkk ); npq -= nbb; } /* * Go to next or previous virtual process row or column */ if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) ) q = ( Bfwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) ); p = ( Cfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) ); } if( TrA == CCOTRAN ) free( talpha ); /* * End of PB_CpgemmBC */ } scalapack-1.8.0/PBLAS/SRC/pstran_.c0000640000175000017500000002242310363532303016421 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstran_( int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pstran_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PSTRAN transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PSTRAN", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PSTRAN", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRAN", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cstypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cstypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cstypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PSTRAN */ } scalapack-1.8.0/PBLAS/SRC/pdgeadd_.c0000640000175000017500000002651510363532303016510 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdgeadd_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdgeadd_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PDGEADD adds a matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'C'. * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, DirC, ctop, rtop; int Ai, Aj, Ci, Cj, TrA, ctxt, info, mycol, myrow, npcol, nprow, notran; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ notran = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !notran ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDGEADD", "Illegal TRANS = %c\n", TrA ); info = -1; } if( notran ) PB_Cchkmat( ctxt, "PDGEADD", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); else PB_Cchkmat( ctxt, "PDGEADD", "A", *N, 3, *M, 2, Ai, Aj, Ad, 8, &info ); PB_Cchkmat( ctxt, "PDGEADD", "C", *M, 2, *N, 3, Ci, Cj, Cd, 13, &info ); } if( info ) { PB_Cabort( ctxt, "PDGEADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cdtypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cdtypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added. This * selection is based on the current setting for the BLACS broadcast operations. */ if( notran ) { rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) { DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpgeadd( PB_Cdtypeset(), &DirA, &DirC, NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cptran( PB_Cdtypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } /* * End of PDGEADD */ } scalapack-1.8.0/PBLAS/SRC/pstrsv_.c0000640000175000017500000007604410363532303016463 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstrsv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pstrsv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PSTRSV solves one of the systems of equations * * sub( A )*sub( X ) = b, or sub( A )'*sub( X ) = b, * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * b and sub( X ) are n element subvectors and sub( A ) is an n by n * unit, or non-unit, upper or lower triangular submatrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * sub( X ) = b. * * TRANS = 'T' or 't' sub( A )' * sub( X ) = b. * * TRANS = 'C' or 'c' sub( A )' * sub( X ) = b. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On entry, sub( X ) is the n element right-hand side * b. On exit, sub( X ) is overwritten with the solution subvec- * tor. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, btop, ctop, * negone, * one, * zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, XACapbX, XACfr, XACld, XACsum, XARapbX, XARfr, XARld, XARsum, Xi, Xj, ctxt, info, ione=1, k, kb, kbnext, kbprev, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XACd[DLEN_], XARd[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XAC = NULL, * XAR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PSTRSV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PSTRSV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRSV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); size = type->size; one = type->one; zero = type->zero; negone = type->negone; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the last * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ if( Akp > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow, Arow, nprow ); Akp -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) sgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol ); Csgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) sset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Akp > 0 && Anq0 > 0 ) sgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } else { if( Anq0 > 0 ) sgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Csgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the first * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Anp0 = Anp - Akp ) > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow, nprow ); Anp0 -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) sgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol ); Csgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) sset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Anp0 > 0 && Anq0 > 0 ) sgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp+ktmp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp+ktmp, 0, XACld, size ), &ione ); } else { if( Anq0 > 0 ) sgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Csgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } } else { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAR in process row owning the first row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq0 = Anq - Akq ) > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol, npcol ); Anq0 -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) sgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow ); Csgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) sset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Anq0 > 0 ) sgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq+ktmp, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq+ktmp, XARld, size ), &XARld ); } else { if( Anp0 > 0 ) sgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Csgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process row owning the last row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later * is only locally updated. */ if( Akq > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol, Acol, npcol ); Akq -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) sgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow ); Csgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) sset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Akq > 0 ) sgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } else { if( Anp0 > 0 ) sgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Csgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } } if( XACfr ) free( XAC ); if( XARfr ) free( XAR ); /* * End of PSTRSV */ } scalapack-1.8.0/PBLAS/SRC/pctrsm_.c0000640000175000017500000005170010363532303016422 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctrsm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB ) #else void pctrsm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; float * A, * B; #endif { /* * Purpose * ======= * * PCTRSM solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ). * * The submatrix X is overwritten on sub( B ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, itmp, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSM", "Illegal TRANS = %c\n", TranOp ); info = -3; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PCTRSM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PCTRSM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PCTRSM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRSM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); /* * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp2, tmp3 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp1, tmp4 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } if( ChooseAB ) { /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ForceTop ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrsmAB( type, &Var, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } } else { /* * BLACS topologies are always enforced. */ if( ( lside && notran ) || ( !lside && !notran ) ) { OpR = CCOMBINE; OpC = CBCAST; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopR = CTOP_DEFAULT; } else { OpR = CBCAST; OpC = CCOMBINE; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopC = CTOP_DEFAULT; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrsmB( type, &DirB, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies. */ rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCTRSM */ } scalapack-1.8.0/PBLAS/SRC/pscnrm2_.c0000640000175000017500000004331010363532303016474 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pscnrm2_( int * N, float * NORM2, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pscnrm2_( N, NORM2, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * NORM2; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSCNRM2 computes the 2-norm of a subvector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * NORM2 (local output) REAL * On exit, NORM2 specifies the 2-norm of the subvector sub( X ) * only in its scope (See below for further details). * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Xptr = NULL, top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, dst, dist, info, k, mycol, mydist, myrow, npcol, nprow, src, size; float Xtmp, scale, ssq, temp1, temp2; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; float work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSCNRM2", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PSCNRM2", info ); return; } #endif /* * Initialize NORM2 */ *NORM2 = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute NORM2 */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { scale = ZERO; ssq = ONE; type = PB_Cctypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); Xtmp = ((float *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((float *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } /* * Compute NORM2 = SCALE * SQRT( SSQ ) */ sasqrtb_( &scale, &ssq, NORM2 ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); size = type->size; Xptr = Mptr( ((char *) X), Xii, Xjj, Xld, size ); for( k = 0; k < Xnq; k++ ) { Xtmp = ((float *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((float *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += Xld * size; } } /* * If Xnq <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process row * 0 if npcol > 1 and Xcol >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Csgesd2d( ctxt, 2, 1, ((char*) work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Csgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, myrow, src ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process row. */ top = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Csgebs2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2, myrow, 0 ); } /* * Compute NORM2 redundantly NORM2 = WORK( 1 ) * SQRT( WORK( 2 ) ) */ sasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ sasqrtb_( &scale, &ssq, NORM2 ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cctypeset(); size = type->size; Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ); for( k = 0; k < Xnp; k++ ) { Xtmp = ((float *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((float *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += size; } } /* * If Xnp <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process * column 0 if nprow > 1 and Xrow >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Csgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Csgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, src, mycol ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process column */ top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Csgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 ); } else { Csgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2, 0, mycol ); } /* * Compute NORM2 redundantly NORM2 = WORK[0] * SQRT( WORK[1] ) */ sasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ sasqrtb_( &scale, &ssq, NORM2 ); } } return; } /* * End of PSCNRM2 */ } scalapack-1.8.0/PBLAS/SRC/pzswap_.c0000640000175000017500000007621510363532303016446 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzswap_( int * N, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzswap_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PZSWAP swaps two subvectors, * * sub( Y ) := sub( X ) and sub( X ) := sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * swapped. N must be at least zero. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with sub( Y ). * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char Xscope, Yscope, * one, * top, tran, * zero; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, XisD, XisR, Xinb1D, XinbD, XisRow, Xii, Xj, Xjj, Xld, Xlinc, Xm, XmyprocD, XmyprocR, Xn, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xroc, Xrow, Ycol, Yi, Yii, Yinb1D, YinbD, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, Ym, YmyprocD, YmyprocR, Yn, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow, cdst, csrc, ctxt, dst, gcdPQ, info, ione=1, k, l, lcmPQ, lcmb, mycol, myrow, npcol, npq, nprow, p, q, rdst, rsrc, src, size; PBTYP_T * type; PB_VM_T VM; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; char * buf = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZSWAP", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PZSWAP", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PZSWAP", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ... */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_ ]; XnbD = Xd[NB_ ]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_ ]; XnbD = Xd[MB_ ]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ... */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_ ]; YnbD = Yd[NB_ ]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_ ]; YnbD = Yd[MB_ ]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is distributed but not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector to * swap sub( X ) with. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cztypeset(); size = type->size; /* * A purely local operation occurs iff the operands start in the same process * and, if either the grid is mono-dimensional or there is a single local block * to be swapped or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { zswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } if( RRorCC && XisR && YisR ) return; } } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to swap the distributed vectors in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( RRorCC && ( XprocR != YprocR ) ) { /* * Both operands are distributed in the same direction, but reside in different * process rows or columns. */ if( XmyprocR == XprocR ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( XisRow ) { Czgesd2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); Czgerv2d( ctxt, 1, XnpD, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, YprocR, dst ); } else { Czgesd2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); Czgerv2d( ctxt, XnpD, 1, Mptr( ((char*) X), Xii, Xjj, Xld, size ), Xld, dst, YprocR ); } } } if( YmyprocR == YprocR ) { YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( YisRow ) { Czgesd2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); Czgerv2d( ctxt, 1, YnpD, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, XprocR, dst ); } else { Czgesd2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); Czgerv2d( ctxt, YnpD, 1, Mptr( ((char*) Y), Yii, Yjj, Yld, size ), Yld, dst, XprocR ); } } } } else { /* * General case when just one message needs to be exchanged */ if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) send it to the corresponding * process owning s piece of sub ( Y ). */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { dst = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); dst = MPosMod( dst, YnprocsD ); if( YisRow ) { rdst = YprocR; cdst = dst; } else { rdst = dst; cdst = YprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { zswap_( &XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( XisRow ) Czgesd2d( ctxt, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); else Czgesd2d( ctxt, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, rdst, cdst ); } } } if( YmyprocR == YprocR ) { /* * The processes owning a piece of sub( Y ) receive the corresponding piece * of sub( X ) and send the piece of sub( Y ) they own to the same process. */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { src = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); src = MPosMod( src, XnprocsD ); if( XisRow ) { rsrc = XprocR; csrc = src; } else { rsrc = src; csrc = XprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( YnpD * size ); if( XisRow ) Czgerv2d( ctxt, 1, YnpD, buf, 1, rsrc, csrc ); else Czgerv2d( ctxt, YnpD, 1, buf, YnpD, rsrc, csrc ); if( YisRow ) Czgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); else Czgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rsrc, csrc ); zcopy_( &YnpD, buf, &ione, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); if( buf ) free( buf ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding piece * of sub( Y ). */ if( XnpD > 0 ) { if( ( myrow != rdst ) || ( mycol != cdst ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Czgerv2d( ctxt, 1, XnpD, buf, 1, rdst, cdst ); else Czgerv2d( ctxt, XnpD, 1, buf, XnpD, rdst, cdst ); zcopy_( &XnpD, buf, &ione, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc ); if( buf ) free( buf ); } } } } } else if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) ) { /* * General case */ tran = ( RRorCC ? CNOTRAN : CTRAN ); if( XisRow ) { Xscope = CCOLUMN; Xm = 1; rsrc = XprocR; } else { Xscope = CROW; Xn = 1; csrc = XprocR; } if( YisRow ) { Yscope = CCOLUMN; Ym = 1; rdst = YprocR; } else { Yscope = CROW; Yn = 1; cdst = YprocR; } lcmb = PB_Clcm( XnprocsD * XnbD, YnprocsD * YnbD ); one = type->one; zero = type->zero; gcdPQ = PB_Cgcd( XnprocsD, YnprocsD ); lcmPQ = ( XnprocsD / gcdPQ ) * YnprocsD; for( k = 0; k < gcdPQ; k++ ) { p = 0; q = k; for( l = 0; l < lcmPQ; l++ ) { Xroc = MModAdd( XprocD, p, XnprocsD ); Yroc = MModAdd( YprocD, q, YnprocsD ); if( ( XmyprocD == Xroc ) || ( YmyprocD == Yroc ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, Xroc, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, Yroc, YprocD, YnprocsD ); PB_CVMinit( &VM, 0, XnpD, YnpD, Xinb1D, Yinb1D, XnbD, YnbD, p, q, XnprocsD, YnprocsD, lcmb ); if( npq = PB_CVMnpq( &VM ) ) { if( ( RRorCC && ( Xroc == Yroc ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( Xroc == YprocR ) && ( XprocR == Yroc ) ) ) { /* * If I am at the intersection of the process cross, or simply common to the * process rows or columns owning sub( X ) and sub( Y ) */ if( ( YmyprocD == Yroc ) && ( YmyprocR == YprocR ) ) { PB_CVMswp( type, &VM, ROW, &Xscope, &tran, npq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Ylinc ); } } else { /* * Perform the message exchange: pack the data I own, send it, receive the * remote data, and unpack it. */ if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) ) { if( XisRow ) { Xn = npq; } else { Xm = npq; } if( YisRow ) { Yn = npq; cdst = Yroc; } else { Ym = npq; rdst = Yroc; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, ROW, &Xscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, zero, buf, Xm ); Czgesd2d( ctxt, Xm, Xn, buf, Xm, rdst, cdst ); Czgerv2d( ctxt, Ym, Yn, buf, Ym, rdst, cdst ); PB_CVMpack( type, &VM, ROW, &Xscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, one, buf, Ym ); if( buf ) free ( buf ); } if( ( YmyprocR == YprocR ) && ( YmyprocD == Yroc ) ) { if( XisRow ) { Xn = npq; csrc = Xroc; } else { Xm = npq; rsrc = Xroc; } if( YisRow ) { Yn = npq; } else { Ym = npq; } buf = PB_Cmalloc( npq * size ); PB_CVMpack( type, &VM, COLUMN, &Yscope, PACKING, NOTRAN, npq, 1, one, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, zero, buf, Ym ); Czgesd2d( ctxt, Ym, Yn, buf, Ym, rsrc, csrc ); Czgerv2d( ctxt, Xm, Xn, buf, Xm, rsrc, csrc ); PB_CVMpack( type, &VM, COLUMN, &Yscope, UNPACKING, &tran, npq, 1, zero, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, one, buf, Xm ); if( buf ) free ( buf ); } } } } p = MModAdd1( p, XnprocsD ); q = MModAdd1( q, YnprocsD ); } } } if( XisR ) { /* * Replicate sub( X ) when necessary */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Czgebr2d( ctxt, COLUMN, top, 1, XnpD, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld ); else Czgebr2d( ctxt, ROW, top, XnpD, 1, Mptr( ((char *) X), Xii, Xjj, Xld, size ), Xld, XmyprocD, XprocR ); } } } if( YisR ) { /* * Replicate sub( Y ) when necessary */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocR == YprocR ) Czgebs2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Czgebr2d( ctxt, COLUMN, top, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YprocR, YmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocR == YprocR ) Czgebs2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld ); else Czgebr2d( ctxt, ROW, top, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, YmyprocD, YprocR ); } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ PB_CpswapND( PB_Cztypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ PB_CpswapND( PB_Cztypeset(), *N, ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed. */ PB_CpswapNN( PB_Cztypeset(), *N, ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY ); } /* * End of PZSWAP */ } scalapack-1.8.0/PBLAS/SRC/pstrsm_.c0000640000175000017500000005162310363532303016446 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstrsm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB ) #else void pstrsm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; float * A, * B; #endif { /* * Purpose * ======= * * PSTRSM solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y'. * * The submatrix X is overwritten on sub( B ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) REAL array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, itmp, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSM", "Illegal TRANS = %c\n", TranOp ); info = -3; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRSM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PSTRSM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PSTRSM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PSTRSM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRSM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); /* * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp2, tmp3 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp1, tmp4 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } if( ChooseAB ) { /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ForceTop ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrsmAB( type, &Var, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } } else { /* * BLACS topologies are always enforced. */ if( ( lside && notran ) || ( !lside && !notran ) ) { OpR = CCOMBINE; OpC = CBCAST; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopR = CTOP_DEFAULT; } else { OpR = CBCAST; OpC = CCOMBINE; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopC = CTOP_DEFAULT; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrsmB( type, &DirB, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies. */ rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PSTRSM */ } scalapack-1.8.0/PBLAS/SRC/pdagemv_.c0000640000175000017500000004517310363532303016544 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdagemv_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdagemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDAGEMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A ) | * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'T' or 't', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'C' or 'c', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * one, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDAGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PDAGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PDAGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PDAGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PDAGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PDAGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PDAGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cdtypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { dagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { dagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } one = utyp->one; if( nota ) { PB_Cpaxpby( utyp, NOCONJG, *M, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PDAGEMV */ } scalapack-1.8.0/PBLAS/SRC/psagemv_.c0000640000175000017500000004507210363532303016561 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psagemv_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psagemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSAGEMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A ) | * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'T' or 't', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'C' or 'c', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * one, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSAGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PSAGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PSAGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PSAGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PSAGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PSAGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PSAGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cstypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { sagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { sagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } one = utyp->one; if( nota ) { PB_Cpaxpby( utyp, NOCONJG, *M, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PSAGEMV */ } scalapack-1.8.0/PBLAS/SRC/pdsyr2k_.c0000640000175000017500000005242010363532303016510 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsyr2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdsyr2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PDSYR2K performs one of the symmetric rank 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) and sub( B ) are n by k submatrices in the * first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't' or 'C' or 'c', K specifies the * number of rows of the submatrices sub( A ) and sub( B ). * K must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYR2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYR2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PDSYR2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDSYR2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PDSYR2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDSYR2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PDSYR2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYR2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PDSYR2K */ } scalapack-1.8.0/PBLAS/SRC/pdtradd_.c0000640000175000017500000003342010363532303016533 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdtradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PDTRADD adds a trapezoidal matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'C', * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * upper or lower trapezoidal submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of the * array C containing the upper or lower triangular part of the * triangular submatrix sub( C ) is to be referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * triangular submatrix sub( C ) is to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * triangular submatrix sub( C ) is to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the upper triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly lower tri- * angular part of the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the lower triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly upper tri- * angular part of the submatrix sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAC, TranOp, UploC, ctop, rtop; int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol, nprow, upper; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRADD", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRADD", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PDTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PDTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cdtypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cdtypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added, but not * transposed. This selection is based on the current setting for the BLACS * broadcast operations. */ rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_Cptradd( PB_Cdtypeset(), &DirAC, &UploC, ( notran ? NOTRAN : TRAN ), *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PDTRADD */ } scalapack-1.8.0/PBLAS/SRC/pscopy_.c0000640000175000017500000002157710363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pscopy_( int * N, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pscopy_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PSCOPY copies one subvector into another, * * sub( Y ) := sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * copied. N must be at least zero. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PSCOPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PSCOPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PSCOPY", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) X), Xi, Xj, Xd, ROW, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) X), Xi, Xj, Xd, COLUMN, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PSCOPY */ } scalapack-1.8.0/PBLAS/SRC/psgeadd_.c0000640000175000017500000002643110363532303016524 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psgeadd_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void psgeadd_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PSGEADD adds a matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'C'. * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, DirC, ctop, rtop; int Ai, Aj, Ci, Cj, TrA, ctxt, info, mycol, myrow, npcol, nprow, notran; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ notran = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !notran ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSGEADD", "Illegal TRANS = %c\n", TrA ); info = -1; } if( notran ) PB_Cchkmat( ctxt, "PSGEADD", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); else PB_Cchkmat( ctxt, "PSGEADD", "A", *N, 3, *M, 2, Ai, Aj, Ad, 8, &info ); PB_Cchkmat( ctxt, "PSGEADD", "C", *M, 2, *N, 3, Ci, Cj, Cd, 13, &info ); } if( info ) { PB_Cabort( ctxt, "PSGEADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cstypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cstypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added. This * selection is based on the current setting for the BLACS broadcast operations. */ if( notran ) { rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) { DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpgeadd( PB_Cstypeset(), &DirA, &DirC, NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cptran( PB_Cstypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } /* * End of PSGEADD */ } scalapack-1.8.0/PBLAS/SRC/picopy_.c0000640000175000017500000002160110363532303016412 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void picopy_( int * N, int * X, int * IX, int * JX, int * DESCX, int * INCX, int * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void picopy_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; int * X, * Y; #endif { /* * Purpose * ======= * * PICOPY copies one subvector into another, * * sub( Y ) := sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * copied. N must be at least zero. * * X (local input) INTEGER array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local output) INTEGER array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PICOPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PICOPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PICOPY", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Get type structure */ type = PB_Citypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) X), Xi, Xj, Xd, ROW, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) X), Xi, Xj, Xd, COLUMN, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PICOPY */ } scalapack-1.8.0/PBLAS/SRC/pctradd_.c0000640000175000017500000003371410363532303016540 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pctradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCTRADD adds a trapezoidal matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C', * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * upper or lower trapezoidal submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of the * array C containing the upper or lower triangular part of the * triangular submatrix sub( C ) is to be referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * triangular submatrix sub( C ) is to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * triangular submatrix sub( C ) is to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the upper triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly lower tri- * angular part of the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the lower triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly upper tri- * angular part of the submatrix sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAC, TranOp, UploC, ctop, rtop; int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol, nprow, upper; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PCTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PCTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cctypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cctypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added, but not * transposed. This selection is based on the current setting for the BLACS * broadcast operations. */ rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_Cptradd( PB_Cctypeset(), &DirAC, &UploC, ( notran ? NOTRAN : ( ( TranOp == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PCTRADD */ } scalapack-1.8.0/PBLAS/SRC/pdgemm_.c0000640000175000017500000004770010363532303016370 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdgemm_( F_CHAR_T TRANSA, F_CHAR_T TRANSB, int * M, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdgemm_( TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANSA, TRANSB; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PDGEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'C', * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * * TRANSB = 'C' or 'c' op( sub( B ) ) = sub( B )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, DirBC, OpC, OpR, TrA, TrB, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ChooseBC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, nota, notb, npcol, nprow; double ABest, ACest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANSA )[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( F2C_CHAR( TRANSB )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1001 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDGEMM", "Illegal TRANSA = %c\n", TrA ); info = -1; } else if( ( !notb ) && ( TrB != CTRAN ) && ( TrB != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDGEMM", "Illegal TRANSB = %c\n", TrB ); info = -2; } if( nota ) PB_Cchkmat( ctxt, "PDGEMM", "A", *M, 3, *K, 5, Ai, Aj, Ad, 10, &info ); else PB_Cchkmat( ctxt, "PDGEMM", "A", *K, 5, *M, 3, Ai, Aj, Ad, 10, &info ); if( notb ) PB_Cchkmat( ctxt, "PDGEMM", "B", *K, 5, *N, 4, Bi, Bj, Bd, 14, &info ); else PB_Cchkmat( ctxt, "PDGEMM", "B", *N, 4, *K, 5, Bi, Bj, Bd, 14, &info ); PB_Cchkmat( ctxt, "PDGEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PDGEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO || *K == 0 ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * If alpha or K is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( BETA[REAL_PART] == ONE ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char * ) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABest: both operands sub( A ) and sub( B ) are communicated (M, N >> K) * ACest: both operands sub( A ) and sub( C ) are communicated (K, N >> M) * BCest: both operands sub( B ) and sub( C ) are communicated (M, K >> N) */ ABest = (double)(*K); ACest = (double)(*M); BCest = (double)(*N); if( notb ) { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); ABest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[MB_], nprow ); BCest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ACest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } else { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); BCest *= CBRATIO * ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *K, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } ChooseAB = ( ( ABest <= ( 1.3 * BCest ) ) && ( ABest <= ( 1.3 * ACest ) ) ); ChooseBC = ( ( BCest <= ACest ) && ( ( 1.3 * BCest ) <= ABest ) ); /* * BLACS topologies are enforced iff M, N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) && ( *K > nb ) ); if( ChooseAB ) { OpR = CBCAST; OpC = CBCAST; } else if( ChooseBC ) { if( nota ) { OpR = CCOMBINE; OpC = CBCAST; } else { OpR = CBCAST; OpC = CCOMBINE; } } else { if( notb ) { OpR = CBCAST; OpC = CCOMBINE; } else { OpR = CCOMBINE; OpC = CBCAST; } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_IRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_IRING ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirAB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirBC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); if( ChooseAB ) { PB_CpgemmAB( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else if( ChooseBC ) { PB_CpgemmBC( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { PB_CpgemmAC( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PDGEMM */ } scalapack-1.8.0/PBLAS/SRC/pstrmm_.c0000640000175000017500000005235210363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstrmm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB ) #else void pstrmm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; float * A, * B; #endif { /* * Purpose * ======= * * PSTRMM performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X'. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) REAL array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMM", "Illegal TRANS = %c\n", TranOp ); info = -3; } if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PSTRMM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PSTRMM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PSTRMM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRMM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ChooseAB ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrmmAB( type, &Var, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } else { if( ( lside && notran ) || ( !( lside ) && !( notran ) ) ) { OpR = CCOMBINE; OpC = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } } else { OpR = CBCAST; OpC = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } } if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrmmB( type, &DirB, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PSTRMM */ } scalapack-1.8.0/PBLAS/SRC/pcgemm_.c0000640000175000017500000005066710363532303016375 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcgemm_( F_CHAR_T TRANSA, F_CHAR_T TRANSB, int * M, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcgemm_( TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANSA, TRANSB; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PCGEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * conjg(A(IA:IA+K-1,JA:JA+M-1)') if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * conjg(B(IB:IB+N-1,JB:JB+K-1)') if TRANSB = 'C'. * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * * TRANSB = 'C' or 'c' op( sub( B ) ) = conjg( sub( B )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, DirBC, OpC, OpR, TrA, TrB, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ChooseBC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, nota, notb, npcol, nprow; double ABest, ACest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANSA )[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( F2C_CHAR( TRANSB )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1001 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCGEMM", "Illegal TRANSA = %c\n", TrA ); info = -1; } else if( ( !notb ) && ( TrB != CTRAN ) && ( TrB != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCGEMM", "Illegal TRANSB = %c\n", TrB ); info = -2; } if( nota ) PB_Cchkmat( ctxt, "PCGEMM", "A", *M, 3, *K, 5, Ai, Aj, Ad, 10, &info ); else PB_Cchkmat( ctxt, "PCGEMM", "A", *K, 5, *M, 3, Ai, Aj, Ad, 10, &info ); if( notb ) PB_Cchkmat( ctxt, "PCGEMM", "B", *K, 5, *N, 4, Bi, Bj, Bd, 14, &info ); else PB_Cchkmat( ctxt, "PCGEMM", "B", *N, 4, *K, 5, Bi, Bj, Bd, 14, &info ); PB_Cchkmat( ctxt, "PCGEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PCGEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * If alpha or K is zero, sub( C ) := beta * sub( C ). */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char * ) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABest: both operands sub( A ) and sub( B ) are communicated (M, N >> K) * ACest: both operands sub( A ) and sub( C ) are communicated (K, N >> M) * BCest: both operands sub( B ) and sub( C ) are communicated (M, K >> N) */ ABest = (double)(*K); ACest = (double)(*M); BCest = (double)(*N); if( notb ) { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); ABest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[MB_], nprow ); BCest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ACest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } else { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); BCest *= CBRATIO * ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *K, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } ChooseAB = ( ( ABest <= ( 1.3 * BCest ) ) && ( ABest <= ( 1.3 * ACest ) ) ); ChooseBC = ( ( BCest <= ACest ) && ( ( 1.3 * BCest ) <= ABest ) ); /* * BLACS topologies are enforced iff M, N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) && ( *K > nb ) ); if( ChooseAB ) { OpR = CBCAST; OpC = CBCAST; } else if( ChooseBC ) { if( nota ) { OpR = CCOMBINE; OpC = CBCAST; } else { OpR = CBCAST; OpC = CCOMBINE; } } else { if( notb ) { OpR = CBCAST; OpC = CCOMBINE; } else { OpR = CCOMBINE; OpC = CBCAST; } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_IRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_IRING ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirAB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirBC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); if( ChooseAB ) { PB_CpgemmAB( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else if( ChooseBC ) { PB_CpgemmBC( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { PB_CpgemmAC( type, &DirAB, &DirBC, ( nota ? NOTRAN : ( ( TrA == CCOTRAN ) ? COTRAN : TRAN ) ), ( notb ? NOTRAN : ( ( TrB == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCGEMM */ } scalapack-1.8.0/PBLAS/SRC/pdcopy_.c0000640000175000017500000002163110363532303016410 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdcopy_( int * N, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdcopy_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PDCOPY copies one subvector into another, * * sub( Y ) := sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * copied. N must be at least zero. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PDCOPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PDCOPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PDCOPY", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) X), Xi, Xj, Xd, ROW, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) X), Xi, Xj, Xd, COLUMN, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PDCOPY */ } scalapack-1.8.0/PBLAS/SRC/pzahemv_.c0000640000175000017500000005631410363532303016572 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzahemv_( F_CHAR_T UPLO, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzahemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZAHEMV performs the matrix-vector operation * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a n element real subvec- * tor, sub( X ) is an n element subvector and sub( A ) is an n by n * Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper, usiz; double * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PZAHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PZAHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PZAHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PZAHEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); size = type->size; utyp = PB_Cdtypeset(); usiz = type->usiz; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( utyp, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) dascal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) dascal_( &Amp, ((char *) tbeta), YC, &ione ); } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); zagemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, usiz ), &ione ); zagemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || Yrow < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || Ycol < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PZAHEMV */ } scalapack-1.8.0/PBLAS/SRC/pcher2_.c0000640000175000017500000004332210363532303016276 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcher2_( F_CHAR_T UPLO, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY, float * A, int * IA, int * JA, int * DESCA ) #else void pcher2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCHER2 performs the Hermitian rank 2 operation * * sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) + * conjg( alpha )*sub( Y )*conjg( sub( X )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and * sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; cmplx Calpha; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_], YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCHER2", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PCHER2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PCHER2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PCHER2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PCHER2", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by * sub( A ) */ if( *INCY == Yd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ROW, &YR, YRd0, &YRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0, YRd0, ROW, &YC, YCd0, &YCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, COLUMN, &YC, YCd0, &YCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0, YCd0, COLUMN, &YR, YRd0, &YRfr ); } /* * Local rank-2 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd0[LLD_]; YCld = YCd0[LLD_]; XRld = XRd0[LLD_]; YRld = YRd0[LLD_]; Calpha[REAL_PART] = ALPHA[REAL_PART]; Calpha[IMAG_PART] = -ALPHA[IMAG_PART]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cgerc_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); cgerc_( &Akp, &Anq0, ((char *) Calpha), YC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzher2 ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzher2 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cgerc_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); cgerc_( &Amp0, &Anq0, ((char *) Calpha), Mptr( YC, Akp, 0, YCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); if( YRfr ) free( YR ); if( YCfr ) free( YC ); /* * End of PCHER2 */ } scalapack-1.8.0/PBLAS/SRC/pcdotu_.c0000640000175000017500000006671410363532303016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcdotu_( int * N, float * DOT, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcdotu_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; float * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PCDOTU forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) COMPLEX array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCDOTU", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PCDOTU", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PCDOTU", info ); return; } #endif DOT[REAL_PART] = ZERO; DOT[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cctypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cctypeset(); size = type->size; dot = type->Fvvdotu; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Ccgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Ccgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Ccgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Ccgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Ccgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Ccgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Ccgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Ccgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Ccgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Ccgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Ccgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Ccgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Ccgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cctypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cctypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotu ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cctypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } /* * End of PCDOTU */ } scalapack-1.8.0/PBLAS/SRC/pcsymm_.c0000640000175000017500000005324310363532303016426 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcsymm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcsymm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PCSYMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the symmetric submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PCSYMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PCSYMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PCSYMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PCSYMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PCSYMM */ } scalapack-1.8.0/PBLAS/SRC/pdtrsm_.c0000640000175000017500000005167210363532303016433 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtrsm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB ) #else void pdtrsm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; double * A, * B; #endif { /* * Purpose * ======= * * PDTRSM solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y'. * * The submatrix X is overwritten on sub( B ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) DOUBLE PRECISION array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, itmp, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSM", "Illegal TRANS = %c\n", TranOp ); info = -3; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PDTRSM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PDTRSM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PDTRSM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRSM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); /* * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp2, tmp3 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp1, tmp4 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } if( ChooseAB ) { /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ForceTop ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrsmAB( type, &Var, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } } else { /* * BLACS topologies are always enforced. */ if( ( lside && notran ) || ( !lside && !notran ) ) { OpR = CCOMBINE; OpC = CBCAST; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopR = CTOP_DEFAULT; } else { OpR = CBCAST; OpC = CCOMBINE; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopC = CTOP_DEFAULT; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrsmB( type, &DirB, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ), &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies. */ rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PDTRSM */ } scalapack-1.8.0/PBLAS/SRC/pchemm_.c0000640000175000017500000005353010363532303016366 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pchemm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pchemm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PCHEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a Hermitian submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the Hermitian submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * Hermitian submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the Hermitian submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the Hermitian submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * Hermitian submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the Hermitian submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the Hermitian submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PCHEMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCHEMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PCHEMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCHEMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PCHEMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCHEMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PCHEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PCHEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, CONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, CONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PCHEMM */ } scalapack-1.8.0/PBLAS/SRC/pdzasum_.c0000640000175000017500000002534710363532303016605 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdzasum_( int * N, double * ASUM, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdzasum_( N, ASUM, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * ASUM; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDZASUM returns the sum of absolute values of the entries of a sub- * vector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ASUM (local output) DOUBLE PRECISION * On exit, ASUM specifies the sum of absolute values of the * subvector sub( X ) only in its scope (See below for further * details). * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Xptr = NULL, top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDZASUM", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PDZASUM", info ); return; } #endif /* * Initialize ASUM */ *ASUM = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute ASUM */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { type = PB_Cztypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); *ASUM = ABS( ((double *) Xptr)[REAL_PART] ) + ABS( ((double *) Xptr)[IMAG_PART] ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); dzvasum_( &Xnq, ((char *) ASUM), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } /* * If Xnq <= 0, ASUM is zero (see initialization above) */ if( ( npcol > 1 ) && ( Xcol >= 0 ) ) { /* * Combine the local results if npcol > 1 and Xcol >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cztypeset(); dzvasum_( &Xnp, ((char *) ASUM), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } /* * If Xnp <= 0, ASUM is zero (see initialization above) */ if( ( nprow > 1 ) && ( Xrow >= 0 ) ) { /* * Combine the local results if nprow > 1 and Xrow >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } /* * End of PDZASUM */ } scalapack-1.8.0/PBLAS/SRC/pscasum_.c0000640000175000017500000002532410363532303016570 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pscasum_( int * N, float * ASUM, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pscasum_( N, ASUM, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * ASUM; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSCASUM returns the sum of absolute values of the entries of a sub- * vector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ASUM (local output) REAL * On exit, ASUM specifies the sum of absolute values of the * subvector sub( X ) only in its scope (See below for further * details). * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Xptr = NULL, top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSCASUM", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PSCASUM", info ); return; } #endif /* * Initialize ASUM */ *ASUM = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute ASUM */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { type = PB_Cctypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); *ASUM = ABS( ((float *) Xptr)[REAL_PART] ) + ABS( ((float *) Xptr)[IMAG_PART] ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); scvasum_( &Xnq, ((char *) ASUM), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } /* * If Xnq <= 0, ASUM is zero (see initialization above) */ if( ( npcol > 1 ) && ( Xcol >= 0 ) ) { /* * Combine the local results if npcol > 1 and Xcol >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and compute the local sum */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cctypeset(); scvasum_( &Xnp, ((char *) ASUM), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } /* * If Xnp <= 0, ASUM is zero (see initialization above) */ if( ( nprow > 1 ) && ( Xrow >= 0 ) ) { /* * Combine the local results if nprow > 1 and Xrow >= 0, i.e sub( X ) is * distributed. */ top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, 1, ((char *)ASUM), 1, -1, mycol ); } } return; } /* * End of PSCASUM */ } scalapack-1.8.0/PBLAS/SRC/pilaenv.f0000640000175000017500000000567110363532303016422 0ustar muammarmuammar INTEGER FUNCTION PILAENV( ICTXT, PREC ) * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * .. Scalar Arguments .. INTEGER ICTXT CHARACTER*1 PREC * .. * * Purpose * ======= * * PILAENV returns the positive integer value of the logical blocking * size. This value is machine and precision specific. This version pro- * vides a logical blocking size which should give good but not optimal * performance on many of the currently available distributed-memory * concurrent computers. Users are encouraged to modify this subroutine * to set this tuning parameter for their particular machine. * * Arguments * ========= * * ICTXT (local input) INTEGER * On entry, ICTXT specifies the BLACS context handle, indica- * ting the global context of the operation. The context itself * is global, but the value of ICTXT is local. * * PREC (global input) CHARACTER*1 * On input, PREC specifies the precision for which the logical * block size should be returned as follows: * PREC = 'S' or 's' single precision real, * PREC = 'D' or 'd' double precision real, * PREC = 'C' or 'c' single precision complex, * PREC = 'Z' or 'z' double precision complex, * PREC = 'I' or 'i' integer. * * Notes * ===== * * Before modifying this routine to tune the library performance on your * system, be aware of the following: * * 1) The value this function returns must be STRICTLY LARGER THAN ZERO, * * 2) If you are planning to link your program with different instances * of the library, (for example on a heterogeneous machine), you MUST * compile each instance of the library with the EXACT SAME version of * this routine for obvious inter-operability reasons. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * IF( LSAME( PREC, 'S' ) ) THEN * * Single precision real logical block size * PILAENV = 32 * ELSE IF( LSAME( PREC, 'D' ) ) THEN * * Double precision real logical block size * PILAENV = 32 * ELSE IF( LSAME( PREC, 'C' ) ) THEN * * Single precision complex logical block size * PILAENV = 32 * ELSE IF( LSAME( PREC, 'Z' ) ) THEN * * Double precision complex logical block size * PILAENV = 32 * ELSE IF( LSAME( PREC, 'I' ) ) THEN * * Integer logical block size * PILAENV = 32 * ELSE * * Probably unused * PILAENV = 32 * END IF * RETURN * * End of PILAENV * END scalapack-1.8.0/PBLAS/SRC/pccopy_.c0000640000175000017500000002160510363532303016410 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pccopy_( int * N, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pccopy_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PCCOPY copies one subvector into another, * * sub( Y ) := sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * copied. N must be at least zero. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local output) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCCOPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PCCOPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PCCOPY", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) X), Xi, Xj, Xd, ROW, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) X), Xi, Xj, Xd, COLUMN, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PCCOPY */ } scalapack-1.8.0/PBLAS/SRC/psgemv_.c0000640000175000017500000004307510363532303016421 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psgemv_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psgemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSGEMV performs one of the matrix-vector operations * * sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors * and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ), * * TRANS = 'T' or 't', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ), * * TRANS = 'C' or 'c', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PSGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PSGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PSGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PSGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PSGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PSGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( BETA[REAL_PART] == ZERO ) { sset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { sscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( BETA[REAL_PART] == ZERO ) { sset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { sscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { sgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { sgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PSGEMV */ } scalapack-1.8.0/PBLAS/SRC/pzgeadd_.c0000640000175000017500000002727010363532303016535 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzgeadd_( F_CHAR_T TRANS, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzgeadd_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZGEADD adds a matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C'. * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, DirC, ctop, rtop; int Ai, Aj, Ci, Cj, TrA, ctxt, info, mycol, myrow, npcol, nprow, notran; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ notran = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !notran ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZGEADD", "Illegal TRANS = %c\n", TrA ); info = -1; } if( notran ) PB_Cchkmat( ctxt, "PZGEADD", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); else PB_Cchkmat( ctxt, "PZGEADD", "A", *N, 3, *M, 2, Ai, Aj, Ad, 8, &info ); PB_Cchkmat( ctxt, "PZGEADD", "C", *M, 2, *N, 3, Ci, Cj, Cd, 13, &info ); } if( info ) { PB_Cabort( ctxt, "PZGEADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added. This * selection is based on the current setting for the BLACS broadcast operations. */ if( notran ) { rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) { DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpgeadd( PB_Cztypeset(), &DirA, &DirC, NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else if( TrA == CTRAN ) { PB_Cptran( PB_Cztypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cptran( PB_Cztypeset(), CONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } /* * End of PZGEADD */ } scalapack-1.8.0/PBLAS/SRC/pcagemv_.c0000640000175000017500000004533010363532303016536 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcagemv_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcagemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCAGEMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) ) * + abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A ) | * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'T' or 't', * sub( Y ) := |alpha|*|sub( A )'| * |sub( X )| + * |beta*sub( Y )|, * * TRANS = 'C' or 'c', * sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| + * |beta*sub( Y )|. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * one, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCAGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PCAGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PCAGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PCAGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PCAGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PCAGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PCAGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); utyp = PB_Cstypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cagemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, utyp->size ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], utyp->size ), INCY ); } } } one = utyp->one; if( nota ) { PB_Cpaxpby( utyp, NOCONJG, *M, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PCAGEMV */ } scalapack-1.8.0/PBLAS/SRC/psscal_.c0000640000175000017500000002167410602576752016422 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psscal_( int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void psscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PSSCAL multiplies an n element subvector sub( X ) by the scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PSSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PSSCAL", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { if( ALPHA[REAL_PART] == ONE ) return; /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; if( ALPHA[REAL_PART] == ZERO ) { sset_( &Xnq, ((char *) ALPHA), ((char *)(X+(Xii+Xjj*Xld))), &Xld ); } else { sscal_( &Xnq, ((char *) ALPHA), ((char *)(X+(Xii+Xjj*Xld))), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { if( ALPHA[REAL_PART] == ONE ) return; /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { if( ALPHA[REAL_PART] == ZERO ) { sset_( &Xnp, ((char *) ALPHA), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } else { sscal_( &Xnp, ((char *) ALPHA), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } } } return; } /* * End of PSSCAL */ } scalapack-1.8.0/PBLAS/SRC/pdtrsv_.c0000640000175000017500000007607610363532303016451 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdtrsv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdtrsv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PDTRSV solves one of the systems of equations * * sub( A )*sub( X ) = b, or sub( A )'*sub( X ) = b, * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * b and sub( X ) are n element subvectors and sub( A ) is an n by n * unit, or non-unit, upper or lower triangular submatrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * sub( X ) = b. * * TRANS = 'T' or 't' sub( A )' * sub( X ) = b. * * TRANS = 'C' or 'c' sub( A )' * sub( X ) = b. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On entry, sub( X ) is the n element right-hand side * b. On exit, sub( X ) is overwritten with the solution subvec- * tor. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, btop, ctop, * negone, * one, * zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, XACapbX, XACfr, XACld, XACsum, XARapbX, XARfr, XARld, XARsum, Xi, Xj, ctxt, info, ione=1, k, kb, kbnext, kbprev, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XACd[DLEN_], XARd[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XAC = NULL, * XAR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PDTRSV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PDTRSV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PDTRSV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PDTRSV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); size = type->size; one = type->one; zero = type->zero; negone = type->negone; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the last * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ if( Akp > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow, Arow, nprow ); Akp -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) dgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol ); Cdgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) dset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Akp > 0 && Anq0 > 0 ) dgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } else { if( Anq0 > 0 ) dgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Cdgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the first * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Anp0 = Anp - Akp ) > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow, nprow ); Anp0 -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) dgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol ); Cdgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) dset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Anp0 > 0 && Anq0 > 0 ) dgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp+ktmp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp+ktmp, 0, XACld, size ), &ione ); } else { if( Anq0 > 0 ) dgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Cdgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } } else { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAR in process row owning the first row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq0 = Anq - Akq ) > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol, npcol ); Anq0 -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) dgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow ); Cdgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) dset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Anq0 > 0 ) dgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq+ktmp, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq+ktmp, XARld, size ), &XARld ); } else { if( Anp0 > 0 ) dgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Cdgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process row owning the last row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later * is only locally updated. */ if( Akq > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol, Acol, npcol ); Akq -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) dgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow ); Cdgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) dset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Akq > 0 ) dgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } else { if( Anp0 > 0 ) dgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Cdgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } } if( XACfr ) free( XAC ); if( XARfr ) free( XAR ); /* * End of PDTRSV */ } scalapack-1.8.0/PBLAS/SRC/pstradd_.c0000640000175000017500000003333410363532303016556 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pstradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PSTRADD adds a trapezoidal matrix to another * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'C', * * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n * upper or lower trapezoidal submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of the * array C containing the upper or lower triangular part of the * triangular submatrix sub( C ) is to be referenced as follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * triangular submatrix sub( C ) is to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * triangular submatrix sub( C ) is to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the form of op( sub( A ) ) to be * used in the matrix addition as follows: * * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANS = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the upper triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly lower tri- * angular part of the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar- * ray contains the local entries corresponding to the entries * of the lower triangular submatrix sub( A ), and the local en- * tries corresponding to the entries of the strictly upper tri- * angular part of the submatrix sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the triangular submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAC, TranOp, UploC, ctop, rtop; int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol, nprow, upper; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRADD", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRADD", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PSTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PSTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRADD", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * And when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( PB_Cstypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cstypeset(), &UploC, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ /* * This operation mainly involves point-to-point send and receive communication. * There is therefore no particular BLACS topology to recommend. Still, one can * choose the main loop direction in which the operands will be added, but not * transposed. This selection is based on the current setting for the BLACS * broadcast operations. */ rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( *M <= *N ) DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_Cptradd( PB_Cstypeset(), &DirAC, &UploC, ( notran ? NOTRAN : TRAN ), *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PSTRADD */ } scalapack-1.8.0/PBLAS/SRC/pchemv_.c0000640000175000017500000005616510363532303016406 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pchemv_( F_CHAR_T UPLO, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pchemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCHEMV performs the matrix-vector operation * * sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, sub( X ) and sub( Y ) are n element sub- * vectors and sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; float * tbeta; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PCHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PCHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PCHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PCHEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * When alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { cset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { cscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { cset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { cscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( type, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) { if( ( tbeta[REAL_PART] == ZERO ) && ( tbeta[IMAG_PART] == ZERO ) ) { cset_( &Anq, ((char *) tbeta), YR, &YRld ); } else { cscal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) { if( ( tbeta[REAL_PART] == ZERO ) && ( tbeta[IMAG_PART] == ZERO ) ) { cset_( &Amp, ((char *) tbeta), YC, &ione ); } else { cscal_( &Amp, ((char *) tbeta), YC, &ione ); } } } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); cgemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzhemv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzhemv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, size ), &ione ); cgemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PCHEMV */ } scalapack-1.8.0/PBLAS/SRC/pdsyr2_.c0000640000175000017500000004247210363532303016343 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsyr2_( F_CHAR_T UPLO, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY, double * A, int * IA, int * JA, int * DESCA ) #else void pdsyr2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDSYR2 performs the symmetric rank 2 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + * alpha*sub( Y )*sub( X )' + sub( A ) , * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and * sub( A ) is an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_], YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYR2", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PDSYR2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PDSYR2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PDSYR2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYR2", info ); return; } #endif /* * Quick return if possible */ if( (*N == 0) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by * sub( A ) */ if( *INCY == Yd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ROW, &YR, YRd0, &YRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0, YRd0, ROW, &YC, YCd0, &YCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, COLUMN, &YC, YCd0, &YCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0, YCd0, COLUMN, &YR, YRd0, &YRfr ); } /* * Local rank-2 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd0[LLD_]; YCld = YCd0[LLD_]; XRld = XRd0[LLD_]; YRld = YRd0[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); dger_( &Akp, &Anq0, ((char *) ALPHA), YC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzsyr2 ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzsyr2 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); dger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( YC, Akp, 0, YCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); if( YRfr ) free( YR ); if( YCfr ) free( YC ); /* * End of PDSYR2 */ } scalapack-1.8.0/PBLAS/SRC/pcgemv_.c0000640000175000017500000004356610363532303016406 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcgemv_( F_CHAR_T TRANS, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcgemv_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCGEMV performs one of the matrix-vector operations * * sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ), or * sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1). * * When TRANS = 'N', * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y, * Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y, * and, otherwise * * sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X, * X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors * and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ), * * TRANS = 'T' or 't', * sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ), * * TRANS = 'C' or 'c', * sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) + * beta*sub( Y ). * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char TrA, Yroc, * tbeta, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol, myrow, nota, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCGEMV", "Illegal TRANS=%c\n", TrA ); info = -1; } PB_Cchkmat( ctxt, "PCGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8, &info ); if( nota ) { PB_Cchkvec( ctxt, "PCGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PCGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18, &info ); } else { PB_Cchkvec( ctxt, "PCGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12, &info ); PB_Cchkvec( ctxt, "PCGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18, &info ); } } if( info ) { PB_Cabort( ctxt, "PCGEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * When alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { cset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { cscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { cset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { cscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( nota ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta, YA, &ione ); } if( XAfr ) free( XA ); /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Local matrix-vector multiply iff I own some data */ Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow ); Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A), Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta, YA, &YAd[LLD_] ); } if( XAfr ) free( XA ); /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PCGEMV */ } scalapack-1.8.0/PBLAS/SRC/pztrsm_.c0000640000175000017500000005171410363532303016456 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztrsm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB ) #else void pztrsm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; double * A, * B; #endif { /* * Purpose * ======= * * PZTRSM solves one of the matrix equations * * op( sub( A ) )*X = alpha*sub( B ), or * * X*op( sub( A ) ) = alpha*sub( B ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, X and sub( B ) are m by n submatrices, sub( A ) is * a unit, or non-unit, upper or lower triangular submatrix and op( Y ) * is one of * * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ). * * The submatrix X is overwritten on sub( B ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) appears on * the left or right of X as follows: * * SIDE = 'L' or 'l' op( sub( A ) )*X = alpha*sub( B ), * * SIDE = 'R' or 'r' X*op( sub( A ) ) = alpha*sub( B ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n solution submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, itmp, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSM", "Illegal TRANS = %c\n", TranOp ); info = -3; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRSM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PZTRSM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PZTRSM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PZTRSM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRSM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); /* * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( (double)(CEIL( *M, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp2, tmp3 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); itmp = MIN( Ad[MB_], Ad[NB_] ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ); ChooseAB = ( ABestR <= ( 2.0 * Best ) ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); itmp = MIN( Ad[MB_], Ad[NB_] ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( (double)(CEIL( *N, itmp )) * (double)(itmp) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : ONE ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : ONE ) ) + MAX( tmp1, tmp4 ) ); ChooseAB = ( ( ABestL <= ( 2.0 * Best ) ) || ( ABestR <= ( 2.0 * Best ) ) ); } } if( ChooseAB ) { /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ForceTop ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrsmAB( type, &Var, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } } else { /* * BLACS topologies are always enforced. */ if( ( lside && notran ) || ( !lside && !notran ) ) { OpR = CCOMBINE; OpC = CBCAST; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopR = CTOP_DEFAULT; } else { OpR = CBCAST; OpC = CCOMBINE; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } /* * Remove the next line when the BLACS combine operations support ring * topologies */ TopC = CTOP_DEFAULT; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrsmB( type, &DirB, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); /* * Restore the BLACS topologies. */ rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZTRSM */ } scalapack-1.8.0/PBLAS/SRC/pzscal_.c0000640000175000017500000002240610602576752016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzscal_( int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pzscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PZSCAL multiplies an n element subvector sub( X ) by the scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PZSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PZSCAL", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { if( ( ALPHA[REAL_PART] == ONE ) && ( ALPHA[IMAG_PART] == ZERO ) ) return; /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { zset_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } else { zscal_( &Xnq, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xld, type->size ), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { if( ( ALPHA[REAL_PART] == ONE ) && ( ALPHA[IMAG_PART] == ZERO ) ) return; /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cztypeset(); if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { zset_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } else { zscal_( &Xnp, ((char *) ALPHA), Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ), INCX ); } } } return; } /* * End of PZSCAL */ } scalapack-1.8.0/PBLAS/SRC/psasymv_.c0000640000175000017500000005570210363532303016622 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psasymv_( F_CHAR_T UPLO, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psasymv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSASYMV performs the matrix-vector operation * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a n element real subvec- * tor, sub( X ) is an n element subvector and sub( A ) is an n by n * symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper, usiz; float * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PSASYMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PSASYMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PSASYMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PSASYMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cstypeset(); size = usiz = type->size; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( utyp, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) sascal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) sascal_( &Amp, ((char *) tbeta), YC, &ione ); } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); sagemv_( C2F_CHAR( TRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzasymv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzasymv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, usiz ), &ione ); sagemv_( C2F_CHAR( TRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || Yrow < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || Ycol < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PSASYMV */ } scalapack-1.8.0/PBLAS/SRC/pztranc_.c0000640000175000017500000002273610363532303016602 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztranc_( int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pztranc_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZTRANC transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, * * op( X ) = conjg( X )'. * * Thus, op( sub( A ) ) denotes conjg( A(IA:IA+N-1,JA:JA+M-1)' ). * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PZTRANC", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PZTRANC", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRANC", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cztypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cztypeset(), CONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PZTRANC */ } scalapack-1.8.0/PBLAS/SRC/pzamax_.c0000640000175000017500000005321410363532303016414 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzamax_( int * N, double * AMAX, int * INDX, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pzamax_( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * INDX, * IX, * JX, * N; double * AMAX; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PZAMAX computes the global index of the maximum element in absolute * value of a subvector sub( X ). The global index is returned in INDX * and the value of that element is returned in AMAX, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * AMAX (global output) COMPLEX*16 array * On exit, AMAX specifies the largest entry in absolute value * of the subvector sub( X ) only in its scope (See below for * further details). * * INDX (global output) INTEGER * On exit, INDX specifies the global index of the maximum ele- * ment in absolute value of the subvector sub( X ) only in its * scope (See below for further details). * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char cbtop, cctop, rbtop, rctop; int Xcol, Xgindx, Xi, Xii, Ximb, Xinb, Xj, Xjj, Xlindx, Xld, Xmb, Xnb, Xnp, Xnq, Xrow, Xsrc, ctxt, dist, dst, idumm, info, k, maxpos, mycol, mydist, myrow, npcol, nprow, src, size; PBTYP_T * type; /* * .. Local Arrays .. */ char * Xptr; int Xd[DLEN_]; cmplx16 work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PZAMAX", "X", *N, 1, Xi, Xj, Xd, *INCX, 7, &info ); if( info ) { PB_Cabort( ctxt, "PZAMAX", info ); return; } #endif /* * Initialize INDX and AMAX */ *INDX = 0; AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *INCX == 1 ) && ( Xd[M_] == 1 ) && ( *N == 1 ) ) { /* * Make sure I own some data and compute INDX and AMAX */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *INDX = *JX; type = PB_Cztypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); AMAX[REAL_PART] = ((double*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((double*)(Xptr))[IMAG_PART]; } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( ( rctop == CTOP_DEFAULT ) || ( rctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); size = type->size; Xlindx = Xjj - 1 + izamax_( &Xnq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); Xptr = Mptr( ((char *) X), Xii, Xlindx, Xld, size ); work[0][REAL_PART] = ((double*)(Xptr))[REAL_PART]; work[0][IMAG_PART] = ((double*)(Xptr))[IMAG_PART]; work[1][REAL_PART] = ((double)( Xgindx+1 )); work[1][IMAG_PART] = ZERO; } else { work[0][REAL_PART] = ZERO; work[0][IMAG_PART] = ZERO; work[1][REAL_PART] = ZERO; work[1][IMAG_PART] = ZERO; } /* * Combine the local results using a 1-tree topology within process column 0 * if npcol > 1 or Xcol >= 0, i.e sub( X ) is distributed. */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Czgesd2d( ctxt, 2, 1, ((char*)work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Czgerv2d( ctxt, 2, 1, ((char*) work[2]), 2, myrow, src ); if( ( ABS( work[0][REAL_PART] ) + ABS( work[0][IMAG_PART] ) ) < ( ABS( work[2][REAL_PART] ) + ABS( work[2][IMAG_PART] ) ) ) { work[0][REAL_PART] = work[2][REAL_PART]; work[0][IMAG_PART] = work[2][IMAG_PART]; work[1][REAL_PART] = work[3][REAL_PART]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of INDX and AMAX within * their process row. */ rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Czgebs2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2 ); } else { Czgebr2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2, myrow, 0 ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ AMAX[REAL_PART] = work[0][REAL_PART]; AMAX[IMAG_PART] = work[0][IMAG_PART]; *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *JX ) : ( (int)(work[1][REAL_PART]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; type = PB_Cztypeset(); size = type->size; Xlindx = Xjj - 1 + izamax_( &Xnq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); Xptr = Mptr( ((char *) X), Xii, Xlindx, Xld, size ); AMAX[REAL_PART] = ((double*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((double*)(Xptr))[IMAG_PART]; } else { AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; } if( Xcol >= 0 ) { /* * Combine leave on all the local maximum if Xcol >= 0, i.e sub( X ) is * distributed */ Czgamx2d( ctxt, ROW, &rctop, 1, 1, ((char*)AMAX), 1, &idumm, &maxpos, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( ( AMAX[REAL_PART] != ZERO ) || ( AMAX[IMAG_PART] != ZERO ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == maxpos ) { Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); *INDX = Xgindx + 1; Cigebs2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1, myrow, maxpos ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *JX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *JX ) : Xlindx + 1 ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ( cctop == CTOP_DEFAULT ) || ( cctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); size = type->size; Xlindx = Xii - 1 + izamax_( &Xnp, Mptr( ((char *)X), Xii, Xjj, Xld, size ), INCX ); Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); Xptr = Mptr( ((char *) X), Xlindx, Xjj, Xld, size ); work[0][REAL_PART] = ((double*)(Xptr))[REAL_PART]; work[0][IMAG_PART] = ((double*)(Xptr))[IMAG_PART]; work[1][REAL_PART] = ((double)( Xgindx+1 )); work[1][IMAG_PART] = ZERO; } else { work[0][REAL_PART] = ZERO; work[0][IMAG_PART] = ZERO; work[1][REAL_PART] = ZERO; work[1][IMAG_PART] = ZERO; } /* * Combine the local results using a 1-tree topology within process row 0 * if nprow > 1 or Xrow >= 0, i.e sub( X ) is distributed. */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Czgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Czgerv2d( ctxt, 2, 1, ((char*) work[2]), 2, src, mycol ); if( ( ABS( work[0][REAL_PART] ) + ABS( work[0][IMAG_PART] ) ) < ( ABS( work[2][REAL_PART] ) + ABS( work[2][IMAG_PART] ) ) ) { work[0][REAL_PART] = work[2][REAL_PART]; work[0][IMAG_PART] = work[2][IMAG_PART]; work[1][REAL_PART] = work[3][REAL_PART]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process row 0 broadcasts the combined values of INDX and AMAX within their * process column. */ cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Czgebs2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2 ); } else { Czgebr2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2, 0, mycol ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ AMAX[REAL_PART] = work[0][REAL_PART]; AMAX[IMAG_PART] = work[0][IMAG_PART]; *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *IX ) : ( (int)(work[1][REAL_PART]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; type = PB_Cztypeset(); size = type->size; Xlindx = Xii - 1 + izamax_( &Xnp, Mptr( ((char *) X), Xii, Xjj, Xld, size ), INCX ); Xptr = Mptr( ((char *) X), Xlindx, Xjj, Xld, size ); AMAX[REAL_PART] = ((double*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((double*)(Xptr))[IMAG_PART]; } else { AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; } if( Xrow >= 0 ) { /* * Combine leave on all the local maximum if Xrow >= 0, i.e sub( X ) is * distributed. */ Czgamx2d( ctxt, COLUMN, &cctop, 1, 1, ((char*)AMAX), 1, &maxpos, &idumm, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( ( AMAX[REAL_PART] != ZERO ) || ( AMAX[IMAG_PART] != ZERO ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == maxpos ) { Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); *INDX = Xgindx + 1; Cigebs2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1, maxpos, mycol ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *IX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *IX ) : Xlindx + 1 ); } } } return; } /* * End of PZAMAX */ } scalapack-1.8.0/PBLAS/SRC/pzcopy_.c0000640000175000017500000002161510363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzcopy_( int * N, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzcopy_( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PZCOPY copies one subvector into another, * * sub( Y ) := sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * copied. N must be at least zero. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local output) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with sub( X ). * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 501 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZCOPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 5, &info ); PB_Cchkvec( ctxt, "PZCOPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 10, &info ); } if( info ) { PB_Cabort( ctxt, "PZCOPY", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) X), Xi, Xj, Xd, ROW, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) X), Xi, Xj, Xd, COLUMN, type->zero, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PZCOPY */ } scalapack-1.8.0/PBLAS/SRC/pzgerc_.c0000640000175000017500000002745310363532303016414 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzgerc_( int * M, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY, double * A, int * IA, int * JA, int * DESCA ) #else void pzgerc_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZGERC performs the rank 1 operation * * sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZGERC", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PZGERC", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PZGERC", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PZGERC", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zgerc_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PZGERC */ } scalapack-1.8.0/PBLAS/SRC/pstrmv_.c0000640000175000017500000004747310363532303016461 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pstrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pstrmv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PSTRMV performs one of the matrix-vector operations * * sub( X ) := sub( A )*sub( X ) or sub( X ) := sub( A )'*sub( X ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * sub( X ) is an n element subvector and sub( A ) is an n by n unit, * or non-unit, upper or lower triangular submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( X ) := sub( A ) * sub( X ). * * TRANS = 'T' or 't' sub( X ) := sub( A )' * sub( X ). * * TRANS = 'C' or 'c' sub( X ) := sub( A )' * sub( X ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the transfor- * med subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, * one, * tbeta, top, *zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, YAfr, YAld, YApbY, YAsum, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PSTRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PSTRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PSTRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PSTRMV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); size = type->size; one = type->one; zero = type->zero; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process column(s), then zero it. */ if( Xroc == CCOLUMN ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process columns(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { sset_( &Xnp, zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ), &ione ); } } } } else { /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process row(s), then zero it. */ if( Xroc == CROW ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; sset_( &Xnq, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); } } } } /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, size ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } if( YAfr ) free( YA ); /* * End of PSTRMV */ } scalapack-1.8.0/PBLAS/SRC/pzher2_.c0000640000175000017500000004334210363532303016327 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzher2_( F_CHAR_T UPLO, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY, double * A, int * IA, int * JA, int * DESCA ) #else void pzher2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZHER2 performs the Hermitian rank 2 operation * * sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) + * conjg( alpha )*sub( Y )*conjg( sub( X )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and * sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; cmplx16 Calpha; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_], YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZHER2", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PZHER2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PZHER2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PZHER2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PZHER2", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by * sub( A ) */ if( *INCY == Yd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ROW, &YR, YRd0, &YRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0, YRd0, ROW, &YC, YCd0, &YCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, COLUMN, &YC, YCd0, &YCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0, YCd0, COLUMN, &YR, YRd0, &YRfr ); } /* * Local rank-2 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd0[LLD_]; YCld = YCd0[LLD_]; XRld = XRd0[LLD_]; YRld = YRd0[LLD_]; Calpha[REAL_PART] = ALPHA[REAL_PART]; Calpha[IMAG_PART] = -ALPHA[IMAG_PART]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zgerc_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); zgerc_( &Akp, &Anq0, ((char *) Calpha), YC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzher2 ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzher2 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zgerc_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); zgerc_( &Amp0, &Anq0, ((char *) Calpha), Mptr( YC, Akp, 0, YCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); if( YRfr ) free( YR ); if( YCfr ) free( YC ); /* * End of PZHER2 */ } scalapack-1.8.0/PBLAS/SRC/pzdotc_.c0000640000175000017500000006712310363532303016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzdotc_( int * N, double * DOT, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzdotc_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; double * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PZDOTC forms the dot product of two subvectors, * * DOT := sub( X )**H * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) COMPLEX*16 array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZDOTC", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PZDOTC", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PZDOTC", info ); return; } #endif DOT[REAL_PART] = ZERO; DOT[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cztypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cztypeset(); size = type->size; dot = type->Fvvdotc; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Czgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Czgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Czgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Czgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Czgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Czgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Czgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Czgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Czgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Czgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Czgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Czgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Czgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cztypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cztypeset(); /* * Compute DOT := sub( Y )**H * sub( X ) */ PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotc ); /* * Conjugate the result */ DOT[IMAG_PART] = -DOT[IMAG_PART]; } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cztypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotc ); } /* * End of PZDOTC */ } scalapack-1.8.0/PBLAS/SRC/pzsyrk_.c0000640000175000017500000004513310363532303016457 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzsyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzsyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZSYRK performs one of the symmetric rank k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't', K specifies the number of rows of the submatrix * sub( A ). K must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYRK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYRK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PZSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PZSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PZSYRK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZSYRK */ } scalapack-1.8.0/PBLAS/SRC/pdsymm_.c0000640000175000017500000005303010363532303016421 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsymm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdsymm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PDSYMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the symmetric submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) DOUBLE PRECISION array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PDSYMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PDSYMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PDSYMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( BETA[REAL_PART] == ONE ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PDSYMM */ } scalapack-1.8.0/PBLAS/SRC/pdscal_.c0000640000175000017500000002172610602576752016401 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdscal_( int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdscal_( N, ALPHA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDSCAL multiplies an n element subvector sub( X ) by the scalar * alpha, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input/local output) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the scaled * subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Xd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDSCAL", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PDSCAL", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( Xd[CTXT_], &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Start the operations */ if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { if( ALPHA[REAL_PART] == ONE ) return; /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; if( ALPHA[REAL_PART] == ZERO ) { dset_( &Xnq, ((char *) ALPHA), ((char *)(X+(Xii+Xjj*Xld))), &Xld ); } else { dscal_( &Xnq, ((char *) ALPHA), ((char *)(X+(Xii+Xjj*Xld))), &Xld ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { if( ALPHA[REAL_PART] == ONE ) return; /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { if( ALPHA[REAL_PART] == ZERO ) { dset_( &Xnp, ((char *) ALPHA), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } else { dscal_( &Xnp, ((char *) ALPHA), ((char *)( X+(Xii+Xjj*Xd[LLD_]) )), INCX ); } } } return; } /* * End of PDSCAL */ } scalapack-1.8.0/PBLAS/SRC/pdaxpy_.c0000640000175000017500000002254210363532303016421 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdaxpy_( int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdaxpy_( N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PDAXPY adds one subvector to another, * * sub( Y ) := sub( Y ) + alpha * sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER. * On entry, N specifies the length of the subvectors to be * added. N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PDAXPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PDAXPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PDAXPY", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, ROW, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, COLUMN, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PDAXPY */ } scalapack-1.8.0/PBLAS/SRC/pzherk_.c0000640000175000017500000004553110363532303016422 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzherk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzherk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PZHERK performs one of the Hermitian rank k operations * * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ), * * or * * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are real scalars, sub( C ) is an n by n Hermitian * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the Hermitian submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'C' or * 'c', K specifies the number of rows of the submatrix * sub( A ). K must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; cmplx16 Calph; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZHERK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZHERK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PZHERK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PZHERK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZHERK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PZHERK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, CONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif Calph[REAL_PART] = ALPHA[REAL_PART]; Calph[IMAG_PART] = ZERO; /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZHERK */ } scalapack-1.8.0/PBLAS/SRC/pcaxpy_.c0000640000175000017500000002256510363532303016425 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcaxpy_( int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcaxpy_( N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PCAXPY adds one subvector to another, * * sub( Y ) := sub( Y ) + alpha * sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER. * On entry, N specifies the length of the subvectors to be * added. N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCAXPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PCAXPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PCAXPY", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, ROW, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, COLUMN, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PCAXPY */ } scalapack-1.8.0/PBLAS/SRC/PBpblas.h0000640000175000017500000015156210363532303016312 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * This file includes PBLAS definitions. All PBLAS routines include this * file. * * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #if( _F2C_CALL_ == _F2C_ADD_ ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine. No redefinition is necessary to have the * following FORTRAN to C interface: * * FORTRAN CALL C DECLARATION * CALL PDGEMM(...) void pdgemm_(...) * * This is the PBLAS default. */ #define PB_freebuf_ PB_freebuf_ #define PB_topget_ pb_topget_ #define PB_topset_ pb_topset_ #endif #if( _F2C_CALL_ == _F2C_UPCASE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine. No redefinition is necessary to have the * following FORTRAN to C interface: * * FORTRAN CALL C DECLARATION * CALL PDGEMM(...) void PDGEMM(...) */ #define pilaenv_ PILAENV #define PB_freebuf_ PB_FREEBUF #define PB_topget_ PB_TOPGET #define PB_topset_ PB_TOPSET /* Level-1 PBLAS */ #define picopy_ PICOPY #define pscopy_ PSCOPY #define pdcopy_ PDCOPY #define pccopy_ PCCOPY #define pzcopy_ PZCOPY #define psswap_ PSSWAP #define pdswap_ PDSWAP #define pcswap_ PCSWAP #define pzswap_ PZSWAP #define psaxpy_ PSAXPY #define pdaxpy_ PDAXPY #define pcaxpy_ PCAXPY #define pzaxpy_ PZAXPY #define psscal_ PSSCAL #define pdscal_ PDSCAL #define pcscal_ PCSCAL #define pzscal_ PZSCAL #define pcsscal_ PCSSCAL #define pzdscal_ PZDSCAL #define psasum_ PSASUM #define pdasum_ PDASUM #define pscasum_ PSCASUM #define pdzasum_ PDZASUM #define psnrm2_ PSNRM2 #define pdnrm2_ PDNRM2 #define pscnrm2_ PSCNRM2 #define pdznrm2_ PDZNRM2 #define psdot_ PSDOT #define pddot_ PDDOT #define pcdotu_ PCDOTU #define pzdotu_ PZDOTU #define pcdotc_ PCDOTC #define pzdotc_ PZDOTC #define psamax_ PSAMAX #define pdamax_ PDAMAX #define pcamax_ PCAMAX #define pzamax_ PZAMAX #define psgemv_ PSGEMV #define pdgemv_ PDGEMV #define pcgemv_ PCGEMV #define pzgemv_ PZGEMV #define psagemv_ PSAGEMV #define pdagemv_ PDAGEMV #define pcagemv_ PCAGEMV #define pzagemv_ PZAGEMV #define pssymv_ PSSYMV #define pdsymv_ PDSYMV #define pchemv_ PCHEMV #define pzhemv_ PZHEMV #define psasymv_ PSASYMV #define pdasymv_ PDASYMV #define pcahemv_ PCAHEMV #define pzahemv_ PZAHEMV #define pstrmv_ PSTRMV #define pdtrmv_ PDTRMV #define pctrmv_ PCTRMV #define pztrmv_ PZTRMV #define psatrmv_ PSATRMV #define pdatrmv_ PDATRMV #define pcatrmv_ PCATRMV #define pzatrmv_ PZATRMV #define pstrsv_ PSTRSV #define pdtrsv_ PDTRSV #define pctrsv_ PCTRSV #define pztrsv_ PZTRSV #define psger_ PSGER #define pdger_ PDGER #define pcgeru_ PCGERU #define pzgeru_ PZGERU #define pcgerc_ PCGERC #define pzgerc_ PZGERC #define pssyr_ PSSYR #define pdsyr_ PDSYR #define pcher_ PCHER #define pzher_ PZHER #define pssyr2_ PSSYR2 #define pdsyr2_ PDSYR2 #define pcher2_ PCHER2 #define pzher2_ PZHER2 #define psgemm_ PSGEMM #define pdgemm_ PDGEMM #define pcgemm_ PCGEMM #define pzgemm_ PZGEMM #define psgeadd_ PSGEADD #define pdgeadd_ PDGEADD #define pcgeadd_ PCGEADD #define pzgeadd_ PZGEADD #define pssymm_ PSSYMM #define pdsymm_ PDSYMM #define pcsymm_ PCSYMM #define pchemm_ PCHEMM #define pzsymm_ PZSYMM #define pzhemm_ PZHEMM #define pstrmm_ PSTRMM #define pdtrmm_ PDTRMM #define pctrmm_ PCTRMM #define pztrmm_ PZTRMM #define pstrsm_ PSTRSM #define pdtrsm_ PDTRSM #define pctrsm_ PCTRSM #define pztrsm_ PZTRSM #define pssyrk_ PSSYRK #define pdsyrk_ PDSYRK #define pcsyrk_ PCSYRK #define pcherk_ PCHERK #define pzsyrk_ PZSYRK #define pzherk_ PZHERK #define pssyr2k_ PSSYR2K #define pdsyr2k_ PDSYR2K #define pcsyr2k_ PCSYR2K #define pcher2k_ PCHER2K #define pzsyr2k_ PZSYR2K #define pzher2k_ PZHER2K #define pstradd_ PSTRADD #define pdtradd_ PDTRADD #define pctradd_ PCTRADD #define pztradd_ PZTRADD #define pstran_ PSTRAN #define pdtran_ PDTRAN #define pctranu_ PCTRANU #define pztranu_ PZTRANU #define pctranc_ PCTRANC #define pztranc_ PZTRANC #endif #if( _F2C_CALL_ == _F2C_NOCHANGE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine with the following FORTRAN to C interface: * * FORTRAN CALL C DECLARATION * CALLL PDGEMM(...) void pdgemm(...) */ #define pilaenv_ pilaenv #define PB_freebuf_ PB_freebuf #define PB_topget_ pb_topget #define PB_topset_ pb_topset #define picopy_ picopy #define pscopy_ pscopy #define pdcopy_ pdcopy #define pccopy_ pccopy #define pzcopy_ pzcopy #define psswap_ psswap #define pdswap_ pdswap #define pcswap_ pcswap #define pzswap_ pzswap #define psaxpy_ psaxpy #define pdaxpy_ pdaxpy #define pcaxpy_ pcaxpy #define pzaxpy_ pzaxpy #define psscal_ psscal #define pdscal_ pdscal #define pcscal_ pcscal #define pzscal_ pzscal #define pcsscal_ pcsscal #define pzdscal_ pzdscal #define psasum_ psasum #define pdasum_ pdasum #define pscasum_ pscasum #define pdzasum_ pdzasum #define psnrm2_ psnrm2 #define pdnrm2_ pdnrm2 #define pscnrm2_ pscnrm2 #define pdznrm2_ pdznrm2 #define psdot_ psdot #define pddot_ pddot #define pcdotu_ pcdotu #define pzdotu_ pzdotu #define pcdotc_ pcdotc #define pzdotc_ pzdotc #define psamax_ psamax #define pdamax_ pdamax #define pcamax_ pcamax #define pzamax_ pzamax #define psgemv_ psgemv #define pdgemv_ pdgemv #define pcgemv_ pcgemv #define pzgemv_ pzgemv #define psagemv_ psagemv #define pdagemv_ pdagemv #define pcagemv_ pcagemv #define pzagemv_ pzagemv #define pssymv_ pssymv #define pdsymv_ pdsymv #define pchemv_ pchemv #define pzhemv_ pzhemv #define psasymv_ psasymv #define pdasymv_ pdasymv #define pcahemv_ pcahemv #define pzahemv_ pzahemv #define pstrmv_ pstrmv #define pdtrmv_ pdtrmv #define pctrmv_ pctrmv #define pztrmv_ pztrmv #define psatrmv_ psatrmv #define pdatrmv_ pdatrmv #define pcatrmv_ pcatrmv #define pzatrmv_ pzatrmv #define pstrsv_ pstrsv #define pdtrsv_ pdtrsv #define pctrsv_ pctrsv #define pztrsv_ pztrsv #define psger_ psger #define pdger_ pdger #define pcgeru_ pcgeru #define pzgeru_ pzgeru #define pcgerc_ pcgerc #define pzgerc_ pzgerc #define pssyr_ pssyr #define pdsyr_ pdsyr #define pcher_ pcher #define pzher_ pzher #define pssyr2_ pssyr2 #define pdsyr2_ pdsyr2 #define pcher2_ pcher2 #define pzher2_ pzher2 #define psgeadd_ psgeadd #define pdgeadd_ pdgeadd #define pcgeadd_ pcgeadd #define pzgeadd_ pzgeadd #define psgemm_ psgemm #define pdgemm_ pdgemm #define pcgemm_ pcgemm #define pzgemm_ pzgemm #define pssymm_ pssymm #define pdsymm_ pdsymm #define pcsymm_ pcsymm #define pchemm_ pchemm #define pzsymm_ pzsymm #define pzhemm_ pzhemm #define pstrmm_ pstrmm #define pdtrmm_ pdtrmm #define pctrmm_ pctrmm #define pztrmm_ pztrmm #define pstrsm_ pstrsm #define pdtrsm_ pdtrsm #define pctrsm_ pctrsm #define pztrsm_ pztrsm #define pssyrk_ pssyrk #define pdsyrk_ pdsyrk #define pcsyrk_ pcsyrk #define pcherk_ pcherk #define pzsyrk_ pzsyrk #define pzherk_ pzherk #define pssyr2k_ pssyr2k #define pdsyr2k_ pdsyr2k #define pcsyr2k_ pcsyr2k #define pcher2k_ pcher2k #define pzsyr2k_ pzsyr2k #define pzher2k_ pzher2k #define pstradd_ pstradd #define pdtradd_ pdtradd #define pctradd_ pctradd #define pztradd_ pztradd #define pstran_ pstran #define pdtran_ pdtran #define pctranu_ pctranu #define pztranu_ pztranu #define pctranc_ pctranc #define pztranc_ pztranc #endif #if( _F2C_CALL_ == _F2C_F77ISF2C ) #define PB_freebuf_ PB_freebuf__ #define PB_topget_ pb_topget__ #define PB_topset_ pb_topset__ #endif /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ #ifdef __STDC__ void PB_freebuf_ ( void ); void PB_topget_ ( int *, F_CHAR_T, F_CHAR_T, F_CHAR_T ); void PB_topset_ ( int *, F_CHAR_T, F_CHAR_T, F_CHAR_T ); void picopy_ ( int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int * ); void pscopy_ ( int *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pdcopy_ ( int *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void pccopy_ ( int *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pzcopy_ ( int *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void psswap_ ( int *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pdswap_ ( int *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void pcswap_ ( int *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pzswap_ ( int *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void psaxpy_ ( int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pdaxpy_ ( int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void pcaxpy_ ( int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pzaxpy_ ( int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void psscal_ ( int *, float *, float *, int *, int *, int *, int * ); void pdscal_ ( int *, double *, double *, int *, int *, int *, int * ); void pcscal_ ( int *, float *, float *, int *, int *, int *, int * ); void pcsscal_ ( int *, float *, float *, int *, int *, int *, int * ); void pzscal_ ( int *, double *, double *, int *, int *, int *, int * ); void pzdscal_ ( int *, double *, double *, int *, int *, int *, int * ); void psasum_ ( int *, float *, float *, int *, int *, int *, int * ); void pdasum_ ( int *, double *, double *, int *, int *, int *, int * ); void pscasum_ ( int *, float *, float *, int *, int *, int *, int * ); void pdzasum_ ( int *, double *, double *, int *, int *, int *, int * ); void psnrm2_ ( int *, float *, float *, int *, int *, int *, int * ); void pdnrm2_ ( int *, double *, double *, int *, int *, int *, int * ); void pscnrm2_ ( int *, float *, float *, int *, int *, int *, int * ); void pdznrm2_ ( int *, double *, double *, int *, int *, int *, int * ); void psdot_ ( int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pddot_ ( int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void pcdotc_ ( int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pcdotu_ ( int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int * ); void pzdotc_ ( int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void pzdotu_ ( int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int * ); void psamax_ ( int *, float *, int *, float *, int *, int *, int *, int * ); void pdamax_ ( int *, double *, int *, double *, int *, int *, int *, int * ); void pcamax_ ( int *, float *, int *, float *, int *, int *, int *, int * ); void pzamax_ ( int *, double *, int *, double *, int *, int *, int *, int * ); void psgemv_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pdgemv_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pcgemv_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pzgemv_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void psagemv_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pdagemv_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pcagemv_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pzagemv_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void psger_ ( int *, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pdger_ ( int *, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pcgerc_ ( int *, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pcgeru_ ( int *, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pzgerc_ ( int *, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pzgeru_ ( int *, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pssymv_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pdsymv_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pchemv_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pzhemv_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void psasymv_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pdasymv_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pcahemv_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pzahemv_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pssyr_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pdsyr_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pcher_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pzher_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pssyr2_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pdsyr2_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pcher2_ ( F_CHAR_T, int *, float *, float *, int *, int *, int *, int *, float *, int *, int *, int *, int *, float *, int *, int *, int * ); void pzher2_ ( F_CHAR_T, int *, double *, double *, int *, int *, int *, int *, double *, int *, int *, int *, int *, double *, int *, int *, int * ); void pstrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, int *, int *, int *, float *, int *, int *, int *, int * ); void pdtrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, int *, int *, int *, double *, int *, int *, int *, int * ); void pctrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, int *, int *, int *, float *, int *, int *, int *, int * ); void pztrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, int *, int *, int *, double *, int *, int *, int *, int * ); void psatrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pdatrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pcatrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, int *, float *, float *, int *, int *, int *, int * ); void pzatrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, int *, double *, double *, int *, int *, int *, int * ); void pstrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, int *, int *, int *, float *, int *, int *, int *, int * ); void pdtrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, int *, int *, int *, double *, int *, int *, int *, int * ); void pctrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, float *, int *, int *, int *, float *, int *, int *, int *, int * ); void pztrsv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, double *, int *, int *, int *, double *, int *, int *, int *, int * ); void psgeadd_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdgeadd_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcgeadd_ ( F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzgeadd_ ( F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void psgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzgemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pssymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdsymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcsymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzsymm_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pchemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzhemm_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pssyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdsyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcsyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzsyr2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcher2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzher2k_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pssyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdsyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcsyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzsyrk_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pcherk_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pzherk_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pstradd_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdtradd_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pctradd_ ( F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pztradd_ ( F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pstran_ ( int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pdtran_ ( int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pctranc_ ( int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pztranc_ ( int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pctranu_ ( int *, int *, float *, float *, int *, int *, int *, float *, float *, int *, int *, int * ); void pztranu_ ( int *, int *, double *, double *, int *, int *, int *, double *, double *, int *, int *, int * ); void pstrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int * ); void pdtrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int * ); void pctrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int * ); void pztrmm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int * ); void pstrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int * ); void pdtrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int * ); void pctrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, float *, float *, int *, int *, int *, float *, int *, int *, int * ); void pztrsm_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, double *, double *, int *, int *, int *, double *, int *, int *, int * ); #else void PB_freebuf_ (); void PB_topget_ (); void PB_topset_ (); void picopy_ (); void pscopy_ (); void pdcopy_ (); void pccopy_ (); void pzcopy_ (); void psswap_ (); void pdswap_ (); void pcswap_ (); void pzswap_ (); void psaxpy_ (); void pdaxpy_ (); void pcaxpy_ (); void pzaxpy_ (); void psscal_ (); void pdscal_ (); void pcscal_ (); void pcsscal_ (); void pzscal_ (); void pzdscal_ (); void psasum_ (); void pdasum_ (); void pscasum_ (); void pdzasum_ (); void psnrm2_ (); void pdnrm2_ (); void pscnrm2_ (); void pdznrm2_ (); void psdot_ (); void pddot_ (); void pcdotc_ (); void pcdotu_ (); void pzdotc_ (); void pzdotu_ (); void psamax_ (); void pdamax_ (); void pcamax_ (); void pzamax_ (); void psgemv_ (); void pdgemv_ (); void pcgemv_ (); void pzgemv_ (); void psagemv_ (); void pdagemv_ (); void pcagemv_ (); void pzagemv_ (); void psger_ (); void pdger_ (); void pcgerc_ (); void pcgeru_ (); void pzgerc_ (); void pzgeru_ (); void pssymv_ (); void pdsymv_ (); void pchemv_ (); void pzhemv_ (); void psasymv_ (); void pdasymv_ (); void pcahemv_ (); void pzahemv_ (); void pssyr_ (); void pdsyr_ (); void pcher_ (); void pzher_ (); void pssyr2_ (); void pdsyr2_ (); void pcher2_ (); void pzher2_ (); void pstrmv_ (); void pdtrmv_ (); void pctrmv_ (); void pztrmv_ (); void psatrmv_ (); void pdatrmv_ (); void pcatrmv_ (); void pzatrmv_ (); void pstrsv_ (); void pdtrsv_ (); void pctrsv_ (); void pztrsv_ (); void psgeadd_ (); void pdgeadd_ (); void pcgeadd_ (); void pzgeadd_ (); void psgemm_ (); void pdgemm_ (); void pcgemm_ (); void pzgemm_ (); void pssymm_ (); void pdsymm_ (); void pcsymm_ (); void pchemm_ (); void pzsymm_ (); void pzhemm_ (); void pssyr2k_ (); void pdsyr2k_ (); void pcsyr2k_ (); void pcher2k_ (); void pzsyr2k_ (); void pzher2k_ (); void pssyrk_ (); void pdsyrk_ (); void pcsyrk_ (); void pcherk_ (); void pzsyrk_ (); void pzherk_ (); void pstradd_ (); void pdtradd_ (); void pctradd_ (); void pztradd_ (); void pstran_ (); void pdtran_ (); void pctranc_ (); void pctranu_ (); void pztranc_ (); void pztranu_ (); void pstrmm_ (); void pdtrmm_ (); void pctrmm_ (); void pztrmm_ (); void pstrsm_ (); void pdtrsm_ (); void pctrsm_ (); void pztrsm_ (); #endif scalapack-1.8.0/PBLAS/SRC/pztrmm_.c0000640000175000017500000005244310363532303016450 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztrmm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB ) #else void pztrmm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; double * A, * B; #endif { /* * Purpose * ======= * * PZTRMM performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMM", "Illegal TRANS = %c\n", TranOp ); info = -3; } if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PZTRMM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PZTRMM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PZTRMM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRMM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ChooseAB ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrmmAB( type, &Var, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } else { if( ( lside && notran ) || ( !( lside ) && !( notran ) ) ) { OpR = CCOMBINE; OpC = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } } else { OpR = CBCAST; OpC = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } } if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrmmB( type, &DirB, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZTRMM */ } scalapack-1.8.0/PBLAS/SRC/pztrmv_.c0000640000175000017500000004763710363532303016472 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pztrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pztrmv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; double * A, * X; #endif { /* * Purpose * ======= * * PZTRMV performs one of the matrix-vector operations * * sub( X ) := sub( A )*sub( X ) or sub( X ) := sub( A )'*sub( X ) * * or * * sub( X ) := conjg( sub( A )' )*sub( X ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * sub( X ) is an n element subvector and sub( A ) is an n by n unit, * or non-unit, upper or lower triangular submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( X ) := sub( A ) * sub( X ). * * TRANS = 'T' or 't' sub( X ) := sub( A )' * sub( X ). * * TRANS = 'C' or 'c' * sub( X ) := conjg( sub( A )' ) * sub( X ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the transfor- * med subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, * one, * tbeta, top, *zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, YAfr, YAld, YApbY, YAsum, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PZTRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PZTRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PZTRMV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); size = type->size; one = type->one; zero = type->zero; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process column(s), then zero it. */ if( Xroc == CCOLUMN ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process columns(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { zset_( &Xnp, zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ), &ione ); } } } } else { /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process row(s), then zero it. */ if( Xroc == CROW ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; zset_( &Xnq, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); } } } } /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, size ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Czgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Czgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } if( YAfr ) free( YA ); /* * End of PZTRMV */ } scalapack-1.8.0/PBLAS/SRC/pctranc_.c0000640000175000017500000002271610363532303016551 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctranc_( int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pctranc_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCTRANC transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, * * op( X ) = conjg( X )'. * * Thus, op( sub( A ) ) denotes conjg( A(IA:IA+N-1,JA:JA+M-1)' ). * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PCTRANC", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PCTRANC", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRANC", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cctypeset(), CONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PCTRANC */ } scalapack-1.8.0/PBLAS/SRC/pssymv_.c0000640000175000017500000005517410363532303016464 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssymv_( F_CHAR_T UPLO, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pssymv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSSYMV performs the matrix-vector operation * * sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, sub( X ) and sub( Y ) are n element sub- * vectors and sub( A ) is an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; float * tbeta; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PSSYMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PSSYMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PSSYMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( BETA[REAL_PART] == ZERO ) { sset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { sscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( BETA[REAL_PART] == ZERO ) { sset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { sscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( type, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) { if( tbeta[REAL_PART] == ZERO ) { sset_( &Anq, ((char *) tbeta), YR, &YRld ); } else { sscal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) { if( tbeta[REAL_PART] == ZERO ) { sset_( &Amp, ((char *) tbeta), YC, &ione ); } else { sscal_( &Amp, ((char *) tbeta), YC, &ione ); } } } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); sgemv_( C2F_CHAR( TRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzsymv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzsymv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, size ), &ione ); sgemv_( C2F_CHAR( TRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PSSYMV */ } scalapack-1.8.0/PBLAS/SRC/pctrmm_.c0000640000175000017500000005242710363532303016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctrmm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB ) #else void pctrmm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, SIDE, TRANS, UPLO; int * IA, * IB, * JA, * JB, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCB; float * A, * B; #endif { /* * Purpose * ======= * * PCTRMM performs one of the matrix-matrix operations * * sub( B ) := alpha * op( sub( A ) ) * sub( B ), * * or * * sub( B ) := alpha * sub( B ) * op( sub( A ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a * unit, or non-unit, upper or lower triangular submatrix and op( X ) is * one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether op( sub( A ) ) multiplies * sub( B ) from the left or right as follows: * * SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ), * * SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( B ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( B ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array B * corresponding to the entries of the submatrix sub( B ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input/local output) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * On exit, the local entries of this array corresponding to the * to the entries of the submatrix sub( B ) are overwritten by * the local entries of the m by n transformed submatrix. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA, Var, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, lside, mycol, myrow, nb, notran, nounit, npcol, nprow, upper; double ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMM", "Illegal UPLO = %c\n", UploA ); info = -2; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMM", "Illegal TRANS = %c\n", TranOp ); info = -3; } if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMM", "Illegal DIAG = %c\n", DiagA ); info = -4; } if( lside ) PB_Cchkmat( ctxt, "PCTRMM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11, &info ); else PB_Cchkmat( ctxt, "PCTRMM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11, &info ); PB_Cchkmat( ctxt, "PCTRMM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRMM", info ); return; } #endif /* * Quick return if possible */ if( *M == 0 || *N == 0 ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) B), Bi, Bj, Bd ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated * ( N >> M when SIDE is left and M >> N otherwise ) * Best : only sub( B ) is communicated * ( M >> N when SIDE is left and N >> M otherwise ) */ if( lside ) { if( notran ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABestL = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) ); ABestR = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *M, Bd[MB_], nprow ); Best = (double)(*N) * ( ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } else { if( notran ) { tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( 1.1 * ABestR ) <= Best ); } else { tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); ABestL = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + CBRATIO * ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) ); ABestR = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + MAX( tmp2, tmp1 ) / TWO ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); Best = (double)(*M) * ( ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) || ( ( 1.1 * ABestR ) <= Best ) ); } } /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); if( ChooseAB ) { if( lside ) { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } else { if( ABestL <= ABestR ) { OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } } } else { if( notran ) { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_DRING; } else { TopR = TopC = CTOP_IRING; } } else { if( ABestL <= ABestR ) { OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT; if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; } else { TopR = CTOP_IRING; TopC = CTOP_DRING; } } else { OpR = CBCAST; OpC = CBCAST; Var = CRIGHT; if( upper ) { TopR = TopC = CTOP_IRING; } else { TopR = TopC = CTOP_DRING; } } } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { if( ( rtopsave = rtop ) != TopR ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); if( ( ctopsave = ctop ) != TopC ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } PB_CptrmmAB( type, &Var, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } else { if( ( lside && notran ) || ( !( lside ) && !( notran ) ) ) { OpR = CCOMBINE; OpC = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } } else { OpR = CBCAST; OpC = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } } if( lside ) DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CptrmmB( type, &DirB, &SideOp, &UploA, &TranOp, &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCTRMM */ } scalapack-1.8.0/PBLAS/SRC/pzher2k_.c0000640000175000017500000005302110363532303016475 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzher2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzher2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PZHER2K performs one of the Hermitian rank 2k operations * * sub( C ) := alpha*sub( A )*conjg( sub( B ) )' + * conjg( alpha )*sub( B )*conjg( sub( A ) )' + * beta*sub( C ), * * or * * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars with beta real, sub( C ) is an n by n * Hermitian submatrix and sub( A ) and sub( B ) are n by k submatrices * in the first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the Hermitian submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*conjg( sub( B )' ) + * conjg( alpha )*sub( B )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'C' or 'c', K specifies the number of rows * of the submatrices sub( A ) and sub( B ). K must be at least * zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZHER2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZHER2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PZHER2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZHER2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PZHER2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZHER2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PZHER2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PZHER2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, CONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PZHER2K */ } scalapack-1.8.0/PBLAS/SRC/pctrsv_.c0000640000175000017500000007614010363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctrsv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pctrsv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PCTRSV solves one of the systems of equations * * sub( A )*sub( X ) = b, or sub( A )'*sub( X ) = b, or * * conjg( sub( A )' )*sub( X ) = b, * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * b and sub( X ) are n element subvectors and sub( A ) is an n by n * unit, or non-unit, upper or lower triangular submatrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( A ) * sub( X ) = b. * * TRANS = 'T' or 't' sub( A )' * sub( X ) = b. * * TRANS = 'C' or 'c' conjg( sub( A )' ) * sub( X ) = b. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On entry, sub( X ) is the n element right-hand side * b. On exit, sub( X ) is overwritten with the solution subvec- * tor. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, btop, ctop, * negone, * one, * zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, XACapbX, XACfr, XACld, XACsum, XARapbX, XARfr, XARld, XARsum, Xi, Xj, ctxt, info, ione=1, k, kb, kbnext, kbprev, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XACd[DLEN_], XARd[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XAC = NULL, * XAR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PCTRSV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PCTRSV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRSV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); size = type->size; one = type->one; zero = type->zero; negone = type->negone; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the last * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ if( Akp > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow, Arow, nprow ); Akp -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) cgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol ); Ccgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) cset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Akp > 0 && Anq0 > 0 ) cgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } else { if( Anq0 > 0 ) cgemv_( TRANS, &Akp, &Anq0, negone, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, XAC, &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Ccgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process column owning the first * column of sub( A ) */ PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd, &XACfr, &XACsum, &XACapbX ); /* * Create vector XAR in process rows spanned by sub( A ) */ PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr, &XARsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAC contains the solution scattered in multiple * process columns and XAR contains the solution replicated in the process rows. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); if( ( Anp0 = Anp - Akp ) > 0 ) { Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( XACsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow, nprow ); Anp0 -= ktmp; if( ktmp > 0 ) { if( Anq0 > 0 ) cgemv_( TRANS, &ktmp, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol ); Ccgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp, 0, XACld, size ), XACld, myrow, Asrc ); if( mycol != Asrc ) cset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } if( Anp0 > 0 && Anq0 > 0 ) cgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp+ktmp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp+ktmp, 0, XACld, size ), &ione ); } else { if( Anq0 > 0 ) cgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAR, 0, Akq, XARld, size ), &XARld, one, Mptr( XAC, Akp, 0, XACld, size ), &ione ); } } } /* * Combine the scattered resulting vector XAC */ if( XACsum && ( Anp > 0 ) ) { Ccgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow, XACd[CSRC_] ); } /* * sub( X ) := XAC (if necessary) */ if( XACapbX ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, COLUMN, &btop ); (void) PB_Ctop( &ctxt, COMBINE, ROW, &ctop ); } } else { if( upper ) { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAR in process row owning the first row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, 0, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = 0; k < *N; k += nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later is * only locally updated. */ Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anq0 = Anq - Akq ) > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbnext = ktmp - kb; kbnext = MIN( kbnext, nb ); ktmp = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol, npcol ); Anq0 -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) cgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow ); Ccgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) cset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Anq0 > 0 ) cgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq+ktmp, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq+ktmp, XARld, size ), &XARld ); } else { if( Anp0 > 0 ) cgemv_( TRANS, &Anp0, &Anq0, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Ccgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } else { /* * Save current and enforce ring BLACS topologies */ btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); (void) PB_Ctop( &ctxt, BCAST, ROW, TOP_DRING ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DRING ); /* * Remove next line when BLACS combine operations support ring topologies. */ (void) PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); /* * Reuse sub( X ) and/or create vector XAC in process row owning the last row * of sub( A ) */ PB_CInOutV2( type, NOCONJG, ROW, *N, *N, *N-1, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd, &XARfr, &XARsum, &XARapbX ); /* * Create vector XAC in process columns spanned by sub( A ) */ PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr, &XACsum ); /* * Retrieve local quantities related to Ad0 -> sub( A ) */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_ ]; Anb = Ad0[NB_ ]; Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_]; Ald = Ad0[LLD_]; Anp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Anp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XACld = XACd[LLD_]; XARld = XARd[LLD_]; for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb ) { ktmp = *N - k; kb = MIN( ktmp, nb ); /* * Solve logical diagonal block, XAR contains the solution scattered in multiple * process rows and XAC contains the solution replicated in the process columns. */ Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k, Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0, Akq, XARld, size ), XARld ); /* * Update: only the part of sub( X ) to be solved at the next step is locally * updated and combined, the remaining part of the vector to be solved later * is only locally updated. */ if( Akq > 0 ) { Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow ); if( XARsum ) { kbprev = MIN( k, nb ); ktmp = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol, Acol, npcol ); Akq -= ktmp; if( ktmp > 0 ) { if( Anp0 > 0 ) cgemv_( TRANS, &Anp0, &ktmp, negone, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow ); Ccgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0, Akq, XARld, size ), XARld, Asrc, mycol ); if( myrow != Asrc ) cset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld, size ), &XARld ); } if( Anp0 > 0 && Akq > 0 ) cgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } else { if( Anp0 > 0 ) cgemv_( TRANS, &Anp0, &Akq, negone, Mptr( Aptr, Akp, 0, Ald, size ), &Ald, Mptr( XAC, Akp, 0, XACld, size ), &ione, one, XAR, &XARld ); } } } /* * Combine the scattered resulting vector XAR */ if( XARsum && ( Anq > 0 ) ) { Ccgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_], mycol ); } /* * sub( X ) := XAR (if necessary) */ if( XARapbX ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } /* * Restore BLACS topologies */ (void) PB_Ctop( &ctxt, BCAST, ROW, &btop ); (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop ); } } if( XACfr ) free( XAC ); if( XARfr ) free( XAR ); /* * End of PCTRSV */ } scalapack-1.8.0/PBLAS/SRC/pcher2k_.c0000640000175000017500000005276410363532303016463 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcher2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcher2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PCHER2K performs one of the Hermitian rank 2k operations * * sub( C ) := alpha*sub( A )*conjg( sub( B ) )' + * conjg( alpha )*sub( B )*conjg( sub( A ) )' + * beta*sub( C ), * * or * * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars with beta real, sub( C ) is an n by n * Hermitian submatrix and sub( A ) and sub( B ) are n by k submatrices * in the first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the Hermitian submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*conjg( sub( B )' ) + * conjg( alpha )*sub( B )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*conjg( sub( A )' )*sub( B ) + * conjg( alpha )*conjg( sub( B )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'C' or 'c', K specifies the number of rows * of the submatrices sub( A ) and sub( B ). K must be at least * zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCHER2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCHER2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PCHER2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCHER2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PCHER2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCHER2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PCHER2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PCHER2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, CONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCHER2K */ } scalapack-1.8.0/PBLAS/SRC/pctranu_.c0000640000175000017500000002266410363532303016575 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctranu_( int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pctranu_( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ int * IA, * IC, * JA, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCTRANU transposes a matrix * * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. * * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an * n by m submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ) and the number of columns of the submatrix sub( A ). * M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ) and the number of rows of the submatrix sub( A ). N * must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ). Before entry, this array contains * the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, npcol, nprow; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkmat( ctxt, "PCTRANU", "A", *N, 2, *M, 1, Ai, Aj, Ad, 7, &info ); PB_Cchkmat( ctxt, "PCTRANU", "C", *M, 1, *N, 2, Ci, Cj, Cd, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRANU", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) return; /* * And when alpha is zero */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA), ((char * )C), Ci, Cj, Cd ); } return; } /* * Start the operations */ PB_Cptran( PB_Cctypeset(), NOCONJG, *M, *N, ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); /* * End of PCTRANU */ } scalapack-1.8.0/PBLAS/SRC/pcgerc_.c0000640000175000017500000002743310363532303016363 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcgerc_( int * M, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY, float * A, int * IA, int * JA, int * DESCA ) #else void pcgerc_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCGERC performs the rank 1 operation * * sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PCGERC", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PCGERC", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PCGERC", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PCGERC", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { cgerc_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PCGERC */ } scalapack-1.8.0/PBLAS/SRC/pcher_.c0000640000175000017500000003446410363532303016223 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcher_( F_CHAR_T UPLO, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * A, int * IA, int * JA, int * DESCA ) #else void pcher_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PCHER performs the Hermitian rank 1 operation * * sub( A ) := alpha*sub( X )*conjg( sub( X )' ) + sub( A ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Alpha is a real scalar, sub( X ) is an n element subvector and * sub( A ) is an n by n Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X * corresponding to the entries of the subvector sub( X ) need * not be set on input. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * A (local input/local output) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; cmplx Calpha; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCHER", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PCHER", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkmat( ctxt, "PCHER", "A", *N, 2, *N, 2, Ai, Aj, Ad, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PCHER", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Local rank-1 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); XCld = XCd0[LLD_]; XRld = XRd0[LLD_]; Calpha[REAL_PART] = ALPHA[REAL_PART]; Calpha[IMAG_PART] = ZERO; if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) cgerc_( &Akp, &Anq0, ((char *) Calpha), XC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); PB_Cpsyr( type, UPPER, kb, 1, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzher ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr( type, LOWER, kb, 1, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Aptr, k, k, Ad0, PB_Ctzher ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) cgerc_( &Amp0, &Anq0, ((char *) Calpha), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); /* * End of PCHER */ } scalapack-1.8.0/PBLAS/SRC/pblas.h0000640000175000017500000005021210363532303016056 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * This file includes the standard C libraries, as well as system depen- * dent include files. All PBLAS routines include this file. * * --------------------------------------------------------------------- * Machine Specific PBLAS macros * --------------------------------------------------------------------- */ #define _HAL_ 0 #define _T3D_ 1 #define _T3E_ 2 #ifdef T3D #define _MACH_ _T3D_ #endif #ifdef T3E #define _MACH_ _T3E_ #endif #ifndef _MACH_ #define _MACH_ _HAL_ #endif /* * CBRATIO is the ratio of the transfer cost per element for the combine * sum to one process and the broadcast operation. This value is used * within the Level 3 PBLAS routines to decide on which algorithm to se- * lect. */ #define CBRATIO 1.3 /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include #include #ifdef __STDC__ #include #else #include #endif #if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) ) #include #endif /* * --------------------------------------------------------------------- * FORTRAN <-> C interface * --------------------------------------------------------------------- * * These macros identifies how the PBLAS will be called as follows: * * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed it (Suns, Intel * compilers expect this). * * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions * to be in all lower case (IBM RS6K compilers do this). * * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions * to be in all upcase. (Cray compilers expect this). * * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C * converter. */ #define _F2C_ADD_ 0 #define _F2C_NOCHANGE 1 #define _F2C_UPCASE 2 #define _F2C_F77ISF2C 3 #ifdef UpCase #define _F2C_CALL_ _F2C_UPCASE #endif #ifdef NoChange #define _F2C_CALL_ _F2C_NOCHANGE #endif #ifdef Add_ #define _F2C_CALL_ _F2C_ADD_ #endif #ifdef f77IsF2C #define _F2C_CALL_ _F2C_F77ISF2C #endif #ifndef _F2C_CALL_ #define _F2C_CALL_ _F2C_ADD_ #endif /* * --------------------------------------------------------------------- * TYPE DEFINITIONS AND CONVERSION UTILITIES * --------------------------------------------------------------------- */ #if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) ) #define float double /* Type of character argument in a FORTRAN call */ #define F_CHAR_T _fcd /* Character conversion utilities */ #define F2C_CHAR(a) ( _fcdtocp( (a) ) ) #define C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) /* Type of FORTRAN functions */ #define F_VOID_FCT void fortran /* Subroutine */ #define F_INTG_FCT int fortran /* INTEGER function */ #else /* Type of character argument in a FORTRAN call */ typedef char * F_CHAR_T; /* Character conversion utilities */ #define F2C_CHAR(a) (a) #define C2F_CHAR(a) (a) /* Type of FORTRAN functions */ #define F_VOID_FCT void /* Subroutine */ #define F_INTG_FCT int /* INTEGER function */ #endif /* * ---------------------------------------------------------------------- * #typedef definitions * --------------------------------------------------------------------- */ typedef float cmplx [2]; typedef double cmplx16[2]; #define REAL_PART 0 #define IMAG_PART 1 #ifdef __STDC__ typedef void (*GESD2D_T) ( int, int, int, char *, int, int, int ); typedef void (*GERV2D_T) ( int, int, int, char *, int, int, int ); typedef void (*GEBS2D_T) ( int, char *, char *, int, int, char *, int ); typedef void (*GEBR2D_T) ( int, char *, char *, int, int, char *, int, int, int ); typedef void (*GSUM2D_T) ( int, char *, char *, int, int, char *, int, int, int ); typedef F_VOID_FCT (*MMADD_T) ( int *, int *, char *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*MMSHFT_T) ( int *, int *, int *, char *, int * ); typedef F_VOID_FCT (*VVDOT_T) ( int *, char *, char *, int *, char *, int * ); typedef F_VOID_FCT (*VVSET_T) ( int *, char *, char *, int * ); typedef F_VOID_FCT (*TZPAD_T) ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); typedef F_VOID_FCT (*TZPADCPY_T) ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*TZSET_T) ( F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); typedef F_VOID_FCT (*TZSCAL_T) ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); typedef F_VOID_FCT (*AXPY_T) ( int *, char *, char *, int *, char *, int * ); typedef F_VOID_FCT (*COPY_T) ( int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*SWAP_T) ( int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*GEMV_T) ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*AGEMV_T) ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*SYMV_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*ASYMV_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*HEMV_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*AHEMV_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*TRMV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*ATRMV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*TRSV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*GERC_T) ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*GERU_T) ( int *, int *, char *, char *, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*SYR_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); typedef F_VOID_FCT (*HER_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); typedef F_VOID_FCT (*SYR2_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*HER2_T) ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); typedef F_VOID_FCT (*GEMM_T) ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*SYMM_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*HEMM_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*SYRK_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*HERK_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*SYR2K_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*HER2K_T) ( F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); typedef F_VOID_FCT (*TRMM_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); typedef F_VOID_FCT (*TRSM_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, int *, char *, char *, int *, char *, int * ); #else typedef void (*GESD2D_T) (); typedef void (*GERV2D_T) (); typedef void (*GEBS2D_T) (); typedef void (*GEBR2D_T) (); typedef void (*GSUM2D_T) (); typedef F_VOID_FCT (*MMADD_T) (); typedef F_VOID_FCT (*MMSHFT_T) (); typedef F_VOID_FCT (*VVDOT_T) (); typedef F_VOID_FCT (*VVSET_T) (); typedef F_VOID_FCT (*TZPAD_T) (); typedef F_VOID_FCT (*TZPADCPY_T) (); typedef F_VOID_FCT (*TZSET_T) (); typedef F_VOID_FCT (*TZSCAL_T) (); typedef F_VOID_FCT (*AXPY_T) (); typedef F_VOID_FCT (*COPY_T) (); typedef F_VOID_FCT (*SWAP_T) (); typedef F_VOID_FCT (*GEMV_T) (); typedef F_VOID_FCT (*AGEMV_T) (); typedef F_VOID_FCT (*SYMV_T) (); typedef F_VOID_FCT (*ASYMV_T) (); typedef F_VOID_FCT (*HEMV_T) (); typedef F_VOID_FCT (*AHEMV_T) (); typedef F_VOID_FCT (*TRMV_T) (); typedef F_VOID_FCT (*ATRMV_T) (); typedef F_VOID_FCT (*TRSV_T) (); typedef F_VOID_FCT (*GERC_T) (); typedef F_VOID_FCT (*GERU_T) (); typedef F_VOID_FCT (*SYR_T) (); typedef F_VOID_FCT (*HER_T) (); typedef F_VOID_FCT (*SYR2_T) (); typedef F_VOID_FCT (*HER2_T) (); typedef F_VOID_FCT (*GEMM_T) (); typedef F_VOID_FCT (*SYMM_T) (); typedef F_VOID_FCT (*HEMM_T) (); typedef F_VOID_FCT (*SYRK_T) (); typedef F_VOID_FCT (*HERK_T) (); typedef F_VOID_FCT (*SYR2K_T) (); typedef F_VOID_FCT (*HER2K_T) (); typedef F_VOID_FCT (*TRMM_T) (); typedef F_VOID_FCT (*TRSM_T) (); #endif typedef struct { char type; /* Encoding of the data type */ int usiz; /* length in bytes of elementary data type */ int size; /* length in bytes of data type */ char * zero, * one, * negone; /* pointers to contants of correct type */ GESD2D_T Cgesd2d; /* BLACS functions */ GERV2D_T Cgerv2d; GEBS2D_T Cgebs2d; GEBR2D_T Cgebr2d; GSUM2D_T Cgsum2d; MMADD_T Fmmadd; /* Addition functions */ MMADD_T Fmmcadd; MMADD_T Fmmtadd; MMADD_T Fmmtcadd; MMADD_T Fmmdda; MMADD_T Fmmddac; MMADD_T Fmmddat; MMADD_T Fmmddact; MMSHFT_T Fcshft; /* Shift functions */ MMSHFT_T Frshft; VVDOT_T Fvvdotu; /* Dot functions */ VVDOT_T Fvvdotc; TZPAD_T Ftzpad; /* Array pad function */ TZPADCPY_T Ftzpadcpy; VVSET_T Fset; TZSCAL_T Ftzscal; /* Scaling functions */ TZSCAL_T Fhescal; TZSCAL_T Ftzcnjg; AXPY_T Faxpy; /* Level 1 BLAS */ COPY_T Fcopy; SWAP_T Fswap; GEMV_T Fgemv; /* Level 2 BLAS */ SYMV_T Fsymv; HEMV_T Fhemv; TRMV_T Ftrmv; TRSV_T Ftrsv; AGEMV_T Fagemv; ASYMV_T Fasymv; AHEMV_T Fahemv; ATRMV_T Fatrmv; GERC_T Fgerc; GERU_T Fgeru; SYR_T Fsyr; HER_T Fher; SYR2_T Fsyr2; HER2_T Fher2; GEMM_T Fgemm; /* Level 3 BLAS */ SYMM_T Fsymm; HEMM_T Fhemm; SYRK_T Fsyrk; HERK_T Fherk; SYR2K_T Fsyr2k; HER2K_T Fher2k; TRMM_T Ftrmm; TRSM_T Ftrsm; } PBTYP_T; #ifdef __STDC__ typedef void (*TZSYR_T) ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); typedef void (*TZSYR2_T) ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); typedef void (*TZTRM_T) ( PBTYP_T *, char *, char *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); typedef void (*TZSYM_T) ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); #else typedef void (*TZSYR_T) (); typedef void (*TZSYR2_T) (); typedef void (*TZTRM_T) (); typedef void (*TZSYM_T) (); #endif typedef struct { int offd; /* Global diagonal offset */ int lcmt00; /* LCM value of first block */ int mp; /* Local number of rows */ int imb1; /* Size of first row block (global) */ int imbloc; /* Size of first local row block */ int mb; /* Row block size */ int lmbloc; /* Size of last local row block */ int mblks; /* Number of local row blocks */ int iupp; /* LCM row bound for first diagonal block */ int upp; /* LCM row bound for diagonal block */ int prow; /* Relative row process coordinate */ int nprow; /* Number of process rows */ int nq; /* Local number of columns */ int inb1; /* Size of first column block (global) */ int inbloc; /* Size of first local column block */ int nb; /* Column block size */ int lnbloc; /* Size of last local column block */ int nblks; /* Number of local column blocks */ int ilow; /* LCM column bound for first diagonal block */ int low; /* LCM column bound for diagonal block */ int pcol; /* Relative column process coordinate */ int npcol; /* Number of process columns */ int lcmb; /* Least common multiple of nprow * mb and npcol * nb */ } PB_VM_T; /* * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define INT 'I' /* type identifiers */ #define SREAL 'S' #define DREAL 'D' #define SCPLX 'C' #define DCPLX 'Z' #define crot_ CROT scalapack-1.8.0/PBLAS/SRC/pcamax_.c0000640000175000017500000005317210363532303016370 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcamax_( int * N, float * AMAX, int * INDX, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pcamax_( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * INDX, * IX, * JX, * N; float * AMAX; /* * .. Array Arguments .. */ int * DESCX; float * X; #endif { /* * Purpose * ======= * * PCAMAX computes the global index of the maximum element in absolute * value of a subvector sub( X ). The global index is returned in INDX * and the value of that element is returned in AMAX, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * AMAX (global output) COMPLEX array * On exit, AMAX specifies the largest entry in absolute value * of the subvector sub( X ) only in its scope (See below for * further details). * * INDX (global output) INTEGER * On exit, INDX specifies the global index of the maximum ele- * ment in absolute value of the subvector sub( X ) only in its * scope (See below for further details). * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char cbtop, cctop, rbtop, rctop; int Xcol, Xgindx, Xi, Xii, Ximb, Xinb, Xj, Xjj, Xlindx, Xld, Xmb, Xnb, Xnp, Xnq, Xrow, Xsrc, ctxt, dist, dst, idumm, info, k, maxpos, mycol, mydist, myrow, npcol, nprow, src, size; PBTYP_T * type; /* * .. Local Arrays .. */ char * Xptr; int Xd[DLEN_]; cmplx work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PCAMAX", "X", *N, 1, Xi, Xj, Xd, *INCX, 7, &info ); if( info ) { PB_Cabort( ctxt, "PCAMAX", info ); return; } #endif /* * Initialize INDX and AMAX */ *INDX = 0; AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *INCX == 1 ) && ( Xd[M_] == 1 ) && ( *N == 1 ) ) { /* * Make sure I own some data and compute INDX and AMAX */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *INDX = *JX; type = PB_Cctypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); AMAX[REAL_PART] = ((float*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((float*)(Xptr))[IMAG_PART]; } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( ( rctop == CTOP_DEFAULT ) || ( rctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); size = type->size; Xlindx = Xjj - 1 + icamax_( &Xnq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); Xptr = Mptr( ((char *) X), Xii, Xlindx, Xld, size ); work[0][REAL_PART] = ((float*)(Xptr))[REAL_PART]; work[0][IMAG_PART] = ((float*)(Xptr))[IMAG_PART]; work[1][REAL_PART] = ((float )( Xgindx+1 )); work[1][IMAG_PART] = ZERO; } else { work[0][REAL_PART] = ZERO; work[0][IMAG_PART] = ZERO; work[1][REAL_PART] = ZERO; work[1][IMAG_PART] = ZERO; } /* * Combine the local results using a 1-tree topology within process column 0 * if npcol > 1 or Xcol >= 0, i.e sub( X ) is distributed. */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Ccgesd2d( ctxt, 2, 1, ((char*)work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Ccgerv2d( ctxt, 2, 1, ((char*) work[2]), 2, myrow, src ); if( ( ABS( work[0][REAL_PART] ) + ABS( work[0][IMAG_PART] ) ) < ( ABS( work[2][REAL_PART] ) + ABS( work[2][IMAG_PART] ) ) ) { work[0][REAL_PART] = work[2][REAL_PART]; work[0][IMAG_PART] = work[2][IMAG_PART]; work[1][REAL_PART] = work[3][REAL_PART]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of INDX and AMAX within * their process row. */ rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Ccgebs2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2 ); } else { Ccgebr2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2, myrow, 0 ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ AMAX[REAL_PART] = work[0][REAL_PART]; AMAX[IMAG_PART] = work[0][IMAG_PART]; *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *JX ) : ( (int)(work[1][REAL_PART]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; type = PB_Cctypeset(); size = type->size; Xlindx = Xjj - 1 + icamax_( &Xnq, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); Xptr = Mptr( ((char *) X), Xii, Xlindx, Xld, size ); AMAX[REAL_PART] = ((float*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((float*)(Xptr))[IMAG_PART]; } else { AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; } if( Xcol >= 0 ) { /* * Combine leave on all the local maximum if Xcol >= 0, i.e sub( X ) is * distributed */ Ccgamx2d( ctxt, ROW, &rctop, 1, 1, ((char*)AMAX), 1, &idumm, &maxpos, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( ( AMAX[REAL_PART] != ZERO ) || ( AMAX[IMAG_PART] != ZERO ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == maxpos ) { Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); *INDX = Xgindx + 1; Cigebs2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1, myrow, maxpos ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *JX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *JX ) : Xlindx + 1 ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ( cctop == CTOP_DEFAULT ) || ( cctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { Xld = Xd[LLD_]; type = PB_Cctypeset(); size = type->size; Xlindx = Xii - 1 + icamax_( &Xnp, Mptr( ((char *)X), Xii, Xjj, Xld, size ), INCX ); Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); Xptr = Mptr( ((char *) X), Xlindx, Xjj, Xld, size ); work[0][REAL_PART] = ((float*)(Xptr))[REAL_PART]; work[0][IMAG_PART] = ((float*)(Xptr))[IMAG_PART]; work[1][REAL_PART] = ((float )( Xgindx+1 )); work[1][IMAG_PART] = ZERO; } else { work[0][REAL_PART] = ZERO; work[0][IMAG_PART] = ZERO; work[1][REAL_PART] = ZERO; work[1][IMAG_PART] = ZERO; } /* * Combine the local results using a 1-tree topology within process row 0 * if nprow > 1 or Xrow >= 0, i.e sub( X ) is distributed. */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Ccgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Ccgerv2d( ctxt, 2, 1, ((char*) work[2]), 2, src, mycol ); if( ( ABS( work[0][REAL_PART] ) + ABS( work[0][IMAG_PART] ) ) < ( ABS( work[2][REAL_PART] ) + ABS( work[2][IMAG_PART] ) ) ) { work[0][REAL_PART] = work[2][REAL_PART]; work[0][IMAG_PART] = work[2][IMAG_PART]; work[1][REAL_PART] = work[3][REAL_PART]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process row 0 broadcasts the combined values of INDX and AMAX within their * process column. */ cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Ccgebs2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2 ); } else { Ccgebr2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2, 0, mycol ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ AMAX[REAL_PART] = work[0][REAL_PART]; AMAX[IMAG_PART] = work[0][IMAG_PART]; *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *IX ) : ( (int)(work[1][REAL_PART]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; type = PB_Cctypeset(); size = type->size; Xlindx = Xii - 1 + icamax_( &Xnp, Mptr( ((char *) X), Xii, Xjj, Xld, size ), INCX ); Xptr = Mptr( ((char *) X), Xlindx, Xjj, Xld, size ); AMAX[REAL_PART] = ((float*)(Xptr))[REAL_PART]; AMAX[IMAG_PART] = ((float*)(Xptr))[IMAG_PART]; } else { AMAX[REAL_PART] = ZERO; AMAX[IMAG_PART] = ZERO; } if( Xrow >= 0 ) { /* * Combine leave on all the local maximum if Xrow >= 0, i.e sub( X ) is * distributed. */ Ccgamx2d( ctxt, COLUMN, &cctop, 1, 1, ((char*)AMAX), 1, &maxpos, &idumm, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( ( AMAX[REAL_PART] != ZERO ) || ( AMAX[IMAG_PART] != ZERO ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == maxpos ) { Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); *INDX = Xgindx + 1; Cigebs2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1, maxpos, mycol ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *IX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( ( AMAX[REAL_PART] == ZERO ) && ( AMAX[IMAG_PART] == ZERO ) ) ? ( *IX ) : Xlindx + 1 ); } } } return; } /* * End of PCAMAX */ } scalapack-1.8.0/PBLAS/SRC/pzgeru_.c0000640000175000017500000002744210363532303016434 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzgeru_( int * M, int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY, double * A, int * IA, int * JA, int * DESCA ) #else void pzgeru_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * M, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZGERU performs the rank 1 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + sub( A ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is * an n element subvector and sub( A ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( A ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( A ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( A ) are overwritten by the * local entries of the m by n updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd[DLEN_]; char * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZGERU", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PZGERU", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PZGERU", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PZGERU", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); /* * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA */ PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); /* * Local rank-1 update iff I own some data */ Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { zgeru_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald ); } if( XAfr ) free( XA ); if( YAfr ) free( YA ); /* * End of PZGERU */ } scalapack-1.8.0/PBLAS/SRC/pcherk_.c0000640000175000017500000004546710363532303016403 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcherk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcherk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCHERK performs one of the Hermitian rank k operations * * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ), * * or * * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are real scalars, sub( C ) is an n by n Hermitian * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the Hermitian submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'C' or * 'c', K specifies the number of rows of the submatrix * sub( A ). K must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the Hermitian submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( C ) need not be * set, they are assumed to be zero, and on exit they are set * to zero. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; cmplx Calph; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCHERK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCHERK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PCHERK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PCHERK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCHERK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PCHERK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, CONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif Calph[REAL_PART] = ALPHA[REAL_PART]; Calph[IMAG_PART] = ZERO; /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ), *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCHERK */ } scalapack-1.8.0/PBLAS/SRC/pssyr2k_.c0000640000175000017500000005231710363532303016534 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssyr2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pssyr2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PSSYR2K performs one of the symmetric rank 2k operations * * sub( C ) := alpha*sub( A )*sub( B )' + alpha*sub( B )*sub( A )' + * beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( B ) + alpha*sub( B )'*sub( A ) + * beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise, and, * * sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1) if TRANS = 'N', * B(IB:IB+K-1,JB:JB+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) and sub( B ) are n by k submatrices in the * first case and k by n submatrices in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( B )' + * alpha*sub( B )*sub( A )' + * beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ), * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( B )'*sub( A ) + * alpha*sub( A )'*sub( B ) + * beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrices sub( A ) and sub( B ), and on * entry with TRANS = 'T' or 't' or 'C' or 'c', K specifies the * number of rows of the submatrices sub( A ) and sub( B ). * K must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and B corresponding to the entries of the submatrices * sub( A ) and sub( B ) respectively need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) REAL array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JB+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( B ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( B ). * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double ABCest, ABest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYR2K", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYR2K", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) { PB_Cchkmat( ctxt, "PSSYR2K", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSSYR2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PSSYR2K", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSSYR2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PSSYR2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYR2K", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N) * ABest : only sub( A ) and sub( B ) are communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + TWO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol ); ABest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + TWO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.4 * ABCest ) <= ABest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseABC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_Cpsyr2kA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PSSYR2K */ } scalapack-1.8.0/PBLAS/SRC/PBblacs.h0000640000175000017500000004242210363532303016267 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * This file includes BLACS function type definitions, define macros, * and function prototypes. All PBLAS routines include this file. * * ---------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ /* BLACS scopes and topologies */ /* #define CALL 'A' (already defined) */ #define CCOLUMN 'C' #define CROW 'R' #define CBCAST 'B' #define CCOMBINE 'C' #define CTOP_GET '!' #define CTOP_DEFAULT ' ' #define CTOP_IRING 'I' #define CTOP_DRING 'D' #define CTOP_SRING 'S' #define CTOP_HYPER 'H' #define CTOP_FULL 'F' #define CTOP_MRING 'M' #define CTOP_TTREE 'T' #define CTOP_TREE1 '1' #define CTOP_TREE2 '2' #define CTOP_TREE3 '3' #define CTOP_TREE4 '4' #define CTOP_TREE5 '5' #define CTOP_TREE6 '6' #define CTOP_TREE7 '7' #define CTOP_TREE8 '8' #define CTOP_TREE9 '9' /* #define ALL "A" (already defined) */ #define COLUMN "C" #define ROW "R" #define BCAST "B" #define COMBINE "C" #define TOP_GET "!" #define TOP_DEFAULT " " #define TOP_IRING "I" #define TOP_DRING "D" #define TOP_SRING "S" #define TOP_HYPER "H" #define TOP_FULL "F" #define TOP_MRING "M" #define TOP_TTREE "T" #define TOP_TREE1 "1" #define TOP_TREE2 "2" #define TOP_TREE3 "3" #define TOP_TREE4 "4" #define TOP_TREE5 "5" #define TOP_TREE6 "6" #define TOP_TREE7 "7" #define TOP_TREE8 "8" #define TOP_TREE9 "9" /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ #ifdef __STDC__ /* BLACS Initialization */ void Cblacs_pinfo ( int *, int * ); void Cblacs_setup ( int *, int * ); void Cblacs_get ( int, int, int * ); void Cblacs_set ( int, int, int * ); void Cblacs_gridinit ( int *, char *, int, int ); void Cblacs_gridmap ( int *, int *, int, int, int ); /* BLACS Destruction */ void Cblacs_freebuff ( int, int ); void Cblacs_gridexit ( int ); void Cblacs_abort ( int, int ); void Cblacs_exit ( int ); /* BLACS Informational and Miscellaneous */ void Cblacs_gridinfo ( int, int *, int *, int *, int * ); int Cblacs_pnum ( int, int, int ); void Cblacs_pcoord ( int, int, int *, int * ); void Cblacs_barrier ( int, char * ); /* BLACS Sending */ void Cigesd2d ( int, int, int, char *, int, int, int ); void Csgesd2d ( int, int, int, char *, int, int, int ); void Cdgesd2d ( int, int, int, char *, int, int, int ); void Ccgesd2d ( int, int, int, char *, int, int, int ); void Czgesd2d ( int, int, int, char *, int, int, int ); void Citrsd2d ( int, char *, char *, int, int, char *, int, int, int ); void Cstrsd2d ( int, char *, char *, int, int, char *, int, int, int ); void Cdtrsd2d ( int, char *, char *, int, int, char *, int, int, int ); void Cctrsd2d ( int, char *, char *, int, int, char *, int, int, int ); void Cztrsd2d ( int, char *, char *, int, int, char *, int, int, int ); void Cigebs2d ( int, char *, char *, int, int, char *, int ); void Csgebs2d ( int, char *, char *, int, int, char *, int ); void Cdgebs2d ( int, char *, char *, int, int, char *, int ); void Ccgebs2d ( int, char *, char *, int, int, char *, int ); void Czgebs2d ( int, char *, char *, int, int, char *, int ); void Citrbs2d ( int, char *, char *, char *, char *, int, int, char *, int ); void Cstrbs2d ( int, char *, char *, char *, char *, int, int, char *, int ); void Cdtrbs2d ( int, char *, char *, char *, char *, int, int, char *, int ); void Cctrbs2d ( int, char *, char *, char *, char *, int, int, char *, int ); void Cztrbs2d ( int, char *, char *, char *, char *, int, int, char *, int ); /* BLACS Receiving */ void Cigerv2d ( int, int, int, char *, int, int, int ); void Csgerv2d ( int, int, int, char *, int, int, int ); void Cdgerv2d ( int, int, int, char *, int, int, int ); void Ccgerv2d ( int, int, int, char *, int, int, int ); void Czgerv2d ( int, int, int, char *, int, int, int ); void Citrrv2d ( int, char *, char *, int, int, char *, int, int, int ); void Cstrrv2d ( int, char *, char *, int, int, char *, int, int, int ); void Cdtrrv2d ( int, char *, char *, int, int, char *, int, int, int ); void Cctrrv2d ( int, char *, char *, int, int, char *, int, int, int ); void Cztrrv2d ( int, char *, char *, int, int, char *, int, int, int ); void Cigebr2d ( int, char *, char *, int, int, char *, int, int, int ); void Csgebr2d ( int, char *, char *, int, int, char *, int, int, int ); void Cdgebr2d ( int, char *, char *, int, int, char *, int, int, int ); void Ccgebr2d ( int, char *, char *, int, int, char *, int, int, int ); void Czgebr2d ( int, char *, char *, int, int, char *, int, int, int ); void Citrbr2d ( int, char *, char *, char *, char *, int, int, char *, int, int, int ); void Cstrbr2d ( int, char *, char *, char *, char *, int, int, char *, int, int, int ); void Cdtrbr2d ( int, char *, char *, char *, char *, int, int, char *, int, int, int ); void Cctrbr2d ( int, char *, char *, char *, char *, int, int, char *, int, int, int ); void Cztrbr2d ( int, char *, char *, char *, char *, int, int, char *, int, int, int ); /* BLACS Combine Operations */ void Cigamx2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Csgamx2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Cdgamx2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Ccgamx2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Czgamx2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Cigamn2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Csgamn2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Cdgamn2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Ccgamn2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Czgamn2d ( int, char *, char *, int, int, char *, int, int *, int *, int, int, int ); void Cigsum2d ( int, char *, char *, int, int, char *, int, int, int ); void Csgsum2d ( int, char *, char *, int, int, char *, int, int, int ); void Cdgsum2d ( int, char *, char *, int, int, char *, int, int, int ); void Ccgsum2d ( int, char *, char *, int, int, char *, int, int, int ); void Czgsum2d ( int, char *, char *, int, int, char *, int, int, int ); #else /* BLACS Initialization */ void Cblacs_pinfo (); void Cblacs_setup (); void Cblacs_get (); void Cblacs_set (); void Cblacs_gridinit (); void Cblacs_gridmap (); /* BLACS Destruction */ void Cblacs_freebuff (); void Cblacs_gridexit (); void Cblacs_abort (); void Cblacs_exit (); /* BLACS Informational and Miscellaneous */ void Cblacs_gridinfo (); int Cblacs_pnum (); void Cblacs_pcoord (); void Cblacs_barrier (); /* BLACS Sending */ void Cigesd2d (); void Csgesd2d (); void Cdgesd2d (); void Ccgesd2d (); void Czgesd2d (); void Citrsd2d (); void Cstrsd2d (); void Cdtrsd2d (); void Cctrsd2d (); void Cztrsd2d (); void Cigebs2d (); void Csgebs2d (); void Cdgebs2d (); void Ccgebs2d (); void Czgebs2d (); void Citrbs2d (); void Cstrbs2d (); void Cdtrbs2d (); void Cctrbs2d (); void Cztrbs2d (); /* BLACS Receiving */ void Cigerv2d (); void Csgerv2d (); void Cdgerv2d (); void Ccgerv2d (); void Czgerv2d (); void Citrrv2d (); void Cstrrv2d (); void Cdtrrv2d (); void Cctrrv2d (); void Cztrrv2d (); void Cigebr2d (); void Csgebr2d (); void Cdgebr2d (); void Ccgebr2d (); void Czgebr2d (); void Citrbr2d (); void Cstrbr2d (); void Cdtrbr2d (); void Cctrbr2d (); void Cztrbr2d (); /* BLACS Combine Operations */ void Cigamx2d (); void Csgamx2d (); void Cdgamx2d (); void Ccgamx2d (); void Czgamx2d (); void Cigamn2d (); void Csgamn2d (); void Cdgamn2d (); void Ccgamn2d (); void Czgamn2d (); void Cigsum2d (); void Csgsum2d (); void Cdgsum2d (); void Ccgsum2d (); void Czgsum2d (); #endif scalapack-1.8.0/PBLAS/SRC/PBtools.h0000640000175000017500000024425110363532303016347 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 12, 2002 * * --------------------------------------------------------------------- */ /* * This file includes PBLAS tools definitions. All PBLAS routines include * this file. * * ---------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- * * Descriptor entries for type 1 */ #define BLOCK_CYCLIC_2D 1 #define DTYPE1_ 0 /* Descriptor Type */ #define CTXT1_ 1 /* BLACS context */ #define M1_ 2 /* Global Number of Rows */ #define N1_ 3 /* Global Number of Columns */ #define MB1_ 4 /* Row Blocking Size */ #define NB1_ 5 /* Column Blocking Size */ #define RSRC1_ 6 /* Starting Processor Row */ #define CSRC1_ 7 /* Starting Processor Column */ #define LLD1_ 8 /* Local Leading Dimension */ #define DLEN1_ 9 /* Descriptor Length */ /* * Descriptor entries for type 2 */ #define BLOCK_CYCLIC_2D_INB 2 #define DTYPE_ 0 /* Descriptor Type */ #define CTXT_ 1 /* BLACS context */ #define M_ 2 /* Global Number of Rows */ #define N_ 3 /* Global Number of Columns */ #define IMB_ 4 /* Initial Row Blocking Size */ #define INB_ 5 /* Initial Column Blocking Size */ #define MB_ 6 /* Row Blocking Size */ #define NB_ 7 /* Column Blocking Size */ #define RSRC_ 8 /* Starting Process Row */ #define CSRC_ 9 /* Starting Process Column */ #define LLD_ 10 /* Local Leading Dimension */ #define DLEN_ 11 /* Descriptor Length */ #define CPACKING 'P' #define CUNPACKING 'U' #define PACKING "P" #define UNPACKING "U" #define CGENERAL 'G' /* #define CSYMM 'S' */ #define CHERM 'H' #define GENERAL "G" #define SYMM "S" #define HERM "H" #define ONE 1.0 #define TWO 2.0 #define ZERO 0.0 /* Input error checking related constants */ #define DESCMULT 100 #define BIGNUM 10000 /* * --------------------------------------------------------------------- * #define macro functions * --------------------------------------------------------------------- */ #define ABS( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) #define MIN( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) #define MAX( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) #define FLOOR(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) #define CEIL(a,b) ( ( (a)+(b)-1 ) / (b) ) #define ICEIL(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) #define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) #define Mlowcase(C) (((C)>64 && (C)< 91) ? (C) | 32 : (C)) /* * The following macros perform common modulo operations; All functions * except MPosMod assume arguments are < d (i.e., arguments are themsel- * ves within modulo range). */ /* increment with mod */ #define MModInc(I, d) if(++(I) == (d)) (I) = 0 /* decrement with mod */ #define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 /* positive modulo */ #define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) /* add two numbers */ #define MModAdd(I1, I2, d) \ ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) /* add 1 to # */ #define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) /* subtract two numbers */ #define MModSub(I1, I2, d) \ ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) /* sub 1 from # */ #define MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 ) /* * DNROC computes maximum number of local rows or columns. This macro is * only used to compute the time estimates in the Level 3 PBLAS routines. */ #define DNROC( n_, nb_, p_ ) \ ((double)(((((n_)+(nb_)-1)/(nb_))+(p_)-1)/(p_))*(double)((nb_))) /* * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and * also less silly errors ... */ #define Mptr( a_, i_, j_, lda_, siz_ ) \ ( (a_) + ( ( (i_)+(j_)*(lda_) )*(siz_) ) ) /* * Mfirstnb and Mlastnb compute the global size of the first and last * block corresponding to the interval i_:i_+n_-1 of global indexes. */ #define Mfirstnb( inbt_, n_, i_, inb_, nb_ ) \ inbt_ = (inb_) - (i_); \ if( inbt_ <= 0 ) \ inbt_ = ( (-inbt_) / (nb_) + 1 ) * (nb_) + inbt_; \ inbt_ = MIN( inbt_, (n_) ); #define Mlastnb( inbt_, n_, i_, inb_, nb_ ) \ inbt_ = (i_) + (n_) - (inb_); \ if( inbt_ > 0 ) \ { \ inbt_ = -( ( (nb_)+inbt_-1 )/(nb_)-1 )*(nb_) + inbt_; \ inbt_ = MIN( inbt_, (n_) ); \ } \ else { inbt_ = (n_); }; /* * Does the index interval i_:i_+n_-1 spans more than one process rows * or columns ? * * Mspan returns 0 (false) when the data is replicated (srcproc_ < 0) or * when there is only one process row or column in the process grid. */ #define Mspan( n_, i_, inb_, nb_, srcproc_, nprocs_ ) \ ( ( (srcproc_) >= 0 ) && ( ( (nprocs_) > 1 ) && \ ( ( (i_) < (inb_) ) ? \ ( (i_) + (n_) > (inb_) ) : \ ( (i_) + (n_) > (inb_) + \ ( ( (i_) - (inb_) ) / (nb_) + 1 ) * (nb_) ) ) ) ) /* * Mindxl2g computes the global index ig_ corresponding to the local * index il_ in process proc_. */ #define Mindxl2g( ig_, il_, inb_, nb_, proc_, srcproc_, nprocs_ ) \ { \ if( ( (srcproc_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ if( (proc_) == (srcproc_) ) \ { \ if( (il_) < (inb_) ) ig_ = (il_); \ else ig_ = (il_) + \ (nb_)*((nprocs_)-1)*( ((il_)-(inb_)) / (nb_) + 1 ); \ } \ else if( (proc_) < (srcproc_) ) \ { \ ig_ = (il_) + (inb_) + \ (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ (proc_)-(srcproc_)-1+(nprocs_) ); \ } \ else \ { \ ig_ = (il_) + (inb_) + \ (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ (proc_)-(srcproc_)-1 ); \ } \ } \ else \ { \ ig_ = (il_); \ } \ } /* * Mindxg2p returns the process coodinate owning the entry globally * indexed by ig_. */ #define Mindxg2p( ig_, inb_, nb_, proc_, srcproc_, nprocs_ ) \ { \ if( ( (ig_) >= (inb_) ) && ( (srcproc_) >= 0 ) && \ ( (nprocs_) > 1 ) ) \ { \ proc_ = (srcproc_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ } \ else \ { \ proc_ = (srcproc_); \ } \ } /* * Mnumroc computes the # of local indexes np_ residing in the process * of coordinate proc_ corresponding to the interval of global indexes * i_:i_+n_-1 assuming that the global index 0 resides in the process * srcproc_, and that the indexes are distributed from srcproc_ using * the parameters inb_, nb_ and nprocs_. */ #define Mnumroc( np_, n_, i_, inb_, nb_, proc_, srcproc_, nprocs_ ) \ { \ if( ( (srcproc_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ int inb__, mydist__, n__, nblk__, quot__, src__; \ if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ { \ src__ = (srcproc_) + ( nblk__ = (-inb__) / (nb_) + 1 ); \ src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ inb__ += nblk__*(nb_); \ if( ( n__ = (n_) - inb__ ) <= 0 ) \ { if( (proc_) == src__ ) np_ = (n_); else np_ = 0; } \ else \ { \ if( ( mydist__ = (proc_) - src__ ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ mydist__ -= nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ < 0 ) \ { \ if( (proc_) != src__ ) \ np_ = (nb_) + (nb_) * quot__; \ else \ np_ = inb__ + (nb_) * quot__; \ } \ else if( mydist__ > 0 ) \ { \ np_ = (nb_) * quot__; \ } \ else \ { \ if( (proc_) != src__ ) \ np_ = n__ + (nb_) + (nb_) * ( quot__ - nblk__ ); \ else \ np_ = (n_) + (nb_) * ( quot__ - nblk__ ); \ } \ } \ } \ else \ { \ if( ( n__ = (n_) - inb__ ) <= 0 ) \ { if( (proc_) == (srcproc_) ) np_ = (n_); else np_ = 0; } \ else \ { \ if( ( mydist__ = (proc_) - (srcproc_) ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ mydist__ -= nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ < 0 ) \ { \ if( (proc_) != (srcproc_) ) \ np_ = (nb_) + (nb_) * quot__; \ else \ np_ = inb__ + (nb_) * quot__; \ } \ else if( mydist__ > 0 ) \ { \ np_ = (nb_) * quot__; \ } \ else \ { \ if( (proc_) != (srcproc_) ) \ np_ = n__ + (nb_) + (nb_) * ( quot__ - nblk__ ); \ else \ np_ = (n_) + (nb_) * ( quot__ - nblk__ ); \ } \ } \ } \ } \ else \ { \ np_ = (n_); \ } \ } #define Mnpreroc( np_, n_, i_, inb_, nb_, proc_, srcproc_, nprocs_ ) \ { \ if( ( (srcproc_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ int inb__, mydist__, n__, nblk__, quot__, rem__, src__; \ if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ { \ src__ = (srcproc_) + ( nblk__ = (-inb__) / (nb_) + 1 ); \ src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ if( (proc_) != src__ ) \ { \ inb__ += nblk__*(nb_); \ if( ( n__ = (n_) - inb__ ) <= 0 ) { np_ = (n_); } \ else \ { \ if( ( mydist__ = (proc_) - src__ ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ rem__ = nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ <= rem__ ) \ { \ np_ = inb__ - (nb_) + \ ( quot__ + 1 ) * mydist__ * (nb_); \ } \ else \ { \ np_ = (n_) + \ ( mydist__ - (nprocs_) ) * quot__ * (nb_); \ } \ } \ } \ else \ { \ np_ = 0; \ } \ } \ else \ { \ if( (proc_) != (srcproc_) ) \ { \ if( ( n__ = (n_) - inb__ ) <= 0 ) { np_ = (n_); } \ else \ { \ if( ( mydist__ = (proc_) - (srcproc_) ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ rem__ = nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ <= rem__ ) \ { \ np_ = inb__ - (nb_) + \ ( quot__ + 1 ) * mydist__ * (nb_); \ } \ else \ { \ np_ = (n_) + \ ( mydist__ - (nprocs_) ) * quot__ * (nb_); \ } \ } \ } \ else \ { \ np_ = 0; \ } \ } \ } \ else \ { \ np_ = 0; \ } \ } #define Mnnxtroc( np_, n_, i_, inb_, nb_, proc_, srcproc_, nprocs_ ) \ { \ if( ( (srcproc_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ int inb__, mydist__, n__, nblk__, quot__, rem__, src__; \ if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ { \ src__ = (srcproc_) + ( nblk__ = (-inb__) / (nb_) + 1 ); \ src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ inb__ += nblk__*(nb_); \ if( ( n__ = (n_) - inb__ ) <= 0 ) { np_ = 0; } \ else \ { \ if( ( mydist__ = (proc_) - src__ ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ rem__ = nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ < rem__ ) \ { \ np_ = n__ - ( quot__ * mydist__ + \ quot__ + mydist__ ) * (nb_); \ } \ else \ { \ np_ = ( (nprocs_) - 1 - mydist__ ) * quot__ * (nb_); \ } \ } \ } \ else \ { \ if( ( n__ = (n_) - inb__ ) <= 0 ) { np_ = 0; } \ else \ { \ if( ( mydist__ = (proc_) - (srcproc_) ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ rem__ = nblk__ - \ ( quot__ = ( nblk__ / (nprocs_) ) ) * (nprocs_); \ if( mydist__ < rem__ ) \ { \ np_ = n__ - ( quot__ * mydist__ + \ quot__ + mydist__ ) * (nb_); \ } \ else \ { \ np_ = ( (nprocs_) - 1 - mydist__ ) * quot__ * (nb_); \ } \ } \ } \ } \ else \ { np_ = 0; } \ } #define Minfog2l( i_, j_, desc_, nr_, nc_, r_, c_, ii_, jj_, pr_, pc_ ) \ { \ int quot__, i__, imb__, inb__, j__, mb__, mydist__, \ nb__, nblk__, src__; \ imb__ = desc_[IMB_]; mb__ = desc_[MB_]; pr_ = desc_[RSRC_]; \ if( ( pr_ >= 0 ) && ( nr_ > 1 ) ) \ { \ if( ( i__ = (i_) - imb__ ) < 0 ) \ { ii_ = ( r_ == pr_ ? (i_) : 0 ); } \ else \ { \ src__ = pr_; \ pr_ += ( nblk__ = i__ / mb__ + 1 ); \ pr_ -= ( pr_ / nr_ ) * nr_; \ if( ( mydist__ = r_ - src__ ) < 0 ) mydist__ += nr_; \ if( mydist__ >= nblk__ - ( quot__ = nblk__ / nr_ ) * nr_ ) \ { \ if( r_ != src__ ) ii_ = mb__; \ else ii_ = imb__; \ if( r_ != pr_ ) \ ii_ += ( quot__ - 1 ) * mb__; \ else \ ii_ += i__ + ( quot__ - nblk__ ) * mb__; \ } \ else \ { \ if( r_ != src__ ) ii_ = mb__ + quot__ * mb__; \ else ii_ = imb__ + quot__ * mb__; \ } \ } \ } \ else \ { \ ii_ = (i_); \ } \ inb__ = desc_[INB_]; nb__ = desc_[NB_]; pc_ = desc_[CSRC_]; \ if( ( pc_ >= 0 ) && ( nc_ > 1 ) ) \ { \ if( ( j__ = (j_) - inb__ ) < 0 ) \ { jj_ = ( c_ == pc_ ? (j_) : 0 ); } \ else \ { \ src__ = pc_; \ pc_ += ( nblk__ = j__ / nb__ + 1 ); \ pc_ -= ( pc_ / nc_ ) * nc_; \ if( ( mydist__ = c_ - src__ ) < 0 ) mydist__ += nc_; \ if( mydist__ >= nblk__ - ( quot__ = nblk__ / nc_ ) * nc_ ) \ { \ if( c_ != src__ ) jj_ = nb__; \ else jj_ = inb__; \ if( c_ != pc_ ) \ jj_ += ( quot__ - 1 ) * nb__; \ else \ jj_ += j__ + ( quot__ - nblk__ ) * nb__; \ } \ else \ { \ if( c_ != src__ ) jj_ = nb__ + quot__ * nb__; \ else jj_ = inb__ + quot__ * nb__; \ } \ } \ } \ else \ { \ jj_ = (j_); \ } \ } /* * The following macros initialize or translate descriptors. */ #define MDescSet( desc, m, n, imb, inb, mb, nb, rsrc, csrc, ictxt, lld ) \ { \ (desc)[DTYPE_] = BLOCK_CYCLIC_2D_INB; \ (desc)[CTXT_ ] = (ictxt); \ (desc)[M_ ] = (m); \ (desc)[N_ ] = (n); \ (desc)[IMB_ ] = (imb); \ (desc)[INB_ ] = (inb); \ (desc)[MB_ ] = (mb); \ (desc)[NB_ ] = (nb); \ (desc)[RSRC_ ] = (rsrc); \ (desc)[CSRC_ ] = (csrc); \ (desc)[LLD_ ] = (lld); \ } #define MDescCopy(DescIn, DescOut) \ { \ (DescOut)[DTYPE_] = (DescIn)[DTYPE_]; \ (DescOut)[M_ ] = (DescIn)[M_ ]; \ (DescOut)[N_ ] = (DescIn)[N_ ]; \ (DescOut)[IMB_ ] = (DescIn)[IMB_ ]; \ (DescOut)[INB_ ] = (DescIn)[INB_ ]; \ (DescOut)[MB_ ] = (DescIn)[MB_ ]; \ (DescOut)[NB_ ] = (DescIn)[NB_ ]; \ (DescOut)[RSRC_ ] = (DescIn)[RSRC_ ]; \ (DescOut)[CSRC_ ] = (DescIn)[CSRC_ ]; \ (DescOut)[CTXT_ ] = (DescIn)[CTXT_ ]; \ (DescOut)[LLD_ ] = (DescIn)[LLD_ ]; \ } #define MDescTrans(DescIn, DescOut) \ { \ if ( (DescIn)[DTYPE_] == BLOCK_CYCLIC_2D ) \ { \ (DescOut)[DTYPE_] = BLOCK_CYCLIC_2D_INB; \ (DescOut)[M_ ] = (DescIn)[M1_ ]; \ (DescOut)[N_ ] = (DescIn)[N1_ ]; \ (DescOut)[IMB_ ] = (DescIn)[MB1_ ]; \ (DescOut)[INB_ ] = (DescIn)[NB1_ ]; \ (DescOut)[MB_ ] = (DescIn)[MB1_ ]; \ (DescOut)[NB_ ] = (DescIn)[NB1_ ]; \ (DescOut)[RSRC_ ] = (DescIn)[RSRC1_ ]; \ (DescOut)[CSRC_ ] = (DescIn)[CSRC1_ ]; \ (DescOut)[CTXT_ ] = (DescIn)[CTXT1_ ]; \ (DescOut)[LLD_ ] = (DescIn)[LLD1_ ]; \ } \ else if ( (DescIn)[DTYPE_] == BLOCK_CYCLIC_2D_INB ) \ { \ (DescOut)[DTYPE_] = BLOCK_CYCLIC_2D_INB; \ (DescOut)[M_ ] = (DescIn)[M_ ]; \ (DescOut)[N_ ] = (DescIn)[N_ ]; \ (DescOut)[IMB_ ] = (DescIn)[IMB_ ]; \ (DescOut)[INB_ ] = (DescIn)[INB_ ]; \ (DescOut)[MB_ ] = (DescIn)[MB_ ]; \ (DescOut)[NB_ ] = (DescIn)[NB_ ]; \ (DescOut)[RSRC_ ] = (DescIn)[RSRC_ ]; \ (DescOut)[CSRC_ ] = (DescIn)[CSRC_ ]; \ (DescOut)[CTXT_ ] = (DescIn)[CTXT_ ]; \ (DescOut)[LLD_ ] = (DescIn)[LLD_ ]; \ } \ else \ { \ (DescOut)[DTYPE_] = (DescIn)[0]; \ (DescOut)[CTXT_ ] = (DescIn)[1]; \ (DescOut)[M_ ] = 0; \ (DescOut)[N_ ] = 0; \ (DescOut)[IMB_ ] = 1; \ (DescOut)[INB_ ] = 1; \ (DescOut)[MB_ ] = 1; \ (DescOut)[NB_ ] = 1; \ (DescOut)[RSRC_ ] = 0; \ (DescOut)[CSRC_ ] = 0; \ (DescOut)[LLD_ ] = 1; \ } \ } #define MIndxTrans( I, J, i, j ) \ { \ i = *I - 1; \ j = *J - 1; \ } #if( _F2C_CALL_ == _F2C_ADD_ ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine. No redefinition is necessary to have * the following FORTRAN to C interface: * * FORTRAN DECLARATION C CALL * SUBROUTINE PDFOO(...) pdfoo_(...) * * This is the PBLAS default. */ #endif #if( _F2C_CALL_ == _F2C_F77ISF2C ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine for systems where the FORTRAN compiler * is actually f2c (a FORTRAN to C conversion utility). * * FORTRAN DECLARATION C CALL * SUBROUTINE PDFOO(...) pdfoo__(...) */ #endif #if( _F2C_CALL_ == _F2C_UPCASE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE PDFOO(...) PDFOO(...) */ #define immadd_ IMMADD #define smmadd_ SMMADD #define dmmadd_ DMMADD #define cmmadd_ CMMADD #define zmmadd_ ZMMADD #define immtadd_ IMMTADD #define smmtadd_ SMMTADD #define dmmtadd_ DMMTADD #define cmmtadd_ CMMTADD #define zmmtadd_ ZMMTADD #define smmcadd_ SMMCADD #define dmmcadd_ DMMCADD #define cmmcadd_ CMMCADD #define zmmcadd_ ZMMCADD #define smmtcadd_ SMMTCADD #define dmmtcadd_ DMMTCADD #define cmmtcadd_ CMMTCADD #define zmmtcadd_ ZMMTCADD #define immdda_ IMMDDA #define smmdda_ SMMDDA #define dmmdda_ DMMDDA #define cmmdda_ CMMDDA #define zmmdda_ ZMMDDA #define smmddac_ SMMDDAC #define dmmddac_ DMMDDAC #define cmmddac_ CMMDDAC #define zmmddac_ ZMMDDAC #define immddat_ IMMDDAT #define smmddat_ SMMDDAT #define dmmddat_ DMMDDAT #define cmmddat_ CMMDDAT #define zmmddat_ ZMMDDAT #define smmddact_ SMMDDACT #define dmmddact_ DMMDDACT #define cmmddact_ CMMDDACT #define zmmddact_ ZMMDDACT #define sasqrtb_ SASQRTB #define dasqrtb_ DASQRTB #define sset_ SSET #define dset_ DSET #define cset_ CSET #define zset_ ZSET #define svasum_ SVASUM #define dvasum_ DVASUM #define scvasum_ SCVASUM #define dzvasum_ DZVASUM #define sascal_ SASCAL #define dascal_ DASCAL #define scshft_ SCSHFT #define dcshft_ DCSHFT #define ccshft_ CCSHFT #define zcshft_ ZCSHFT #define srshft_ SRSHFT #define drshft_ DRSHFT #define crshft_ CRSHFT #define zrshft_ ZRSHFT #define svvdot_ SVVDOT #define dvvdot_ DVVDOT #define cvvdotc_ CVVDOTC #define cvvdotu_ CVVDOTU #define zvvdotc_ ZVVDOTC #define zvvdotu_ ZVVDOTU #define stzpad_ STZPAD #define dtzpad_ DTZPAD #define ctzpad_ CTZPAD #define ztzpad_ ZTZPAD #define stzpadcpy_ STZPADCPY #define dtzpadcpy_ DTZPADCPY #define ctzpadcpy_ CTZPADCPY #define ztzpadcpy_ ZTZPADCPY #define stzscal_ STZSCAL #define dtzscal_ DTZSCAL #define ctzscal_ CTZSCAL #define ztzscal_ ZTZSCAL #define chescal_ CHESCAL #define zhescal_ ZHESCAL #define ctzcnjg_ CTZCNJG #define ztzcnjg_ ZTZCNJG #define sagemv_ SAGEMV #define dagemv_ DAGEMV #define cagemv_ CAGEMV #define zagemv_ ZAGEMV #define sasymv_ SASYMV #define dasymv_ DASYMV #define casymv_ CASYMV #define zasymv_ ZASYMV #define cahemv_ CAHEMV #define zahemv_ ZAHEMV #define satrmv_ SATRMV #define datrmv_ DATRMV #define catrmv_ CATRMV #define zatrmv_ ZATRMV #define csymv_ CSYMV #define zsymv_ ZSYMV #define csyr_ CSYR #define zsyr_ ZSYR #define csyr2_ CSYR2 #define zsyr2_ ZSYR2 #endif #if( _F2C_CALL_ == _F2C_NOCHANGE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE PDFOO(...) pdfoo(...) */ #define immadd_ immadd #define smmadd_ smmadd #define dmmadd_ dmmadd #define cmmadd_ cmmadd #define zmmadd_ zmmadd #define immtadd_ immtadd #define smmtadd_ smmtadd #define dmmtadd_ dmmtadd #define cmmtadd_ cmmtadd #define zmmtadd_ zmmtadd #define smmcadd_ smmcadd #define dmmcadd_ dmmcadd #define cmmcadd_ cmmcadd #define zmmcadd_ zmmcadd #define smmtcadd_ smmtcadd #define dmmtcadd_ dmmtcadd #define cmmtcadd_ cmmtcadd #define zmmtcadd_ zmmtcadd #define immdda_ immdda #define smmdda_ smmdda #define dmmdda_ dmmdda #define cmmdda_ cmmdda #define zmmdda_ zmmdda #define smmddac_ smmddac #define dmmddac_ dmmddac #define cmmddac_ cmmddac #define zmmddac_ zmmddac #define immddat_ immddat #define smmddat_ smmddat #define dmmddat_ dmmddat #define cmmddat_ cmmddat #define zmmddat_ zmmddat #define smmddact_ smmddact #define dmmddact_ dmmddact #define cmmddact_ cmmddact #define zmmddact_ zmmddact #define sasqrtb_ sasqrtb #define dasqrtb_ dasqrtb #define sset_ sset #define dset_ dset #define cset_ cset #define zset_ zset #define svasum_ svasum #define dvasum_ dvasum #define scvasum_ scvasum #define dzvasum_ dzvasum #define sascal_ sascal #define dascal_ dascal #define scshft_ scshft #define dcshft_ dcshft #define ccshft_ ccshft #define zcshft_ zcshft #define srshft_ srshft #define drshft_ drshft #define crshft_ crshft #define zrshft_ zrshft #define svvdot_ svvdot #define dvvdot_ dvvdot #define cvvdotc_ cvvdotc #define cvvdotu_ cvvdotu #define zvvdotc_ zvvdotc #define zvvdotu_ zvvdotu #define stzpad_ stzpad #define dtzpad_ dtzpad #define ctzpad_ ctzpad #define ztzpad_ ztzpad #define stzpadcpy_ stzpadcpy #define dtzpadcpy_ dtzpadcpy #define ctzpadcpy_ ctzpadcpy #define ztzpadcpy_ ztzpadcpy #define stzscal_ stzscal #define dtzscal_ dtzscal #define ctzscal_ ctzscal #define ztzscal_ ztzscal #define chescal_ chescal #define zhescal_ zhescal #define ctzcnjg_ ctzcnjg #define ztzcnjg_ ztzcnjg #define sagemv_ sagemv #define dagemv_ dagemv #define cagemv_ cagemv #define zagemv_ zagemv #define sasymv_ sasymv #define dasymv_ dasymv #define casymv_ casymv #define zasymv_ zasymv #define cahemv_ cahemv #define zahemv_ zahemv #define satrmv_ satrmv #define datrmv_ datrmv #define catrmv_ catrmv #define zatrmv_ zatrmv #define csymv_ csymv #define zsymv_ zsymv #define csyr_ csyr #define zsyr_ zsyr #define csyr2_ csyr2 #define zsyr2_ zsyr2 #endif /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ #ifdef __STDC__ F_VOID_FCT immadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT immtadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmtadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmtadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmtadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmtadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmtcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmtcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmtcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmtcadd_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT immdda_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmdda_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmdda_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmdda_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmdda_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmddac_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmddac_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmddac_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmddac_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT immddat_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmddat_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmddat_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmddat_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmddat_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT smmddact_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT dmmddact_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT cmmddact_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT zmmddact_ ( int *, int *, char *, char *, int *, char *, char *, int * ); F_VOID_FCT sasqrtb_ ( float *, float *, float * ); F_VOID_FCT dasqrtb_ ( double *, double *, double * ); F_VOID_FCT sset_ ( int *, char *, char *, int * ); F_VOID_FCT dset_ ( int *, char *, char *, int * ); F_VOID_FCT cset_ ( int *, char *, char *, int * ); F_VOID_FCT zset_ ( int *, char *, char *, int * ); F_VOID_FCT svasum_ ( int *, char *, char *, int * ); F_VOID_FCT dvasum_ ( int *, char *, char *, int * ); F_VOID_FCT scvasum_ ( int *, char *, char *, int * ); F_VOID_FCT dzvasum_ ( int *, char *, char *, int * ); F_VOID_FCT sascal_ ( int *, char *, char *, int * ); F_VOID_FCT dascal_ ( int *, char *, char *, int * ); F_VOID_FCT scshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT dcshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT ccshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT zcshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT srshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT drshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT crshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT zrshft_ ( int *, int *, int *, char *, int * ); F_VOID_FCT svvdot_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT dvvdot_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT cvvdotu_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT cvvdotc_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT zvvdotu_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT zvvdotc_ ( int *, char *, char *, int *, char *, int * ); F_VOID_FCT stzpad_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); F_VOID_FCT dtzpad_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); F_VOID_FCT ctzpad_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); F_VOID_FCT ztzpad_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, char *, char *, int * ); F_VOID_FCT stzpadcpy_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, int *, char *, int * ); F_VOID_FCT dtzpadcpy_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, int *, char *, int * ); F_VOID_FCT ctzpadcpy_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, int *, char *, int * ); F_VOID_FCT ztzpadcpy_ ( F_CHAR_T, F_CHAR_T, int *, int *, int *, char *, int *, char *, int * ); F_VOID_FCT stzscal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT dtzscal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT ctzscal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT ztzscal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT chescal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT zhescal_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT ctzcnjg_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT ztzcnjg_ ( F_CHAR_T, int *, int *, int *, char *, char *, int * ); F_VOID_FCT sagemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dagemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT cagemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zagemv_ ( F_CHAR_T, int *, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT sasymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT dasymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT casymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zasymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT cahemv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zahemv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT satrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT datrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT catrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zatrmv_ ( F_CHAR_T, F_CHAR_T, F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT csymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT zsymv_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, char *, int * ); F_VOID_FCT csyr_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT zsyr_ ( F_CHAR_T, int *, char *, char *, int *, char *, int * ); F_VOID_FCT csyr2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); F_VOID_FCT zsyr2_ ( F_CHAR_T, int *, char *, char *, int *, char *, int *, char *, int * ); void PB_Ctzsyr ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzher ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzsyr2 ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzher2 ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctztrmv ( PBTYP_T *, char *, char *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzatrmv ( PBTYP_T *, char *, char *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzsymv ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzhemv ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzasymv ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzahemv ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzsyrk ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzherk ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzsyr2k ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzher2k ( PBTYP_T *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctztrmm ( PBTYP_T *, char *, char *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int ); void PB_Ctzsymm ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_Ctzhemm ( PBTYP_T *, char *, char *, int, int, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int ); void PB_CpswapNN ( PBTYP_T *, int, char *, int, int, int *, int, char *, int, int, int *, int ); void PB_CpswapND ( PBTYP_T *, int, char *, int, int, int *, int, char *, int, int, int *, int ); void PB_Cpdot11 ( PBTYP_T *, int, char *, char *, int, int, int *, int, char *, int, int, int *, int, VVDOT_T ); void PB_CpdotNN ( PBTYP_T *, int, char *, char *, int, int, int *, int, char *, int, int, int *, int, VVDOT_T ); void PB_CpdotND ( PBTYP_T *, int, char *, char *, int, int, int *, int, char *, int, int, int *, int, VVDOT_T ); void PB_CpaxpbyNN ( PBTYP_T *, char *, int, int, char *, char *, int, int, int *, char *, char *, char *, int, int, int *, char * ); void PB_CpaxpbyND ( PBTYP_T *, char *, int, int, char *, char *, int, int, int *, char *, char *, char *, int, int, int *, char * ); void PB_CpaxpbyDN ( PBTYP_T *, char *, int, int, char *, char *, int, int, int *, char *, char *, char *, int, int, int *, char * ); void PB_Cpaxpby ( PBTYP_T *, char *, int, int, char *, char *, int, int, int *, char *, char *, char *, int, int, int *, char * ); void PB_Cpsyr ( PBTYP_T *, char *, int, int, char *, char *, int, char *, int, char *, int, int, int *, TZSYR_T ); void PB_Cpsyr2 ( PBTYP_T *, char *, int, int, char *, char *, int, char *, int, char *, int, char *, int, char *, int, int, int *, TZSYR2_T ); void PB_Cptrm ( PBTYP_T *, PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, char *, int, TZTRM_T ); void PB_Cpsym ( PBTYP_T *, PBTYP_T *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, char *, int, char *, int, char *, int, TZSYM_T ); void PB_Cpgeadd ( PBTYP_T *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_Cptradd ( PBTYP_T *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_Cptran ( PBTYP_T *, char *, int, int, char *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_Cptrsv ( PBTYP_T *, int, char *, char *, char *, int, char *, int, int, int *, char *, int, char *, int ); void PB_Cptrsm ( PBTYP_T *, int, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, char *, int ); void PB_CpgemmAB ( PBTYP_T *, char *, char *, char *, char *, int, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpgemmAC ( PBTYP_T *, char *, char *, char *, char *, int, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpgemmBC ( PBTYP_T *, char *, char *, char *, char *, int, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpsymmAB ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpsymmBC ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpsyrkA ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CpsyrkAC ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_Cpsyr2kA ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_Cpsyr2kAC ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, char *, int, int, int * ); void PB_CptrmmAB ( PBTYP_T *, char *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int * ); void PB_CptrmmB ( PBTYP_T *, char *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int * ); void PB_CptrsmAB ( PBTYP_T *, char *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int * ); void PB_CptrsmAB0 ( PBTYP_T *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char * *, int *, int * ); void PB_CptrsmAB1 ( PBTYP_T *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int *, char *, int * ); void PB_CptrsmB ( PBTYP_T *, char *, char *, char *, char *, char *, int, int, char *, char *, int, int, int *, char *, int, int, int * ); #else F_VOID_FCT immadd_ (); F_VOID_FCT smmadd_ (); F_VOID_FCT dmmadd_ (); F_VOID_FCT cmmadd_ (); F_VOID_FCT zmmadd_ (); F_VOID_FCT smmcadd_ (); F_VOID_FCT dmmcadd_ (); F_VOID_FCT cmmcadd_ (); F_VOID_FCT zmmcadd_ (); F_VOID_FCT immtadd_ (); F_VOID_FCT smmtadd_ (); F_VOID_FCT dmmtadd_ (); F_VOID_FCT cmmtadd_ (); F_VOID_FCT zmmtadd_ (); F_VOID_FCT smmtcadd_ (); F_VOID_FCT dmmtcadd_ (); F_VOID_FCT cmmtcadd_ (); F_VOID_FCT zmmtcadd_ (); F_VOID_FCT immdda_ (); F_VOID_FCT smmdda_ (); F_VOID_FCT dmmdda_ (); F_VOID_FCT cmmdda_ (); F_VOID_FCT zmmdda_ (); F_VOID_FCT smmddac_ (); F_VOID_FCT dmmddac_ (); F_VOID_FCT cmmddac_ (); F_VOID_FCT zmmddac_ (); F_VOID_FCT immddat_ (); F_VOID_FCT smmddat_ (); F_VOID_FCT dmmddat_ (); F_VOID_FCT cmmddat_ (); F_VOID_FCT zmmddat_ (); F_VOID_FCT smmddact_ (); F_VOID_FCT dmmddact_ (); F_VOID_FCT cmmddact_ (); F_VOID_FCT zmmddact_ (); F_VOID_FCT sasqrtb_ (); F_VOID_FCT dasqrtb_ (); F_VOID_FCT sset_ (); F_VOID_FCT dset_ (); F_VOID_FCT cset_ (); F_VOID_FCT zset_ (); F_VOID_FCT svasum_ (); F_VOID_FCT dvasum_ (); F_VOID_FCT scvasum_ (); F_VOID_FCT dzvasum_ (); F_VOID_FCT sascal_ (); F_VOID_FCT dascal_ (); F_VOID_FCT scshft_ (); F_VOID_FCT dcshft_ (); F_VOID_FCT ccshft_ (); F_VOID_FCT zcshft_ (); F_VOID_FCT srshft_ (); F_VOID_FCT drshft_ (); F_VOID_FCT crshft_ (); F_VOID_FCT zrshft_ (); F_VOID_FCT svvdot_ (); F_VOID_FCT dvvdot_ (); F_VOID_FCT cvvdotc_ (); F_VOID_FCT cvvdotu_ (); F_VOID_FCT zvvdotc_ (); F_VOID_FCT zvvdotu_ (); F_VOID_FCT stzpad_ (); F_VOID_FCT dtzpad_ (); F_VOID_FCT ctzpad_ (); F_VOID_FCT ztzpad_ (); F_VOID_FCT stzpadcpy_ (); F_VOID_FCT dtzpadcpy_ (); F_VOID_FCT ctzpadcpy_ (); F_VOID_FCT ztzpadcpy_ (); F_VOID_FCT stzscal_ (); F_VOID_FCT dtzscal_ (); F_VOID_FCT ctzscal_ (); F_VOID_FCT ztzscal_ (); F_VOID_FCT chescal_ (); F_VOID_FCT zhescal_ (); F_VOID_FCT ctzcnjg_ (); F_VOID_FCT ztzcnjg_ (); F_VOID_FCT sagemv_ (); F_VOID_FCT dagemv_ (); F_VOID_FCT cagemv_ (); F_VOID_FCT zagemv_ (); F_VOID_FCT sasymv_ (); F_VOID_FCT dasymv_ (); F_VOID_FCT casymv_ (); F_VOID_FCT zasymv_ (); F_VOID_FCT cahemv_ (); F_VOID_FCT zahemv_ (); F_VOID_FCT satrmv_ (); F_VOID_FCT datrmv_ (); F_VOID_FCT catrmv_ (); F_VOID_FCT zatrmv_ (); F_VOID_FCT csymv_ (); F_VOID_FCT zsymv_ (); F_VOID_FCT csyr_ (); F_VOID_FCT zsyr_ (); F_VOID_FCT csyr2_ (); F_VOID_FCT zsyr2_ (); void PB_Ctzsyr (); void PB_Ctzher (); void PB_Ctzsyr2 (); void PB_Ctzher2 (); void PB_Ctztrmv (); void PB_Ctzatrmv (); void PB_Ctzsymv (); void PB_Ctzhemv (); void PB_Ctzasymv (); void PB_Ctzahemv (); void PB_Ctzsyrk (); void PB_Ctzherk (); void PB_Ctzsyr2k (); void PB_Ctzher2k (); void PB_Ctztrmm (); void PB_Ctzsymm (); void PB_Ctzhemm (); void PB_CpswapNN (); void PB_CpswapND (); void PB_Cpdot11 (); void PB_CpdotNN (); void PB_CpdotND (); void PB_CpaxpbyNN (); void PB_CpaxpbyND (); void PB_CpaxpbyDN (); void PB_Cpaxpby (); void PB_Cpsyr (); void PB_Cpsyr2 (); void PB_Cptrm (); void PB_Cpsym (); void PB_Cpgeadd (); void PB_Cptradd (); void PB_Cptran (); void PB_Cptrsv (); void PB_Cptrsm (); void PB_CpgemmAB (); void PB_CpgemmAC (); void PB_CpgemmBC (); void PB_CpsymmAB (); void PB_CpsymmBC (); void PB_CpsyrkA (); void PB_CpsyrkAC (); void PB_Cpsyr2kA (); void PB_Cpsyr2kAC (); void PB_CptrmmAB (); void PB_CptrmmB (); void PB_CptrsmAB (); void PB_CptrsmAB0 (); void PB_CptrsmAB1 (); void PB_CptrsmB (); #endif /* TOOLS */ #ifdef __STDC__ int PB_Cgcd ( int, int ); int PB_Clcm ( int, int ); void PB_Cdescset ( int *, int, int, int, int, int, int, int, int, int, int ); void PB_Cdescribe ( int, int, int, int, int *, int, int, int, int, int *, int *, int *, int *, int *, int *, int *, int *, int *, int * ); void PB_CargFtoC ( int, int, int *, int *, int *, int * ); int PB_Cfirstnb ( int, int, int, int ); int PB_Clastnb ( int, int, int, int ); int PB_Cspan ( int, int, int, int, int, int ); void PB_Cainfog2l ( int, int, int, int, int *, int, int, int, int, int *, int *, int *, int *, int *, int *, int *, int *, int *, int * ); void PB_Cinfog2l ( int, int, int *, int, int, int, int, int *, int *, int *, int * ); int PB_Cg2lrem ( int, int, int, int, int, int ); int PB_Cindxg2p ( int, int, int, int, int, int ); int PB_Cnumroc ( int, int, int, int, int, int, int ); int PB_Cnpreroc ( int, int, int, int, int, int, int ); int PB_Cnnxtroc ( int, int, int, int, int, int, int ); void PB_Cconjg ( PBTYP_T *, char *, char * ); void PB_Cwarn ( int, int, char *, char *, ... ); void PB_Cabort ( int, char *, int ); void PB_Cchkmat ( int, char *, char *, int, int, int, int, int, int, int *, int, int * ); void PB_Cchkvec ( int, char *, char *, int, int, int, int, int *, int, int, int * ); char * PB_Cmalloc ( int ); char * PB_Cgetbuf ( char *, int ); PBTYP_T * PB_Citypeset ( void ); PBTYP_T * PB_Cstypeset ( void ); PBTYP_T * PB_Cdtypeset ( void ); PBTYP_T * PB_Cctypeset ( void ); PBTYP_T * PB_Cztypeset ( void ); int pilaenv_ ( int *, F_CHAR_T ); char * PB_Ctop ( int *, char *, char *, char * ); void PB_CVMinit ( PB_VM_T *, int, int, int, int, int, int, int, int, int, int, int, int ); int PB_CVMnpq ( PB_VM_T * ); void PB_CVMcontig ( PB_VM_T *, int *, int *, int *, int * ); int PB_CVMloc ( PBTYP_T *, PB_VM_T *, char *, char *, char *, char *, int, int, char *, char *, int, char *, char *, int ); int PB_CVMswp ( PBTYP_T *, PB_VM_T *, char *, char *, char *, int, char *, int, char *, int ); int PB_CVMpack ( PBTYP_T *, PB_VM_T *, char *, char *, char *, char *, int, int, char *, char *, int, char *, char *, int ); void PB_CVMupdate ( PB_VM_T *, int, int *, int * ); void PB_Cbinfo ( int, int, int, int, int, int, int, int, int, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int * ); void PB_Cplaprnt ( PBTYP_T *, int, int, char *, int, int, int *, int, int, char * ); void PB_Cplaprn2 ( PBTYP_T *, int, int, char *, int, int, int *, int, int, char *, int, int ); void PB_Cprnt ( char, int, int, int, char *, int, int, char * ); void PB_Cplapad ( PBTYP_T *, char *, char *, int, int, char *, char *, char *, int, int, int * ); void PB_Cplapd2 ( PBTYP_T *, char *, char *, int, int, char *, char *, char *, int, int, int * ); void PB_Cplascal ( PBTYP_T *, char *, char *, int, int, char *, char *, int, int, int * ); void PB_Cplasca2 ( PBTYP_T *, char *, char *, int, int, char *, char *, int, int, int * ); void PB_Cplacnjg ( PBTYP_T *, int, int, char *, char *, int, int, int * ); void PB_CInV ( PBTYP_T *, char *, char *, int, int, int *, int, char *, int, int, int *, char *, char * *, int *, int * ); void PB_CInV2 ( PBTYP_T *, char *, char *, int, int, int *, int, char *, int, int, int *, char *, char *, int, int * ); void PB_CInOutV ( PBTYP_T *, char *, int, int, int *, int, char *, char *, int, int, int *, char *, char * *, char * *, int *, int *, int *, int * ); void PB_CInOutV2 ( PBTYP_T *, char *, char *, int, int, int, int *, int, char *, int, int, int *, char *, char * *, int *, int *, int *, int * ); void PB_COutV ( PBTYP_T *, char *, char *, int, int, int *, int, char * *, int *, int *, int * ); void PB_CGatherV ( PBTYP_T *, char *, char *, int, int, char *, int, int, int *, char *, char * *, int *, int * ); void PB_CScatterV ( PBTYP_T *, char *, int, int, char *, int, int, int *, char *, char *, char *, int, int, int *, char * ); #else int PB_Cgcd (); int PB_Clcm (); void PB_Cdescset (); void PB_Cdescribe (); void PB_CargFtoC (); int PB_Cfirstnb (); int PB_Clastnb (); int PB_Cspan (); void PB_Cainfog2l (); void PB_Cinfog2l (); int PB_Cg2lrem (); int PB_Cindxg2p (); int PB_Cnumroc (); int PB_Cnpreroc (); int PB_Cnnxtroc (); void PB_Cconjg (); void PB_Cwarn (); void PB_Cabort (); void PB_Cchkmat (); void PB_Cchkvec (); char * PB_Cmalloc (); char * PB_Cgetbuf (); PBTYP_T * PB_Citypeset (); PBTYP_T * PB_Cstypeset (); PBTYP_T * PB_Cdtypeset (); PBTYP_T * PB_Cctypeset (); PBTYP_T * PB_Cztypeset (); int pilaenv_ (); char * PB_Ctop (); void PB_CVMinit (); int PB_CVMnpq (); void PB_CVMcontig (); int PB_CVMloc (); int PB_CVMswp (); int PB_CVMpack (); void PB_CVMupdate (); void PB_Cbinfo (); void PB_Cplaprnt (); void PB_Cplaprn2 (); void PB_Cprnt (); void PB_Cplapad (); void PB_Cplapd2 (); void PB_Cplascal (); void PB_Cplasca2 (); void PB_Cplacnjg (); void PB_CInV (); void PB_CInV2 (); void PB_CInOutV (); void PB_CInOutV2 (); void PB_COutV (); void PB_CGatherV (); void PB_CScatterV (); #endif scalapack-1.8.0/PBLAS/SRC/psatrmv_.c0000640000175000017500000006143010363532303016607 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSATRMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an n by n triangular subma- * trix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'T' or 't' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'C' or 'c' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Yroc, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr, YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper, usiz; float * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSATRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSATRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PSATRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PSATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkvec( ctxt, "PSATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info ); PB_Cchkvec( ctxt, "PSATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PSATRMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = utyp = PB_Cstypeset(); size = usiz = type->size; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Scale YA in the case sub( Y ) has been reused */ if( notran && !( YApbY ) ) { /* * YA resides in (a) process column(s) */ if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Amp > 0 ) sascal_( &Amp, ((char *) tbeta), YA, &ione ); } } else if( !( notran ) && !( YApbY ) ) { /* * YA resides in (a) process row(s) */ if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Anq > 0 ) sascal_( &Anq, ((char *) tbeta), YA, &YAld ); } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, usiz ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sagemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } if( notran ) { PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PSATRMV */ } scalapack-1.8.0/PBLAS/SRC/pddot_.c0000640000175000017500000006670610363532303016240 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pddot_( int * N, double * DOT, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pddot_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; double * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PDDOT forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) DOUBLE PRECISION array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PDDOT", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PDDOT", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PDDOT", info ); return; } #endif DOT[REAL_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cdtypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cdtypeset(); size = type->size; dot = type->Fvvdotu; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Cdgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Cdgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Cdgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Cdgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Cdgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Cdgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Cdgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Cdgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Cdgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Cdgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Cdgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Cdgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Cdgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Cdgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Cdgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Cdgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Cdgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cdtypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cdtypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotu ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cdtypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } /* * End of PDDOT */ } scalapack-1.8.0/PBLAS/SRC/pcsyrk_.c0000640000175000017500000004511310363532303016426 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcsyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pcsyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; float * A, * C; #endif { /* * Purpose * ======= * * PCSYRK performs one of the symmetric rank k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't', K specifies the number of rows of the submatrix * sub( A ). K must be at least zero. * * ALPHA (global input) COMPLEX * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) COMPLEX * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYRK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCSYRK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PCSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PCSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PCSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PCSYRK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) && ( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cctypeset(); /* * And when alpha or K is zero */ if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) || ( *K == 0 ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PCSYRK */ } scalapack-1.8.0/PBLAS/SRC/pctrmv_.c0000640000175000017500000004762710363532303016442 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pctrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pctrmv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * IX, * JA, * JX, * N; /* * .. Array Arguments .. */ int * DESCA, * DESCX; float * A, * X; #endif { /* * Purpose * ======= * * PCTRMV performs one of the matrix-vector operations * * sub( X ) := sub( A )*sub( X ) or sub( X ) := sub( A )'*sub( X ) * * or * * sub( X ) := conjg( sub( A )' )*sub( X ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and, * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * sub( X ) is an n element subvector and sub( A ) is an n by n unit, * or non-unit, upper or lower triangular submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' sub( X ) := sub( A ) * sub( X ). * * TRANS = 'T' or 't' sub( X ) := sub( A )' * sub( X ). * * TRANS = 'C' or 'c' * sub( X ) := conjg( sub( A )' ) * sub( X ). * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input/local output) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. On exit, sub( X ) is overwritten with the transfor- * med subvector. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Xroc, * one, * tbeta, top, *zero; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, YAfr, YAld, YApbY, YAsum, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PCTRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PCTRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 8, &info ); PB_Cchkvec( ctxt, "PCTRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info ); } if( info ) { PB_Cabort( ctxt, "PCTRMV", info ); return; } #endif /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); size = type->size; one = type->one; zero = type->zero; /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process column(s), then zero it. */ if( Xroc == CCOLUMN ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process columns(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { cset_( &Xnp, zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ), &ione ); } } } } else { /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, &Xroc, &XA, XAd, &XAfr ); /* * Reuse sub( X ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( type, ROW, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd, &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * If sub( X ) is distributed in (a) process row(s), then zero it. */ if( Xroc == CROW ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Make sure I own some data and scale sub( X ) */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; cset_( &Xnq, zero, Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xld ); } } } } /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, size ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one, Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cgemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, size ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Ccgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } /* * sub( X ) := YA (if necessary) */ if( YApbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, zero, ((char *) X), Xi, Xj, Xd, &Xroc ); } } if( YAfr ) free( YA ); /* * End of PCTRMV */ } scalapack-1.8.0/PBLAS/SRC/psdot_.c0000640000175000017500000006663710363532303016262 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psdot_( int * N, float * DOT, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void psdot_( N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; float * DOT; /* * .. Array Arguments .. */ int * DESCX, * DESCY; float * X, * Y; #endif { /* * Purpose * ======= * * PSDOT forms the dot product of two subvectors, * * DOT := sub( X )**T * sub( Y ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvectors to be * multiplied. N must be at least zero. * * DOT (local output) REAL array * On exit, DOT specifies the dot product of the two subvectors * sub( X ) and sub( Y ) only in their scope (See below for fur- * ther details). * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char scope, * top; int OneBlock, OneDgrid, RRorCC, Square, Xcol, Xi, Xii, XinbD, Xinb1D, XisD, XisR, XisRow, Xj, Xjj, Xld, Xlinc, XmyprocD, XmyprocR, XnbD, XnpD, XnprocsD, XnprocsR, XprocD, XprocR, Xrow, Ycol, Yi, Yii, YinbD, Yinb1D, YisD, YisR, YisRow, Yj, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yrow, cdst, csrc, ctxt, dst, info, ione=1, mycol, myrow, npcol, nprow, rdst, rsrc, size, src; PBTYP_T * type; VVDOT_T dot; /* * .. Local Arrays .. */ char * buf = NULL; int Xd[DLEN_], Yd[DLEN_], dbuf[ DLEN_ ]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PSDOT", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PSDOT", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PSDOT", info ); return; } #endif DOT[REAL_PART] = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Handle degenerate case */ if( ( *N == 1 ) && ( ( Xd[ M_ ] == 1 ) || ( Yd[ M_ ] == 1 ) ) ) { type = PB_Cstypeset(); PB_Cpdot11( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Determine if sub( X ) is distributed or not */ if( ( XisRow = ( *INCX == Xd[M_] ) ) != 0 ) XisD = ( ( Xd[CSRC_] >= 0 ) && ( ( XnprocsD = npcol ) > 1 ) ); else XisD = ( ( Xd[RSRC_] >= 0 ) && ( ( XnprocsD = nprow ) > 1 ) ); /* * Determine if sub( Y ) is distributed or not */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) YisD = ( ( Yd[CSRC_] >= 0 ) && ( ( YnprocsD = npcol ) > 1 ) ); else YisD = ( ( Yd[RSRC_] >= 0 ) && ( ( YnprocsD = nprow ) > 1 ) ); /* * Are sub( X ) and sub( Y ) both row or column vectors ? */ RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) ); /* * XisD && YisD <=> both vector operands are indeed distributed */ if( XisD && YisD ) { /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); if( XisRow ) { XinbD = Xd[INB_]; XnbD = Xd[NB_]; Xld = Xd[LLD_]; Xlinc = Xld; XprocD = Xcol; XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow; XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xj, XinbD, XnbD ); } else { XinbD = Xd[IMB_]; XnbD = Xd[MB_]; Xld = Xd[LLD_]; Xlinc = 1; XprocD = Xrow; XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol; XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) ); Mfirstnb( Xinb1D, *N, Xi, XinbD, XnbD ); } /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( YisRow ) { YinbD = Yd[INB_]; YnbD = Yd[NB_]; Yld = Yd[LLD_]; Ylinc = Yld; YprocD = Ycol; YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow; YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yj, YinbD, YnbD ); } else { YinbD = Yd[IMB_]; YnbD = Yd[MB_]; Yld = Yd[LLD_]; Ylinc = 1; YprocD = Yrow; YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol; YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) ); Mfirstnb( Yinb1D, *N, Yi, YinbD, YnbD ); } /* * Do sub( X ) and sub( Y ) span more than one process ? */ OneDgrid = ( ( XnprocsD == 1 ) && ( YnprocsD == 1 ) ); OneBlock = ( ( Xinb1D >= *N ) && ( Yinb1D >= *N ) ); /* * Are sub( X ) and sub( Y ) distributed in the same manner ? */ Square = ( ( Xinb1D == Yinb1D ) && ( XnbD == YnbD ) && ( XnprocsD == YnprocsD ) ); if( !( XisR ) ) { /* * sub( X ) is not replicated */ if( YisR ) { /* * If sub( X ) is not replicated, but sub( Y ) is, a process row or column * YprocR need to be selected. It will contain the non-replicated vector used * to perform the dot product computation. */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing YprocR to be equal to XprocR. */ YprocR = XprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for YprocR to maximize the number of links, i.e reduce contention. */ YprocR = MModAdd1( XprocR, XnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for YprocR does not really matter. Select the process origin. */ YprocR = XprocD; } } else { /* * Neither sub( X ) nor sub( Y ) are replicated. If I am not in process row or * column XprocR and not in process row or column YprocR, then quick return. */ if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) ) return; } } else { /* * sub( X ) is distributed and replicated (so no quick return possible) */ if( YisR ) { /* * sub( Y ) is distributed and replicated as well */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR and YprocR to be equal * to zero. */ XprocR = YprocR = 0; } else { /* * Otherwise, communication has to occur, so select YprocR to be zero and the * next process row or column for XprocR in order to maximize the number of * used links, i.e reduce contention. */ YprocR = 0; XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, select the * origin processes. */ XprocR = YprocD; YprocR = XprocD; } } else { /* * sub( Y ) is distributed, but not replicated */ if( RRorCC ) { /* * sub( X ) and sub( Y ) are both row or column vectors */ if( ( OneDgrid || OneBlock || Square ) && ( XprocD == YprocD ) ) { /* * sub( X ) and sub( Y ) start in the same process row or column XprocD=YprocD. * Enforce a purely local operation by choosing XprocR to be equal to YprocR. */ XprocR = YprocR; } else { /* * Otherwise, communication has to occur, so choose the next process row or * column for XprocR to maximize the number of links, i.e reduce contention. */ XprocR = MModAdd1( YprocR, YnprocsR ); } } else { /* * sub( X ) and sub( Y ) are distributed in orthogonal directions, what is * chosen for XprocR does not really matter. Select the origin process. */ XprocR = YprocD; } } } /* * Even if sub( X ) and/or sub( Y ) are replicated, only two process row or * column are active, namely XprocR and YprocR. If any of those operands is * replicated, broadcast will occur (unless there is an easy way out). */ type = PB_Cstypeset(); size = type->size; dot = type->Fvvdotu; /* * A purely operation occurs iff the operands start in the same process and if * either the grid is mono-dimensional or there is a single local block to be * operated with or if both operands are aligned. */ if( ( ( RRorCC && ( XprocD == YprocD ) && ( XprocR == YprocR ) ) || ( !( RRorCC ) && ( XprocD == YprocR ) && ( XprocR == YprocD ) ) ) && ( OneDgrid || OneBlock || ( RRorCC && Square ) ) ) { if( ( !XisR && ( XmyprocR == XprocR ) && !YisR && ( YmyprocR == YprocR ) ) || ( !XisR && YisR && ( YmyprocR == YprocR ) ) || ( !YisR && XisR && ( XmyprocR == XprocR ) ) || ( XisR && YisR ) ) { XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( ( XnpD > 0 ) && ( YnpD > 0 ) ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } } /* * Combine the local results in sub( X )'s scope */ if( ( XisR && YisR ) || ( XmyprocR == XprocR ) ) { scope = ( XisRow ? CROW : CCOLUMN ); top = PB_Ctop( &ctxt, COMBINE, &scope, TOP_GET ); Csgsum2d( ctxt, &scope, top, 1, 1, ((char *) DOT), 1, -1, 0 ); } if( RRorCC && XisR && YisR ) return; } else if( ( RRorCC && OneDgrid ) || OneBlock || Square ) { /* * Otherwise, it may be possible to compute the desired dot-product in a single * message exchange iff the grid is mono-dimensional and the operands are * distributed in the same direction, or there is just one block to be exchanged * or if both operands are similarly distributed in their respective direction. */ if( ( YmyprocR == YprocR ) ) { /* * The processes owning a piece of sub( Y ) send it to the corresponding * process owning s piece of sub ( X ). */ YnpD = PB_Cnumroc( *N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD ); if( YnpD > 0 ) { dst = XprocD + MModSub( YmyprocD, YprocD, YnprocsD ); dst = MPosMod( dst, XnprocsD ); if( XisRow ) { rdst = XprocR; cdst = dst; } else { rdst = dst; cdst = XprocR; } if( ( myrow == rdst ) && ( mycol == cdst ) ) { dot( &YnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), &Ylinc ); } else { if( YisRow ) Csgesd2d( ctxt, 1, YnpD, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); else Csgesd2d( ctxt, YnpD, 1, Mptr( ((char *) Y), Yii, Yjj, Yld, size ), Yld, rdst, cdst ); } } } if( XmyprocR == XprocR ) { /* * The processes owning a piece of sub( X ) receive the corresponding local * piece of sub( Y ), compute the local dot product and combine the results * within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XnpD > 0 ) { src = YprocD + MModSub( XmyprocD, XprocD, XnprocsD ); src = MPosMod( src, YnprocsD ); if( YisRow ) { rsrc = YprocR; csrc = src; } else { rsrc = src; csrc = YprocR; } if( ( myrow != rsrc ) || ( mycol != csrc ) ) { buf = PB_Cmalloc( XnpD * size ); if( YisRow ) Csgerv2d( ctxt, 1, XnpD, buf, 1, rsrc, csrc ); else Csgerv2d( ctxt, XnpD, 1, buf, XnpD, rsrc, csrc ); dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } else { /* * General case, copy sub( Y ) within sub( X )'s scope, compute the local * results and combine them within sub( X )'s scope. */ XnpD = PB_Cnumroc( *N, 0, Xinb1D, XnbD, XmyprocD, XprocD, XnprocsD ); if( XisRow ) { PB_Cdescset( dbuf, 1, *N, 1, Xinb1D, 1, XnbD, XprocR, XprocD, ctxt, 1 ); } else { PB_Cdescset( dbuf, *N, 1, Xinb1D, 1, XnbD, 1, XprocD, XprocR, ctxt, MAX( 1, XnpD ) ); } if( ( XmyprocR == XprocR ) && ( XnpD > 0 ) ) buf = PB_Cmalloc( XnpD * size ); if( YisRow ) { PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, ((char *) Y), Yi, Yj, Yd, ROW, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, type->one, ((char *) Y), Yi, Yj, Yd, COLUMN, type->zero, buf, 0, 0, dbuf, ( XisRow ? ROW : COLUMN ) ); } if( XmyprocR == XprocR ) { if( XnpD > 0 ) { dot( &XnpD, ((char *) DOT), Mptr( ((char *) X), Xii, Xjj, Xld, size ), &Xlinc, buf, &ione ); if( buf ) free( buf ); } if( XisRow ) { top = PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } else { top = PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, -1, 0 ); } } } /* * Send the DOT product result within sub( Y )'s scope */ if( XisR || YisR ) { /* * Either sub( X ) or sub( Y ) are replicated, so that every process should have * the result -> broadcast it orthogonally from sub( X )'s direction. */ if( XisRow ) { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( XmyprocR == XprocR ) Csgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Csgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, XmyprocD ); } else { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( XmyprocR == XprocR ) Csgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Csgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, XmyprocD, XprocR ); } } else { /* * Neither sub( X ) nor sub( Y ) are replicated */ if( RRorCC ) { /* * Both sub( X ) are distributed in the same direction -> the process row or * column XprocR sends the result to the process row or column YprocR. */ if( XprocR != YprocR ) { if( XmyprocR == XprocR ) { if( XisRow ) Csgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YprocR, YmyprocD ); else Csgesd2d( ctxt, 1, 1, ((char *) DOT), 1, YmyprocD, YprocR ); } else if( YmyprocR == YprocR ) { if( XisRow ) Csgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XprocR, XmyprocD ); else Csgerv2d( ctxt, 1, 1, ((char *) DOT), 1, XmyprocD, XprocR ); } } } else { /* * Otherwise, the process at the intersection of sub( X )'s and sub( Y )'s * scope, broadcast the result within sub( Y )'s scope. */ if( YmyprocR == YprocR ) { if( YisRow ) { top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( YmyprocD == XprocR ) Csgebs2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1 ); else Csgebr2d( ctxt, ROW, top, 1, 1, ((char*)DOT), 1, YprocR, XprocR ); } else { top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( YmyprocD == XprocR ) Csgebs2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1 ); else Csgebr2d( ctxt, COLUMN, top, 1, 1, ((char*)DOT), 1, XprocR, YprocR ); } } } } } else if( !( XisD ) && YisD ) { /* * sub( X ) is not distributed and sub( Y ) is distributed. */ type = PB_Cstypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } else if( XisD && !( YisD ) ) { /* * sub( X ) is distributed and sub( Y ) is not distributed. */ type = PB_Cstypeset(); PB_CpdotND( type, *N, ((char *) DOT), ((char *) Y), Yi, Yj, Yd, *INCY, ((char *) X), Xi, Xj, Xd, *INCX, type->Fvvdotu ); } else { /* * Neither sub( X ) nor sub( Y ) are distributed */ type = PB_Cstypeset(); PB_CpdotNN( type, *N, ((char *) DOT), ((char *) X), Xi, Xj, Xd, *INCX, ((char *) Y), Yi, Yj, Yd, *INCY, type->Fvvdotu ); } /* * End of PSDOT */ } scalapack-1.8.0/PBLAS/SRC/pzsymm_.c0000640000175000017500000005326710363532303016463 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzsymm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pzsymm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; double * A, * B, * C; #endif { /* * Purpose * ======= * * PZSYMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the symmetric submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) COMPLEX*16 array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) COMPLEX*16 * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) COMPLEX*16 array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZSYMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PZSYMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PZSYMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PZSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PZSYMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PZSYMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) && ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) { if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else if( !( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PZSYMM */ } scalapack-1.8.0/PBLAS/SRC/pssymm_.c0000640000175000017500000005272710363532303016454 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssymm_( F_CHAR_T SIDE, F_CHAR_T UPLO, int * M, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void pssymm_( SIDE, UPLO, M, N, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T SIDE, UPLO; int * IA, * IB, * IC, * JA, * JB, * JC, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PSSYMM performs one of the matrix-matrix operations * * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * or * * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), * * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L', * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and, * * sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Alpha and beta are scalars, sub( A ) is a symmetric submatrix and * sub( B ) and sub( C ) are m by n submatrices. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * On entry, SIDE specifies whether the symmetric submatrix * sub( A ) appears on the left or right in the operation as * follows: * * SIDE = 'L' or 'l' * sub( C ) := alpha*sub( A )*sub( B ) + beta*sub( C ), * * SIDE = 'R' or 'r' * sub( C ) := alpha*sub( B )*sub( A ) + beta*sub( C ). * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * sub( C ). M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * sub( C ). N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at * at least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with SIDE = 'L' or 'l', this array contains * the local entries corresponding to the entries of the m by m * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * Before entry with SIDE = 'R' or 'r', this array contains * the local entries corresponding to the entries of the n by n * symmetric submatrix sub( A ), such that when UPLO = 'U' or * 'u', this array contains the local entries of the upper tri- * angular part of the symmetric submatrix sub( A ), and the * local entries of the strictly lower triangular of sub( A ) * are not referenced, and when UPLO = 'L' or 'l', this array * contains the local entries of the lower triangular part of * the symmetric submatrix sub( A ), and the local entries of * the strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) REAL array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ). Before entry, this array contains * the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, SideOp, UploA, cbtop, cbtopsave, cctop, cctopsave, rbtop, rbtopsave, rctop, rctopsave; int Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info, lside, mycol, myrow, nb, npcol, nprow, upper; double ABCest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ lside = ( ( SideOp = Mupcase( F2C_CHAR( SIDE )[0] ) ) == CLEFT ); upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !lside ) && ( SideOp != CRIGHT ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYMM", "Illegal SIDE = %c\n", SideOp ); info = -1; } else if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYMM", "Illegal UPLO = %c\n", UploA ); info = -2; } if( lside ) { PB_Cchkmat( ctxt, "PSSYMM", "A", *M, 3, *M, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } else { PB_Cchkmat( ctxt, "PSSYMM", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PSSYMM", "B", *M, 3, *N, 4, Bi, Bj, Bd, 13, &info ); } PB_Cchkmat( ctxt, "PSSYMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 18, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * If alpha is zero, sub( C ) := beta * sub( C ). */ if( ALPHA[REAL_PART] == ZERO ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( BETA[REAL_PART] == ONE ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (N >> M) * BCest : Both operands sub( B ) and sub( C ) are communicated (M >> N) */ if( lside ) { tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ABCest = (double)(*M) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest = (double)(*N) * ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp3 ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp1 = DNROC( *N, Ad[NB_], npcol ); tmp2 = DNROC( *M, Bd[MB_], nprow ); ABCest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp1 / TWO ) + ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp2 + tmp2 * CBRATIO ) ); tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *N, Bd[NB_], npcol ); tmp4 = DNROC( *N, Cd[NB_], npcol ); BCest = (double)(*M) * ( ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp4 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseABC = ( ( 1.5 * ABCest ) <= BCest ); /* * BLACS topologies are enforced iff M and N are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) ); rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ChooseABC ) { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } } } if( lside ) DirAB = ( rbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); else DirAB = ( cbtop == CTOP_DRING ? CBACKWARD : CFORWARD ); PB_CpsymmAB( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( ForceTop ) { rbtopsave = rbtop; rctopsave = rctop; cbtopsave = cbtop; cctopsave = cctop; if( lside ) { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( rbtop != CTOP_DRING ) && ( rbtop != CTOP_IRING ) && ( rbtop != CTOP_SRING ) ) || ( rbtop != rctop ) ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_IRING ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } else { /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( ( cbtop != CTOP_DRING ) && ( cbtop != CTOP_IRING ) && ( cbtop != CTOP_SRING ) ) || ( cbtop != cctop ) ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_IRING ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING ); /* * Remove the next 2 lines when the BLACS combine operations support ring * topologies */ rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT ); } rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_DEFAULT ); rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_DEFAULT ); } } if( lside ) DirAB = ( ( rbtop == CTOP_DRING || rctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); else DirAB = ( ( cbtop == CTOP_DRING || cctop == CTOP_DRING ) ? CBACKWARD : CFORWARD ); PB_CpsymmBC( type, &DirAB, NOCONJG, &SideOp, &UploA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rbtopsave = *PB_Ctop( &ctxt, BCAST, ROW, &rbtopsave ); rctopsave = *PB_Ctop( &ctxt, COMBINE, ROW, &rctopsave ); cbtopsave = *PB_Ctop( &ctxt, BCAST, COLUMN, &cbtopsave ); cctopsave = *PB_Ctop( &ctxt, COMBINE, COLUMN, &cctopsave ); } /* * End of PSSYMM */ } scalapack-1.8.0/PBLAS/SRC/pcahemv_.c0000640000175000017500000005623510363532303016545 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pcahemv_( F_CHAR_T UPLO, int * N, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * BETA, float * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pcahemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PCAHEMV performs the matrix-vector operation * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a n element real subvec- * tor, sub( X ) is an n element subvector and sub( A ) is an n by n * Hermitian submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the Hermitian submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * Hermitian submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * Hermitian submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * Hermitian submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * Note that the imaginary parts of the local entries corres- * ponding to the diagonal elements of sub( A ) need not be * set and assumed to be zero. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper, usiz; float * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PCAHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PCAHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PCAHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PCAHEMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cctypeset(); size = type->size; utyp = PB_Cstypeset(); usiz = type->usiz; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( utyp, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) sascal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) sascal_( &Amp, ((char *) tbeta), YC, &ione ); } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { cagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); cagemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { cagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, usiz ), &ione ); cagemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || Yrow < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; sascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Csgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Csgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || Ycol < 0 ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { sascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PCAHEMV */ } scalapack-1.8.0/PBLAS/SRC/pzatrmv_.c0000640000175000017500000006176010363532303016624 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T DIAG, TRANS, UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PZATRMV performs one of the matrix-vector operations * * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + * abs( beta*sub( Y ) ), * or * * sub( Y ) := abs( alpha )*abs( conjg( sub( A )' ) )*abs( sub( X ) ) * + abs( beta*sub( Y ) ), * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, * and, * * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. * * Alpha and beta are real scalars, sub( Y ) is a real subvector, * sub( X ) is a subvector and sub( A ) is an n by n triangular subma- * trix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the submatrix sub( A ) is * an upper or lower triangular submatrix as follows: * * UPLO = 'U' or 'u' sub( A ) is an upper triangular * submatrix, * * UPLO = 'L' or 'l' sub( A ) is a lower triangular * submatrix. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'T' or 't' * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + * |beta*sub( Y )|. * * TRANS = 'C' or 'c' * sub( Y ) := |alpha|*|conjg( sub( A )' )|*|sub( X )| + * |beta*sub( Y )|. * * DIAG (global input) CHARACTER*1 * On entry, DIAG specifies whether or not sub( A ) is unit * triangular as follows: * * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- * gular, * * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- * angular. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) COMPLEX*16 array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains the * local entries corresponding to the entries of the upper tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly lower triangular part of * the submatrix sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains the * local entries corresponding to the entries of the lower tri- * angular submatrix sub( A ), and the local entries correspon- * ding to the entries of the strictly upper triangular part of * the submatrix sub( A ) are not referenced. * Note that when DIAG = 'U' or 'u', the local entries corres- * ponding to the diagonal elements of the submatrix sub( A ) * are not referenced either, but are assumed to be unity. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- * try, this array contains the local entries of the matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- * try, this array contains the local entries of the matrix Y. * On exit, sub( Y ) is overwritten by the updated subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DiagA, TranOp, UploA, Yroc, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr, YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran, nounit, npcol, nprow, size, upper, usiz; double * tbeta; PBTYP_T * type, * utyp; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XA = NULL, * YA = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal UPLO = %c\n", UploA ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal TRANS = %c\n", TranOp ); info = -2; } else if( ( !nounit ) && ( DiagA != CUNIT ) ) { PB_Cwarn( ctxt, __LINE__, "PZATRMV", "Illegal DIAG = %c\n", DiagA ); info = -3; } PB_Cchkmat( ctxt, "PZATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); PB_Cchkvec( ctxt, "PZATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info ); PB_Cchkvec( ctxt, "PZATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PZATRMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cztypeset(); size = type->size; utyp = PB_Cdtypeset(); usiz = type->usiz; /* * and when alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); if( notran ) { /* * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) */ PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process rows spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } else { /* * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) */ PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, &YAsum, &YApbY ); /* * Replicate sub( X ) in process columns spanned by sub( A ) -> XA */ PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XAld = XAd[LLD_]; YAld = YAd[LLD_]; /* * Scale YA in the case sub( Y ) has been reused */ if( notran && !( YApbY ) ) { /* * YA resides in (a) process column(s) */ if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Amp > 0 ) dascal_( &Amp, ((char *) tbeta), YA, &ione ); } } else if( !( notran ) && !( YApbY ) ) { /* * YA resides in (a) process row(s) */ if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YA */ if( Anq > 0 ) dascal_( &Anq, ((char *) tbeta), YA, &YAld ); } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, YA, &ione ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { zagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); } } } else { if( notran ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld, Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, 0, Akq, XAld, size ), &XAld, one, Mptr( YA, Akp, 0, YAld, usiz ), &ione ); } } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld, Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { zagemv_( TRANS, &Amp0, &Anq0, one, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XA, Akp, 0, XAld, size ), &ione, one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); } } } } } if( XAfr ) free( XA ); if( notran ) { /* * Combine the partial column results into YA */ if( YAsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, YAd[CSRC_] ); } } else { /* * Combine the partial row results into YA */ if( YAsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], mycol ); } } /* * sub( Y ) := beta * sub( Y ) + YA (if necessary) */ if( YApbY ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, usiz ), &Yld ); } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], usiz ), INCY ); } } } if( notran ) { PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } else { PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, ((char *) Y), Yi, Yj, Yd, &Yroc ); } } if( YAfr ) free( YA ); /* * End of PZATRMV */ } scalapack-1.8.0/PBLAS/SRC/pdamax_.c0000640000175000017500000004527210363532303016373 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdamax_( int * N, double * AMAX, int * INDX, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdamax_( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * INDX, * IX, * JX, * N; double * AMAX; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDAMAX computes the global index of the maximum element in absolute * value of a subvector sub( X ). The global index is returned in INDX * and the value of that element is returned in AMAX, * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * AMAX (global output) DOUBLE PRECISION array * On exit, AMAX specifies the largest entry in absolute value * of the subvector sub( X ) only in its scope (See below for * further details). * * INDX (global output) INTEGER * On exit, INDX specifies the global index of the maximum ele- * ment in absolute value of the subvector sub( X ) only in its * scope (See below for further details). * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char cbtop, cctop, rbtop, rctop; int Xcol, Xgindx, Xi, Xii, Ximb, Xinb, Xj, Xjj, Xlindx, Xld, Xmb, Xnb, Xnp, Xnq, Xrow, Xsrc, ctxt, dist, dst, idumm, info, k, maxpos, mycol, mydist, myrow, npcol, nprow, src; /* * .. Local Arrays .. */ int Xd[DLEN_]; double work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDAMAX", "X", *N, 1, Xi, Xj, Xd, *INCX, 7, &info ); if( info ) { PB_Cabort( ctxt, "PDAMAX", info ); return; } #endif /* * Initialize INDX and AMAX */ *INDX = 0; *AMAX = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *INCX == 1 ) && ( Xd[M_] == 1 ) && ( *N == 1 ) ) { /* * Make sure I own some data and compute INDX and AMAX */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { *INDX = *JX; *AMAX = X[Xii+Xjj*Xd[LLD_]]; } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { rctop = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); if( ( rctop == CTOP_DEFAULT ) || ( rctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { Xld = Xd[LLD_]; Xlindx = Xjj - 1 + idamax_( &Xnq, ((char*)(X+(Xii+Xjj*Xld))), &Xld ); Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); work[0] = X[Xii+Xlindx*Xld]; work[1] = ((double)( Xgindx+1 )); } else { work[0] = ZERO; work[1] = ZERO; } /* * Combine the local results using a 1-tree topology within process column 0 * if npcol > 1 or Xcol >= 0, i.e sub( X ) is distributed. */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Cdgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2, myrow, src ); if( ABS( work[0] ) < ABS( work[2] ) ) { work[0] = work[2]; work[1] = work[3]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of INDX and AMAX within * their process row. */ rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Cdgebs2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, ROW, &rbtop, 2, 1, ((char*)work), 2, myrow, 0 ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ *AMAX = work[0]; *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : ( (int)(work[1]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Xinb = Xd[INB_ ]; Xnb = Xd[NB_ ]; Xsrc = Xd[CSRC_]; Xnq = PB_Cnumroc( *N, Xj, Xinb, Xnb, mycol, Xsrc, npcol ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnq > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; Xlindx = Xjj - 1 + idamax_( &Xnq, ((char*)(X+(Xii+Xjj*Xld))), &Xld ); *AMAX = X[Xii+Xlindx*Xld]; } else { *AMAX = ZERO; } if( Xcol >= 0 ) { /* * Combine leave on all the local maximum if Xcol >= 0, i.e sub( X ) is * distributed */ Cdgamx2d( ctxt, ROW, &rctop, 1, 1, ((char*)AMAX), 1, &idumm, &maxpos, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( *AMAX != ZERO ) { rbtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == maxpos ) { Mindxl2g( Xgindx, Xlindx, Xinb, Xnb, mycol, Xsrc, npcol ); *INDX = Xgindx + 1; Cigebs2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, ROW, &rbtop, 1, 1, ((char*)INDX), 1, myrow, maxpos ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *JX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( *AMAX == ZERO ) ? ( *JX ) : Xlindx + 1 ); } } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { cctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); if( ( cctop == CTOP_DEFAULT ) || ( cctop == CTOP_TREE1 ) ) { /* * Inline the 1-tree combine for communication savings */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { Xld = Xd[LLD_]; Xlindx = Xii - 1 + idamax_( &Xnp, ((char*)(X+(Xii+Xjj*Xld))), INCX ); Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); work[0] = X[Xlindx+Xjj*Xld]; work[1] = ((double)( Xgindx+1 )); } else { work[0] = ZERO; work[1] = ZERO; } /* * Combine the local results using a 1-tree topology within process row 0 * if nprow > 1 or Xrow >= 0, i.e sub( X ) is distributed. */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Cdgerv2d( ctxt, 2, 1, ((char*) &work[2]), 2, src, mycol ); if( ABS( work[0] ) < ABS( work[2] ) ) { work[0] = work[2]; work[1] = work[3]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process row 0 broadcasts the combined values of INDX and AMAX within their * process column. */ cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Cdgebs2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, COLUMN, &cbtop, 2, 1, ((char*)work), 2, 0, mycol ); } } /* * Set INDX and AMAX to the replicated answers contained in work. If AMAX is * zero, then select a coherent INDX. */ *AMAX = work[0]; *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : ( (int)(work[1]) ) ); } else { /* * Otherwise use the current topology settings to combine the results */ Ximb = Xd[IMB_ ]; Xmb = Xd[MB_ ]; Xsrc = Xd[RSRC_]; Xnp = PB_Cnumroc( *N, Xi, Ximb, Xmb, myrow, Xsrc, nprow ); /* * Make sure I own some data and compute local INDX and AMAX */ if( Xnp > 0 ) { /* * Compute the local maximum and its corresponding local index */ Xld = Xd[LLD_]; Xlindx = Xii - 1 + idamax_( &Xnp, ((char*)(X+(Xii+Xjj*Xld))), INCX ); *AMAX = X[Xlindx+Xjj*Xld]; } else { *AMAX = ZERO; } if( Xrow >= 0 ) { /* * Combine leave on all the local maximum if Xrow >= 0, i.e sub( X ) is * distributed. */ Cdgamx2d( ctxt, COLUMN, &cctop, 1, 1, ((char*)AMAX), 1, &maxpos, &idumm, 1, -1, mycol ); /* * Broadcast the corresponding global index */ if( *AMAX != ZERO ) { cbtop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == maxpos ) { Mindxl2g( Xgindx, Xlindx, Ximb, Xmb, myrow, Xsrc, nprow ); *INDX = Xgindx + 1; Cigebs2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1 ); } else { Cigebr2d( ctxt, COLUMN, &cbtop, 1, 1, ((char*)INDX), 1, maxpos, mycol ); } } else { /* * If AMAX is zero, then select a coherent INDX. */ *INDX = *IX; } } else { /* * sub( X ) is not distributed. If AMAX is zero, then select a coherent INDX. */ *INDX = ( ( *AMAX == ZERO ) ? ( *IX ) : Xlindx + 1 ); } } } return; } /* * End of PDAMAX */ } scalapack-1.8.0/PBLAS/SRC/pdsyrk_.c0000640000175000017500000004507710363532303016440 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * BETA, double * C, int * IC, int * JC, int * DESCC ) #else void pdsyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANS, UPLO; int * IA, * IC, * JA, * JC, * K, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCC; double * A, * C; #endif { /* * Purpose * ======= * * PDSYRK performs one of the symmetric rank k operations * * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * or * * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and, * * sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N', * A(IA:IA+K-1,JA:JA+N-1) otherwise. * * Alpha and beta are scalars, sub( C ) is an n by n symmetric * submatrix and sub( A ) is an n by k submatrix in the first case and a * k by n submatrix in the second case. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array C containing the upper or lower triangular part * of the symmetric submatrix sub( C ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( C ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( C ) are to be * referenced. * * TRANS (global input) CHARACTER*1 * On entry, TRANS specifies the operation to be performed as * follows: * * TRANS = 'N' or 'n' * sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ), * * TRANS = 'T' or 't' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * TRANS = 'C' or 'c' * sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ). * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( C ). * N must be at least zero. * * K (global input) INTEGER * On entry, with TRANS = 'N' or 'n', K specifies the number of * columns of the submatrix sub( A ), and with TRANS = 'T' or * 't' or 'C' or 'c', K specifies the number of rows of the sub- * matrix sub( A ). K must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array A * corresponding to the entries of the submatrix sub( A ) need * not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at * least Lc( 1, JA+N-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * Before entry with TRANS = 'N' or 'n', this array contains the * local entries corresponding to the entries of the n by k sub- * matrix sub( A ), otherwise the local entries corresponding to * the entries of the k by n submatrix sub( A ). * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) DOUBLE PRECISION array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly lower triangular of sub( C ) * are not referenced. On exit, the upper triangular part of * sub( C ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( C ), and the local entries * corresponding to the strictly upper triangular of sub( C ) * are not referenced. On exit, the lower triangular part of * sub( C ) is overwritten by the lower triangular part of the * updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, notran, npcol, nprow, upper; double Aest, ACest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploC != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYRK", "Illegal UPLO = %c\n", UploC ); info = -1; } else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PDSYRK", "Illegal TRANS = %c\n", TranOp ); info = -2; } if( notran ) PB_Cchkmat( ctxt, "PDSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9, &info ); else PB_Cchkmat( ctxt, "PDSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9, &info ); PB_Cchkmat( ctxt, "PDSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYRK", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cdtypeset(); /* * And when alpha or K is zero */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero, ((char *) C), Ci, Cj, Cd ); } else { PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA), ((char *) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ACest: both operands sub( A ) and sub( C ) are communicated (K >> N) * Aest : only sub( A ) is communicated (N >> K) */ if( notran ) { tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest = (double)(*N) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) + ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Ad[MB_], nprow ); Aest = (double)(*K) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) ); } else { tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest = (double)(*N) * ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) ); tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *N, Ad[NB_], npcol ); Aest = (double)(*K) * ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) + ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) ); } /* * Shift a little the cross-over point between both algorithms. */ ChooseAC = ( ( 1.3 * ACest ) <= Aest ); /* * BLACS topologies are enforced iff N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *N > nb ) && ( *K > nb ) ); if( ChooseAC ) { if( notran ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CCOMBINE; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CCOMBINE; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; } else { TopR = CTOP_DRING; TopC = CTOP_IRING; } rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC ); /* * Remove the next line when the BLACS combine operations support ring * topologies */ ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { if( notran ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); if( ForceTop ) { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); } else { OpC = CBCAST; ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { OpR = CBCAST; rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING ); } DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); } PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PDSYRK */ } scalapack-1.8.0/PBLAS/SRC/pdsymv_.c0000640000175000017500000005527510363532303016447 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdsymv_( F_CHAR_T UPLO, int * N, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * BETA, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pdsymv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; double * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; double * A, * X, * Y; #endif { /* * Purpose * ======= * * PDSYMV performs the matrix-vector operation * * sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ), * * where * * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha and beta are scalars, sub( X ) and sub( Y ) are n element sub- * vectors and sub( A ) is an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) DOUBLE PRECISION * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A * and X corresponding to the entries of the submatrix sub( A ) * and the subvector sub( X ) need not be set on input. * * A (local input) DOUBLE PRECISION array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries of the upper triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly lower triangular of sub( A ) are not referenced. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries of the lower triangular part of the * symmetric submatrix sub( A ), and the local entries of the * strictly upper triangular of sub( A ) are not referenced. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * X (local input) DOUBLE PRECISION array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * BETA (global input) DOUBLE PRECISION * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array Y * corresponding to the entries of the subvector sub( Y ) need * not be set on input. * * Y (local input/local output) DOUBLE PRECISION array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten by the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA, * one, top; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; double * tbeta; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkmat( ctxt, "PDSYMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); PB_Cchkvec( ctxt, "PDSYMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); PB_Cchkvec( ctxt, "PDSYMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PDSYMV", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cdtypeset(); /* * When alpha is zero */ if( ALPHA[REAL_PART] == ZERO ) { /* * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol */ PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, &Ycol ); if( *INCY == Yd[M_] ) { /* * sub( Y ) resides in (a) process row(s) */ if( ( myrow == Yrow ) || ( Yrow < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], npcol ); if( Ynq > 0 ) { Yld = Yd[LLD_]; if( BETA[REAL_PART] == ZERO ) { dset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } else { dscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yld, type->size ), &Yld ); } } } } else { /* * sub( Y ) resides in (a) process column(s) */ if( ( mycol == Ycol ) || ( Ycol < 0 ) ) { /* * Make sure I own some data and scale sub( Y ) */ Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], nprow ); if( Ynp > 0 ) { if( BETA[REAL_PART] == ZERO ) { dset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } else { dscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, Yjj, Yd[LLD_], type->size ), INCY ); } } } } return; } /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process * columns spanned by sub( A ) */ if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) { PB_CInOutV( type, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, &YRsum, &YRpbY ); PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); } else { PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, &YCsum, &YCpbY ); PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); } /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, ROW, &XC, XCd, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, COLUMN, &XR, XRd, &XRfr ); } one = type->one; /* * Local matrix-vector multiply iff I own some data */ Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; /* * Scale YR or YC in the case sub( Y ) has been reused */ if( YisRow ) { /* * YR resides in (a) process row(s) */ if( !YRpbY ) { if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) { /* * Make sure I own some data and scale YR */ if( Anq > 0 ) { if( tbeta[REAL_PART] == ZERO ) { dset_( &Anq, ((char *) tbeta), YR, &YRld ); } else { dscal_( &Anq, ((char *) tbeta), YR, &YRld ); } } } } } else { /* * YC resides in (a) process column(s) */ if( !YCpbY ) { if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) { /* * Make sure I own some data and scale YC */ if( Amp > 0 ) { if( tbeta[REAL_PART] == ZERO ) { dset_( &Amp, ((char *) tbeta), YC, &ione ); } else { dscal_( &Amp, ((char *) tbeta), YC, &ione ); } } } } } /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { dgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, YC, &ione ); dgemv_( C2F_CHAR( TRAN ), &Akp, &Anq0, ((char *)ALPHA), Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzsymv ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, PB_Ctzsymv ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { dgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, size ), &ione ); dgemv_( C2F_CHAR( TRAN ), &Amp0, &Anq0, ((char *) ALPHA), Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, size ), &YRld ); } } } } if( XCfr ) free( XC ); if( XRfr ) free( XR ); if( YisRow ) { /* * Combine the partial column results into YC */ if( YCsum ) { YCd[CSRC_] = 0; if( Amp > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); } } /* * Combine the partial row results into YR */ if( YRsum && ( Anq > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], mycol ); } /* * YR := YR + YC */ PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, YR, 0, 0, YRd, ROW ); /* * sub( Y ) := beta * sub( Y ) + YR (if necessary) */ if( YRpbY ) { PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW ); } } else { /* * Combine the partial row results into YR */ if( YRsum ) { YRd[RSRC_] = 0; if( Anq > 0 ) { top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, mycol ); } } /* * Combine the partial column results into YC */ if( YCsum && ( Amp > 0 ) ) { top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, YCd[CSRC_] ); } /* * YC := YR + YC */ PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, YC, 0, 0, YCd, COLUMN ); /* * sub( Y ) := beta * sub( Y ) + YC (if necessary) */ if( YCpbY ) { PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd, COLUMN ); } } if( YCfr ) free( YC ); if( YRfr ) free( YR ); /* * End of PDSYMV */ } scalapack-1.8.0/PBLAS/SRC/pssyr2_.c0000640000175000017500000004240610363532303016357 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pssyr2_( F_CHAR_T UPLO, int * N, float * ALPHA, float * X, int * IX, int * JX, int * DESCX, int * INCX, float * Y, int * IY, int * JY, int * DESCY, int * INCY, float * A, int * IA, int * JA, int * DESCA ) #else void pssyr2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY, A, IA, JA, DESCA ) /* * .. Scalar Arguments .. */ F_CHAR_T UPLO; int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, * N; float * ALPHA; /* * .. Array Arguments .. */ int * DESCA, * DESCX, * DESCY; float * A, * X, * Y; #endif { /* * Purpose * ======= * * PSSYR2 performs the symmetric rank 2 operation * * sub( A ) := alpha*sub( X )*sub( Y )' + * alpha*sub( Y )*sub( X )' + sub( A ) , * * where * * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and * sub( A ) is an n by n symmetric submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * On entry, UPLO specifies whether the local pieces of * the array A containing the upper or lower triangular part * of the symmetric submatrix sub( A ) are to be referenced as * follows: * * UPLO = 'U' or 'u' Only the local pieces corresponding to * the upper triangular part of the * symmetric submatrix sub( A ) are to be * referenced, * * UPLO = 'L' or 'l' Only the local pieces corresponding to * the lower triangular part of the * symmetric submatrix sub( A ) are to be * referenced. * * N (global input) INTEGER * On entry, N specifies the order of the submatrix sub( A ). * N must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays X * and Y corresponding to the entries of the subvectors sub( X ) * and sub( Y ) respectively need not be set on input. * * X (local input) REAL array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input) REAL array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * A (local input/local output) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+N-1 ). Before entry, this array contains * the local entries of the matrix A. * Before entry with UPLO = 'U' or 'u', this array contains * the local entries corresponding to the upper triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly lower triangular of sub( A ) * are not referenced. On exit, the upper triangular part of * sub( A ) is overwritten by the upper triangular part of the * updated submatrix. * Before entry with UPLO = 'L' or 'l', this array contains * the local entries corresponding to the lower triangular part * of the symmetric submatrix sub( A ), and the local entries * corresponding to the strictly upper triangular of sub( A ) * are not referenced. On exit, the lower triangular part of * sub( A ) is overwritten by the lower triangular part of the * updated submatrix. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char UploA; int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_], YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_]; char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, * YR = NULL; /* .. * .. Executable Statements .. * */ upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) { if( ( !upper ) && ( UploA != CLOWER ) ) { PB_Cwarn( ctxt, __LINE__, "PSSYR2", "Illegal UPLO = %c\n", UploA ); info = -1; } PB_Cchkvec( ctxt, "PSSYR2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info ); PB_Cchkvec( ctxt, "PSSYR2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); PB_Cchkmat( ctxt, "PSSYR2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info ); } if( info ) { PB_Cabort( ctxt, "PSSYR2", info ); return; } #endif /* * Quick return if possible */ if( (*N == 0) || ( ALPHA[REAL_PART] == ZERO ) ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Get type structure */ type = PB_Cstypeset(); /* * Compute descriptor Ad0 for sub( A ) */ PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); /* * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by * sub( A ) */ if( *INCX == Xd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, ROW, &XR, XRd0, &XRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd0, ROW, &XC, XCd0, &XCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, COLUMN, &XC, XCd0, &XCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd0, COLUMN, &XR, XRd0, &XRfr ); } /* * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by * sub( A ) */ if( *INCY == Yd[M_] ) { PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, ROW, &YR, YRd0, &YRfr ); PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0, YRd0, ROW, &YC, YCd0, &YCfr ); } else { PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, COLUMN, &YC, YCd0, &YCfr ); PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0, YCd0, COLUMN, &YR, YRd0, &YRfr ); } /* * Local rank-2 update if I own some data */ Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); if( ( Amp > 0 ) && ( Anq > 0 ) ) { size = type->size; Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); XCld = XCd0[LLD_]; YCld = YCd0[LLD_]; XRld = XRd0[LLD_]; YRld = YRd0[LLD_]; /* * Computational partitioning size is computed as the product of the logical * value returned by pilaenv_ and 2 * lcm( nprow, npcol ). */ nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) * PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); if( upper ) { for( k = 0; k < *N; k += nb ) { kb = *N - k; kb = MIN( kb, nb ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Akp > 0 && Anq0 > 0 ) { sger_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); sger_( &Akp, &Anq0, ((char *) ALPHA), YC, &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); } PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzsyr2 ); } } else { for( k = 0; k < *N; k += nb ) { kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld, Aptr, k, k, Ad0, PB_Ctzsyr2 ); Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); Amp0 = Amp - Akp; Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); if( Amp0 > 0 && Anq0 > 0 ) { sger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( XC, Akp, 0, XCld, size ), &ione, Mptr( YR, 0, Akq, YRld, size ), &YRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); sger_( &Amp0, &Anq0, ((char *) ALPHA), Mptr( YC, Akp, 0, YCld, size ), &ione, Mptr( XR, 0, Akq, XRld, size ), &XRld, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); } } } } if( XRfr ) free( XR ); if( XCfr ) free( XC ); if( YRfr ) free( YR ); if( YCfr ) free( YC ); /* * End of PSSYR2 */ } scalapack-1.8.0/PBLAS/SRC/psgemm_.c0000640000175000017500000004757710363532303016423 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void psgemm_( F_CHAR_T TRANSA, F_CHAR_T TRANSB, int * M, int * N, int * K, float * ALPHA, float * A, int * IA, int * JA, int * DESCA, float * B, int * IB, int * JB, int * DESCB, float * BETA, float * C, int * IC, int * JC, int * DESCC ) #else void psgemm_( TRANSA, TRANSB, M, N, K, ALPHA, A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC ) /* * .. Scalar Arguments .. */ F_CHAR_T TRANSA, TRANSB; int * IA, * IB, * IC, * JA, * JB, * JC, * K, * M, * N; float * ALPHA, * BETA; /* * .. Array Arguments .. */ int * DESCA, * DESCB, * DESCC; float * A, * B, * C; #endif { /* * Purpose * ======= * * PSGEMM performs one of the matrix-matrix operations * * sub( C ) := alpha*op( sub( A ) )*op( sub( B ) ) + beta*sub( C ), * * where * * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of * op( X ) = X or op( X ) = X'. * * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+K-1) if TRANSA = 'N', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'T', * A(IA:IA+K-1,JA:JA+M-1)' if TRANSA = 'C', * * and, op( sub( B ) ) denotes B(IB:IB+K-1,JB:JB+N-1) if TRANSB = 'N', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'T', * B(IB:IB+N-1,JB:JB+K-1)' if TRANSB = 'C', * * Alpha and beta are scalars. A, B and C are matrices; op( sub( A ) ) * is an m by k submatrix, op( sub( B ) ) is an k by n submatrix and * sub( C ) is an m by n submatrix. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * TRANSA (global input) CHARACTER*1 * On entry, TRANSA specifies the form of op( sub( A ) ) to be * used in the matrix multiplication as follows: * * TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ), * * TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )', * * TRANSA = 'C' or 'c' op( sub( A ) ) = sub( A )'. * * TRANSB (global input) CHARACTER*1 * On entry, TRANSB specifies the form of op( sub( B ) ) to be * used in the matrix multiplication as follows: * * TRANSB = 'N' or 'n' op( sub( B ) ) = sub( B ), * * TRANSB = 'T' or 't' op( sub( B ) ) = sub( B )', * * TRANSB = 'C' or 'c' op( sub( B ) ) = sub( B )'. * * M (global input) INTEGER * On entry, M specifies the number of rows of the submatrix * op( sub( A ) ) and of the submatrix sub( C ). M must be at * least zero. * * N (global input) INTEGER * On entry, N specifies the number of columns of the submatrix * op( sub( B ) ) and the number of columns of the submatrix * sub( C ). N must be at least zero. * * K (global input) INTEGER * On entry, K specifies the number of columns of the submatrix * op( sub( A ) ) and the number of rows of the submatrix * op( sub( B ) ). K must be at least zero. * * ALPHA (global input) REAL * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the arrays A and * B corresponding to the entries of the submatrices sub( A ) * and sub( B ) respectively need not be set on input. * * A (local input) REAL array * On entry, A is an array of dimension (LLD_A, Ka), where Ka is * at least Lc( 1, JA+K-1 ) when TRANSA = 'N' or 'n', and is at * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array * contains the local entries of the matrix A. * * IA (global input) INTEGER * On entry, IA specifies A's global row index, which points to * the beginning of the submatrix sub( A ). * * JA (global input) INTEGER * On entry, JA specifies A's global column index, which points * to the beginning of the submatrix sub( A ). * * DESCA (global and local input) INTEGER array * On entry, DESCA is an integer array of dimension DLEN_. This * is the array descriptor for the matrix A. * * B (local input) REAL array * On entry, B is an array of dimension (LLD_B, Kb), where Kb is * at least Lc( 1, JB+N-1 ) when TRANSB = 'N' or 'n', and is at * least Lc( 1, JB+K-1 ) otherwise. Before entry, this array * contains the local entries of the matrix B. * * IB (global input) INTEGER * On entry, IB specifies B's global row index, which points to * the beginning of the submatrix sub( B ). * * JB (global input) INTEGER * On entry, JB specifies B's global column index, which points * to the beginning of the submatrix sub( B ). * * DESCB (global and local input) INTEGER array * On entry, DESCB is an integer array of dimension DLEN_. This * is the array descriptor for the matrix B. * * BETA (global input) REAL * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the local entries of the array C * corresponding to the entries of the submatrix sub( C ) need * not be set on input. * * C (local input/local output) REAL array * On entry, C is an array of dimension (LLD_C, Kc), where Kc is * at least Lc( 1, JC+N-1 ). Before entry, this array contains * the local entries of the matrix C. * On exit, the entries of this array corresponding to the local * entries of the submatrix sub( C ) are overwritten by the * local entries of the m by n updated submatrix. * * IC (global input) INTEGER * On entry, IC specifies C's global row index, which points to * the beginning of the submatrix sub( C ). * * JC (global input) INTEGER * On entry, JC specifies C's global column index, which points * to the beginning of the submatrix sub( C ). * * DESCC (global and local input) INTEGER array * On entry, DESCC is an integer array of dimension DLEN_. This * is the array descriptor for the matrix C. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char DirAB, DirBC, OpC, OpR, TrA, TrB, ctop, ctopsave, rtop, rtopsave; int Ai, Aj, Bi, Bj, ChooseAB, ChooseBC, Ci, Cj, ForceTop, ctxt, info, mycol, myrow, nb, nota, notb, npcol, nprow; double ABest, ACest, BCest, tmp1, tmp2, tmp3, tmp4; PBTYP_T * type; /* * .. Local Arrays .. */ int Ad[DLEN_], Bd[DLEN_], Cd[DLEN_]; /* .. * .. Executable Statements .. * */ nota = ( ( TrA = Mupcase( F2C_CHAR( TRANSA )[0] ) ) == CNOTRAN ); notb = ( ( TrB = Mupcase( F2C_CHAR( TRANSB )[0] ) ) == CNOTRAN ); PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd ); PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 1001 + CTXT_ ) : 0 ) ) ) { if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSGEMM", "Illegal TRANSA = %c\n", TrA ); info = -1; } else if( ( !notb ) && ( TrB != CTRAN ) && ( TrB != CCOTRAN ) ) { PB_Cwarn( ctxt, __LINE__, "PSGEMM", "Illegal TRANSB = %c\n", TrB ); info = -2; } if( nota ) PB_Cchkmat( ctxt, "PSGEMM", "A", *M, 3, *K, 5, Ai, Aj, Ad, 10, &info ); else PB_Cchkmat( ctxt, "PSGEMM", "A", *K, 5, *M, 3, Ai, Aj, Ad, 10, &info ); if( notb ) PB_Cchkmat( ctxt, "PSGEMM", "B", *K, 5, *N, 4, Bi, Bj, Bd, 14, &info ); else PB_Cchkmat( ctxt, "PSGEMM", "B", *N, 4, *K, 5, Bi, Bj, Bd, 14, &info ); PB_Cchkmat( ctxt, "PSGEMM", "C", *M, 3, *N, 4, Ci, Cj, Cd, 19, &info ); } if( info ) { PB_Cabort( ctxt, "PSGEMM", info ); return; } #endif /* * Quick return if possible */ if( ( *M == 0 ) || ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO || *K == 0 ) && ( BETA [REAL_PART] == ONE ) ) ) return; /* * Get type structure */ type = PB_Cstypeset(); /* * If alpha or K is zero, sub( C ) := beta * sub( C ). */ if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) { if( BETA[REAL_PART] == ZERO ) { PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero, ((char * ) C), Ci, Cj, Cd ); } else if( !( BETA[REAL_PART] == ONE ) ) { PB_Cplascal( type, ALL, NOCONJG, *M, *N, ((char *) BETA), ((char * ) C), Ci, Cj, Cd ); } return; } /* * Start the operations */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Algorithm selection is based on approximation of the communication volume * for distributed and aligned operands. * * ABest: both operands sub( A ) and sub( B ) are communicated (M, N >> K) * ACest: both operands sub( A ) and sub( C ) are communicated (K, N >> M) * BCest: both operands sub( B ) and sub( C ) are communicated (M, K >> N) */ ABest = (double)(*K); ACest = (double)(*M); BCest = (double)(*N); if( notb ) { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); ABest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); tmp3 = DNROC( *K, Ad[NB_], npcol ); ACest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[MB_], nprow ); BCest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ); tmp1 = DNROC( *K, Bd[MB_], nprow ); tmp2 = DNROC( *N, Bd[NB_], npcol ); ACest *= ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } else { if( nota ) { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *K, Ad[NB_], npcol ); BCest *= CBRATIO * ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) + ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ); } else { tmp1 = DNROC( *M, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp3 = DNROC( *M, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[MB_], nprow ); ABest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *N, Bd[MB_], nprow ); tmp2 = DNROC( *K, Bd[NB_], npcol ); tmp3 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow ); ACest *= CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); tmp1 = DNROC( *K, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol ); tmp3 = DNROC( *K, Bd[NB_], npcol ); tmp4 = DNROC( *M, Cd[MB_], nprow ); BCest *= ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) + CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ); } } ChooseAB = ( ( ABest <= ( 1.3 * BCest ) ) && ( ABest <= ( 1.3 * ACest ) ) ); ChooseBC = ( ( BCest <= ACest ) && ( ( 1.3 * BCest ) <= ABest ) ); /* * BLACS topologies are enforced iff M, N and K are strictly greater than the * logical block size returned by pilaenv_. Otherwise, it is assumed that the * routine calling this routine has already selected an adequate topology. */ nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) ); ForceTop = ( ( *M > nb ) && ( *N > nb ) && ( *K > nb ) ); if( ChooseAB ) { OpR = CBCAST; OpC = CBCAST; } else if( ChooseBC ) { if( nota ) { OpR = CCOMBINE; OpC = CBCAST; } else { OpR = CBCAST; OpC = CCOMBINE; } } else { if( notb ) { OpR = CBCAST; OpC = CCOMBINE; } else { OpR = CCOMBINE; OpC = CBCAST; } } rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET ); ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET ); if( ForceTop ) { rtopsave = rtop; ctopsave = ctop; /* * No clear winner for the ring topologies, so that if a ring topology is * already selected, keep it. */ if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) && ( rtop != CTOP_SRING ) ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_IRING ); if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) && ( ctop != CTOP_SRING ) ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_IRING ); /* * Remove the next 4 lines when the BLACS combine operations support ring * topologies */ if( OpR == CCOMBINE ) rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT ); if( OpC == CCOMBINE ) ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT ); } DirAB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); DirBC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); if( ChooseAB ) { PB_CpgemmAB( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else if( ChooseBC ) { PB_CpgemmBC( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } else { PB_CpgemmAC( type, &DirAB, &DirBC, ( nota ? NOTRAN : TRAN ), ( notb ? NOTRAN : TRAN ), *M, *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj, Cd ); } /* * Restore the BLACS topologies when necessary. */ if( ForceTop ) { rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave ); ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave ); } /* * End of PSGEMM */ } scalapack-1.8.0/PBLAS/SRC/pdznrm2_.c0000640000175000017500000004333710363532303016515 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pdznrm2_( int * N, double * NORM2, double * X, int * IX, int * JX, int * DESCX, int * INCX ) #else void pdznrm2_( N, NORM2, X, IX, JX, DESCX, INCX ) /* * .. Scalar Arguments .. */ int * INCX, * IX, * JX, * N; double * NORM2; /* * .. Array Arguments .. */ int * DESCX; double * X; #endif { /* * Purpose * ======= * * PDZNRM2 computes the 2-norm of a subvector sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER * On entry, N specifies the length of the subvector sub( X ). * N must be at least zero. * * NORM2 (local output) DOUBLE PRECISION * On exit, NORM2 specifies the 2-norm of the subvector sub( X ) * only in its scope (See below for further details). * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Further Details * =============== * * When the result of a vector-oriented PBLAS call is a scalar, this * scalar is set only within the process scope which owns the vector(s) * being operated on. Let sub( X ) be a generic term for the input vec- * tor(s). Then, the processes owning the correct the answer is determi- * ned as follows: if an operation involves more than one vector, the * processes receiving the result will be the union of the following set * of processes for each vector: * * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- * cess row or process column owns the vector operand, therefore only * the process owning sub( X ) receives the correct result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process in this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process in this column receives the result; * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ char * Xptr = NULL, top; int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, dst, dist, info, k, mycol, mydist, myrow, npcol, nprow, src, size; double Xtmp, scale, ssq, temp1, temp2; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_]; double work[4]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) PB_Cchkvec( ctxt, "PDZNRM2", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); if( info ) { PB_Cabort( ctxt, "PDZNRM2", info ); return; } #endif /* * Initialize NORM2 */ *NORM2 = ZERO; /* * Quick return if possible */ if( *N == 0 ) return; /* * Retrieve process grid information */ #ifdef NO_ARGCHK Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); #endif /* * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol */ PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow, &Xcol ); /* * Handle degenerate case separately, sub( X )'s scope is just one process */ if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) { /* * Make sure I own some data and compute NORM2 */ if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) { scale = ZERO; ssq = ONE; type = PB_Cztypeset(); Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); Xtmp = ((double *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((double *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } /* * Compute NORM2 = SCALE * SQRT( SSQ ) */ dasqrtb_( &scale, &ssq, NORM2 ); } return; } else if( *INCX == Xd[M_] ) { /* * sub( X ) resides in (a) process row(s) */ if( ( myrow == Xrow ) || ( Xrow < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); if( Xnq > 0 ) { Xld = Xd[LLD_]; type = PB_Cztypeset(); size = type->size; Xptr = Mptr( ((char *) X), Xii, Xjj, Xld, size ); for( k = 0; k < Xnq; k++ ) { Xtmp = ((double *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((double *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += Xld * size; } } /* * If Xnq <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( npcol >= 2 ) && ( Xcol >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process row * 0 if npcol > 1 and Xcol >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = mycol; k = 1; l_10: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, npcol ); Cdgesd2d( ctxt, 2, 1, ((char*) work), 2, myrow, dst ); goto l_20; } else { dist = mycol + k; src = MPosMod( dist, npcol ); if( mycol < src ) { Cdgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, myrow, src ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < npcol ) goto l_10; l_20: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process row. */ top = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); if( mycol == 0 ) { Cdgebs2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, ROW, &top, 2, 1, ((char*)work), 2, myrow, 0 ); } /* * Compute NORM2 redundantly NORM2 = WORK( 1 ) * SQRT( WORK( 2 ) ) */ dasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ dasqrtb_( &scale, &ssq, NORM2 ); } } return; } else { /* * sub( X ) resides in (a) process column(s) */ if( ( mycol == Xcol ) || ( Xcol < 0 ) ) { /* * Initialize SCALE and SSQ */ scale = ZERO; ssq = ONE; /* * Make sure I own some data and compute local sum of squares */ Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); if( Xnp > 0 ) { type = PB_Cztypeset(); size = type->size; Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], size ); for( k = 0; k < Xnp; k++ ) { Xtmp = ((double *) Xptr)[REAL_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xtmp = ((double *) Xptr)[IMAG_PART]; if( Xtmp != ZERO ) { temp1 = ABS( Xtmp ); if( scale < temp1 ) { temp2 = scale / temp1; ssq = ONE + ssq * ( temp2 * temp2 ); scale = temp1; } else { temp2 = temp1 / scale; ssq = ssq + ( temp2 * temp2 ); } } Xptr += size; } } /* * If Xnp <= 0, SCALE is zero and SSQ is one (see initialization above) */ if( ( nprow >= 2 ) && ( Xrow >= 0 ) ) { /* * Combine the local sum of squares using a 1-tree topology within process * column 0 if nprow > 1 and Xrow >= 0, i.e sub( X ) is distributed. */ work[0] = scale; work[1] = ssq; mydist = myrow; k = 1; l_30: if( mydist & 1 ) { dist = k * ( mydist - 1 ); dst = MPosMod( dist, nprow ); Cdgesd2d( ctxt, 2, 1, ((char*)work), 2, dst, mycol ); goto l_40; } else { dist = myrow + k; src = MPosMod( dist, nprow ); if( myrow < src ) { Cdgerv2d( ctxt, 2, 1, ((char*)&work[2]), 2, src, mycol ); if( work[0] >= work[2] ) { if( work[0] != ZERO ) { temp1 = work[2] / work[0]; work[1] = work[1] + ( temp1 * temp1 ) * work[3]; } } else { temp1 = work[0] / work[2]; work[1] = work[3] + ( temp1 * temp1 ) * work[1]; work[0] = work[2]; } } mydist >>= 1; } k <<= 1; if( k < nprow ) goto l_30; l_40: /* * Process column 0 broadcasts the combined values of SCALE and SSQ within their * process column */ top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); if( myrow == 0 ) { Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 ); } else { Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2, 0, mycol ); } /* * Compute NORM2 redundantly NORM2 = WORK[0] * SQRT( WORK[1] ) */ dasqrtb_( &work[0], &work[1], NORM2 ); } else { /* * Compute NORM2 redundantly ( sub( X ) is not distributed ) */ dasqrtb_( &scale, &ssq, NORM2 ); } } return; } /* * End of PDZNRM2 */ } scalapack-1.8.0/PBLAS/SRC/pzaxpy_.c0000640000175000017500000002260110363532303016443 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" #include "PBpblas.h" #include "PBtools.h" #include "PBblacs.h" #include "PBblas.h" #ifdef __STDC__ void pzaxpy_( int * N, double * ALPHA, double * X, int * IX, int * JX, int * DESCX, int * INCX, double * Y, int * IY, int * JY, int * DESCY, int * INCY ) #else void pzaxpy_( N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY ) /* * .. Scalar Arguments .. */ int * INCX, * INCY, * IX, * IY, * JX, * JY, * N; double * ALPHA; /* * .. Array Arguments .. */ int * DESCX, * DESCY; double * X, * Y; #endif { /* * Purpose * ======= * * PZAXPY adds one subvector to another, * * sub( Y ) := sub( Y ) + alpha * sub( X ), * * where * * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * N (global input) INTEGER. * On entry, N specifies the length of the subvectors to be * added. N must be at least zero. * * ALPHA (global input) COMPLEX*16 * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the local entries of the array X cor- * responding to the entries of the subvector sub( X ) need not * be set on input. * * X (local input) COMPLEX*16 array * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. * Before entry, this array contains the local entries of the * matrix X. * * IX (global input) INTEGER * On entry, IX specifies X's global row index, which points to * the beginning of the submatrix sub( X ). * * JX (global input) INTEGER * On entry, JX specifies X's global column index, which points * to the beginning of the submatrix sub( X ). * * DESCX (global and local input) INTEGER array * On entry, DESCX is an integer array of dimension DLEN_. This * is the array descriptor for the matrix X. * * INCX (global input) INTEGER * On entry, INCX specifies the global increment for the * elements of X. Only two values of INCX are supported in * this version, namely 1 and M_X. INCX must not be zero. * * Y (local input/local output) COMPLEX*16 array * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. * Before entry, this array contains the local entries of the * matrix Y. On exit, sub( Y ) is overwritten with the updated * subvector. * * IY (global input) INTEGER * On entry, IY specifies Y's global row index, which points to * the beginning of the submatrix sub( Y ). * * JY (global input) INTEGER * On entry, JY specifies Y's global column index, which points * to the beginning of the submatrix sub( Y ). * * DESCY (global and local input) INTEGER array * On entry, DESCY is an integer array of dimension DLEN_. This * is the array descriptor for the matrix Y. * * INCY (global input) INTEGER * On entry, INCY specifies the global increment for the * elements of Y. Only two values of INCY are supported in * this version, namely 1 and M_Y. INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int Xi, Xj, Yi, Yj, ctxt, info, mycol, myrow, npcol, nprow; PBTYP_T * type; /* * .. Local Arrays .. */ int Xd[DLEN_], Yd[DLEN_]; /* .. * .. Executable Statements .. * */ PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); #ifndef NO_ARGCHK /* * Test the input parameters */ Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); if( !( info = ( ( nprow == -1 ) ? -( 601 + CTXT_ ) : 0 ) ) ) { PB_Cchkvec( ctxt, "PZAXPY", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); PB_Cchkvec( ctxt, "PZAXPY", "Y", *N, 1, Yi, Yj, Yd, *INCY, 11, &info ); } if( info ) { PB_Cabort( ctxt, "PZAXPY", info ); return; } #endif /* * Quick return if possible */ if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) return; /* * Get type structure */ type = PB_Cztypeset(); /* * Start the operations */ if( *INCX == Xd[M_] ) { PB_Cpaxpby( type, NOCONJG, 1, *N, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, ROW, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } else { PB_Cpaxpby( type, NOCONJG, *N, 1, ((char *) ALPHA), ((char *) X), Xi, Xj, Xd, COLUMN, type->one, ((char *) Y), Yi, Yj, Yd, ( *INCY == Yd[M_] ? ROW : COLUMN ) ); } /* * End of PZAXPY */ } scalapack-1.8.0/PBLAS/SRC/PBBLAS/0000750000175000017500000000000010605234721015550 5ustar muammarmuammarscalapack-1.8.0/PBLAS/SRC/PBBLAS/pbstrst1.f0000640000175000017500000000645110363532303017506 0ustar muammarmuammar SUBROUTINE PBSTRST1( ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, $ INCY, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 XDIST INTEGER ICONTXT, INCX, INCY, LCMP, LCMQ, N, NB, NINT, $ NZ REAL BETA * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * PBSTRST1 forms y <== x + beta * y, where y is a sorted * condensed row (or column) vector from a column (or row) vector of x. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Variables .. INTEGER ITER, IX, IY, K, KK, KZ, NJUMP * .. * .. External Subroutines .. EXTERNAL PBSVECADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, MOD * .. * .. Executable Statements .. * ITER = ICEIL( NINT, NB ) KZ = NZ * IF( LSAME( XDIST, 'R' ) ) THEN NJUMP = NB * LCMQ * DO 20 KK = 0, LCMQ-1 IX = NINT * MOD( KK*LCMP, LCMQ ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBSVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 10 K = 2, ITER-1 CALL PBSVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 10 CONTINUE END IF * CALL PBSVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 20 CONTINUE * * if( LSAME( XDIST, 'C' ) ) then * ELSE NJUMP = NB * LCMP * DO 40 KK = 0, LCMP-1 IX = NINT * MOD( KK*LCMQ, LCMP ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBSVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 30 K = 2, ITER-1 CALL PBSVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 30 CONTINUE END IF * CALL PBSVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 40 CONTINUE END IF * 50 CONTINUE * RETURN * * End of PBSTRST1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbctran.f0000640000175000017500000007120110363532303017350 0ustar muammarmuammar SUBROUTINE PBCTRAN( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC, $ M, N, NB COMPLEX BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), C( LDC, * ), WORK( * ) * .. * * Purpose * ======= * * PBCTRAN transposes a column block to row block, or a row block to * column block by reallocating data distribution. * * C := A^T + beta*C, or C := A^C + beta*C * * where A is an M-by-N matrix and C is an N-by-M matrix, and the size * of M or N is limited to its block size NB. * * The first elements of the matrices A, and C should be located at * the beginnings of their first blocks. (not the middle of the blocks.) * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * ADIST - (input) CHARACTER*1 * ADIST specifies whether A is a column block or a row block. * * ADIST = 'C', A is a column block * ADIST = 'R', A is a row block * * TRANS - (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the matrices A and C are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * M - (input) INTEGER * M specifies the (global) number of rows of the matrix (block * column or block row) A and of columns of the matrix C. * M >= 0. * * N - (input) INTEGER * N specifies the (global) number of columns of the matrix * (block column or block row) A and of columns of the matrix * C. N >= 0. * * NB - (input) INTEGER * NB specifies the column block size of the matrix A and the * row block size of the matrix C when ADIST = 'C'. Otherwise, * it specifies the row block size of the matrix A and the * column block size of the matrix C. NB >= 1. * * A (input) COMPLEX array of DIMENSION ( LDA, Lx ), * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'. * Before entry with ADIST = 'C', the leading Mp by N part of * the array A must contain the matrix A, otherwise the leading * M by Nq part of the array A must contain the matrix A. See * parameter details for the values of Mp and Nq. * * LDA (input) INTEGER * LDA specifies the leading dimension of (local) A as declared * in the calling (sub) program. LDA >= MAX(1,Mp) when * ADIST = 'C', or LDA >= MAX(1,M) otherwise. * * BETA (input) COMPLEX * BETA specifies scaler beta. * * C (input/output) COMPLEX array of DIMENSION ( LDC, Lx ), * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'. * If ADIST = 'C', the leading N-by-Mq part of the array C * contains the (local) matrix C, otherwise the leading * Np-by-M part of the array C must contain the (local) matrix * C. C will not be referenced if beta is zero. * * LDC (input) INTEGER * LDC specifies the leading dimension of (local) C as declared * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C', * or LDC >= MAX(1,Np) otherwise. * * IAROW (input) INTEGER * IAROW specifies a row of the process template, * which holds the first block of the matrix A. If A is a row * of blocks (ADIST = 'R') and all rows of processes have a copy * of A, then set IAROW = -1. * * IACOL (input) INTEGER * IACOL specifies a column of the process template, * which holds the first block of the matrix A. If A is a * column of blocks (ADIST = 'C') and all columns of processes * have a copy of A, then set IACOL = -1. * * ICROW (input) INTEGER * ICROW specifies the current row process which holds * the first block of the matrix C, which is transposed of A. * If C is a row of blocks (ADIST = 'C') and the transposed * row block C is distributed all rows of processes, set * ICROW = -1. * * ICCOL (input) INTEGER * ICCOL specifies the current column process which holds * the first block of the matrix C, which is transposed of A. * If C is a column of blocks (ADIST = 'R') and the transposed * column block C is distributed all columns of processes, * set ICCOL = -1. * * WORK (workspace) COMPLEX array of dimension Size(WORK). * It needs extra working space of A'. * * Parameters Details * ================== * * Lx It is a local portion of L owned by a process, (L is * replaced by M, or N, and x is replaced by either p (=NPROW) * or q (=NPCOL)). The value is determined by L, LB, x, and * MI, where LB is a block size and MI is a row or column * position in a process template. Lx is equal to or less * than Lx0 = CEIL( L, LB*x ) * LB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * Mqb = CEIL( M, NB*NPCOL ) * Npb = CEIL( N, NB*NPROW ) * LCMQ = LCM / NPCOL * LCMP = LCM / NPROW * * (1) ADIST = 'C' * (a) IACOL != -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * (b) IACOL = -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB)) * * (2) ADIST = 'R' * (a) IAROW != -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * (b) IAROW = -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ ) * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP ) * * ===================================================================== * * .. * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM, $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0, $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL, $ NPROW, NQ COMPLEX TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL ILCM, ICEIL, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CGERV2D, $ CGESD2D, PBCMATADD, PBCTR2AF, PBCTR2AT, $ PBCTR2BT, PBCTRGET, PBCTRSRT, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( M.EQ.0 .OR. N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( ADIST, 'C' ) ROWFORM = LSAME( ADIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( M .LT.0 ) THEN INFO = 4 ELSE IF( N .LT.0 ) THEN INFO = 5 ELSE IF( NB.LT.1 ) THEN INFO = 6 ELSE IF( IAROW.LT.-1 .OR. IAROW.GE.NPROW .OR. $ ( IAROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IACOL.LT.-1 .OR. IACOL.GE.NPCOL .OR. $ ( IACOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( ICROW.LT.-1 .OR. ICROW.GE.NPROW .OR. $ ( ICROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( ICCOL.LT.-1 .OR. ICCOL.GE.NPCOL .OR. $ ( ICCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO .NE. 0 ) THEN CALL PXERBLA( ICONTXT, 'PBCTRAN ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP * * When A is a column block * IF( COLFORM ) THEN * * Form C <== A' ( A is a column block ) * _ * | | * | | * _____________ | | * |______C______| <== |A| * | | * | | * |_| * * MRROW : row relative position in template from IAROW * MRCOL : column relative position in template from ICCOL * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) JCROW = ICROW IF( ICROW.EQ.-1 ) JCROW = IAROW * MP = NUMROC( M, NB, MYROW, IAROW, NPROW ) MQ = NUMROC( M, NB, MYCOL, ICCOL, NPCOL ) MQ0 = NUMROC( NUMROC(M, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * IF( LDA.LT.MP .AND. $ ( IACOL.EQ.MYCOL .OR. IACOL.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.N .AND. $ ( ICROW.EQ.MYROW .OR. ICROW.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a column process of IACOL has a column block A, * IF( IACOL.GE.0 ) THEN TBETA = ZERO IF( MYROW.EQ.JCROW ) TBETA = BETA * DO 20 I = 0, MIN( LCM, ICEIL(M,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IAROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + ICCOL, NPCOL ) IF( LCMQ.EQ.1 ) MQ0 = NUMROC( M, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IACOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBCTR2AT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBCTR2BT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) CALL CGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL ) ELSE CALL CGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL ) CALL PBCTR2AF( ICONTXT, 'Row', N, MQ-JDEX, NB, WORK, N, $ TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ, $ MQ0 ) END IF END IF 20 CONTINUE * * Broadcast a row block of C in each column of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.JCROW ) THEN CALL CGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL CGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ JCROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) MQ0 = MQ * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 30 I = 0, LCMP-1 IF( MRCOL.EQ.MOD( NPROW*I+MRROW, NPCOL ) ) THEN IF( LCMQ.EQ.1.AND.(ICROW.EQ.-1.OR.ICROW.EQ.MYROW) ) THEN CALL PBCTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, BETA, C, LDC, $ LCMP*NB ) ELSE CALL PBCTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) CALL PBCTRGET( ICONTXT, 'Row', N, MQ0, ICEIL(M,NB), WORK, N, $ MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) $ CALL PBCTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) CALL CGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL CGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL CGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL ) ELSE CALL CGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL ) CALL PBCMATADD( ICONTXT, 'G', N, MQ, ONE, WORK, N, $ BETA, C, LDC ) END IF END IF * ELSE ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL CGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN CALL CGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL ) END IF * IF( MYROW.EQ.ICROW ) $ CALL PBCTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) END IF END IF * END IF * * When A is a row block * ELSE * * Form C <== A' ( A is a row block ) * _ * | | * | | * | | _____________ * |C| <== |______A______| * | | * | | * |_| * * MRROW : row relative position in template from ICROW * MRCOL : column relative position in template from IACOL * MRROW = MOD( NPROW+MYROW-ICROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) JCCOL = ICCOL IF( ICCOL.EQ.-1 ) JCCOL = IACOL * NP = NUMROC( N, NB, MYROW, ICROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * IF( LDA.LT.M .AND. $ ( IAROW.EQ.MYROW .OR. IAROW.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.NP .AND. $ ( ICCOL.EQ.MYCOL .OR. ICCOL.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a row process of IAROW has a row block A, * IF( IAROW.GE.0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JCCOL ) TBETA = BETA * DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( N, NB, I, 0, NPROW ) IDEX = (I/NPROW) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * JDEX = (I/NPCOL) * NB IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN CALL PBCTR2AT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBCTR2BT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) CALL CGESD2D( ICONTXT, NP0, M, WORK, NP0, $ MCROW, JCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL ) ELSE CALL CGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL ) CALL PBCTR2AF( ICONTXT, 'Col', NP-IDEX, M, NB, WORK, $ NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ, $ NP0 ) END IF END IF 40 CONTINUE * * Broadcast a column block of WORK in each row of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JCCOL ) THEN CALL CGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL CGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, JCCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IF( LCMP.EQ.1.AND.(ICCOL.EQ.-1.OR.ICCOL.EQ.MYCOL) ) THEN CALL PBCTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, BETA, C, LDC, $ LCMQ*NB ) ELSE CALL PBCTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-ICROW, NPROW ) CALL PBCTRGET( ICONTXT, 'Col', NP0, M, ICEIL(N,NB), WORK, $ NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) $ CALL PBCTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) CALL CGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL CGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL CGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL ) ELSE CALL CGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL ) CALL PBCMATADD( ICONTXT, 'G', NP, M, ONE, WORK, NP, $ BETA, C, LDC ) END IF END IF * ELSE ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL CGESD2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN CALL CGERV2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, MCCOL ) END IF * IF( MYCOL.EQ.ICCOL ) $ CALL PBCTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) END IF END IF * END IF END IF * RETURN * * End of PBCTRAN * END * *======================================================================= * SUBROUTINE PBCTR2AT *======================================================================= * SUBROUTINE PBCTR2AT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, LCMP, LCMQ ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB COMPLEX BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBCTR2AT forms B <== A^T + beta*B, or A^C + beta*B * B is a ((conjugate) transposed) scattered block row (or column), * copied from a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IA, IB, K, INTV, JNTV * .. * .. External Subroutines .. EXTERNAL PBCMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( LCMP.EQ.LCMQ ) THEN CALL PBCMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBCMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + JNTV 10 CONTINUE * * If A is a row block ( ADIST = 'R' ), * ELSE INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, JNTV ) CALL PBCMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + JNTV IB = IB + INTV 20 CONTINUE END IF END IF * RETURN * * End of PBCTR2AT * END * *======================================================================= * SUBROUTINE PBCTR2BT *======================================================================= * SUBROUTINE PBCTR2BT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB COMPLEX BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBCTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a * ((conjugate) transposed) condensed block row (or column), copied from * a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IA, IB, K * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. External Subroutines .. EXTERNAL PBCMATADD * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( INTV.EQ.NB ) THEN CALL PBCMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBCMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + NB 10 CONTINUE * * If A is a row block (ADIST = 'R'), * ELSE IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, INTV ) CALL PBCMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + INTV IB = IB + NB 20 CONTINUE END IF END IF * RETURN * * End of PBCTR2BT * END * *======================================================================= * SUBROUTINE PBCTR2AF *======================================================================= * SUBROUTINE PBCTR2AF( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT COMPLEX BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBCTR2AF forms T <== A + BETA*T, where T is a scattered block * row (or column) copied from a (condensed) block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER JA, JB, K, INTV * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN INTV = NB * LCMQ JA = 1 JB = 1 DO 10 K = 1, ICEIL( NINT, NB ) CALL PBCMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1,JA), LDA, BETA, B(1,JB), LDB ) JA = JA + NB JB = JB + INTV 10 CONTINUE * * if( LSAME( ADIST, 'C' ) ) then * ELSE INTV = NB * LCMP JA = 1 JB = 1 DO 20 K = 1, ICEIL( NINT, NB ) CALL PBCMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA,1), LDA, BETA, B(JB,1), LDB ) JA = JA + NB JB = JB + INTV 20 CONTINUE END IF * RETURN * * End of PBCTR2AF * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbztrget.f0000640000175000017500000001003310363532303017554 0ustar muammarmuammar SUBROUTINE PBZTRGET( ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, $ MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, IGD, LDA, M, MCCOL, MCROW, MNB, MYCOL, $ MYROW, N, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ) * .. * * Purpose * ======= * * PBZTRGET forms a row block of A from scattered row subblocks if * ADIST = 'R', or forms a column block of A from scattered column * subblocks, if ADIST = 'C'. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Variables .. INTEGER KINT, KINT2, KLEN, KMOD, KPPOS, NLEN, NNUM, $ NTLEN REAL TEMP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * * if A is a row block, it needs to communicate columnwise. * IF( LSAME( ADIST, 'R' ) ) THEN KPPOS = MOD( NPROW+MYROW-MCROW, NPROW ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPROW/IGD, MNB-MCCOL ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPROW ) * 10 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL ZGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MOD(MYROW+KINT, NPROW), MYCOL ) NLEN = NLEN + KLEN END IF ELSE CALL ZGESD2D( ICONTXT, M, NLEN, A, LDA, $ MOD(NPROW+MYROW-KINT, NPROW), MYCOL ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 10 END IF END IF * * if A is a column block, it needs to communicate rowwise. * ELSE IF( LSAME( ADIST, 'C' ) ) THEN * KPPOS = MOD( NPCOL+MYCOL-MCCOL, NPCOL ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPCOL/IGD, MNB-MCROW ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPCOL ) * 20 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL ZGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MYROW, MOD(MYCOL+KINT, NPCOL) ) NLEN = NLEN + KLEN END IF ELSE CALL ZGESD2D( ICONTXT, M, NLEN, A, LDA, MYROW, $ MOD(NPCOL+MYCOL-KINT, NPCOL) ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 20 END IF END IF END IF * 30 CONTINUE * RETURN * * End of PBZTRGET * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbcvecadd.f0000640000175000017500000002366510363532303017645 0ustar muammarmuammar SUBROUTINE PBCVECADD( ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, $ INCY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, INCX, INCY, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * * .. * * Purpose * ======= * * PBCVECADD performs a vector X to be added to Y * Y := alpha*op(X) + beta*Y, * where alpha and beta are scalars, and X and Y are n vectors, * and op(X) = X**H if MODE = 'C', * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the transposed, or conjugate transposed vector X * to be added to the vector Y * = 'C': Conjugate vector X is added for complex data set. * Y = alpha * X**H + beta * Y * ELSE : Vector X is added. Y = alpha*X + beta*Y * if MODE = 'V', BLAS routine may be used. * * N (input) INTEGER * The number of elements of the vectors X and Y to be added. * N >= 0. * * ALPHA (input) COMPLEX * ALPHA specifies the scalar alpha. * * X (input) COMPLEX array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCX ) ) * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) COMPLEX * BETA specifies the scalar beta. * * Y (input/output) COMPLEX array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCY ) ) * On entry with BETA non-zero, the incremented array Y must * contain the vector Y. * On exit, Y is overwritten by the updated vector Y. * * INCY - (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * ===================================================================== * * .. * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IX, IY * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL CSCAL, CCOPY, CAXPY * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * IF( N.LE.0 .OR. ( ALPHA.EQ.ZERO .AND. BETA.EQ.ONE ) ) RETURN * IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCY.EQ.1 ) THEN DO 10 I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IY = 1 DO 20 I = 1, N Y( IY ) = ZERO IY = IY + INCY 20 CONTINUE END IF * ELSE IF( LSAME( MODE, 'V' ) ) THEN CALL CSCAL( N, BETA, Y, INCY ) ELSE IF( INCY.EQ.1 ) THEN DO 30 I = 1, N Y( I ) = BETA * Y( I ) 30 CONTINUE ELSE IY = 1 DO 40 I = 1, N Y( IY ) = BETA * Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF * ELSE IF( .NOT.LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL CCOPY( N, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 50 I = 1, N Y( I ) = X( I ) 50 CONTINUE ELSE IX = 1 IY = 1 DO 60 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 60 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 70 I = 1, N Y( I ) = X( I ) + Y( I ) 70 CONTINUE ELSE IX = 1 IY = 1 DO 80 I = 1, N Y( IY ) = X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 80 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 90 I = 1, N Y( I ) = X( I ) + BETA * Y( I ) 90 CONTINUE ELSE IX = 1 IY = 1 DO 100 I = 1, N Y( IY ) = X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 100 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 110 I = 1, N Y( I ) = ALPHA * X( I ) 110 CONTINUE ELSE IX = 1 IY = 1 DO 120 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 120 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL CAXPY( N, ALPHA, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 130 I = 1, N Y( I ) = ALPHA * X( I ) + Y( I ) 130 CONTINUE ELSE IX = 1 IY = 1 DO 140 I = 1, N Y( IY ) = ALPHA * X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 140 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 150 I = 1, N Y( I ) = ALPHA * X( I ) + BETA * Y( I ) 150 CONTINUE ELSE IX = 1 IY = 1 DO 160 I = 1, N Y( IY ) = ALPHA * X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 160 CONTINUE END IF END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 170 I = 1, N Y( I ) = CONJG( X( I ) ) 170 CONTINUE ELSE IX = 1 IY = 1 DO 180 I = 1, N Y( IY ) = CONJG( X( IX ) ) IX = IX + INCX IY = IY + INCY 180 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 190 I = 1, N Y( I ) = CONJG( X( I ) ) + Y( I ) 190 CONTINUE ELSE IX = 1 IY = 1 DO 200 I = 1, N Y( IY ) = CONJG( X( IX ) ) + Y( IY ) IX = IX + INCX IY = IY + INCY 200 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 210 I = 1, N Y( I ) = CONJG( X( I ) ) + BETA * Y( I ) 210 CONTINUE ELSE IX = 1 IY = 1 DO 220 I = 1, N Y( IY ) = CONJG( X( IX ) ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 220 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 230 I = 1, N Y( I ) = ALPHA * CONJG( X( I ) ) 230 CONTINUE ELSE IX = 1 IY = 1 DO 240 I = 1, N Y( IY ) = ALPHA * CONJG( X( IX ) ) IX = IX + INCX IY = IY + INCY 240 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 250 I = 1, N Y( I ) = ALPHA * CONJG( X( I ) ) + Y( I ) 250 CONTINUE ELSE IX = 1 IY = 1 DO 260 I = 1, N Y( IY ) = ALPHA * CONJG( X( IX ) ) + Y( IY ) IX = IX + INCX IY = IY + INCY 260 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 270 I = 1, N Y( I ) = ALPHA * CONJG( X( I ) ) + BETA * Y( I ) 270 CONTINUE ELSE IX = 1 IY = 1 DO 280 I = 1, N Y( IY ) = ALPHA * CONJG( X(IX) ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 280 CONTINUE END IF END IF END IF END IF * RETURN * * End of PBCVECADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdtran.f0000640000175000017500000007113010363532303017352 0ustar muammarmuammar SUBROUTINE PBDTRAN( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC, $ M, N, NB DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), C( LDC, * ), WORK( * ) * .. * * Purpose * ======= * * PBDTRAN transposes a column block to row block, or a row block to * column block by reallocating data distribution. * * C := A^T + beta*C, or C := A^C + beta*C * * where A is an M-by-N matrix and C is an N-by-M matrix, and the size * of M or N is limited to its block size NB. * * The first elements of the matrices A, and C should be located at * the beginnings of their first blocks. (not the middle of the blocks.) * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * ADIST - (input) CHARACTER*1 * ADIST specifies whether A is a column block or a row block. * * ADIST = 'C', A is a column block * ADIST = 'R', A is a row block * * TRANS - (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the matrices A and C are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * M - (input) INTEGER * M specifies the (global) number of rows of the matrix (block * column or block row) A and of columns of the matrix C. * M >= 0. * * N - (input) INTEGER * N specifies the (global) number of columns of the matrix * (block column or block row) A and of columns of the matrix * C. N >= 0. * * NB - (input) INTEGER * NB specifies the column block size of the matrix A and the * row block size of the matrix C when ADIST = 'C'. Otherwise, * it specifies the row block size of the matrix A and the * column block size of the matrix C. NB >= 1. * * A (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ), * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'. * Before entry with ADIST = 'C', the leading Mp by N part of * the array A must contain the matrix A, otherwise the leading * M by Nq part of the array A must contain the matrix A. See * parameter details for the values of Mp and Nq. * * LDA (input) INTEGER * LDA specifies the leading dimension of (local) A as declared * in the calling (sub) program. LDA >= MAX(1,Mp) when * ADIST = 'C', or LDA >= MAX(1,M) otherwise. * * BETA (input) DOUBLE PRECISION * BETA specifies scaler beta. * * C (input/output) DOUBLE PRECISION array of DIMENSION * ( LDC, Lx ), * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'. * If ADIST = 'C', the leading N-by-Mq part of the array C * contains the (local) matrix C, otherwise the leading * Np-by-M part of the array C must contain the (local) matrix * C. C will not be referenced if beta is zero. * * LDC (input) INTEGER * LDC specifies the leading dimension of (local) C as declared * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C', * or LDC >= MAX(1,Np) otherwise. * * IAROW (input) INTEGER * IAROW specifies a row of the process template, * which holds the first block of the matrix A. If A is a row * of blocks (ADIST = 'R') and all rows of processes have a copy * of A, then set IAROW = -1. * * IACOL (input) INTEGER * IACOL specifies a column of the process template, * which holds the first block of the matrix A. If A is a * column of blocks (ADIST = 'C') and all columns of processes * have a copy of A, then set IACOL = -1. * * ICROW (input) INTEGER * ICROW specifies the current row process which holds * the first block of the matrix C, which is transposed of A. * If C is a row of blocks (ADIST = 'C') and the transposed * row block C is distributed all rows of processes, set * ICROW = -1. * * ICCOL (input) INTEGER * ICCOL specifies the current column process which holds * the first block of the matrix C, which is transposed of A. * If C is a column of blocks (ADIST = 'R') and the transposed * column block C is distributed all columns of processes, * set ICCOL = -1. * * WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK). * It needs extra working space of A'. * * Parameters Details * ================== * * Lx It is a local portion of L owned by a process, (L is * replaced by M, or N, and x is replaced by either p (=NPROW) * or q (=NPCOL)). The value is determined by L, LB, x, and * MI, where LB is a block size and MI is a row or column * position in a process template. Lx is equal to or less * than Lx0 = CEIL( L, LB*x ) * LB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * Mqb = CEIL( M, NB*NPCOL ) * Npb = CEIL( N, NB*NPROW ) * LCMQ = LCM / NPCOL * LCMP = LCM / NPROW * * (1) ADIST = 'C' * (a) IACOL != -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * (b) IACOL = -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB)) * * (2) ADIST = 'R' * (a) IAROW != -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * (b) IAROW = -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ ) * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP ) * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM, $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0, $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL, $ NPROW, NQ DOUBLE PRECISION TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL ILCM, ICEIL, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D, $ DGESD2D, PBDMATADD, PBDTR2AF, PBDTR2AT, $ PBDTR2BT, PBDTRGET, PBDTRSRT, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( M.EQ.0 .OR. N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( ADIST, 'C' ) ROWFORM = LSAME( ADIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( M .LT.0 ) THEN INFO = 4 ELSE IF( N .LT.0 ) THEN INFO = 5 ELSE IF( NB.LT.1 ) THEN INFO = 6 ELSE IF( IAROW.LT.-1 .OR. IAROW.GE.NPROW .OR. $ ( IAROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IACOL.LT.-1 .OR. IACOL.GE.NPCOL .OR. $ ( IACOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( ICROW.LT.-1 .OR. ICROW.GE.NPROW .OR. $ ( ICROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( ICCOL.LT.-1 .OR. ICCOL.GE.NPCOL .OR. $ ( ICCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO .NE. 0 ) THEN CALL PXERBLA( ICONTXT, 'PBDTRAN ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP * * When A is a column block * IF( COLFORM ) THEN * * Form C <== A' ( A is a column block ) * _ * | | * | | * _____________ | | * |______C______| <== |A| * | | * | | * |_| * * MRROW : row relative position in template from IAROW * MRCOL : column relative position in template from ICCOL * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) JCROW = ICROW IF( ICROW.EQ.-1 ) JCROW = IAROW * MP = NUMROC( M, NB, MYROW, IAROW, NPROW ) MQ = NUMROC( M, NB, MYCOL, ICCOL, NPCOL ) MQ0 = NUMROC( NUMROC(M, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * IF( LDA.LT.MP .AND. $ ( IACOL.EQ.MYCOL .OR. IACOL.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.N .AND. $ ( ICROW.EQ.MYROW .OR. ICROW.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a column process of IACOL has a column block A, * IF( IACOL.GE.0 ) THEN TBETA = ZERO IF( MYROW.EQ.JCROW ) TBETA = BETA * DO 20 I = 0, MIN( LCM, ICEIL(M,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IAROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + ICCOL, NPCOL ) IF( LCMQ.EQ.1 ) MQ0 = NUMROC( M, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IACOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBDTR2AT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBDTR2BT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) CALL DGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL ) ELSE CALL DGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL ) CALL PBDTR2AF( ICONTXT, 'Row', N, MQ-JDEX, NB, WORK, N, $ TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ, $ MQ0 ) END IF END IF 20 CONTINUE * * Broadcast a row block of C in each column of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.JCROW ) THEN CALL DGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL DGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ JCROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) MQ0 = MQ * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 30 I = 0, LCMP-1 IF( MRCOL.EQ.MOD( NPROW*I+MRROW, NPCOL ) ) THEN IF( LCMQ.EQ.1.AND.(ICROW.EQ.-1.OR.ICROW.EQ.MYROW) ) THEN CALL PBDTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, BETA, C, LDC, $ LCMP*NB ) ELSE CALL PBDTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) CALL PBDTRGET( ICONTXT, 'Row', N, MQ0, ICEIL(M,NB), WORK, N, $ MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) $ CALL PBDTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) CALL DGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL DGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL DGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL ) ELSE CALL DGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL ) CALL PBDMATADD( ICONTXT, 'G', N, MQ, ONE, WORK, N, $ BETA, C, LDC ) END IF END IF * ELSE ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL DGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN CALL DGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL ) END IF * IF( MYROW.EQ.ICROW ) $ CALL PBDTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) END IF END IF * END IF * * When A is a row block * ELSE * * Form C <== A' ( A is a row block ) * _ * | | * | | * | | _____________ * |C| <== |______A______| * | | * | | * |_| * * MRROW : row relative position in template from ICROW * MRCOL : column relative position in template from IACOL * MRROW = MOD( NPROW+MYROW-ICROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) JCCOL = ICCOL IF( ICCOL.EQ.-1 ) JCCOL = IACOL * NP = NUMROC( N, NB, MYROW, ICROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * IF( LDA.LT.M .AND. $ ( IAROW.EQ.MYROW .OR. IAROW.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.NP .AND. $ ( ICCOL.EQ.MYCOL .OR. ICCOL.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a row process of IAROW has a row block A, * IF( IAROW.GE.0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JCCOL ) TBETA = BETA * DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( N, NB, I, 0, NPROW ) IDEX = (I/NPROW) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * JDEX = (I/NPCOL) * NB IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN CALL PBDTR2AT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBDTR2BT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) CALL DGESD2D( ICONTXT, NP0, M, WORK, NP0, $ MCROW, JCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL ) ELSE CALL DGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL ) CALL PBDTR2AF( ICONTXT, 'Col', NP-IDEX, M, NB, WORK, $ NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ, $ NP0 ) END IF END IF 40 CONTINUE * * Broadcast a column block of WORK in each row of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JCCOL ) THEN CALL DGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL DGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, JCCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IF( LCMP.EQ.1.AND.(ICCOL.EQ.-1.OR.ICCOL.EQ.MYCOL) ) THEN CALL PBDTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, BETA, C, LDC, $ LCMQ*NB ) ELSE CALL PBDTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-ICROW, NPROW ) CALL PBDTRGET( ICONTXT, 'Col', NP0, M, ICEIL(N,NB), WORK, $ NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) $ CALL PBDTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) CALL DGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL DGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL DGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL ) ELSE CALL DGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL ) CALL PBDMATADD( ICONTXT, 'G', NP, M, ONE, WORK, NP, $ BETA, C, LDC ) END IF END IF * ELSE ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL DGESD2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN CALL DGERV2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, MCCOL ) END IF * IF( MYCOL.EQ.ICCOL ) $ CALL PBDTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) END IF END IF * END IF END IF * RETURN * * End of PBDTRAN * END * *======================================================================= * SUBROUTINE PBDTR2AT *======================================================================= * SUBROUTINE PBDTR2AT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, LCMP, LCMQ ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBDTR2AT forms B <== A^T + beta*B, or A^C + beta*B * B is a ((conjugate) transposed) scattered block row (or column), * copied from a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER IA, IB, K, INTV, JNTV * .. * .. External Subroutines .. EXTERNAL PBDMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( LCMP.EQ.LCMQ ) THEN CALL PBDMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBDMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + JNTV 10 CONTINUE * * If A is a row block ( ADIST = 'R' ), * ELSE INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, JNTV ) CALL PBDMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + JNTV IB = IB + INTV 20 CONTINUE END IF END IF * RETURN * * End of PBDTR2AT * END * *======================================================================= * SUBROUTINE PBDTR2BT *======================================================================= * SUBROUTINE PBDTR2BT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBDTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a * ((conjugate) transposed) condensed block row (or column), copied from * a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER IA, IB, K * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. External Subroutines .. EXTERNAL PBDMATADD * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( INTV.EQ.NB ) THEN CALL PBDMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBDMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + NB 10 CONTINUE * * If A is a row block (ADIST = 'R'), * ELSE IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, INTV ) CALL PBDMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + INTV IB = IB + NB 20 CONTINUE END IF END IF * RETURN * * End of PBDTR2BT * END * *======================================================================= * SUBROUTINE PBDTR2AF *======================================================================= * SUBROUTINE PBDTR2AF( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBDTR2AF forms T <== A + BETA*T, where T is a scattered block * row (or column) copied from a (condensed) block column (or row) of A * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER JA, JB, K, INTV * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN INTV = NB * LCMQ JA = 1 JB = 1 DO 10 K = 1, ICEIL( NINT, NB ) CALL PBDMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1,JA), LDA, BETA, B(1,JB), LDB ) JA = JA + NB JB = JB + INTV 10 CONTINUE * * if( LSAME( ADIST, 'C' ) ) then * ELSE INTV = NB * LCMP JA = 1 JB = 1 DO 20 K = 1, ICEIL( NINT, NB ) CALL PBDMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA,1), LDA, BETA, B(JB,1), LDB ) JA = JA + NB JB = JB + INTV 20 CONTINUE END IF * RETURN * * End of PBDTR2AF * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/Makefile0000640000175000017500000000507410363532303017215 0ustar muammarmuammar############################################################################ # # Program: PBLAS -- (version 2.0 beta) # # Module: Makefile # # Purpose: PB-BLAS remaining source Makefile # # Creation date: August 24, 1997 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../../SLmake.inc ############################################################################ # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ../../../SLmake.inc file # definitions to match your compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # # To remove the object files after the library is created, enter # make clean # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # ############################################################################ all: single double complex complex16 PBSBLASAUX = pbstran.o pbsmatadd.o pbstrsrt.o pbstrget.o \ pbstrnv.o pbsvecadd.o pbstrst1.o PBCBLASAUX = pbctran.o pbcmatadd.o pbctrsrt.o pbctrget.o \ pbctrnv.o pbcvecadd.o pbctrst1.o PBDBLASAUX = pbdtran.o pbdmatadd.o pbdtrsrt.o pbdtrget.o \ pbdtrnv.o pbdvecadd.o pbdtrst1.o PBZBLASAUX = pbztran.o pbzmatadd.o pbztrsrt.o pbztrget.o \ pbztrnv.o pbzvecadd.o pbztrst1.o #--------------------------------------------------------------------------- $(PBSBLASAUX) : $(FRC) $(PBCBLASAUX) : $(FRC) $(PBDBLASAUX) : $(FRC) $(PBZBLASAUX) : $(FRC) #--------------------------------------------------------------------------- single: $(PBSBLASAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PBSBLASAUX) $(RANLIB) $(SCALAPACKLIB) double: $(PBDBLASAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PBDBLASAUX) $(RANLIB) $(SCALAPACKLIB) complex: $(PBCBLASAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PBCBLASAUX) $(RANLIB) $(SCALAPACKLIB) complex16: $(PBZBLASAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(PBZBLASAUX) $(RANLIB) $(SCALAPACKLIB) #--------------------------------------------------------------------------- FRC: @FRC=$(FRC) clean : rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbztrst1.f0000640000175000017500000000646610363532303017523 0ustar muammarmuammar SUBROUTINE PBZTRST1( ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, $ INCY, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 XDIST INTEGER ICONTXT, INCX, INCY, LCMP, LCMQ, N, NB, NINT, $ NZ COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * PBZTRST1 forms y <== x + beta * y, where y is a sorted * condensed row (or column) vector from a column (or row) vector of x. * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Variables .. INTEGER ITER, IX, IY, K, KK, KZ, NJUMP * .. * .. External Subroutines .. EXTERNAL PBZVECADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, MOD * .. * .. Executable Statements .. * ITER = ICEIL( NINT, NB ) KZ = NZ * IF( LSAME( XDIST, 'R' ) ) THEN NJUMP = NB * LCMQ * DO 20 KK = 0, LCMQ-1 IX = NINT * MOD( KK*LCMP, LCMQ ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBZVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 10 K = 2, ITER-1 CALL PBZVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 10 CONTINUE END IF * CALL PBZVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 20 CONTINUE * * if( LSAME( XDIST, 'C' ) ) then * ELSE NJUMP = NB * LCMP * DO 40 KK = 0, LCMP-1 IX = NINT * MOD( KK*LCMQ, LCMP ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBZVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 30 K = 2, ITER-1 CALL PBZVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 30 CONTINUE END IF * CALL PBZVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 40 CONTINUE END IF * 50 CONTINUE * RETURN * * End of PBZTRST1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbztrnv.f0000640000175000017500000006476410363532303017444 0ustar muammarmuammar SUBROUTINE PBZTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, $ WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 TRANS, XDIST INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL, $ IYROW, N, NB, NZ COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 WORK( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PBZTRNV transposes a column vector to row vector, or a row vector to * column vector by reallocating data distribution. * * Y := X' * * where X and Y are N vectors. * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * XDIST (input) CHARACTER*1 * XDIST specifies whether X is a column vector or a row vector, * * XDIST = 'C', X is a column vector (distributed columnwise) * XDIST = 'R', X is a row vector (distributed rowwise) * * TRANS (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the vectors X and Y are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * N (input) INTEGER * N specifies the (global) number of the vector X and the * vector Y. N >= 0. * * NB (input) INTEGER * NB specifies the block size of vectors X and Y. NB >= 0. * * NZ (input) INTEGER * NZ is the column offset to specify the column distance from * the beginning of the block to the first element of the * vector X, and the row offset to the first element of the * vector Y if XDIST = 'C'. * Otherwise, it is row offset to specify the row distance * from the beginning of the block to the first element of the * vector X, and the column offset to the first element of the * vector Y. 0 < NZ <= NB. * * X (input) COMPLEX*16 array of dimension at least * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'. * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) COMPLEX*16 * BETA specifies scaler beta. * * Y (input/output) COMPLEX*16 array of dimension at least * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or * The incremented array Y must contain the vector Y. * Y will not be referenced if beta is zero. * * INCY (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * IXROW (input) INTEGER * IXROW specifies a row of the process template, which holds * the first element of the vector X. If X is a row vector and * all rows of processes have a copy of X, then set IXROW = -1. * * IXCOL (input) INTEGER * IXCOL specifies a column of the process template, * which holds the first element of the vector X. If X is a * column block and all columns of processes have a copy of X, * then set IXCOL = -1. * * IYROW (input) INTEGER * IYROW specifies the current row process which holds the * first element of the vector Y, which is transposed of X. * If X is a column vector and the transposed row vector Y is * distributed all rows of processes, set IYROW = -1. * * IYCOL (input) INTEGER * IYCOL specifies the current column process which holds * the first element of the vector Y, which is transposed of Y. * If X is a row block and the transposed column vector Y is * distributed all columns of processes, set IYCOL = -1. * * WORK (workspace) COMPLEX*16 array of dimension Size(WORK). * It needs extra working space of x**T or x**H. * * Parameters Details * ================== * * Nx It is a local portion of N owned by a process, where x is * replaced by either p (=NPROW) or q (=NPCOL)). The value is * determined by N, NB, NZ, x, and MI, where NB is a block size, * NZ is a offset from the beginning of the block, and MI is a * row or column position in a process template. Nx is equal * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * NN = N + NZ * Npb = CEIL( NN, NB*NPROW ) * Nqb = CEIL( NN, NB*NPCOL ) * LCMP = LCM / NPROW * LCMQ = LCM / NPCOL * * (1) XDIST = 'C' * (a) IXCOL != -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * (b) IXCOL = -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB)) * * (2) XDIST = 'R' * (a) IXROW != -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * (b) IXROW = -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP) * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ) * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ, $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW, $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW, $ NQ, NQ0, NQ1 COMPLEX*16 TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL LSAME, ILCM, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBZTR2A1, PBZTR2B1, PBZTRGET, $ PBZTRST1, PBZVECADD, PXERBLA, ZGEBR2D, ZGEBS2D, $ ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( XDIST, 'C' ) ROWFORM = LSAME( XDIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( N .LT.0 ) THEN INFO = 4 ELSE IF( NB .LT.1 ) THEN INFO = 5 ELSE IF( NZ .LT.0 .OR. NZ.GE.NB ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR. $ ( IXROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR. $ ( IXCOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR. $ ( IYROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR. $ ( IYCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO.NE.0 ) THEN CALL PXERBLA( ICONTXT, 'PBZTRNV ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP NN = N + NZ * * When x is a column vector * IF( COLFORM ) THEN * * Form y <== x' ( x is a column vector ) * * || * || * _____________ || * -----(y)----- <== (x) * || * || * || * IF( IXROW.LT.0 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.0 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IXROW * MRCOL : column relative position in template from IYCOL * MRROW = MOD( NPROW+MYROW-IXROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) JYROW = IYROW IF( IYROW.EQ.-1 ) JYROW = IXROW * NP = NUMROC( NN, NB, MYROW, IXROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * * When a column process of IXCOL has a column block A, * IF( IXCOL .GE. 0 ) THEN TBETA = ZERO IF( MYROW.EQ.JYROW ) TBETA = BETA KZ = NZ * DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL ) IF( LCMQ.EQ.1 ) NQ0 = NUMROC( NN, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBZTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBZTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) CALL ZGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ JYROW, MCCOL ) END IF * * A destination node receives the copied vector * ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY, $ MCROW, IXCOL ) ELSE CALL ZGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ MCROW, IXCOL ) CALL PBZTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ*NB ) END IF END IF KZ = 0 20 CONTINUE * * Broadcast a row block of WORK in each column of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.JYROW ) THEN CALL ZGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL ZGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ JYROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) NQ0 = NQ * * Processors, which have diagonal blocks of X, copy them to * WORK array in transposed form * KZ = 0 IF( MRROW.EQ.0 ) KZ = NZ JZ = 0 IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ * DO 30 I = 0, LCMP - 1 IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN CALL PBZTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMP, 1 ) ELSE CALL PBZTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) CALL PBZTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBZTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF CALL ZGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL ZGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL ZGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL ) ELSE CALL ZGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL ) CALL PBZVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1, $ BETA, Y, INCY ) END IF END IF * ELSE NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL ZGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN CALL ZGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL ) END IF * IF( MYROW.EQ.IYROW ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBZTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF END IF END IF END IF * * When x is a row vector * ELSE * * Form y <== x' ( x is a row block ) * * || * || * || _____________ * (y) <== -----(x)----- * || * || * || * IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.0 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.0 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IYROW * MRCOL : column relative position in template from IXCOL * MRROW = MOD( NPROW+MYROW-IYROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL ) JYCOL = IYCOL IF( IYCOL.EQ.-1 ) JYCOL = IXCOL * NP = NUMROC( NN, NB, MYROW, IYROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * * When a row process of IXROW has a row block A, * IF( IXROW .GE. 0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JYCOL ) TBETA = BETA KZ = NZ * DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW ) JDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * IDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN CALL PBZTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ, LCMP ) * * The source node sends blocks to a destination node * ELSE CALL PBZTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) CALL ZGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ MCROW, JYCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY, $ IXROW, MCCOL ) ELSE CALL ZGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ IXROW, MCCOL ) CALL PBZTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP*NB ) END IF END IF KZ = 0 40 CONTINUE * * Broadcast a column vector Y in each row of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JYCOL ) THEN CALL ZGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL ZGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, JYCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * KZ = 0 IF( MRCOL.EQ.0 ) KZ = NZ JZ = 0 IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN CALL PBZTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMQ, 1 ) ELSE CALL PBZTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-IYROW, NPROW ) CALL PBZTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBZTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF CALL ZGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL ZGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL ZGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL ) ELSE CALL ZGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL ) CALL PBZVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA, $ Y, INCY ) END IF END IF * ELSE NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL ZGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN CALL ZGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL ) END IF * IF( MYCOL.EQ.IYCOL ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBZTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF END IF END IF END IF END IF * RETURN * * End of PBZTRNV * END * *======================================================================= * SUBROUTINE PBZTR2A1 *======================================================================= * SUBROUTINE PBZTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, $ INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x * y is a scattered vector, copied from a condensed vector x. * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBZVECADD * .. * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER * IX = 0 IY = 0 JZ = NZ ITER = ICEIL( N+NZ, INTV ) * IF( ITER.GT.1 ) THEN CALL PBZVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - JZ IY = IY + INTV - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBZVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + INTV 10 CONTINUE END IF * CALL PBZVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) * RETURN * * End of PBZTR2A1 * END * *======================================================================= * SUBROUTINE PBZTR2B1 *======================================================================= * SUBROUTINE PBZTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, $ INCY, JINX, JINY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x + beta * y * y is a condensed vector, copied from a scattered vector x * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBZVECADD * .. * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER, LENX, LENY * IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN CALL PBZVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA, $ Y, INCY ) * ELSE IX = 0 IY = 0 JZ = NZ LENX = NB * JINX LENY = NB * JINY ITER = ICEIL( N+NZ, LENX ) * IF( ITER.GT.1 ) THEN CALL PBZVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX - JZ IY = IY + LENY - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBZVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX IY = IY + LENY 10 CONTINUE END IF * CALL PBZVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) END IF * RETURN * * End of PBZTR2B1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdmatadd.f0000640000175000017500000003033310363532303017640 0ustar muammarmuammar SUBROUTINE PBDMATADD( ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, $ LDB ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, LDA, LDB, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBDMATADD performs the matrix add operation B := alpha*A + beta*B, * where alpha and beta are scalars, and A and B are m-by-n * upper/lower trapezoidal matrices, or rectangular matrices. * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the part of the matrix A, or (conjugate) transposed * matrix A to be added to the matrix B, * = 'U': Upper triangular part * up(B) = alpha*up(A) + beta*up(B) * = 'L': Lower triangular part * lo(B) = alpha*lo(A) + beta*lo(B) * = 'T': Transposed matrix A * B = alpha*A**T + beta*B * = 'C': Conjugate transposed matrix A * B = alpha*A**H + beta*B * Otherwise: B = alpha*A + beta*B * if M = LDA = LDB: use one BLAS loop * if MODE = 'V' : columnwise copy using BLAS if possible * else : use double loops * * M (input) INTEGER * M specifies the number of columns of the matrix A if * MODE != 'T'/'C', and it specifies the number of rows of the * matrix A otherwise. It also specifies the number of rows of * the matrix B. M >= 0. * * N (input) INTEGER * N specifies the number of rows of the matrix A if * MODE != 'T'/'C', and it specifies the number of columns of * the matrix A otherwise. It also specifies the number of * columns of the matrix B. N >= 0. * * ALPHA (input) DOUBLE PRECISION * ALPHA specifies the scalar alpha. * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The m by n matrix A if MODE != 'T'/'C'. * If MODE = 'U', only the upper triangle or trapezoid is * accessed; if MODE = 'L', only the lower triangle or * trapezoid is accessed. Otherwise all m-by-n data matrix * is accessed. * And the n by m matrix A if MODE = 'T'/'C'. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M) if * MODE != 'T'/'C'. And LDA >= max(1,N) if MODE = 'T'/'C'. * * BETA (input) DOUBLE PRECISION * BETA specifies the scalar beta. * * B (input) DOUBLE PRECISION array, dimension (LDB,N) * On exit, B = alpha*A + beta*B * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,M). * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL DSCAL, DCOPY, DAXPY * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.LE.0 .OR. N.LE.0 .OR. ( ALPHA.EQ.ZERO.AND.BETA.EQ.ONE ) ) $ RETURN * * A is upper triangular or upper trapezoidal, * IF( LSAME( MODE, 'U' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( I, J ) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, MIN( J, M ) B( I, J ) = BETA * B( I, J ) 30 CONTINUE 40 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 60 J = 1, N DO 50 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) 50 CONTINUE 60 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 80 J = 1, N DO 70 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + B( I, J ) 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 120 J = 1, N DO 110 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) 110 CONTINUE 120 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 140 J = 1, N DO 130 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 130 CONTINUE 140 CONTINUE ELSE DO 160 J = 1, N DO 150 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * * A is lower triangular or upper trapezoidal, * ELSE IF( LSAME( MODE, 'L' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, N DO 170 I = J, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE ELSE DO 200 J = 1, N DO 190 I = J, M B( I, J ) = BETA * B( I, J ) 190 CONTINUE 200 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 220 J = 1, N DO 210 I = J, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 240 J = 1, N DO 230 I = J, M B( I, J ) = A( I, J ) + B( I, J ) 230 CONTINUE 240 CONTINUE ELSE DO 260 J = 1, N DO 250 I = J, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 280 J = 1, N DO 270 I = J, M B( I, J ) = ALPHA * A( I, J ) 270 CONTINUE 280 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 300 J = 1, N DO 290 I = J, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 290 CONTINUE 300 CONTINUE ELSE DO 320 J = 1, N DO 310 I = J, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF * * If MODE = 'Transpose'/'Conjugate' * ELSE IF( LSAME( MODE, 'T' ) .OR. LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 340 J = 1, N DO 330 I = 1, M B( I, J ) = ZERO 330 CONTINUE 340 CONTINUE ELSE DO 360 J = 1, N DO 350 I = 1, M B( I, J ) = BETA * B( I, J ) 350 CONTINUE 360 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 380 J = 1, N DO 370 I = 1, M B( I, J ) = A( J, I ) 370 CONTINUE 380 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 400 J = 1, N DO 390 I = 1, M B( I, J ) = A( J, I ) + B( I, J ) 390 CONTINUE 400 CONTINUE ELSE DO 420 J = 1, N DO 410 I = 1, M B( I, J ) = A( J, I ) + BETA * B( I, J ) 410 CONTINUE 420 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 440 J = 1, N DO 430 I = 1, M B( I, J ) = ALPHA * A( J, I ) 430 CONTINUE 440 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 460 J = 1, N DO 450 I = 1, M B( I, J ) = ALPHA * A( J, I ) + B( I, J ) 450 CONTINUE 460 CONTINUE ELSE DO 480 J = 1, N DO 470 I = 1, M B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 470 CONTINUE 480 CONTINUE END IF END IF * * Other cases (for genral matrix additions) * ELSE IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 500 J = 1, N DO 490 I = 1, M B( I, J ) = ZERO 490 CONTINUE 500 CONTINUE * ELSE IF( M.EQ.LDB ) THEN CALL DSCAL( M*N, BETA, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 510 J = 1, N CALL DSCAL( M, BETA, B( 1, J ), 1 ) 510 CONTINUE ELSE DO 530 J = 1, N DO 520 I = 1, M B( I, J ) = BETA * B( I, J ) 520 CONTINUE 530 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL DCOPY( M*N, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 540 J = 1, N CALL DCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) 540 CONTINUE ELSE DO 560 J = 1, N DO 550 I = 1, M B( I, J ) = A( I, J ) 550 CONTINUE 560 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN DO 580 J = 1, N DO 570 I = 1, M B( I, J ) = A( I, J ) + B( I, J ) 570 CONTINUE 580 CONTINUE * ELSE DO 600 J = 1, N DO 590 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 590 CONTINUE 600 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 620 J = 1, N DO 610 I = 1, M B( I, J ) = ALPHA * A( I, J ) 610 CONTINUE 620 CONTINUE * ELSE IF( BETA.EQ.ONE ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL DAXPY( M*N, ALPHA, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 630 J = 1, N CALL DAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) 630 CONTINUE ELSE DO 650 J = 1, N DO 640 I = 1, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 640 CONTINUE 650 CONTINUE END IF * ELSE DO 670 J = 1, N DO 660 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 660 CONTINUE 670 CONTINUE END IF END IF END IF * RETURN * * End of PBDMATADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbsmatadd.f0000640000175000017500000003025310363532303017660 0ustar muammarmuammar SUBROUTINE PBSMATADD( ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, $ LDB ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, LDA, LDB, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBSMATADD performs the matrix add operation B := alpha*A + beta*B, * where alpha and beta are scalars, and A and B are m-by-n * upper/lower trapezoidal matrices, or rectangular matrices. * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the part of the matrix A, or (conjugate) transposed * matrix A to be added to the matrix B, * = 'U': Upper triangular part * up(B) = alpha*up(A) + beta*up(B) * = 'L': Lower triangular part * lo(B) = alpha*lo(A) + beta*lo(B) * = 'T': Transposed matrix A * B = alpha*A**T + beta*B * = 'C': Conjugate transposed matrix A * B = alpha*A**H + beta*B * Otherwise: B = alpha*A + beta*B * if M = LDA = LDB: use one BLAS loop * if MODE = 'V' : columnwise copy using BLAS if possible * else : use double loops * * M (input) INTEGER * M specifies the number of columns of the matrix A if * MODE != 'T'/'C', and it specifies the number of rows of the * matrix A otherwise. It also specifies the number of rows of * the matrix B. M >= 0. * * N (input) INTEGER * N specifies the number of rows of the matrix A if * MODE != 'T'/'C', and it specifies the number of columns of * the matrix A otherwise. It also specifies the number of * columns of the matrix B. N >= 0. * * ALPHA (input) REAL * ALPHA specifies the scalar alpha. * * A (input) REAL array, dimension (LDA,N) * The m by n matrix A if MODE != 'T'/'C'. * If MODE = 'U', only the upper triangle or trapezoid is * accessed; if MODE = 'L', only the lower triangle or * trapezoid is accessed. Otherwise all m-by-n data matrix * is accessed. * And the n by m matrix A if MODE = 'T'/'C'. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M) if * MODE != 'T'/'C'. And LDA >= max(1,N) if MODE = 'T'/'C'. * * BETA (input) REAL * BETA specifies the scalar beta. * * B (input) REAL array, dimension (LDB,N) * On exit, B = alpha*A + beta*B * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,M). * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL SSCAL, SCOPY, SAXPY * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( M.LE.0 .OR. N.LE.0 .OR. ( ALPHA.EQ.ZERO.AND.BETA.EQ.ONE ) ) $ RETURN * * A is upper triangular or upper trapezoidal, * IF( LSAME( MODE, 'U' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( I, J ) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, MIN( J, M ) B( I, J ) = BETA * B( I, J ) 30 CONTINUE 40 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 60 J = 1, N DO 50 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) 50 CONTINUE 60 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 80 J = 1, N DO 70 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + B( I, J ) 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 120 J = 1, N DO 110 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) 110 CONTINUE 120 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 140 J = 1, N DO 130 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 130 CONTINUE 140 CONTINUE ELSE DO 160 J = 1, N DO 150 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * * A is lower triangular or upper trapezoidal, * ELSE IF( LSAME( MODE, 'L' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, N DO 170 I = J, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE ELSE DO 200 J = 1, N DO 190 I = J, M B( I, J ) = BETA * B( I, J ) 190 CONTINUE 200 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 220 J = 1, N DO 210 I = J, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 240 J = 1, N DO 230 I = J, M B( I, J ) = A( I, J ) + B( I, J ) 230 CONTINUE 240 CONTINUE ELSE DO 260 J = 1, N DO 250 I = J, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 280 J = 1, N DO 270 I = J, M B( I, J ) = ALPHA * A( I, J ) 270 CONTINUE 280 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 300 J = 1, N DO 290 I = J, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 290 CONTINUE 300 CONTINUE ELSE DO 320 J = 1, N DO 310 I = J, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF * * If MODE = 'Transpose'/'Conjugate' * ELSE IF( LSAME( MODE, 'T' ) .OR. LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 340 J = 1, N DO 330 I = 1, M B( I, J ) = ZERO 330 CONTINUE 340 CONTINUE ELSE DO 360 J = 1, N DO 350 I = 1, M B( I, J ) = BETA * B( I, J ) 350 CONTINUE 360 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 380 J = 1, N DO 370 I = 1, M B( I, J ) = A( J, I ) 370 CONTINUE 380 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 400 J = 1, N DO 390 I = 1, M B( I, J ) = A( J, I ) + B( I, J ) 390 CONTINUE 400 CONTINUE ELSE DO 420 J = 1, N DO 410 I = 1, M B( I, J ) = A( J, I ) + BETA * B( I, J ) 410 CONTINUE 420 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 440 J = 1, N DO 430 I = 1, M B( I, J ) = ALPHA * A( J, I ) 430 CONTINUE 440 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 460 J = 1, N DO 450 I = 1, M B( I, J ) = ALPHA * A( J, I ) + B( I, J ) 450 CONTINUE 460 CONTINUE ELSE DO 480 J = 1, N DO 470 I = 1, M B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 470 CONTINUE 480 CONTINUE END IF END IF * * Other cases (for genral matrix additions) * ELSE IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 500 J = 1, N DO 490 I = 1, M B( I, J ) = ZERO 490 CONTINUE 500 CONTINUE * ELSE IF( M.EQ.LDB ) THEN CALL SSCAL( M*N, BETA, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 510 J = 1, N CALL SSCAL( M, BETA, B( 1, J ), 1 ) 510 CONTINUE ELSE DO 530 J = 1, N DO 520 I = 1, M B( I, J ) = BETA * B( I, J ) 520 CONTINUE 530 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL SCOPY( M*N, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 540 J = 1, N CALL SCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) 540 CONTINUE ELSE DO 560 J = 1, N DO 550 I = 1, M B( I, J ) = A( I, J ) 550 CONTINUE 560 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN DO 580 J = 1, N DO 570 I = 1, M B( I, J ) = A( I, J ) + B( I, J ) 570 CONTINUE 580 CONTINUE * ELSE DO 600 J = 1, N DO 590 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 590 CONTINUE 600 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 620 J = 1, N DO 610 I = 1, M B( I, J ) = ALPHA * A( I, J ) 610 CONTINUE 620 CONTINUE * ELSE IF( BETA.EQ.ONE ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL SAXPY( M*N, ALPHA, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 630 J = 1, N CALL SAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) 630 CONTINUE ELSE DO 650 J = 1, N DO 640 I = 1, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 640 CONTINUE 650 CONTINUE END IF * ELSE DO 670 J = 1, N DO 660 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 660 CONTINUE 670 CONTINUE END IF END IF END IF * RETURN * * End of PBSMATADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdtrsrt.f0000640000175000017500000000450110363532303017562 0ustar muammarmuammar SUBROUTINE PBDTRSRT( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB, NINT DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBDTRSRT forms T <== A + beta * T, where T is a sorted * condensed block row (or column) from a block column (or row) of A * with sorting index ISRT * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Variables .. INTEGER JA, JB, K, KK, NJUMP * .. * .. External Subroutines .. EXTERNAL PBDMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN NJUMP = NB * LCMQ DO 20 K = 0, LCMQ-1 JA = NINT * MOD( K*LCMP, LCMQ ) + 1 JB = K * NB + 1 * DO 10 KK = 1, ICEIL( NINT, NB ) IF( N.LT.JB ) GO TO 20 CALL PBDMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1, JA), LDA, BETA, B(1, JB), LDB ) JA = JA + NB JB = JB + NJUMP 10 CONTINUE 20 CONTINUE * * if( LSAME( ADIST, 'C') ) then * ELSE NJUMP = NB * LCMP DO 40 K = 0, LCMP-1 JA = 1 JB = K * NB + 1 * DO 30 KK = 1, ICEIL( NINT, NB ) IF( M.LT.JB ) GO TO 40 CALL PBDMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA, N*MOD(K*LCMQ,LCMP)+1), LDA, BETA, $ B(JB, 1), LDB ) JA = JA + NB JB = JB + NJUMP 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of PBDTRSRT * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbztran.f0000640000175000017500000007121510363532303017404 0ustar muammarmuammar SUBROUTINE PBZTRAN( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC, $ M, N, NB COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), C( LDC, * ), WORK( * ) * .. * * Purpose * ======= * * PBZTRAN transposes a column block to row block, or a row block to * column block by reallocating data distribution. * * C := A^T + beta*C, or C := A^C + beta*C * * where A is an M-by-N matrix and C is an N-by-M matrix, and the size * of M or N is limited to its block size NB. * * The first elements of the matrices A, and C should be located at * the beginnings of their first blocks. (not the middle of the blocks.) * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * ADIST - (input) CHARACTER*1 * ADIST specifies whether A is a column block or a row block. * * ADIST = 'C', A is a column block * ADIST = 'R', A is a row block * * TRANS - (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the matrices A and C are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * M - (input) INTEGER * M specifies the (global) number of rows of the matrix (block * column or block row) A and of columns of the matrix C. * M >= 0. * * N - (input) INTEGER * N specifies the (global) number of columns of the matrix * (block column or block row) A and of columns of the matrix * C. N >= 0. * * NB - (input) INTEGER * NB specifies the column block size of the matrix A and the * row block size of the matrix C when ADIST = 'C'. Otherwise, * it specifies the row block size of the matrix A and the * column block size of the matrix C. NB >= 1. * * A (input) COMPLEX*16 array of DIMENSION ( LDA, Lx ), * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'. * Before entry with ADIST = 'C', the leading Mp by N part of * the array A must contain the matrix A, otherwise the leading * M by Nq part of the array A must contain the matrix A. See * parameter details for the values of Mp and Nq. * * LDA (input) INTEGER * LDA specifies the leading dimension of (local) A as declared * in the calling (sub) program. LDA >= MAX(1,Mp) when * ADIST = 'C', or LDA >= MAX(1,M) otherwise. * * BETA (input) COMPLEX*16 * BETA specifies scaler beta. * * C (input/output) COMPLEX*16 array of DIMENSION ( LDC, Lx ), * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'. * If ADIST = 'C', the leading N-by-Mq part of the array C * contains the (local) matrix C, otherwise the leading * Np-by-M part of the array C must contain the (local) matrix * C. C will not be referenced if beta is zero. * * LDC (input) INTEGER * LDC specifies the leading dimension of (local) C as declared * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C', * or LDC >= MAX(1,Np) otherwise. * * IAROW (input) INTEGER * IAROW specifies a row of the process template, * which holds the first block of the matrix A. If A is a row * of blocks (ADIST = 'R') and all rows of processes have a copy * of A, then set IAROW = -1. * * IACOL (input) INTEGER * IACOL specifies a column of the process template, * which holds the first block of the matrix A. If A is a * column of blocks (ADIST = 'C') and all columns of processes * have a copy of A, then set IACOL = -1. * * ICROW (input) INTEGER * ICROW specifies the current row process which holds * the first block of the matrix C, which is transposed of A. * If C is a row of blocks (ADIST = 'C') and the transposed * row block C is distributed all rows of processes, set * ICROW = -1. * * ICCOL (input) INTEGER * ICCOL specifies the current column process which holds * the first block of the matrix C, which is transposed of A. * If C is a column of blocks (ADIST = 'R') and the transposed * column block C is distributed all columns of processes, * set ICCOL = -1. * * WORK (workspace) COMPLEX*16 array of dimension Size(WORK). * It needs extra working space of A'. * * Parameters Details * ================== * * Lx It is a local portion of L owned by a process, (L is * replaced by M, or N, and x is replaced by either p (=NPROW) * or q (=NPCOL)). The value is determined by L, LB, x, and * MI, where LB is a block size and MI is a row or column * position in a process template. Lx is equal to or less * than Lx0 = CEIL( L, LB*x ) * LB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * Mqb = CEIL( M, NB*NPCOL ) * Npb = CEIL( N, NB*NPROW ) * LCMQ = LCM / NPCOL * LCMP = LCM / NPROW * * (1) ADIST = 'C' * (a) IACOL != -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * (b) IACOL = -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB)) * * (2) ADIST = 'R' * (a) IAROW != -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * (b) IAROW = -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ ) * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP ) * * ===================================================================== * * .. * .. Parameters .. COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM, $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0, $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL, $ NPROW, NQ COMPLEX*16 TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL ILCM, ICEIL, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBZMATADD, PBZTR2AF, PBZTR2AT, $ PBZTR2BT, PBZTRGET, PBZTRSRT, PXERBLA, ZGEBR2D, $ ZGEBS2D, ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( M.EQ.0 .OR. N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( ADIST, 'C' ) ROWFORM = LSAME( ADIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( M .LT.0 ) THEN INFO = 4 ELSE IF( N .LT.0 ) THEN INFO = 5 ELSE IF( NB.LT.1 ) THEN INFO = 6 ELSE IF( IAROW.LT.-1 .OR. IAROW.GE.NPROW .OR. $ ( IAROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IACOL.LT.-1 .OR. IACOL.GE.NPCOL .OR. $ ( IACOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( ICROW.LT.-1 .OR. ICROW.GE.NPROW .OR. $ ( ICROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( ICCOL.LT.-1 .OR. ICCOL.GE.NPCOL .OR. $ ( ICCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO .NE. 0 ) THEN CALL PXERBLA( ICONTXT, 'PBZTRAN ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP * * When A is a column block * IF( COLFORM ) THEN * * Form C <== A' ( A is a column block ) * _ * | | * | | * _____________ | | * |______C______| <== |A| * | | * | | * |_| * * MRROW : row relative position in template from IAROW * MRCOL : column relative position in template from ICCOL * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) JCROW = ICROW IF( ICROW.EQ.-1 ) JCROW = IAROW * MP = NUMROC( M, NB, MYROW, IAROW, NPROW ) MQ = NUMROC( M, NB, MYCOL, ICCOL, NPCOL ) MQ0 = NUMROC( NUMROC(M, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * IF( LDA.LT.MP .AND. $ ( IACOL.EQ.MYCOL .OR. IACOL.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.N .AND. $ ( ICROW.EQ.MYROW .OR. ICROW.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a column process of IACOL has a column block A, * IF( IACOL.GE.0 ) THEN TBETA = ZERO IF( MYROW.EQ.JCROW ) TBETA = BETA * DO 20 I = 0, MIN( LCM, ICEIL(M,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IAROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + ICCOL, NPCOL ) IF( LCMQ.EQ.1 ) MQ0 = NUMROC( M, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IACOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBZTR2AT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBZTR2BT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) CALL ZGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL ) ELSE CALL ZGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL ) CALL PBZTR2AF( ICONTXT, 'Row', N, MQ-JDEX, NB, WORK, N, $ TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ, $ MQ0 ) END IF END IF 20 CONTINUE * * Broadcast a row block of C in each column of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.JCROW ) THEN CALL ZGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL ZGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ JCROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) MQ0 = MQ * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 30 I = 0, LCMP-1 IF( MRCOL.EQ.MOD( NPROW*I+MRROW, NPCOL ) ) THEN IF( LCMQ.EQ.1.AND.(ICROW.EQ.-1.OR.ICROW.EQ.MYROW) ) THEN CALL PBZTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, BETA, C, LDC, $ LCMP*NB ) ELSE CALL PBZTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) CALL PBZTRGET( ICONTXT, 'Row', N, MQ0, ICEIL(M,NB), WORK, N, $ MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) $ CALL PBZTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) CALL ZGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL ZGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL ZGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL ) ELSE CALL ZGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL ) CALL PBZMATADD( ICONTXT, 'G', N, MQ, ONE, WORK, N, $ BETA, C, LDC ) END IF END IF * ELSE ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL ZGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN CALL ZGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL ) END IF * IF( MYROW.EQ.ICROW ) $ CALL PBZTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) END IF END IF * END IF * * When A is a row block * ELSE * * Form C <== A' ( A is a row block ) * _ * | | * | | * | | _____________ * |C| <== |______A______| * | | * | | * |_| * * MRROW : row relative position in template from ICROW * MRCOL : column relative position in template from IACOL * MRROW = MOD( NPROW+MYROW-ICROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) JCCOL = ICCOL IF( ICCOL.EQ.-1 ) JCCOL = IACOL * NP = NUMROC( N, NB, MYROW, ICROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * IF( LDA.LT.M .AND. $ ( IAROW.EQ.MYROW .OR. IAROW.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.NP .AND. $ ( ICCOL.EQ.MYCOL .OR. ICCOL.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a row process of IAROW has a row block A, * IF( IAROW.GE.0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JCCOL ) TBETA = BETA * DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( N, NB, I, 0, NPROW ) IDEX = (I/NPROW) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * JDEX = (I/NPCOL) * NB IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN CALL PBZTR2AT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBZTR2BT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) CALL ZGESD2D( ICONTXT, NP0, M, WORK, NP0, $ MCROW, JCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL ) ELSE CALL ZGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL ) CALL PBZTR2AF( ICONTXT, 'Col', NP-IDEX, M, NB, WORK, $ NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ, $ NP0 ) END IF END IF 40 CONTINUE * * Broadcast a column block of WORK in each row of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JCCOL ) THEN CALL ZGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL ZGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, JCCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IF( LCMP.EQ.1.AND.(ICCOL.EQ.-1.OR.ICCOL.EQ.MYCOL) ) THEN CALL PBZTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, BETA, C, LDC, $ LCMQ*NB ) ELSE CALL PBZTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-ICROW, NPROW ) CALL PBZTRGET( ICONTXT, 'Col', NP0, M, ICEIL(N,NB), WORK, $ NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) $ CALL PBZTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) CALL ZGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL ZGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL ZGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL ZGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL ) ELSE CALL ZGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL ) CALL PBZMATADD( ICONTXT, 'G', NP, M, ONE, WORK, NP, $ BETA, C, LDC ) END IF END IF * ELSE ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL ZGESD2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN CALL ZGERV2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, MCCOL ) END IF * IF( MYCOL.EQ.ICCOL ) $ CALL PBZTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) END IF END IF * END IF END IF * RETURN * * End of PBZTRAN * END * *======================================================================= * SUBROUTINE PBZTR2AT *======================================================================= * SUBROUTINE PBZTR2AT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, LCMP, LCMQ ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBZTR2AT forms B <== A^T + beta*B, or A^C + beta*B * B is a ((conjugate) transposed) scattered block row (or column), * copied from a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IA, IB, K, INTV, JNTV * .. * .. External Subroutines .. EXTERNAL PBZMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( LCMP.EQ.LCMQ ) THEN CALL PBZMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBZMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + JNTV 10 CONTINUE * * If A is a row block ( ADIST = 'R' ), * ELSE INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, JNTV ) CALL PBZMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + JNTV IB = IB + INTV 20 CONTINUE END IF END IF * RETURN * * End of PBZTR2AT * END * *======================================================================= * SUBROUTINE PBZTR2BT *======================================================================= * SUBROUTINE PBZTR2BT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBZTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a * ((conjugate) transposed) condensed block row (or column), copied from * a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IA, IB, K * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. External Subroutines .. EXTERNAL PBZMATADD * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( INTV.EQ.NB ) THEN CALL PBZMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBZMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + NB 10 CONTINUE * * If A is a row block (ADIST = 'R'), * ELSE IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, INTV ) CALL PBZMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + INTV IB = IB + NB 20 CONTINUE END IF END IF * RETURN * * End of PBZTR2BT * END * *======================================================================= * SUBROUTINE PBZTR2AF *======================================================================= * SUBROUTINE PBZTR2AF( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBZTR2AF forms T <== A + BETA*T, where T is a scattered block * row (or column) copied from a (condensed) block column (or row) of A * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER JA, JB, K, INTV * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN INTV = NB * LCMQ JA = 1 JB = 1 DO 10 K = 1, ICEIL( NINT, NB ) CALL PBZMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1,JA), LDA, BETA, B(1,JB), LDB ) JA = JA + NB JB = JB + INTV 10 CONTINUE * * if( LSAME( ADIST, 'C' ) ) then * ELSE INTV = NB * LCMP JA = 1 JB = 1 DO 20 K = 1, ICEIL( NINT, NB ) CALL PBZMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA,1), LDA, BETA, B(JB,1), LDB ) JA = JA + NB JB = JB + INTV 20 CONTINUE END IF * RETURN * * End of PBZTR2AF * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbctrget.f0000640000175000017500000001003310363532303017525 0ustar muammarmuammar SUBROUTINE PBCTRGET( ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, $ MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, IGD, LDA, M, MCCOL, MCROW, MNB, MYCOL, $ MYROW, N, NPCOL, NPROW * .. * .. Array Arguments .. COMPLEX A( LDA, * ) * .. * * Purpose * ======= * * PBCTRGET forms a row block of A from scattered row subblocks if * ADIST = 'R', or forms a column block of A from scattered column * subblocks, if ADIST = 'C'. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Variables .. INTEGER KINT, KINT2, KLEN, KMOD, KPPOS, NLEN, NNUM, $ NTLEN REAL TEMP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL CGERV2D, CGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * * if A is a row block, it needs to communicate columnwise. * IF( LSAME( ADIST, 'R' ) ) THEN KPPOS = MOD( NPROW+MYROW-MCROW, NPROW ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPROW/IGD, MNB-MCCOL ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPROW ) * 10 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL CGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MOD(MYROW+KINT, NPROW), MYCOL ) NLEN = NLEN + KLEN END IF ELSE CALL CGESD2D( ICONTXT, M, NLEN, A, LDA, $ MOD(NPROW+MYROW-KINT, NPROW), MYCOL ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 10 END IF END IF * * if A is a column block, it needs to communicate rowwise. * ELSE IF( LSAME( ADIST, 'C' ) ) THEN * KPPOS = MOD( NPCOL+MYCOL-MCCOL, NPCOL ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPCOL/IGD, MNB-MCROW ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPCOL ) * 20 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL CGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MYROW, MOD(MYCOL+KINT, NPCOL) ) NLEN = NLEN + KLEN END IF ELSE CALL CGESD2D( ICONTXT, M, NLEN, A, LDA, MYROW, $ MOD(NPCOL+MYCOL-KINT, NPCOL) ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 20 END IF END IF END IF * 30 CONTINUE * RETURN * * End of PBCTRGET * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdvecadd.f0000640000175000017500000001552710363532303017644 0ustar muammarmuammar SUBROUTINE PBDVECADD( ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, $ INCY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, INCX, INCY, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * * .. * * Purpose * ======= * * PBDVECADD performs a vector X to be added to Y * Y := alpha*op(X) + beta*Y, * where alpha and beta are scalars, and X and Y are n vectors, * and op(X) = X**H if MODE = 'C', * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the transposed, or conjugate transposed vector X * to be added to the vector Y * = 'C': Conjugate vector X is added for complex data set. * Y = alpha * X**H + beta * Y * ELSE : Vector X is added. Y = alpha*X + beta*Y * if MODE = 'V', BLAS routine may be used. * * N (input) INTEGER * The number of elements of the vectors X and Y to be added. * N >= 0. * * ALPHA (input) DOUBLE PRECISION * ALPHA specifies the scalar alpha. * * X (input) DOUBLE PRECISION array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCX ) ) * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) DOUBLE PRECISION * BETA specifies the scalar beta. * * Y (input/output) DOUBLE PRECISION array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCY ) ) * On entry with BETA non-zero, the incremented array Y must * contain the vector Y. * On exit, Y is overwritten by the updated vector Y. * * INCY - (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0) * .. * .. Local Scalars .. INTEGER I, IX, IY * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL DSCAL, DCOPY, DAXPY * .. * .. Executable Statements .. * IF( N.LE.0 .OR. ( ALPHA.EQ.ZERO .AND. BETA.EQ.ONE ) ) RETURN * IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCY.EQ.1 ) THEN DO 10 I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IY = 1 DO 20 I = 1, N Y( IY ) = ZERO IY = IY + INCY 20 CONTINUE END IF * ELSE IF( LSAME( MODE, 'V' ) ) THEN CALL DSCAL( N, BETA, Y, INCY ) ELSE IF( INCY.EQ.1 ) THEN DO 30 I = 1, N Y( I ) = BETA * Y( I ) 30 CONTINUE ELSE IY = 1 DO 40 I = 1, N Y( IY ) = BETA * Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL DCOPY( N, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 50 I = 1, N Y( I ) = X( I ) 50 CONTINUE ELSE IX = 1 IY = 1 DO 60 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 60 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 70 I = 1, N Y( I ) = X( I ) + Y( I ) 70 CONTINUE ELSE IX = 1 IY = 1 DO 80 I = 1, N Y( IY ) = X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 80 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 90 I = 1, N Y( I ) = X( I ) + BETA * Y( I ) 90 CONTINUE ELSE IX = 1 IY = 1 DO 100 I = 1, N Y( IY ) = X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 100 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 110 I = 1, N Y( I ) = ALPHA * X( I ) 110 CONTINUE ELSE IX = 1 IY = 1 DO 120 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 120 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL DAXPY( N, ALPHA, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 130 I = 1, N Y( I ) = ALPHA * X( I ) + Y( I ) 130 CONTINUE ELSE IX = 1 IY = 1 DO 140 I = 1, N Y( IY ) = ALPHA * X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 140 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 150 I = 1, N Y( I ) = ALPHA * X( I ) + BETA * Y( I ) 150 CONTINUE ELSE IX = 1 IY = 1 DO 160 I = 1, N Y( IY ) = ALPHA * X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 160 CONTINUE END IF END IF END IF END IF * RETURN * * End of PBDVECADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdtrst1.f0000640000175000017500000000645110363532303017467 0ustar muammarmuammar SUBROUTINE PBDTRST1( ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, $ INCY, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 XDIST INTEGER ICONTXT, INCX, INCY, LCMP, LCMQ, N, NB, NINT, $ NZ DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * PBDTRST1 forms y <== x + beta * y, where y is a sorted * condensed row (or column) vector from a column (or row) vector of x. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Variables .. INTEGER ITER, IX, IY, K, KK, KZ, NJUMP * .. * .. External Subroutines .. EXTERNAL PBDVECADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, MOD * .. * .. Executable Statements .. * ITER = ICEIL( NINT, NB ) KZ = NZ * IF( LSAME( XDIST, 'R' ) ) THEN NJUMP = NB * LCMQ * DO 20 KK = 0, LCMQ-1 IX = NINT * MOD( KK*LCMP, LCMQ ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBDVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 10 K = 2, ITER-1 CALL PBDVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 10 CONTINUE END IF * CALL PBDVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 20 CONTINUE * * if( LSAME( XDIST, 'C' ) ) then * ELSE NJUMP = NB * LCMP * DO 40 KK = 0, LCMP-1 IX = NINT * MOD( KK*LCMQ, LCMP ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBDVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 30 K = 2, ITER-1 CALL PBDVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 30 CONTINUE END IF * CALL PBDVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 40 CONTINUE END IF * 50 CONTINUE * RETURN * * End of PBDTRST1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbctrsrt.f0000640000175000017500000000451510363532303017566 0ustar muammarmuammar SUBROUTINE PBCTRSRT( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB, NINT COMPLEX BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBCTRSRT forms T <== A + beta * T, where T is a sorted * condensed block row (or column) from a block column (or row) of A * with sorting index ISRT * * ===================================================================== * * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Variables .. INTEGER JA, JB, K, KK, NJUMP * .. * .. External Subroutines .. EXTERNAL PBCMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN NJUMP = NB * LCMQ DO 20 K = 0, LCMQ-1 JA = NINT * MOD( K*LCMP, LCMQ ) + 1 JB = K * NB + 1 * DO 10 KK = 1, ICEIL( NINT, NB ) IF( N.LT.JB ) GO TO 20 CALL PBCMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1, JA), LDA, BETA, B(1, JB), LDB ) JA = JA + NB JB = JB + NJUMP 10 CONTINUE 20 CONTINUE * * if( LSAME( ADIST, 'C') ) then * ELSE NJUMP = NB * LCMP DO 40 K = 0, LCMP-1 JA = 1 JB = K * NB + 1 * DO 30 KK = 1, ICEIL( NINT, NB ) IF( M.LT.JB ) GO TO 40 CALL PBCMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA, N*MOD(K*LCMQ,LCMP)+1), LDA, BETA, $ B(JB, 1), LDB ) JA = JA + NB JB = JB + NJUMP 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of PBCTRSRT * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbctrst1.f0000640000175000017500000000646610363532303017474 0ustar muammarmuammar SUBROUTINE PBCTRST1( ICONTXT, XDIST, N, NB, NZ, X, INCX, BETA, Y, $ INCY, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 XDIST INTEGER ICONTXT, INCX, INCY, LCMP, LCMQ, N, NB, NINT, $ NZ COMPLEX BETA * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * PBCTRST1 forms y <== x + beta * y, where y is a sorted * condensed row (or column) vector from a column (or row) vector of x. * * ===================================================================== * * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Variables .. INTEGER ITER, IX, IY, K, KK, KZ, NJUMP * .. * .. External Subroutines .. EXTERNAL PBCVECADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX, MOD * .. * .. Executable Statements .. * ITER = ICEIL( NINT, NB ) KZ = NZ * IF( LSAME( XDIST, 'R' ) ) THEN NJUMP = NB * LCMQ * DO 20 KK = 0, LCMQ-1 IX = NINT * MOD( KK*LCMP, LCMQ ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBCVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 10 K = 2, ITER-1 CALL PBCVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 10 CONTINUE END IF * CALL PBCVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 20 CONTINUE * * if( LSAME( XDIST, 'C' ) ) then * ELSE NJUMP = NB * LCMP * DO 40 KK = 0, LCMP-1 IX = NINT * MOD( KK*LCMQ, LCMP ) IY = MAX( 0, NB*KK-NZ ) IF( N.LT.IY ) GO TO 50 * IF( ITER.GT.1 ) THEN CALL PBCVECADD( ICONTXT, 'G', NB-KZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - KZ IY = IY + NJUMP - KZ KZ = 0 * DO 30 K = 2, ITER-1 CALL PBCVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + NJUMP 30 CONTINUE END IF * CALL PBCVECADD( ICONTXT, 'G', MIN(NB-KZ,N-IY), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), $ INCY ) KZ = 0 40 CONTINUE END IF * 50 CONTINUE * RETURN * * End of PBCTRST1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbsvecadd.f0000640000175000017500000001544710363532303017664 0ustar muammarmuammar SUBROUTINE PBSVECADD( ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, $ INCY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, INCX, INCY, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL X( * ), Y( * ) * * .. * * Purpose * ======= * * PBSVECADD performs a vector X to be added to Y * Y := alpha*op(X) + beta*Y, * where alpha and beta are scalars, and X and Y are n vectors, * and op(X) = X**H if MODE = 'C', * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the transposed, or conjugate transposed vector X * to be added to the vector Y * = 'C': Conjugate vector X is added for complex data set. * Y = alpha * X**H + beta * Y * ELSE : Vector X is added. Y = alpha*X + beta*Y * if MODE = 'V', BLAS routine may be used. * * N (input) INTEGER * The number of elements of the vectors X and Y to be added. * N >= 0. * * ALPHA (input) REAL * ALPHA specifies the scalar alpha. * * X (input) REAL array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCX ) ) * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) REAL * BETA specifies the scalar beta. * * Y (input/output) REAL array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCY ) ) * On entry with BETA non-zero, the incremented array Y must * contain the vector Y. * On exit, Y is overwritten by the updated vector Y. * * INCY - (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * ===================================================================== * * .. * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0) * .. * .. Local Scalars .. INTEGER I, IX, IY * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL SSCAL, SCOPY, SAXPY * .. * .. Executable Statements .. * IF( N.LE.0 .OR. ( ALPHA.EQ.ZERO .AND. BETA.EQ.ONE ) ) RETURN * IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCY.EQ.1 ) THEN DO 10 I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IY = 1 DO 20 I = 1, N Y( IY ) = ZERO IY = IY + INCY 20 CONTINUE END IF * ELSE IF( LSAME( MODE, 'V' ) ) THEN CALL SSCAL( N, BETA, Y, INCY ) ELSE IF( INCY.EQ.1 ) THEN DO 30 I = 1, N Y( I ) = BETA * Y( I ) 30 CONTINUE ELSE IY = 1 DO 40 I = 1, N Y( IY ) = BETA * Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL SCOPY( N, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 50 I = 1, N Y( I ) = X( I ) 50 CONTINUE ELSE IX = 1 IY = 1 DO 60 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 60 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 70 I = 1, N Y( I ) = X( I ) + Y( I ) 70 CONTINUE ELSE IX = 1 IY = 1 DO 80 I = 1, N Y( IY ) = X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 80 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 90 I = 1, N Y( I ) = X( I ) + BETA * Y( I ) 90 CONTINUE ELSE IX = 1 IY = 1 DO 100 I = 1, N Y( IY ) = X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 100 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 110 I = 1, N Y( I ) = ALPHA * X( I ) 110 CONTINUE ELSE IX = 1 IY = 1 DO 120 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 120 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL SAXPY( N, ALPHA, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 130 I = 1, N Y( I ) = ALPHA * X( I ) + Y( I ) 130 CONTINUE ELSE IX = 1 IY = 1 DO 140 I = 1, N Y( IY ) = ALPHA * X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 140 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 150 I = 1, N Y( I ) = ALPHA * X( I ) + BETA * Y( I ) 150 CONTINUE ELSE IX = 1 IY = 1 DO 160 I = 1, N Y( IY ) = ALPHA * X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 160 CONTINUE END IF END IF END IF END IF * RETURN * * End of PBSVECADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbstrnv.f0000640000175000017500000006462110363532303017425 0ustar muammarmuammar SUBROUTINE PBSTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, $ WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 TRANS, XDIST INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL, $ IYROW, N, NB, NZ REAL BETA * .. * .. Array Arguments .. REAL WORK( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PBSTRNV transposes a column vector to row vector, or a row vector to * column vector by reallocating data distribution. * * Y := X' * * where X and Y are N vectors. * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * XDIST (input) CHARACTER*1 * XDIST specifies whether X is a column vector or a row vector, * * XDIST = 'C', X is a column vector (distributed columnwise) * XDIST = 'R', X is a row vector (distributed rowwise) * * TRANS (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the vectors X and Y are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * N (input) INTEGER * N specifies the (global) number of the vector X and the * vector Y. N >= 0. * * NB (input) INTEGER * NB specifies the block size of vectors X and Y. NB >= 0. * * NZ (input) INTEGER * NZ is the column offset to specify the column distance from * the beginning of the block to the first element of the * vector X, and the row offset to the first element of the * vector Y if XDIST = 'C'. * Otherwise, it is row offset to specify the row distance * from the beginning of the block to the first element of the * vector X, and the column offset to the first element of the * vector Y. 0 < NZ <= NB. * * X (input) REAL array of dimension at least * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'. * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) REAL * BETA specifies scaler beta. * * Y (input/output) REAL array of dimension at least * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or * The incremented array Y must contain the vector Y. * Y will not be referenced if beta is zero. * * INCY (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * IXROW (input) INTEGER * IXROW specifies a row of the process template, which holds * the first element of the vector X. If X is a row vector and * all rows of processes have a copy of X, then set IXROW = -1. * * IXCOL (input) INTEGER * IXCOL specifies a column of the process template, * which holds the first element of the vector X. If X is a * column block and all columns of processes have a copy of X, * then set IXCOL = -1. * * IYROW (input) INTEGER * IYROW specifies the current row process which holds the * first element of the vector Y, which is transposed of X. * If X is a column vector and the transposed row vector Y is * distributed all rows of processes, set IYROW = -1. * * IYCOL (input) INTEGER * IYCOL specifies the current column process which holds * the first element of the vector Y, which is transposed of Y. * If X is a row block and the transposed column vector Y is * distributed all columns of processes, set IYCOL = -1. * * WORK (workspace) REAL array of dimension Size(WORK). * It needs extra working space of x**T or x**H. * * Parameters Details * ================== * * Nx It is a local portion of N owned by a process, where x is * replaced by either p (=NPROW) or q (=NPCOL)). The value is * determined by N, NB, NZ, x, and MI, where NB is a block size, * NZ is a offset from the beginning of the block, and MI is a * row or column position in a process template. Nx is equal * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * NN = N + NZ * Npb = CEIL( NN, NB*NPROW ) * Nqb = CEIL( NN, NB*NPCOL ) * LCMP = LCM / NPROW * LCMQ = LCM / NPCOL * * (1) XDIST = 'C' * (a) IXCOL != -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * (b) IXCOL = -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB)) * * (2) XDIST = 'R' * (a) IXROW != -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * (b) IXROW = -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP) * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ) * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ, $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW, $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW, $ NQ, NQ0, NQ1 REAL TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL LSAME, ILCM, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSTR2A1, PBSTR2B1, PBSTRGET, $ PBSTRST1, PBSVECADD, PXERBLA, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( XDIST, 'C' ) ROWFORM = LSAME( XDIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( N .LT.0 ) THEN INFO = 4 ELSE IF( NB .LT.1 ) THEN INFO = 5 ELSE IF( NZ .LT.0 .OR. NZ.GE.NB ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR. $ ( IXROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR. $ ( IXCOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR. $ ( IYROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR. $ ( IYCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO.NE.0 ) THEN CALL PXERBLA( ICONTXT, 'PBSTRNV ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP NN = N + NZ * * When x is a column vector * IF( COLFORM ) THEN * * Form y <== x' ( x is a column vector ) * * || * || * _____________ || * -----(y)----- <== (x) * || * || * || * IF( IXROW.LT.0 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.0 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IXROW * MRCOL : column relative position in template from IYCOL * MRROW = MOD( NPROW+MYROW-IXROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) JYROW = IYROW IF( IYROW.EQ.-1 ) JYROW = IXROW * NP = NUMROC( NN, NB, MYROW, IXROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * * When a column process of IXCOL has a column block A, * IF( IXCOL .GE. 0 ) THEN TBETA = ZERO IF( MYROW.EQ.JYROW ) TBETA = BETA KZ = NZ * DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL ) IF( LCMQ.EQ.1 ) NQ0 = NUMROC( NN, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) CALL SGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ JYROW, MCCOL ) END IF * * A destination node receives the copied vector * ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY, $ MCROW, IXCOL ) ELSE CALL SGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ MCROW, IXCOL ) CALL PBSTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ*NB ) END IF END IF KZ = 0 20 CONTINUE * * Broadcast a row block of WORK in each column of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.JYROW ) THEN CALL SGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL SGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ JYROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) NQ0 = NQ * * Processors, which have diagonal blocks of X, copy them to * WORK array in transposed form * KZ = 0 IF( MRROW.EQ.0 ) KZ = NZ JZ = 0 IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ * DO 30 I = 0, LCMP - 1 IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMP, 1 ) ELSE CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) CALL PBSTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBSTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF CALL SGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL SGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL SGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL ) ELSE CALL SGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL ) CALL PBSVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1, $ BETA, Y, INCY ) END IF END IF * ELSE NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL SGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN CALL SGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL ) END IF * IF( MYROW.EQ.IYROW ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBSTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF END IF END IF END IF * * When x is a row vector * ELSE * * Form y <== x' ( x is a row block ) * * || * || * || _____________ * (y) <== -----(x)----- * || * || * || * IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.0 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.0 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IYROW * MRCOL : column relative position in template from IXCOL * MRROW = MOD( NPROW+MYROW-IYROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL ) JYCOL = IYCOL IF( IYCOL.EQ.-1 ) JYCOL = IXCOL * NP = NUMROC( NN, NB, MYROW, IYROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * * When a row process of IXROW has a row block A, * IF( IXROW .GE. 0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JYCOL ) TBETA = BETA KZ = NZ * DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW ) JDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * IDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ, LCMP ) * * The source node sends blocks to a destination node * ELSE CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) CALL SGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ MCROW, JYCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY, $ IXROW, MCCOL ) ELSE CALL SGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ IXROW, MCCOL ) CALL PBSTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP*NB ) END IF END IF KZ = 0 40 CONTINUE * * Broadcast a column vector Y in each row of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JYCOL ) THEN CALL SGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL SGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, JYCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * KZ = 0 IF( MRCOL.EQ.0 ) KZ = NZ JZ = 0 IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMQ, 1 ) ELSE CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-IYROW, NPROW ) CALL PBSTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBSTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF CALL SGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL SGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL SGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL ) ELSE CALL SGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL ) CALL PBSVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA, $ Y, INCY ) END IF END IF * ELSE NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL SGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN CALL SGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL ) END IF * IF( MYCOL.EQ.IYCOL ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBSTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF END IF END IF END IF END IF * RETURN * * End of PBSTRNV * END * *======================================================================= * SUBROUTINE PBSTR2A1 *======================================================================= * SUBROUTINE PBSTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, $ INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV REAL BETA * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x * y is a scattered vector, copied from a condensed vector x. * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBSVECADD * .. * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER * IX = 0 IY = 0 JZ = NZ ITER = ICEIL( N+NZ, INTV ) * IF( ITER.GT.1 ) THEN CALL PBSVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - JZ IY = IY + INTV - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBSVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + INTV 10 CONTINUE END IF * CALL PBSVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) * RETURN * * End of PBSTR2A1 * END * *======================================================================= * SUBROUTINE PBSTR2B1 *======================================================================= * SUBROUTINE PBSTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, $ INCY, JINX, JINY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY REAL BETA * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x + beta * y * y is a condensed vector, copied from a scattered vector x * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBSVECADD * .. * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER, LENX, LENY * IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN CALL PBSVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA, $ Y, INCY ) * ELSE IX = 0 IY = 0 JZ = NZ LENX = NB * JINX LENY = NB * JINY ITER = ICEIL( N+NZ, LENX ) * IF( ITER.GT.1 ) THEN CALL PBSVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX - JZ IY = IY + LENY - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBSVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX IY = IY + LENY 10 CONTINUE END IF * CALL PBSVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) END IF * RETURN * * End of PBSTR2B1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbstrget.f0000640000175000017500000001003310363532303017545 0ustar muammarmuammar SUBROUTINE PBSTRGET( ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, $ MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, IGD, LDA, M, MCCOL, MCROW, MNB, MYCOL, $ MYROW, N, NPCOL, NPROW * .. * .. Array Arguments .. REAL A( LDA, * ) * .. * * Purpose * ======= * * PBSTRGET forms a row block of A from scattered row subblocks if * ADIST = 'R', or forms a column block of A from scattered column * subblocks, if ADIST = 'C'. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Variables .. INTEGER KINT, KINT2, KLEN, KMOD, KPPOS, NLEN, NNUM, $ NTLEN REAL TEMP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * * if A is a row block, it needs to communicate columnwise. * IF( LSAME( ADIST, 'R' ) ) THEN KPPOS = MOD( NPROW+MYROW-MCROW, NPROW ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPROW/IGD, MNB-MCCOL ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPROW ) * 10 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL SGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MOD(MYROW+KINT, NPROW), MYCOL ) NLEN = NLEN + KLEN END IF ELSE CALL SGESD2D( ICONTXT, M, NLEN, A, LDA, $ MOD(NPROW+MYROW-KINT, NPROW), MYCOL ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 10 END IF END IF * * if A is a column block, it needs to communicate rowwise. * ELSE IF( LSAME( ADIST, 'C' ) ) THEN * KPPOS = MOD( NPCOL+MYCOL-MCCOL, NPCOL ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPCOL/IGD, MNB-MCROW ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPCOL ) * 20 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL SGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MYROW, MOD(MYCOL+KINT, NPCOL) ) NLEN = NLEN + KLEN END IF ELSE CALL SGESD2D( ICONTXT, M, NLEN, A, LDA, MYROW, $ MOD(NPCOL+MYCOL-KINT, NPCOL) ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 20 END IF END IF END IF * 30 CONTINUE * RETURN * * End of PBSTRGET * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbctrnv.f0000640000175000017500000006475010363532303017410 0ustar muammarmuammar SUBROUTINE PBCTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, $ WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 TRANS, XDIST INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL, $ IYROW, N, NB, NZ COMPLEX BETA * .. * .. Array Arguments .. COMPLEX WORK( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PBCTRNV transposes a column vector to row vector, or a row vector to * column vector by reallocating data distribution. * * Y := X' * * where X and Y are N vectors. * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * XDIST (input) CHARACTER*1 * XDIST specifies whether X is a column vector or a row vector, * * XDIST = 'C', X is a column vector (distributed columnwise) * XDIST = 'R', X is a row vector (distributed rowwise) * * TRANS (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the vectors X and Y are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * N (input) INTEGER * N specifies the (global) number of the vector X and the * vector Y. N >= 0. * * NB (input) INTEGER * NB specifies the block size of vectors X and Y. NB >= 0. * * NZ (input) INTEGER * NZ is the column offset to specify the column distance from * the beginning of the block to the first element of the * vector X, and the row offset to the first element of the * vector Y if XDIST = 'C'. * Otherwise, it is row offset to specify the row distance * from the beginning of the block to the first element of the * vector X, and the column offset to the first element of the * vector Y. 0 < NZ <= NB. * * X (input) COMPLEX array of dimension at least * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'. * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) COMPLEX * BETA specifies scaler beta. * * Y (input/output) COMPLEX array of dimension at least * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or * The incremented array Y must contain the vector Y. * Y will not be referenced if beta is zero. * * INCY (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * IXROW (input) INTEGER * IXROW specifies a row of the process template, which holds * the first element of the vector X. If X is a row vector and * all rows of processes have a copy of X, then set IXROW = -1. * * IXCOL (input) INTEGER * IXCOL specifies a column of the process template, * which holds the first element of the vector X. If X is a * column block and all columns of processes have a copy of X, * then set IXCOL = -1. * * IYROW (input) INTEGER * IYROW specifies the current row process which holds the * first element of the vector Y, which is transposed of X. * If X is a column vector and the transposed row vector Y is * distributed all rows of processes, set IYROW = -1. * * IYCOL (input) INTEGER * IYCOL specifies the current column process which holds * the first element of the vector Y, which is transposed of Y. * If X is a row block and the transposed column vector Y is * distributed all columns of processes, set IYCOL = -1. * * WORK (workspace) COMPLEX array of dimension Size(WORK). * It needs extra working space of x**T or x**H. * * Parameters Details * ================== * * Nx It is a local portion of N owned by a process, where x is * replaced by either p (=NPROW) or q (=NPCOL)). The value is * determined by N, NB, NZ, x, and MI, where NB is a block size, * NZ is a offset from the beginning of the block, and MI is a * row or column position in a process template. Nx is equal * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * NN = N + NZ * Npb = CEIL( NN, NB*NPROW ) * Nqb = CEIL( NN, NB*NPCOL ) * LCMP = LCM / NPROW * LCMQ = LCM / NPCOL * * (1) XDIST = 'C' * (a) IXCOL != -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * (b) IXCOL = -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB)) * * (2) XDIST = 'R' * (a) IXROW != -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * (b) IXROW = -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP) * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ) * * ===================================================================== * * .. Parameters .. COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ, $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW, $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW, $ NQ, NQ0, NQ1 COMPLEX TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL LSAME, ILCM, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CGERV2D, $ CGESD2D, PBCTR2A1, PBCTR2B1, PBCTRGET, $ PBCTRST1, PBCVECADD, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( XDIST, 'C' ) ROWFORM = LSAME( XDIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( N .LT.0 ) THEN INFO = 4 ELSE IF( NB .LT.1 ) THEN INFO = 5 ELSE IF( NZ .LT.0 .OR. NZ.GE.NB ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR. $ ( IXROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR. $ ( IXCOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR. $ ( IYROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR. $ ( IYCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO.NE.0 ) THEN CALL PXERBLA( ICONTXT, 'PBCTRNV ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP NN = N + NZ * * When x is a column vector * IF( COLFORM ) THEN * * Form y <== x' ( x is a column vector ) * * || * || * _____________ || * -----(y)----- <== (x) * || * || * || * IF( IXROW.LT.0 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.0 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IXROW * MRCOL : column relative position in template from IYCOL * MRROW = MOD( NPROW+MYROW-IXROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) JYROW = IYROW IF( IYROW.EQ.-1 ) JYROW = IXROW * NP = NUMROC( NN, NB, MYROW, IXROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * * When a column process of IXCOL has a column block A, * IF( IXCOL .GE. 0 ) THEN TBETA = ZERO IF( MYROW.EQ.JYROW ) TBETA = BETA KZ = NZ * DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL ) IF( LCMQ.EQ.1 ) NQ0 = NUMROC( NN, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBCTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBCTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) CALL CGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ JYROW, MCCOL ) END IF * * A destination node receives the copied vector * ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY, $ MCROW, IXCOL ) ELSE CALL CGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ MCROW, IXCOL ) CALL PBCTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ*NB ) END IF END IF KZ = 0 20 CONTINUE * * Broadcast a row block of WORK in each column of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.JYROW ) THEN CALL CGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL CGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ JYROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) NQ0 = NQ * * Processors, which have diagonal blocks of X, copy them to * WORK array in transposed form * KZ = 0 IF( MRROW.EQ.0 ) KZ = NZ JZ = 0 IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ * DO 30 I = 0, LCMP - 1 IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN CALL PBCTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMP, 1 ) ELSE CALL PBCTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) CALL PBCTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBCTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF CALL CGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL CGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL CGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL ) ELSE CALL CGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL ) CALL PBCVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1, $ BETA, Y, INCY ) END IF END IF * ELSE NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL CGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN CALL CGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL ) END IF * IF( MYROW.EQ.IYROW ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBCTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF END IF END IF END IF * * When x is a row vector * ELSE * * Form y <== x' ( x is a row block ) * * || * || * || _____________ * (y) <== -----(x)----- * || * || * || * IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.0 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.0 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IYROW * MRCOL : column relative position in template from IXCOL * MRROW = MOD( NPROW+MYROW-IYROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL ) JYCOL = IYCOL IF( IYCOL.EQ.-1 ) JYCOL = IXCOL * NP = NUMROC( NN, NB, MYROW, IYROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * * When a row process of IXROW has a row block A, * IF( IXROW .GE. 0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JYCOL ) TBETA = BETA KZ = NZ * DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW ) JDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * IDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN CALL PBCTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ, LCMP ) * * The source node sends blocks to a destination node * ELSE CALL PBCTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) CALL CGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ MCROW, JYCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY, $ IXROW, MCCOL ) ELSE CALL CGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ IXROW, MCCOL ) CALL PBCTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP*NB ) END IF END IF KZ = 0 40 CONTINUE * * Broadcast a column vector Y in each row of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JYCOL ) THEN CALL CGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL CGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, JYCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * KZ = 0 IF( MRCOL.EQ.0 ) KZ = NZ JZ = 0 IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN CALL PBCTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMQ, 1 ) ELSE CALL PBCTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-IYROW, NPROW ) CALL PBCTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBCTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF CALL CGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL CGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL CGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL CGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL ) ELSE CALL CGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL ) CALL PBCVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA, $ Y, INCY ) END IF END IF * ELSE NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL CGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN CALL CGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL ) END IF * IF( MYCOL.EQ.IYCOL ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBCTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF END IF END IF END IF END IF * RETURN * * End of PBCTRNV * END * *======================================================================= * SUBROUTINE PBCTR2A1 *======================================================================= * SUBROUTINE PBCTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, $ INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV COMPLEX BETA * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x * y is a scattered vector, copied from a condensed vector x. * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBCVECADD * .. * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER * IX = 0 IY = 0 JZ = NZ ITER = ICEIL( N+NZ, INTV ) * IF( ITER.GT.1 ) THEN CALL PBCVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - JZ IY = IY + INTV - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBCVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + INTV 10 CONTINUE END IF * CALL PBCVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) * RETURN * * End of PBCTR2A1 * END * *======================================================================= * SUBROUTINE PBCTR2B1 *======================================================================= * SUBROUTINE PBCTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, $ INCY, JINX, JINY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY COMPLEX BETA * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x + beta * y * y is a condensed vector, copied from a scattered vector x * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBCVECADD * .. * .. Parameters .. COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER, LENX, LENY * IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN CALL PBCVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA, $ Y, INCY ) * ELSE IX = 0 IY = 0 JZ = NZ LENX = NB * JINX LENY = NB * JINY ITER = ICEIL( N+NZ, LENX ) * IF( ITER.GT.1 ) THEN CALL PBCVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX - JZ IY = IY + LENY - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBCVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX IY = IY + LENY 10 CONTINUE END IF * CALL PBCVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) END IF * RETURN * * End of PBCTR2B1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbzmatadd.f0000640000175000017500000003413010363532303017665 0ustar muammarmuammar SUBROUTINE PBZMATADD( ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, $ LDB ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, LDA, LDB, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBZMATADD performs the matrix add operation B := alpha*A + beta*B, * where alpha and beta are scalars, and A and B are m-by-n * upper/lower trapezoidal matrices, or rectangular matrices. * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the part of the matrix A, or (conjugate) transposed * matrix A to be added to the matrix B, * = 'U': Upper triangular part * up(B) = alpha*up(A) + beta*up(B) * = 'L': Lower triangular part * lo(B) = alpha*lo(A) + beta*lo(B) * = 'T': Transposed matrix A * B = alpha*A**T + beta*B * = 'C': Conjugate transposed matrix A * B = alpha*A**H + beta*B * Otherwise: B = alpha*A + beta*B * if M = LDA = LDB: use one BLAS loop * if MODE = 'V' : columnwise copy using BLAS if possible * else : use double loops * * M (input) INTEGER * M specifies the number of columns of the matrix A if * MODE != 'T'/'C', and it specifies the number of rows of the * matrix A otherwise. It also specifies the number of rows of * the matrix B. M >= 0. * * N (input) INTEGER * N specifies the number of rows of the matrix A if * MODE != 'T'/'C', and it specifies the number of columns of * the matrix A otherwise. It also specifies the number of * columns of the matrix B. N >= 0. * * ALPHA (input) COMPLEX*16 * ALPHA specifies the scalar alpha. * * A (input) COMPLEX*16 array, dimension (LDA,N) * The m by n matrix A if MODE != 'T'/'C'. * If MODE = 'U', only the upper triangle or trapezoid is * accessed; if MODE = 'L', only the lower triangle or * trapezoid is accessed. Otherwise all m-by-n data matrix * is accessed. * And the n by m matrix A if MODE = 'T'/'C'. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M) if * MODE != 'T'/'C'. And LDA >= max(1,N) if MODE = 'T'/'C'. * * BETA (input) COMPLEX*16 * BETA specifies the scalar beta. * * B (input) COMPLEX*16 array, dimension (LDB,N) * On exit, B = alpha*A + beta*B * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,M). * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL ZSCAL, ZCOPY, ZAXPY * .. * .. Intrinsic Functions .. INTRINSIC MIN, DCONJG * .. * .. Executable Statements .. * IF( M.LE.0 .OR. N.LE.0 .OR. ( ALPHA.EQ.ZERO.AND.BETA.EQ.ONE ) ) $ RETURN * * A is upper triangular or upper trapezoidal, * IF( LSAME( MODE, 'U' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( I, J ) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, MIN( J, M ) B( I, J ) = BETA * B( I, J ) 30 CONTINUE 40 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 60 J = 1, N DO 50 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) 50 CONTINUE 60 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 80 J = 1, N DO 70 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + B( I, J ) 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 120 J = 1, N DO 110 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) 110 CONTINUE 120 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 140 J = 1, N DO 130 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 130 CONTINUE 140 CONTINUE ELSE DO 160 J = 1, N DO 150 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * * A is lower triangular or upper trapezoidal, * ELSE IF( LSAME( MODE, 'L' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, N DO 170 I = J, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE ELSE DO 200 J = 1, N DO 190 I = J, M B( I, J ) = BETA * B( I, J ) 190 CONTINUE 200 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 220 J = 1, N DO 210 I = J, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 240 J = 1, N DO 230 I = J, M B( I, J ) = A( I, J ) + B( I, J ) 230 CONTINUE 240 CONTINUE ELSE DO 260 J = 1, N DO 250 I = J, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 280 J = 1, N DO 270 I = J, M B( I, J ) = ALPHA * A( I, J ) 270 CONTINUE 280 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 300 J = 1, N DO 290 I = J, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 290 CONTINUE 300 CONTINUE ELSE DO 320 J = 1, N DO 310 I = J, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF * * If MODE = 'Transpose' * ELSE IF( LSAME( MODE, 'T' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 340 J = 1, N DO 330 I = 1, M B( I, J ) = ZERO 330 CONTINUE 340 CONTINUE ELSE DO 360 J = 1, N DO 350 I = 1, M B( I, J ) = BETA * B( I, J ) 350 CONTINUE 360 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 380 J = 1, N DO 370 I = 1, M B( I, J ) = A( J, I ) 370 CONTINUE 380 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 400 J = 1, N DO 390 I = 1, M B( I, J ) = A( J, I ) + B( I, J ) 390 CONTINUE 400 CONTINUE ELSE DO 420 J = 1, N DO 410 I = 1, M B( I, J ) = A( J, I ) + BETA * B( I, J ) 410 CONTINUE 420 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 440 J = 1, N DO 430 I = 1, M B( I, J ) = ALPHA * A( J, I ) 430 CONTINUE 440 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 460 J = 1, N DO 450 I = 1, M B( I, J ) = ALPHA * A( J, I ) + B( I, J ) 450 CONTINUE 460 CONTINUE ELSE DO 480 J = 1, N DO 470 I = 1, M B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 470 CONTINUE 480 CONTINUE END IF END IF * * If MODE = 'Conjugate', * ELSE IF( LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 500 J = 1, N DO 490 I = 1, M B( I, J ) = ZERO 490 CONTINUE 500 CONTINUE ELSE DO 520 J = 1, N DO 510 I = 1, M B( I, J ) = BETA * B( I, J ) 510 CONTINUE 520 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 540 J = 1, N DO 530 I = 1, M B( I, J ) = DCONJG( A( J, I ) ) 530 CONTINUE 540 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 560 J = 1, N DO 550 I = 1, M B( I, J ) = DCONJG( A( J, I ) ) + B( I, J ) 550 CONTINUE 560 CONTINUE ELSE DO 580 J = 1, N DO 570 I = 1, M B( I, J ) = DCONJG( A( J, I ) ) + BETA * B( I, J ) 570 CONTINUE 580 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 600 J = 1, N DO 590 I = 1, M B( I, J ) = ALPHA * DCONJG( A( J, I ) ) 590 CONTINUE 600 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 620 J = 1, N DO 610 I = 1, M B( I, J ) = ALPHA * DCONJG( A( J, I ) ) + B( I, J ) 610 CONTINUE 620 CONTINUE ELSE DO 640 J = 1, N DO 630 I = 1, M B( I, J ) = ALPHA * DCONJG( A( J, I ) ) $ + BETA * B( I, J ) 630 CONTINUE 640 CONTINUE END IF END IF * * Other cases (for genral matrix additions) * ELSE IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 660 J = 1, N DO 650 I = 1, M B( I, J ) = ZERO 650 CONTINUE 660 CONTINUE * ELSE IF( M.EQ.LDB ) THEN CALL ZSCAL( M*N, BETA, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 670 J = 1, N CALL ZSCAL( M, BETA, B( 1, J ), 1 ) 670 CONTINUE ELSE DO 690 J = 1, N DO 680 I = 1, M B( I, J ) = BETA * B( I, J ) 680 CONTINUE 690 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL ZCOPY( M*N, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 700 J = 1, N CALL ZCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) 700 CONTINUE ELSE DO 720 J = 1, N DO 710 I = 1, M B( I, J ) = A( I, J ) 710 CONTINUE 720 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN DO 740 J = 1, N DO 730 I = 1, M B( I, J ) = A( I, J ) + B( I, J ) 730 CONTINUE 740 CONTINUE * ELSE DO 760 J = 1, N DO 750 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 750 CONTINUE 760 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 780 J = 1, N DO 770 I = 1, M B( I, J ) = ALPHA * A( I, J ) 770 CONTINUE 780 CONTINUE * ELSE IF( BETA.EQ.ONE ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL ZAXPY( M*N, ALPHA, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 790 J = 1, N CALL ZAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) 790 CONTINUE ELSE DO 810 J = 1, N DO 800 I = 1, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 800 CONTINUE 810 CONTINUE END IF * ELSE DO 830 J = 1, N DO 820 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 820 CONTINUE 830 CONTINUE END IF END IF END IF * RETURN * * End of PBZMATADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbstran.f0000640000175000017500000007103510363532303017375 0ustar muammarmuammar SUBROUTINE PBSTRAN( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, $ C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC, $ M, N, NB REAL BETA * .. * .. Array Arguments .. REAL A( LDA, * ), C( LDC, * ), WORK( * ) * .. * * Purpose * ======= * * PBSTRAN transposes a column block to row block, or a row block to * column block by reallocating data distribution. * * C := A^T + beta*C, or C := A^C + beta*C * * where A is an M-by-N matrix and C is an N-by-M matrix, and the size * of M or N is limited to its block size NB. * * The first elements of the matrices A, and C should be located at * the beginnings of their first blocks. (not the middle of the blocks.) * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * ADIST - (input) CHARACTER*1 * ADIST specifies whether A is a column block or a row block. * * ADIST = 'C', A is a column block * ADIST = 'R', A is a row block * * TRANS - (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the matrices A and C are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * M - (input) INTEGER * M specifies the (global) number of rows of the matrix (block * column or block row) A and of columns of the matrix C. * M >= 0. * * N - (input) INTEGER * N specifies the (global) number of columns of the matrix * (block column or block row) A and of columns of the matrix * C. N >= 0. * * NB - (input) INTEGER * NB specifies the column block size of the matrix A and the * row block size of the matrix C when ADIST = 'C'. Otherwise, * it specifies the row block size of the matrix A and the * column block size of the matrix C. NB >= 1. * * A (input) REAL array of DIMENSION ( LDA, Lx ), * where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'. * Before entry with ADIST = 'C', the leading Mp by N part of * the array A must contain the matrix A, otherwise the leading * M by Nq part of the array A must contain the matrix A. See * parameter details for the values of Mp and Nq. * * LDA (input) INTEGER * LDA specifies the leading dimension of (local) A as declared * in the calling (sub) program. LDA >= MAX(1,Mp) when * ADIST = 'C', or LDA >= MAX(1,M) otherwise. * * BETA (input) REAL * BETA specifies scaler beta. * * C (input/output) REAL array of DIMENSION ( LDC, Lx ), * where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'. * If ADIST = 'C', the leading N-by-Mq part of the array C * contains the (local) matrix C, otherwise the leading * Np-by-M part of the array C must contain the (local) matrix * C. C will not be referenced if beta is zero. * * LDC (input) INTEGER * LDC specifies the leading dimension of (local) C as declared * in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C', * or LDC >= MAX(1,Np) otherwise. * * IAROW (input) INTEGER * IAROW specifies a row of the process template, * which holds the first block of the matrix A. If A is a row * of blocks (ADIST = 'R') and all rows of processes have a copy * of A, then set IAROW = -1. * * IACOL (input) INTEGER * IACOL specifies a column of the process template, * which holds the first block of the matrix A. If A is a * column of blocks (ADIST = 'C') and all columns of processes * have a copy of A, then set IACOL = -1. * * ICROW (input) INTEGER * ICROW specifies the current row process which holds * the first block of the matrix C, which is transposed of A. * If C is a row of blocks (ADIST = 'C') and the transposed * row block C is distributed all rows of processes, set * ICROW = -1. * * ICCOL (input) INTEGER * ICCOL specifies the current column process which holds * the first block of the matrix C, which is transposed of A. * If C is a column of blocks (ADIST = 'R') and the transposed * column block C is distributed all columns of processes, * set ICCOL = -1. * * WORK (workspace) REAL array of dimension Size(WORK). * It needs extra working space of A'. * * Parameters Details * ================== * * Lx It is a local portion of L owned by a process, (L is * replaced by M, or N, and x is replaced by either p (=NPROW) * or q (=NPCOL)). The value is determined by L, LB, x, and * MI, where LB is a block size and MI is a row or column * position in a process template. Lx is equal to or less * than Lx0 = CEIL( L, LB*x ) * LB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * Mqb = CEIL( M, NB*NPCOL ) * Npb = CEIL( N, NB*NPROW ) * LCMQ = LCM / NPCOL * LCMP = LCM / NPROW * * (1) ADIST = 'C' * (a) IACOL != -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * (b) IACOL = -1 * Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB)) * * (2) ADIST = 'R' * (a) IAROW != -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * (b) IAROW = -1 * Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ ) * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP ) * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM, $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0, $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL, $ NPROW, NQ REAL TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL ILCM, ICEIL, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PBSMATADD, PBSTR2AF, PBSTR2AT, $ PBSTR2BT, PBSTRGET, PBSTRSRT, PXERBLA, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( M.EQ.0 .OR. N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( ADIST, 'C' ) ROWFORM = LSAME( ADIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( M .LT.0 ) THEN INFO = 4 ELSE IF( N .LT.0 ) THEN INFO = 5 ELSE IF( NB.LT.1 ) THEN INFO = 6 ELSE IF( IAROW.LT.-1 .OR. IAROW.GE.NPROW .OR. $ ( IAROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IACOL.LT.-1 .OR. IACOL.GE.NPCOL .OR. $ ( IACOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( ICROW.LT.-1 .OR. ICROW.GE.NPROW .OR. $ ( ICROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( ICCOL.LT.-1 .OR. ICCOL.GE.NPCOL .OR. $ ( ICCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO .NE. 0 ) THEN CALL PXERBLA( ICONTXT, 'PBSTRAN ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP * * When A is a column block * IF( COLFORM ) THEN * * Form C <== A' ( A is a column block ) * _ * | | * | | * _____________ | | * |______C______| <== |A| * | | * | | * |_| * * MRROW : row relative position in template from IAROW * MRCOL : column relative position in template from ICCOL * MRROW = MOD( NPROW+MYROW-IAROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) JCROW = ICROW IF( ICROW.EQ.-1 ) JCROW = IAROW * MP = NUMROC( M, NB, MYROW, IAROW, NPROW ) MQ = NUMROC( M, NB, MYCOL, ICCOL, NPCOL ) MQ0 = NUMROC( NUMROC(M, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * IF( LDA.LT.MP .AND. $ ( IACOL.EQ.MYCOL .OR. IACOL.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.N .AND. $ ( ICROW.EQ.MYROW .OR. ICROW.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a column process of IACOL has a column block A, * IF( IACOL.GE.0 ) THEN TBETA = ZERO IF( MYROW.EQ.JCROW ) TBETA = BETA * DO 20 I = 0, MIN( LCM, ICEIL(M,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IAROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + ICCOL, NPCOL ) IF( LCMQ.EQ.1 ) MQ0 = NUMROC( M, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IACOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBSTR2AT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB, $ A(IDEX+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) CALL SGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL ) ELSE CALL SGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL ) CALL PBSTR2AF( ICONTXT, 'Row', N, MQ-JDEX, NB, WORK, N, $ TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ, $ MQ0 ) END IF END IF 20 CONTINUE * * Broadcast a row block of C in each column of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.JCROW ) THEN CALL SGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL SGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ JCROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) MQ0 = MQ * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 30 I = 0, LCMP-1 IF( MRCOL.EQ.MOD( NPROW*I+MRROW, NPCOL ) ) THEN IF( LCMQ.EQ.1.AND.(ICROW.EQ.-1.OR.ICROW.EQ.MYROW) ) THEN CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, BETA, C, LDC, $ LCMP*NB ) ELSE CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB, $ A(I*NB+1,1), LDA, ZERO, WORK, N, $ LCMP*NB ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL ) CALL PBSTRGET( ICONTXT, 'Row', N, MQ0, ICEIL(M,NB), WORK, N, $ MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( ICROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) $ CALL PBSTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) CALL SGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC ) ELSE CALL SGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL SGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL ) ELSE CALL SGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL ) CALL PBSMATADD( ICONTXT, 'G', N, MQ, ONE, WORK, N, $ BETA, C, LDC ) END IF END IF * ELSE ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.ICROW ) $ CALL SGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL ) ELSE IF( MYROW.EQ.ICROW ) THEN CALL SGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL ) END IF * IF( MYROW.EQ.ICROW ) $ CALL PBSTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA, $ C, LDC, LCMP, LCMQ, MQ0 ) END IF END IF * END IF * * When A is a row block * ELSE * * Form C <== A' ( A is a row block ) * _ * | | * | | * | | _____________ * |C| <== |______A______| * | | * | | * |_| * * MRROW : row relative position in template from ICROW * MRCOL : column relative position in template from IACOL * MRROW = MOD( NPROW+MYROW-ICROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL ) JCCOL = ICCOL IF( ICCOL.EQ.-1 ) JCCOL = IACOL * NP = NUMROC( N, NB, MYROW, ICROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * IF( LDA.LT.M .AND. $ ( IAROW.EQ.MYROW .OR. IAROW.EQ.-1 ) ) THEN INFO = 8 ELSE IF( LDC.LT.NP .AND. $ ( ICCOL.EQ.MYCOL .OR. ICCOL.EQ.-1 ) ) THEN INFO = 11 END IF IF( INFO.NE.0 ) GO TO 10 * * When a row process of IAROW has a row block A, * IF( IAROW.GE.0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JCCOL ) TBETA = BETA * DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( N, NB, I, 0, NPROW ) IDEX = (I/NPROW) * NB * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * JDEX = (I/NPCOL) * NB IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN CALL PBSTR2AT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1), $ LDC, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB, $ A(1,JDEX+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) CALL SGESD2D( ICONTXT, NP0, M, WORK, NP0, $ MCROW, JCCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL ) ELSE CALL SGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL ) CALL PBSTR2AF( ICONTXT, 'Col', NP-IDEX, M, NB, WORK, $ NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ, $ NP0 ) END IF END IF 40 CONTINUE * * Broadcast a column block of WORK in each row of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JCCOL ) THEN CALL SGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL SGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, JCCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IF( LCMP.EQ.1.AND.(ICCOL.EQ.-1.OR.ICCOL.EQ.MYCOL) ) THEN CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, BETA, C, LDC, $ LCMQ*NB ) ELSE CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB, $ A(1,I*NB+1), LDA, ZERO, WORK, NP0, $ LCMQ*NB ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-ICROW, NPROW ) CALL PBSTRGET( ICONTXT, 'Col', NP0, M, ICEIL(N,NB), WORK, $ NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW, $ NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( ICCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) $ CALL PBSTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) CALL SGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC ) ELSE CALL SGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL SGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL SGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL ) ELSE CALL SGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL ) CALL PBSMATADD( ICONTXT, 'G', NP, M, ONE, WORK, NP, $ BETA, C, LDC ) END IF END IF * ELSE ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.ICCOL ) $ CALL SGESD2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, ICCOL ) ELSE IF( MYCOL.EQ.ICCOL ) THEN CALL SGERV2D( ICONTXT, NP0, ML, WORK, NP0, $ MYROW, MCCOL ) END IF * IF( MYCOL.EQ.ICCOL ) $ CALL PBSTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0, $ BETA, C, LDC, LCMP, LCMQ, NP0 ) END IF END IF * END IF END IF * RETURN * * End of PBSTRAN * END * *======================================================================= * SUBROUTINE PBSTR2AT *======================================================================= * SUBROUTINE PBSTR2AT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, LCMP, LCMQ ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB REAL BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBSTR2AT forms B <== A^T + beta*B, or A^C + beta*B * B is a ((conjugate) transposed) scattered block row (or column), * copied from a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER IA, IB, K, INTV, JNTV * .. * .. External Subroutines .. EXTERNAL PBSMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( LCMP.EQ.LCMQ ) THEN CALL PBSMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBSMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + JNTV 10 CONTINUE * * If A is a row block ( ADIST = 'R' ), * ELSE INTV = LCMP * NB JNTV = LCMQ * NB IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, JNTV ) CALL PBSMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + JNTV IB = IB + INTV 20 CONTINUE END IF END IF * RETURN * * End of PBSTR2AT * END * *======================================================================= * SUBROUTINE PBSTR2BT *======================================================================= * SUBROUTINE PBSTR2BT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, $ BETA, B, LDB, INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST, TRANS INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB REAL BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBSTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a * ((conjugate) transposed) condensed block row (or column), copied from * a scattered block column (or row) of A * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER IA, IB, K * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. External Subroutines .. EXTERNAL PBSMATADD * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Excutable Statements .. * IF( INTV.EQ.NB ) THEN CALL PBSMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B, $ LDB ) * ELSE * * If A is a column block ( ADIST = 'C' ), * IF( LSAME( ADIST, 'C' ) ) THEN IA = 1 IB = 1 DO 10 K = 1, ICEIL( M, INTV ) CALL PBSMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ), $ ONE, A(IA,1), LDA, BETA, B(1,IB), LDB ) IA = IA + INTV IB = IB + NB 10 CONTINUE * * If A is a row block (ADIST = 'R'), * ELSE IA = 1 IB = 1 DO 20 K = 1, ICEIL( N, INTV ) CALL PBSMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M, $ ONE, A(1,IA), LDA, BETA, B(IB,1), LDB ) IA = IA + INTV IB = IB + NB 20 CONTINUE END IF END IF * RETURN * * End of PBSTR2BT * END * *======================================================================= * SUBROUTINE PBSTR2AF *======================================================================= * SUBROUTINE PBSTR2AF( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT REAL BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBSTR2AF forms T <== A + BETA*T, where T is a scattered block * row (or column) copied from a (condensed) block column (or row) of A * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER JA, JB, K, INTV * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL LSAME, ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN INTV = NB * LCMQ JA = 1 JB = 1 DO 10 K = 1, ICEIL( NINT, NB ) CALL PBSMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1,JA), LDA, BETA, B(1,JB), LDB ) JA = JA + NB JB = JB + INTV 10 CONTINUE * * if( LSAME( ADIST, 'C' ) ) then * ELSE INTV = NB * LCMP JA = 1 JB = 1 DO 20 K = 1, ICEIL( NINT, NB ) CALL PBSMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA,1), LDA, BETA, B(JB,1), LDB ) JA = JA + NB JB = JB + INTV 20 CONTINUE END IF * RETURN * * End of PBSTR2AF * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbstrsrt.f0000640000175000017500000000450110363532303017601 0ustar muammarmuammar SUBROUTINE PBSTRSRT( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB, NINT REAL BETA * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBSTRSRT forms T <== A + beta * T, where T is a sorted * condensed block row (or column) from a block column (or row) of A * with sorting index ISRT * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Variables .. INTEGER JA, JB, K, KK, NJUMP * .. * .. External Subroutines .. EXTERNAL PBSMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN NJUMP = NB * LCMQ DO 20 K = 0, LCMQ-1 JA = NINT * MOD( K*LCMP, LCMQ ) + 1 JB = K * NB + 1 * DO 10 KK = 1, ICEIL( NINT, NB ) IF( N.LT.JB ) GO TO 20 CALL PBSMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1, JA), LDA, BETA, B(1, JB), LDB ) JA = JA + NB JB = JB + NJUMP 10 CONTINUE 20 CONTINUE * * if( LSAME( ADIST, 'C') ) then * ELSE NJUMP = NB * LCMP DO 40 K = 0, LCMP-1 JA = 1 JB = K * NB + 1 * DO 30 KK = 1, ICEIL( NINT, NB ) IF( M.LT.JB ) GO TO 40 CALL PBSMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA, N*MOD(K*LCMQ,LCMP)+1), LDA, BETA, $ B(JB, 1), LDB ) JA = JA + NB JB = JB + NJUMP 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of PBSTRSRT * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdtrnv.f0000640000175000017500000006470110363532303017405 0ustar muammarmuammar SUBROUTINE PBDTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, $ WORK ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * Jaeyoung Choi, Oak Ridge National Laboratory * Jack Dongarra, University of Tennessee and Oak Ridge National Lab. * David Walker, Oak Ridge National Laboratory * * .. Scalar Arguments .. CHARACTER*1 TRANS, XDIST INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL, $ IYROW, N, NB, NZ DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION WORK( * ), X( * ), Y( * ) * .. * * Purpose * ======= * * PBDTRNV transposes a column vector to row vector, or a row vector to * column vector by reallocating data distribution. * * Y := X' * * where X and Y are N vectors. * * Parameters * ========== * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * XDIST (input) CHARACTER*1 * XDIST specifies whether X is a column vector or a row vector, * * XDIST = 'C', X is a column vector (distributed columnwise) * XDIST = 'R', X is a row vector (distributed rowwise) * * TRANS (input) CHARACTER*1 * TRANS specifies whether the transposed format is transpose * or conjugate transpose. If the vectors X and Y are real, * the argument is ignored. * * TRANS = 'T', transpose * TRANS = 'C', conjugate transpose * * N (input) INTEGER * N specifies the (global) number of the vector X and the * vector Y. N >= 0. * * NB (input) INTEGER * NB specifies the block size of vectors X and Y. NB >= 0. * * NZ (input) INTEGER * NZ is the column offset to specify the column distance from * the beginning of the block to the first element of the * vector X, and the row offset to the first element of the * vector Y if XDIST = 'C'. * Otherwise, it is row offset to specify the row distance * from the beginning of the block to the first element of the * vector X, and the column offset to the first element of the * vector Y. 0 < NZ <= NB. * * X (input) DOUBLE PRECISION array of dimension at least * ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or * ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'. * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) DOUBLE PRECISION * BETA specifies scaler beta. * * Y (input/output) DOUBLE PRECISION array of dimension at least * ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or * ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or * The incremented array Y must contain the vector Y. * Y will not be referenced if beta is zero. * * INCY (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * IXROW (input) INTEGER * IXROW specifies a row of the process template, which holds * the first element of the vector X. If X is a row vector and * all rows of processes have a copy of X, then set IXROW = -1. * * IXCOL (input) INTEGER * IXCOL specifies a column of the process template, * which holds the first element of the vector X. If X is a * column block and all columns of processes have a copy of X, * then set IXCOL = -1. * * IYROW (input) INTEGER * IYROW specifies the current row process which holds the * first element of the vector Y, which is transposed of X. * If X is a column vector and the transposed row vector Y is * distributed all rows of processes, set IYROW = -1. * * IYCOL (input) INTEGER * IYCOL specifies the current column process which holds * the first element of the vector Y, which is transposed of Y. * If X is a row block and the transposed column vector Y is * distributed all columns of processes, set IYCOL = -1. * * WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK). * It needs extra working space of x**T or x**H. * * Parameters Details * ================== * * Nx It is a local portion of N owned by a process, where x is * replaced by either p (=NPROW) or q (=NPCOL)). The value is * determined by N, NB, NZ, x, and MI, where NB is a block size, * NZ is a offset from the beginning of the block, and MI is a * row or column position in a process template. Nx is equal * to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB. * * Communication Scheme * ==================== * * The communication scheme of the routine is set to '1-tree', which is * fan-out. (For details, see BLACS user's guide.) * * Memory Requirement of WORK * ========================== * * NN = N + NZ * Npb = CEIL( NN, NB*NPROW ) * Nqb = CEIL( NN, NB*NPCOL ) * LCMP = LCM / NPROW * LCMQ = LCM / NPCOL * * (1) XDIST = 'C' * (a) IXCOL != -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * (b) IXCOL = -1 * Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB)) * * (2) XDIST = 'R' * (a) IXROW != -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * (b) IXROW = -1 * Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB)) * * Notes * ----- * More precise space can be computed as * * CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP) * CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ) * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLFORM, ROWFORM INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ, $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW, $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW, $ NQ, NQ0, NQ1 DOUBLE PRECISION TBETA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, ICEIL, NUMROC EXTERNAL LSAME, ILCM, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D, $ DGESD2D, PBDTR2A1, PBDTR2B1, PBDTRGET, $ PBDTRST1, PBDVECADD, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible. * IF( N.EQ.0 ) RETURN * CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL ) * COLFORM = LSAME( XDIST, 'C' ) ROWFORM = LSAME( XDIST, 'R' ) * * Test the input parameters. * INFO = 0 IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN INFO = 2 ELSE IF( N .LT.0 ) THEN INFO = 4 ELSE IF( NB .LT.1 ) THEN INFO = 5 ELSE IF( NZ .LT.0 .OR. NZ.GE.NB ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 11 ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR. $ ( IXROW.EQ.-1 .AND. COLFORM ) ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR. $ ( IXCOL.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR. $ ( IYROW.EQ.-1 .AND. ROWFORM ) ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR. $ ( IYCOL.EQ.-1 .AND. COLFORM ) ) THEN INFO = 15 END IF * 10 CONTINUE IF( INFO.NE.0 ) THEN CALL PXERBLA( ICONTXT, 'PBDTRNV ', INFO ) RETURN END IF * * Start the operations. * * LCM : the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LCMQ = LCM / NPCOL IGD = NPCOL / LCMP NN = N + NZ * * When x is a column vector * IF( COLFORM ) THEN * * Form y <== x' ( x is a column vector ) * * || * || * _____________ || * -----(y)----- <== (x) * || * || * || * IF( IXROW.LT.0 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.0 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IXROW * MRCOL : column relative position in template from IYCOL * MRROW = MOD( NPROW+MYROW-IXROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) JYROW = IYROW IF( IYROW.EQ.-1 ) JYROW = IXROW * NP = NUMROC( NN, NB, MYROW, IXROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ ) * * When a column process of IXCOL has a column block A, * IF( IXCOL .GE. 0 ) THEN TBETA = ZERO IF( MYROW.EQ.JYROW ) TBETA = BETA KZ = NZ * DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL ) IF( LCMQ.EQ.1 ) NQ0 = NUMROC( NN, NB, I, 0, NPCOL ) JDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN * * The source node is a destination node * IDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP, LCMQ ) * * The source node sends blocks to a destination node * ELSE CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) CALL DGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ JYROW, MCCOL ) END IF * * A destination node receives the copied vector * ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY, $ MCROW, IXCOL ) ELSE CALL DGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1, $ MCROW, IXCOL ) CALL PBDTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ*NB ) END IF END IF KZ = 0 20 CONTINUE * * Broadcast a row block of WORK in each column of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.JYROW ) THEN CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ JYROW, MYCOL ) END IF END IF * * When all column procesors have a copy of the column block A, * ELSE IF( LCMQ.EQ.1 ) NQ0 = NQ * * Processors, which have diagonal blocks of X, copy them to * WORK array in transposed form * KZ = 0 IF( MRROW.EQ.0 ) KZ = NZ JZ = 0 IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ * DO 30 I = 0, LCMP - 1 IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMP, 1 ) ELSE CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMP, 1 ) END IF END IF 30 CONTINUE * * Get diagonal blocks of A for each column of the template * MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW ) IF( LCMQ.GT.1 ) THEN MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL ) CALL PBDTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a row block of WORK in every row of template * IF( IYROW.EQ.-1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( LCMQ.GT.1 ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY ) ELSE CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY, $ MCROW, MYCOL ) END IF * * Send a row block of WORK to the destination row * ELSE IF( LCMQ.EQ.1 ) THEN IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL DGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN IF( BETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL ) ELSE CALL DGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL ) CALL PBDVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1, $ BETA, Y, INCY ) END IF END IF * ELSE NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) ) IF( MYROW.EQ.MCROW ) THEN IF( MYROW.NE.IYROW ) $ CALL DGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL ) ELSE IF( MYROW.EQ.IYROW ) THEN CALL DGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL ) END IF * IF( MYROW.EQ.IYROW ) THEN KZ = 0 IF( MYCOL.EQ.IYCOL ) KZ = NZ CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NQ0 ) END IF END IF END IF END IF * * When x is a row vector * ELSE * * Form y <== x' ( x is a row block ) * * || * || * || _____________ * (y) <== -----(x)----- * || * || * || * IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN INFO = 12 ELSE IF( IXCOL.LT.0 .OR. IXCOL.GE.NPCOL ) THEN INFO = 13 ELSE IF( IYROW.LT.0 .OR. IYROW.GE.NPROW ) THEN INFO = 14 ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN INFO = 15 END IF IF( INFO.NE.0 ) GO TO 10 * * MRROW : row relative position in template from IYROW * MRCOL : column relative position in template from IXCOL * MRROW = MOD( NPROW+MYROW-IYROW, NPROW ) MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL ) JYCOL = IYCOL IF( IYCOL.EQ.-1 ) JYCOL = IXCOL * NP = NUMROC( NN, NB, MYROW, IYROW, NPROW ) IF( MRROW.EQ.0 ) NP = NP - NZ NQ = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL ) IF( MRCOL.EQ.0 ) NQ = NQ - NZ NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP ) * * When a row process of IXROW has a row block A, * IF( IXROW .GE. 0 ) THEN TBETA = ZERO IF( MYCOL.EQ.JYCOL ) TBETA = BETA KZ = NZ * DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1 MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW ) MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL ) IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW ) JDEX = (I/NPROW) * NB IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ) * * A source node copies the blocks to WORK, and send it * IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN * * The source node is a destination node * IDEX = (I/NPCOL) * NB IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ ) IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, TBETA, $ Y(JDEX*INCY+1), INCY, LCMQ, LCMP ) * * The source node sends blocks to a destination node * ELSE CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) CALL DGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ MCROW, JYCOL ) END IF * * A destination node receives the copied blocks * ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY, $ IXROW, MCCOL ) ELSE CALL DGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1, $ IXROW, MCCOL ) CALL PBDTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA, $ Y(JDEX*INCY+1), INCY, LCMP*NB ) END IF END IF KZ = 0 40 CONTINUE * * Broadcast a column vector Y in each row of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.JYCOL ) THEN CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, JYCOL ) END IF END IF * * When all row procesors have a copy of the row block A, * ELSE IF( LCMP.EQ.1 ) NP0 = NP * * Processors, which have diagonal blocks of A, copy them to * WORK array in transposed form * KZ = 0 IF( MRCOL.EQ.0 ) KZ = NZ JZ = 0 IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ * DO 50 I = 0, LCMQ-1 IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN IDEX = MAX( 0, I*NB-KZ ) IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, BETA, Y, INCY, $ LCMQ, 1 ) ELSE CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ, $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1, $ LCMQ, 1 ) END IF END IF 50 CONTINUE * * Get diagonal blocks of A for each row of the template * MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL ) IF( LCMP.GT.1 ) THEN MCROW = MOD( NPROW+MYROW-IYROW, NPROW ) CALL PBDTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ), $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL, $ NPROW, NPCOL ) END IF * * Broadcast a column block of WORK in every column of template * IF( IYCOL.EQ.-1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( LCMP.GT.1 ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY ) ELSE CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY, $ MYROW, MCCOL ) END IF * * Send a column block of WORK to the destination column * ELSE IF( LCMP.EQ.1 ) THEN IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL DGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN IF( BETA.EQ.ZERO ) THEN CALL DGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL ) ELSE CALL DGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL ) CALL PBDVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA, $ Y, INCY ) END IF END IF * ELSE NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) ) IF( MYCOL.EQ.MCCOL ) THEN IF( MYCOL.NE.IYCOL ) $ CALL DGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL ) ELSE IF( MYCOL.EQ.IYCOL ) THEN CALL DGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL ) END IF * IF( MYCOL.EQ.IYCOL ) THEN KZ = 0 IF( MYROW.EQ.IYROW ) KZ = NZ CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1, $ BETA, Y, INCY, LCMP, LCMQ, NP0 ) END IF END IF END IF END IF END IF * RETURN * * End of PBDTRNV * END * *======================================================================= * SUBROUTINE PBDTR2A1 *======================================================================= * SUBROUTINE PBDTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY, $ INTV ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x * y is a scattered vector, copied from a condensed vector x. * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBDVECADD * .. * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER * IX = 0 IY = 0 JZ = NZ ITER = ICEIL( N+NZ, INTV ) * IF( ITER.GT.1 ) THEN CALL PBDVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB - JZ IY = IY + INTV - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBDVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX, $ BETA, Y(IY*INCY+1), INCY ) IX = IX + NB IY = IY + INTV 10 CONTINUE END IF * CALL PBDVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) * RETURN * * End of PBDTR2A1 * END * *======================================================================= * SUBROUTINE PBDTR2B1 *======================================================================= * SUBROUTINE PBDTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y, $ INCY, JINX, JINY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 TRANS INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY DOUBLE PRECISION BETA * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * y <== x + beta * y * y is a condensed vector, copied from a scattered vector x * * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. External Subroutines .. EXTERNAL PBDVECADD * .. * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Variables .. INTEGER IX, IY, JZ, K, ITER, LENX, LENY * IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN CALL PBDVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA, $ Y, INCY ) * ELSE IX = 0 IY = 0 JZ = NZ LENX = NB * JINX LENY = NB * JINY ITER = ICEIL( N+NZ, LENX ) * IF( ITER.GT.1 ) THEN CALL PBDVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX - JZ IY = IY + LENY - JZ JZ = 0 * DO 10 K = 2, ITER-1 CALL PBDVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1), $ INCX, BETA, Y(IY*INCY+1), INCY ) IX = IX + LENX IY = IY + LENY 10 CONTINUE END IF * CALL PBDVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE, $ X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY ) END IF * RETURN * * End of PBDTR2B1 * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbztrsrt.f0000640000175000017500000000451510363532303017615 0ustar muammarmuammar SUBROUTINE PBZTRSRT( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B, $ LDB, LCMP, LCMQ, NINT ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB, NINT COMPLEX*16 BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBZTRSRT forms T <== A + beta * T, where T is a sorted * condensed block row (or column) from a block column (or row) of A * with sorting index ISRT * * ===================================================================== * * .. Parameters .. COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Variables .. INTEGER JA, JB, K, KK, NJUMP * .. * .. External Subroutines .. EXTERNAL PBZMATADD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( LSAME( ADIST, 'R' ) ) THEN NJUMP = NB * LCMQ DO 20 K = 0, LCMQ-1 JA = NINT * MOD( K*LCMP, LCMQ ) + 1 JB = K * NB + 1 * DO 10 KK = 1, ICEIL( NINT, NB ) IF( N.LT.JB ) GO TO 20 CALL PBZMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE, $ A(1, JA), LDA, BETA, B(1, JB), LDB ) JA = JA + NB JB = JB + NJUMP 10 CONTINUE 20 CONTINUE * * if( LSAME( ADIST, 'C') ) then * ELSE NJUMP = NB * LCMP DO 40 K = 0, LCMP-1 JA = 1 JB = K * NB + 1 * DO 30 KK = 1, ICEIL( NINT, NB ) IF( M.LT.JB ) GO TO 40 CALL PBZMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE, $ A(JA, N*MOD(K*LCMQ,LCMP)+1), LDA, BETA, $ B(JB, 1), LDB ) JA = JA + NB JB = JB + NJUMP 30 CONTINUE 40 CONTINUE END IF * RETURN * * End of PBZTRSRT * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbzvecadd.f0000640000175000017500000002371610363532303017671 0ustar muammarmuammar SUBROUTINE PBZVECADD( ICONTXT, MODE, N, ALPHA, X, INCX, BETA, Y, $ INCY ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, INCX, INCY, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * * .. * * Purpose * ======= * * PBZVECADD performs a vector X to be added to Y * Y := alpha*op(X) + beta*Y, * where alpha and beta are scalars, and X and Y are n vectors, * and op(X) = X**H if MODE = 'C', * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the transposed, or conjugate transposed vector X * to be added to the vector Y * = 'C': Conjugate vector X is added for complex data set. * Y = alpha * X**H + beta * Y * ELSE : Vector X is added. Y = alpha*X + beta*Y * if MODE = 'V', BLAS routine may be used. * * N (input) INTEGER * The number of elements of the vectors X and Y to be added. * N >= 0. * * ALPHA (input) COMPLEX*16 * ALPHA specifies the scalar alpha. * * X (input) COMPLEX*16 array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCX ) ) * The incremented array X must contain the vector X. * * INCX (input) INTEGER * INCX specifies the increment for the elements of X. * INCX <> 0. * * BETA (input) COMPLEX*16 * BETA specifies the scalar beta. * * Y (input/output) COMPLEX*16 array of DIMENSION at least * ( 1 + ( N - 1 )*abs( INCY ) ) * On entry with BETA non-zero, the incremented array Y must * contain the vector Y. * On exit, Y is overwritten by the updated vector Y. * * INCY - (input) INTEGER * INCY specifies the increment for the elements of Y. * INCY <> 0. * * ===================================================================== * * .. * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IX, IY * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL ZSCAL, ZCOPY, ZAXPY * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * IF( N.LE.0 .OR. ( ALPHA.EQ.ZERO .AND. BETA.EQ.ONE ) ) RETURN * IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCY.EQ.1 ) THEN DO 10 I = 1, N Y( I ) = ZERO 10 CONTINUE ELSE IY = 1 DO 20 I = 1, N Y( IY ) = ZERO IY = IY + INCY 20 CONTINUE END IF * ELSE IF( LSAME( MODE, 'V' ) ) THEN CALL ZSCAL( N, BETA, Y, INCY ) ELSE IF( INCY.EQ.1 ) THEN DO 30 I = 1, N Y( I ) = BETA * Y( I ) 30 CONTINUE ELSE IY = 1 DO 40 I = 1, N Y( IY ) = BETA * Y( IY ) IY = IY + INCY 40 CONTINUE END IF END IF * ELSE IF( .NOT.LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL ZCOPY( N, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 50 I = 1, N Y( I ) = X( I ) 50 CONTINUE ELSE IX = 1 IY = 1 DO 60 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 60 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 70 I = 1, N Y( I ) = X( I ) + Y( I ) 70 CONTINUE ELSE IX = 1 IY = 1 DO 80 I = 1, N Y( IY ) = X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 80 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 90 I = 1, N Y( I ) = X( I ) + BETA * Y( I ) 90 CONTINUE ELSE IX = 1 IY = 1 DO 100 I = 1, N Y( IY ) = X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 100 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 110 I = 1, N Y( I ) = ALPHA * X( I ) 110 CONTINUE ELSE IX = 1 IY = 1 DO 120 I = 1, N Y( IY ) = X( IX ) IX = IX + INCX IY = IY + INCY 120 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( LSAME( MODE, 'V' ) ) THEN CALL ZAXPY( N, ALPHA, X, INCX, Y, INCY ) ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 130 I = 1, N Y( I ) = ALPHA * X( I ) + Y( I ) 130 CONTINUE ELSE IX = 1 IY = 1 DO 140 I = 1, N Y( IY ) = ALPHA * X( IX ) + Y( IY ) IX = IX + INCX IY = IY + INCY 140 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 150 I = 1, N Y( I ) = ALPHA * X( I ) + BETA * Y( I ) 150 CONTINUE ELSE IX = 1 IY = 1 DO 160 I = 1, N Y( IY ) = ALPHA * X( IX ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 160 CONTINUE END IF END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 170 I = 1, N Y( I ) = DCONJG( X( I ) ) 170 CONTINUE ELSE IX = 1 IY = 1 DO 180 I = 1, N Y( IY ) = DCONJG( X( IX ) ) IX = IX + INCX IY = IY + INCY 180 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 190 I = 1, N Y( I ) = DCONJG( X( I ) ) + Y( I ) 190 CONTINUE ELSE IX = 1 IY = 1 DO 200 I = 1, N Y( IY ) = DCONJG( X( IX ) ) + Y( IY ) IX = IX + INCX IY = IY + INCY 200 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 210 I = 1, N Y( I ) = DCONJG( X( I ) ) + BETA * Y( I ) 210 CONTINUE ELSE IX = 1 IY = 1 DO 220 I = 1, N Y( IY ) = DCONJG( X( IX ) ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 220 CONTINUE END IF END IF * ELSE IF( BETA.EQ.ZERO ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 230 I = 1, N Y( I ) = ALPHA * DCONJG( X( I ) ) 230 CONTINUE ELSE IX = 1 IY = 1 DO 240 I = 1, N Y( IY ) = ALPHA * DCONJG( X( IX ) ) IX = IX + INCX IY = IY + INCY 240 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 250 I = 1, N Y( I ) = ALPHA * DCONJG( X( I ) ) + Y( I ) 250 CONTINUE ELSE IX = 1 IY = 1 DO 260 I = 1, N Y( IY ) = ALPHA * DCONJG( X( IX ) ) + Y( IY ) IX = IX + INCX IY = IY + INCY 260 CONTINUE END IF * ELSE IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN DO 270 I = 1, N Y( I ) = ALPHA * DCONJG( X( I ) ) + BETA * Y( I ) 270 CONTINUE ELSE IX = 1 IY = 1 DO 280 I = 1, N Y( IY ) = ALPHA * DCONJG( X(IX) ) + BETA * Y( IY ) IX = IX + INCX IY = IY + INCY 280 CONTINUE END IF END IF END IF END IF * RETURN * * End of PBZVECADD * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbdtrget.f0000640000175000017500000001003310363532303017526 0ustar muammarmuammar SUBROUTINE PBDTRGET( ICONTXT, ADIST, M, N, MNB, A, LDA, MCROW, $ MCCOL, IGD, MYROW, MYCOL, NPROW, NPCOL ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 ADIST INTEGER ICONTXT, IGD, LDA, M, MCCOL, MCROW, MNB, MYCOL, $ MYROW, N, NPCOL, NPROW * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * PBDTRGET forms a row block of A from scattered row subblocks if * ADIST = 'R', or forms a column block of A from scattered column * subblocks, if ADIST = 'C'. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Variables .. INTEGER KINT, KINT2, KLEN, KMOD, KPPOS, NLEN, NNUM, $ NTLEN REAL TEMP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL LSAME, ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL DGERV2D, DGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * * if A is a row block, it needs to communicate columnwise. * IF( LSAME( ADIST, 'R' ) ) THEN KPPOS = MOD( NPROW+MYROW-MCROW, NPROW ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPROW/IGD, MNB-MCCOL ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPROW ) * 10 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL DGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MOD(MYROW+KINT, NPROW), MYCOL ) NLEN = NLEN + KLEN END IF ELSE CALL DGESD2D( ICONTXT, M, NLEN, A, LDA, $ MOD(NPROW+MYROW-KINT, NPROW), MYCOL ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 10 END IF END IF * * if A is a column block, it needs to communicate rowwise. * ELSE IF( LSAME( ADIST, 'C' ) ) THEN * KPPOS = MOD( NPCOL+MYCOL-MCCOL, NPCOL ) IF( MOD( KPPOS, IGD ).EQ.0 ) THEN KINT = IGD NLEN = N NNUM = MIN( NPCOL/IGD, MNB-MCROW ) TEMP = REAL( NNUM ) NTLEN = N * NNUM NNUM = IGD * NNUM IF( KPPOS.GE.NNUM ) GO TO 30 KPPOS = MOD( KPPOS, NPCOL ) * 20 CONTINUE IF( TEMP.GT.ONE ) THEN KINT2 = 2 * KINT KMOD = MOD( KPPOS, KINT2 ) * IF( KMOD.EQ.0 ) THEN IF( KPPOS+KINT.LT.NNUM ) THEN KLEN = NTLEN - (KPPOS/KINT2)*(KINT2/IGD)*N KLEN = MIN( KLEN-NLEN, NLEN ) CALL DGERV2D( ICONTXT, M, KLEN, A(1,NLEN+1), LDA, $ MYROW, MOD(MYCOL+KINT, NPCOL) ) NLEN = NLEN + KLEN END IF ELSE CALL DGESD2D( ICONTXT, M, NLEN, A, LDA, MYROW, $ MOD(NPCOL+MYCOL-KINT, NPCOL) ) GO TO 30 END IF * KINT = KINT2 TEMP = TEMP / TWO GO TO 20 END IF END IF END IF * 30 CONTINUE * RETURN * * End of PBDTRGET * END scalapack-1.8.0/PBLAS/SRC/PBBLAS/pbcmatadd.f0000640000175000017500000003410510363532303017640 0ustar muammarmuammar SUBROUTINE PBCMATADD( ICONTXT, MODE, M, N, ALPHA, A, LDA, BETA, B, $ LDB ) * * -- PB-BLAS routine (version 2.1) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory. * April 28, 1996 * * .. Scalar Arguments .. CHARACTER*1 MODE INTEGER ICONTXT, LDA, LDB, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * PBCMATADD performs the matrix add operation B := alpha*A + beta*B, * where alpha and beta are scalars, and A and B are m-by-n * upper/lower trapezoidal matrices, or rectangular matrices. * * Arguments * ========= * * ICONTXT (input) INTEGER * ICONTXT is the BLACS mechanism for partitioning communication * space. A defining property of a context is that a message in * a context cannot be sent or received in another context. The * BLACS context includes the definition of a grid, and each * process' coordinates in it. * * MODE (input) CHARACTER*1 * Specifies the part of the matrix A, or (conjugate) transposed * matrix A to be added to the matrix B, * = 'U': Upper triangular part * up(B) = alpha*up(A) + beta*up(B) * = 'L': Lower triangular part * lo(B) = alpha*lo(A) + beta*lo(B) * = 'T': Transposed matrix A * B = alpha*A**T + beta*B * = 'C': Conjugate transposed matrix A * B = alpha*A**H + beta*B * Otherwise: B = alpha*A + beta*B * if M = LDA = LDB: use one BLAS loop * if MODE = 'V' : columnwise copy using BLAS if possible * else : use double loops * * M (input) INTEGER * M specifies the number of columns of the matrix A if * MODE != 'T'/'C', and it specifies the number of rows of the * matrix A otherwise. It also specifies the number of rows of * the matrix B. M >= 0. * * N (input) INTEGER * N specifies the number of rows of the matrix A if * MODE != 'T'/'C', and it specifies the number of columns of * the matrix A otherwise. It also specifies the number of * columns of the matrix B. N >= 0. * * ALPHA (input) COMPLEX * ALPHA specifies the scalar alpha. * * A (input) COMPLEX array, dimension (LDA,N) * The m by n matrix A if MODE != 'T'/'C'. * If MODE = 'U', only the upper triangle or trapezoid is * accessed; if MODE = 'L', only the lower triangle or * trapezoid is accessed. Otherwise all m-by-n data matrix * is accessed. * And the n by m matrix A if MODE = 'T'/'C'. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M) if * MODE != 'T'/'C'. And LDA >= max(1,N) if MODE = 'T'/'C'. * * BETA (input) COMPLEX * BETA specifies the scalar beta. * * B (input) COMPLEX array, dimension (LDB,N) * On exit, B = alpha*A + beta*B * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,M). * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL CSCAL, CCOPY, CAXPY * .. * .. Intrinsic Functions .. INTRINSIC MIN, CONJG * .. * .. Executable Statements .. * IF( M.LE.0 .OR. N.LE.0 .OR. ( ALPHA.EQ.ZERO.AND.BETA.EQ.ONE ) ) $ RETURN * * A is upper triangular or upper trapezoidal, * IF( LSAME( MODE, 'U' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( I, J ) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1, N DO 30 I = 1, MIN( J, M ) B( I, J ) = BETA * B( I, J ) 30 CONTINUE 40 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 60 J = 1, N DO 50 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) 50 CONTINUE 60 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 80 J = 1, N DO 70 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + B( I, J ) 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) + BETA * B( I, J ) 90 CONTINUE 100 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 120 J = 1, N DO 110 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) 110 CONTINUE 120 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 140 J = 1, N DO 130 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 130 CONTINUE 140 CONTINUE ELSE DO 160 J = 1, N DO 150 I = 1, MIN( J, M ) B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 150 CONTINUE 160 CONTINUE END IF END IF * * A is lower triangular or upper trapezoidal, * ELSE IF( LSAME( MODE, 'L' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 180 J = 1, N DO 170 I = J, M B( I, J ) = ZERO 170 CONTINUE 180 CONTINUE ELSE DO 200 J = 1, N DO 190 I = J, M B( I, J ) = BETA * B( I, J ) 190 CONTINUE 200 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 220 J = 1, N DO 210 I = J, M B( I, J ) = A( I, J ) 210 CONTINUE 220 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 240 J = 1, N DO 230 I = J, M B( I, J ) = A( I, J ) + B( I, J ) 230 CONTINUE 240 CONTINUE ELSE DO 260 J = 1, N DO 250 I = J, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 250 CONTINUE 260 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 280 J = 1, N DO 270 I = J, M B( I, J ) = ALPHA * A( I, J ) 270 CONTINUE 280 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 300 J = 1, N DO 290 I = J, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 290 CONTINUE 300 CONTINUE ELSE DO 320 J = 1, N DO 310 I = J, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 310 CONTINUE 320 CONTINUE END IF END IF * * If MODE = 'Transpose' * ELSE IF( LSAME( MODE, 'T' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 340 J = 1, N DO 330 I = 1, M B( I, J ) = ZERO 330 CONTINUE 340 CONTINUE ELSE DO 360 J = 1, N DO 350 I = 1, M B( I, J ) = BETA * B( I, J ) 350 CONTINUE 360 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 380 J = 1, N DO 370 I = 1, M B( I, J ) = A( J, I ) 370 CONTINUE 380 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 400 J = 1, N DO 390 I = 1, M B( I, J ) = A( J, I ) + B( I, J ) 390 CONTINUE 400 CONTINUE ELSE DO 420 J = 1, N DO 410 I = 1, M B( I, J ) = A( J, I ) + BETA * B( I, J ) 410 CONTINUE 420 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 440 J = 1, N DO 430 I = 1, M B( I, J ) = ALPHA * A( J, I ) 430 CONTINUE 440 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 460 J = 1, N DO 450 I = 1, M B( I, J ) = ALPHA * A( J, I ) + B( I, J ) 450 CONTINUE 460 CONTINUE ELSE DO 480 J = 1, N DO 470 I = 1, M B( I, J ) = ALPHA * A( J, I ) + BETA * B( I, J ) 470 CONTINUE 480 CONTINUE END IF END IF * * If MODE = 'Conjugate', * ELSE IF( LSAME( MODE, 'C' ) ) THEN IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 500 J = 1, N DO 490 I = 1, M B( I, J ) = ZERO 490 CONTINUE 500 CONTINUE ELSE DO 520 J = 1, N DO 510 I = 1, M B( I, J ) = BETA * B( I, J ) 510 CONTINUE 520 CONTINUE END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN DO 540 J = 1, N DO 530 I = 1, M B( I, J ) = CONJG( A( J, I ) ) 530 CONTINUE 540 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 560 J = 1, N DO 550 I = 1, M B( I, J ) = CONJG( A( J, I ) ) + B( I, J ) 550 CONTINUE 560 CONTINUE ELSE DO 580 J = 1, N DO 570 I = 1, M B( I, J ) = CONJG( A( J, I ) ) + BETA * B( I, J ) 570 CONTINUE 580 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 600 J = 1, N DO 590 I = 1, M B( I, J ) = ALPHA * CONJG( A( J, I ) ) 590 CONTINUE 600 CONTINUE ELSE IF( BETA.EQ.ONE ) THEN DO 620 J = 1, N DO 610 I = 1, M B( I, J ) = ALPHA * CONJG( A( J, I ) ) + B( I, J ) 610 CONTINUE 620 CONTINUE ELSE DO 640 J = 1, N DO 630 I = 1, M B( I, J ) = ALPHA * CONJG( A( J, I ) ) $ + BETA * B( I, J ) 630 CONTINUE 640 CONTINUE END IF END IF * * Other cases (for genral matrix additions) * ELSE IF( ALPHA.EQ.ZERO ) THEN IF( BETA.EQ.ZERO ) THEN DO 660 J = 1, N DO 650 I = 1, M B( I, J ) = ZERO 650 CONTINUE 660 CONTINUE * ELSE IF( M.EQ.LDB ) THEN CALL CSCAL( M*N, BETA, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 670 J = 1, N CALL CSCAL( M, BETA, B( 1, J ), 1 ) 670 CONTINUE ELSE DO 690 J = 1, N DO 680 I = 1, M B( I, J ) = BETA * B( I, J ) 680 CONTINUE 690 CONTINUE END IF END IF * ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ZERO ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL CCOPY( M*N, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 700 J = 1, N CALL CCOPY( M, A( 1, J ), 1, B( 1, J ), 1 ) 700 CONTINUE ELSE DO 720 J = 1, N DO 710 I = 1, M B( I, J ) = A( I, J ) 710 CONTINUE 720 CONTINUE END IF * ELSE IF( BETA.EQ.ONE ) THEN DO 740 J = 1, N DO 730 I = 1, M B( I, J ) = A( I, J ) + B( I, J ) 730 CONTINUE 740 CONTINUE * ELSE DO 760 J = 1, N DO 750 I = 1, M B( I, J ) = A( I, J ) + BETA * B( I, J ) 750 CONTINUE 760 CONTINUE END IF * ELSE IF( BETA.EQ.ZERO ) THEN DO 780 J = 1, N DO 770 I = 1, M B( I, J ) = ALPHA * A( I, J ) 770 CONTINUE 780 CONTINUE * ELSE IF( BETA.EQ.ONE ) THEN IF( M.EQ.LDA .AND. M.EQ.LDB ) THEN CALL CAXPY( M*N, ALPHA, A( 1, 1 ), 1, B( 1, 1 ), 1 ) ELSE IF( LSAME( MODE, 'V' ) ) THEN DO 790 J = 1, N CALL CAXPY( M, ALPHA, A( 1, J ), 1, B( 1, J ), 1 ) 790 CONTINUE ELSE DO 810 J = 1, N DO 800 I = 1, M B( I, J ) = ALPHA * A( I, J ) + B( I, J ) 800 CONTINUE 810 CONTINUE END IF * ELSE DO 830 J = 1, N DO 820 I = 1, M B( I, J ) = ALPHA * A( I, J ) + BETA * B( I, J ) 820 CONTINUE 830 CONTINUE END IF END IF END IF * RETURN * * End of PBCMATADD * END scalapack-1.8.0/EXAMPLE/0000750000175000017500000000000010605234747014420 5ustar muammarmuammarscalapack-1.8.0/EXAMPLE/psscaex.f0000640000175000017500000001725010604624224016233 0ustar muammarmuammar PROGRAM PSSCAEX * * -- ScaLAPACK example code -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * * This program solves a linear system by calling the ScaLAPACK * routine PSGESV. The input matrix and right-and-sides are * read from a file. The solution is written to a file. * * .. Parameters .. INTEGER REALSZ, INTGSZ, MEMSIZ, TOTMEM PARAMETER ( REALSZ = 4, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / REALSZ ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER*80 OUTFILE INTEGER IAM, ICTXT, INFO, IPA, IPACPY, IPB, IPPIV, IPX, $ IPW, LIPIV, MYCOL, MYROW, N, NB, NOUT, NPCOL, $ NPROCS, NPROW, NP, NQ, NQRHS, NRHS, WORKSIZ REAL ANORM, BNORM, EPS, XNORM, RESID * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCX( DLEN_ ) REAL MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, IGSUM2D, PDSCAEXINFO, PSGESV, $ PSGEMM, PSLACPY, PSLAPRNT, PSLAREAD, PSLAWRITE * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PSLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDSCAEXINFO( OUTFILE, NOUT, N, NRHS, NB, NPROW, NPCOL, MEM, $ IAM, NPROCS ) * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 20 * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) NQRHS = NUMROC( NRHS, NB, MYCOL, 0, NPCOL ) * * Initialize the array descriptor for the matrix A and B * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCB, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCX, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( 1 ) * IPA = 1 IPACPY = IPA + DESCA( LLD_ )*NQ IPB = IPACPY + DESCA( LLD_ )*NQ IPX = IPB + DESCB( LLD_ )*NQRHS IPPIV = IPX + DESCB( LLD_ )*NQRHS LIPIV = ICEIL( INTGSZ*( NP+NB ), REALSZ ) IPW = IPPIV + MAX( NP, LIPIV ) * WORKSIZ = NB * * Check for adequate memory for problem size * INFO = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*REALSZ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) IF( INFO.GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MEMORY' GO TO 10 END IF * * Read from file and distribute matrices A and B * CALL PSLAREAD( 'SSCAEXMAT.dat', MEM( IPA ), DESCA, 0, 0, $ MEM( IPW ) ) CALL PSLAREAD( 'SSCAEXRHS.dat', MEM( IPB ), DESCB, 0, 0, $ MEM( IPW ) ) * * Make a copy of A and the rhs for checking purposes * CALL PSLACPY( 'All', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPACPY ), 1, 1, DESCA ) CALL PSLACPY( 'All', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPX ), 1, 1, DESCX ) * ********************************************************************** * Call ScaLAPACK PSGESV routine ********************************************************************** * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) $ 'Example of ScaLAPACK routine call: (PSGESV)' WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'A * X = B, Matrix A:' WRITE( NOUT, FMT = * ) END IF CALL PSLAPRNT( N, N, MEM( IPA ), 1, 1, DESCA, 0, 0, $ 'A', NOUT, MEM( IPW ) ) IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix B:' WRITE( NOUT, FMT = * ) END IF CALL PSLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, $ 'B', NOUT, MEM( IPW ) ) * CALL PSGESV( N, NRHS, MEM( IPA ), 1, 1, DESCA, MEM( IPPIV ), $ MEM( IPB ), 1, 1, DESCB, INFO ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'INFO code returned by PSGESV = ', INFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix X = A^{-1} * B' WRITE( NOUT, FMT = * ) END IF CALL PSLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, 'X', NOUT, $ MEM( IPW ) ) CALL PSLAWRITE( 'SSCAEXSOL.dat', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ 0, 0, MEM( IPW ) ) * * Compute residual ||A * X - B|| / ( ||X|| * ||A|| * eps * N ) * EPS = PSLAMCH( ICTXT, 'Epsilon' ) ANORM = PSLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, MEM( IPW ) ) BNORM = PSLANGE( 'I', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ) ) CALL PSGEMM( 'No transpose', 'No transpose', N, NRHS, N, ONE, $ MEM( IPACPY ), 1, 1, DESCA, MEM( IPB ), 1, 1, DESCB, $ -ONE, MEM( IPX ), 1, 1, DESCX ) XNORM = PSLANGE( 'I', N, NRHS, MEM( IPX ), 1, 1, DESCX, $ MEM( IPW ) ) RESID = XNORM / ( ANORM * BNORM * EPS * DBLE( N ) ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '||A * X - B|| / ( ||X|| * ||A|| * eps * N ) = ', RESID WRITE( NOUT, FMT = * ) IF( RESID.LT.10.0D+0 ) THEN WRITE( NOUT, FMT = * ) 'The answer is correct.' ELSE WRITE( NOUT, FMT = * ) 'The answer is suspicious.' END IF END IF * 10 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 20 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = * ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9997 FORMAT( 'END OF TESTS.' ) * STOP * * End of PSSCAEX * END scalapack-1.8.0/EXAMPLE/CSCAEXMAT.dat0000640000175000017500000000165210604624224016417 0ustar muammarmuammar6 6 6.0000E+0 4.0000E+0 3.0000E+0 -5.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 3.0000E+0 -3.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 -3.0000E+0 0.0000E+0 -1.0000E+0 -9.0000E+0 1.0000E+0 2.0000E+0 1.0000E+0 4.0000E+0 0.0000E+0 0.0000E+0 -1.0000E+0 -3.0000E+0 0.0000E+0 0.0000E+0 11.0000E+0 21.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 10.0000E+0 5.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 -11.0000E+0 -12.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 2.0000E+0 0.0000E+0 -4.0000E+0 5.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 8.0000E+0 1.0000E+0 0.0000E+0 0.0000E+0 -10.0000E+0 -10.0000E+0 scalapack-1.8.0/EXAMPLE/Makefile0000640000175000017500000000350710604624224016056 0ustar muammarmuammarinclude ../SLmake.inc TESTOBJS = psscaex.o pdscaexinfo.o TESTOBJD = pdscaex.o pdscaexinfo.o TESTOBJC = pcscaex.o pdscaexinfo.o TESTOBJZ = pzscaex.o pdscaexinfo.o all: pdscaex pcscaex pzscaex psscaex single: psscaex double: pdscaex complex: pcscaex complex16: pzscaex $(TESTINGdir)/SCAEX.dat: SCAEX.dat cp SCAEX.dat $(TESTINGdir) $(TESTINGdir)/SSCAEXMAT.dat: SSCAEXMAT.dat cp SSCAEXMAT.dat $(TESTINGdir) $(TESTINGdir)/SSCAEXRHS.dat: SSCAEXRHS.dat cp SSCAEXRHS.dat $(TESTINGdir) psscaex: $(TESTOBJS) $(F77LOADER) $(F77LOADFLAGS) -o $(TESTINGdir)/xsscaex $(TESTOBJS) $(LIBS) $(MAKE) $(TESTINGdir)/SCAEX.dat $(MAKE) $(TESTINGdir)/SSCAEXMAT.dat $(MAKE) $(TESTINGdir)/SSCAEXRHS.dat $(TESTINGdir)/ZSCAEXMAT.dat: ZSCAEXMAT.dat cp ZSCAEXMAT.dat $(TESTINGdir) $(TESTINGdir)/ZSCAEXRHS.dat: ZSCAEXRHS.dat cp ZSCAEXRHS.dat $(TESTINGdir) pzscaex: $(TESTOBJZ) $(F77LOADER) $(F77LOADFLAGS) -o $(TESTINGdir)/xzscaex $(TESTOBJZ) $(LIBS) $(MAKE) $(TESTINGdir)/SCAEX.dat $(MAKE) $(TESTINGdir)/ZSCAEXMAT.dat $(MAKE) $(TESTINGdir)/ZSCAEXRHS.dat $(TESTINGdir)/CSCAEXMAT.dat: CSCAEXMAT.dat cp CSCAEXMAT.dat $(TESTINGdir) $(TESTINGdir)/CSCAEXRHS.dat: CSCAEXRHS.dat cp CSCAEXRHS.dat $(TESTINGdir) pcscaex: $(TESTOBJC) $(F77LOADER) $(F77LOADFLAGS) -o $(TESTINGdir)/xcscaex $(TESTOBJC) $(LIBS) $(MAKE) $(TESTINGdir)/SCAEX.dat $(MAKE) $(TESTINGdir)/CSCAEXMAT.dat $(MAKE) $(TESTINGdir)/CSCAEXRHS.dat $(TESTINGdir)/DSCAEXMAT.dat: DSCAEXMAT.dat cp DSCAEXMAT.dat $(TESTINGdir) $(TESTINGdir)/DSCAEXRHS.dat: DSCAEXRHS.dat cp DSCAEXRHS.dat $(TESTINGdir) pdscaex: $(TESTOBJD) $(F77LOADER) $(F77LOADFLAGS) -o $(TESTINGdir)/xdscaex $(TESTOBJD) $(LIBS) $(MAKE) $(TESTINGdir)/SCAEX.dat $(MAKE) $(TESTINGdir)/DSCAEXMAT.dat $(MAKE) $(TESTINGdir)/DSCAEXRHS.dat clean : rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/EXAMPLE/ZSCAEXRHS.dat0000640000175000017500000000055210604624224016457 0ustar muammarmuammar 6 1 72.000000000000000000D+00 81.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 160.000000000000000000D+00 120.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 scalapack-1.8.0/EXAMPLE/ZSCAEXMAT.dat0000640000175000017500000000165210604624224016446 0ustar muammarmuammar6 6 6.0000D+0 4.0000D+0 3.0000D+0 -5.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 3.0000D+0 -3.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 -3.0000D+0 0.0000D+0 -1.0000D+0 -9.0000D+0 1.0000D+0 2.0000D+0 1.0000D+0 4.0000D+0 0.0000D+0 0.0000D+0 -1.0000D+0 -3.0000D+0 0.0000D+0 0.0000D+0 11.0000D+0 21.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 10.0000D+0 5.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 -11.0000D+0 -12.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 2.0000D+0 0.0000D+0 -4.0000D+0 5.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 8.0000D+0 1.0000D+0 0.0000D+0 0.0000D+0 -10.0000D+0 -10.0000D+0 scalapack-1.8.0/EXAMPLE/pzscaex.f0000640000175000017500000001724510604624224016246 0ustar muammarmuammar PROGRAM PZSCAEX * * -- ScaLAPACK example code -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * * This program solves a linear system by calling the ScaLAPACK * routine PZGESV. The input matrix and right-and-sides are * read from a file. The solution is written to a file. * * .. Parameters .. INTEGER CP16SZ, INTGSZ, MEMSIZ, TOTMEM PARAMETER ( CP16SZ = 16, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CP16SZ ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = (1.0D+0,0.0D+0) ) * .. * .. Local Scalars .. CHARACTER*80 OUTFILE INTEGER IAM, ICTXT, INFO, IPA, IPACPY, IPB, IPPIV, IPX, $ IPW, LIPIV, MYCOL, MYROW, N, NB, NOUT, NPZOL, $ NPROCS, NPROW, NP, NQ, NQRHS, NRHS, WORKSIZ DOUBLE PRECISION ANORM, BNORM, EPS, XNORM, RESID * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCX( DLEN_ ) COMPLEX*16 MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, IGSUM2D, PZGESV, $ PZGEMM, PZLACPY, PZLAPRNT, PZLAREAD, PZLAWRITE * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PZLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDSCAEXINFO( OUTFILE, NOUT, N, NRHS, NB, NPROW, NPZOL, MEM, $ IAM, NPROCS ) * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPZOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPZOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPZOL ) $ GO TO 20 * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPZOL ) NQRHS = NUMROC( NRHS, NB, MYCOL, 0, NPZOL ) * * Initialize the array descriptor for the matrix A and B * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCB, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCX, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( 1 ) * IPA = 1 IPACPY = IPA + DESCA( LLD_ )*NQ IPB = IPACPY + DESCA( LLD_ )*NQ IPX = IPB + DESCB( LLD_ )*NQRHS IPPIV = IPX + DESCB( LLD_ )*NQRHS LIPIV = ICEIL( INTGSZ*( NP+NB ), CP16SZ ) IPW = IPPIV + MAX( NP, LIPIV ) * WORKSIZ = NB * * Check for adequate memory for problem size * INFO = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*CP16SZ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) IF( INFO.GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MEMORY' GO TO 10 END IF * * Read from file and distribute matrices A and B * CALL PZLAREAD( 'ZSCAEXMAT.dat', MEM( IPA ), DESCA, 0, 0, $ MEM( IPW ) ) CALL PZLAREAD( 'ZSCAEXRHS.dat', MEM( IPB ), DESCB, 0, 0, $ MEM( IPW ) ) * * Make a copy of A and the rhs for checking purposes * CALL PZLACPY( 'All', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPACPY ), 1, 1, DESCA ) CALL PZLACPY( 'All', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPX ), 1, 1, DESCX ) * ********************************************************************** * Call ScaLAPACK PZGESV routine ********************************************************************** * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) $ 'Example of ScaLAPACK routine call: (PZGESV)' WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'A * X = B, Matrix A:' WRITE( NOUT, FMT = * ) END IF CALL PZLAPRNT( N, N, MEM( IPA ), 1, 1, DESCA, 0, 0, $ 'A', NOUT, MEM( IPW ) ) IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix B:' WRITE( NOUT, FMT = * ) END IF CALL PZLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, $ 'B', NOUT, MEM( IPW ) ) * CALL PZGESV( N, NRHS, MEM( IPA ), 1, 1, DESCA, MEM( IPPIV ), $ MEM( IPB ), 1, 1, DESCB, INFO ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'INFO code returned by PZGESV = ', INFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix X = A^{-1} * B' WRITE( NOUT, FMT = * ) END IF CALL PZLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, 'X', NOUT, $ MEM( IPW ) ) CALL PZLAWRITE( 'ZSCAEXSOL.dat', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ 0, 0, MEM( IPW ) ) * * Compute residual ||A * X - B|| / ( ||X|| * ||A|| * eps * N ) * EPS = PDLAMCH( ICTXT, 'Epsilon' ) ANORM = PZLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, MEM( IPW ) ) BNORM = PZLANGE( 'I', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ) ) CALL PZGEMM( 'No transpose', 'No transpose', N, NRHS, N, ONE, $ MEM( IPACPY ), 1, 1, DESCA, MEM( IPB ), 1, 1, DESCB, $ -ONE, MEM( IPX ), 1, 1, DESCX ) XNORM = PZLANGE( 'I', N, NRHS, MEM( IPX ), 1, 1, DESCX, $ MEM( IPW ) ) RESID = XNORM / ( ANORM * BNORM * EPS * DBLE( N ) ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '||A * X - B|| / ( ||X|| * ||A|| * eps * N ) = ', RESID WRITE( NOUT, FMT = * ) IF( RESID.LT.10.0D+0 ) THEN WRITE( NOUT, FMT = * ) 'The answer is correct.' ELSE WRITE( NOUT, FMT = * ) 'The answer is suspicious.' END IF END IF * 10 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 20 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = * ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9997 FORMAT( 'END OF TESTS.' ) * STOP * * End of PZSCAEX * END scalapack-1.8.0/EXAMPLE/SSCAEXRHS.dat0000640000175000017500000000030210604624224016441 0ustar muammarmuammar 6 1 72.000000000000000000E+00 0.000000000000000000E+00 160.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 scalapack-1.8.0/EXAMPLE/SSCAEXMAT.dat0000640000175000017500000000062210604624224016433 0ustar muammarmuammar6 6 6.0000E+0 3.0000E+0 0.0000E+0 0.0000E+0 3.0000E+0 0.0000E+0 0.0000E+0 -3.0000E+0 -1.0000E+0 1.0000E+0 1.0000E+0 0.0000E+0 -1.0000E+0 0.0000E+0 11.0000E+0 0.0000E+0 0.0000E+0 10.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 -11.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 2.0000E+0 -4.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 0.0000E+0 8.0000E+0 0.0000E+0 -10.0000E+0 scalapack-1.8.0/EXAMPLE/pcscaex.f0000640000175000017500000001724410604624224016216 0ustar muammarmuammar PROGRAM PCSCAEX * * -- ScaLAPACK example code -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * * This program solves a linear system by calling the ScaLAPACK * routine PCGESV. The input matrix and right-and-sides are * read from a file. The solution is written to a file. * * .. Parameters .. INTEGER CPLXSZ, INTGSZ, MEMSIZ, TOTMEM PARAMETER ( CPLXSZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / CPLXSZ ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = (1.0D+0,0.0D+0) ) * .. * .. Local Scalars .. CHARACTER*80 OUTFILE INTEGER IAM, ICTXT, INFO, IPA, IPACPY, IPB, IPPIV, IPX, $ IPW, LIPIV, MYCOL, MYROW, N, NB, NOUT, NPCOL, $ NPROCS, NPROW, NP, NQ, NQRHS, NRHS, WORKSIZ REAL ANORM, BNORM, EPS, XNORM, RESID * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCX( DLEN_ ) COMPLEX MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, IGSUM2D, PCGESV, $ PCGEMM, PCLACPY, PCLAPRNT, PCLAREAD, PCLAWRITE * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH, PCLANGE EXTERNAL ICEIL, NUMROC, PSLAMCH, PCLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDSCAEXINFO( OUTFILE, NOUT, N, NRHS, NB, NPROW, NPCOL, MEM, $ IAM, NPROCS ) * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 20 * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) NQRHS = NUMROC( NRHS, NB, MYCOL, 0, NPCOL ) * * Initialize the array descriptor for the matrix A and B * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCB, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCX, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( 1 ) * IPA = 1 IPACPY = IPA + DESCA( LLD_ )*NQ IPB = IPACPY + DESCA( LLD_ )*NQ IPX = IPB + DESCB( LLD_ )*NQRHS IPPIV = IPX + DESCB( LLD_ )*NQRHS LIPIV = ICEIL( INTGSZ*( NP+NB ), CPLXSZ ) IPW = IPPIV + MAX( NP, LIPIV ) * WORKSIZ = NB * * Check for adequate memory for problem size * INFO = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*CPLXSZ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) IF( INFO.GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MEMORY' GO TO 10 END IF * * Read from file and distribute matrices A and B * CALL PCLAREAD( 'CSCAEXMAT.dat', MEM( IPA ), DESCA, 0, 0, $ MEM( IPW ) ) CALL PCLAREAD( 'CSCAEXRHS.dat', MEM( IPB ), DESCB, 0, 0, $ MEM( IPW ) ) * * Make a copy of A and the rhs for checking purposes * CALL PCLACPY( 'All', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPACPY ), 1, 1, DESCA ) CALL PCLACPY( 'All', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPX ), 1, 1, DESCX ) * ********************************************************************** * Call ScaLAPACK PCGESV routine ********************************************************************** * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) $ 'Example of ScaLAPACK routine call: (PCGESV)' WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'A * X = B, Matrix A:' WRITE( NOUT, FMT = * ) END IF CALL PCLAPRNT( N, N, MEM( IPA ), 1, 1, DESCA, 0, 0, $ 'A', NOUT, MEM( IPW ) ) IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix B:' WRITE( NOUT, FMT = * ) END IF CALL PCLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, $ 'B', NOUT, MEM( IPW ) ) * CALL PCGESV( N, NRHS, MEM( IPA ), 1, 1, DESCA, MEM( IPPIV ), $ MEM( IPB ), 1, 1, DESCB, INFO ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'INFO code returned by PCGESV = ', INFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix X = A^{-1} * B' WRITE( NOUT, FMT = * ) END IF CALL PCLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, 'X', NOUT, $ MEM( IPW ) ) CALL PCLAWRITE( 'CSCAEXSOL.dat', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ 0, 0, MEM( IPW ) ) * * Compute residual ||A * X - B|| / ( ||X|| * ||A|| * eps * N ) * EPS = PSLAMCH( ICTXT, 'Epsilon' ) ANORM = PCLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, MEM( IPW ) ) BNORM = PCLANGE( 'I', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ) ) CALL PCGEMM( 'No transpose', 'No transpose', N, NRHS, N, ONE, $ MEM( IPACPY ), 1, 1, DESCA, MEM( IPB ), 1, 1, DESCB, $ -ONE, MEM( IPX ), 1, 1, DESCX ) XNORM = PCLANGE( 'I', N, NRHS, MEM( IPX ), 1, 1, DESCX, $ MEM( IPW ) ) RESID = XNORM / ( ANORM * BNORM * EPS * DBLE( N ) ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '||A * X - B|| / ( ||X|| * ||A|| * eps * N ) = ', RESID WRITE( NOUT, FMT = * ) IF( RESID.LT.10.0D+0 ) THEN WRITE( NOUT, FMT = * ) 'The answer is correct.' ELSE WRITE( NOUT, FMT = * ) 'The answer is suspicious.' END IF END IF * 10 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 20 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = * ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9997 FORMAT( 'END OF TESTS.' ) * STOP * * End of PCSCAEX * END scalapack-1.8.0/EXAMPLE/DSCAEXRHS.dat0000640000175000017500000000030210604624224016422 0ustar muammarmuammar 6 1 72.000000000000000000D+00 0.000000000000000000D+00 160.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 0.000000000000000000D+00 scalapack-1.8.0/EXAMPLE/SCAEX.dat0000640000175000017500000000034710604624224015752 0ustar muammarmuammar'ScaLAPACK Tutorial, Example input file' 'PARA95, ScaLAPACK Example, August 1995.' 'SCAEX.out' output file name (if any) 6 device out 6 value of N 1 value of NRHS 2 values of NB 2 values of NPROW 2 values of NPCOL scalapack-1.8.0/EXAMPLE/pdscaexinfo.f0000640000175000017500000001235510604624224017071 0ustar muammarmuammar SUBROUTINE PDSCAEXINFO( SUMMRY, NOUT, N, NRHS, NB, NPROW, NPCOL, $ WORK, IAM, NPROCS ) * * -- ScaLAPACK example code -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * * This program solves a linear system by calling the ScaLAPACK * routine PDGESV. The input matrix and right-and-sides are * read from a file. The solution is written to a file. * * .. Scalar Arguments .. CHARACTER*( * ) SUMMRY INTEGER IAM, N, NRHS, NB, NOUT, NPCOL, NPROCS, NPROW * .. * .. Array Arguments .. INTEGER WORK( * ) * .. * * ====================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) * .. * .. Local Scalars .. CHARACTER*79 USRINFO INTEGER ICTXT * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINIT, BLACS_SETUP, IGEBR2D, IGEBS2D * .. * .. Executable Statements .. * * Process 0 reads the input data, broadcasts to other processes and * writes needed information to NOUT * IF( IAM.EQ.0 ) THEN * * Open file and skip data file header * OPEN( NIN, FILE='SCAEX.dat', STATUS='OLD' ) READ( NIN, FMT = * ) SUMMRY SUMMRY = ' ' * * Read in user-supplied info about machine type, compiler, etc. * READ( NIN, FMT = 9999 ) USRINFO * * Read name and unit number for summary output file * READ( NIN, FMT = * ) SUMMRY READ( NIN, FMT = * ) NOUT IF( NOUT.NE.0 .AND. NOUT.NE.6 ) $ OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) * * Read and check the parameter values for the tests. * * Get matrix dimensions * READ( NIN, FMT = * ) N READ( NIN, FMT = * ) NRHS * * Get value of NB * READ( NIN, FMT = * ) NB * * Get grid shape * READ( NIN, FMT = * ) NPROW READ( NIN, FMT = * ) NPCOL * * Close input file * CLOSE( NIN ) * * If underlying system needs additional set up, do it now * IF( NPROCS.LT.1 ) THEN NPROCS = NPROW * NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * * Pack information arrays and broadcast * WORK( 1 ) = N WORK( 2 ) = NRHS WORK( 3 ) = NB WORK( 4 ) = NPROW WORK( 5 ) = NPCOL CALL IGEBS2D( ICTXT, 'All', ' ', 5, 1, WORK, 5 ) * * regurgitate input * WRITE( NOUT, FMT = 9999 ) $ 'SCALAPACK example driver.' WRITE( NOUT, FMT = 9999 ) USRINFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The matrices A and B are read from '// $ 'a file.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'An explanation of the input/output '// $ 'parameters follows:' * WRITE( NOUT, FMT = 9999 ) $ 'N : The order of the matrix A.' WRITE( NOUT, FMT = 9999 ) $ 'NRHS : The number of right and sides.' WRITE( NOUT, FMT = 9999 ) $ 'NB : The size of the square blocks the'// $ ' matrices A and B are split into.' WRITE( NOUT, FMT = 9999 ) $ 'P : The number of process rows.' WRITE( NOUT, FMT = 9999 ) $ 'Q : The number of process columns.' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 ) $ 'The following parameter values will be used:' WRITE( NOUT, FMT = 9998 ) 'N ', N WRITE( NOUT, FMT = 9998 ) 'NRHS ', NRHS WRITE( NOUT, FMT = 9998 ) 'NB ', NB WRITE( NOUT, FMT = 9998 ) 'P ', NPROW WRITE( NOUT, FMT = 9998 ) 'Q ', NPCOL WRITE( NOUT, FMT = * ) * ELSE * * If underlying system needs additional set up, do it now * IF( NPROCS.LT.1 ) $ CALL BLACS_SETUP( IAM, NPROCS ) * * Temporarily define blacs grid to include all processes so * information can be broadcast to all processes * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS ) * CALL IGEBR2D( ICTXT, 'All', ' ', 5, 1, WORK, 5, 0, 0 ) N = WORK( 1 ) NRHS = WORK( 2 ) NB = WORK( 3 ) NPROW = WORK( 4 ) NPCOL = WORK( 5 ) * END IF * CALL BLACS_GRIDEXIT( ICTXT ) * RETURN * 20 WRITE( NOUT, FMT = 9997 ) CLOSE( NIN ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE( NOUT ) CALL BLACS_ABORT( ICTXT, 1 ) * STOP * 9999 FORMAT( A ) 9998 FORMAT( 2X, A5, ' : ', I6 ) 9997 FORMAT( ' Illegal input in file ',40A,'. Aborting run.' ) * * End of PDSCAEXINFO * END scalapack-1.8.0/EXAMPLE/CSCAEXRHS.dat0000640000175000017500000000034010604624224016423 0ustar muammarmuammar 6 1 72.000000000000000000E+00 0E+0 0.000000000000000000E+00 0E+0 160.000000000000000000E+00 0E+0 0.000000000000000000E+00 0E+0 0.000000000000000000E+00 0E+0 0.000000000000000000E+00 0E+0 scalapack-1.8.0/EXAMPLE/DSCAEXMAT.dat0000640000175000017500000000062210604624224016414 0ustar muammarmuammar6 6 6.0000D+0 3.0000D+0 0.0000D+0 0.0000D+0 3.0000D+0 0.0000D+0 0.0000D+0 -3.0000D+0 -1.0000D+0 1.0000D+0 1.0000D+0 0.0000D+0 -1.0000D+0 0.0000D+0 11.0000D+0 0.0000D+0 0.0000D+0 10.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 -11.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 2.0000D+0 -4.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 0.0000D+0 8.0000D+0 0.0000D+0 -10.0000D+0 scalapack-1.8.0/EXAMPLE/pdscaex.f0000640000175000017500000001725010604624224016214 0ustar muammarmuammar PROGRAM PDSCAEX * * -- ScaLAPACK example code -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * * This program solves a linear system by calling the ScaLAPACK * routine PDGESV. The input matrix and right-and-sides are * read from a file. The solution is written to a file. * * .. Parameters .. INTEGER DBLESZ, INTGSZ, MEMSIZ, TOTMEM PARAMETER ( DBLESZ = 8, INTGSZ = 4, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER*80 OUTFILE INTEGER IAM, ICTXT, INFO, IPA, IPACPY, IPB, IPPIV, IPX, $ IPW, LIPIV, MYCOL, MYROW, N, NB, NOUT, NPCOL, $ NPROCS, NPROW, NP, NQ, NQRHS, NRHS, WORKSIZ DOUBLE PRECISION ANORM, BNORM, EPS, XNORM, RESID * .. * .. Local Arrays .. INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCX( DLEN_ ) DOUBLE PRECISION MEM( MEMSIZ ) * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO, $ DESCINIT, IGSUM2D, PDSCAEXINFO, PDGESV, $ PDGEMM, PDLACPY, PDLAPRNT, PDLAREAD, PDLAWRITE * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ICEIL, NUMROC, PDLAMCH, PDLANGE * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) CALL PDSCAEXINFO( OUTFILE, NOUT, N, NRHS, NB, NPROW, NPCOL, MEM, $ IAM, NPROCS ) * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Go to bottom of process grid loop if this case doesn't use my * process * IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL ) $ GO TO 20 * NP = NUMROC( N, NB, MYROW, 0, NPROW ) NQ = NUMROC( N, NB, MYCOL, 0, NPCOL ) NQRHS = NUMROC( NRHS, NB, MYCOL, 0, NPCOL ) * * Initialize the array descriptor for the matrix A and B * CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCB, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) CALL DESCINIT( DESCX, N, NRHS, NB, NB, 0, 0, ICTXT, MAX( 1, NP ), $ INFO ) * * Assign pointers into MEM for SCALAPACK arrays, A is * allocated starting at position MEM( 1 ) * IPA = 1 IPACPY = IPA + DESCA( LLD_ )*NQ IPB = IPACPY + DESCA( LLD_ )*NQ IPX = IPB + DESCB( LLD_ )*NQRHS IPPIV = IPX + DESCB( LLD_ )*NQRHS LIPIV = ICEIL( INTGSZ*( NP+NB ), DBLESZ ) IPW = IPPIV + MAX( NP, LIPIV ) * WORKSIZ = NB * * Check for adequate memory for problem size * INFO = 0 IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*DBLESZ INFO = 1 END IF * * Check all processes for an error * CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 ) IF( INFO.GT.0 ) THEN IF( IAM.EQ.0 ) $ WRITE( NOUT, FMT = 9999 ) 'MEMORY' GO TO 10 END IF * * Read from file and distribute matrices A and B * CALL PDLAREAD( 'DSCAEXMAT.dat', MEM( IPA ), DESCA, 0, 0, $ MEM( IPW ) ) CALL PDLAREAD( 'DSCAEXRHS.dat', MEM( IPB ), DESCB, 0, 0, $ MEM( IPW ) ) * * Make a copy of A and the rhs for checking purposes * CALL PDLACPY( 'All', N, N, MEM( IPA ), 1, 1, DESCA, $ MEM( IPACPY ), 1, 1, DESCA ) CALL PDLACPY( 'All', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPX ), 1, 1, DESCX ) * ********************************************************************** * Call ScaLAPACK PDGESV routine ********************************************************************** * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) $ 'Example of ScaLAPACK routine call: (PDGESV)' WRITE( NOUT, FMT = * ) $ '***********************************************' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'A * X = B, Matrix A:' WRITE( NOUT, FMT = * ) END IF CALL PDLAPRNT( N, N, MEM( IPA ), 1, 1, DESCA, 0, 0, $ 'A', NOUT, MEM( IPW ) ) IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix B:' WRITE( NOUT, FMT = * ) END IF CALL PDLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, $ 'B', NOUT, MEM( IPW ) ) * CALL PDGESV( N, NRHS, MEM( IPA ), 1, 1, DESCA, MEM( IPPIV ), $ MEM( IPB ), 1, 1, DESCB, INFO ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'INFO code returned by PDGESV = ', INFO WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) 'Matrix X = A^{-1} * B' WRITE( NOUT, FMT = * ) END IF CALL PDLAPRNT( N, NRHS, MEM( IPB ), 1, 1, DESCB, 0, 0, 'X', NOUT, $ MEM( IPW ) ) CALL PDLAWRITE( 'DSCAEXSOL.dat', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ 0, 0, MEM( IPW ) ) * * Compute residual ||A * X - B|| / ( ||X|| * ||A|| * eps * N ) * EPS = PDLAMCH( ICTXT, 'Epsilon' ) ANORM = PDLANGE( 'I', N, N, MEM( IPA ), 1, 1, DESCA, MEM( IPW ) ) BNORM = PDLANGE( 'I', N, NRHS, MEM( IPB ), 1, 1, DESCB, $ MEM( IPW ) ) CALL PDGEMM( 'No transpose', 'No transpose', N, NRHS, N, ONE, $ MEM( IPACPY ), 1, 1, DESCA, MEM( IPB ), 1, 1, DESCB, $ -ONE, MEM( IPX ), 1, 1, DESCX ) XNORM = PDLANGE( 'I', N, NRHS, MEM( IPX ), 1, 1, DESCX, $ MEM( IPW ) ) RESID = XNORM / ( ANORM * BNORM * EPS * DBLE( N ) ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ '||A * X - B|| / ( ||X|| * ||A|| * eps * N ) = ', RESID WRITE( NOUT, FMT = * ) IF( RESID.LT.10.0D+0 ) THEN WRITE( NOUT, FMT = * ) 'The answer is correct.' ELSE WRITE( NOUT, FMT = * ) 'The answer is suspicious.' END IF END IF * 10 CONTINUE * CALL BLACS_GRIDEXIT( ICTXT ) * 20 CONTINUE * * Print ending messages and close output file * IF( IAM.EQ.0 ) THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9997 ) WRITE( NOUT, FMT = * ) IF( NOUT.NE.6 .AND. NOUT.NE.0 ) $ CLOSE ( NOUT ) END IF * CALL BLACS_EXIT( 0 ) * 9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' ) 9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least', $ I11 ) 9997 FORMAT( 'END OF TESTS.' ) * STOP * * End of PDSCAEX * END scalapack-1.8.0/README0000640000175000017500000001347310605264721014211 0ustar muammarmuammar===================== ScaLAPACK README FILE ===================== VERSION 1.0 : February 28, 1995 UPDATES: VERSION 1.1 : March 20, 1995 VERSION 1.2 : May 10, 1996 UPDATES: VERSION 1.3 : June 5, 1996 VERSION 1.4 : November 17, 1996 VERSION 1.5 : May 1, 1997 UPDATES: VERSION 1.6 : November 15, 1997 VERSION 1.7 : August, 2001 VERSION 1.8 : April 2007 ScaLAPACK, or Scalable LAPACK, is a library of high performance linear algebra routines for distributed memory message-passing MIMD computers and networks of workstations supporting MPI and/or PVM. The complete ScaLAPACK package is freely available on netlib and can be obtained via the World Wide Web or anonymous ftp. http://www.netlib.org/scalapack/ ScaLAPACK, version 1.8, includes routines for the solution of dense, band, and tridiagonal linear systems of equations, condition estimation and iterative refinement, for LU and Cholesky factorization, matrix inversion, full-rank linear least squares problems, orthogonal and generalized orthogonal factorizations, orthogonal transformation routines, reductions to upper Hessenberg, bidiagonal and tridiagonal form, reduction of a symmetric-definite/ Hermitian-definite generalized eigenproblem to standard form, the symmetric/Hermitian, divide-and-conquer symmetric/Hermitian, generalized symmetric/Hermitian and the nonsymmetric eigenproblem, and the singular value decomposition. With the exception of the singular value decomposition, all routines are available in four types: single precision real, double precision real, single precision complex, and double precision complex. Unless otherwise noted, the current scalapack.tgz on netlib contains all available updates. Errata for ScaLAPACK (source code and documentation) can be found at: http://www.netlib.org/scalapack/errata.html ScaLAPACK example programs can be found at: http://www.netlib.org/scalapack/examples/ Since ScaLAPACK 1.8, a basic example is include in the EXAMPLE directory. Prototype versions of ScaLAPACK routines using packed storage are now available in the scalapack/prototype directory on netlib: http://www.netlib.org/scalapack/prototype/packed.tgz It is highly recommended that you obtain a copy of the ScaLAPACK Users' Guide published by SIAM. This Users' Guide gives a detailed description of the philosophy behind ScaLAPACK as well as an explanation of its usage. Each Users' Guide includes a CD-ROM containing the HTML version of the ScaLAPACK Users' Guide, the source code for the package, testing and timing programs, prebuilt versions of the library for a number of computers, example programs, and a full set of LAPACK Working Notes. The ScaLAPACK Users' Guide can be purchased from: SIAM; 3600 University City Science Center; Philadelphia, PA 19104-2688; 215-382-9800, FAX 215-386-7999. It will also be available from booksellers. To order by email, send email to service@siam.org. The book is also available via SIAM's World Wide Web URL at http://www.siam.org. The ISBN number is 0-89871-397-8, and SIAM order code is SE04. The list price for SIAM members is $39.60; the cost for nonmembers is $49.50. To view an HTML version of the Users' Guide please refer to the URL http://www.netlib.org/scalapack/slug/ Prebuilt ScaLAPACK libraries are available for a variety of architectures. http://www.netlib.org/scalapack/archives/ A number of technical reports were written during the development of ScaLAPACK and published as LAPACK Working Notes by the University of Tennessee. These working notes are available in postscript and pdf format. http://www.netlib.org/lapack/lawns/ http://www.netlib.org/lapack/lawnspdf/ All questions/comments should be directed to scalapack@cs.utk.edu. ---------------------------------------------------------------------- The Makefiles in ScaLAPACK and its support libraries assume the basic directory structure below: SCALAPACK/ _____________________________/ | \______________________________________ / | | | | SLmake.inc | | | \ | | | | | | | | | | | | | | INSTALL/ EXAMPLE/ PBLAS/ REDIST/ SRC/ TESTING/ TOOLS/ / Input \ / Files & \ /Executables\ NOTE: It is assumed that the BLAS, BLACSi and LAPACK libraries (and possibly MPI or PVM) are available on your machine. These libraries are NOT included with this distribution. They can be obtained from the respective blas, blacs, mpi, or pvm directory on netlib. http://www.netlib.org/blas/ http://www.netlib.org/lapack/ http://www.netlib.org/blacs/ http://www.mcs.anl.gov/mpi/mpich/ http://www.lam-mpi.org/ http://www.open-mpi.org/ http://www.netlib.org/pvm3/ All ScaLAPACK routines -- driver, computational, and auxiliary -- can be found in the SRC/ directory. Testing routines and input files can be found in the TESTING/ directory. All machine-specific parameters and the locations of BLAS, BLACSi and LAPACK libraries are specified in the SCALAPACK/SLmake.inc file. This include file is then referenced in all subdirectory Makefiles. Once the include file has been modified, the entire installation process (including the building of testing executables) can be performed by typing ``make'' in the top-level ScaLAPACK directory. For detailed information on the installation and testing process, please refer to the "Installation Guide for ScaLAPACK" (scalapack_install.ps) in the INSTALL/ directory, or the scalapack directory on netlib. scalapack-1.8.0/TOOLS/0000750000175000017500000000000010605234716014221 5ustar muammarmuammarscalapack-1.8.0/TOOLS/dddot.f0000640000175000017500000000154710363532303015470 0ustar muammarmuammar SUBROUTINE DDDOT( N, DOT, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N DOUBLE PRECISION DOT * .. * .. Array Arguments .. DOUBLE PRECISION X( * ), Y( * ) * .. * * Purpose * ======= * * DDDOT is a simple FORTRAN wrapper around the BLAS function * DDOT returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. DOUBLE PRECISION DDOT EXTERNAL DDOT * .. * .. Executable Statements .. * DOT = DDOT( N, X, INCX, Y, INCY ) * RETURN * * End of DDDOT * END scalapack-1.8.0/TOOLS/pctreecomb.f0000640000175000017500000001575310363532303016521 0ustar muammarmuammar SUBROUTINE PCTREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, $ SUBPTR ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SCOPE INTEGER CDEST0, ICTXT, N, RDEST0 * .. * .. Array Arguments .. COMPLEX MINE( * ) * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PCTREECOMB does a 1-tree parallel combine operation on scalars, * using the subroutine indicated by SUBPTR to perform the required * computation. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * SCOPE (global input) CHARACTER * The scope of the operation: 'Rowwise', 'Columnwise', or * 'All'. * * N (global input) INTEGER * The number of elements in MINE. N = 1 for the norm-2 * computation and 2 for the sum of square. * * MINE (local input/global output) COMPLEX array of * dimension at least equal to N. The local data to use in the * combine. * * RDEST0 (global input) INTEGER * The process row to receive the answer. If RDEST0 = -1, * every process in the scope gets the answer. * * CDEST0 (global input) INTEGER * The process column to receive the answer. If CDEST0 = -1, * every process in the scope gets the answer. * * SUBPTR (local input) Pointer to the subroutine to call to perform * the required combine. * * ===================================================================== * * .. Local Scalars .. LOGICAL BCAST, RSCOPE, CSCOPE INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, $ RMSSG, TCDEST, TRDEST * .. * .. Local Arrays .. COMPLEX HIS( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, $ CGERV2D, CGESD2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * See if everyone wants the answer (need to broadcast the answer) * BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) IF( BCAST ) THEN TRDEST = 0 TCDEST = 0 ELSE TRDEST = RDEST0 TCDEST = CDEST0 END IF * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure scope-dependant variables, or report illegal scope * RSCOPE = LSAME( SCOPE, 'R' ) CSCOPE = LSAME( SCOPE, 'C' ) * IF( RSCOPE ) THEN IF( BCAST ) THEN TRDEST = MYROW ELSE IF( MYROW.NE.TRDEST ) THEN RETURN END IF NP = NPCOL MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) ELSE IF( CSCOPE ) THEN IF( BCAST ) THEN TCDEST = MYCOL ELSE IF( MYCOL.NE.TCDEST ) THEN RETURN END IF NP = NPROW MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) ELSE IF( LSAME( SCOPE, 'A' ) ) THEN NP = NPROW * NPCOL IAM = MYROW*NPCOL + MYCOL DEST = TRDEST*NPCOL + TCDEST MYDIST = MOD( NP + IAM - DEST, NP ) ELSE RETURN END IF * IF( NP.LT.2 ) $ RETURN * MYDIST2 = MYDIST RMSSG = MYROW CMSSG = MYCOL I = 1 * 10 CONTINUE * IF( MOD( MYDIST, 2 ).NE.0 ) THEN * * If I am process that sends information * DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) * * Figure coordinates of dest of message * IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) END IF * CALL CGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) * GO TO 20 * ELSE * * If I am a process receiving information, figure coordinates * of source of message * DIST = MYDIST2 + I IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) HISDIST = MOD( NP + CMSSG - TCDEST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) HISDIST = MOD( NP + RMSSG - TRDEST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) END IF * IF( MYDIST2.LT.HISDIST ) THEN * * If I have anyone sending to me * CALL CGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) CALL SUBPTR( MINE, HIS ) * END IF MYDIST = MYDIST / 2 * END IF I = I * 2 * IF( I.LT.NP ) $ GO TO 10 * 20 CONTINUE * IF( BCAST ) THEN IF( MYDIST2.EQ.0 ) THEN CALL CGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) ELSE CALL CGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, $ TRDEST, TCDEST ) END IF END IF * RETURN * * End of PCTREECOMB * END * SUBROUTINE CCOMBAMAX( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. COMPLEX V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * CCOMBAMAX finds the element having max. absolute value as well * as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) COMPLEX array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) COMPLEX array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, REAL, AIMAG * .. * .. Statement Functions .. COMPLEX ZDUM REAL CABS1 CABS1(ZDUM) = ABS(REAL(ZDUM)) + ABS(AIMAG(ZDUM)) * .. * .. Executable Statements .. * IF( CABS1( V1( 1 ) ).LT.CABS1( V2( 1 ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of CCOMBAMAX * END scalapack-1.8.0/TOOLS/infog2l.f0000640000175000017500000001464110363532303015731 0ustar muammarmuammar SUBROUTINE INFOG2L( GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, $ MYCOL, LRINDX, LCINDX, RSRC, CSRC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CSRC, GCINDX, GRINDX, LRINDX, LCINDX, MYCOL, $ MYROW, NPCOL, NPROW, RSRC * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * INFOG2L computes the starting local indexes LRINDX, LCINDX corres- * ponding to the distributed submatrix starting globally at the entry * pointed by GRINDX, GCINDX. This routine returns the coordinates in * the grid of the process owning the matrix entry of global indexes * GRINDX, GCINDX, namely RSRC and CSRC. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * GRINDX (global input) INTEGER * The global row starting index of the submatrix. * * GCINDX (global input) INTEGER * The global column starting index of the submatrix. * * DESC (input) INTEGER array of dimension DLEN_. * The array descriptor for the underlying distributed matrix. * * NPROW (global input) INTEGER * The total number of process rows over which the distributed * matrix is distributed. * * NPCOL (global input) INTEGER * The total number of process columns over which the * distributed matrix is distributed. * * MYROW (local input) INTEGER * The row coordinate of the process calling this routine. * * MYCOL (local input) INTEGER * The column coordinate of the process calling this routine. * * LRINDX (local output) INTEGER * The local rows starting index of the submatrix. * * LCINDX (local output) INTEGER * The local columns starting index of the submatrix. * * RSRC (global output) INTEGER * The row coordinate of the process that possesses the first * row and column of the submatrix. * * CSRC (global output) INTEGER * The column coordinate of the process that possesses the * first row and column of the submatrix. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CBLK, GCCPY, GRCPY, RBLK * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * GRCPY = GRINDX-1 GCCPY = GCINDX-1 * RBLK = GRCPY / DESC(MB_) CBLK = GCCPY / DESC(NB_) RSRC = MOD( RBLK + DESC(RSRC_), NPROW ) CSRC = MOD( CBLK + DESC(CSRC_), NPCOL ) * LRINDX = ( RBLK / NPROW + 1 ) * DESC(MB_) + 1 LCINDX = ( CBLK / NPCOL + 1 ) * DESC(NB_) + 1 * IF( MOD( MYROW+NPROW-DESC(RSRC_), NPROW ) .GE. $ MOD( RBLK, NPROW ) ) THEN IF( MYROW.EQ.RSRC ) $ LRINDX = LRINDX + MOD( GRCPY, DESC(MB_) ) LRINDX = LRINDX - DESC(MB_) END IF * IF( MOD( MYCOL+NPCOL-DESC(CSRC_), NPCOL ) .GE. $ MOD( CBLK, NPCOL ) ) THEN IF( MYCOL.EQ.CSRC ) $ LCINDX = LCINDX + MOD( GCCPY, DESC(NB_) ) LCINDX = LCINDX - DESC(NB_) END IF * RETURN * * End of INFOG2L * END scalapack-1.8.0/TOOLS/pdchekpad.f0000640000175000017500000001213310363532303016306 0ustar muammarmuammar SUBROUTINE PDCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N DOUBLE PRECISION CHKVAL * .. * .. Array Arguments .. CHARACTER MESS*(*) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDCHEKPAD checks that the padding around a local array has not * been overwritten since the call to PDFILLPAD. 3 types of errors * are reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells you that the 5th element of the IPRE long buffer has been * overwritten with the value 18, where it should still have the value * of CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells you that the 19th element after the end of A was overwritten * with the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells you that the element at the 12th row and 3rd column of A was * overwritten with the value of 22, where it should still have the * value of CHKVAL. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * MESS (local input) CHARACTER*(*) * String containing a user-defined message. * * M (local input) INTEGER * The number of rows in the local array A. * * N (input) INTEGER * The number of columns in the local array A. * * A (local input) DOUBLE PRECISION array of dimension (LDA,N). * A location IPRE elements in front of the array to be checked. * * LDA (local input) INTEGER * The leading Dimension of the local array to be checked. * * IPRE (local input) INTEGER * The size of the guard zone before the start of padded array. * * IPOST (local input) INTEGER * The size of guard zone after the padded array. * * CHKVAL (local input) DOUBLE PRECISION * The value the local array was padded with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ A( I ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PDCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, A( I ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PDCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, A( I ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G20.7 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G20.7 ) * RETURN * * End of PDCHEKPAD * END scalapack-1.8.0/TOOLS/indxl2g.f0000640000175000017500000000307210363532303015734 0ustar muammarmuammar INTEGER FUNCTION INDXL2G( INDXLOC, NB, IPROC, ISRCPROC, NPROCS ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INDXLOC, IPROC, ISRCPROC, NB, NPROCS * .. * * Purpose * ======= * * INDXL2G computes the global index of a distributed matrix entry * pointed to by the local index INDXLOC of the process indicated by * IPROC. * * Arguments * ========= * * INDXLOC (global input) INTEGER * The local index of the distributed matrix entry. * * NB (global input) INTEGER * Block size, size of the blocks the distributed matrix is * split into. * * IPROC (local input) INTEGER * The coordinate of the process whose local array row or * column is to be determined. * * ISRCPROC (global input) INTEGER * The coordinate of the process that possesses the first * row/column of the distributed matrix. * * NPROCS (global input) INTEGER * The total number processes over which the distributed * matrix is distributed. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * INDXL2G = NPROCS*NB*((INDXLOC-1)/NB) + MOD(INDXLOC-1,NB) + $ MOD(NPROCS+IPROC-ISRCPROC, NPROCS)*NB + 1 * RETURN * * End of INDXL2G * END scalapack-1.8.0/TOOLS/pztreecomb.f0000640000175000017500000001577610363532303016555 0ustar muammarmuammar SUBROUTINE PZTREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, $ SUBPTR ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SCOPE INTEGER CDEST0, ICTXT, N, RDEST0 * .. * .. Array Arguments .. COMPLEX*16 MINE( * ) * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PZTREECOMB does a 1-tree parallel combine operation on scalars, * using the subroutine indicated by SUBPTR to perform the required * computation. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * SCOPE (global input) CHARACTER * The scope of the operation: 'Rowwise', 'Columnwise', or * 'All'. * * N (global input) INTEGER * The number of elements in MINE. N = 1 for the norm-2 * computation and 2 for the sum of square. * * MINE (local input/global output) COMPLEX*16 array of * dimension at least equal to N. The local data to use in the * combine. * * RDEST0 (global input) INTEGER * The process row to receive the answer. If RDEST0 = -1, * every process in the scope gets the answer. * * CDEST0 (global input) INTEGER * The process column to receive the answer. If CDEST0 = -1, * every process in the scope gets the answer. * * SUBPTR (local input) Pointer to the subroutine to call to perform * the required combine. * * ===================================================================== * * .. Local Scalars .. LOGICAL BCAST, RSCOPE, CSCOPE INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, $ RMSSG, TCDEST, TRDEST * .. * .. Local Arrays .. COMPLEX*16 HIS( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZGEBR2D, ZGEBS2D, $ ZGERV2D, ZGESD2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * See if everyone wants the answer (need to broadcast the answer) * BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) IF( BCAST ) THEN TRDEST = 0 TCDEST = 0 ELSE TRDEST = RDEST0 TCDEST = CDEST0 END IF * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure scope-dependant variables, or report illegal scope * RSCOPE = LSAME( SCOPE, 'R' ) CSCOPE = LSAME( SCOPE, 'C' ) * IF( RSCOPE ) THEN IF( BCAST ) THEN TRDEST = MYROW ELSE IF( MYROW.NE.TRDEST ) THEN RETURN END IF NP = NPCOL MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) ELSE IF( CSCOPE ) THEN IF( BCAST ) THEN TCDEST = MYCOL ELSE IF( MYCOL.NE.TCDEST ) THEN RETURN END IF NP = NPROW MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) ELSE IF( LSAME( SCOPE, 'A' ) ) THEN NP = NPROW * NPCOL IAM = MYROW*NPCOL + MYCOL DEST = TRDEST*NPCOL + TCDEST MYDIST = MOD( NP + IAM - DEST, NP ) ELSE RETURN END IF * IF( NP.LT.2 ) $ RETURN * MYDIST2 = MYDIST RMSSG = MYROW CMSSG = MYCOL I = 1 * 10 CONTINUE * IF( MOD( MYDIST, 2 ).NE.0 ) THEN * * If I am process that sends information * DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) * * Figure coordinates of dest of message * IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) END IF * CALL ZGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) * GO TO 20 * ELSE * * If I am a process receiving information, figure coordinates * of source of message * DIST = MYDIST2 + I IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) HISDIST = MOD( NP + CMSSG - TCDEST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) HISDIST = MOD( NP + RMSSG - TRDEST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) END IF * IF( MYDIST2.LT.HISDIST ) THEN * * If I have anyone sending to me * CALL ZGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) CALL SUBPTR( MINE, HIS ) * END IF MYDIST = MYDIST / 2 * END IF I = I * 2 * IF( I.LT.NP ) $ GO TO 10 * 20 CONTINUE * IF( BCAST ) THEN IF( MYDIST2.EQ.0 ) THEN CALL ZGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) ELSE CALL ZGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, $ TRDEST, TCDEST ) END IF END IF * RETURN * * End of PZTREECOMB * END * SUBROUTINE ZCOMBAMAX( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. COMPLEX*16 V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * ZCOMBAMAX finds the element having max. absolute value as well * as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) COMPLEX*16 array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) COMPLEX*16 array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG * .. * .. Statement Functions .. COMPLEX*16 ZDUM DOUBLE PRECISION CABS1 CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * IF( CABS1( V1( 1 ) ).LT.CABS1( V2( 1 ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of ZCOMBAMAX * END scalapack-1.8.0/TOOLS/npreroc.f0000640000175000017500000000471110363532303016036 0ustar muammarmuammar INTEGER FUNCTION NPREROC( N, NB, IPROC, ISRCPROC, NPROCS ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPROC, ISRCPROC, N, NB, NPROCS * .. * * Purpose * ======= * * NPREROC computes the Number of PREceeding Rows Or Columns of a * distributed matrix that are possessed by processes closer to * ISRCPROC than IPROC. Therefore, if ISRCPROC=0 and IPROC=4, then * NPREROC returns the number of distributed matrix rows or columns * owned by processes 0, 1, 2, and 3. * * Arguments * ========= * * N (global input) INTEGER * The number of rows or columns in the distributed matrix. * * NB (global input) INTEGER * Block size, size of the blocks the distributed matrix is * split into. * * IPROC (local intput) INTEGER * The coordinate of the process whose preceeding distributed * matrix rows or columns are to be determined. * * ISRCPROC (global input) INTEGER * The coordinate of the process that possesses the first * row or column of the distributed matrix. * * NPROCS (global input) INTEGER * The total number processes over which the matrix is * distributed. * * ===================================================================== * * .. Local Scalars .. INTEGER EXTRABLKS, MYDIST, NBLOCKS * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Figure PROC's distance from source process * MYDIST = MOD( NPROCS+IPROC-ISRCPROC, NPROCS ) * * Figure the total number of whole NB blocks N is split up into * NBLOCKS = N / NB * * Figure the minimum number of rows/cols previous processes could have * NPREROC = (NBLOCKS/NPROCS) * NB * MYDIST * * See if there are any extra blocks * EXTRABLKS = MOD( NBLOCKS, NPROCS ) * * If I have an extra block, all processes in front of me got one too * IF( MYDIST.LE.EXTRABLKS ) THEN NPREROC = NPREROC + NB*MYDIST * * If I have don't have an extra block, add in extra blocks of * preceeding processes and the partial block, if it exists * ELSE NPREROC = NPREROC + EXTRABLKS*NB + MOD( N, NB ) END IF * RETURN * * End of NPREROC * END scalapack-1.8.0/TOOLS/psfillpad.f0000640000175000017500000000555410363532303016352 0ustar muammarmuammar SUBROUTINE PSFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N REAL CHKVAL * .. * .. Array Arguments .. REAL A( * ) * .. * * Purpose * ======= * * PSFILLPAD surrounds a two dimensional local array with a guard- * zone initialized to the value CHKVAL. The user may later call the * routine PSCHEKPAD to discover if the guardzone has been * violated. There are three guardzones. The first is a buffer of size * IPRE that is before the start of the array. The second is the buffer * of size IPOST which is after the end of the array to be padded. * Finally, there is a guard zone inside every column of the array to * be padded, in the elements of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (local input) INTEGER * The number of rows in the local array. * * N (local input) INTEGER * The number of columns in the local array. * * A (local input/local output) REAL, array of * dimension (LDA,N). A location IPRE elements in front of * the matrix to be padded. * * LDA (local input) INTEGER * The leading Dimension of the local array to be padded. * * IPRE (local input) INTEGER * The size of the guard zone to put before the start of * padded array. * * IPOST (local input) INTEGER * The size of the guard zone to put after padded array. * * CHKVAL (local input) REAL * The value to pad matrix with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PSFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no post-guardzone in PSFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PSFILLPAD * END scalapack-1.8.0/TOOLS/sltimer.f0000640000175000017500000002547310363532303016055 0ustar muammarmuammar SUBROUTINE SLBOOT() * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * SLBOOT (re)sets all timers to 0, and enables SLtimer. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION STARTFLAG, ZERO PARAMETER ( STARTFLAG = -5.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .FALSE. DO 10 I = 1, NTIMER CPUSEC( I ) = ZERO WALLSEC( I ) = ZERO CPUSTART( I ) = STARTFLAG WALLSTART( I ) = STARTFLAG 10 CONTINUE * RETURN * * End of SLBOOT * END * SUBROUTINE SLTIMER( I ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER I * .. * * Purpose * ======= * * SLtimer provides a "stopwatch" functionality cpu/wall timer * (in seconds). Up to 64 separate timers can be functioning at once. * The first call starts the timer, and the second stops it. This * routine can be disenabled, so that calls to the timer are ignored. * This feature can be used to make sure certain sections of code do * not affect timings, even if they call routines which have SLtimer * calls in them. * * Arguments * ========= * * I (global input) INTEGER * The timer to stop/start. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION STARTFLAG PARAMETER ( STARTFLAG = -5.0D+0 ) * .. * .. External Functions .. DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00 * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * * If timing disabled, return * IF( DISABLED ) $ RETURN * IF( WALLSTART( I ).EQ.STARTFLAG ) THEN * * If timer has not been started, start it * WALLSTART( I ) = DWALLTIME00() CPUSTART( I ) = DCPUTIME00() * ELSE * * Stop timer and add interval to count * CPUSEC( I ) = CPUSEC( I ) + DCPUTIME00() - CPUSTART( I ) WALLSEC( I ) = WALLSEC( I ) + DWALLTIME00() - WALLSTART( I ) WALLSTART( I ) = STARTFLAG * END IF * RETURN * * End of SLTIMER * END * SUBROUTINE SLENABLE() * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * SLENABLE sets it so calls to SLtimer are not ignored. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .FALSE. * RETURN * END * SUBROUTINE SLDISABLE() * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * Purpose * ======= * * SLDISABLE sets it so calls to SLTIMER are ignored. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * DISABLED = .TRUE. * RETURN * * End of SLDISABLE * END * DOUBLE PRECISION FUNCTION SLINQUIRE( TIMETYPE, I ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 TIMETYPE INTEGER I * .. * * Purpose * ======= * * SLINQUIRE returns wall or cpu time that has accumulated in timer I. * * Arguments * ========= * * TIMETYPE (global input) CHARACTER * Controls what time will be returned: * = 'W': wall clock time is returned, * = 'C': CPU time is returned (default). * * I (global input) INTEGER * The timer to return. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION ERRFLAG PARAMETER ( ERRFLAG = -1.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION TIME * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00, LSAME * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * IF( LSAME( TIMETYPE, 'W' ) ) THEN * * If walltime not available on this machine, return -1 flag * IF( DWALLTIME00().EQ.ERRFLAG ) THEN TIME = ERRFLAG ELSE TIME = WALLSEC( I ) END IF ELSE IF( DCPUTIME00().EQ.ERRFLAG ) THEN TIME = ERRFLAG ELSE TIME = CPUSEC( I ) END IF END IF * SLINQUIRE = TIME * RETURN * * End of SLINQUIRE * END * SUBROUTINE SLCOMBINE( ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, $ TIMES ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER OP, SCOPE, TIMETYPE INTEGER IBEG, ICTXT, N * .. * .. Array Arguments .. DOUBLE PRECISION TIMES( N ) * .. * * Purpose * ======= * * SLCOMBINE takes the timing information stored on a scope of processes * and combines them into the user's TIMES array. * * Arguments * ========= * * ICTXT (local input) INTEGER * The BLACS context handle. * * SCOPE (global input) CHARACTER * Controls what processes in grid participate in combine. * Options are 'Rowwise', 'Columnwise', or 'All'. * * OP (global input) CHARACTER * Controls what combine should be done: * = '>': get maximal time on any process (default), * = '<': get minimal time on any process, * = '+': get sum of times across processes. * * TIMETYPE (global input) CHARACTER * Controls what time will be returned in TIMES: * = 'W': wall clock time, * = 'C': CPU time (default). * * N (global input) INTEGER * The number of timers to combine. * * IBEG (global input) INTEGER * The first timer to be combined. * * TIMES (global output) DOUBLE PRECISION array, dimension (N) * The requested timing information is returned in this array. * * ===================================================================== * * .. Parameters .. INTEGER NTIMER PARAMETER ( NTIMER = 64 ) DOUBLE PRECISION ERRFLAG PARAMETER ( ERRFLAG = -1.0D+0 ) * .. * .. Local Scalars .. LOGICAL TMPDIS INTEGER I * .. * .. External Subroutines .. EXTERNAL DGAMX2D, DGAMN2D, DGSUM2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DCPUTIME00, DWALLTIME00 EXTERNAL DCPUTIME00, DWALLTIME00, LSAME * .. * .. Common Blocks .. LOGICAL DISABLED DOUBLE PRECISION CPUSEC( NTIMER ), CPUSTART( NTIMER ), $ WALLSEC( NTIMER ), WALLSTART( NTIMER ) COMMON /SLTIMER00/ CPUSEC, WALLSEC, CPUSTART, WALLSTART, DISABLED * .. * .. Executable Statements .. * * Disable timer for combine operation * TMPDIS = DISABLED DISABLED = .TRUE. * * Copy timer information into user's times array * IF( LSAME( TIMETYPE, 'W' ) ) THEN * * If walltime not available on this machine, fill in times * with -1 flag, and return * IF( DWALLTIME00().EQ.ERRFLAG ) THEN DO 10 I = 1, N TIMES( I ) = ERRFLAG 10 CONTINUE RETURN ELSE DO 20 I = 1, N TIMES( I ) = WALLSEC( IBEG + I - 1 ) 20 CONTINUE END IF ELSE IF( DCPUTIME00().EQ.ERRFLAG ) THEN DO 30 I = 1, N TIMES( I ) = ERRFLAG 30 CONTINUE RETURN ELSE DO 40 I = 1, N TIMES( I ) = CPUSEC( IBEG + I - 1 ) 40 CONTINUE END IF ENDIF * * Combine all nodes' information, restore disabled, and return * IF( OP.EQ.'>' ) THEN CALL DGAMX2D( ICTXT, SCOPE, ' ', N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) ELSE IF( OP.EQ.'<' ) THEN CALL DGAMN2D( ICTXT, SCOPE, ' ', N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) ELSE IF( OP.EQ.'+' ) THEN CALL DGSUM2D( ICTXT, SCOPE, ' ', N, 1, TIMES, N, -1, 0 ) ELSE CALL DGAMX2D( ICTXT, SCOPE, ' ', N, 1, TIMES, N, -1, -1, $ -1, -1, 0 ) END IF * DISABLED = TMPDIS * RETURN * * End of SLCOMBINE * END scalapack-1.8.0/TOOLS/Makefile0000640000175000017500000000564510604624224015670 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Tools Makefile # # Creation date: March 20, 1995 # # Modified: February 15, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../SLmake.inc ATOOLS = iceil.o ilacpy.o ilcm.o indxg2p.o indxg2l.o \ indxl2g.o infog1l.o infog2l.o npreroc.o numroc.o \ chk1mat.o pchkxmat.o sltimer.o desc_convert.o \ descinit.o descset.o reshape.o SL_gridreshape.o \ SL_init.o ITOOLS = picol2row.o pirow2col.o pilaprnt.o pitreecomb.o pifillpad.o \ pichekpad.o pielset.o pielset2.o pielget.o STOOLS = dsnrm2.o dsasum.o slatcpy.o ssdot.o smatadd.o \ psmatadd.o pscol2row.o psrow2col.o pslaprnt.o pstreecomb.o \ psfillpad.o pschekpad.o pselset.o pselset2.o pselget.o \ pslaread.o pslawrite.o DTOOLS = dddot.o dlatcpy.o dmatadd.o pdmatadd.o pdcol2row.o \ pdrow2col.o pdlaprnt.o pdtreecomb.o pdfillpad.o pdchekpad.o \ pdelset.o pdelset2.o pdelget.o \ pdlaread.o pdlawrite.o CTOOLS = dscnrm2.o dscasum.o ccdotu.o ccdotc.o clatcpy.o \ cmatadd.o pcmatadd.o pccol2row.o pcrow2col.o pclaprnt.o \ pctreecomb.o pcfillpad.o pcchekpad.o pcelset.o pcelset2.o \ pcelget.o \ pclaread.o pclawrite.o ZTOOLS = zzdotu.o zzdotc.o zlatcpy.o zmatadd.o pzmatadd.o \ pzcol2row.o pzrow2col.o pzlaprnt.o pztreecomb.o pzfillpad.o \ pzchekpad.o pzelset.o pzelset2.o pzelget.o \ pzlaread.o pzlawrite.o all: single double complex complex16 slapackaux: ( cd LAPACK; $(MAKE) single FRC=$(FRC) ) dlapackaux: ( cd LAPACK; $(MAKE) double FRC=$(FRC) ) clapackaux: ( cd LAPACK; $(MAKE) complex FRC=$(FRC) ) zlapackaux: ( cd LAPACK; $(MAKE) complex16 FRC=$(FRC) ) integer: $(ATOOLS) $(ITOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ATOOLS) $(ITOOLS) $(RANLIB) $(SCALAPACKLIB) single: slapackaux integer $(STOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(STOOLS) $(RANLIB) $(SCALAPACKLIB) double: dlapackaux integer $(DTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(DTOOLS) $(RANLIB) $(SCALAPACKLIB) complex: clapackaux integer $(CTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(CTOOLS) $(RANLIB) $(SCALAPACKLIB) complex16: zlapackaux integer $(ZTOOLS) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ZTOOLS) $(RANLIB) $(SCALAPACKLIB) $(ATOOLS): $(FRC) $(ITOOLS): $(FRC) $(STOOLS): $(FRC) $(DTOOLS): $(FRC) $(CTOOLS): $(FRC) $(ZTOOLS): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o ( cd LAPACK; $(MAKE) clean ) .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(CDEFS) $*.c scalapack-1.8.0/TOOLS/pzelget.f0000640000175000017500000001630710363532303016044 0ustar muammarmuammar SUBROUTINE PZELGET( SCOPE, TOP, ALPHA, A, IA, JA, DESCA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 SCOPE, TOP INTEGER IA, JA COMPLEX*16 ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZELGET sets alpha to the distributed matrix entry A( IA, JA ). * The value of alpha is set according to the scope. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCOPE (global input) CHARACTER*1 * The BLACS scope in which alpha is updated. * If SCOPE = 'R', alpha is updated only in the process row * containing A( IA, JA ), * If SCOPE = 'C', alpha is updated only in the process column * containing A( IA, JA ), * If SCOPE = 'A', alpha is updated in all the processes of the * grid, * otherwise alpha is updated only in the process containing * A( IA, JA ). * * TOP (global input) CHARACTER*1 * The topology to be used if broadcast is needed. * * ALPHA (global output) COMPLEX*16, the scalar alpha. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, IIA, IOFFA, JJA, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZGEBR2D, ZGEBS2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * ALPHA = ZERO * IF( LSAME( SCOPE, 'R' ) ) THEN IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL ZGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL ZGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'C' ) ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL ZGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL ZGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'A' ) ) THEN IF( ( MYROW.EQ.IAROW ).AND.( MYCOL.EQ.IACOL ) ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL ZGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL ZGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF ELSE IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ ALPHA = A( IIA+(JJA-1)*DESCA( LLD_ ) ) END IF * RETURN * * End of PZELGET * END scalapack-1.8.0/TOOLS/pzlaprnt.f0000640000175000017500000003101310363532303016233 0ustar muammarmuammar SUBROUTINE PZLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLAPRNT prints to the standard output a distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and * printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory to a * local array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IRPRNT (global input) INTEGER * The row index of the printing process. * * ICPRNT (global input) INTEGER * The column index of the printing process. * * CMATNM (global input) CHARACTER*(*) * Identifier of the distributed matrix to be printed. * * NOUT (global input) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. * * WORK (local workspace) COMPLEX*16 * Working array of minimum size equal to MB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ ZGERV2D, ZGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG, MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, $ DBLE( A( II+K+(JJ+H-1)*LDA ) ), $ DIMAG( A( II+K+(JJ+H-1)*LDA ) ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, DBLE( WORK( K ) ), $ DIMAG( WORK( K ) ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT(A,'(',I6,',',I6,')=',D30.18, '+i*(',D30.18, ')') * RETURN * * End of PZLAPRNT * END scalapack-1.8.0/TOOLS/psrow2col.f0000640000175000017500000002146510363532303016325 0ustar muammarmuammar SUBROUTINE PSROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, $ RSRC, CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. REAL VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a row of processes, and distribute those rows over a column of * processes. This routine minimizes communication by sending all * information it has that a given process in the CDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process row consisting of Q processes, and I want to send all of that * vector that I own to a new vector distributed over P processes within * a process column, that after I find the process in RDEST that owns * the row of the vector I'm currently looking at, he will want every * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a row. * * VS (local input) REAL * Array of dimension (LDVS,N), the block of vectors stored on * process row RSRC to be put into memory VD, and stored on * process column CDEST. * * LDVS (local input) INTEGER * The leading dimension of VS. * * VD (local output) REAL * Array of dimension (LDVD,N), on output, the contents of VD * stored on process column CDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD. * * RSRC (global input) INTEGER * The process row VS is distributed over. * * CSRC (global input) INTEGER * The process column the distributed block of vectors VS * begins on. * * RDEST (global input) INTEGER * The process row that VD begins on. * * CDEST (global input) INTEGER * The process column to distribute VD over. * * WORK (local workspace) REAL * Array, dimension (LDW). The required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mq = number of rows of VS on my process. * npcol = number of process columns * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGESD2D, SGERV2D, SLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there is no * copying required * IF( NPROW .NE. NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYROW.EQ.RSRC ) THEN * ISTART = 1 * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my first block * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) * * Loop over all possible destination processes * DO 20 K = 1, RBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL SLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary, NOTE: will send extra if NB > JB * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL SGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST, $ CDEST ) * * I am both source and destination, save where to start * copying from for later use * ELSE ICPY = ISTART END IF * ISTART = ISTART + NB IRDEST = MOD( IRDEST+NPCOL, NPROW ) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYCOL.EQ.CDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) * * Loop over all sending processes * DO 50 K = 1, CBLKSKIP * * If I don't already possess the required data * IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MP - ISTART + NB) / NB JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL SGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL SLACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1), $ LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL SLACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1), $ LDVD ) JJ = JJ + NB*RBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB ICSRC = MOD( ICSRC+NPROW, NPCOL ) 50 CONTINUE END IF * * if NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYROW.EQ.RSRC ) THEN * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my piece of the vector * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN CALL SGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST ) ELSE CALL SLACPY( 'G', MQ, N, VS, LDVS, VD, LDVD ) END IF END IF IF( MYCOL.EQ.CDEST ) THEN * * Figure my distance from RDEST: the process in RSRC the same * distance from CSRC will have my piece of the vector * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) ) $ CALL SGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC ) END IF END IF * RETURN * * End of PSROW2COL * END scalapack-1.8.0/TOOLS/dlatcpy.f0000640000175000017500000000434210363532303016026 0ustar muammarmuammar SUBROUTINE DLATCPY( UPLO, M, N, A, LDA, B, LDB ) * * * .. Scalar Arguments .. CHARACTER UPLO INTEGER LDA, LDB, M, N * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * DLATCPY copies all or part of a two-dimensional matrix A to another * matrix B in transpose form. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies the part of the matrix A to be copied to B. * = 'U': Upper triangular part * = 'L': Lower triangular part * Otherwise: All of the matrix A * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The m by n matrix A. If UPLO = 'U', only the upper triangle * or trapezoid is accessed; if UPLO = 'L', only the lower * triangle or trapezoid is accessed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * B (output) DOUBLE PRECISION array, dimension (LDB,M) * On exit, B = A^T in the locations specified by UPLO. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * * .. * .. Executable Statements .. * IF( LSAME( UPLO, 'U' ) ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( J, I ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN DO 40 J = 1, N DO 30 I = J, M B( J, I ) = A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = A( I, J ) 50 CONTINUE 60 CONTINUE END IF RETURN * * End of DLATCPY * END scalapack-1.8.0/TOOLS/pdcol2row.f0000640000175000017500000002161510363532303016303 0ustar muammarmuammar SUBROUTINE PDCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. DOUBLE PRECISION VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a column of processes, and distribute those rows over a row of * processes. This routine minimizes communication by sending all * information it has that a given process in the RDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process column consisting of P processes, and I want to send all of * that vector that I own to a new vector distributed over Q processes * within a process row, that after I find the process in RDEST that * owns the row of the vector I'm currently looking at, he will want * every ( (LCM(P,Q) / P ) block of my vector (the block being of size * NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) DOUBLE PRECISION * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST. * * LDVS (local input) INTEGER * The leading dimension of VS, LDVS >= MAX( 1, MP ). * * VD (local output) DOUBLE PRECISION * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD, LDVD >= MAX( 1, MQ ). * * RSRC (global input) INTEGER * The process row the distributed block of vectors VS begins * on. * * CSRC (global input) INTEGER * The process column VS is distributed over. * * RDEST (global input) INTEGER * The process row to distribute VD over. * * CDEST (global input) INTEGER * The process column that VD begins on. * * WORK (local workspace) DOUBLE PRECISION * Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGESD2D, DGERV2D, DLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL DLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL DGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL DGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL DLACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL DLACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL DGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL DLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL DGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PDCOL2ROW * END scalapack-1.8.0/TOOLS/infog1l.f0000640000175000017500000000432510363532303015726 0ustar muammarmuammar SUBROUTINE INFOG1L( GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, $ ROCSRC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER GINDX, ISRCPROC, LINDX, MYROC, NB, NPROCS, $ ROCSRC * .. * * Purpose * ======= * * INFOG1L computes the starting local indexes LINDX corresponding to * the distributed submatrix starting globally at the entry pointed by * GINDX. This routine returns the coordinates of the process in the * grid owning the submatrix entry of global index GINDX: ROCSRC. * INFOG1L is a 1-dimensional version of INFOG2L. * * Arguments * ========= * * GINDX (global input) INTEGER * The global starting index of the submatrix. * * NB (global input) INTEGER * The block size. * * NPROCS (global input) INTEGER * The total number of processes over which the distributed * submatrix is distributed. * * MYROC (local input) INTEGER * The coordinate of the process calling this routine. * * ISRCPROC (global input) INTEGER * The coordinate of the process having the first entry of * the distributed submatrix. * * LINDX (local output) INTEGER * The local starting indexes of the distributed submatrix. * * ROCSRC (global output) INTEGER * The coordinate of the process that possesses the first * row and column of the submatrix. * * ===================================================================== * * .. Local Scalars .. INTEGER GCPY, IBLK * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * GCPY = GINDX-1 IBLK = GCPY / NB ROCSRC = MOD( IBLK + ISRCPROC, NPROCS ) * LINDX = ( IBLK / NPROCS + 1 ) * NB + 1 * IF( MOD(MYROC+NPROCS-ISRCPROC,NPROCS).GE.MOD(IBLK, NPROCS) ) THEN IF( MYROC.EQ.ROCSRC ) $ LINDX = LINDX + MOD( GCPY, NB ) LINDX = LINDX - NB END IF * RETURN * * End of INFOG1L * END scalapack-1.8.0/TOOLS/desc_convert.f0000640000175000017500000000565110363532303017050 0ustar muammarmuammar SUBROUTINE DESC_CONVERT( DESC_IN, DESC_OUT, INFO ) * * * .. Array Arguments .. INTEGER DESC_IN( * ), DESC_OUT( * ), INFO * .. * * Purpose * ======= * * Converts descriptors from one type to another if they are compatible. * * Supports *ONLY* an output descriptor type of 1D_horizontal (type * number 501) or 1D_vertical (number 502). * Supports only one-dimensional 1xP input grids if descriptor_in is 2D. * * Arguments * ========= * * DESC_IN: (input) input descriptor * * DESC_OUT: (output) output descriptor (required to be 1D_horizontal * in this release). * * INFO: (output) return code * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * * .. Local Scalars .. INTEGER DESC_TYPE, DESC_TYPE_IN, ICTXT INTEGER CSRC, RSRC, MB, NB, LLDA INTEGER M, N, NPROW, NPCOL, IDUM1, IDUM2 * * .. External routines .. * EXTERNAL BLACS_GRIDINFO * .. * .. Executable Statements .. * INFO = 0 * DESC_TYPE_IN = DESC_IN( 1 ) * IF( DESC_TYPE_IN .EQ. BLOCK_CYCLIC_2D ) THEN ICTXT = DESC_IN( CTXT_ ) RSRC = DESC_IN( RSRC_ ) CSRC = DESC_IN( CSRC_ ) MB = DESC_IN( MB_ ) NB = DESC_IN( NB_ ) LLDA = DESC_IN( LLD_ ) M = DESC_IN( M_ ) N = DESC_IN( N_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, IDUM1, IDUM2 ) ELSEIF ( DESC_TYPE_IN .EQ. 502 ) THEN ICTXT = DESC_IN( 2 ) RSRC = DESC_IN( 5 ) CSRC = 1 MB = DESC_IN( 4 ) NB = 1 LLDA = DESC_IN( 6 ) M = DESC_IN( 3 ) N = 1 NPROW = 0 NPCOL = 1 ELSEIF ( DESC_TYPE_IN .EQ. 501 ) THEN ICTXT = DESC_IN( 2 ) RSRC = 1 CSRC = DESC_IN( 5 ) MB = 1 NB = DESC_IN( 4 ) LLDA = DESC_IN( 6 ) M = 1 N = DESC_IN( 3 ) NPROW = 1 NPCOL = 0 ENDIF * * DESC_TYPE = DESC_OUT( 1 ) * IF( DESC_TYPE .EQ. 501 ) THEN IF( NPROW .NE. 1 )THEN INFO = -1 RETURN ENDIF DESC_OUT( 2 ) = ICTXT DESC_OUT( 5 ) = CSRC DESC_OUT( 4 ) = NB DESC_OUT( 6 ) = LLDA DESC_OUT( 3 ) = N ELSEIF( DESC_TYPE .EQ. 502 ) THEN IF( NPCOL .NE. 1 )THEN INFO = -1 RETURN ENDIF DESC_OUT( 2 ) = ICTXT DESC_OUT( 5 ) = RSRC DESC_OUT( 4 ) = MB DESC_OUT( 6 ) = LLDA DESC_OUT( 3 ) = M ENDIF * RETURN * * End of DESC_CONVERT * END scalapack-1.8.0/TOOLS/pslawrite.f0000640000175000017500000001733010604624224016403 0ustar muammarmuammar SUBROUTINE PSLAWRITE( FILNAM, M, N, A, IA, JA, DESCA, IRWRIT, $ ICWRIT, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER IA, ICWRIT, IRWRIT, JA, M, N * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAWRITE writes to a file named FILNAMa distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent to and * written by the process of coordinates (IRWWRITE, ICWRIT). * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 13 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN OPEN( NOUT, FILE=FILNAM, STATUS='UNKNOWN' ) WRITE( NOUT, FMT = * ) M, N END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CLOSE( NOUT ) END IF * 9999 FORMAT( E15.9 ) * RETURN * * End of PSLAWRITE * END scalapack-1.8.0/TOOLS/pielset2.f0000640000175000017500000001235510363532303016120 0ustar muammarmuammar SUBROUTINE PIELSET2( ALPHA, A, IA, JA, DESCA, BETA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ALPHA, BETA, IA, JA * .. * .. Array arguments .. INTEGER A( * ), DESCA( * ) * .. * * Purpose * ======= * * PIELSET2 sets alpha to the distributed matrix entry A(IA,JA) * and A(IA,JA) to beta. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * ALPHA (local output) @(typec) * The scalar alpha. * * A (local input/local ouput) @(typec) pointer into the * local memory to an array of dimension (LLD_A,*) containing * the local pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (local input) @(typec) * The scalar beta. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ZERO PARAMETER ( ZERO = 0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, IOFFA, JJA, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) ALPHA = A( IOFFA ) A( IOFFA ) = BETA ELSE ALPHA = ZERO END IF * RETURN * * End of PIELSET2 * END scalapack-1.8.0/TOOLS/pzelset2.f0000640000175000017500000001246410363532303016142 0ustar muammarmuammar SUBROUTINE PZELSET2( ALPHA, A, IA, JA, DESCA, BETA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA COMPLEX*16 ALPHA, BETA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZELSET2 sets alpha to the distributed matrix entry A(IA,JA) * and A(IA,JA) to beta. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * ALPHA (local output) COMPLEX*16 * The scalar alpha. * * A (local input/local ouput) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,*) containing * the local pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (local input) COMPLEX*16 * The scalar beta. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, IOFFA, JJA, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) ALPHA = A( IOFFA ) A( IOFFA ) = BETA ELSE ALPHA = ZERO END IF * RETURN * * End of PZELSET2 * END scalapack-1.8.0/TOOLS/pcelset.f0000640000175000017500000001177310363532303016033 0ustar muammarmuammar SUBROUTINE PCELSET( A, IA, JA, DESCA, ALPHA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA COMPLEX ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCELSET sets the distributed matrix entry A( IA, JA ) to ALPHA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ALPHA (local input) COMPLEX * The scalar alpha. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, JJA, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ A( IIA+(JJA-1)*DESCA( LLD_ ) ) = ALPHA * RETURN * * End of PCELSET * END scalapack-1.8.0/TOOLS/clatcpy.f0000640000175000017500000000443410363532303016027 0ustar muammarmuammar SUBROUTINE CLATCPY( UPLO, M, N, A, LDA, B, LDB ) * * * .. Scalar Arguments .. CHARACTER UPLO INTEGER LDA, LDB, M, N * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * CLATCPY copies all or part of a two-dimensional matrix A to another * matrix B in transpose form. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies the part of the matrix A to be copied to B. * = 'U': Upper triangular part * = 'L': Lower triangular part * Otherwise: All of the matrix A * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The m by n matrix A. If UPLO = 'U', only the upper triangle * or trapezoid is accessed; if UPLO = 'L', only the lower * triangle or trapezoid is accessed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * B (output) DOUBLE PRECISION array, dimension (LDB,M) * On exit, B = A^T in the locations specified by UPLO. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN INTRINSIC CONJG * * .. * .. Executable Statements .. * IF( LSAME( UPLO, 'U' ) ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( J, I ) = CONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN DO 40 J = 1, N DO 30 I = J, M B( J, I ) = CONJG( A( I, J ) ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = CONJG( A( I, J ) ) 50 CONTINUE 60 CONTINUE END IF RETURN * * End of CLATCPY * END scalapack-1.8.0/TOOLS/psmatadd.f0000640000175000017500000002674110363532303016172 0ustar muammarmuammar SUBROUTINE PSMATADD( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, $ JC, DESCC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, JA, JC, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ) * .. * * Purpose * ======= * * PSMATADD performs a distributed matrix-matrix addition * * sub( C ) := alpha * sub( A ) + beta * sub( C ), * * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this * routine, the arrays are supposed to be aligned. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and * sub( C ). N >= 0. * * ALPHA (global input) REAL * The scalar ALPHA. * * A (local input) REAL pointer into the local memory * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This * array contains the local pieces of the distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (global input) REAL * The scalar BETA. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * This array contains the local pieces of the distributed * matrix sub( C ). On exit, this array contains the local * pieces of the resulting distributed matrix. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA, $ IIC, IOFFA, IOFFC, IROFF, J, JJA, JJC, LDA, $ LDC, MP, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA(CTXT_), NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR. $ ((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) * IROFF = MOD( IA-1, DESCA(MB_) ) ICOFF = MOD( JA-1, DESCA(NB_) ) MP = NUMROC( M+IROFF, DESCA(MB_), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA(NB_), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF LDA = DESCA(LLD_) LDC = DESCC(LLD_) * IF( NQ.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC + (JJC-1)*LDC DO 10 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 10 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 20 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 30 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 30 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 40 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 50 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 50 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 60 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC+(JJC-1)*LDC DO 80 J = 1, NQ DO 70 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 70 CONTINUE IOFFC = IOFFC + LDC 80 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 100 J = 1, NQ DO 90 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 90 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 120 J = 1, NQ DO 110 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 110 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 120 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 140 J = 1, NQ DO 130 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 130 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 160 J = 1, NQ DO 150 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 150 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 160 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 180 J = 1, NQ DO 170 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 170 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 180 CONTINUE END IF END IF END IF * RETURN * * End of PSMATADD * END scalapack-1.8.0/TOOLS/reshape.c0000640000175000017500000001014510363532303016010 0ustar muammarmuammar#include void Creshape( context_in, major_in, context_out, major_out, first_proc, nprow_new, npcol_new ) int context_in, *context_out, first_proc, major_in, major_out, nprow_new, npcol_new; /* major in, major out represent whether processors go row major (1) or column major (2) in the input and output grids */ { /** called subprograms **/ void proc_inc(); void Cblacs_gridinfo(); int Cblacs_pnum(); void Cblacs_get(); void Cblacs_gridmap(); /** variables **/ int i, j; int nprow_in, npcol_in, myrow_in, mycol_in; int nprocs_new; int myrow_old, mycol_old, myrow_new, mycol_new; int pnum; int *grid_new; /********** executable statements ************/ nprocs_new = nprow_new * npcol_new; Cblacs_gridinfo( context_in, &nprow_in, &npcol_in, &myrow_in, &mycol_in ); /* Quick return if possible */ if( ( nprow_in == nprow_new ) && ( npcol_in == npcol_new ) && ( first_proc == 0 ) && ( major_in == major_out ) ) { *context_out = context_in; return; } /* allocate space for new process mapping */ grid_new = (int *) malloc( nprocs_new * sizeof( int ) ); /* set place in old grid to start grabbing processors for new grid */ myrow_old = 0; mycol_old = 0; if ( major_in == 1 ) /* row major */ { myrow_old = first_proc / nprow_in; mycol_old = first_proc % nprow_in; } else /* col major */ { myrow_old = first_proc % nprow_in; mycol_old = first_proc / nprow_in; } myrow_new = 0; mycol_new = 0; /* Set up array of process numbers for new grid */ for (i=0; i< nprocs_new; i++ ) { pnum = Cblacs_pnum( context_in, myrow_old, mycol_old ); grid_new[ (mycol_new * nprow_new) + myrow_new ] = pnum; proc_inc( &myrow_old, &mycol_old, nprow_in, npcol_in, major_in ); proc_inc( &myrow_new, &mycol_new, nprow_new, npcol_new, major_out ); } /* get context */ Cblacs_get( context_in, 10, context_out ); /* allocate grid */ Cblacs_gridmap( context_out, grid_new, nprow_new, nprow_new, npcol_new ); /* free malloced space */ free( grid_new ); } /*************************************************************************/ void reshape( context_in, major_in, context_out, major_out, first_proc, nprow_new, npcol_new ) int *context_in, *context_out, *first_proc, *major_in, *major_out, *nprow_new, *npcol_new; { Creshape( *context_in, *major_in, context_out, *major_out, *first_proc, *nprow_new, *npcol_new ); } /*************************************************************************/ void RESHAPE( context_in, major_in, context_out, major_out, first_proc, nprow_new, npcol_new ) int *context_in, *context_out, *first_proc, *major_in, *major_out, *nprow_new, *npcol_new; { Creshape( *context_in, *major_in, context_out, *major_out, *first_proc, *nprow_new, *npcol_new ); } /*************************************************************************/ void reshape_( context_in, major_in, context_out, major_out, first_proc, nprow_new, npcol_new ) int *context_in, *context_out, *first_proc, *major_in, *major_out, *nprow_new, *npcol_new; { Creshape( *context_in, *major_in, context_out, *major_out, *first_proc, *nprow_new, *npcol_new ); } /*************************************************************************/ void proc_inc( myrow, mycol, nprow, npcol, major ) int *myrow, *mycol, nprow, npcol, major; { if( major == 1) /* row major */ { if( *mycol == npcol-1 ) { *mycol = 0; if( *myrow == nprow-1 ) { *myrow = 0; } else { *myrow = *myrow + 1; } } else { *mycol = *mycol + 1; } } else /* col major */ { if( *myrow == nprow-1 ) { *myrow = 0; if( *mycol == npcol-1 ) { *mycol = 0; } else { *mycol = *mycol + 1; } } else { *myrow = *myrow + 1; } } } scalapack-1.8.0/TOOLS/pselset2.f0000640000175000017500000001242610363532303016131 0ustar muammarmuammar SUBROUTINE PSELSET2( ALPHA, A, IA, JA, DESCA, BETA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA REAL ALPHA, BETA * .. * .. Array arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSELSET2 sets alpha to the distributed matrix entry A(IA,JA) * and A(IA,JA) to beta. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * ALPHA (local output) REAL * The scalar alpha. * * A (local input/local ouput) REAL pointer into the * local memory to an array of dimension (LLD_A,*) containing * the local pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (local input) REAL * The scalar beta. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, IOFFA, JJA, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) ALPHA = A( IOFFA ) A( IOFFA ) = BETA ELSE ALPHA = ZERO END IF * RETURN * * End of PSELSET2 * END scalapack-1.8.0/TOOLS/SL_init.f0000640000175000017500000000344610363532303015733 0ustar muammarmuammar SUBROUTINE SL_INIT( ICTXT, NPROW, NPCOL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, NPCOL, NPROW * .. * * Purpose * ======= * * SL_INIT initializes an NPROW x NPCOL process grid using a row-major * ordering of the processes. This routine retrieves a default system * context which will include all available processes. In addition it * spawns the processes if needed. * * Arguments * ========= * * ICTXT (global output) INTEGER * ICTXT specifies the BLACS context handle identifying the * created process grid. The context itself is global. * * NPROW (global input) INTEGER * NPROW specifies the number of process rows in the grid * to be created. * * NPCOL (global input) INTEGER * NPCOL specifies the number of process columns in the grid * to be created. * * ===================================================================== * * .. Local Scalars .. INTEGER IAM, NPROCS * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDINIT, BLACS_PINFO, $ BLACS_SETUP * .. * .. Executable Statements .. * * Get starting information * CALL BLACS_PINFO( IAM, NPROCS ) * * If machine needs additional set up, do it now * IF( NPROCS.LT.1 ) THEN IF( IAM.EQ.0 ) $ NPROCS = NPROW * NPCOL CALL BLACS_SETUP( IAM, NPROCS ) END IF * * Define process grid * CALL BLACS_GET( -1, 0, ICTXT ) CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL ) * RETURN * * End of SL_INIT * END scalapack-1.8.0/TOOLS/ilacpy.f0000640000175000017500000000463410363532303015653 0ustar muammarmuammar SUBROUTINE ILACPY( UPLO, M, N, A, LDA, B, LDB ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER LDA, LDB, M, N * .. * .. Array Arguments .. INTEGER A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ILACPY copies all or part of a local array A to another array B. * * Arguments * ========= * * UPLO (local input) CHARACTER*1 * Specifies the part of the array A to be copied to B. * = 'U': Upper triangular part * = 'L': Lower triangular part * Otherwise: All of the array A * * M (local input) INTEGER * The number of rows of the array A. M >= 0. * * N (local input) INTEGER * The number of columns of the array A. N >= 0. * * A (local input) INTEGER * Array, dimension (LDA,N), the m by n array A. * If UPLO = 'U', only the upper triangle or trapezoid is * accessed; if UPLO = 'L', only the lower triangle or trapezoid * is accessed. * * LDA (local input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * B (local output) INTEGER * Array, dimension (LDB,N), on exit, B = A in the locations * specified by UPLO. * * LDB (local input) INTEGER * The leading dimension of the array B. LDB >= max(1,M). * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * IF( LSAME( UPLO, 'U' ) ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( I, J ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN DO 40 J = 1, N DO 30 I = J, M B( I, J ) = A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( I, J ) = A( I, J ) 50 CONTINUE 60 CONTINUE END IF RETURN * * End of ILACPY * END scalapack-1.8.0/TOOLS/pccol2row.f0000640000175000017500000002156210363532303016303 0ustar muammarmuammar SUBROUTINE PCCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. COMPLEX VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a column of processes, and distribute those rows over a row of * processes. This routine minimizes communication by sending all * information it has that a given process in the RDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process column consisting of P processes, and I want to send all of * that vector that I own to a new vector distributed over Q processes * within a process row, that after I find the process in RDEST that * owns the row of the vector I'm currently looking at, he will want * every ( (LCM(P,Q) / P ) block of my vector (the block being of size * NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) COMPLEX * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST. * * LDVS (local input) INTEGER * The leading dimension of VS, LDVS >= MAX( 1, MP ). * * VD (local output) COMPLEX * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD, LDVD >= MAX( 1, MQ ). * * RSRC (global input) INTEGER * The process row the distributed block of vectors VS begins * on. * * CSRC (global input) INTEGER * The process column VS is distributed over. * * RDEST (global input) INTEGER * The process row to distribute VD over. * * CDEST (global input) INTEGER * The process column that VD begins on. * * WORK (local workspace) COMPLEX * Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGESD2D, CGERV2D, CLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL CLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL CGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL CGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL CLACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL CLACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL CGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL CLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL CGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PCCOL2ROW * END scalapack-1.8.0/TOOLS/pclaread.f0000640000175000017500000001114210604624224016137 0ustar muammarmuammar SUBROUTINE PCLAREAD( FILNAM, A, DESCA, IRREAD, ICREAD, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER ICREAD, IRREAD * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLAREAD reads from a file named FILNAM a matrix and distribute * it to the process grid. * * Only the process of coordinates {IRREAD, ICREAD} read the file. * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IB, ICTXT, ICURCOL, ICURROW, II, J, JB, $ JJ, K, LDA, M, MYCOL, MYROW, N, NPCOL, NPROW REAL REAL_PART, IMAG_PART * .. * .. Local Arrays .. INTEGER IWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, CGERV2D, CGESD2D, $ IGEBS2D, IGEBR2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN OPEN( NIN, FILE=FILNAM, STATUS='OLD' ) READ( NIN, FMT = * ) ( IWORK( I ), I = 1, 2 ) CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2, IRREAD, $ ICREAD ) END IF M = IWORK( 1 ) N = IWORK( 2 ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * IF( M.GT.DESCA( M_ ).OR. N.GT.DESCA( N_ ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( *, FMT = * ) 'PCLAREAD: Matrix too big to fit in' WRITE( *, FMT = * ) 'Abort ...' END IF CALL BLACS_ABORT( ICTXT, 0 ) END IF * II = 1 JJ = 1 ICURROW = DESCA( RSRC_ ) ICURCOL = DESCA( CSRC_ ) LDA = DESCA( LLD_ ) * * Loop over column blocks * DO 50 J = 1, N, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), N-J+1 ) DO 40 H = 0, JB-1 * * Loop over block of rows * DO 30 I = 1, M, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), M-I+1 ) IF( ICURROW.EQ.IRREAD .AND. ICURCOL.EQ.ICREAD ) THEN IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 10 K = 0, IB-1 READ( NIN , FMT = *) REAL_PART, IMAG_PART A( II+K+(JJ+H-1)*LDA ) = CMPLX(REAL_PART, IMAG_PART) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGERV2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRREAD, ICREAD ) ELSE IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 20 K = 1, IB READ( NIN, FMT = * ) REAL_PART, IMAG_PART WORK(K)=CMPLX(REAL_PART,IMAG_PART) 20 CONTINUE CALL CGESD2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = 1 ICURROW = DESCA( RSRC_ ) 40 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN CLOSE( NIN ) END IF * RETURN * * End of PCLAREAD * END scalapack-1.8.0/TOOLS/pslaread.f0000640000175000017500000001064510604624224016166 0ustar muammarmuammar SUBROUTINE PSLAREAD( FILNAM, A, DESCA, IRREAD, ICREAD, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER ICREAD, IRREAD * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAREAD reads from a file named FILNAM a matrix and distribute * it to the process grid. * * Only the process of coordinates {IRREAD, ICREAD} read the file. * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IB, ICTXT, ICURCOL, ICURROW, II, J, JB, $ JJ, K, LDA, M, MYCOL, MYROW, N, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SGERV2D, SGESD2D, $ IGEBS2D, IGEBR2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN OPEN( NIN, FILE=FILNAM, STATUS='OLD' ) READ( NIN, FMT = * ) ( IWORK( I ), I = 1, 2 ) CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2, IRREAD, $ ICREAD ) END IF M = IWORK( 1 ) N = IWORK( 2 ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * IF( M.GT.DESCA( M_ ).OR. N.GT.DESCA( N_ ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( *, FMT = * ) 'PSLAREAD: Matrix too big to fit in' WRITE( *, FMT = * ) 'Abort ...' END IF CALL BLACS_ABORT( ICTXT, 0 ) END IF * II = 1 JJ = 1 ICURROW = DESCA( RSRC_ ) ICURCOL = DESCA( CSRC_ ) LDA = DESCA( LLD_ ) * * Loop over column blocks * DO 50 J = 1, N, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), N-J+1 ) DO 40 H = 0, JB-1 * * Loop over block of rows * DO 30 I = 1, M, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), M-I+1 ) IF( ICURROW.EQ.IRREAD .AND. ICURCOL.EQ.ICREAD ) THEN IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 10 K = 0, IB-1 READ( NIN, FMT = * ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGERV2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRREAD, ICREAD ) ELSE IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 20 K = 1, IB READ( NIN, FMT = * ) WORK( K ) 20 CONTINUE CALL SGESD2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = 1 ICURROW = DESCA( RSRC_ ) 40 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN CLOSE( NIN ) END IF * RETURN * * End of PSLAREAD * END scalapack-1.8.0/TOOLS/pirow2col.f0000640000175000017500000002150110363532303016302 0ustar muammarmuammar SUBROUTINE PIROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, $ RSRC, CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. INTEGER VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a row of processes, and distribute those rows over a column of * processes. This routine minimizes communication by sending all * information it has that a given process in the CDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process row consisting of Q processes, and I want to send all of that * vector that I own to a new vector distributed over P processes within * a process column, that after I find the process in RDEST that owns * the row of the vector I'm currently looking at, he will want every * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a row. * * VS (local input) @(typec) * Array of dimension (LDVS,N), the block of vectors stored on * process row RSRC to be put into memory VD, and stored on * process column CDEST. * * LDVS (local input) INTEGER * The leading dimension of VS. * * VD (local output) @(typec) * Array of dimension (LDVD,N), on output, the contents of VD * stored on process column CDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD. * * RSRC (global input) INTEGER * The process row VS is distributed over. * * CSRC (global input) INTEGER * The process column the distributed block of vectors VS * begins on. * * RDEST (global input) INTEGER * The process row that VD begins on. * * CDEST (global input) INTEGER * The process column to distribute VD over. * * WORK (local workspace) @(typec) * Array, dimension (LDW). The required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mq = number of rows of VS on my process. * npcol = number of process columns * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGESD2D, IGERV2D, ILACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there is no * copying required * IF( NPROW .NE. NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYROW.EQ.RSRC ) THEN * ISTART = 1 * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my first block * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) * * Loop over all possible destination processes * DO 20 K = 1, RBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ILACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary, NOTE: will send extra if NB > JB * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL IGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST, $ CDEST ) * * I am both source and destination, save where to start * copying from for later use * ELSE ICPY = ISTART END IF * ISTART = ISTART + NB IRDEST = MOD( IRDEST+NPCOL, NPROW ) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYCOL.EQ.CDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) * * Loop over all sending processes * DO 50 K = 1, CBLKSKIP * * If I don't already possess the required data * IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MP - ISTART + NB) / NB JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL IGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL ILACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1), $ LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL ILACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1), $ LDVD ) JJ = JJ + NB*RBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB ICSRC = MOD( ICSRC+NPROW, NPCOL ) 50 CONTINUE END IF * * if NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYROW.EQ.RSRC ) THEN * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my piece of the vector * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN CALL IGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST ) ELSE CALL ILACPY( 'G', MQ, N, VS, LDVS, VD, LDVD ) END IF END IF IF( MYCOL.EQ.CDEST ) THEN * * Figure my distance from RDEST: the process in RSRC the same * distance from CSRC will have my piece of the vector * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) ) $ CALL IGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC ) END IF END IF * RETURN * * End of PIROW2COL * END scalapack-1.8.0/TOOLS/zlatcpy.f0000640000175000017500000000444010363532303016053 0ustar muammarmuammar SUBROUTINE ZLATCPY( UPLO, M, N, A, LDA, B, LDB ) * * * .. Scalar Arguments .. CHARACTER UPLO INTEGER LDA, LDB, M, N * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * ZLATCPY copies all or part of a two-dimensional matrix A to another * matrix B in transpose form. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies the part of the matrix A to be copied to B. * = 'U': Upper triangular part * = 'L': Lower triangular part * Otherwise: All of the matrix A * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The m by n matrix A. If UPLO = 'U', only the upper triangle * or trapezoid is accessed; if UPLO = 'L', only the lower * triangle or trapezoid is accessed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * B (output) DOUBLE PRECISION array, dimension (LDB,M) * On exit, B = A^T in the locations specified by UPLO. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN INTRINSIC DCONJG * * .. * .. Executable Statements .. * IF( LSAME( UPLO, 'U' ) ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( J, I ) = DCONJG( A( I, J ) ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN DO 40 J = 1, N DO 30 I = J, M B( J, I ) = DCONJG( A( I, J ) ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = DCONJG( A( I, J ) ) 50 CONTINUE 60 CONTINUE END IF RETURN * * End of ZLATCPY * END scalapack-1.8.0/TOOLS/indxg2p.f0000640000175000017500000000273210363532303015742 0ustar muammarmuammar INTEGER FUNCTION INDXG2P( INDXGLOB, NB, IPROC, ISRCPROC, NPROCS ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INDXGLOB, IPROC, ISRCPROC, NB, NPROCS * .. * * Purpose * ======= * * INDXG2P computes the process coordinate which posseses the entry of a * distributed matrix specified by a global index INDXGLOB. * * Arguments * ========= * * INDXGLOB (global input) INTEGER * The global index of the element. * * NB (global input) INTEGER * Block size, size of the blocks the distributed matrix is * split into. * * IPROC (local dummy) INTEGER * Dummy argument in this case in order to unify the calling * sequence of the tool-routines. * * ISRCPROC (global input) INTEGER * The coordinate of the process that possesses the first * row/column of the distributed matrix. * * NPROCS (global input) INTEGER * The total number processes over which the matrix is * distributed. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * INDXG2P = MOD( ISRCPROC + (INDXGLOB - 1) / NB, NPROCS ) * RETURN * * End of INDXG2P * END scalapack-1.8.0/TOOLS/pielset.f0000640000175000017500000001171510363532303016035 0ustar muammarmuammar SUBROUTINE PIELSET( A, IA, JA, DESCA, ALPHA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ALPHA, IA, JA * .. * .. Array arguments .. INTEGER A( * ), DESCA( * ) * .. * * Purpose * ======= * * PIELSET sets the distributed matrix entry A( IA, JA ) to ALPHA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (local output) @(typec) pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ALPHA (local input) @(typec) * The scalar alpha. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, JJA, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ A( IIA+(JJA-1)*DESCA( LLD_ ) ) = ALPHA * RETURN * * End of PIELSET * END scalapack-1.8.0/TOOLS/pzelset.f0000640000175000017500000001200110363532303016043 0ustar muammarmuammar SUBROUTINE PZELSET( A, IA, JA, DESCA, ALPHA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA COMPLEX*16 ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZELSET sets the distributed matrix entry A( IA, JA ) to ALPHA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ALPHA (local input) COMPLEX*16 * The scalar alpha. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, JJA, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ A( IIA+(JJA-1)*DESCA( LLD_ ) ) = ALPHA * RETURN * * End of PZELSET * END scalapack-1.8.0/TOOLS/descinit.f0000640000175000017500000001610010363532303016163 0ustar muammarmuammar SUBROUTINE DESCINIT( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, $ LLD, INFO ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICSRC, ICTXT, INFO, IRSRC, LLD, M, MB, N, NB * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * DESCINIT initializes the descriptor vector with the 8 input arguments * M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DESC (output) INTEGER array of dimension DLEN_. * The array descriptor of a distributed matrix to be set. * * M (global input) INTEGER * The number of rows in the distributed matrix. M >= 0. * * N (global input) INTEGER * The number of columns in the distributed matrix. N >= 0. * * MB (global input) INTEGER * The blocking factor used to distribute the rows of the * matrix. MB >= 1. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * IRSRC (global input) INTEGER * The process row over which the first row of the matrix is * distributed. 0 <= IRSRC < NPROW. * * ICSRC (global input) INTEGER * The process column over which the first column of the * matrix is distributed. 0 <= ICSRC < NPCOL. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * LLD (local input) INTEGER * The leading dimension of the local array storing the local * blocks of the distributed matrix. LLD >= MAX(1,LOCr(M)). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Note * ==== * * If the routine can recover from an erroneous input argument, it will * return an acceptable descriptor vector. For example, if LLD = 0 on * input, DESC(LLD_) will contain the smallest leading dimension * required to store the specified M-by-N distributed matrix, INFO * will be set -9 in that case. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PXERBLA * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( MB.LT.1 ) THEN INFO = -4 ELSE IF( NB.LT.1 ) THEN INFO = -5 ELSE IF( IRSRC.LT.0 .OR. IRSRC.GE.NPROW ) THEN INFO = -6 ELSE IF( ICSRC.LT.0 .OR. ICSRC.GE.NPCOL ) THEN INFO = -7 ELSE IF( NPROW.EQ.-1 ) THEN INFO = -8 ELSE IF( LLD.LT.MAX( 1, NUMROC( M, MB, MYROW, IRSRC, $ NPROW ) ) ) THEN INFO = -9 END IF * IF( INFO.NE.0 ) $ CALL PXERBLA( ICTXT, 'DESCINIT', -INFO ) * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D DESC( M_ ) = MAX( 0, M ) DESC( N_ ) = MAX( 0, N ) DESC( MB_ ) = MAX( 1, MB ) DESC( NB_ ) = MAX( 1, NB ) DESC( RSRC_ ) = MAX( 0, MIN( IRSRC, NPROW-1 ) ) DESC( CSRC_ ) = MAX( 0, MIN( ICSRC, NPCOL-1 ) ) DESC( CTXT_ ) = ICTXT DESC( LLD_ ) = MAX( LLD, MAX( 1, NUMROC( DESC( M_ ), DESC( MB_ ), $ MYROW, DESC( RSRC_ ), NPROW ) ) ) * RETURN * * End DESCINIT * END scalapack-1.8.0/TOOLS/smatadd.f0000640000175000017500000001024710363532303016004 0ustar muammarmuammar SUBROUTINE SMATADD( M, N, ALPHA, A, LDA, BETA, C, LDC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDA, LDC, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. REAL A( LDA, * ), C( LDC, * ) * .. * * Purpose * ======= * * SMATADD performs the following local matrix-matrix operation * * C := alpha * A + beta * C, * * where alpha and beta are scalars, and A and C are m by n arrays. * * Arguments * ========= * * M (local input) INTEGER * The number of rows of the array A. M >= 0. * * N (local input) INTEGER * The number of columns of the array A. N >= 0. * * ALPHA (local input) REAL * The scalar ALPHA. * * A (local input) REAL * Array, dimension (LDA,*), the array A. * * LDA (local input) INTEGER * The leading dimension of the array A, LDA >= MAX(1, M) * * BETA (local input) REAL * The scalar BETA. * * C (local input/local output) REAL * Array, dimension (LDC,*), the array C. * * LDC (local input) INTEGER * The leading dimension of the array C, LDC >= MAX(1, M) * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR.((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * IF( N.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, M C( I, 1 ) = ZERO 10 CONTINUE ELSE DO 20 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 30 I = 1, M C( I, 1 ) = A( I, 1 ) + C( I, 1 ) 30 CONTINUE ELSE DO 40 I = 1, M C( I, 1 ) = A( I, 1 ) + BETA*C( I, 1 ) 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 50 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + C( I, 1 ) 50 CONTINUE ELSE DO 60 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + BETA*C( I, 1 ) 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M C( I, J ) = ZERO 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, M C( I, J ) = ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 120 J = 1, N DO 110 I = 1, M C( I, J ) = A( I, J ) + C( I, J ) 110 CONTINUE 120 CONTINUE ELSE DO 140 J = 1, N DO 130 I = 1, M C( I, J ) = A( I, J ) + BETA * C( I, J ) 130 CONTINUE 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M C( I, J ) = C( I, J ) + ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE ELSE DO 180 J = 1, N DO 170 I = 1, M C( I, J ) = ALPHA * A( I, J ) + BETA * C( I, J ) 170 CONTINUE 180 CONTINUE END IF END IF END IF * RETURN * * End of SMATADD * END scalapack-1.8.0/TOOLS/pcfillpad.f0000640000175000017500000000556210363532303016331 0ustar muammarmuammar SUBROUTINE PCFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX CHKVAL * .. * .. Array Arguments .. COMPLEX A( * ) * .. * * Purpose * ======= * * PCFILLPAD surrounds a two dimensional local array with a guard- * zone initialized to the value CHKVAL. The user may later call the * routine PCCHEKPAD to discover if the guardzone has been * violated. There are three guardzones. The first is a buffer of size * IPRE that is before the start of the array. The second is the buffer * of size IPOST which is after the end of the array to be padded. * Finally, there is a guard zone inside every column of the array to * be padded, in the elements of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (local input) INTEGER * The number of rows in the local array. * * N (local input) INTEGER * The number of columns in the local array. * * A (local input/local output) COMPLEX, array of * dimension (LDA,N). A location IPRE elements in front of * the matrix to be padded. * * LDA (local input) INTEGER * The leading Dimension of the local array to be padded. * * IPRE (local input) INTEGER * The size of the guard zone to put before the start of * padded array. * * IPOST (local input) INTEGER * The size of the guard zone to put after padded array. * * CHKVAL (local input) COMPLEX * The value to pad matrix with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PCFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no post-guardzone in PCFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PCFILLPAD * END scalapack-1.8.0/TOOLS/zzdotu.f0000640000175000017500000000156110363532303015725 0ustar muammarmuammar SUBROUTINE ZZDOTU( N, DOTU, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX*16 DOTU * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * ZZDOTU is a simple FORTRAN wrapper around the BLAS function * ZDOTU returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. COMPLEX*16 ZDOTU EXTERNAL ZDOTU * .. * .. Executable Statements .. * DOTU = ZDOTU( N, X, INCX, Y, INCY ) * RETURN * * End of ZZDOTU * END scalapack-1.8.0/TOOLS/pdlaread.f0000640000175000017500000001064510604624224016147 0ustar muammarmuammar SUBROUTINE PDLAREAD( FILNAM, A, DESCA, IRREAD, ICREAD, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER ICREAD, IRREAD * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAREAD reads from a file named FILNAM a matrix and distribute * it to the process grid. * * Only the process of coordinates {IRREAD, ICREAD} read the file. * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IB, ICTXT, ICURCOL, ICURROW, II, J, JB, $ JJ, K, LDA, M, MYCOL, MYROW, N, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, DGERV2D, DGESD2D, $ IGEBS2D, IGEBR2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN OPEN( NIN, FILE=FILNAM, STATUS='OLD' ) READ( NIN, FMT = * ) ( IWORK( I ), I = 1, 2 ) CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2, IRREAD, $ ICREAD ) END IF M = IWORK( 1 ) N = IWORK( 2 ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * IF( M.GT.DESCA( M_ ).OR. N.GT.DESCA( N_ ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( *, FMT = * ) 'PDLAREAD: Matrix too big to fit in' WRITE( *, FMT = * ) 'Abort ...' END IF CALL BLACS_ABORT( ICTXT, 0 ) END IF * II = 1 JJ = 1 ICURROW = DESCA( RSRC_ ) ICURCOL = DESCA( CSRC_ ) LDA = DESCA( LLD_ ) * * Loop over column blocks * DO 50 J = 1, N, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), N-J+1 ) DO 40 H = 0, JB-1 * * Loop over block of rows * DO 30 I = 1, M, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), M-I+1 ) IF( ICURROW.EQ.IRREAD .AND. ICURCOL.EQ.ICREAD ) THEN IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 10 K = 0, IB-1 READ( NIN, FMT = * ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGERV2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRREAD, ICREAD ) ELSE IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 20 K = 1, IB READ( NIN, FMT = * ) WORK( K ) 20 CONTINUE CALL DGESD2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = 1 ICURROW = DESCA( RSRC_ ) 40 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN CLOSE( NIN ) END IF * RETURN * * End of PDLAREAD * END scalapack-1.8.0/TOOLS/pcrow2col.f0000640000175000017500000002147610363532303016307 0ustar muammarmuammar SUBROUTINE PCROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, $ RSRC, CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. COMPLEX VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a row of processes, and distribute those rows over a column of * processes. This routine minimizes communication by sending all * information it has that a given process in the CDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process row consisting of Q processes, and I want to send all of that * vector that I own to a new vector distributed over P processes within * a process column, that after I find the process in RDEST that owns * the row of the vector I'm currently looking at, he will want every * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a row. * * VS (local input) COMPLEX * Array of dimension (LDVS,N), the block of vectors stored on * process row RSRC to be put into memory VD, and stored on * process column CDEST. * * LDVS (local input) INTEGER * The leading dimension of VS. * * VD (local output) COMPLEX * Array of dimension (LDVD,N), on output, the contents of VD * stored on process column CDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD. * * RSRC (global input) INTEGER * The process row VS is distributed over. * * CSRC (global input) INTEGER * The process column the distributed block of vectors VS * begins on. * * RDEST (global input) INTEGER * The process row that VD begins on. * * CDEST (global input) INTEGER * The process column to distribute VD over. * * WORK (local workspace) COMPLEX * Array, dimension (LDW). The required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mq = number of rows of VS on my process. * npcol = number of process columns * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGESD2D, CGERV2D, CLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there is no * copying required * IF( NPROW .NE. NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYROW.EQ.RSRC ) THEN * ISTART = 1 * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my first block * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) * * Loop over all possible destination processes * DO 20 K = 1, RBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL CLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary, NOTE: will send extra if NB > JB * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL CGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST, $ CDEST ) * * I am both source and destination, save where to start * copying from for later use * ELSE ICPY = ISTART END IF * ISTART = ISTART + NB IRDEST = MOD( IRDEST+NPCOL, NPROW ) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYCOL.EQ.CDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) * * Loop over all sending processes * DO 50 K = 1, CBLKSKIP * * If I don't already possess the required data * IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MP - ISTART + NB) / NB JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL CGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL CLACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1), $ LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL CLACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1), $ LDVD ) JJ = JJ + NB*RBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB ICSRC = MOD( ICSRC+NPROW, NPCOL ) 50 CONTINUE END IF * * if NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYROW.EQ.RSRC ) THEN * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my piece of the vector * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN CALL CGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST ) ELSE CALL CLACPY( 'G', MQ, N, VS, LDVS, VD, LDVD ) END IF END IF IF( MYCOL.EQ.CDEST ) THEN * * Figure my distance from RDEST: the process in RSRC the same * distance from CSRC will have my piece of the vector * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) ) $ CALL CGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC ) END IF END IF * RETURN * * End of PCROW2COL * END scalapack-1.8.0/TOOLS/pselget.f0000640000175000017500000001625710363532303016041 0ustar muammarmuammar SUBROUTINE PSELGET( SCOPE, TOP, ALPHA, A, IA, JA, DESCA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 SCOPE, TOP INTEGER IA, JA REAL ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSELGET sets alpha to the distributed matrix entry A( IA, JA ). * The value of alpha is set according to the scope. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCOPE (global input) CHARACTER*1 * The BLACS scope in which alpha is updated. * If SCOPE = 'R', alpha is updated only in the process row * containing A( IA, JA ), * If SCOPE = 'C', alpha is updated only in the process column * containing A( IA, JA ), * If SCOPE = 'A', alpha is updated in all the processes of the * grid, * otherwise alpha is updated only in the process containing * A( IA, JA ). * * TOP (global input) CHARACTER*1 * The topology to be used if broadcast is needed. * * ALPHA (global output) REAL, the scalar alpha. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, IIA, IOFFA, JJA, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * ALPHA = ZERO * IF( LSAME( SCOPE, 'R' ) ) THEN IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL SGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL SGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'C' ) ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL SGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL SGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'A' ) ) THEN IF( ( MYROW.EQ.IAROW ).AND.( MYCOL.EQ.IACOL ) ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL SGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL SGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF ELSE IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ ALPHA = A( IIA+(JJA-1)*DESCA( LLD_ ) ) END IF * RETURN * * End of PSELGET * END scalapack-1.8.0/TOOLS/picol2row.f0000640000175000017500000002156510363532303016314 0ustar muammarmuammar SUBROUTINE PICOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. INTEGER VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a column of processes, and distribute those rows over a row of * processes. This routine minimizes communication by sending all * information it has that a given process in the RDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process column consisting of P processes, and I want to send all of * that vector that I own to a new vector distributed over Q processes * within a process row, that after I find the process in RDEST that * owns the row of the vector I'm currently looking at, he will want * every ( (LCM(P,Q) / P ) block of my vector (the block being of size * NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) @(typec) * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST. * * LDVS (local input) INTEGER * The leading dimension of VS, LDVS >= MAX( 1, MP ). * * VD (local output) @(typec) * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD, LDVD >= MAX( 1, MQ ). * * RSRC (global input) INTEGER * The process row the distributed block of vectors VS begins * on. * * CSRC (global input) INTEGER * The process column VS is distributed over. * * RDEST (global input) INTEGER * The process row to distribute VD over. * * CDEST (global input) INTEGER * The process column that VD begins on. * * WORK (local workspace) @(typec) * Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGESD2D, IGERV2D, ILACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL ILACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL IGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL IGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ILACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ILACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL IGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL ILACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL IGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PICOL2ROW * END scalapack-1.8.0/TOOLS/descset.f0000640000175000017500000001253710363532303016025 0ustar muammarmuammar SUBROUTINE DESCSET( DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, $ LLD ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICSRC, ICTXT, IRSRC, LLD, M, MB, N, NB * .. * .. Array Arguments .. INTEGER DESC( * ) * .. * * Purpose * ======= * * DESCSET initializes a descriptor vector with the 8 input arguments * M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DESC (output) INTEGER array of dimension DLEN. * The array descriptor of a distributed matrix to be set. * * M (global input) INTEGER * The number of rows in the distributed matrix. M >= 0. * * N (global input) INTEGER * The number of columns in the distributed matrix. N >= 0. * * MB (global input) INTEGER * The blocking factor used to distribute the rows of the * matrix. MB >= 1. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * IRSRC (global input) INTEGER * The process row over which the first row of the matrix is * distributed. 0 <= IRSRC < NPROW. * * ICSRC (global input) INTEGER * The process column over which the first column of the * matrix is distributed. 0 <= ICSRC < NPCOL. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * LLD (local input) INTEGER * The leading dimension of the local array storing the local * blocks of the distributed matrix. LLD >= MAX(1,LOCr(M)). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * * .. Executable Statements .. * DESC( DTYPE_ ) = BLOCK_CYCLIC_2D DESC( M_ ) = M DESC( N_ ) = N DESC( MB_ ) = MB DESC( NB_ ) = NB DESC( RSRC_ ) = IRSRC DESC( CSRC_ ) = ICSRC DESC( CTXT_ ) = ICTXT DESC( LLD_ ) = LLD * RETURN * * End DESCSET * END scalapack-1.8.0/TOOLS/ssdot.f0000640000175000017500000000154710363532303015526 0ustar muammarmuammar SUBROUTINE SSDOT( N, DOT, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N REAL DOT * .. * .. Array Arguments .. REAL X( * ), Y( * ) * .. * * Purpose * ======= * * SSDOT is a simple FORTRAN wrapper around the BLAS function * SDOT returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. REAL SDOT EXTERNAL SDOT * .. * .. Executable Statements .. * DOT = SDOT( N, X, INCX, Y, INCY ) * RETURN * * End of SSDOT * END scalapack-1.8.0/TOOLS/LAPACK/0000750000175000017500000000000010605234715015153 5ustar muammarmuammarscalapack-1.8.0/TOOLS/LAPACK/icopy.f0000640000175000017500000000434410363532303016446 0ustar muammarmuammar SUBROUTINE ICOPY( N, SX, INCX, SY, INCY ) * * -- LAPACK auxiliary test routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * February 29, 1992 * * .. Scalar Arguments .. INTEGER INCX, INCY, N * .. * .. Array Arguments .. INTEGER SX( * ), SY( * ) * .. * * Purpose * ======= * * ICOPY copies an integer vector x to an integer vector y. * Uses unrolled loops for increments equal to 1. * * Arguments * ========= * * N (input) INTEGER * The length of the vectors SX and SY. * * SX (input) INTEGER array, dimension (1+(N-1)*abs(INCX)) * The vector X. * * INCX (input) INTEGER * The spacing between consecutive elements of SX. * * SY (output) INTEGER array, dimension (1+(N-1)*abs(INCY)) * The vector Y. * * INCY (input) INTEGER * The spacing between consecutive elements of SY. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IX, IY, M, MP1 * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * IF( N.LE.0 ) $ RETURN IF( INCX.EQ.1 .AND. INCY.EQ.1 ) $ GO TO 20 * * Code for unequal increments or equal increments not equal to 1 * IX = 1 IY = 1 IF( INCX.LT.0 ) $ IX = ( -N+1 )*INCX + 1 IF( INCY.LT.0 ) $ IY = ( -N+1 )*INCY + 1 DO 10 I = 1, N SY( IY ) = SX( IX ) IX = IX + INCX IY = IY + INCY 10 CONTINUE RETURN * * Code for both increments equal to 1 * * Clean-up loop * 20 CONTINUE M = MOD( N, 7 ) IF( M.EQ.0 ) $ GO TO 40 DO 30 I = 1, M SY( I ) = SX( I ) 30 CONTINUE IF( N.LT.7 ) $ RETURN 40 CONTINUE MP1 = M + 1 DO 50 I = MP1, N, 7 SY( I ) = SX( I ) SY( I+1 ) = SX( I+1 ) SY( I+2 ) = SX( I+2 ) SY( I+3 ) = SX( I+3 ) SY( I+4 ) = SX( I+4 ) SY( I+5 ) = SX( I+5 ) SY( I+6 ) = SX( I+6 ) 50 CONTINUE RETURN * * End of ICOPY * END scalapack-1.8.0/TOOLS/LAPACK/dlatm1.f0000640000175000017500000001547410604316307016515 0ustar muammarmuammar SUBROUTINE DLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, N, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST, INFO, IRSIGN, MODE, N DOUBLE PRECISION COND * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION D( * ) * .. * * Purpose * ======= * * DLATM1 computes the entries of D(1..N) as specified by * MODE, COND and IRSIGN. IDIST and ISEED determine the generation * of random numbers. DLATM1 is called by SLATMR to generate * random test matrices for LAPACK programs. * * Arguments * ========= * * MODE - INTEGER * On entry describes how D is to be computed: * MODE = 0 means do not change D. * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * Not modified. * * COND - DOUBLE PRECISION * On entry, used as described under MODE above. * If used, it must be >= 1. Not modified. * * IRSIGN - INTEGER * On entry, if MODE neither -6, 0 nor 6, determines sign of * entries of D * 0 => leave entries of D unchanged * 1 => multiply each entry of D by 1 or -1 with probability .5 * * IDIST - CHARACTER*1 * On entry, IDIST specifies the type of distribution to be * used to generate a random matrix . * 1 => UNIFORM( 0, 1 ) * 2 => UNIFORM( -1, 1 ) * 3 => NORMAL( 0, 1 ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. The random number generator uses a * linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to DLATM1 * to continue the same random number sequence. * Changed on exit. * * D - DOUBLE PRECISION array, dimension ( MIN( M , N ) ) * Array to be computed according to MODE, COND and IRSIGN. * May be changed on exit if MODE is nonzero. * * N - INTEGER * Number of entries of D. Not modified. * * INFO - INTEGER * 0 => normal termination * -1 => if MODE not in range -6 to 6 * -2 => if MODE neither -6, 0 nor 6, and * IRSIGN neither 0 nor 1 * -3 => if MODE neither -6, 0 nor 6 and COND less than 1 * -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 3 * -7 => if N negative * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) DOUBLE PRECISION HALF PARAMETER ( HALF = 0.5D0 ) * .. * .. Local Scalars .. INTEGER I DOUBLE PRECISION ALPHA, TEMP * .. * .. External Functions .. DOUBLE PRECISION DLARAN EXTERNAL DLARAN * .. * .. External Subroutines .. EXTERNAL DLARNV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, EXP, LOG * .. * .. Executable Statements .. * * Decode and Test the input parameters. Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Set INFO if an error * IF( MODE.LT.-6 .OR. MODE.GT.6 ) THEN INFO = -1 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ ( IRSIGN.NE.0 .AND. IRSIGN.NE.1 ) ) THEN INFO = -2 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ COND.LT.ONE ) THEN INFO = -3 ELSE IF( ( MODE.EQ.6 .OR. MODE.EQ.-6 ) .AND. $ ( IDIST.LT.1 .OR. IDIST.GT.3 ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -7 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'DLATM1', -INFO ) RETURN END IF * * Compute D according to COND and MODE * IF( MODE.NE.0 ) THEN GO TO ( 10, 30, 50, 70, 90, 110 )ABS( MODE ) * * One large D value: * 10 CONTINUE DO 20 I = 1, N D( I ) = ONE / COND 20 CONTINUE D( 1 ) = ONE GO TO 120 * * One small D value: * 30 CONTINUE DO 40 I = 1, N D( I ) = ONE 40 CONTINUE D( N ) = ONE / COND GO TO 120 * * Exponentially distributed D values: * 50 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN ALPHA = COND**( -ONE / DBLE( N-1 ) ) DO 60 I = 2, N D( I ) = ALPHA**( I-1 ) 60 CONTINUE END IF GO TO 120 * * Arithmetically distributed D values: * 70 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN TEMP = ONE / COND ALPHA = ( ONE-TEMP ) / DBLE( N-1 ) DO 80 I = 2, N D( I ) = DBLE( N-I )*ALPHA + TEMP 80 CONTINUE END IF GO TO 120 * * Randomly distributed D values on ( 1/COND , 1): * 90 CONTINUE ALPHA = LOG( ONE / COND ) DO 100 I = 1, N D( I ) = EXP( ALPHA*DLARAN( ISEED ) ) 100 CONTINUE GO TO 120 * * Randomly distributed D values from IDIST * 110 CONTINUE CALL DLARNV( IDIST, ISEED, N, D ) * 120 CONTINUE * * If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign * random signs to D * IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ IRSIGN.EQ.1 ) THEN DO 130 I = 1, N TEMP = DLARAN( ISEED ) IF( TEMP.GT.HALF ) $ D( I ) = -D( I ) 130 CONTINUE END IF * * Reverse if MODE < 0 * IF( MODE.LT.0 ) THEN DO 140 I = 1, N / 2 TEMP = D( I ) D( I ) = D( N+1-I ) D( N+1-I ) = TEMP 140 CONTINUE END IF * END IF * RETURN * * End of DLATM1 * END scalapack-1.8.0/TOOLS/LAPACK/Makefile0000640000175000017500000000345010604316307016613 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: Tools - LAPACK Makefile # # Creation date: March 20, 1995 # # Modified: February 16, 2000 # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../../SLmake.inc ALLAUX = icopy.o SCATGEN = slatm1.o slaran.o slarnd.o SMATGEN = slatms.o slagge.o slagsy.o slarot.o CMATGEN = clarnv.o clatm1.o clatms.o clagge.o claghe.o clagsy.o clarot.o \ clarnd.o DZATGEN = dlatm1.o dlaran.o dlarnd.o DMATGEN = dlatms.o dlagge.o dlagsy.o dlarot.o ZMATGEN = zlarnv.o zlatm1.o zlatms.o zlagge.o zlaghe.o zlagsy.o zlarot.o \ zlarnd.o all : single complex double complex16 single: $(ALLAUX) $(SMATGEN) $(SCATGEN) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(SLASRC) $(ALLAUX) $(SCLAUX) \ $(SMATGEN) $(SCATGEN) $(RANLIB) $(SCALAPACKLIB) complex: $(ALLAUX) $(CMATGEN) $(SCATGEN) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(CLASRC) $(ALLAUX) $(SCLAUX) \ $(CMATGEN) $(SCATGEN) $(RANLIB) $(SCALAPACKLIB) double: $(ALLAUX) $(DMATGEN) $(DZATGEN) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(DLASRC) $(ALLAUX) $(DZLAUX) \ $(DMATGEN) $(DZATGEN) $(RANLIB) $(SCALAPACKLIB) complex16: $(ALLAUX) $(ZMATGEN) $(DZATGEN) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ZLASRC) $(ALLAUX) $(DZLAUX) \ $(ZMATGEN) $(DZATGEN) $(RANLIB) $(SCALAPACKLIB) $(ALLAUX): $(FRC) $(SCLAUX): $(FRC) $(DZLAUX): $(FRC) $(SCATGEN): $(FRC) $(SMATGEN): $(FRC) $(CMATGEN): $(FRC) $(DZATGEN): $(FRC) $(DMATGEN): $(FRC) $(ZMATGEN): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o slamch.o: $(F77) -c $(NOOPT) slamch.f dlamch.o: $(F77) -c $(NOOPT) dlamch.f .f.o : ; $(F77) -c $(F77FLAGS) $*.f scalapack-1.8.0/TOOLS/LAPACK/izmax1.f0000640000175000017500000000426610363532303016537 0ustar muammarmuammar INTEGER FUNCTION IZMAX1( N, CX, INCX ) * * -- LAPACK auxiliary routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1999 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. COMPLEX*16 CX( * ) * .. * * Purpose * ======= * * IZMAX1 finds the index of the element whose real part has maximum * absolute value. * * Based on IZAMAX from Level 1 BLAS. * The change is to use the 'genuine' absolute value. * * Contributed by Nick Higham for use with ZLACON. * * Arguments * ========= * * N (input) INTEGER * The number of elements in the vector CX. * * CX (input) COMPLEX*16 array, dimension (N) * The vector whose elements will be summed. * * INCX (input) INTEGER * The spacing between successive values of CX. INCX >= 1. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IX DOUBLE PRECISION SMAX COMPLEX*16 ZDUM * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. * * NEXT LINE IS THE ONLY MODIFICATION. CABS1( ZDUM ) = ABS( ZDUM ) * .. * .. Executable Statements .. * IZMAX1 = 0 IF( N.LT.1 ) $ RETURN IZMAX1 = 1 IF( N.EQ.1 ) $ RETURN IF( INCX.EQ.1 ) $ GO TO 30 * * CODE FOR INCREMENT NOT EQUAL TO 1 * IX = 1 SMAX = CABS1( CX( 1 ) ) IX = IX + INCX DO 20 I = 2, N IF( CABS1( CX( IX ) ).LE.SMAX ) $ GO TO 10 IZMAX1 = I SMAX = CABS1( CX( IX ) ) 10 CONTINUE IX = IX + INCX 20 CONTINUE RETURN * * CODE FOR INCREMENT EQUAL TO 1 * 30 CONTINUE SMAX = CABS1( CX( 1 ) ) DO 40 I = 2, N IF( CABS1( CX( I ) ).LE.SMAX ) $ GO TO 40 IZMAX1 = I SMAX = CABS1( CX( I ) ) 40 CONTINUE RETURN * * End of IZMAX1 * END scalapack-1.8.0/TOOLS/LAPACK/slagge.f0000640000175000017500000002161010604316307016562 0ustar muammarmuammar SUBROUTINE SLAGGE( M, N, KL, KU, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDA, M, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * SLAGGE generates a real general m by n matrix A, by pre- and post- * multiplying a real diagonal matrix D with random orthogonal matrices: * A = U*D*V. The lower and upper bandwidths may then be reduced to * kl and ku by additional orthogonal transformations. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= KL <= M-1. * * KU (input) INTEGER * The number of nonzero superdiagonals within the band of A. * 0 <= KU <= N-1. * * D (input) REAL array, dimension (min(M,N)) * The diagonal elements of the diagonal matrix D. * * A (output) REAL array, dimension (LDA,N) * The generated m by n matrix A. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= M. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) REAL array, dimension (M+N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J REAL TAU, WA, WB, WN * .. * .. External Subroutines .. EXTERNAL SGEMV, SGER, SLARNV, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SIGN * .. * .. External Functions .. REAL SNRM2 EXTERNAL SNRM2 * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN INFO = -3 ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN INFO = -4 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -7 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'SLAGGE', -INFO ) RETURN END IF * * initialize A to diagonal matrix * DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, MIN( M, N ) A( I, I ) = D( I ) 30 CONTINUE * * pre- and post-multiply A by random orthogonal matrices * DO 40 I = MIN( M, N ), 1, -1 IF( I.LT.M ) THEN * * generate random reflection * CALL SLARNV( 3, ISEED, M-I+1, WORK ) WN = SNRM2( M-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL SSCAL( M-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * multiply A(i:m,i:n) by random reflection from the left * CALL SGEMV( 'Transpose', M-I+1, N-I+1, ONE, A( I, I ), LDA, $ WORK, 1, ZERO, WORK( M+1 ), 1 ) CALL SGER( M-I+1, N-I+1, -TAU, WORK, 1, WORK( M+1 ), 1, $ A( I, I ), LDA ) END IF IF( I.LT.N ) THEN * * generate random reflection * CALL SLARNV( 3, ISEED, N-I+1, WORK ) WN = SNRM2( N-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL SSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * multiply A(i:m,i:n) by random reflection from the right * CALL SGEMV( 'No transpose', M-I+1, N-I+1, ONE, A( I, I ), $ LDA, WORK, 1, ZERO, WORK( N+1 ), 1 ) CALL SGER( M-I+1, N-I+1, -TAU, WORK( N+1 ), 1, WORK, 1, $ A( I, I ), LDA ) END IF 40 CONTINUE * * Reduce number of subdiagonals to KL and number of superdiagonals * to KU * DO 70 I = 1, MAX( M-1-KL, N-1-KU ) IF( KL.LE.KU ) THEN * * annihilate subdiagonal elements first (necessary if KL = 0) * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = SNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = SIGN( WN, A( KL+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL SSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL SGEMV( 'Transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL SGER( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, 1, $ A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = SNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = SIGN( WN, A( I, KU+I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL SSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = WB / WA END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL SGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL SGER( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF ELSE * * annihilate superdiagonal elements first (necessary if * KU = 0) * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = SNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = SIGN( WN, A( I, KU+I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL SSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = WB / WA END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL SGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL SGER( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = SNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = SIGN( WN, A( KL+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL SSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL SGEMV( 'Transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL SGER( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, 1, $ A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF END IF * DO 50 J = KL + I + 1, M A( J, I ) = ZERO 50 CONTINUE * DO 60 J = KU + I + 1, N A( I, J ) = ZERO 60 CONTINUE 70 CONTINUE RETURN * * End of SLAGGE * END scalapack-1.8.0/TOOLS/LAPACK/clarnv.f0000640000175000017500000000772210363532303016613 0ustar muammarmuammar SUBROUTINE CLARNV( IDIST, ISEED, N, X ) * * -- LAPACK auxiliary routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. INTEGER IDIST, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX X( * ) * .. * * Purpose * ======= * * CLARNV returns a vector of n random complex numbers from a uniform or * normal distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: real and imaginary parts each uniform (0,1) * = 2: real and imaginary parts each uniform (-1,1) * = 3: real and imaginary parts each normal (0,1) * = 4: uniformly distributed on the disc abs(z) < 1 * = 5: uniformly distributed on the circle abs(z) = 1 * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * N (input) INTEGER * The number of random numbers to be generated. * * X (output) COMPLEX array, dimension (N) * The generated random numbers. * * Further Details * =============== * * This routine calls the auxiliary routine SLARUV to generate random * real numbers from a uniform (0,1) distribution, in batches of up to * 128 using vectorisable code. The Box-Muller method is used to * transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, TWO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) INTEGER LV PARAMETER ( LV = 128 ) REAL TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) * .. * .. Local Scalars .. INTEGER I, IL, IV * .. * .. Local Arrays .. REAL U( LV ) * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, EXP, LOG, MIN, SQRT * .. * .. External Subroutines .. EXTERNAL SLARUV * .. * .. Executable Statements .. * DO 60 IV = 1, N, LV / 2 IL = MIN( LV / 2, N-IV+1 ) * * Call SLARUV to generate 2*IL real numbers from a uniform (0,1) * distribution (2*IL <= LV) * CALL SLARUV( ISEED, 2*IL, U ) * IF( IDIST.EQ.1 ) THEN * * Copy generated numbers * DO 10 I = 1, IL X( IV+I-1 ) = CMPLX( U( 2*I-1 ), U( 2*I ) ) 10 CONTINUE ELSE IF( IDIST.EQ.2 ) THEN * * Convert generated numbers to uniform (-1,1) distribution * DO 20 I = 1, IL X( IV+I-1 ) = CMPLX( TWO*U( 2*I-1 )-ONE, $ TWO*U( 2*I )-ONE ) 20 CONTINUE ELSE IF( IDIST.EQ.3 ) THEN * * Convert generated numbers to normal (0,1) distribution * DO 30 I = 1, IL X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) 30 CONTINUE ELSE IF( IDIST.EQ.4 ) THEN * * Convert generated numbers to complex numbers uniformly * distributed on the unit disk * DO 40 I = 1, IL X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) 40 CONTINUE ELSE IF( IDIST.EQ.5 ) THEN * * Convert generated numbers to complex numbers uniformly * distributed on the unit circle * DO 50 I = 1, IL X( IV+I-1 ) = EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) 50 CONTINUE END IF 60 CONTINUE RETURN * * End of CLARNV * END scalapack-1.8.0/TOOLS/LAPACK/clarnd.f0000640000175000017500000000555410604316307016574 0ustar muammarmuammar COMPLEX FUNCTION CLARND( IDIST, ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST * .. * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * CLARND returns a random complex number from a uniform or normal * distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: real and imaginary parts each uniform (0,1) * = 2: real and imaginary parts each uniform (-1,1) * = 3: real and imaginary parts each normal (0,1) * = 4: uniformly distributed on the disc abs(z) <= 1 * = 5: uniformly distributed on the circle abs(z) = 1 * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine calls the auxiliary routine SLARAN to generate a random * real number from a uniform (0,1) distribution. The Box-Muller method * is used to transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, TWO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) REAL TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) * .. * .. Local Scalars .. REAL T1, T2 * .. * .. External Functions .. REAL SLARAN EXTERNAL SLARAN * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, EXP, LOG, SQRT * .. * .. Executable Statements .. * * Generate a pair of real random numbers from a uniform (0,1) * distribution * T1 = SLARAN( ISEED ) T2 = SLARAN( ISEED ) * IF( IDIST.EQ.1 ) THEN * * real and imaginary parts each uniform (0,1) * CLARND = CMPLX( T1, T2 ) ELSE IF( IDIST.EQ.2 ) THEN * * real and imaginary parts each uniform (-1,1) * CLARND = CMPLX( TWO*T1-ONE, TWO*T2-ONE ) ELSE IF( IDIST.EQ.3 ) THEN * * real and imaginary parts each normal (0,1) * CLARND = SQRT( -TWO*LOG( T1 ) )*EXP( CMPLX( ZERO, TWOPI*T2 ) ) ELSE IF( IDIST.EQ.4 ) THEN * * uniform distribution on the unit disc abs(z) <= 1 * CLARND = SQRT( T1 )*EXP( CMPLX( ZERO, TWOPI*T2 ) ) ELSE IF( IDIST.EQ.5 ) THEN * * uniform distribution on the unit circle abs(z) = 1 * CLARND = EXP( CMPLX( ZERO, TWOPI*T2 ) ) END IF RETURN * * End of CLARND * END scalapack-1.8.0/TOOLS/LAPACK/zlarnv.f0000640000175000017500000000773310363532303016644 0ustar muammarmuammar SUBROUTINE ZLARNV( IDIST, ISEED, N, X ) * * -- LAPACK auxiliary routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. INTEGER IDIST, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX*16 X( * ) * .. * * Purpose * ======= * * ZLARNV returns a vector of n random complex numbers from a uniform or * normal distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: real and imaginary parts each uniform (0,1) * = 2: real and imaginary parts each uniform (-1,1) * = 3: real and imaginary parts each normal (0,1) * = 4: uniformly distributed on the disc abs(z) < 1 * = 5: uniformly distributed on the circle abs(z) = 1 * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * N (input) INTEGER * The number of random numbers to be generated. * * X (output) COMPLEX*16 array, dimension (N) * The generated random numbers. * * Further Details * =============== * * This routine calls the auxiliary routine DLARUV to generate random * real numbers from a uniform (0,1) distribution, in batches of up to * 128 using vectorisable code. The Box-Muller method is used to * transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, TWO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) INTEGER LV PARAMETER ( LV = 128 ) DOUBLE PRECISION TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) * .. * .. Local Scalars .. INTEGER I, IL, IV * .. * .. Local Arrays .. DOUBLE PRECISION U( LV ) * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, EXP, LOG, MIN, SQRT * .. * .. External Subroutines .. EXTERNAL DLARUV * .. * .. Executable Statements .. * DO 60 IV = 1, N, LV / 2 IL = MIN( LV / 2, N-IV+1 ) * * Call DLARUV to generate 2*IL real numbers from a uniform (0,1) * distribution (2*IL <= LV) * CALL DLARUV( ISEED, 2*IL, U ) * IF( IDIST.EQ.1 ) THEN * * Copy generated numbers * DO 10 I = 1, IL X( IV+I-1 ) = DCMPLX( U( 2*I-1 ), U( 2*I ) ) 10 CONTINUE ELSE IF( IDIST.EQ.2 ) THEN * * Convert generated numbers to uniform (-1,1) distribution * DO 20 I = 1, IL X( IV+I-1 ) = DCMPLX( TWO*U( 2*I-1 )-ONE, $ TWO*U( 2*I )-ONE ) 20 CONTINUE ELSE IF( IDIST.EQ.3 ) THEN * * Convert generated numbers to normal (0,1) distribution * DO 30 I = 1, IL X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) 30 CONTINUE ELSE IF( IDIST.EQ.4 ) THEN * * Convert generated numbers to complex numbers uniformly * distributed on the unit disk * DO 40 I = 1, IL X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) 40 CONTINUE ELSE IF( IDIST.EQ.5 ) THEN * * Convert generated numbers to complex numbers uniformly * distributed on the unit circle * DO 50 I = 1, IL X( IV+I-1 ) = EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) 50 CONTINUE END IF 60 CONTINUE RETURN * * End of ZLARNV * END scalapack-1.8.0/TOOLS/LAPACK/dlarnd.f0000640000175000017500000000427210604316307016571 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DLARND( IDIST, ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST * .. * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * DLARND returns a random real number from a uniform or normal * distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: uniform (0,1) * = 2: uniform (-1,1) * = 3: normal (0,1) * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine calls the auxiliary routine DLARAN to generate a random * real number from a uniform (0,1) distribution. The Box-Muller method * is used to transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) DOUBLE PRECISION TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION T1, T2 * .. * .. External Functions .. DOUBLE PRECISION DLARAN EXTERNAL DLARAN * .. * .. Intrinsic Functions .. INTRINSIC COS, LOG, SQRT * .. * .. Executable Statements .. * * Generate a real random number from a uniform (0,1) distribution * T1 = DLARAN( ISEED ) * IF( IDIST.EQ.1 ) THEN * * uniform (0,1) * DLARND = T1 ELSE IF( IDIST.EQ.2 ) THEN * * uniform (-1,1) * DLARND = TWO*T1 - ONE ELSE IF( IDIST.EQ.3 ) THEN * * normal (0,1) * T2 = DLARAN( ISEED ) DLARND = SQRT( -TWO*LOG( T1 ) )*COS( TWOPI*T2 ) END IF RETURN * * End of DLARND * END scalapack-1.8.0/TOOLS/LAPACK/clagge.f0000640000175000017500000002246010604316307016546 0ustar muammarmuammar SUBROUTINE CLAGGE( M, N, KL, KU, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDA, M, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL D( * ) COMPLEX A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * CLAGGE generates a complex general m by n matrix A, by pre- and post- * multiplying a real diagonal matrix D with random unitary matrices: * A = U*D*V. The lower and upper bandwidths may then be reduced to * kl and ku by additional unitary transformations. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= KL <= M-1. * * KU (input) INTEGER * The number of nonzero superdiagonals within the band of A. * 0 <= KU <= N-1. * * D (input) REAL array, dimension (min(M,N)) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX array, dimension (LDA,N) * The generated m by n matrix A. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= M. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX array, dimension (M+N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J REAL WN COMPLEX TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL CGEMV, CGERC, CLACGV, CLARNV, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, REAL * .. * .. External Functions .. REAL SCNRM2 EXTERNAL SCNRM2 * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN INFO = -3 ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN INFO = -4 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -7 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'CLAGGE', -INFO ) RETURN END IF * * initialize A to diagonal matrix * DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, MIN( M, N ) A( I, I ) = D( I ) 30 CONTINUE * * pre- and post-multiply A by random unitary matrices * DO 40 I = MIN( M, N ), 1, -1 IF( I.LT.M ) THEN * * generate random reflection * CALL CLARNV( 3, ISEED, M-I+1, WORK ) WN = SCNRM2( M-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL CSCAL( M-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = REAL( WB / WA ) END IF * * multiply A(i:m,i:n) by random reflection from the left * CALL CGEMV( 'Conjugate transpose', M-I+1, N-I+1, ONE, $ A( I, I ), LDA, WORK, 1, ZERO, WORK( M+1 ), 1 ) CALL CGERC( M-I+1, N-I+1, -TAU, WORK, 1, WORK( M+1 ), 1, $ A( I, I ), LDA ) END IF IF( I.LT.N ) THEN * * generate random reflection * CALL CLARNV( 3, ISEED, N-I+1, WORK ) WN = SCNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL CSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = REAL( WB / WA ) END IF * * multiply A(i:m,i:n) by random reflection from the right * CALL CGEMV( 'No transpose', M-I+1, N-I+1, ONE, A( I, I ), $ LDA, WORK, 1, ZERO, WORK( N+1 ), 1 ) CALL CGERC( M-I+1, N-I+1, -TAU, WORK( N+1 ), 1, WORK, 1, $ A( I, I ), LDA ) END IF 40 CONTINUE * * Reduce number of subdiagonals to KL and number of superdiagonals * to KU * DO 70 I = 1, MAX( M-1-KL, N-1-KU ) IF( KL.LE.KU ) THEN * * annihilate subdiagonal elements first (necessary if KL = 0) * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = SCNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = ( WN / ABS( A( KL+I, I ) ) )*A( KL+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL CSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL CGEMV( 'Conjugate transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL CGERC( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, $ 1, A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = SCNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = ( WN / ABS( A( I, KU+I ) ) )*A( I, KU+I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL CSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL CLACGV( N-KU-I+1, A( I, KU+I ), LDA ) CALL CGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL CGERC( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF ELSE * * annihilate superdiagonal elements first (necessary if * KU = 0) * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = SCNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = ( WN / ABS( A( I, KU+I ) ) )*A( I, KU+I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL CSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL CLACGV( N-KU-I+1, A( I, KU+I ), LDA ) CALL CGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL CGERC( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = SCNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = ( WN / ABS( A( KL+I, I ) ) )*A( KL+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL CSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL CGEMV( 'Conjugate transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL CGERC( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, $ 1, A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF END IF * DO 50 J = KL + I + 1, M A( J, I ) = ZERO 50 CONTINUE * DO 60 J = KU + I + 1, N A( I, J ) = ZERO 60 CONTINUE 70 CONTINUE RETURN * * End of CLAGGE * END scalapack-1.8.0/TOOLS/LAPACK/zlagsy.f0000640000175000017500000001477110604316307016643 0ustar muammarmuammar SUBROUTINE ZLAGSY( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * ZLAGSY generates a complex symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: * A = U*D*U**T. The semi-bandwidth may then be reduced to k by * additional unitary transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) DOUBLE PRECISION array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX*16 array, dimension (LDA,N) * The generated n by n symmetric matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX*16 array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE, HALF PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ), $ HALF = ( 0.5D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, II, J, JJ DOUBLE PRECISION WN COMPLEX*16 ALPHA, TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL XERBLA, ZAXPY, ZGEMV, ZGERC, ZLACGV, ZLARNV, $ ZSCAL, ZSYMV * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 COMPLEX*16 ZDOTC EXTERNAL DZNRM2, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'ZLAGSY', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of symmetric matrix * DO 60 I = N - 1, 1, -1 * * generate random reflection * CALL ZLARNV( 3, ISEED, N-I+1, WORK ) WN = DZNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL ZSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = DBLE( WB / WA ) END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * conjg(u) * CALL ZLACGV( N-I+1, WORK, 1 ) CALL ZSYMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) CALL ZLACGV( N-I+1, WORK, 1 ) * * compute v := y - 1/2 * tau * ( u, y ) * u * ALPHA = -HALF*TAU*ZDOTC( N-I+1, WORK, 1, WORK( N+1 ), 1 ) CALL ZAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * * CALL ZSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, * $ A( I, I ), LDA ) * DO 50 JJ = I, N DO 40 II = JJ, N A( II, JJ ) = A( II, JJ ) - $ WORK( II-I+1 )*WORK( N+JJ-I+1 ) - $ WORK( N+II-I+1 )*WORK( JJ-I+1 ) 40 CONTINUE 50 CONTINUE 60 CONTINUE * * Reduce number of subdiagonals to K * DO 100 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = DZNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = ( WN / ABS( A( K+I, I ) ) )*A( K+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL ZSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL ZGEMV( 'Conjugate transpose', N-K-I+1, K-1, ONE, $ A( K+I, I+1 ), LDA, A( K+I, I ), 1, ZERO, WORK, 1 ) CALL ZGERC( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * conjg(u) * CALL ZLACGV( N-K-I+1, A( K+I, I ), 1 ) CALL ZSYMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) CALL ZLACGV( N-K-I+1, A( K+I, I ), 1 ) * * compute v := y - 1/2 * tau * ( u, y ) * u * ALPHA = -HALF*TAU*ZDOTC( N-K-I+1, A( K+I, I ), 1, WORK, 1 ) CALL ZAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply symmetric rank-2 update to A(k+i:n,k+i:n) * * CALL ZSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, * $ A( K+I, K+I ), LDA ) * DO 80 JJ = K + I, N DO 70 II = JJ, N A( II, JJ ) = A( II, JJ ) - A( II, I )*WORK( JJ-K-I+1 ) - $ WORK( II-K-I+1 )*A( JJ, I ) 70 CONTINUE 80 CONTINUE * A( K+I, I ) = -WA DO 90 J = K + I + 1, N A( J, I ) = ZERO 90 CONTINUE 100 CONTINUE * * Store full symmetric matrix * DO 120 J = 1, N DO 110 I = J + 1, N A( J, I ) = A( I, J ) 110 CONTINUE 120 CONTINUE RETURN * * End of ZLAGSY * END scalapack-1.8.0/TOOLS/LAPACK/zlatm1.f0000640000175000017500000001576710604316307016550 0ustar muammarmuammar SUBROUTINE ZLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, N, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST, INFO, IRSIGN, MODE, N DOUBLE PRECISION COND * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX*16 D( * ) * .. * * Purpose * ======= * * ZLATM1 computes the entries of D(1..N) as specified by * MODE, COND and IRSIGN. IDIST and ISEED determine the generation * of random numbers. ZLATM1 is called by CLATMR to generate * random test matrices for LAPACK programs. * * Arguments * ========= * * MODE - INTEGER * On entry describes how D is to be computed: * MODE = 0 means do not change D. * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * Not modified. * * COND - DOUBLE PRECISION * On entry, used as described under MODE above. * If used, it must be >= 1. Not modified. * * IRSIGN - INTEGER * On entry, if MODE neither -6, 0 nor 6, determines sign of * entries of D * 0 => leave entries of D unchanged * 1 => multiply each entry of D by random complex number * uniformly distributed with absolute value 1 * * IDIST - CHARACTER*1 * On entry, IDIST specifies the type of distribution to be * used to generate a random matrix . * 1 => real and imaginary parts each UNIFORM( 0, 1 ) * 2 => real and imaginary parts each UNIFORM( -1, 1 ) * 3 => real and imaginary parts each NORMAL( 0, 1 ) * 4 => complex number uniform in DISK( 0, 1 ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. The random number generator uses a * linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to ZLATM1 * to continue the same random number sequence. * Changed on exit. * * D - COMPLEX*16 array, dimension ( MIN( M , N ) ) * Array to be computed according to MODE, COND and IRSIGN. * May be changed on exit if MODE is nonzero. * * N - INTEGER * Number of entries of D. Not modified. * * INFO - INTEGER * 0 => normal termination * -1 => if MODE not in range -6 to 6 * -2 => if MODE neither -6, 0 nor 6, and * IRSIGN neither 0 nor 1 * -3 => if MODE neither -6, 0 nor 6 and COND less than 1 * -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 4 * -7 => if N negative * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) * .. * .. Local Scalars .. INTEGER I DOUBLE PRECISION ALPHA, TEMP COMPLEX*16 CTEMP * .. * .. External Functions .. DOUBLE PRECISION DLARAN COMPLEX*16 ZLARND EXTERNAL DLARAN, ZLARND * .. * .. External Subroutines .. EXTERNAL XERBLA, ZLARNV * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, EXP, LOG * .. * .. Executable Statements .. * * Decode and Test the input parameters. Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Set INFO if an error * IF( MODE.LT.-6 .OR. MODE.GT.6 ) THEN INFO = -1 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ ( IRSIGN.NE.0 .AND. IRSIGN.NE.1 ) ) THEN INFO = -2 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ COND.LT.ONE ) THEN INFO = -3 ELSE IF( ( MODE.EQ.6 .OR. MODE.EQ.-6 ) .AND. $ ( IDIST.LT.1 .OR. IDIST.GT.4 ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -7 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZLATM1', -INFO ) RETURN END IF * * Compute D according to COND and MODE * IF( MODE.NE.0 ) THEN GO TO ( 10, 30, 50, 70, 90, 110 )ABS( MODE ) * * One large D value: * 10 CONTINUE DO 20 I = 1, N D( I ) = ONE / COND 20 CONTINUE D( 1 ) = ONE GO TO 120 * * One small D value: * 30 CONTINUE DO 40 I = 1, N D( I ) = ONE 40 CONTINUE D( N ) = ONE / COND GO TO 120 * * Exponentially distributed D values: * 50 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN ALPHA = COND**( -ONE / DBLE( N-1 ) ) DO 60 I = 2, N D( I ) = ALPHA**( I-1 ) 60 CONTINUE END IF GO TO 120 * * Arithmetically distributed D values: * 70 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN TEMP = ONE / COND ALPHA = ( ONE-TEMP ) / DBLE( N-1 ) DO 80 I = 2, N D( I ) = DBLE( N-I )*ALPHA + TEMP 80 CONTINUE END IF GO TO 120 * * Randomly distributed D values on ( 1/COND , 1): * 90 CONTINUE ALPHA = LOG( ONE / COND ) DO 100 I = 1, N D( I ) = EXP( ALPHA*DLARAN( ISEED ) ) 100 CONTINUE GO TO 120 * * Randomly distributed D values from IDIST * 110 CONTINUE CALL ZLARNV( IDIST, ISEED, N, D ) * 120 CONTINUE * * If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign * random signs to D * IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ IRSIGN.EQ.1 ) THEN DO 130 I = 1, N CTEMP = ZLARND( 3, ISEED ) D( I ) = D( I )*( CTEMP / ABS( CTEMP ) ) 130 CONTINUE END IF * * Reverse if MODE < 0 * IF( MODE.LT.0 ) THEN DO 140 I = 1, N / 2 CTEMP = D( I ) D( I ) = D( N+1-I ) D( N+1-I ) = CTEMP 140 CONTINUE END IF * END IF * RETURN * * End of ZLATM1 * END scalapack-1.8.0/TOOLS/LAPACK/zlatms.f0000640000175000017500000012455110604316307016642 0ustar muammarmuammar SUBROUTINE ZLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, LDA, WORK, INFO ) * * -- LAPACK test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER INFO, KL, KU, LDA, M, MODE, N DOUBLE PRECISION COND, DMAX * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * ZLATMS generates random matrices with specified singular values * (or hermitian with specified eigenvalues) * for testing LAPACK programs. * * ZLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a matrix with the appropriate band structure, by one * of two methods: * * Method A: * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * * Method B: * Convert the bandwidth-0 (i.e., diagonal) matrix to a * bandwidth-1 matrix using Givens rotations, "chasing" * out-of-band elements back, much as in QR; then convert * the bandwidth-1 to a bandwidth-2 matrix, etc. Note * that for reasonably small bandwidths (relative to M and * N) this requires less storage, as a dense matrix is not * generated. Also, for hermitian or symmetric matrices, * only one triangle is generated. * * Method A is chosen if the bandwidth is a large fraction of the * order of the matrix, and LDA is at least M (so a dense * matrix can be stored.) Method B is chosen if the bandwidth * is small (< 1/2 N for hermitian or symmetric, < .3 N+M for * non-symmetric), or LDA is less than M and not less than the * bandwidth. * * Pack the matrix if desired. Options specified by PACK are: * no packing * zero out upper half (if hermitian) * zero out lower half (if hermitian) * store the upper half columnwise (if hermitian or upper * triangular) * store the lower half columnwise (if hermitian or lower * triangular) * store the lower triangle in banded format (if hermitian or * lower triangular) * store the upper triangle in banded format (if hermitian or * upper triangular) * store the entire matrix in banded format * If Method B is chosen, and band format is specified, then the * matrix will be generated in the band format, so no repacking * will be necessary. * * Arguments * ========= * * M - INTEGER * The number of rows of A. Not modified. * * N - INTEGER * The number of columns of A. N must equal M if the matrix * is symmetric or hermitian (i.e., if SYM is not 'N') * Not modified. * * DIST - CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to ZLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - CHARACTER*1 * If SYM='H', the generated matrix is hermitian, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is hermitian, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * If SYM='S', the generated matrix is (complex) symmetric, * with singular values specified by D, COND, MODE, and * DMAX; they will not be negative. * Not modified. * * D - DOUBLE PRECISION array, dimension ( MIN( M, N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - DOUBLE PRECISION * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - DOUBLE PRECISION * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric or hermitian. * Not modified. * * KU - INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric or hermitian. * Not modified. * * PACK - CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * 'U' => zero out all subdiagonal entries (if symmetric * or hermitian) * 'L' => zero out all superdiagonal entries (if symmetric * or hermitian) * 'C' => store the upper triangle columnwise (only if the * matrix is symmetric, hermitian, or upper triangular) * 'R' => store the lower triangle columnwise (only if the * matrix is symmetric, hermitian, or lower triangular) * 'B' => store the lower triangle in band storage scheme * (only if the matrix is symmetric, hermitian, or * lower triangular) * 'Q' => store the upper triangle in band storage scheme * (only if the matrix is symmetric, hermitian, or * upper triangular) * 'Z' => store the entire matrix in band storage scheme * (pivoting can be provided for by using this * option to store A in the trailing rows of * the allocated storage) * * Using these options, the various LAPACK packed and banded * storage schemes can be obtained: * GB - use 'Z' * PB, SB, HB, or TB - use 'B' or 'Q' * PP, SP, HB, or TP - use 'C' or 'R' * * If two calls to ZLATMS differ only in the PACK parameter, * they will generate mathematically equivalent matrices. * Not modified. * * A - COMPLEX*16 array, dimension ( LDA, N ) * On exit A is the desired test matrix. A is first generated * in full (unpacked) form, and then packed, if so specified * by PACK. Thus, the first M elements of the first N * columns will always be modified. If PACK specifies a * packed or banded storage scheme, all LDA elements of the * first N columns will be modified; the elements of the * array which do not correspond to elements of the generated * matrix are set to zero. * Modified. * * LDA - INTEGER * LDA specifies the first dimension of A as declared in the * calling program. If PACK='N', 'U', 'L', 'C', or 'R', then * LDA must be at least M. If PACK='B' or 'Q', then LDA must * be at least MIN( KL, M-1) (which is equal to MIN(KU,N-1)). * If PACK='Z', LDA must be large enough to hold the packed * array: MIN( KU, N-1) + MIN( KL, M-1) + 1. * Not modified. * * WORK - COMPLEX*16 array, dimension ( 3*MAX( N, M ) ) * Workspace. * Modified. * * INFO - INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM is not 'N' and KU is not equal to * KL * -12 => PACK illegal string, or PACK='U' or 'L', and SYM='N'; * or PACK='C' or 'Q' and SYM='N' and KL is not zero; * or PACK='R' or 'B' and SYM='N' and KU is not zero; * or PACK='U', 'L', 'C', 'R', 'B', or 'Q', and M is not * N. * -14 => LDA is less than M, or PACK='Z' and LDA is less than * MIN(KU,N-1) + MIN(KL,M-1) + 1. * 1 => Error return from DLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from ZLAGGE, CLAGHE or CLAGSY * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CZERO PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) DOUBLE PRECISION TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) * .. * .. Local Scalars .. LOGICAL GIVENS, ILEXTR, ILTEMP, TOPDWN, ZSYM INTEGER I, IC, ICOL, IDIST, IENDCH, IINFO, IL, ILDA, $ IOFFG, IOFFST, IPACK, IPACKG, IR, IR1, IR2, $ IROW, IRSIGN, ISKEW, ISYM, ISYMPK, J, JC, JCH, $ JKL, JKU, JR, K, LLB, MINLDA, MNMIN, MR, NC, $ UUB DOUBLE PRECISION ALPHA, ANGLE, REALC, TEMP COMPLEX*16 C, CT, CTEMP, DUMMY, EXTRA, S, ST * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLARND COMPLEX*16 ZLARND EXTERNAL LSAME, DLARND, ZLARND * .. * .. External Subroutines .. EXTERNAL DLATM1, DSCAL, XERBLA, ZLAGGE, ZLAGHE, ZLAGSY, $ ZLAROT, ZLARTG, ZLASET * .. * .. Intrinsic Functions .. INTRINSIC ABS, COS, DBLE, DCMPLX, DCONJG, MAX, MIN, MOD, $ SIN * .. * .. Executable Statements .. * * 1) Decode and Test the input parameters. * Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ZSYM = .FALSE. ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ZSYM = .FALSE. ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 0 ZSYM = .TRUE. ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ZSYM = .FALSE. ELSE ISYM = -1 END IF * * Decode PACK * ISYMPK = 0 IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IF( LSAME( PACK, 'U' ) ) THEN IPACK = 1 ISYMPK = 1 ELSE IF( LSAME( PACK, 'L' ) ) THEN IPACK = 2 ISYMPK = 1 ELSE IF( LSAME( PACK, 'C' ) ) THEN IPACK = 3 ISYMPK = 2 ELSE IF( LSAME( PACK, 'R' ) ) THEN IPACK = 4 ISYMPK = 3 ELSE IF( LSAME( PACK, 'B' ) ) THEN IPACK = 5 ISYMPK = 3 ELSE IF( LSAME( PACK, 'Q' ) ) THEN IPACK = 6 ISYMPK = 2 ELSE IF( LSAME( PACK, 'Z' ) ) THEN IPACK = 7 ELSE IPACK = -1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) UUB = MIN( KU, N-1 ) MR = MIN( M, N+LLB ) NC = MIN( N, M+UUB ) * IF( IPACK.EQ.5 .OR. IPACK.EQ.6 ) THEN MINLDA = UUB + 1 ELSE IF( IPACK.EQ.7 ) THEN MINLDA = LLB + UUB + 1 ELSE MINLDA = M END IF * * Use Givens rotation method if bandwidth small enough, * or if LDA is too small to store the matrix unpacked. * GIVENS = .FALSE. IF( ISYM.EQ.1 ) THEN IF( DBLE( LLB+UUB ).LT.0.3D0*DBLE( MAX( 1, MR+NC ) ) ) $ GIVENS = .TRUE. ELSE IF( 2*LLB.LT.M ) $ GIVENS = .TRUE. END IF IF( LDA.LT.M .AND. LDA.GE.MINLDA ) $ GIVENS = .TRUE. * * Set INFO if an error * IF( M.LT.0 ) THEN INFO = -1 ELSE IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT.ONE ) $ THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( IPACK.EQ.-1 .OR. ( ISYMPK.EQ.1 .AND. ISYM.EQ.1 ) .OR. $ ( ISYMPK.EQ.2 .AND. ISYM.EQ.1 .AND. KL.GT.0 ) .OR. $ ( ISYMPK.EQ.3 .AND. ISYM.EQ.1 .AND. KU.GT.0 ) .OR. $ ( ISYMPK.NE.0 .AND. M.NE.N ) ) THEN INFO = -12 ELSE IF( LDA.LT.MAX( 1, MINLDA ) ) THEN INFO = -14 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL DLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * Choose Top-Down if D is (apparently) increasing, * Bottom-Up if D is (apparently) decreasing. * IF( ABS( D( 1 ) ).LE.ABS( D( MNMIN ) ) ) THEN TOPDWN = .TRUE. ELSE TOPDWN = .FALSE. END IF * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL DSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) * * 3) Generate Banded Matrix using Givens rotations. * Also the special case of UUB=LLB=0 * * Compute Addressing constants to cover all * storage formats. Whether GE, HE, SY, GB, HB, or SB, * upper or lower triangle or both, * the (i,j)-th element is in * A( i - ISKEW*j + IOFFST, j ) * IF( IPACK.GT.4 ) THEN ILDA = LDA - 1 ISKEW = 1 IF( IPACK.GT.5 ) THEN IOFFST = UUB + 1 ELSE IOFFST = 1 END IF ELSE ILDA = LDA ISKEW = 0 IOFFST = 0 END IF * * IPACKG is the format that the matrix is generated in. If this is * different from IPACK, then the matrix must be repacked at the * end. It also signals how to compute the norm, for scaling. * IPACKG = 0 * * Diagonal Matrix -- We are done, unless it * is to be stored HP/SP/PP/TP (PACK='R' or 'C') * IF( LLB.EQ.0 .AND. UUB.EQ.0 ) THEN DO 30 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFST, J ) = DCMPLX( D( J ) ) 30 CONTINUE * IF( IPACK.LE.2 .OR. IPACK.GE.5 ) $ IPACKG = IPACK * ELSE IF( GIVENS ) THEN * * Check whether to use Givens rotations, * Householder transformations, or nothing. * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * IF( IPACK.GT.4 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF * DO 40 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFST, J ) = DCMPLX( D( J ) ) 40 CONTINUE * IF( TOPDWN ) THEN JKL = 0 DO 70 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * Last row actually rotated is M * Last column actually rotated is MIN( M+JKU, N ) * DO 60 JR = 1, MIN( M+JKU, N ) + JKL - 1 EXTRA = CZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) ICOL = MAX( 1, JR-JKL ) IF( JR.LT.M ) THEN IL = MIN( N, JR+JKU ) + 1 - ICOL CALL ZLAROT( .TRUE., JR.GT.JKL, .FALSE., IL, C, $ S, A( JR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IR = JR IC = ICOL DO 50 JCH = JR - JKL, 1, -JKL - JKU IF( IR.LT.M ) THEN CALL ZLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = DCONJG( REALC*DUMMY ) S = DCONJG( -S*DUMMY ) END IF IROW = MAX( 1, JCH-JKU ) IL = IR + 2 - IROW CTEMP = CZERO ILTEMP = JCH.GT.JKU CALL ZLAROT( .FALSE., ILTEMP, .TRUE., IL, C, S, $ A( IROW-ISKEW*IC+IOFFST, IC ), $ ILDA, CTEMP, EXTRA ) IF( ILTEMP ) THEN CALL ZLARTG( A( IROW+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), CTEMP, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = DCONJG( REALC*DUMMY ) S = DCONJG( -S*DUMMY ) * ICOL = MAX( 1, JCH-JKU-JKL ) IL = IC + 2 - ICOL EXTRA = CZERO CALL ZLAROT( .TRUE., JCH.GT.JKU+JKL, .TRUE., $ IL, C, S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ CTEMP ) IC = ICOL IR = IROW END IF 50 CONTINUE 60 CONTINUE 70 CONTINUE * JKU = UUB DO 100 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * DO 90 JC = 1, MIN( N+JKL, M ) + JKU - 1 EXTRA = CZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) IROW = MAX( 1, JC-JKU ) IF( JC.LT.N ) THEN IL = MIN( M, JC+JKL ) + 1 - IROW CALL ZLAROT( .FALSE., JC.GT.JKU, .FALSE., IL, C, $ S, A( IROW-ISKEW*JC+IOFFST, JC ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IC = JC IR = IROW DO 80 JCH = JC - JKU, 1, -JKL - JKU IF( IC.LT.N ) THEN CALL ZLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = DCONJG( REALC*DUMMY ) S = DCONJG( -S*DUMMY ) END IF ICOL = MAX( 1, JCH-JKL ) IL = IC + 2 - ICOL CTEMP = CZERO ILTEMP = JCH.GT.JKL CALL ZLAROT( .TRUE., ILTEMP, .TRUE., IL, C, S, $ A( IR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, CTEMP, EXTRA ) IF( ILTEMP ) THEN CALL ZLARTG( A( IR+1-ISKEW*( ICOL+1 )+IOFFST, $ ICOL+1 ), CTEMP, REALC, S, $ DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = DCONJG( REALC*DUMMY ) S = DCONJG( -S*DUMMY ) IROW = MAX( 1, JCH-JKL-JKU ) IL = IR + 2 - IROW EXTRA = CZERO CALL ZLAROT( .FALSE., JCH.GT.JKL+JKU, .TRUE., $ IL, C, S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ CTEMP ) IC = ICOL IR = IROW END IF 80 CONTINUE 90 CONTINUE 100 CONTINUE * ELSE * * Bottom-Up -- Start at the bottom right. * JKL = 0 DO 130 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * First row actually rotated is M * First column actually rotated is MIN( M+JKU, N ) * IENDCH = MIN( M, N+JKL ) - 1 DO 120 JC = MIN( M+JKU, N ) - 1, 1 - JKL, -1 EXTRA = CZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) IROW = MAX( 1, JC-JKU+1 ) IF( JC.GT.0 ) THEN IL = MIN( M, JC+JKL+1 ) + 1 - IROW CALL ZLAROT( .FALSE., .FALSE., JC+JKL.LT.M, IL, $ C, S, A( IROW-ISKEW*JC+IOFFST, $ JC ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IC = JC DO 110 JCH = JC + JKL, IENDCH, JKL + JKU ILEXTR = IC.GT.0 IF( ILEXTR ) THEN CALL ZLARTG( A( JCH-ISKEW*IC+IOFFST, IC ), $ EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY END IF IC = MAX( 1, IC ) ICOL = MIN( N-1, JCH+JKU ) ILTEMP = JCH + JKU.LT.N CTEMP = CZERO CALL ZLAROT( .TRUE., ILEXTR, ILTEMP, ICOL+2-IC, $ C, S, A( JCH-ISKEW*IC+IOFFST, IC ), $ ILDA, EXTRA, CTEMP ) IF( ILTEMP ) THEN CALL ZLARTG( A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), CTEMP, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = CZERO CALL ZLAROT( .FALSE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, CTEMP, EXTRA ) IC = ICOL END IF 110 CONTINUE 120 CONTINUE 130 CONTINUE * JKU = UUB DO 160 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * * First row actually rotated is MIN( N+JKL, M ) * First column actually rotated is N * IENDCH = MIN( N, M+JKU ) - 1 DO 150 JR = MIN( N+JKL, M ) - 1, 1 - JKU, -1 EXTRA = CZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) ICOL = MAX( 1, JR-JKL+1 ) IF( JR.GT.0 ) THEN IL = MIN( N, JR+JKU+1 ) + 1 - ICOL CALL ZLAROT( .TRUE., .FALSE., JR+JKU.LT.N, IL, $ C, S, A( JR-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IR = JR DO 140 JCH = JR + JKU, IENDCH, JKL + JKU ILEXTR = IR.GT.0 IF( ILEXTR ) THEN CALL ZLARTG( A( IR-ISKEW*JCH+IOFFST, JCH ), $ EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY END IF IR = MAX( 1, IR ) IROW = MIN( M-1, JCH+JKL ) ILTEMP = JCH + JKL.LT.M CTEMP = CZERO CALL ZLAROT( .FALSE., ILEXTR, ILTEMP, IROW+2-IR, $ C, S, A( IR-ISKEW*JCH+IOFFST, $ JCH ), ILDA, EXTRA, CTEMP ) IF( ILTEMP ) THEN CALL ZLARTG( A( IROW-ISKEW*JCH+IOFFST, JCH ), $ CTEMP, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = CZERO CALL ZLAROT( .TRUE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( IROW-ISKEW*JCH+IOFFST, JCH ), $ ILDA, CTEMP, EXTRA ) IR = IROW END IF 140 CONTINUE 150 CONTINUE 160 CONTINUE * END IF * ELSE * * Symmetric -- A = U D U' * Hermitian -- A = U D U* * IPACKG = IPACK IOFFG = IOFFST * IF( TOPDWN ) THEN * * Top-Down -- Generate Upper triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 6 IOFFG = UUB + 1 ELSE IPACKG = 1 END IF * DO 170 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFG, J ) = DCMPLX( D( J ) ) 170 CONTINUE * DO 200 K = 1, UUB DO 190 JC = 1, N - 1 IROW = MAX( 1, JC-K ) IL = MIN( JC+1, K+2 ) EXTRA = CZERO CTEMP = A( JC-ISKEW*( JC+1 )+IOFFG, JC+1 ) ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) IF( ZSYM ) THEN CT = C ST = S ELSE CTEMP = DCONJG( CTEMP ) CT = DCONJG( C ) ST = DCONJG( S ) END IF CALL ZLAROT( .FALSE., JC.GT.K, .TRUE., IL, C, S, $ A( IROW-ISKEW*JC+IOFFG, JC ), ILDA, $ EXTRA, CTEMP ) CALL ZLAROT( .TRUE., .TRUE., .FALSE., $ MIN( K, N-JC )+1, CT, ST, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ CTEMP, DUMMY ) * * Chase EXTRA back up the matrix * ICOL = JC DO 180 JCH = JC - K, 1, -K CALL ZLARTG( A( JCH+1-ISKEW*( ICOL+1 )+IOFFG, $ ICOL+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = DCONJG( REALC*DUMMY ) S = DCONJG( -S*DUMMY ) CTEMP = A( JCH-ISKEW*( JCH+1 )+IOFFG, JCH+1 ) IF( ZSYM ) THEN CT = C ST = S ELSE CTEMP = DCONJG( CTEMP ) CT = DCONJG( C ) ST = DCONJG( S ) END IF CALL ZLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, CTEMP, EXTRA ) IROW = MAX( 1, JCH-K ) IL = MIN( JCH+1, K+2 ) EXTRA = CZERO CALL ZLAROT( .FALSE., JCH.GT.K, .TRUE., IL, CT, $ ST, A( IROW-ISKEW*JCH+IOFFG, JCH ), $ ILDA, EXTRA, CTEMP ) ICOL = JCH 180 CONTINUE 190 CONTINUE 200 CONTINUE * * If we need lower triangle, copy from upper. Note that * the order of copying is chosen to work for 'q' -> 'b' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.3 ) THEN DO 230 JC = 1, N IROW = IOFFST - ISKEW*JC IF( ZSYM ) THEN DO 210 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 210 CONTINUE ELSE DO 220 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = DCONJG( A( JC-ISKEW*JR+ $ IOFFG, JR ) ) 220 CONTINUE END IF 230 CONTINUE IF( IPACK.EQ.5 ) THEN DO 250 JC = N - UUB + 1, N DO 240 JR = N + 2 - JC, UUB + 1 A( JR, JC ) = CZERO 240 CONTINUE 250 CONTINUE END IF IF( IPACKG.EQ.6 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF ELSE * * Bottom-Up -- Generate Lower triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 5 IF( IPACK.EQ.6 ) $ IOFFG = 1 ELSE IPACKG = 2 END IF * DO 260 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFG, J ) = DCMPLX( D( J ) ) 260 CONTINUE * DO 290 K = 1, UUB DO 280 JC = N - 1, 1, -1 IL = MIN( N+1-JC, K+2 ) EXTRA = CZERO CTEMP = A( 1+( 1-ISKEW )*JC+IOFFG, JC ) ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE )*ZLARND( 5, ISEED ) S = SIN( ANGLE )*ZLARND( 5, ISEED ) IF( ZSYM ) THEN CT = C ST = S ELSE CTEMP = DCONJG( CTEMP ) CT = DCONJG( C ) ST = DCONJG( S ) END IF CALL ZLAROT( .FALSE., .TRUE., N-JC.GT.K, IL, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ CTEMP, EXTRA ) ICOL = MAX( 1, JC-K+1 ) CALL ZLAROT( .TRUE., .FALSE., .TRUE., JC+2-ICOL, $ CT, ST, A( JC-ISKEW*ICOL+IOFFG, $ ICOL ), ILDA, DUMMY, CTEMP ) * * Chase EXTRA back down the matrix * ICOL = JC DO 270 JCH = JC + K, N - 1, K CALL ZLARTG( A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ EXTRA, REALC, S, DUMMY ) DUMMY = ZLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY CTEMP = A( 1+( 1-ISKEW )*JCH+IOFFG, JCH ) IF( ZSYM ) THEN CT = C ST = S ELSE CTEMP = DCONJG( CTEMP ) CT = DCONJG( C ) ST = DCONJG( S ) END IF CALL ZLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, EXTRA, CTEMP ) IL = MIN( N+1-JCH, K+2 ) EXTRA = CZERO CALL ZLAROT( .FALSE., .TRUE., N-JCH.GT.K, IL, $ CT, ST, A( ( 1-ISKEW )*JCH+IOFFG, $ JCH ), ILDA, CTEMP, EXTRA ) ICOL = JCH 270 CONTINUE 280 CONTINUE 290 CONTINUE * * If we need upper triangle, copy from lower. Note that * the order of copying is chosen to work for 'b' -> 'q' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.4 ) THEN DO 320 JC = N, 1, -1 IROW = IOFFST - ISKEW*JC IF( ZSYM ) THEN DO 300 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 300 CONTINUE ELSE DO 310 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = DCONJG( A( JC-ISKEW*JR+ $ IOFFG, JR ) ) 310 CONTINUE END IF 320 CONTINUE IF( IPACK.EQ.6 ) THEN DO 340 JC = 1, UUB DO 330 JR = 1, UUB + 1 - JC A( JR, JC ) = CZERO 330 CONTINUE 340 CONTINUE END IF IF( IPACKG.EQ.5 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF END IF * * Ensure that the diagonal is real if Hermitian * IF( .NOT.ZSYM ) THEN DO 350 JC = 1, N IROW = IOFFST + ( 1-ISKEW )*JC A( IROW, JC ) = DCMPLX( DBLE( A( IROW, JC ) ) ) 350 CONTINUE END IF * END IF * ELSE * * 4) Generate Banded Matrix by first * Rotating by random Unitary matrices, * then reducing the bandwidth using Householder * transformations. * * Note: we should get here only if LDA .ge. N * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * CALL ZLAGGE( MR, NC, LLB, UUB, D, A, LDA, ISEED, WORK, $ IINFO ) ELSE * * Symmetric -- A = U D U' or * Hermitian -- A = U D U* * IF( ZSYM ) THEN CALL ZLAGSY( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) ELSE CALL ZLAGHE( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) END IF END IF * IF( IINFO.NE.0 ) THEN INFO = 3 RETURN END IF END IF * * 5) Pack the matrix * IF( IPACK.NE.IPACKG ) THEN IF( IPACK.EQ.1 ) THEN * * 'U' -- Upper triangular, not packed * DO 370 J = 1, M DO 360 I = J + 1, M A( I, J ) = CZERO 360 CONTINUE 370 CONTINUE * ELSE IF( IPACK.EQ.2 ) THEN * * 'L' -- Lower triangular, not packed * DO 390 J = 2, M DO 380 I = 1, J - 1 A( I, J ) = CZERO 380 CONTINUE 390 CONTINUE * ELSE IF( IPACK.EQ.3 ) THEN * * 'C' -- Upper triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 410 J = 1, M DO 400 I = 1, J IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 400 CONTINUE 410 CONTINUE * ELSE IF( IPACK.EQ.4 ) THEN * * 'R' -- Lower triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 430 J = 1, M DO 420 I = J, M IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 420 CONTINUE 430 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * 'B' -- The lower triangle is packed as a band matrix. * 'Q' -- The upper triangle is packed as a band matrix. * 'Z' -- The whole matrix is packed as a band matrix. * IF( IPACK.EQ.5 ) $ UUB = 0 IF( IPACK.EQ.6 ) $ LLB = 0 * DO 450 J = 1, UUB DO 440 I = MIN( J+LLB, M ), 1, -1 A( I-J+UUB+1, J ) = A( I, J ) 440 CONTINUE 450 CONTINUE * DO 470 J = UUB + 2, N DO 460 I = J - UUB, MIN( J+LLB, M ) A( I-J+UUB+1, J ) = A( I, J ) 460 CONTINUE 470 CONTINUE END IF * * If packed, zero out extraneous elements. * * Symmetric/Triangular Packed -- * zero out everything after A(IROW,ICOL) * IF( IPACK.EQ.3 .OR. IPACK.EQ.4 ) THEN DO 490 JC = ICOL, M DO 480 JR = IROW + 1, LDA A( JR, JC ) = CZERO 480 CONTINUE IROW = 0 490 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * Packed Band -- * 1st row is now in A( UUB+2-j, j), zero above it * m-th row is now in A( M+UUB-j,j), zero below it * last non-zero diagonal is now in A( UUB+LLB+1,j ), * zero below it, too. * IR1 = UUB + LLB + 2 IR2 = UUB + M + 2 DO 520 JC = 1, N DO 500 JR = 1, UUB + 1 - JC A( JR, JC ) = CZERO 500 CONTINUE DO 510 JR = MAX( 1, MIN( IR1, IR2-JC ) ), LDA A( JR, JC ) = CZERO 510 CONTINUE 520 CONTINUE END IF END IF * RETURN * * End of ZLATMS * END scalapack-1.8.0/TOOLS/LAPACK/clatms.f0000640000175000017500000012443610604316307016615 0ustar muammarmuammar SUBROUTINE CLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, LDA, WORK, INFO ) * * -- LAPACK test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER INFO, KL, KU, LDA, M, MODE, N REAL COND, DMAX * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL D( * ) COMPLEX A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * CLATMS generates random matrices with specified singular values * (or hermitian with specified eigenvalues) * for testing LAPACK programs. * * CLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a matrix with the appropriate band structure, by one * of two methods: * * Method A: * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * * Method B: * Convert the bandwidth-0 (i.e., diagonal) matrix to a * bandwidth-1 matrix using Givens rotations, "chasing" * out-of-band elements back, much as in QR; then convert * the bandwidth-1 to a bandwidth-2 matrix, etc. Note * that for reasonably small bandwidths (relative to M and * N) this requires less storage, as a dense matrix is not * generated. Also, for hermitian or symmetric matrices, * only one triangle is generated. * * Method A is chosen if the bandwidth is a large fraction of the * order of the matrix, and LDA is at least M (so a dense * matrix can be stored.) Method B is chosen if the bandwidth * is small (< 1/2 N for hermitian or symmetric, < .3 N+M for * non-symmetric), or LDA is less than M and not less than the * bandwidth. * * Pack the matrix if desired. Options specified by PACK are: * no packing * zero out upper half (if hermitian) * zero out lower half (if hermitian) * store the upper half columnwise (if hermitian or upper * triangular) * store the lower half columnwise (if hermitian or lower * triangular) * store the lower triangle in banded format (if hermitian or * lower triangular) * store the upper triangle in banded format (if hermitian or * upper triangular) * store the entire matrix in banded format * If Method B is chosen, and band format is specified, then the * matrix will be generated in the band format, so no repacking * will be necessary. * * Arguments * ========= * * M - INTEGER * The number of rows of A. Not modified. * * N - INTEGER * The number of columns of A. N must equal M if the matrix * is symmetric or hermitian (i.e., if SYM is not 'N') * Not modified. * * DIST - CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to CLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - CHARACTER*1 * If SYM='H', the generated matrix is hermitian, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is hermitian, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * If SYM='S', the generated matrix is (complex) symmetric, * with singular values specified by D, COND, MODE, and * DMAX; they will not be negative. * Not modified. * * D - REAL array, dimension ( MIN( M, N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - REAL * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - REAL * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric or hermitian. * Not modified. * * KU - INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric or hermitian. * Not modified. * * PACK - CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * 'U' => zero out all subdiagonal entries (if symmetric * or hermitian) * 'L' => zero out all superdiagonal entries (if symmetric * or hermitian) * 'C' => store the upper triangle columnwise (only if the * matrix is symmetric, hermitian, or upper triangular) * 'R' => store the lower triangle columnwise (only if the * matrix is symmetric, hermitian, or lower triangular) * 'B' => store the lower triangle in band storage scheme * (only if the matrix is symmetric, hermitian, or * lower triangular) * 'Q' => store the upper triangle in band storage scheme * (only if the matrix is symmetric, hermitian, or * upper triangular) * 'Z' => store the entire matrix in band storage scheme * (pivoting can be provided for by using this * option to store A in the trailing rows of * the allocated storage) * * Using these options, the various LAPACK packed and banded * storage schemes can be obtained: * GB - use 'Z' * PB, SB, HB, or TB - use 'B' or 'Q' * PP, SP, HB, or TP - use 'C' or 'R' * * If two calls to CLATMS differ only in the PACK parameter, * they will generate mathematically equivalent matrices. * Not modified. * * A - COMPLEX array, dimension ( LDA, N ) * On exit A is the desired test matrix. A is first generated * in full (unpacked) form, and then packed, if so specified * by PACK. Thus, the first M elements of the first N * columns will always be modified. If PACK specifies a * packed or banded storage scheme, all LDA elements of the * first N columns will be modified; the elements of the * array which do not correspond to elements of the generated * matrix are set to zero. * Modified. * * LDA - INTEGER * LDA specifies the first dimension of A as declared in the * calling program. If PACK='N', 'U', 'L', 'C', or 'R', then * LDA must be at least M. If PACK='B' or 'Q', then LDA must * be at least MIN( KL, M-1) (which is equal to MIN(KU,N-1)). * If PACK='Z', LDA must be large enough to hold the packed * array: MIN( KU, N-1) + MIN( KL, M-1) + 1. * Not modified. * * WORK - COMPLEX array, dimension ( 3*MAX( N, M ) ) * Workspace. * Modified. * * INFO - INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM is not 'N' and KU is not equal to * KL * -12 => PACK illegal string, or PACK='U' or 'L', and SYM='N'; * or PACK='C' or 'Q' and SYM='N' and KL is not zero; * or PACK='R' or 'B' and SYM='N' and KU is not zero; * or PACK='U', 'L', 'C', 'R', 'B', or 'Q', and M is not * N. * -14 => LDA is less than M, or PACK='Z' and LDA is less than * MIN(KU,N-1) + MIN(KL,M-1) + 1. * 1 => Error return from SLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from CLAGGE, CLAGHE or CLAGSY * * ===================================================================== * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CZERO PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) REAL TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) * .. * .. Local Scalars .. LOGICAL CSYM, GIVENS, ILEXTR, ILTEMP, TOPDWN INTEGER I, IC, ICOL, IDIST, IENDCH, IINFO, IL, ILDA, $ IOFFG, IOFFST, IPACK, IPACKG, IR, IR1, IR2, $ IROW, IRSIGN, ISKEW, ISYM, ISYMPK, J, JC, JCH, $ JKL, JKU, JR, K, LLB, MINLDA, MNMIN, MR, NC, $ UUB REAL ALPHA, ANGLE, REALC, TEMP COMPLEX C, CT, CTEMP, DUMMY, EXTRA, S, ST * .. * .. External Functions .. LOGICAL LSAME REAL SLARND COMPLEX CLARND EXTERNAL LSAME, SLARND, CLARND * .. * .. External Subroutines .. EXTERNAL CLAGGE, CLAGHE, CLAGSY, CLAROT, CLARTG, CLASET, $ SLATM1, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, CONJG, COS, MAX, MIN, MOD, REAL, $ SIN * .. * .. Executable Statements .. * * 1) Decode and Test the input parameters. * Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 CSYM = .FALSE. ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 CSYM = .FALSE. ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 0 CSYM = .TRUE. ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 CSYM = .FALSE. ELSE ISYM = -1 END IF * * Decode PACK * ISYMPK = 0 IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IF( LSAME( PACK, 'U' ) ) THEN IPACK = 1 ISYMPK = 1 ELSE IF( LSAME( PACK, 'L' ) ) THEN IPACK = 2 ISYMPK = 1 ELSE IF( LSAME( PACK, 'C' ) ) THEN IPACK = 3 ISYMPK = 2 ELSE IF( LSAME( PACK, 'R' ) ) THEN IPACK = 4 ISYMPK = 3 ELSE IF( LSAME( PACK, 'B' ) ) THEN IPACK = 5 ISYMPK = 3 ELSE IF( LSAME( PACK, 'Q' ) ) THEN IPACK = 6 ISYMPK = 2 ELSE IF( LSAME( PACK, 'Z' ) ) THEN IPACK = 7 ELSE IPACK = -1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) UUB = MIN( KU, N-1 ) MR = MIN( M, N+LLB ) NC = MIN( N, M+UUB ) * IF( IPACK.EQ.5 .OR. IPACK.EQ.6 ) THEN MINLDA = UUB + 1 ELSE IF( IPACK.EQ.7 ) THEN MINLDA = LLB + UUB + 1 ELSE MINLDA = M END IF * * Use Givens rotation method if bandwidth small enough, * or if LDA is too small to store the matrix unpacked. * GIVENS = .FALSE. IF( ISYM.EQ.1 ) THEN IF( REAL( LLB+UUB ).LT.0.3*REAL( MAX( 1, MR+NC ) ) ) $ GIVENS = .TRUE. ELSE IF( 2*LLB.LT.M ) $ GIVENS = .TRUE. END IF IF( LDA.LT.M .AND. LDA.GE.MINLDA ) $ GIVENS = .TRUE. * * Set INFO if an error * IF( M.LT.0 ) THEN INFO = -1 ELSE IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT.ONE ) $ THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( IPACK.EQ.-1 .OR. ( ISYMPK.EQ.1 .AND. ISYM.EQ.1 ) .OR. $ ( ISYMPK.EQ.2 .AND. ISYM.EQ.1 .AND. KL.GT.0 ) .OR. $ ( ISYMPK.EQ.3 .AND. ISYM.EQ.1 .AND. KU.GT.0 ) .OR. $ ( ISYMPK.NE.0 .AND. M.NE.N ) ) THEN INFO = -12 ELSE IF( LDA.LT.MAX( 1, MINLDA ) ) THEN INFO = -14 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'CLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL SLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * Choose Top-Down if D is (apparently) increasing, * Bottom-Up if D is (apparently) decreasing. * IF( ABS( D( 1 ) ).LE.ABS( D( MNMIN ) ) ) THEN TOPDWN = .TRUE. ELSE TOPDWN = .FALSE. END IF * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL SSCAL( MNMIN, ALPHA, D, 1 ) * END IF * CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) * * 3) Generate Banded Matrix using Givens rotations. * Also the special case of UUB=LLB=0 * * Compute Addressing constants to cover all * storage formats. Whether GE, HE, SY, GB, HB, or SB, * upper or lower triangle or both, * the (i,j)-th element is in * A( i - ISKEW*j + IOFFST, j ) * IF( IPACK.GT.4 ) THEN ILDA = LDA - 1 ISKEW = 1 IF( IPACK.GT.5 ) THEN IOFFST = UUB + 1 ELSE IOFFST = 1 END IF ELSE ILDA = LDA ISKEW = 0 IOFFST = 0 END IF * * IPACKG is the format that the matrix is generated in. If this is * different from IPACK, then the matrix must be repacked at the * end. It also signals how to compute the norm, for scaling. * IPACKG = 0 * * Diagonal Matrix -- We are done, unless it * is to be stored HP/SP/PP/TP (PACK='R' or 'C') * IF( LLB.EQ.0 .AND. UUB.EQ.0 ) THEN DO 30 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFST, J ) = CMPLX( D( J ) ) 30 CONTINUE * IF( IPACK.LE.2 .OR. IPACK.GE.5 ) $ IPACKG = IPACK * ELSE IF( GIVENS ) THEN * * Check whether to use Givens rotations, * Householder transformations, or nothing. * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * IF( IPACK.GT.4 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF * DO 40 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFST, J ) = CMPLX( D( J ) ) 40 CONTINUE * IF( TOPDWN ) THEN JKL = 0 DO 70 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * Last row actually rotated is M * Last column actually rotated is MIN( M+JKU, N ) * DO 60 JR = 1, MIN( M+JKU, N ) + JKL - 1 EXTRA = CZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) ICOL = MAX( 1, JR-JKL ) IF( JR.LT.M ) THEN IL = MIN( N, JR+JKU ) + 1 - ICOL CALL CLAROT( .TRUE., JR.GT.JKL, .FALSE., IL, C, $ S, A( JR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IR = JR IC = ICOL DO 50 JCH = JR - JKL, 1, -JKL - JKU IF( IR.LT.M ) THEN CALL CLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = CONJG( REALC*DUMMY ) S = CONJG( -S*DUMMY ) END IF IROW = MAX( 1, JCH-JKU ) IL = IR + 2 - IROW CTEMP = CZERO ILTEMP = JCH.GT.JKU CALL CLAROT( .FALSE., ILTEMP, .TRUE., IL, C, S, $ A( IROW-ISKEW*IC+IOFFST, IC ), $ ILDA, CTEMP, EXTRA ) IF( ILTEMP ) THEN CALL CLARTG( A( IROW+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), CTEMP, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = CONJG( REALC*DUMMY ) S = CONJG( -S*DUMMY ) * ICOL = MAX( 1, JCH-JKU-JKL ) IL = IC + 2 - ICOL EXTRA = CZERO CALL CLAROT( .TRUE., JCH.GT.JKU+JKL, .TRUE., $ IL, C, S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ CTEMP ) IC = ICOL IR = IROW END IF 50 CONTINUE 60 CONTINUE 70 CONTINUE * JKU = UUB DO 100 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * DO 90 JC = 1, MIN( N+JKL, M ) + JKU - 1 EXTRA = CZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) IROW = MAX( 1, JC-JKU ) IF( JC.LT.N ) THEN IL = MIN( M, JC+JKL ) + 1 - IROW CALL CLAROT( .FALSE., JC.GT.JKU, .FALSE., IL, C, $ S, A( IROW-ISKEW*JC+IOFFST, JC ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IC = JC IR = IROW DO 80 JCH = JC - JKU, 1, -JKL - JKU IF( IC.LT.N ) THEN CALL CLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = CONJG( REALC*DUMMY ) S = CONJG( -S*DUMMY ) END IF ICOL = MAX( 1, JCH-JKL ) IL = IC + 2 - ICOL CTEMP = CZERO ILTEMP = JCH.GT.JKL CALL CLAROT( .TRUE., ILTEMP, .TRUE., IL, C, S, $ A( IR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, CTEMP, EXTRA ) IF( ILTEMP ) THEN CALL CLARTG( A( IR+1-ISKEW*( ICOL+1 )+IOFFST, $ ICOL+1 ), CTEMP, REALC, S, $ DUMMY ) DUMMY = CLARND( 5, ISEED ) C = CONJG( REALC*DUMMY ) S = CONJG( -S*DUMMY ) IROW = MAX( 1, JCH-JKL-JKU ) IL = IR + 2 - IROW EXTRA = CZERO CALL CLAROT( .FALSE., JCH.GT.JKL+JKU, .TRUE., $ IL, C, S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ CTEMP ) IC = ICOL IR = IROW END IF 80 CONTINUE 90 CONTINUE 100 CONTINUE * ELSE * * Bottom-Up -- Start at the bottom right. * JKL = 0 DO 130 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * First row actually rotated is M * First column actually rotated is MIN( M+JKU, N ) * IENDCH = MIN( M, N+JKL ) - 1 DO 120 JC = MIN( M+JKU, N ) - 1, 1 - JKL, -1 EXTRA = CZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) IROW = MAX( 1, JC-JKU+1 ) IF( JC.GT.0 ) THEN IL = MIN( M, JC+JKL+1 ) + 1 - IROW CALL CLAROT( .FALSE., .FALSE., JC+JKL.LT.M, IL, $ C, S, A( IROW-ISKEW*JC+IOFFST, $ JC ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IC = JC DO 110 JCH = JC + JKL, IENDCH, JKL + JKU ILEXTR = IC.GT.0 IF( ILEXTR ) THEN CALL CLARTG( A( JCH-ISKEW*IC+IOFFST, IC ), $ EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY END IF IC = MAX( 1, IC ) ICOL = MIN( N-1, JCH+JKU ) ILTEMP = JCH + JKU.LT.N CTEMP = CZERO CALL CLAROT( .TRUE., ILEXTR, ILTEMP, ICOL+2-IC, $ C, S, A( JCH-ISKEW*IC+IOFFST, IC ), $ ILDA, EXTRA, CTEMP ) IF( ILTEMP ) THEN CALL CLARTG( A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), CTEMP, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = CZERO CALL CLAROT( .FALSE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, CTEMP, EXTRA ) IC = ICOL END IF 110 CONTINUE 120 CONTINUE 130 CONTINUE * JKU = UUB DO 160 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * * First row actually rotated is MIN( N+JKL, M ) * First column actually rotated is N * IENDCH = MIN( N, M+JKU ) - 1 DO 150 JR = MIN( N+JKL, M ) - 1, 1 - JKU, -1 EXTRA = CZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) ICOL = MAX( 1, JR-JKL+1 ) IF( JR.GT.0 ) THEN IL = MIN( N, JR+JKU+1 ) + 1 - ICOL CALL CLAROT( .TRUE., .FALSE., JR+JKU.LT.N, IL, $ C, S, A( JR-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IR = JR DO 140 JCH = JR + JKU, IENDCH, JKL + JKU ILEXTR = IR.GT.0 IF( ILEXTR ) THEN CALL CLARTG( A( IR-ISKEW*JCH+IOFFST, JCH ), $ EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY END IF IR = MAX( 1, IR ) IROW = MIN( M-1, JCH+JKL ) ILTEMP = JCH + JKL.LT.M CTEMP = CZERO CALL CLAROT( .FALSE., ILEXTR, ILTEMP, IROW+2-IR, $ C, S, A( IR-ISKEW*JCH+IOFFST, $ JCH ), ILDA, EXTRA, CTEMP ) IF( ILTEMP ) THEN CALL CLARTG( A( IROW-ISKEW*JCH+IOFFST, JCH ), $ CTEMP, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = CZERO CALL CLAROT( .TRUE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( IROW-ISKEW*JCH+IOFFST, JCH ), $ ILDA, CTEMP, EXTRA ) IR = IROW END IF 140 CONTINUE 150 CONTINUE 160 CONTINUE * END IF * ELSE * * Symmetric -- A = U D U' * Hermitian -- A = U D U* * IPACKG = IPACK IOFFG = IOFFST * IF( TOPDWN ) THEN * * Top-Down -- Generate Upper triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 6 IOFFG = UUB + 1 ELSE IPACKG = 1 END IF * DO 170 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFG, J ) = CMPLX( D( J ) ) 170 CONTINUE * DO 200 K = 1, UUB DO 190 JC = 1, N - 1 IROW = MAX( 1, JC-K ) IL = MIN( JC+1, K+2 ) EXTRA = CZERO CTEMP = A( JC-ISKEW*( JC+1 )+IOFFG, JC+1 ) ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) IF( CSYM ) THEN CT = C ST = S ELSE CTEMP = CONJG( CTEMP ) CT = CONJG( C ) ST = CONJG( S ) END IF CALL CLAROT( .FALSE., JC.GT.K, .TRUE., IL, C, S, $ A( IROW-ISKEW*JC+IOFFG, JC ), ILDA, $ EXTRA, CTEMP ) CALL CLAROT( .TRUE., .TRUE., .FALSE., $ MIN( K, N-JC )+1, CT, ST, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ CTEMP, DUMMY ) * * Chase EXTRA back up the matrix * ICOL = JC DO 180 JCH = JC - K, 1, -K CALL CLARTG( A( JCH+1-ISKEW*( ICOL+1 )+IOFFG, $ ICOL+1 ), EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = CONJG( REALC*DUMMY ) S = CONJG( -S*DUMMY ) CTEMP = A( JCH-ISKEW*( JCH+1 )+IOFFG, JCH+1 ) IF( CSYM ) THEN CT = C ST = S ELSE CTEMP = CONJG( CTEMP ) CT = CONJG( C ) ST = CONJG( S ) END IF CALL CLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, CTEMP, EXTRA ) IROW = MAX( 1, JCH-K ) IL = MIN( JCH+1, K+2 ) EXTRA = CZERO CALL CLAROT( .FALSE., JCH.GT.K, .TRUE., IL, CT, $ ST, A( IROW-ISKEW*JCH+IOFFG, JCH ), $ ILDA, EXTRA, CTEMP ) ICOL = JCH 180 CONTINUE 190 CONTINUE 200 CONTINUE * * If we need lower triangle, copy from upper. Note that * the order of copying is chosen to work for 'q' -> 'b' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.3 ) THEN DO 230 JC = 1, N IROW = IOFFST - ISKEW*JC IF( CSYM ) THEN DO 210 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 210 CONTINUE ELSE DO 220 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = CONJG( A( JC-ISKEW*JR+ $ IOFFG, JR ) ) 220 CONTINUE END IF 230 CONTINUE IF( IPACK.EQ.5 ) THEN DO 250 JC = N - UUB + 1, N DO 240 JR = N + 2 - JC, UUB + 1 A( JR, JC ) = CZERO 240 CONTINUE 250 CONTINUE END IF IF( IPACKG.EQ.6 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF ELSE * * Bottom-Up -- Generate Lower triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 5 IF( IPACK.EQ.6 ) $ IOFFG = 1 ELSE IPACKG = 2 END IF * DO 260 J = 1, MNMIN A( ( 1-ISKEW )*J+IOFFG, J ) = CMPLX( D( J ) ) 260 CONTINUE * DO 290 K = 1, UUB DO 280 JC = N - 1, 1, -1 IL = MIN( N+1-JC, K+2 ) EXTRA = CZERO CTEMP = A( 1+( 1-ISKEW )*JC+IOFFG, JC ) ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE )*CLARND( 5, ISEED ) S = SIN( ANGLE )*CLARND( 5, ISEED ) IF( CSYM ) THEN CT = C ST = S ELSE CTEMP = CONJG( CTEMP ) CT = CONJG( C ) ST = CONJG( S ) END IF CALL CLAROT( .FALSE., .TRUE., N-JC.GT.K, IL, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ CTEMP, EXTRA ) ICOL = MAX( 1, JC-K+1 ) CALL CLAROT( .TRUE., .FALSE., .TRUE., JC+2-ICOL, $ CT, ST, A( JC-ISKEW*ICOL+IOFFG, $ ICOL ), ILDA, DUMMY, CTEMP ) * * Chase EXTRA back down the matrix * ICOL = JC DO 270 JCH = JC + K, N - 1, K CALL CLARTG( A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ EXTRA, REALC, S, DUMMY ) DUMMY = CLARND( 5, ISEED ) C = REALC*DUMMY S = S*DUMMY CTEMP = A( 1+( 1-ISKEW )*JCH+IOFFG, JCH ) IF( CSYM ) THEN CT = C ST = S ELSE CTEMP = CONJG( CTEMP ) CT = CONJG( C ) ST = CONJG( S ) END IF CALL CLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, EXTRA, CTEMP ) IL = MIN( N+1-JCH, K+2 ) EXTRA = CZERO CALL CLAROT( .FALSE., .TRUE., N-JCH.GT.K, IL, $ CT, ST, A( ( 1-ISKEW )*JCH+IOFFG, $ JCH ), ILDA, CTEMP, EXTRA ) ICOL = JCH 270 CONTINUE 280 CONTINUE 290 CONTINUE * * If we need upper triangle, copy from lower. Note that * the order of copying is chosen to work for 'b' -> 'q' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.4 ) THEN DO 320 JC = N, 1, -1 IROW = IOFFST - ISKEW*JC IF( CSYM ) THEN DO 300 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 300 CONTINUE ELSE DO 310 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = CONJG( A( JC-ISKEW*JR+ $ IOFFG, JR ) ) 310 CONTINUE END IF 320 CONTINUE IF( IPACK.EQ.6 ) THEN DO 340 JC = 1, UUB DO 330 JR = 1, UUB + 1 - JC A( JR, JC ) = CZERO 330 CONTINUE 340 CONTINUE END IF IF( IPACKG.EQ.5 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF END IF * * Ensure that the diagonal is real if Hermitian * IF( .NOT.CSYM ) THEN DO 350 JC = 1, N IROW = IOFFST + ( 1-ISKEW )*JC A( IROW, JC ) = CMPLX( REAL( A( IROW, JC ) ) ) 350 CONTINUE END IF * END IF * ELSE * * 4) Generate Banded Matrix by first * Rotating by random Unitary matrices, * then reducing the bandwidth using Householder * transformations. * * Note: we should get here only if LDA .ge. N * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * CALL CLAGGE( MR, NC, LLB, UUB, D, A, LDA, ISEED, WORK, $ IINFO ) ELSE * * Symmetric -- A = U D U' or * Hermitian -- A = U D U* * IF( CSYM ) THEN CALL CLAGSY( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) ELSE CALL CLAGHE( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) END IF END IF * IF( IINFO.NE.0 ) THEN INFO = 3 RETURN END IF END IF * * 5) Pack the matrix * IF( IPACK.NE.IPACKG ) THEN IF( IPACK.EQ.1 ) THEN * * 'U' -- Upper triangular, not packed * DO 370 J = 1, M DO 360 I = J + 1, M A( I, J ) = CZERO 360 CONTINUE 370 CONTINUE * ELSE IF( IPACK.EQ.2 ) THEN * * 'L' -- Lower triangular, not packed * DO 390 J = 2, M DO 380 I = 1, J - 1 A( I, J ) = CZERO 380 CONTINUE 390 CONTINUE * ELSE IF( IPACK.EQ.3 ) THEN * * 'C' -- Upper triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 410 J = 1, M DO 400 I = 1, J IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 400 CONTINUE 410 CONTINUE * ELSE IF( IPACK.EQ.4 ) THEN * * 'R' -- Lower triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 430 J = 1, M DO 420 I = J, M IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 420 CONTINUE 430 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * 'B' -- The lower triangle is packed as a band matrix. * 'Q' -- The upper triangle is packed as a band matrix. * 'Z' -- The whole matrix is packed as a band matrix. * IF( IPACK.EQ.5 ) $ UUB = 0 IF( IPACK.EQ.6 ) $ LLB = 0 * DO 450 J = 1, UUB DO 440 I = MIN( J+LLB, M ), 1, -1 A( I-J+UUB+1, J ) = A( I, J ) 440 CONTINUE 450 CONTINUE * DO 470 J = UUB + 2, N DO 460 I = J - UUB, MIN( J+LLB, M ) A( I-J+UUB+1, J ) = A( I, J ) 460 CONTINUE 470 CONTINUE END IF * * If packed, zero out extraneous elements. * * Symmetric/Triangular Packed -- * zero out everything after A(IROW,ICOL) * IF( IPACK.EQ.3 .OR. IPACK.EQ.4 ) THEN DO 490 JC = ICOL, M DO 480 JR = IROW + 1, LDA A( JR, JC ) = CZERO 480 CONTINUE IROW = 0 490 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * Packed Band -- * 1st row is now in A( UUB+2-j, j), zero above it * m-th row is now in A( M+UUB-j,j), zero below it * last non-zero diagonal is now in A( UUB+LLB+1,j ), * zero below it, too. * IR1 = UUB + LLB + 2 IR2 = UUB + M + 2 DO 520 JC = 1, N DO 500 JR = 1, UUB + 1 - JC A( JR, JC ) = CZERO 500 CONTINUE DO 510 JR = MAX( 1, MIN( IR1, IR2-JC ) ), LDA A( JR, JC ) = CZERO 510 CONTINUE 520 CONTINUE END IF END IF * RETURN * * End of CLATMS * END scalapack-1.8.0/TOOLS/LAPACK/clagsy.f0000640000175000017500000001474710604316307016617 0ustar muammarmuammar SUBROUTINE CLAGSY( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL D( * ) COMPLEX A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * CLAGSY generates a complex symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: * A = U*D*U**T. The semi-bandwidth may then be reduced to k by * additional unitary transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) REAL array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX array, dimension (LDA,N) * The generated n by n symmetric matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE, HALF PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ), $ HALF = ( 0.5E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, II, J, JJ REAL WN COMPLEX ALPHA, TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL CAXPY, CGEMV, CGERC, CLACGV, CLARNV, CSCAL, $ CSYMV, XERBLA * .. * .. External Functions .. REAL SCNRM2 COMPLEX CDOTC EXTERNAL SCNRM2, CDOTC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, REAL * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'CLAGSY', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of symmetric matrix * DO 60 I = N - 1, 1, -1 * * generate random reflection * CALL CLARNV( 3, ISEED, N-I+1, WORK ) WN = SCNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL CSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = REAL( WB / WA ) END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * conjg(u) * CALL CLACGV( N-I+1, WORK, 1 ) CALL CSYMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) CALL CLACGV( N-I+1, WORK, 1 ) * * compute v := y - 1/2 * tau * ( u, y ) * u * ALPHA = -HALF*TAU*CDOTC( N-I+1, WORK, 1, WORK( N+1 ), 1 ) CALL CAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * * CALL CSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, * $ A( I, I ), LDA ) * DO 50 JJ = I, N DO 40 II = JJ, N A( II, JJ ) = A( II, JJ ) - $ WORK( II-I+1 )*WORK( N+JJ-I+1 ) - $ WORK( N+II-I+1 )*WORK( JJ-I+1 ) 40 CONTINUE 50 CONTINUE 60 CONTINUE * * Reduce number of subdiagonals to K * DO 100 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = SCNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = ( WN / ABS( A( K+I, I ) ) )*A( K+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL CSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL CGEMV( 'Conjugate transpose', N-K-I+1, K-1, ONE, $ A( K+I, I+1 ), LDA, A( K+I, I ), 1, ZERO, WORK, 1 ) CALL CGERC( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * conjg(u) * CALL CLACGV( N-K-I+1, A( K+I, I ), 1 ) CALL CSYMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) CALL CLACGV( N-K-I+1, A( K+I, I ), 1 ) * * compute v := y - 1/2 * tau * ( u, y ) * u * ALPHA = -HALF*TAU*CDOTC( N-K-I+1, A( K+I, I ), 1, WORK, 1 ) CALL CAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply symmetric rank-2 update to A(k+i:n,k+i:n) * * CALL CSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, * $ A( K+I, K+I ), LDA ) * DO 80 JJ = K + I, N DO 70 II = JJ, N A( II, JJ ) = A( II, JJ ) - A( II, I )*WORK( JJ-K-I+1 ) - $ WORK( II-K-I+1 )*A( JJ, I ) 70 CONTINUE 80 CONTINUE * A( K+I, I ) = -WA DO 90 J = K + I + 1, N A( J, I ) = ZERO 90 CONTINUE 100 CONTINUE * * Store full symmetric matrix * DO 120 J = 1, N DO 110 I = J + 1, N A( J, I ) = A( I, J ) 110 CONTINUE 120 CONTINUE RETURN * * End of CLAGSY * END scalapack-1.8.0/TOOLS/LAPACK/dlatms.f0000640000175000017500000011243010604316307016605 0ustar muammarmuammar SUBROUTINE DLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, LDA, WORK, INFO ) * * -- LAPACK test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER INFO, KL, KU, LDA, M, MODE, N DOUBLE PRECISION COND, DMAX * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * DLATMS generates random matrices with specified singular values * (or symmetric/hermitian with specified eigenvalues) * for testing LAPACK programs. * * DLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a matrix with the appropriate band structure, by one * of two methods: * * Method A: * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * * Method B: * Convert the bandwidth-0 (i.e., diagonal) matrix to a * bandwidth-1 matrix using Givens rotations, "chasing" * out-of-band elements back, much as in QR; then * convert the bandwidth-1 to a bandwidth-2 matrix, etc. * Note that for reasonably small bandwidths (relative to * M and N) this requires less storage, as a dense matrix * is not generated. Also, for symmetric matrices, only * one triangle is generated. * * Method A is chosen if the bandwidth is a large fraction of the * order of the matrix, and LDA is at least M (so a dense * matrix can be stored.) Method B is chosen if the bandwidth * is small (< 1/2 N for symmetric, < .3 N+M for * non-symmetric), or LDA is less than M and not less than the * bandwidth. * * Pack the matrix if desired. Options specified by PACK are: * no packing * zero out upper half (if symmetric) * zero out lower half (if symmetric) * store the upper half columnwise (if symmetric or upper * triangular) * store the lower half columnwise (if symmetric or lower * triangular) * store the lower triangle in banded format (if symmetric * or lower triangular) * store the upper triangle in banded format (if symmetric * or upper triangular) * store the entire matrix in banded format * If Method B is chosen, and band format is specified, then the * matrix will be generated in the band format, so no repacking * will be necessary. * * Arguments * ========= * * M - INTEGER * The number of rows of A. Not modified. * * N - INTEGER * The number of columns of A. Not modified. * * DIST - CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to DLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - CHARACTER*1 * If SYM='S' or 'H', the generated matrix is symmetric, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is symmetric, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * Not modified. * * D - DOUBLE PRECISION array, dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - DOUBLE PRECISION * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - DOUBLE PRECISION * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * * KU - INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * * PACK - CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * 'U' => zero out all subdiagonal entries (if symmetric) * 'L' => zero out all superdiagonal entries (if symmetric) * 'C' => store the upper triangle columnwise * (only if the matrix is symmetric or upper triangular) * 'R' => store the lower triangle columnwise * (only if the matrix is symmetric or lower triangular) * 'B' => store the lower triangle in band storage scheme * (only if matrix symmetric or lower triangular) * 'Q' => store the upper triangle in band storage scheme * (only if matrix symmetric or upper triangular) * 'Z' => store the entire matrix in band storage scheme * (pivoting can be provided for by using this * option to store A in the trailing rows of * the allocated storage) * * Using these options, the various LAPACK packed and banded * storage schemes can be obtained: * GB - use 'Z' * PB, SB or TB - use 'B' or 'Q' * PP, SP or TP - use 'C' or 'R' * * If two calls to DLATMS differ only in the PACK parameter, * they will generate mathematically equivalent matrices. * Not modified. * * A - DOUBLE PRECISION array, dimension ( LDA, N ) * On exit A is the desired test matrix. A is first generated * in full (unpacked) form, and then packed, if so specified * by PACK. Thus, the first M elements of the first N * columns will always be modified. If PACK specifies a * packed or banded storage scheme, all LDA elements of the * first N columns will be modified; the elements of the * array which do not correspond to elements of the generated * matrix are set to zero. * Modified. * * LDA - INTEGER * LDA specifies the first dimension of A as declared in the * calling program. If PACK='N', 'U', 'L', 'C', or 'R', then * LDA must be at least M. If PACK='B' or 'Q', then LDA must * be at least MIN( KL, M-1) (which is equal to MIN(KU,N-1)). * If PACK='Z', LDA must be large enough to hold the packed * array: MIN( KU, N-1) + MIN( KL, M-1) + 1. * Not modified. * * WORK - DOUBLE PRECISION array, dimension ( 3*MAX( N , M ) ) * Workspace. * Modified. * * INFO - INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -12 => PACK illegal string, or PACK='U' or 'L', and SYM='N'; * or PACK='C' or 'Q' and SYM='N' and KL is not zero; * or PACK='R' or 'B' and SYM='N' and KU is not zero; * or PACK='U', 'L', 'C', 'R', 'B', or 'Q', and M is not * N. * -14 => LDA is less than M, or PACK='Z' and LDA is less than * MIN(KU,N-1) + MIN(KL,M-1) + 1. * 1 => Error return from DLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from DLAGGE or SLAGSY * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) DOUBLE PRECISION TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) * .. * .. Local Scalars .. LOGICAL GIVENS, ILEXTR, ILTEMP, TOPDWN INTEGER I, IC, ICOL, IDIST, IENDCH, IINFO, IL, ILDA, $ IOFFG, IOFFST, IPACK, IPACKG, IR, IR1, IR2, $ IROW, IRSIGN, ISKEW, ISYM, ISYMPK, J, JC, JCH, $ JKL, JKU, JR, K, LLB, MINLDA, MNMIN, MR, NC, $ UUB DOUBLE PRECISION ALPHA, ANGLE, C, DUMMY, EXTRA, S, TEMP * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLARND EXTERNAL LSAME, DLARND * .. * .. External Subroutines .. EXTERNAL DCOPY, DLAGGE, DLAGSY, DLAROT, DLARTG, DLASET, $ DLATM1, DSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, COS, DBLE, MAX, MIN, MOD, SIN * .. * .. Executable Statements .. * * 1) Decode and Test the input parameters. * Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * ISYMPK = 0 IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IF( LSAME( PACK, 'U' ) ) THEN IPACK = 1 ISYMPK = 1 ELSE IF( LSAME( PACK, 'L' ) ) THEN IPACK = 2 ISYMPK = 1 ELSE IF( LSAME( PACK, 'C' ) ) THEN IPACK = 3 ISYMPK = 2 ELSE IF( LSAME( PACK, 'R' ) ) THEN IPACK = 4 ISYMPK = 3 ELSE IF( LSAME( PACK, 'B' ) ) THEN IPACK = 5 ISYMPK = 3 ELSE IF( LSAME( PACK, 'Q' ) ) THEN IPACK = 6 ISYMPK = 2 ELSE IF( LSAME( PACK, 'Z' ) ) THEN IPACK = 7 ELSE IPACK = -1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) UUB = MIN( KU, N-1 ) MR = MIN( M, N+LLB ) NC = MIN( N, M+UUB ) * IF( IPACK.EQ.5 .OR. IPACK.EQ.6 ) THEN MINLDA = UUB + 1 ELSE IF( IPACK.EQ.7 ) THEN MINLDA = LLB + UUB + 1 ELSE MINLDA = M END IF * * Use Givens rotation method if bandwidth small enough, * or if LDA is too small to store the matrix unpacked. * GIVENS = .FALSE. IF( ISYM.EQ.1 ) THEN IF( DBLE( LLB+UUB ).LT.0.3D0*DBLE( MAX( 1, MR+NC ) ) ) $ GIVENS = .TRUE. ELSE IF( 2*LLB.LT.M ) $ GIVENS = .TRUE. END IF IF( LDA.LT.M .AND. LDA.GE.MINLDA ) $ GIVENS = .TRUE. * * Set INFO if an error * IF( M.LT.0 ) THEN INFO = -1 ELSE IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT.ONE ) $ THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( IPACK.EQ.-1 .OR. ( ISYMPK.EQ.1 .AND. ISYM.EQ.1 ) .OR. $ ( ISYMPK.EQ.2 .AND. ISYM.EQ.1 .AND. KL.GT.0 ) .OR. $ ( ISYMPK.EQ.3 .AND. ISYM.EQ.1 .AND. KU.GT.0 ) .OR. $ ( ISYMPK.NE.0 .AND. M.NE.N ) ) THEN INFO = -12 ELSE IF( LDA.LT.MAX( 1, MINLDA ) ) THEN INFO = -14 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'DLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL DLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * Choose Top-Down if D is (apparently) increasing, * Bottom-Up if D is (apparently) decreasing. * IF( ABS( D( 1 ) ).LE.ABS( D( MNMIN ) ) ) THEN TOPDWN = .TRUE. ELSE TOPDWN = .FALSE. END IF * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL DSCAL( MNMIN, ALPHA, D, 1 ) * END IF * * 3) Generate Banded Matrix using Givens rotations. * Also the special case of UUB=LLB=0 * * Compute Addressing constants to cover all * storage formats. Whether GE, SY, GB, or SB, * upper or lower triangle or both, * the (i,j)-th element is in * A( i - ISKEW*j + IOFFST, j ) * IF( IPACK.GT.4 ) THEN ILDA = LDA - 1 ISKEW = 1 IF( IPACK.GT.5 ) THEN IOFFST = UUB + 1 ELSE IOFFST = 1 END IF ELSE ILDA = LDA ISKEW = 0 IOFFST = 0 END IF * * IPACKG is the format that the matrix is generated in. If this is * different from IPACK, then the matrix must be repacked at the * end. It also signals how to compute the norm, for scaling. * IPACKG = 0 CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) * * Diagonal Matrix -- We are done, unless it * is to be stored SP/PP/TP (PACK='R' or 'C') * IF( LLB.EQ.0 .AND. UUB.EQ.0 ) THEN CALL DCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFST, 1 ), ILDA+1 ) IF( IPACK.LE.2 .OR. IPACK.GE.5 ) $ IPACKG = IPACK * ELSE IF( GIVENS ) THEN * * Check whether to use Givens rotations, * Householder transformations, or nothing. * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * IF( IPACK.GT.4 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF * CALL DCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFST, 1 ), ILDA+1 ) * IF( TOPDWN ) THEN JKL = 0 DO 50 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * Last row actually rotated is M * Last column actually rotated is MIN( M+JKU, N ) * DO 40 JR = 1, MIN( M+JKU, N ) + JKL - 1 EXTRA = ZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) ICOL = MAX( 1, JR-JKL ) IF( JR.LT.M ) THEN IL = MIN( N, JR+JKU ) + 1 - ICOL CALL DLAROT( .TRUE., JR.GT.JKL, .FALSE., IL, C, $ S, A( JR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IR = JR IC = ICOL DO 30 JCH = JR - JKL, 1, -JKL - JKU IF( IR.LT.M ) THEN CALL DLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, C, S, DUMMY ) END IF IROW = MAX( 1, JCH-JKU ) IL = IR + 2 - IROW TEMP = ZERO ILTEMP = JCH.GT.JKU CALL DLAROT( .FALSE., ILTEMP, .TRUE., IL, C, -S, $ A( IROW-ISKEW*IC+IOFFST, IC ), $ ILDA, TEMP, EXTRA ) IF( ILTEMP ) THEN CALL DLARTG( A( IROW+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), TEMP, C, S, DUMMY ) ICOL = MAX( 1, JCH-JKU-JKL ) IL = IC + 2 - ICOL EXTRA = ZERO CALL DLAROT( .TRUE., JCH.GT.JKU+JKL, .TRUE., $ IL, C, -S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ TEMP ) IC = ICOL IR = IROW END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE * JKU = UUB DO 80 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * DO 70 JC = 1, MIN( N+JKL, M ) + JKU - 1 EXTRA = ZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) IROW = MAX( 1, JC-JKU ) IF( JC.LT.N ) THEN IL = MIN( M, JC+JKL ) + 1 - IROW CALL DLAROT( .FALSE., JC.GT.JKU, .FALSE., IL, C, $ S, A( IROW-ISKEW*JC+IOFFST, JC ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IC = JC IR = IROW DO 60 JCH = JC - JKU, 1, -JKL - JKU IF( IC.LT.N ) THEN CALL DLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, C, S, DUMMY ) END IF ICOL = MAX( 1, JCH-JKL ) IL = IC + 2 - ICOL TEMP = ZERO ILTEMP = JCH.GT.JKL CALL DLAROT( .TRUE., ILTEMP, .TRUE., IL, C, -S, $ A( IR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, TEMP, EXTRA ) IF( ILTEMP ) THEN CALL DLARTG( A( IR+1-ISKEW*( ICOL+1 )+IOFFST, $ ICOL+1 ), TEMP, C, S, DUMMY ) IROW = MAX( 1, JCH-JKL-JKU ) IL = IR + 2 - IROW EXTRA = ZERO CALL DLAROT( .FALSE., JCH.GT.JKL+JKU, .TRUE., $ IL, C, -S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ TEMP ) IC = ICOL IR = IROW END IF 60 CONTINUE 70 CONTINUE 80 CONTINUE * ELSE * * Bottom-Up -- Start at the bottom right. * JKL = 0 DO 110 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * First row actually rotated is M * First column actually rotated is MIN( M+JKU, N ) * IENDCH = MIN( M, N+JKL ) - 1 DO 100 JC = MIN( M+JKU, N ) - 1, 1 - JKL, -1 EXTRA = ZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) IROW = MAX( 1, JC-JKU+1 ) IF( JC.GT.0 ) THEN IL = MIN( M, JC+JKL+1 ) + 1 - IROW CALL DLAROT( .FALSE., .FALSE., JC+JKL.LT.M, IL, $ C, S, A( IROW-ISKEW*JC+IOFFST, $ JC ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IC = JC DO 90 JCH = JC + JKL, IENDCH, JKL + JKU ILEXTR = IC.GT.0 IF( ILEXTR ) THEN CALL DLARTG( A( JCH-ISKEW*IC+IOFFST, IC ), $ EXTRA, C, S, DUMMY ) END IF IC = MAX( 1, IC ) ICOL = MIN( N-1, JCH+JKU ) ILTEMP = JCH + JKU.LT.N TEMP = ZERO CALL DLAROT( .TRUE., ILEXTR, ILTEMP, ICOL+2-IC, $ C, S, A( JCH-ISKEW*IC+IOFFST, IC ), $ ILDA, EXTRA, TEMP ) IF( ILTEMP ) THEN CALL DLARTG( A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), TEMP, C, S, DUMMY ) IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = ZERO CALL DLAROT( .FALSE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, TEMP, EXTRA ) IC = ICOL END IF 90 CONTINUE 100 CONTINUE 110 CONTINUE * JKU = UUB DO 140 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * * First row actually rotated is MIN( N+JKL, M ) * First column actually rotated is N * IENDCH = MIN( N, M+JKU ) - 1 DO 130 JR = MIN( N+JKL, M ) - 1, 1 - JKU, -1 EXTRA = ZERO ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) ICOL = MAX( 1, JR-JKL+1 ) IF( JR.GT.0 ) THEN IL = MIN( N, JR+JKU+1 ) + 1 - ICOL CALL DLAROT( .TRUE., .FALSE., JR+JKU.LT.N, IL, $ C, S, A( JR-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IR = JR DO 120 JCH = JR + JKU, IENDCH, JKL + JKU ILEXTR = IR.GT.0 IF( ILEXTR ) THEN CALL DLARTG( A( IR-ISKEW*JCH+IOFFST, JCH ), $ EXTRA, C, S, DUMMY ) END IF IR = MAX( 1, IR ) IROW = MIN( M-1, JCH+JKL ) ILTEMP = JCH + JKL.LT.M TEMP = ZERO CALL DLAROT( .FALSE., ILEXTR, ILTEMP, IROW+2-IR, $ C, S, A( IR-ISKEW*JCH+IOFFST, $ JCH ), ILDA, EXTRA, TEMP ) IF( ILTEMP ) THEN CALL DLARTG( A( IROW-ISKEW*JCH+IOFFST, JCH ), $ TEMP, C, S, DUMMY ) IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = ZERO CALL DLAROT( .TRUE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( IROW-ISKEW*JCH+IOFFST, JCH ), $ ILDA, TEMP, EXTRA ) IR = IROW END IF 120 CONTINUE 130 CONTINUE 140 CONTINUE END IF * ELSE * * Symmetric -- A = U D U' * IPACKG = IPACK IOFFG = IOFFST * IF( TOPDWN ) THEN * * Top-Down -- Generate Upper triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 6 IOFFG = UUB + 1 ELSE IPACKG = 1 END IF CALL DCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFG, 1 ), ILDA+1 ) * DO 170 K = 1, UUB DO 160 JC = 1, N - 1 IROW = MAX( 1, JC-K ) IL = MIN( JC+1, K+2 ) EXTRA = ZERO TEMP = A( JC-ISKEW*( JC+1 )+IOFFG, JC+1 ) ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) CALL DLAROT( .FALSE., JC.GT.K, .TRUE., IL, C, S, $ A( IROW-ISKEW*JC+IOFFG, JC ), ILDA, $ EXTRA, TEMP ) CALL DLAROT( .TRUE., .TRUE., .FALSE., $ MIN( K, N-JC )+1, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ TEMP, DUMMY ) * * Chase EXTRA back up the matrix * ICOL = JC DO 150 JCH = JC - K, 1, -K CALL DLARTG( A( JCH+1-ISKEW*( ICOL+1 )+IOFFG, $ ICOL+1 ), EXTRA, C, S, DUMMY ) TEMP = A( JCH-ISKEW*( JCH+1 )+IOFFG, JCH+1 ) CALL DLAROT( .TRUE., .TRUE., .TRUE., K+2, C, -S, $ A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, TEMP, EXTRA ) IROW = MAX( 1, JCH-K ) IL = MIN( JCH+1, K+2 ) EXTRA = ZERO CALL DLAROT( .FALSE., JCH.GT.K, .TRUE., IL, C, $ -S, A( IROW-ISKEW*JCH+IOFFG, JCH ), $ ILDA, EXTRA, TEMP ) ICOL = JCH 150 CONTINUE 160 CONTINUE 170 CONTINUE * * If we need lower triangle, copy from upper. Note that * the order of copying is chosen to work for 'q' -> 'b' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.3 ) THEN DO 190 JC = 1, N IROW = IOFFST - ISKEW*JC DO 180 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 180 CONTINUE 190 CONTINUE IF( IPACK.EQ.5 ) THEN DO 210 JC = N - UUB + 1, N DO 200 JR = N + 2 - JC, UUB + 1 A( JR, JC ) = ZERO 200 CONTINUE 210 CONTINUE END IF IF( IPACKG.EQ.6 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF ELSE * * Bottom-Up -- Generate Lower triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 5 IF( IPACK.EQ.6 ) $ IOFFG = 1 ELSE IPACKG = 2 END IF CALL DCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFG, 1 ), ILDA+1 ) * DO 240 K = 1, UUB DO 230 JC = N - 1, 1, -1 IL = MIN( N+1-JC, K+2 ) EXTRA = ZERO TEMP = A( 1+( 1-ISKEW )*JC+IOFFG, JC ) ANGLE = TWOPI*DLARND( 1, ISEED ) C = COS( ANGLE ) S = -SIN( ANGLE ) CALL DLAROT( .FALSE., .TRUE., N-JC.GT.K, IL, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ TEMP, EXTRA ) ICOL = MAX( 1, JC-K+1 ) CALL DLAROT( .TRUE., .FALSE., .TRUE., JC+2-ICOL, C, $ S, A( JC-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, DUMMY, TEMP ) * * Chase EXTRA back down the matrix * ICOL = JC DO 220 JCH = JC + K, N - 1, K CALL DLARTG( A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ EXTRA, C, S, DUMMY ) TEMP = A( 1+( 1-ISKEW )*JCH+IOFFG, JCH ) CALL DLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, EXTRA, TEMP ) IL = MIN( N+1-JCH, K+2 ) EXTRA = ZERO CALL DLAROT( .FALSE., .TRUE., N-JCH.GT.K, IL, C, $ S, A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, TEMP, EXTRA ) ICOL = JCH 220 CONTINUE 230 CONTINUE 240 CONTINUE * * If we need upper triangle, copy from lower. Note that * the order of copying is chosen to work for 'b' -> 'q' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.4 ) THEN DO 260 JC = N, 1, -1 IROW = IOFFST - ISKEW*JC DO 250 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 250 CONTINUE 260 CONTINUE IF( IPACK.EQ.6 ) THEN DO 280 JC = 1, UUB DO 270 JR = 1, UUB + 1 - JC A( JR, JC ) = ZERO 270 CONTINUE 280 CONTINUE END IF IF( IPACKG.EQ.5 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF END IF END IF * ELSE * * 4) Generate Banded Matrix by first * Rotating by random Unitary matrices, * then reducing the bandwidth using Householder * transformations. * * Note: we should get here only if LDA .ge. N * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * CALL DLAGGE( MR, NC, LLB, UUB, D, A, LDA, ISEED, WORK, $ IINFO ) ELSE * * Symmetric -- A = U D U' * CALL DLAGSY( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) * END IF IF( IINFO.NE.0 ) THEN INFO = 3 RETURN END IF END IF * * 5) Pack the matrix * IF( IPACK.NE.IPACKG ) THEN IF( IPACK.EQ.1 ) THEN * * 'U' -- Upper triangular, not packed * DO 300 J = 1, M DO 290 I = J + 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE * ELSE IF( IPACK.EQ.2 ) THEN * * 'L' -- Lower triangular, not packed * DO 320 J = 2, M DO 310 I = 1, J - 1 A( I, J ) = ZERO 310 CONTINUE 320 CONTINUE * ELSE IF( IPACK.EQ.3 ) THEN * * 'C' -- Upper triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 340 J = 1, M DO 330 I = 1, J IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 330 CONTINUE 340 CONTINUE * ELSE IF( IPACK.EQ.4 ) THEN * * 'R' -- Lower triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 360 J = 1, M DO 350 I = J, M IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 350 CONTINUE 360 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * 'B' -- The lower triangle is packed as a band matrix. * 'Q' -- The upper triangle is packed as a band matrix. * 'Z' -- The whole matrix is packed as a band matrix. * IF( IPACK.EQ.5 ) $ UUB = 0 IF( IPACK.EQ.6 ) $ LLB = 0 * DO 380 J = 1, UUB DO 370 I = MIN( J+LLB, M ), 1, -1 A( I-J+UUB+1, J ) = A( I, J ) 370 CONTINUE 380 CONTINUE * DO 400 J = UUB + 2, N DO 390 I = J - UUB, MIN( J+LLB, M ) A( I-J+UUB+1, J ) = A( I, J ) 390 CONTINUE 400 CONTINUE END IF * * If packed, zero out extraneous elements. * * Symmetric/Triangular Packed -- * zero out everything after A(IROW,ICOL) * IF( IPACK.EQ.3 .OR. IPACK.EQ.4 ) THEN DO 420 JC = ICOL, M DO 410 JR = IROW + 1, LDA A( JR, JC ) = ZERO 410 CONTINUE IROW = 0 420 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * Packed Band -- * 1st row is now in A( UUB+2-j, j), zero above it * m-th row is now in A( M+UUB-j,j), zero below it * last non-zero diagonal is now in A( UUB+LLB+1,j ), * zero below it, too. * IR1 = UUB + LLB + 2 IR2 = UUB + M + 2 DO 450 JC = 1, N DO 430 JR = 1, UUB + 1 - JC A( JR, JC ) = ZERO 430 CONTINUE DO 440 JR = MAX( 1, MIN( IR1, IR2-JC ) ), LDA A( JR, JC ) = ZERO 440 CONTINUE 450 CONTINUE END IF END IF * RETURN * * End of DLATMS * END scalapack-1.8.0/TOOLS/LAPACK/slaran.f0000640000175000017500000000641210604316307016603 0ustar muammarmuammar REAL FUNCTION SLARAN( ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * SLARAN returns a random real number from a uniform (0,1) * distribution. * * Arguments * ========= * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine uses a multiplicative congruential method with modulus * 2**48 and multiplier 33952834046453 (see G.S.Fishman, * 'Multiplicative congruential random number generators with modulus * 2**b: an exhaustive analysis for b = 32 and a partial analysis for * b = 48', Math. Comp. 189, pp 331-344, 1990). * * 48-bit integers are stored in 4 integer array elements with 12 bits * per element. Hence the routine is portable across machines with * integers of 32 bits or more. * * ===================================================================== * * .. Parameters .. INTEGER M1, M2, M3, M4 PARAMETER ( M1 = 494, M2 = 322, M3 = 2508, M4 = 2549 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) INTEGER IPW2 REAL R PARAMETER ( IPW2 = 4096, R = ONE / IPW2 ) * .. * .. Local Scalars .. INTEGER IT1, IT2, IT3, IT4 REAL RNDOUT * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL * .. * .. Executable Statements .. 10 CONTINUE * * multiply the seed by the multiplier modulo 2**48 * IT4 = ISEED( 4 )*M4 IT3 = IT4 / IPW2 IT4 = IT4 - IPW2*IT3 IT3 = IT3 + ISEED( 3 )*M4 + ISEED( 4 )*M3 IT2 = IT3 / IPW2 IT3 = IT3 - IPW2*IT2 IT2 = IT2 + ISEED( 2 )*M4 + ISEED( 3 )*M3 + ISEED( 4 )*M2 IT1 = IT2 / IPW2 IT2 = IT2 - IPW2*IT1 IT1 = IT1 + ISEED( 1 )*M4 + ISEED( 2 )*M3 + ISEED( 3 )*M2 + $ ISEED( 4 )*M1 IT1 = MOD( IT1, IPW2 ) * * return updated seed * ISEED( 1 ) = IT1 ISEED( 2 ) = IT2 ISEED( 3 ) = IT3 ISEED( 4 ) = IT4 * * convert 48-bit integer to a real number in the interval (0,1) * RNDOUT = R*( REAL( IT1 )+R*( REAL( IT2 )+R*( REAL( IT3 )+R* $ ( REAL( IT4 ) ) ) ) ) * IF (RNDOUT.EQ.1.0) THEN * If a real number has n bits of precision, and the first * n bits of the 48-bit integer above happen to be all 1 (which * will occur about once every 2**n calls), then SLARAN will * be rounded to exactly 1.0. In IEEE single precision arithmetic, * this will happen relatively often since n = 24. * Since SLARAN is not supposed to return exactly 0.0 or 1.0 * (and some callers of SLARAN, such as CLARND, depend on that), * the statistically correct thing to do in this situation is * simply to iterate again. * N.B. the case SLARAN = 0.0 should not be possible. * GOTO 10 END IF * SLARAN = RNDOUT RETURN * * End of SLARAN * END scalapack-1.8.0/TOOLS/LAPACK/icmax1.f0000640000175000017500000000426310363532303016505 0ustar muammarmuammar INTEGER FUNCTION ICMAX1( N, CX, INCX ) * * -- LAPACK auxiliary routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1999 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. COMPLEX CX( * ) * .. * * Purpose * ======= * * ICMAX1 finds the index of the element whose real part has maximum * absolute value. * * Based on ICAMAX from Level 1 BLAS. * The change is to use the 'genuine' absolute value. * * Contributed by Nick Higham for use with CLACON. * * Arguments * ========= * * N (input) INTEGER * The number of elements in the vector CX. * * CX (input) COMPLEX array, dimension (N) * The vector whose elements will be summed. * * INCX (input) INTEGER * The spacing between successive values of CX. INCX >= 1. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IX REAL SMAX COMPLEX ZDUM * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. * * NEXT LINE IS THE ONLY MODIFICATION. CABS1( ZDUM ) = ABS( ZDUM ) * .. * .. Executable Statements .. * ICMAX1 = 0 IF( N.LT.1 ) $ RETURN ICMAX1 = 1 IF( N.EQ.1 ) $ RETURN IF( INCX.EQ.1 ) $ GO TO 30 * * CODE FOR INCREMENT NOT EQUAL TO 1 * IX = 1 SMAX = CABS1( CX( 1 ) ) IX = IX + INCX DO 20 I = 2, N IF( CABS1( CX( IX ) ).LE.SMAX ) $ GO TO 10 ICMAX1 = I SMAX = CABS1( CX( IX ) ) 10 CONTINUE IX = IX + INCX 20 CONTINUE RETURN * * CODE FOR INCREMENT EQUAL TO 1 * 30 CONTINUE SMAX = CABS1( CX( 1 ) ) DO 40 I = 2, N IF( CABS1( CX( I ) ).LE.SMAX ) $ GO TO 40 ICMAX1 = I SMAX = CABS1( CX( I ) ) 40 CONTINUE RETURN * * End of ICMAX1 * END scalapack-1.8.0/TOOLS/LAPACK/zlarot.f0000640000175000017500000002364210604316307016642 0ustar muammarmuammar SUBROUTINE ZLAROT( LROWS, LLEFT, LRIGHT, NL, C, S, A, LDA, XLEFT, $ XRIGHT ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. LOGICAL LLEFT, LRIGHT, LROWS INTEGER LDA, NL COMPLEX*16 C, S, XLEFT, XRIGHT * .. * .. Array Arguments .. COMPLEX*16 A( * ) * .. * * Purpose * ======= * * ZLAROT applies a (Givens) rotation to two adjacent rows or * columns, where one element of the first and/or last column/row * November 2006 * for use on matrices stored in some format other than GE, so * that elements of the matrix may be used or modified for which * no array element is provided. * * One example is a symmetric matrix in SB format (bandwidth=4), for * which UPLO='L': Two adjacent rows will have the format: * * row j: * * * * * . . . . * row j+1: * * * * * . . . . * * '*' indicates elements for which storage is provided, * '.' indicates elements for which no storage is provided, but * are not necessarily zero; their values are determined by * symmetry. ' ' indicates elements which are necessarily zero, * and have no storage provided. * * Those columns which have two '*'s can be handled by DROT. * Those columns which have no '*'s can be ignored, since as long * as the Givens rotations are carefully applied to preserve * symmetry, their values are determined. * Those columns which have one '*' have to be handled separately, * by using separate variables "p" and "q": * * row j: * * * * * p . . . * row j+1: q * * * * * . . . . * * The element p would have to be set correctly, then that column * is rotated, setting p to its new value. The next call to * ZLAROT would rotate columns j and j+1, using p, and restore * symmetry. The element q would start out being zero, and be * made non-zero by the rotation. Later, rotations would presumably * be chosen to zero q out. * * Typical Calling Sequences: rotating the i-th and (i+1)-st rows. * ------- ------- --------- * * General dense matrix: * * CALL ZLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S, * A(i,1),LDA, DUMMY, DUMMY) * * General banded matrix in GB format: * * j = MAX(1, i-KL ) * NL = MIN( N, i+KU+1 ) + 1-j * CALL ZLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S, * A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,KL+1) ] * * Symmetric banded matrix in SY format, bandwidth K, * lower triangle only: * * j = MAX(1, i-K ) * NL = MIN( K+1, i ) + 1 * CALL ZLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S, * A(i,j), LDA, XLEFT, XRIGHT ) * * Same, but upper triangle only: * * NL = MIN( K+1, N-i ) + 1 * CALL ZLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S, * A(i,i), LDA, XLEFT, XRIGHT ) * * Symmetric banded matrix in SB format, bandwidth K, * lower triangle only: * * [ same as for SY, except:] * . . . . * A(i+1-j,j), LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,K+1) ] * * Same, but upper triangle only: * . . . * A(K+1,i), LDA-1, XLEFT, XRIGHT ) * * Rotating columns is just the transpose of rotating rows, except * for GB and SB: (rotating columns i and i+1) * * GB: * j = MAX(1, i-KU ) * NL = MIN( N, i+KL+1 ) + 1-j * CALL ZLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S, * A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * [note that KU+j+1-i is just MAX(1,KU+2-i)] * * SB: (upper triangle) * * . . . . . . * A(K+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * SB: (lower triangle) * * . . . . . . * A(1,i),LDA-1, XTOP, XBOTTM ) * * Arguments * ========= * * LROWS - LOGICAL * If .TRUE., then ZLAROT will rotate two rows. If .FALSE., * then it will rotate two columns. * Not modified. * * LLEFT - LOGICAL * If .TRUE., then XLEFT will be used instead of the * corresponding element of A for the first element in the * second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) * If .FALSE., then the corresponding element of A will be * used. * Not modified. * * LRIGHT - LOGICAL * If .TRUE., then XRIGHT will be used instead of the * corresponding element of A for the last element in the * first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If * .FALSE., then the corresponding element of A will be used. * Not modified. * * NL - INTEGER * The length of the rows (if LROWS=.TRUE.) or columns (if * LROWS=.FALSE.) to be rotated. If XLEFT and/or XRIGHT are * used, the columns/rows they are in should be included in * NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at * least 2. The number of rows/columns to be rotated * exclusive of those involving XLEFT and/or XRIGHT may * not be negative, i.e., NL minus how many of LLEFT and * LRIGHT are .TRUE. must be at least zero; if not, XERBLA * will be called. * Not modified. * * C, S - COMPLEX*16 * Specify the Givens rotation to be applied. If LROWS is * true, then the matrix ( c s ) * ( _ _ ) * (-s c ) is applied from the left; * if false, then the transpose (not conjugated) thereof is * applied from the right. Note that in contrast to the * output of ZROTG or to most versions of ZROT, both C and S * are complex. For a Givens rotation, |C|**2 + |S|**2 should * be 1, but this is not checked. * Not modified. * * A - COMPLEX*16 array. * The array containing the rows/columns to be rotated. The * first element of A should be the upper left element to * be rotated. * Read and modified. * * LDA - INTEGER * The "effective" leading dimension of A. If A contains * a matrix stored in GE, HE, or SY format, then this is just * the leading dimension of A as dimensioned in the calling * routine. If A contains a matrix stored in band (GB, HB, or * SB) format, then this should be *one less* than the leading * dimension used in the calling routine. Thus, if A were * dimensioned A(LDA,*) in ZLAROT, then A(1,j) would be the * j-th element in the first of the two rows to be rotated, * and A(2,j) would be the j-th in the second, regardless of * how the array may be stored in the calling routine. [A * cannot, however, actually be dimensioned thus, since for * band format, the row number may exceed LDA, which is not * legal FORTRAN.] * If LROWS=.TRUE., then LDA must be at least 1, otherwise * it must be at least NL minus the number of .TRUE. values * in XLEFT and XRIGHT. * Not modified. * * XLEFT - COMPLEX*16 * If LLEFT is .TRUE., then XLEFT will be used and modified * instead of A(2,1) (if LROWS=.TRUE.) or A(1,2) * (if LROWS=.FALSE.). * Read and modified. * * XRIGHT - COMPLEX*16 * If LRIGHT is .TRUE., then XRIGHT will be used and modified * instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1) * (if LROWS=.FALSE.). * Read and modified. * * ===================================================================== * * .. Local Scalars .. INTEGER IINC, INEXT, IX, IY, IYT, J, NT COMPLEX*16 TEMPX * .. * .. Local Arrays .. COMPLEX*16 XT( 2 ), YT( 2 ) * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DCONJG * .. * .. Executable Statements .. * * Set up indices, arrays for ends * IF( LROWS ) THEN IINC = LDA INEXT = 1 ELSE IINC = 1 INEXT = LDA END IF * IF( LLEFT ) THEN NT = 1 IX = 1 + IINC IY = 2 + LDA XT( 1 ) = A( 1 ) YT( 1 ) = XLEFT ELSE NT = 0 IX = 1 IY = 1 + INEXT END IF * IF( LRIGHT ) THEN IYT = 1 + INEXT + ( NL-1 )*IINC NT = NT + 1 XT( NT ) = XRIGHT YT( NT ) = A( IYT ) END IF * * Check for errors * IF( NL.LT.NT ) THEN CALL XERBLA( 'ZLAROT', 4 ) RETURN END IF IF( LDA.LE.0 .OR. ( .NOT.LROWS .AND. LDA.LT.NL-NT ) ) THEN CALL XERBLA( 'ZLAROT', 8 ) RETURN END IF * * Rotate * * ZROT( NL-NT, A(IX),IINC, A(IY),IINC, C, S ) with complex C, S * DO 10 J = 0, NL - NT - 1 TEMPX = C*A( IX+J*IINC ) + S*A( IY+J*IINC ) A( IY+J*IINC ) = -DCONJG( S )*A( IX+J*IINC ) + $ DCONJG( C )*A( IY+J*IINC ) A( IX+J*IINC ) = TEMPX 10 CONTINUE * * ZROT( NT, XT,1, YT,1, C, S ) with complex C, S * DO 20 J = 1, NT TEMPX = C*XT( J ) + S*YT( J ) YT( J ) = -DCONJG( S )*XT( J ) + DCONJG( C )*YT( J ) XT( J ) = TEMPX 20 CONTINUE * * Stuff values back into XLEFT, XRIGHT, etc. * IF( LLEFT ) THEN A( 1 ) = XT( 1 ) XLEFT = YT( 1 ) END IF * IF( LRIGHT ) THEN XRIGHT = XT( NT ) A( IYT ) = YT( NT ) END IF * RETURN * * End of ZLAROT * END scalapack-1.8.0/TOOLS/LAPACK/clatm1.f0000640000175000017500000001575010604316307016511 0ustar muammarmuammar SUBROUTINE CLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, N, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST, INFO, IRSIGN, MODE, N REAL COND * .. * .. Array Arguments .. INTEGER ISEED( 4 ) COMPLEX D( * ) * .. * * Purpose * ======= * * CLATM1 computes the entries of D(1..N) as specified by * MODE, COND and IRSIGN. IDIST and ISEED determine the generation * of random numbers. CLATM1 is called by CLATMR to generate * random test matrices for LAPACK programs. * * Arguments * ========= * * MODE - INTEGER * On entry describes how D is to be computed: * MODE = 0 means do not change D. * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * Not modified. * * COND - REAL * On entry, used as described under MODE above. * If used, it must be >= 1. Not modified. * * IRSIGN - INTEGER * On entry, if MODE neither -6, 0 nor 6, determines sign of * entries of D * 0 => leave entries of D unchanged * 1 => multiply each entry of D by random complex number * uniformly distributed with absolute value 1 * * IDIST - CHARACTER*1 * On entry, IDIST specifies the type of distribution to be * used to generate a random matrix . * 1 => real and imaginary parts each UNIFORM( 0, 1 ) * 2 => real and imaginary parts each UNIFORM( -1, 1 ) * 3 => real and imaginary parts each NORMAL( 0, 1 ) * 4 => complex number uniform in DISK( 0, 1 ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. The random number generator uses a * linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to CLATM1 * to continue the same random number sequence. * Changed on exit. * * D - COMPLEX array, dimension ( MIN( M , N ) ) * Array to be computed according to MODE, COND and IRSIGN. * May be changed on exit if MODE is nonzero. * * N - INTEGER * Number of entries of D. Not modified. * * INFO - INTEGER * 0 => normal termination * -1 => if MODE not in range -6 to 6 * -2 => if MODE neither -6, 0 nor 6, and * IRSIGN neither 0 nor 1 * -3 => if MODE neither -6, 0 nor 6 and COND less than 1 * -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 4 * -7 => if N negative * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER I REAL ALPHA, TEMP COMPLEX CTEMP * .. * .. External Functions .. REAL SLARAN COMPLEX CLARND EXTERNAL SLARAN, CLARND * .. * .. External Subroutines .. EXTERNAL CLARNV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, EXP, LOG, REAL * .. * .. Executable Statements .. * * Decode and Test the input parameters. Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Set INFO if an error * IF( MODE.LT.-6 .OR. MODE.GT.6 ) THEN INFO = -1 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ ( IRSIGN.NE.0 .AND. IRSIGN.NE.1 ) ) THEN INFO = -2 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ COND.LT.ONE ) THEN INFO = -3 ELSE IF( ( MODE.EQ.6 .OR. MODE.EQ.-6 ) .AND. $ ( IDIST.LT.1 .OR. IDIST.GT.4 ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -7 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'CLATM1', -INFO ) RETURN END IF * * Compute D according to COND and MODE * IF( MODE.NE.0 ) THEN GO TO ( 10, 30, 50, 70, 90, 110 )ABS( MODE ) * * One large D value: * 10 CONTINUE DO 20 I = 1, N D( I ) = ONE / COND 20 CONTINUE D( 1 ) = ONE GO TO 120 * * One small D value: * 30 CONTINUE DO 40 I = 1, N D( I ) = ONE 40 CONTINUE D( N ) = ONE / COND GO TO 120 * * Exponentially distributed D values: * 50 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN ALPHA = COND**( -ONE / REAL( N-1 ) ) DO 60 I = 2, N D( I ) = ALPHA**( I-1 ) 60 CONTINUE END IF GO TO 120 * * Arithmetically distributed D values: * 70 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN TEMP = ONE / COND ALPHA = ( ONE-TEMP ) / REAL( N-1 ) DO 80 I = 2, N D( I ) = REAL( N-I )*ALPHA + TEMP 80 CONTINUE END IF GO TO 120 * * Randomly distributed D values on ( 1/COND , 1): * 90 CONTINUE ALPHA = LOG( ONE / COND ) DO 100 I = 1, N D( I ) = EXP( ALPHA*SLARAN( ISEED ) ) 100 CONTINUE GO TO 120 * * Randomly distributed D values from IDIST * 110 CONTINUE CALL CLARNV( IDIST, ISEED, N, D ) * 120 CONTINUE * * If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign * random signs to D * IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ IRSIGN.EQ.1 ) THEN DO 130 I = 1, N CTEMP = CLARND( 3, ISEED ) D( I ) = D( I )*( CTEMP / ABS( CTEMP ) ) 130 CONTINUE END IF * * Reverse if MODE < 0 * IF( MODE.LT.0 ) THEN DO 140 I = 1, N / 2 CTEMP = D( I ) D( I ) = D( N+1-I ) D( N+1-I ) = CTEMP 140 CONTINUE END IF * END IF * RETURN * * End of CLATM1 * END scalapack-1.8.0/TOOLS/LAPACK/slarot.f0000640000175000017500000002226410604316307016632 0ustar muammarmuammar SUBROUTINE SLAROT( LROWS, LLEFT, LRIGHT, NL, C, S, A, LDA, XLEFT, $ XRIGHT ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. LOGICAL LLEFT, LRIGHT, LROWS INTEGER LDA, NL REAL C, S, XLEFT, XRIGHT * .. * .. Array Arguments .. REAL A( * ) * .. * * Purpose * ======= * * SLAROT applies a (Givens) rotation to two adjacent rows or * columns, where one element of the first and/or last column/row * November 2006 * for use on matrices stored in some format other than GE, so * that elements of the matrix may be used or modified for which * no array element is provided. * * One example is a symmetric matrix in SB format (bandwidth=4), for * which UPLO='L': Two adjacent rows will have the format: * * row j: * * * * * . . . . * row j+1: * * * * * . . . . * * '*' indicates elements for which storage is provided, * '.' indicates elements for which no storage is provided, but * are not necessarily zero; their values are determined by * symmetry. ' ' indicates elements which are necessarily zero, * and have no storage provided. * * Those columns which have two '*'s can be handled by SROT. * Those columns which have no '*'s can be ignored, since as long * as the Givens rotations are carefully applied to preserve * symmetry, their values are determined. * Those columns which have one '*' have to be handled separately, * by using separate variables "p" and "q": * * row j: * * * * * p . . . * row j+1: q * * * * * . . . . * * The element p would have to be set correctly, then that column * is rotated, setting p to its new value. The next call to * SLAROT would rotate columns j and j+1, using p, and restore * symmetry. The element q would start out being zero, and be * made non-zero by the rotation. Later, rotations would presumably * be chosen to zero q out. * * Typical Calling Sequences: rotating the i-th and (i+1)-st rows. * ------- ------- --------- * * General dense matrix: * * CALL SLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S, * A(i,1),LDA, DUMMY, DUMMY) * * General banded matrix in GB format: * * j = MAX(1, i-KL ) * NL = MIN( N, i+KU+1 ) + 1-j * CALL SLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S, * A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,KL+1) ] * * Symmetric banded matrix in SY format, bandwidth K, * lower triangle only: * * j = MAX(1, i-K ) * NL = MIN( K+1, i ) + 1 * CALL SLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S, * A(i,j), LDA, XLEFT, XRIGHT ) * * Same, but upper triangle only: * * NL = MIN( K+1, N-i ) + 1 * CALL SLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S, * A(i,i), LDA, XLEFT, XRIGHT ) * * Symmetric banded matrix in SB format, bandwidth K, * lower triangle only: * * [ same as for SY, except:] * . . . . * A(i+1-j,j), LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,K+1) ] * * Same, but upper triangle only: * . . . * A(K+1,i), LDA-1, XLEFT, XRIGHT ) * * Rotating columns is just the transpose of rotating rows, except * for GB and SB: (rotating columns i and i+1) * * GB: * j = MAX(1, i-KU ) * NL = MIN( N, i+KL+1 ) + 1-j * CALL SLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S, * A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * [note that KU+j+1-i is just MAX(1,KU+2-i)] * * SB: (upper triangle) * * . . . . . . * A(K+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * SB: (lower triangle) * * . . . . . . * A(1,i),LDA-1, XTOP, XBOTTM ) * * Arguments * ========= * * LROWS - LOGICAL * If .TRUE., then SLAROT will rotate two rows. If .FALSE., * then it will rotate two columns. * Not modified. * * LLEFT - LOGICAL * If .TRUE., then XLEFT will be used instead of the * corresponding element of A for the first element in the * second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) * If .FALSE., then the corresponding element of A will be * used. * Not modified. * * LRIGHT - LOGICAL * If .TRUE., then XRIGHT will be used instead of the * corresponding element of A for the last element in the * first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If * .FALSE., then the corresponding element of A will be used. * Not modified. * * NL - INTEGER * The length of the rows (if LROWS=.TRUE.) or columns (if * LROWS=.FALSE.) to be rotated. If XLEFT and/or XRIGHT are * used, the columns/rows they are in should be included in * NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at * least 2. The number of rows/columns to be rotated * exclusive of those involving XLEFT and/or XRIGHT may * not be negative, i.e., NL minus how many of LLEFT and * LRIGHT are .TRUE. must be at least zero; if not, XERBLA * will be called. * Not modified. * * C, S - REAL * Specify the Givens rotation to be applied. If LROWS is * true, then the matrix ( c s ) * (-s c ) is applied from the left; * if false, then the transpose thereof is applied from the * right. For a Givens rotation, C**2 + S**2 should be 1, * but this is not checked. * Not modified. * * A - REAL array. * The array containing the rows/columns to be rotated. The * first element of A should be the upper left element to * be rotated. * Read and modified. * * LDA - INTEGER * The "effective" leading dimension of A. If A contains * a matrix stored in GE or SY format, then this is just * the leading dimension of A as dimensioned in the calling * routine. If A contains a matrix stored in band (GB or SB) * format, then this should be *one less* than the leading * dimension used in the calling routine. Thus, if * A were dimensioned A(LDA,*) in SLAROT, then A(1,j) would * be the j-th element in the first of the two rows * to be rotated, and A(2,j) would be the j-th in the second, * regardless of how the array may be stored in the calling * routine. [A cannot, however, actually be dimensioned thus, * since for band format, the row number may exceed LDA, which * is not legal FORTRAN.] * If LROWS=.TRUE., then LDA must be at least 1, otherwise * it must be at least NL minus the number of .TRUE. values * in XLEFT and XRIGHT. * Not modified. * * XLEFT - REAL * If LLEFT is .TRUE., then XLEFT will be used and modified * instead of A(2,1) (if LROWS=.TRUE.) or A(1,2) * (if LROWS=.FALSE.). * Read and modified. * * XRIGHT - REAL * If LRIGHT is .TRUE., then XRIGHT will be used and modified * instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1) * (if LROWS=.FALSE.). * Read and modified. * * ===================================================================== * * .. Local Scalars .. INTEGER IINC, INEXT, IX, IY, IYT, NT * .. * .. Local Arrays .. REAL XT( 2 ), YT( 2 ) * .. * .. External Subroutines .. EXTERNAL SROT, XERBLA * .. * .. Executable Statements .. * * Set up indices, arrays for ends * IF( LROWS ) THEN IINC = LDA INEXT = 1 ELSE IINC = 1 INEXT = LDA END IF * IF( LLEFT ) THEN NT = 1 IX = 1 + IINC IY = 2 + LDA XT( 1 ) = A( 1 ) YT( 1 ) = XLEFT ELSE NT = 0 IX = 1 IY = 1 + INEXT END IF * IF( LRIGHT ) THEN IYT = 1 + INEXT + ( NL-1 )*IINC NT = NT + 1 XT( NT ) = XRIGHT YT( NT ) = A( IYT ) END IF * * Check for errors * IF( NL.LT.NT ) THEN CALL XERBLA( 'SLAROT', 4 ) RETURN END IF IF( LDA.LE.0 .OR. ( .NOT.LROWS .AND. LDA.LT.NL-NT ) ) THEN CALL XERBLA( 'SLAROT', 8 ) RETURN END IF * * Rotate * CALL SROT( NL-NT, A( IX ), IINC, A( IY ), IINC, C, S ) CALL SROT( NT, XT, 1, YT, 1, C, S ) * * Stuff values back into XLEFT, XRIGHT, etc. * IF( LLEFT ) THEN A( 1 ) = XT( 1 ) XLEFT = YT( 1 ) END IF * IF( LRIGHT ) THEN XRIGHT = XT( NT ) A( IYT ) = YT( NT ) END IF * RETURN * * End of SLAROT * END scalapack-1.8.0/TOOLS/LAPACK/claghe.f0000640000175000017500000001347210604316307016552 0ustar muammarmuammar SUBROUTINE CLAGHE( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL D( * ) COMPLEX A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * CLAGHE generates a complex hermitian matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: * A = U*D*U'. The semi-bandwidth may then be reduced to k by additional * unitary transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) REAL array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX array, dimension (LDA,N) * The generated n by n hermitian matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE, HALF PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ), $ HALF = ( 0.5E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J REAL WN COMPLEX ALPHA, TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL CAXPY, CGEMV, CGERC, CHEMV, CHER2, CLARNV, $ CSCAL, XERBLA * .. * .. External Functions .. REAL SCNRM2 COMPLEX CDOTC EXTERNAL SCNRM2, CDOTC * .. * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX, REAL * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'CLAGHE', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of hermitian matrix * DO 40 I = N - 1, 1, -1 * * generate random reflection * CALL CLARNV( 3, ISEED, N-I+1, WORK ) WN = SCNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL CSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = REAL( WB / WA ) END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * u * CALL CHEMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*CDOTC( N-I+1, WORK( N+1 ), 1, WORK, 1 ) CALL CAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * CALL CHER2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, $ A( I, I ), LDA ) 40 CONTINUE * * Reduce number of subdiagonals to K * DO 60 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = SCNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = ( WN / ABS( A( K+I, I ) ) )*A( K+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL CSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = REAL( WB / WA ) END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL CGEMV( 'Conjugate transpose', N-K-I+1, K-1, ONE, $ A( K+I, I+1 ), LDA, A( K+I, I ), 1, ZERO, WORK, 1 ) CALL CGERC( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * u * CALL CHEMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*CDOTC( N-K-I+1, WORK, 1, A( K+I, I ), 1 ) CALL CAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply hermitian rank-2 update to A(k+i:n,k+i:n) * CALL CHER2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, $ A( K+I, K+I ), LDA ) * A( K+I, I ) = -WA DO 50 J = K + I + 1, N A( J, I ) = ZERO 50 CONTINUE 60 CONTINUE * * Store full hermitian matrix * DO 80 J = 1, N DO 70 I = J + 1, N A( J, I ) = CONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE RETURN * * End of CLAGHE * END scalapack-1.8.0/TOOLS/LAPACK/dlagge.f0000640000175000017500000002165410604316307016553 0ustar muammarmuammar SUBROUTINE DLAGGE( M, N, KL, KU, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDA, M, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * DLAGGE generates a real general m by n matrix A, by pre- and post- * multiplying a real diagonal matrix D with random orthogonal matrices: * A = U*D*V. The lower and upper bandwidths may then be reduced to * kl and ku by additional orthogonal transformations. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= KL <= M-1. * * KU (input) INTEGER * The number of nonzero superdiagonals within the band of A. * 0 <= KU <= N-1. * * D (input) DOUBLE PRECISION array, dimension (min(M,N)) * The diagonal elements of the diagonal matrix D. * * A (output) DOUBLE PRECISION array, dimension (LDA,N) * The generated m by n matrix A. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= M. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) DOUBLE PRECISION array, dimension (M+N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J DOUBLE PRECISION TAU, WA, WB, WN * .. * .. External Subroutines .. EXTERNAL DGEMV, DGER, DLARNV, DSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SIGN * .. * .. External Functions .. DOUBLE PRECISION DNRM2 EXTERNAL DNRM2 * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN INFO = -3 ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN INFO = -4 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -7 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'DLAGGE', -INFO ) RETURN END IF * * initialize A to diagonal matrix * DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, MIN( M, N ) A( I, I ) = D( I ) 30 CONTINUE * * pre- and post-multiply A by random orthogonal matrices * DO 40 I = MIN( M, N ), 1, -1 IF( I.LT.M ) THEN * * generate random reflection * CALL DLARNV( 3, ISEED, M-I+1, WORK ) WN = DNRM2( M-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL DSCAL( M-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * multiply A(i:m,i:n) by random reflection from the left * CALL DGEMV( 'Transpose', M-I+1, N-I+1, ONE, A( I, I ), LDA, $ WORK, 1, ZERO, WORK( M+1 ), 1 ) CALL DGER( M-I+1, N-I+1, -TAU, WORK, 1, WORK( M+1 ), 1, $ A( I, I ), LDA ) END IF IF( I.LT.N ) THEN * * generate random reflection * CALL DLARNV( 3, ISEED, N-I+1, WORK ) WN = DNRM2( N-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL DSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * multiply A(i:m,i:n) by random reflection from the right * CALL DGEMV( 'No transpose', M-I+1, N-I+1, ONE, A( I, I ), $ LDA, WORK, 1, ZERO, WORK( N+1 ), 1 ) CALL DGER( M-I+1, N-I+1, -TAU, WORK( N+1 ), 1, WORK, 1, $ A( I, I ), LDA ) END IF 40 CONTINUE * * Reduce number of subdiagonals to KL and number of superdiagonals * to KU * DO 70 I = 1, MAX( M-1-KL, N-1-KU ) IF( KL.LE.KU ) THEN * * annihilate subdiagonal elements first (necessary if KL = 0) * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = DNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = SIGN( WN, A( KL+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL DSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL DGEMV( 'Transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL DGER( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, 1, $ A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = DNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = SIGN( WN, A( I, KU+I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL DSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = WB / WA END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL DGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL DGER( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF ELSE * * annihilate superdiagonal elements first (necessary if * KU = 0) * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = DNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = SIGN( WN, A( I, KU+I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL DSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = WB / WA END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL DGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL DGER( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = DNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = SIGN( WN, A( KL+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL DSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL DGEMV( 'Transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL DGER( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, 1, $ A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF END IF * DO 50 J = KL + I + 1, M A( J, I ) = ZERO 50 CONTINUE * DO 60 J = KU + I + 1, N A( I, J ) = ZERO 60 CONTINUE 70 CONTINUE RETURN * * End of DLAGGE * END scalapack-1.8.0/TOOLS/LAPACK/zlagge.f0000640000175000017500000002250210604316307016572 0ustar muammarmuammar SUBROUTINE ZLAGGE( M, N, KL, KU, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDA, M, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * ZLAGGE generates a complex general m by n matrix A, by pre- and post- * multiplying a real diagonal matrix D with random unitary matrices: * A = U*D*V. The lower and upper bandwidths may then be reduced to * kl and ku by additional unitary transformations. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= KL <= M-1. * * KU (input) INTEGER * The number of nonzero superdiagonals within the band of A. * 0 <= KU <= N-1. * * D (input) DOUBLE PRECISION array, dimension (min(M,N)) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX*16 array, dimension (LDA,N) * The generated m by n matrix A. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= M. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX*16 array, dimension (M+N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J DOUBLE PRECISION WN COMPLEX*16 TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL XERBLA, ZGEMV, ZGERC, ZLACGV, ZLARNV, ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 EXTERNAL DZNRM2 * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN INFO = -3 ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN INFO = -4 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -7 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'ZLAGGE', -INFO ) RETURN END IF * * initialize A to diagonal matrix * DO 20 J = 1, N DO 10 I = 1, M A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, MIN( M, N ) A( I, I ) = D( I ) 30 CONTINUE * * pre- and post-multiply A by random unitary matrices * DO 40 I = MIN( M, N ), 1, -1 IF( I.LT.M ) THEN * * generate random reflection * CALL ZLARNV( 3, ISEED, M-I+1, WORK ) WN = DZNRM2( M-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL ZSCAL( M-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = DBLE( WB / WA ) END IF * * multiply A(i:m,i:n) by random reflection from the left * CALL ZGEMV( 'Conjugate transpose', M-I+1, N-I+1, ONE, $ A( I, I ), LDA, WORK, 1, ZERO, WORK( M+1 ), 1 ) CALL ZGERC( M-I+1, N-I+1, -TAU, WORK, 1, WORK( M+1 ), 1, $ A( I, I ), LDA ) END IF IF( I.LT.N ) THEN * * generate random reflection * CALL ZLARNV( 3, ISEED, N-I+1, WORK ) WN = DZNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL ZSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = DBLE( WB / WA ) END IF * * multiply A(i:m,i:n) by random reflection from the right * CALL ZGEMV( 'No transpose', M-I+1, N-I+1, ONE, A( I, I ), $ LDA, WORK, 1, ZERO, WORK( N+1 ), 1 ) CALL ZGERC( M-I+1, N-I+1, -TAU, WORK( N+1 ), 1, WORK, 1, $ A( I, I ), LDA ) END IF 40 CONTINUE * * Reduce number of subdiagonals to KL and number of superdiagonals * to KU * DO 70 I = 1, MAX( M-1-KL, N-1-KU ) IF( KL.LE.KU ) THEN * * annihilate subdiagonal elements first (necessary if KL = 0) * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = DZNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = ( WN / ABS( A( KL+I, I ) ) )*A( KL+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL ZSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL ZGEMV( 'Conjugate transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL ZGERC( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, $ 1, A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = DZNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = ( WN / ABS( A( I, KU+I ) ) )*A( I, KU+I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL ZSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL ZLACGV( N-KU-I+1, A( I, KU+I ), LDA ) CALL ZGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL ZGERC( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF ELSE * * annihilate superdiagonal elements first (necessary if * KU = 0) * IF( I.LE.MIN( N-1-KU, M ) ) THEN * * generate reflection to annihilate A(i,ku+i+1:n) * WN = DZNRM2( N-KU-I+1, A( I, KU+I ), LDA ) WA = ( WN / ABS( A( I, KU+I ) ) )*A( I, KU+I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( I, KU+I ) + WA CALL ZSCAL( N-KU-I, ONE / WB, A( I, KU+I+1 ), LDA ) A( I, KU+I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(i+1:m,ku+i:n) from the right * CALL ZLACGV( N-KU-I+1, A( I, KU+I ), LDA ) CALL ZGEMV( 'No transpose', M-I, N-KU-I+1, ONE, $ A( I+1, KU+I ), LDA, A( I, KU+I ), LDA, ZERO, $ WORK, 1 ) CALL ZGERC( M-I, N-KU-I+1, -TAU, WORK, 1, A( I, KU+I ), $ LDA, A( I+1, KU+I ), LDA ) A( I, KU+I ) = -WA END IF * IF( I.LE.MIN( M-1-KL, N ) ) THEN * * generate reflection to annihilate A(kl+i+1:m,i) * WN = DZNRM2( M-KL-I+1, A( KL+I, I ), 1 ) WA = ( WN / ABS( A( KL+I, I ) ) )*A( KL+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( KL+I, I ) + WA CALL ZSCAL( M-KL-I, ONE / WB, A( KL+I+1, I ), 1 ) A( KL+I, I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(kl+i:m,i+1:n) from the left * CALL ZGEMV( 'Conjugate transpose', M-KL-I+1, N-I, ONE, $ A( KL+I, I+1 ), LDA, A( KL+I, I ), 1, ZERO, $ WORK, 1 ) CALL ZGERC( M-KL-I+1, N-I, -TAU, A( KL+I, I ), 1, WORK, $ 1, A( KL+I, I+1 ), LDA ) A( KL+I, I ) = -WA END IF END IF * DO 50 J = KL + I + 1, M A( J, I ) = ZERO 50 CONTINUE * DO 60 J = KU + I + 1, N A( I, J ) = ZERO 60 CONTINUE 70 CONTINUE RETURN * * End of ZLAGGE * END scalapack-1.8.0/TOOLS/LAPACK/zlaghe.f0000640000175000017500000001351610604316307016600 0ustar muammarmuammar SUBROUTINE ZLAGHE( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION D( * ) COMPLEX*16 A( LDA, * ), WORK( * ) * .. * * Purpose * ======= * * ZLAGHE generates a complex hermitian matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: * A = U*D*U'. The semi-bandwidth may then be reduced to k by additional * unitary transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) DOUBLE PRECISION array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) COMPLEX*16 array, dimension (LDA,N) * The generated n by n hermitian matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) COMPLEX*16 array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE, HALF PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ), $ HALF = ( 0.5D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J DOUBLE PRECISION WN COMPLEX*16 ALPHA, TAU, WA, WB * .. * .. External Subroutines .. EXTERNAL XERBLA, ZAXPY, ZGEMV, ZGERC, ZHEMV, ZHER2, $ ZLARNV, ZSCAL * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 COMPLEX*16 ZDOTC EXTERNAL DZNRM2, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, MAX * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'ZLAGHE', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of hermitian matrix * DO 40 I = N - 1, 1, -1 * * generate random reflection * CALL ZLARNV( 3, ISEED, N-I+1, WORK ) WN = DZNRM2( N-I+1, WORK, 1 ) WA = ( WN / ABS( WORK( 1 ) ) )*WORK( 1 ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL ZSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = DBLE( WB / WA ) END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * u * CALL ZHEMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*ZDOTC( N-I+1, WORK( N+1 ), 1, WORK, 1 ) CALL ZAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * CALL ZHER2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, $ A( I, I ), LDA ) 40 CONTINUE * * Reduce number of subdiagonals to K * DO 60 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = DZNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = ( WN / ABS( A( K+I, I ) ) )*A( K+I, I ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL ZSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = DBLE( WB / WA ) END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL ZGEMV( 'Conjugate transpose', N-K-I+1, K-1, ONE, $ A( K+I, I+1 ), LDA, A( K+I, I ), 1, ZERO, WORK, 1 ) CALL ZGERC( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * u * CALL ZHEMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*ZDOTC( N-K-I+1, WORK, 1, A( K+I, I ), 1 ) CALL ZAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply hermitian rank-2 update to A(k+i:n,k+i:n) * CALL ZHER2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, $ A( K+I, K+I ), LDA ) * A( K+I, I ) = -WA DO 50 J = K + I + 1, N A( J, I ) = ZERO 50 CONTINUE 60 CONTINUE * * Store full hermitian matrix * DO 80 J = 1, N DO 70 I = J + 1, N A( J, I ) = DCONJG( A( I, J ) ) 70 CONTINUE 80 CONTINUE RETURN * * End of ZLAGHE * END scalapack-1.8.0/TOOLS/LAPACK/dlarot.f0000640000175000017500000002234410604316307016612 0ustar muammarmuammar SUBROUTINE DLAROT( LROWS, LLEFT, LRIGHT, NL, C, S, A, LDA, XLEFT, $ XRIGHT ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. LOGICAL LLEFT, LRIGHT, LROWS INTEGER LDA, NL DOUBLE PRECISION C, S, XLEFT, XRIGHT * .. * .. Array Arguments .. DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * DLAROT applies a (Givens) rotation to two adjacent rows or * columns, where one element of the first and/or last column/row * November 2006 * for use on matrices stored in some format other than GE, so * that elements of the matrix may be used or modified for which * no array element is provided. * * One example is a symmetric matrix in SB format (bandwidth=4), for * which UPLO='L': Two adjacent rows will have the format: * * row j: * * * * * . . . . * row j+1: * * * * * . . . . * * '*' indicates elements for which storage is provided, * '.' indicates elements for which no storage is provided, but * are not necessarily zero; their values are determined by * symmetry. ' ' indicates elements which are necessarily zero, * and have no storage provided. * * Those columns which have two '*'s can be handled by DROT. * Those columns which have no '*'s can be ignored, since as long * as the Givens rotations are carefully applied to preserve * symmetry, their values are determined. * Those columns which have one '*' have to be handled separately, * by using separate variables "p" and "q": * * row j: * * * * * p . . . * row j+1: q * * * * * . . . . * * The element p would have to be set correctly, then that column * is rotated, setting p to its new value. The next call to * DLAROT would rotate columns j and j+1, using p, and restore * symmetry. The element q would start out being zero, and be * made non-zero by the rotation. Later, rotations would presumably * be chosen to zero q out. * * Typical Calling Sequences: rotating the i-th and (i+1)-st rows. * ------- ------- --------- * * General dense matrix: * * CALL DLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S, * A(i,1),LDA, DUMMY, DUMMY) * * General banded matrix in GB format: * * j = MAX(1, i-KL ) * NL = MIN( N, i+KU+1 ) + 1-j * CALL DLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S, * A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,KL+1) ] * * Symmetric banded matrix in SY format, bandwidth K, * lower triangle only: * * j = MAX(1, i-K ) * NL = MIN( K+1, i ) + 1 * CALL DLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S, * A(i,j), LDA, XLEFT, XRIGHT ) * * Same, but upper triangle only: * * NL = MIN( K+1, N-i ) + 1 * CALL DLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S, * A(i,i), LDA, XLEFT, XRIGHT ) * * Symmetric banded matrix in SB format, bandwidth K, * lower triangle only: * * [ same as for SY, except:] * . . . . * A(i+1-j,j), LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,K+1) ] * * Same, but upper triangle only: * . . . * A(K+1,i), LDA-1, XLEFT, XRIGHT ) * * Rotating columns is just the transpose of rotating rows, except * for GB and SB: (rotating columns i and i+1) * * GB: * j = MAX(1, i-KU ) * NL = MIN( N, i+KL+1 ) + 1-j * CALL DLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S, * A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * [note that KU+j+1-i is just MAX(1,KU+2-i)] * * SB: (upper triangle) * * . . . . . . * A(K+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * SB: (lower triangle) * * . . . . . . * A(1,i),LDA-1, XTOP, XBOTTM ) * * Arguments * ========= * * LROWS - LOGICAL * If .TRUE., then DLAROT will rotate two rows. If .FALSE., * then it will rotate two columns. * Not modified. * * LLEFT - LOGICAL * If .TRUE., then XLEFT will be used instead of the * corresponding element of A for the first element in the * second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) * If .FALSE., then the corresponding element of A will be * used. * Not modified. * * LRIGHT - LOGICAL * If .TRUE., then XRIGHT will be used instead of the * corresponding element of A for the last element in the * first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If * .FALSE., then the corresponding element of A will be used. * Not modified. * * NL - INTEGER * The length of the rows (if LROWS=.TRUE.) or columns (if * LROWS=.FALSE.) to be rotated. If XLEFT and/or XRIGHT are * used, the columns/rows they are in should be included in * NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at * least 2. The number of rows/columns to be rotated * exclusive of those involving XLEFT and/or XRIGHT may * not be negative, i.e., NL minus how many of LLEFT and * LRIGHT are .TRUE. must be at least zero; if not, XERBLA * will be called. * Not modified. * * C, S - DOUBLE PRECISION * Specify the Givens rotation to be applied. If LROWS is * true, then the matrix ( c s ) * (-s c ) is applied from the left; * if false, then the transpose thereof is applied from the * right. For a Givens rotation, C**2 + S**2 should be 1, * but this is not checked. * Not modified. * * A - DOUBLE PRECISION array. * The array containing the rows/columns to be rotated. The * first element of A should be the upper left element to * be rotated. * Read and modified. * * LDA - INTEGER * The "effective" leading dimension of A. If A contains * a matrix stored in GE or SY format, then this is just * the leading dimension of A as dimensioned in the calling * routine. If A contains a matrix stored in band (GB or SB) * format, then this should be *one less* than the leading * dimension used in the calling routine. Thus, if * A were dimensioned A(LDA,*) in DLAROT, then A(1,j) would * be the j-th element in the first of the two rows * to be rotated, and A(2,j) would be the j-th in the second, * regardless of how the array may be stored in the calling * routine. [A cannot, however, actually be dimensioned thus, * since for band format, the row number may exceed LDA, which * is not legal FORTRAN.] * If LROWS=.TRUE., then LDA must be at least 1, otherwise * it must be at least NL minus the number of .TRUE. values * in XLEFT and XRIGHT. * Not modified. * * XLEFT - DOUBLE PRECISION * If LLEFT is .TRUE., then XLEFT will be used and modified * instead of A(2,1) (if LROWS=.TRUE.) or A(1,2) * (if LROWS=.FALSE.). * Read and modified. * * XRIGHT - DOUBLE PRECISION * If LRIGHT is .TRUE., then XRIGHT will be used and modified * instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1) * (if LROWS=.FALSE.). * Read and modified. * * ===================================================================== * * .. Local Scalars .. INTEGER IINC, INEXT, IX, IY, IYT, NT * .. * .. Local Arrays .. DOUBLE PRECISION XT( 2 ), YT( 2 ) * .. * .. External Subroutines .. EXTERNAL DROT, XERBLA * .. * .. Executable Statements .. * * Set up indices, arrays for ends * IF( LROWS ) THEN IINC = LDA INEXT = 1 ELSE IINC = 1 INEXT = LDA END IF * IF( LLEFT ) THEN NT = 1 IX = 1 + IINC IY = 2 + LDA XT( 1 ) = A( 1 ) YT( 1 ) = XLEFT ELSE NT = 0 IX = 1 IY = 1 + INEXT END IF * IF( LRIGHT ) THEN IYT = 1 + INEXT + ( NL-1 )*IINC NT = NT + 1 XT( NT ) = XRIGHT YT( NT ) = A( IYT ) END IF * * Check for errors * IF( NL.LT.NT ) THEN CALL XERBLA( 'DLAROT', 4 ) RETURN END IF IF( LDA.LE.0 .OR. ( .NOT.LROWS .AND. LDA.LT.NL-NT ) ) THEN CALL XERBLA( 'DLAROT', 8 ) RETURN END IF * * Rotate * CALL DROT( NL-NT, A( IX ), IINC, A( IY ), IINC, C, S ) CALL DROT( NT, XT, 1, YT, 1, C, S ) * * Stuff values back into XLEFT, XRIGHT, etc. * IF( LLEFT ) THEN A( 1 ) = XT( 1 ) XLEFT = YT( 1 ) END IF * IF( LRIGHT ) THEN XRIGHT = XT( NT ) A( IYT ) = YT( NT ) END IF * RETURN * * End of DLAROT * END scalapack-1.8.0/TOOLS/LAPACK/slagsy.f0000640000175000017500000001307610604316307016631 0ustar muammarmuammar SUBROUTINE SLAGSY( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * SLAGSY generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. The semi-bandwidth may then be reduced to k by additional * orthogonal transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) REAL array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) REAL array, dimension (LDA,N) * The generated n by n symmetric matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) REAL array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 ) * .. * .. Local Scalars .. INTEGER I, J REAL ALPHA, TAU, WA, WB, WN * .. * .. External Subroutines .. EXTERNAL SAXPY, SGEMV, SGER, SLARNV, SSCAL, SSYMV, $ SSYR2, XERBLA * .. * .. External Functions .. REAL SDOT, SNRM2 EXTERNAL SDOT, SNRM2 * .. * .. Intrinsic Functions .. INTRINSIC MAX, SIGN * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'SLAGSY', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of symmetric matrix * DO 40 I = N - 1, 1, -1 * * generate random reflection * CALL SLARNV( 3, ISEED, N-I+1, WORK ) WN = SNRM2( N-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL SSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * u * CALL SSYMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*SDOT( N-I+1, WORK( N+1 ), 1, WORK, 1 ) CALL SAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * CALL SSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, $ A( I, I ), LDA ) 40 CONTINUE * * Reduce number of subdiagonals to K * DO 60 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = SNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = SIGN( WN, A( K+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL SSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL SGEMV( 'Transpose', N-K-I+1, K-1, ONE, A( K+I, I+1 ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) CALL SGER( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * u * CALL SSYMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*SDOT( N-K-I+1, WORK, 1, A( K+I, I ), 1 ) CALL SAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply symmetric rank-2 update to A(k+i:n,k+i:n) * CALL SSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, $ A( K+I, K+I ), LDA ) * A( K+I, I ) = -WA DO 50 J = K + I + 1, N A( J, I ) = ZERO 50 CONTINUE 60 CONTINUE * * Store full symmetric matrix * DO 80 J = 1, N DO 70 I = J + 1, N A( J, I ) = A( I, J ) 70 CONTINUE 80 CONTINUE RETURN * * End of SLAGSY * END scalapack-1.8.0/TOOLS/LAPACK/dlagsy.f0000640000175000017500000001314210604316307016604 0ustar muammarmuammar SUBROUTINE DLAGSY( N, K, D, A, LDA, ISEED, WORK, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER INFO, K, LDA, N * .. * .. Array Arguments .. INTEGER ISEED( 4 ) DOUBLE PRECISION A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * DLAGSY generates a real symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random orthogonal matrix: * A = U*D*U'. The semi-bandwidth may then be reduced to k by additional * orthogonal transformations. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * K (input) INTEGER * The number of nonzero subdiagonals within the band of A. * 0 <= K <= N-1. * * D (input) DOUBLE PRECISION array, dimension (N) * The diagonal elements of the diagonal matrix D. * * A (output) DOUBLE PRECISION array, dimension (LDA,N) * The generated n by n symmetric matrix A (the full matrix is * stored). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= N. * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * WORK (workspace) DOUBLE PRECISION array, dimension (2*N) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 ) * .. * .. Local Scalars .. INTEGER I, J DOUBLE PRECISION ALPHA, TAU, WA, WB, WN * .. * .. External Subroutines .. EXTERNAL DAXPY, DGEMV, DGER, DLARNV, DSCAL, DSYMV, $ DSYR2, XERBLA * .. * .. External Functions .. DOUBLE PRECISION DDOT, DNRM2 EXTERNAL DDOT, DNRM2 * .. * .. Intrinsic Functions .. INTRINSIC MAX, SIGN * .. * .. Executable Statements .. * * Test the input arguments * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 ELSE IF( K.LT.0 .OR. K.GT.N-1 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 END IF IF( INFO.LT.0 ) THEN CALL XERBLA( 'DLAGSY', -INFO ) RETURN END IF * * initialize lower triangle of A to diagonal matrix * DO 20 J = 1, N DO 10 I = J + 1, N A( I, J ) = ZERO 10 CONTINUE 20 CONTINUE DO 30 I = 1, N A( I, I ) = D( I ) 30 CONTINUE * * Generate lower triangle of symmetric matrix * DO 40 I = N - 1, 1, -1 * * generate random reflection * CALL DLARNV( 3, ISEED, N-I+1, WORK ) WN = DNRM2( N-I+1, WORK, 1 ) WA = SIGN( WN, WORK( 1 ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = WORK( 1 ) + WA CALL DSCAL( N-I, ONE / WB, WORK( 2 ), 1 ) WORK( 1 ) = ONE TAU = WB / WA END IF * * apply random reflection to A(i:n,i:n) from the left * and the right * * compute y := tau * A * u * CALL DSYMV( 'Lower', N-I+1, TAU, A( I, I ), LDA, WORK, 1, ZERO, $ WORK( N+1 ), 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*DDOT( N-I+1, WORK( N+1 ), 1, WORK, 1 ) CALL DAXPY( N-I+1, ALPHA, WORK, 1, WORK( N+1 ), 1 ) * * apply the transformation as a rank-2 update to A(i:n,i:n) * CALL DSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, $ A( I, I ), LDA ) 40 CONTINUE * * Reduce number of subdiagonals to K * DO 60 I = 1, N - 1 - K * * generate reflection to annihilate A(k+i+1:n,i) * WN = DNRM2( N-K-I+1, A( K+I, I ), 1 ) WA = SIGN( WN, A( K+I, I ) ) IF( WN.EQ.ZERO ) THEN TAU = ZERO ELSE WB = A( K+I, I ) + WA CALL DSCAL( N-K-I, ONE / WB, A( K+I+1, I ), 1 ) A( K+I, I ) = ONE TAU = WB / WA END IF * * apply reflection to A(k+i:n,i+1:k+i-1) from the left * CALL DGEMV( 'Transpose', N-K-I+1, K-1, ONE, A( K+I, I+1 ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) CALL DGER( N-K-I+1, K-1, -TAU, A( K+I, I ), 1, WORK, 1, $ A( K+I, I+1 ), LDA ) * * apply reflection to A(k+i:n,k+i:n) from the left and the right * * compute y := tau * A * u * CALL DSYMV( 'Lower', N-K-I+1, TAU, A( K+I, K+I ), LDA, $ A( K+I, I ), 1, ZERO, WORK, 1 ) * * compute v := y - 1/2 * tau * ( y, u ) * u * ALPHA = -HALF*TAU*DDOT( N-K-I+1, WORK, 1, A( K+I, I ), 1 ) CALL DAXPY( N-K-I+1, ALPHA, A( K+I, I ), 1, WORK, 1 ) * * apply symmetric rank-2 update to A(k+i:n,k+i:n) * CALL DSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, $ A( K+I, K+I ), LDA ) * A( K+I, I ) = -WA DO 50 J = K + I + 1, N A( J, I ) = ZERO 50 CONTINUE 60 CONTINUE * * Store full symmetric matrix * DO 80 J = 1, N DO 70 I = J + 1, N A( J, I ) = A( I, J ) 70 CONTINUE 80 CONTINUE RETURN * * End of DLAGSY * END scalapack-1.8.0/TOOLS/LAPACK/zlarnd.f0000640000175000017500000000557310604316307016624 0ustar muammarmuammar DOUBLE COMPLEX FUNCTION ZLARND( IDIST, ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST * .. * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * ZLARND returns a random complex number from a uniform or normal * distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: real and imaginary parts each uniform (0,1) * = 2: real and imaginary parts each uniform (-1,1) * = 3: real and imaginary parts each normal (0,1) * = 4: uniformly distributed on the disc abs(z) <= 1 * = 5: uniformly distributed on the circle abs(z) = 1 * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine calls the auxiliary routine DLARAN to generate a random * real number from a uniform (0,1) distribution. The Box-Muller method * is used to transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, TWO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) DOUBLE PRECISION TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION T1, T2 * .. * .. External Functions .. DOUBLE PRECISION DLARAN EXTERNAL DLARAN * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, EXP, LOG, SQRT * .. * .. Executable Statements .. * * Generate a pair of real random numbers from a uniform (0,1) * distribution * T1 = DLARAN( ISEED ) T2 = DLARAN( ISEED ) * IF( IDIST.EQ.1 ) THEN * * real and imaginary parts each uniform (0,1) * ZLARND = DCMPLX( T1, T2 ) ELSE IF( IDIST.EQ.2 ) THEN * * real and imaginary parts each uniform (-1,1) * ZLARND = DCMPLX( TWO*T1-ONE, TWO*T2-ONE ) ELSE IF( IDIST.EQ.3 ) THEN * * real and imaginary parts each normal (0,1) * ZLARND = SQRT( -TWO*LOG( T1 ) )*EXP( DCMPLX( ZERO, TWOPI*T2 ) ) ELSE IF( IDIST.EQ.4 ) THEN * * uniform distribution on the unit disc abs(z) <= 1 * ZLARND = SQRT( T1 )*EXP( DCMPLX( ZERO, TWOPI*T2 ) ) ELSE IF( IDIST.EQ.5 ) THEN * * uniform distribution on the unit circle abs(z) = 1 * ZLARND = EXP( DCMPLX( ZERO, TWOPI*T2 ) ) END IF RETURN * * End of ZLARND * END scalapack-1.8.0/TOOLS/LAPACK/slatms.f0000640000175000017500000011233210604316307016625 0ustar muammarmuammar SUBROUTINE SLATMS( M, N, DIST, ISEED, SYM, D, MODE, COND, DMAX, $ KL, KU, PACK, A, LDA, WORK, INFO ) * * -- LAPACK test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. CHARACTER DIST, PACK, SYM INTEGER INFO, KL, KU, LDA, M, MODE, N REAL COND, DMAX * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL A( LDA, * ), D( * ), WORK( * ) * .. * * Purpose * ======= * * SLATMS generates random matrices with specified singular values * (or symmetric/hermitian with specified eigenvalues) * for testing LAPACK programs. * * SLATMS operates by applying the following sequence of * operations: * * Set the diagonal to D, where D may be input or * computed according to MODE, COND, DMAX, and SYM * as described below. * * Generate a matrix with the appropriate band structure, by one * of two methods: * * Method A: * Generate a dense M x N matrix by multiplying D on the left * and the right by random unitary matrices, then: * * Reduce the bandwidth according to KL and KU, using * Householder transformations. * * Method B: * Convert the bandwidth-0 (i.e., diagonal) matrix to a * bandwidth-1 matrix using Givens rotations, "chasing" * out-of-band elements back, much as in QR; then * convert the bandwidth-1 to a bandwidth-2 matrix, etc. * Note that for reasonably small bandwidths (relative to * M and N) this requires less storage, as a dense matrix * is not generated. Also, for symmetric matrices, only * one triangle is generated. * * Method A is chosen if the bandwidth is a large fraction of the * order of the matrix, and LDA is at least M (so a dense * matrix can be stored.) Method B is chosen if the bandwidth * is small (< 1/2 N for symmetric, < .3 N+M for * non-symmetric), or LDA is less than M and not less than the * bandwidth. * * Pack the matrix if desired. Options specified by PACK are: * no packing * zero out upper half (if symmetric) * zero out lower half (if symmetric) * store the upper half columnwise (if symmetric or upper * triangular) * store the lower half columnwise (if symmetric or lower * triangular) * store the lower triangle in banded format (if symmetric * or lower triangular) * store the upper triangle in banded format (if symmetric * or upper triangular) * store the entire matrix in banded format * If Method B is chosen, and band format is specified, then the * matrix will be generated in the band format, so no repacking * will be necessary. * * Arguments * ========= * * M - INTEGER * The number of rows of A. Not modified. * * N - INTEGER * The number of columns of A. Not modified. * * DIST - CHARACTER*1 * On entry, DIST specifies the type of distribution to be used * to generate the random eigen-/singular values. * 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) * 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) * 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. They should lie between 0 and 4095 inclusive, * and ISEED(4) should be odd. The random number generator * uses a linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to SLATMS * to continue the same random number sequence. * Changed on exit. * * SYM - CHARACTER*1 * If SYM='S' or 'H', the generated matrix is symmetric, with * eigenvalues specified by D, COND, MODE, and DMAX; they * may be positive, negative, or zero. * If SYM='P', the generated matrix is symmetric, with * eigenvalues (= singular values) specified by D, COND, * MODE, and DMAX; they will not be negative. * If SYM='N', the generated matrix is nonsymmetric, with * singular values specified by D, COND, MODE, and DMAX; * they will not be negative. * Not modified. * * D - REAL array, dimension ( MIN( M , N ) ) * This array is used to specify the singular values or * eigenvalues of A (see SYM, above.) If MODE=0, then D is * assumed to contain the singular/eigenvalues, otherwise * they will be computed according to MODE, COND, and DMAX, * and placed in D. * Modified if MODE is nonzero. * * MODE - INTEGER * On entry this describes how the singular/eigenvalues are to * be specified: * MODE = 0 means use D as input * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * If SYM='S' or 'H', and MODE is neither 0, 6, nor -6, then * the elements of D will also be multiplied by a random * sign (i.e., +1 or -1.) * Not modified. * * COND - REAL * On entry, this is used as described under MODE above. * If used, it must be >= 1. Not modified. * * DMAX - REAL * If MODE is neither -6, 0 nor 6, the contents of D, as * computed according to MODE and COND, will be scaled by * DMAX / max(abs(D(i))); thus, the maximum absolute eigen- or * singular value (which is to say the norm) will be abs(DMAX). * Note that DMAX need not be positive: if DMAX is negative * (or zero), D will be scaled by a negative number (or zero). * Not modified. * * KL - INTEGER * This specifies the lower bandwidth of the matrix. For * example, KL=0 implies upper triangular, KL=1 implies upper * Hessenberg, and KL being at least M-1 means that the matrix * has full lower bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * * KU - INTEGER * This specifies the upper bandwidth of the matrix. For * example, KU=0 implies lower triangular, KU=1 implies lower * Hessenberg, and KU being at least N-1 means that the matrix * has full upper bandwidth. KL must equal KU if the matrix * is symmetric. * Not modified. * * PACK - CHARACTER*1 * This specifies packing of matrix as follows: * 'N' => no packing * 'U' => zero out all subdiagonal entries (if symmetric) * 'L' => zero out all superdiagonal entries (if symmetric) * 'C' => store the upper triangle columnwise * (only if the matrix is symmetric or upper triangular) * 'R' => store the lower triangle columnwise * (only if the matrix is symmetric or lower triangular) * 'B' => store the lower triangle in band storage scheme * (only if matrix symmetric or lower triangular) * 'Q' => store the upper triangle in band storage scheme * (only if matrix symmetric or upper triangular) * 'Z' => store the entire matrix in band storage scheme * (pivoting can be provided for by using this * option to store A in the trailing rows of * the allocated storage) * * Using these options, the various LAPACK packed and banded * storage schemes can be obtained: * GB - use 'Z' * PB, SB or TB - use 'B' or 'Q' * PP, SP or TP - use 'C' or 'R' * * If two calls to SLATMS differ only in the PACK parameter, * they will generate mathematically equivalent matrices. * Not modified. * * A - REAL array, dimension ( LDA, N ) * On exit A is the desired test matrix. A is first generated * in full (unpacked) form, and then packed, if so specified * by PACK. Thus, the first M elements of the first N * columns will always be modified. If PACK specifies a * packed or banded storage scheme, all LDA elements of the * first N columns will be modified; the elements of the * array which do not correspond to elements of the generated * matrix are set to zero. * Modified. * * LDA - INTEGER * LDA specifies the first dimension of A as declared in the * calling program. If PACK='N', 'U', 'L', 'C', or 'R', then * LDA must be at least M. If PACK='B' or 'Q', then LDA must * be at least MIN( KL, M-1) (which is equal to MIN(KU,N-1)). * If PACK='Z', LDA must be large enough to hold the packed * array: MIN( KU, N-1) + MIN( KL, M-1) + 1. * Not modified. * * WORK - REAL array, dimension ( 3*MAX( N , M ) ) * Workspace. * Modified. * * INFO - INTEGER * Error code. On exit, INFO will be set to one of the * following values: * 0 => normal return * -1 => M negative or unequal to N and SYM='S', 'H', or 'P' * -2 => N negative * -3 => DIST illegal string * -5 => SYM illegal string * -7 => MODE not in range -6 to 6 * -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 * -10 => KL negative * -11 => KU negative, or SYM='S' or 'H' and KU not equal to KL * -12 => PACK illegal string, or PACK='U' or 'L', and SYM='N'; * or PACK='C' or 'Q' and SYM='N' and KL is not zero; * or PACK='R' or 'B' and SYM='N' and KU is not zero; * or PACK='U', 'L', 'C', 'R', 'B', or 'Q', and M is not * N. * -14 => LDA is less than M, or PACK='Z' and LDA is less than * MIN(KU,N-1) + MIN(KL,M-1) + 1. * 1 => Error return from SLATM1 * 2 => Cannot scale to DMAX (max. sing. value is 0) * 3 => Error return from SLAGGE or SLAGSY * * ===================================================================== * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E0 ) REAL ONE PARAMETER ( ONE = 1.0E0 ) REAL TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) * .. * .. Local Scalars .. LOGICAL GIVENS, ILEXTR, ILTEMP, TOPDWN INTEGER I, IC, ICOL, IDIST, IENDCH, IINFO, IL, ILDA, $ IOFFG, IOFFST, IPACK, IPACKG, IR, IR1, IR2, $ IROW, IRSIGN, ISKEW, ISYM, ISYMPK, J, JC, JCH, $ JKL, JKU, JR, K, LLB, MINLDA, MNMIN, MR, NC, $ UUB REAL ALPHA, ANGLE, C, DUMMY, EXTRA, S, TEMP * .. * .. External Functions .. LOGICAL LSAME REAL SLARND EXTERNAL LSAME, SLARND * .. * .. External Subroutines .. EXTERNAL SCOPY, SLAGGE, SLAGSY, SLAROT, SLARTG, SLATM1, $ SLASET, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, COS, MAX, MIN, MOD, REAL, SIN * .. * .. Executable Statements .. * * 1) Decode and Test the input parameters. * Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Decode DIST * IF( LSAME( DIST, 'U' ) ) THEN IDIST = 1 ELSE IF( LSAME( DIST, 'S' ) ) THEN IDIST = 2 ELSE IF( LSAME( DIST, 'N' ) ) THEN IDIST = 3 ELSE IDIST = -1 END IF * * Decode SYM * IF( LSAME( SYM, 'N' ) ) THEN ISYM = 1 IRSIGN = 0 ELSE IF( LSAME( SYM, 'P' ) ) THEN ISYM = 2 IRSIGN = 0 ELSE IF( LSAME( SYM, 'S' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE IF( LSAME( SYM, 'H' ) ) THEN ISYM = 2 IRSIGN = 1 ELSE ISYM = -1 END IF * * Decode PACK * ISYMPK = 0 IF( LSAME( PACK, 'N' ) ) THEN IPACK = 0 ELSE IF( LSAME( PACK, 'U' ) ) THEN IPACK = 1 ISYMPK = 1 ELSE IF( LSAME( PACK, 'L' ) ) THEN IPACK = 2 ISYMPK = 1 ELSE IF( LSAME( PACK, 'C' ) ) THEN IPACK = 3 ISYMPK = 2 ELSE IF( LSAME( PACK, 'R' ) ) THEN IPACK = 4 ISYMPK = 3 ELSE IF( LSAME( PACK, 'B' ) ) THEN IPACK = 5 ISYMPK = 3 ELSE IF( LSAME( PACK, 'Q' ) ) THEN IPACK = 6 ISYMPK = 2 ELSE IF( LSAME( PACK, 'Z' ) ) THEN IPACK = 7 ELSE IPACK = -1 END IF * * Set certain internal parameters * MNMIN = MIN( M, N ) LLB = MIN( KL, M-1 ) UUB = MIN( KU, N-1 ) MR = MIN( M, N+LLB ) NC = MIN( N, M+UUB ) * IF( IPACK.EQ.5 .OR. IPACK.EQ.6 ) THEN MINLDA = UUB + 1 ELSE IF( IPACK.EQ.7 ) THEN MINLDA = LLB + UUB + 1 ELSE MINLDA = M END IF * * Use Givens rotation method if bandwidth small enough, * or if LDA is too small to store the matrix unpacked. * GIVENS = .FALSE. IF( ISYM.EQ.1 ) THEN IF( REAL( LLB+UUB ).LT.0.3*REAL( MAX( 1, MR+NC ) ) ) $ GIVENS = .TRUE. ELSE IF( 2*LLB.LT.M ) $ GIVENS = .TRUE. END IF IF( LDA.LT.M .AND. LDA.GE.MINLDA ) $ GIVENS = .TRUE. * * Set INFO if an error * IF( M.LT.0 ) THEN INFO = -1 ELSE IF( M.NE.N .AND. ISYM.NE.1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IDIST.EQ.-1 ) THEN INFO = -3 ELSE IF( ISYM.EQ.-1 ) THEN INFO = -5 ELSE IF( ABS( MODE ).GT.6 ) THEN INFO = -7 ELSE IF( ( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) .AND. COND.LT.ONE ) $ THEN INFO = -8 ELSE IF( KL.LT.0 ) THEN INFO = -10 ELSE IF( KU.LT.0 .OR. ( ISYM.NE.1 .AND. KL.NE.KU ) ) THEN INFO = -11 ELSE IF( IPACK.EQ.-1 .OR. ( ISYMPK.EQ.1 .AND. ISYM.EQ.1 ) .OR. $ ( ISYMPK.EQ.2 .AND. ISYM.EQ.1 .AND. KL.GT.0 ) .OR. $ ( ISYMPK.EQ.3 .AND. ISYM.EQ.1 .AND. KU.GT.0 ) .OR. $ ( ISYMPK.NE.0 .AND. M.NE.N ) ) THEN INFO = -12 ELSE IF( LDA.LT.MAX( 1, MINLDA ) ) THEN INFO = -14 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'SLATMS', -INFO ) RETURN END IF * * Initialize random number generator * DO 10 I = 1, 4 ISEED( I ) = MOD( ABS( ISEED( I ) ), 4096 ) 10 CONTINUE * IF( MOD( ISEED( 4 ), 2 ).NE.1 ) $ ISEED( 4 ) = ISEED( 4 ) + 1 * * 2) Set up D if indicated. * * Compute D according to COND and MODE * CALL SLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, MNMIN, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 RETURN END IF * * Choose Top-Down if D is (apparently) increasing, * Bottom-Up if D is (apparently) decreasing. * IF( ABS( D( 1 ) ).LE.ABS( D( MNMIN ) ) ) THEN TOPDWN = .TRUE. ELSE TOPDWN = .FALSE. END IF * IF( MODE.NE.0 .AND. ABS( MODE ).NE.6 ) THEN * * Scale by DMAX * TEMP = ABS( D( 1 ) ) DO 20 I = 2, MNMIN TEMP = MAX( TEMP, ABS( D( I ) ) ) 20 CONTINUE * IF( TEMP.GT.ZERO ) THEN ALPHA = DMAX / TEMP ELSE INFO = 2 RETURN END IF * CALL SSCAL( MNMIN, ALPHA, D, 1 ) * END IF * * 3) Generate Banded Matrix using Givens rotations. * Also the special case of UUB=LLB=0 * * Compute Addressing constants to cover all * storage formats. Whether GE, SY, GB, or SB, * upper or lower triangle or both, * the (i,j)-th element is in * A( i - ISKEW*j + IOFFST, j ) * IF( IPACK.GT.4 ) THEN ILDA = LDA - 1 ISKEW = 1 IF( IPACK.GT.5 ) THEN IOFFST = UUB + 1 ELSE IOFFST = 1 END IF ELSE ILDA = LDA ISKEW = 0 IOFFST = 0 END IF * * IPACKG is the format that the matrix is generated in. If this is * different from IPACK, then the matrix must be repacked at the * end. It also signals how to compute the norm, for scaling. * IPACKG = 0 CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) * * Diagonal Matrix -- We are done, unless it * is to be stored SP/PP/TP (PACK='R' or 'C') * IF( LLB.EQ.0 .AND. UUB.EQ.0 ) THEN CALL SCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFST, 1 ), ILDA+1 ) IF( IPACK.LE.2 .OR. IPACK.GE.5 ) $ IPACKG = IPACK * ELSE IF( GIVENS ) THEN * * Check whether to use Givens rotations, * Householder transformations, or nothing. * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * IF( IPACK.GT.4 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF * CALL SCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFST, 1 ), ILDA+1 ) * IF( TOPDWN ) THEN JKL = 0 DO 50 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * Last row actually rotated is M * Last column actually rotated is MIN( M+JKU, N ) * DO 40 JR = 1, MIN( M+JKU, N ) + JKL - 1 EXTRA = ZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) ICOL = MAX( 1, JR-JKL ) IF( JR.LT.M ) THEN IL = MIN( N, JR+JKU ) + 1 - ICOL CALL SLAROT( .TRUE., JR.GT.JKL, .FALSE., IL, C, $ S, A( JR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IR = JR IC = ICOL DO 30 JCH = JR - JKL, 1, -JKL - JKU IF( IR.LT.M ) THEN CALL SLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, C, S, DUMMY ) END IF IROW = MAX( 1, JCH-JKU ) IL = IR + 2 - IROW TEMP = ZERO ILTEMP = JCH.GT.JKU CALL SLAROT( .FALSE., ILTEMP, .TRUE., IL, C, -S, $ A( IROW-ISKEW*IC+IOFFST, IC ), $ ILDA, TEMP, EXTRA ) IF( ILTEMP ) THEN CALL SLARTG( A( IROW+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), TEMP, C, S, DUMMY ) ICOL = MAX( 1, JCH-JKU-JKL ) IL = IC + 2 - ICOL EXTRA = ZERO CALL SLAROT( .TRUE., JCH.GT.JKU+JKL, .TRUE., $ IL, C, -S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ TEMP ) IC = ICOL IR = IROW END IF 30 CONTINUE 40 CONTINUE 50 CONTINUE * JKU = UUB DO 80 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * DO 70 JC = 1, MIN( N+JKL, M ) + JKU - 1 EXTRA = ZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) IROW = MAX( 1, JC-JKU ) IF( JC.LT.N ) THEN IL = MIN( M, JC+JKL ) + 1 - IROW CALL SLAROT( .FALSE., JC.GT.JKU, .FALSE., IL, C, $ S, A( IROW-ISKEW*JC+IOFFST, JC ), $ ILDA, EXTRA, DUMMY ) END IF * * Chase "EXTRA" back up * IC = JC IR = IROW DO 60 JCH = JC - JKU, 1, -JKL - JKU IF( IC.LT.N ) THEN CALL SLARTG( A( IR+1-ISKEW*( IC+1 )+IOFFST, $ IC+1 ), EXTRA, C, S, DUMMY ) END IF ICOL = MAX( 1, JCH-JKL ) IL = IC + 2 - ICOL TEMP = ZERO ILTEMP = JCH.GT.JKL CALL SLAROT( .TRUE., ILTEMP, .TRUE., IL, C, -S, $ A( IR-ISKEW*ICOL+IOFFST, ICOL ), $ ILDA, TEMP, EXTRA ) IF( ILTEMP ) THEN CALL SLARTG( A( IR+1-ISKEW*( ICOL+1 )+IOFFST, $ ICOL+1 ), TEMP, C, S, DUMMY ) IROW = MAX( 1, JCH-JKL-JKU ) IL = IR + 2 - IROW EXTRA = ZERO CALL SLAROT( .FALSE., JCH.GT.JKL+JKU, .TRUE., $ IL, C, -S, A( IROW-ISKEW*ICOL+ $ IOFFST, ICOL ), ILDA, EXTRA, $ TEMP ) IC = ICOL IR = IROW END IF 60 CONTINUE 70 CONTINUE 80 CONTINUE * ELSE * * Bottom-Up -- Start at the bottom right. * JKL = 0 DO 110 JKU = 1, UUB * * Transform from bandwidth JKL, JKU-1 to JKL, JKU * * First row actually rotated is M * First column actually rotated is MIN( M+JKU, N ) * IENDCH = MIN( M, N+JKL ) - 1 DO 100 JC = MIN( M+JKU, N ) - 1, 1 - JKL, -1 EXTRA = ZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) IROW = MAX( 1, JC-JKU+1 ) IF( JC.GT.0 ) THEN IL = MIN( M, JC+JKL+1 ) + 1 - IROW CALL SLAROT( .FALSE., .FALSE., JC+JKL.LT.M, IL, $ C, S, A( IROW-ISKEW*JC+IOFFST, $ JC ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IC = JC DO 90 JCH = JC + JKL, IENDCH, JKL + JKU ILEXTR = IC.GT.0 IF( ILEXTR ) THEN CALL SLARTG( A( JCH-ISKEW*IC+IOFFST, IC ), $ EXTRA, C, S, DUMMY ) END IF IC = MAX( 1, IC ) ICOL = MIN( N-1, JCH+JKU ) ILTEMP = JCH + JKU.LT.N TEMP = ZERO CALL SLAROT( .TRUE., ILEXTR, ILTEMP, ICOL+2-IC, $ C, S, A( JCH-ISKEW*IC+IOFFST, IC ), $ ILDA, EXTRA, TEMP ) IF( ILTEMP ) THEN CALL SLARTG( A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), TEMP, C, S, DUMMY ) IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = ZERO CALL SLAROT( .FALSE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( JCH-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, TEMP, EXTRA ) IC = ICOL END IF 90 CONTINUE 100 CONTINUE 110 CONTINUE * JKU = UUB DO 140 JKL = 1, LLB * * Transform from bandwidth JKL-1, JKU to JKL, JKU * * First row actually rotated is MIN( N+JKL, M ) * First column actually rotated is N * IENDCH = MIN( N, M+JKU ) - 1 DO 130 JR = MIN( N+JKL, M ) - 1, 1 - JKU, -1 EXTRA = ZERO ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) ICOL = MAX( 1, JR-JKL+1 ) IF( JR.GT.0 ) THEN IL = MIN( N, JR+JKU+1 ) + 1 - ICOL CALL SLAROT( .TRUE., .FALSE., JR+JKU.LT.N, IL, $ C, S, A( JR-ISKEW*ICOL+IOFFST, $ ICOL ), ILDA, DUMMY, EXTRA ) END IF * * Chase "EXTRA" back down * IR = JR DO 120 JCH = JR + JKU, IENDCH, JKL + JKU ILEXTR = IR.GT.0 IF( ILEXTR ) THEN CALL SLARTG( A( IR-ISKEW*JCH+IOFFST, JCH ), $ EXTRA, C, S, DUMMY ) END IF IR = MAX( 1, IR ) IROW = MIN( M-1, JCH+JKL ) ILTEMP = JCH + JKL.LT.M TEMP = ZERO CALL SLAROT( .FALSE., ILEXTR, ILTEMP, IROW+2-IR, $ C, S, A( IR-ISKEW*JCH+IOFFST, $ JCH ), ILDA, EXTRA, TEMP ) IF( ILTEMP ) THEN CALL SLARTG( A( IROW-ISKEW*JCH+IOFFST, JCH ), $ TEMP, C, S, DUMMY ) IL = MIN( IENDCH, JCH+JKL+JKU ) + 2 - JCH EXTRA = ZERO CALL SLAROT( .TRUE., .TRUE., $ JCH+JKL+JKU.LE.IENDCH, IL, C, S, $ A( IROW-ISKEW*JCH+IOFFST, JCH ), $ ILDA, TEMP, EXTRA ) IR = IROW END IF 120 CONTINUE 130 CONTINUE 140 CONTINUE END IF * ELSE * * Symmetric -- A = U D U' * IPACKG = IPACK IOFFG = IOFFST * IF( TOPDWN ) THEN * * Top-Down -- Generate Upper triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 6 IOFFG = UUB + 1 ELSE IPACKG = 1 END IF CALL SCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFG, 1 ), ILDA+1 ) * DO 170 K = 1, UUB DO 160 JC = 1, N - 1 IROW = MAX( 1, JC-K ) IL = MIN( JC+1, K+2 ) EXTRA = ZERO TEMP = A( JC-ISKEW*( JC+1 )+IOFFG, JC+1 ) ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = SIN( ANGLE ) CALL SLAROT( .FALSE., JC.GT.K, .TRUE., IL, C, S, $ A( IROW-ISKEW*JC+IOFFG, JC ), ILDA, $ EXTRA, TEMP ) CALL SLAROT( .TRUE., .TRUE., .FALSE., $ MIN( K, N-JC )+1, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ TEMP, DUMMY ) * * Chase EXTRA back up the matrix * ICOL = JC DO 150 JCH = JC - K, 1, -K CALL SLARTG( A( JCH+1-ISKEW*( ICOL+1 )+IOFFG, $ ICOL+1 ), EXTRA, C, S, DUMMY ) TEMP = A( JCH-ISKEW*( JCH+1 )+IOFFG, JCH+1 ) CALL SLAROT( .TRUE., .TRUE., .TRUE., K+2, C, -S, $ A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, TEMP, EXTRA ) IROW = MAX( 1, JCH-K ) IL = MIN( JCH+1, K+2 ) EXTRA = ZERO CALL SLAROT( .FALSE., JCH.GT.K, .TRUE., IL, C, $ -S, A( IROW-ISKEW*JCH+IOFFG, JCH ), $ ILDA, EXTRA, TEMP ) ICOL = JCH 150 CONTINUE 160 CONTINUE 170 CONTINUE * * If we need lower triangle, copy from upper. Note that * the order of copying is chosen to work for 'q' -> 'b' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.3 ) THEN DO 190 JC = 1, N IROW = IOFFST - ISKEW*JC DO 180 JR = JC, MIN( N, JC+UUB ) A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 180 CONTINUE 190 CONTINUE IF( IPACK.EQ.5 ) THEN DO 210 JC = N - UUB + 1, N DO 200 JR = N + 2 - JC, UUB + 1 A( JR, JC ) = ZERO 200 CONTINUE 210 CONTINUE END IF IF( IPACKG.EQ.6 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF ELSE * * Bottom-Up -- Generate Lower triangle only * IF( IPACK.GE.5 ) THEN IPACKG = 5 IF( IPACK.EQ.6 ) $ IOFFG = 1 ELSE IPACKG = 2 END IF CALL SCOPY( MNMIN, D, 1, A( 1-ISKEW+IOFFG, 1 ), ILDA+1 ) * DO 240 K = 1, UUB DO 230 JC = N - 1, 1, -1 IL = MIN( N+1-JC, K+2 ) EXTRA = ZERO TEMP = A( 1+( 1-ISKEW )*JC+IOFFG, JC ) ANGLE = TWOPI*SLARND( 1, ISEED ) C = COS( ANGLE ) S = -SIN( ANGLE ) CALL SLAROT( .FALSE., .TRUE., N-JC.GT.K, IL, C, S, $ A( ( 1-ISKEW )*JC+IOFFG, JC ), ILDA, $ TEMP, EXTRA ) ICOL = MAX( 1, JC-K+1 ) CALL SLAROT( .TRUE., .FALSE., .TRUE., JC+2-ICOL, C, $ S, A( JC-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, DUMMY, TEMP ) * * Chase EXTRA back down the matrix * ICOL = JC DO 220 JCH = JC + K, N - 1, K CALL SLARTG( A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ EXTRA, C, S, DUMMY ) TEMP = A( 1+( 1-ISKEW )*JCH+IOFFG, JCH ) CALL SLAROT( .TRUE., .TRUE., .TRUE., K+2, C, S, $ A( JCH-ISKEW*ICOL+IOFFG, ICOL ), $ ILDA, EXTRA, TEMP ) IL = MIN( N+1-JCH, K+2 ) EXTRA = ZERO CALL SLAROT( .FALSE., .TRUE., N-JCH.GT.K, IL, C, $ S, A( ( 1-ISKEW )*JCH+IOFFG, JCH ), $ ILDA, TEMP, EXTRA ) ICOL = JCH 220 CONTINUE 230 CONTINUE 240 CONTINUE * * If we need upper triangle, copy from lower. Note that * the order of copying is chosen to work for 'b' -> 'q' * IF( IPACK.NE.IPACKG .AND. IPACK.NE.4 ) THEN DO 260 JC = N, 1, -1 IROW = IOFFST - ISKEW*JC DO 250 JR = JC, MAX( 1, JC-UUB ), -1 A( JR+IROW, JC ) = A( JC-ISKEW*JR+IOFFG, JR ) 250 CONTINUE 260 CONTINUE IF( IPACK.EQ.6 ) THEN DO 280 JC = 1, UUB DO 270 JR = 1, UUB + 1 - JC A( JR, JC ) = ZERO 270 CONTINUE 280 CONTINUE END IF IF( IPACKG.EQ.5 ) THEN IPACKG = IPACK ELSE IPACKG = 0 END IF END IF END IF END IF * ELSE * * 4) Generate Banded Matrix by first * Rotating by random Unitary matrices, * then reducing the bandwidth using Householder * transformations. * * Note: we should get here only if LDA .ge. N * IF( ISYM.EQ.1 ) THEN * * Non-symmetric -- A = U D V * CALL SLAGGE( MR, NC, LLB, UUB, D, A, LDA, ISEED, WORK, $ IINFO ) ELSE * * Symmetric -- A = U D U' * CALL SLAGSY( M, LLB, D, A, LDA, ISEED, WORK, IINFO ) * END IF IF( IINFO.NE.0 ) THEN INFO = 3 RETURN END IF END IF * * 5) Pack the matrix * IF( IPACK.NE.IPACKG ) THEN IF( IPACK.EQ.1 ) THEN * * 'U' -- Upper triangular, not packed * DO 300 J = 1, M DO 290 I = J + 1, M A( I, J ) = ZERO 290 CONTINUE 300 CONTINUE * ELSE IF( IPACK.EQ.2 ) THEN * * 'L' -- Lower triangular, not packed * DO 320 J = 2, M DO 310 I = 1, J - 1 A( I, J ) = ZERO 310 CONTINUE 320 CONTINUE * ELSE IF( IPACK.EQ.3 ) THEN * * 'C' -- Upper triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 340 J = 1, M DO 330 I = 1, J IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 330 CONTINUE 340 CONTINUE * ELSE IF( IPACK.EQ.4 ) THEN * * 'R' -- Lower triangle packed Columnwise. * ICOL = 1 IROW = 0 DO 360 J = 1, M DO 350 I = J, M IROW = IROW + 1 IF( IROW.GT.LDA ) THEN IROW = 1 ICOL = ICOL + 1 END IF A( IROW, ICOL ) = A( I, J ) 350 CONTINUE 360 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * 'B' -- The lower triangle is packed as a band matrix. * 'Q' -- The upper triangle is packed as a band matrix. * 'Z' -- The whole matrix is packed as a band matrix. * IF( IPACK.EQ.5 ) $ UUB = 0 IF( IPACK.EQ.6 ) $ LLB = 0 * DO 380 J = 1, UUB DO 370 I = MIN( J+LLB, M ), 1, -1 A( I-J+UUB+1, J ) = A( I, J ) 370 CONTINUE 380 CONTINUE * DO 400 J = UUB + 2, N DO 390 I = J - UUB, MIN( J+LLB, M ) A( I-J+UUB+1, J ) = A( I, J ) 390 CONTINUE 400 CONTINUE END IF * * If packed, zero out extraneous elements. * * Symmetric/Triangular Packed -- * zero out everything after A(IROW,ICOL) * IF( IPACK.EQ.3 .OR. IPACK.EQ.4 ) THEN DO 420 JC = ICOL, M DO 410 JR = IROW + 1, LDA A( JR, JC ) = ZERO 410 CONTINUE IROW = 0 420 CONTINUE * ELSE IF( IPACK.GE.5 ) THEN * * Packed Band -- * 1st row is now in A( UUB+2-j, j), zero above it * m-th row is now in A( M+UUB-j,j), zero below it * last non-zero diagonal is now in A( UUB+LLB+1,j ), * zero below it, too. * IR1 = UUB + LLB + 2 IR2 = UUB + M + 2 DO 450 JC = 1, N DO 430 JR = 1, UUB + 1 - JC A( JR, JC ) = ZERO 430 CONTINUE DO 440 JR = MAX( 1, MIN( IR1, IR2-JC ) ), LDA A( JR, JC ) = ZERO 440 CONTINUE 450 CONTINUE END IF END IF * RETURN * * End of SLATMS * END scalapack-1.8.0/TOOLS/LAPACK/dlaran.f0000640000175000017500000000627410604316307016572 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DLARAN( ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * DLARAN returns a random real number from a uniform (0,1) * distribution. * * Arguments * ========= * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine uses a multiplicative congruential method with modulus * 2**48 and multiplier 33952834046453 (see G.S.Fishman, * 'Multiplicative congruential random number generators with modulus * 2**b: an exhaustive analysis for b = 32 and a partial analysis for * b = 48', Math. Comp. 189, pp 331-344, 1990). * * 48-bit integers are stored in 4 integer array elements with 12 bits * per element. Hence the routine is portable across machines with * integers of 32 bits or more. * * ===================================================================== * * .. Parameters .. INTEGER M1, M2, M3, M4 PARAMETER ( M1 = 494, M2 = 322, M3 = 2508, M4 = 2549 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) INTEGER IPW2 DOUBLE PRECISION R PARAMETER ( IPW2 = 4096, R = ONE / IPW2 ) * .. * .. Local Scalars .. INTEGER IT1, IT2, IT3, IT4 DOUBLE PRECISION RNDOUT * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. * .. Executable Statements .. 10 CONTINUE * * multiply the seed by the multiplier modulo 2**48 * IT4 = ISEED( 4 )*M4 IT3 = IT4 / IPW2 IT4 = IT4 - IPW2*IT3 IT3 = IT3 + ISEED( 3 )*M4 + ISEED( 4 )*M3 IT2 = IT3 / IPW2 IT3 = IT3 - IPW2*IT2 IT2 = IT2 + ISEED( 2 )*M4 + ISEED( 3 )*M3 + ISEED( 4 )*M2 IT1 = IT2 / IPW2 IT2 = IT2 - IPW2*IT1 IT1 = IT1 + ISEED( 1 )*M4 + ISEED( 2 )*M3 + ISEED( 3 )*M2 + $ ISEED( 4 )*M1 IT1 = MOD( IT1, IPW2 ) * * return updated seed * ISEED( 1 ) = IT1 ISEED( 2 ) = IT2 ISEED( 3 ) = IT3 ISEED( 4 ) = IT4 * * convert 48-bit integer to a real number in the interval (0,1) * RNDOUT = R*( DBLE( IT1 )+R*( DBLE( IT2 )+R*( DBLE( IT3 )+R* $ ( DBLE( IT4 ) ) ) ) ) * IF (RNDOUT.EQ.1.0D+0) THEN * If a real number has n bits of precision, and the first * n bits of the 48-bit integer above happen to be all 1 (which * will occur about once every 2**n calls), then DLARAN will * be rounded to exactly 1.0. * Since DLARAN is not supposed to return exactly 0.0 or 1.0 * (and some callers of DLARAN, such as CLARND, depend on that), * the statistically correct thing to do in this situation is * simply to iterate again. * N.B. the case DLARAN = 0.0 should not be possible. * GOTO 10 END IF * DLARAN = RNDOUT RETURN * * End of DLARAN * END scalapack-1.8.0/TOOLS/LAPACK/slatm1.f0000640000175000017500000001544410604316307016531 0ustar muammarmuammar SUBROUTINE SLATM1( MODE, COND, IRSIGN, IDIST, ISEED, D, N, INFO ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST, INFO, IRSIGN, MODE, N REAL COND * .. * .. Array Arguments .. INTEGER ISEED( 4 ) REAL D( * ) * .. * * Purpose * ======= * * SLATM1 computes the entries of D(1..N) as specified by * MODE, COND and IRSIGN. IDIST and ISEED determine the generation * of random numbers. SLATM1 is called by SLATMR to generate * random test matrices for LAPACK programs. * * Arguments * ========= * * MODE - INTEGER * On entry describes how D is to be computed: * MODE = 0 means do not change D. * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) * MODE = 5 sets D to random numbers in the range * ( 1/COND , 1 ) such that their logarithms * are uniformly distributed. * MODE = 6 set D to random numbers from same distribution * as the rest of the matrix. * MODE < 0 has the same meaning as ABS(MODE), except that * the order of the elements of D is reversed. * Thus if MODE is positive, D has entries ranging from * 1 to 1/COND, if negative, from 1/COND to 1, * Not modified. * * COND - REAL * On entry, used as described under MODE above. * If used, it must be >= 1. Not modified. * * IRSIGN - INTEGER * On entry, if MODE neither -6, 0 nor 6, determines sign of * entries of D * 0 => leave entries of D unchanged * 1 => multiply each entry of D by 1 or -1 with probability .5 * * IDIST - CHARACTER*1 * On entry, IDIST specifies the type of distribution to be * used to generate a random matrix . * 1 => UNIFORM( 0, 1 ) * 2 => UNIFORM( -1, 1 ) * 3 => NORMAL( 0, 1 ) * Not modified. * * ISEED - INTEGER array, dimension ( 4 ) * On entry ISEED specifies the seed of the random number * generator. The random number generator uses a * linear congruential sequence limited to small * integers, and so should produce machine independent * random numbers. The values of ISEED are changed on * exit, and can be used in the next call to SLATM1 * to continue the same random number sequence. * Changed on exit. * * D - REAL array, dimension ( MIN( M , N ) ) * Array to be computed according to MODE, COND and IRSIGN. * May be changed on exit if MODE is nonzero. * * N - INTEGER * Number of entries of D. Not modified. * * INFO - INTEGER * 0 => normal termination * -1 => if MODE not in range -6 to 6 * -2 => if MODE neither -6, 0 nor 6, and * IRSIGN neither 0 nor 1 * -3 => if MODE neither -6, 0 nor 6 and COND less than 1 * -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 3 * -7 => if N negative * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E0 ) REAL HALF PARAMETER ( HALF = 0.5E0 ) * .. * .. Local Scalars .. INTEGER I REAL ALPHA, TEMP * .. * .. External Functions .. REAL SLARAN EXTERNAL SLARAN * .. * .. External Subroutines .. EXTERNAL SLARNV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, EXP, LOG, REAL * .. * .. Executable Statements .. * * Decode and Test the input parameters. Initialize flags & seed. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Set INFO if an error * IF( MODE.LT.-6 .OR. MODE.GT.6 ) THEN INFO = -1 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ ( IRSIGN.NE.0 .AND. IRSIGN.NE.1 ) ) THEN INFO = -2 ELSE IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ COND.LT.ONE ) THEN INFO = -3 ELSE IF( ( MODE.EQ.6 .OR. MODE.EQ.-6 ) .AND. $ ( IDIST.LT.1 .OR. IDIST.GT.3 ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -7 END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'SLATM1', -INFO ) RETURN END IF * * Compute D according to COND and MODE * IF( MODE.NE.0 ) THEN GO TO ( 10, 30, 50, 70, 90, 110 )ABS( MODE ) * * One large D value: * 10 CONTINUE DO 20 I = 1, N D( I ) = ONE / COND 20 CONTINUE D( 1 ) = ONE GO TO 120 * * One small D value: * 30 CONTINUE DO 40 I = 1, N D( I ) = ONE 40 CONTINUE D( N ) = ONE / COND GO TO 120 * * Exponentially distributed D values: * 50 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN ALPHA = COND**( -ONE / REAL( N-1 ) ) DO 60 I = 2, N D( I ) = ALPHA**( I-1 ) 60 CONTINUE END IF GO TO 120 * * Arithmetically distributed D values: * 70 CONTINUE D( 1 ) = ONE IF( N.GT.1 ) THEN TEMP = ONE / COND ALPHA = ( ONE-TEMP ) / REAL( N-1 ) DO 80 I = 2, N D( I ) = REAL( N-I )*ALPHA + TEMP 80 CONTINUE END IF GO TO 120 * * Randomly distributed D values on ( 1/COND , 1): * 90 CONTINUE ALPHA = LOG( ONE / COND ) DO 100 I = 1, N D( I ) = EXP( ALPHA*SLARAN( ISEED ) ) 100 CONTINUE GO TO 120 * * Randomly distributed D values from IDIST * 110 CONTINUE CALL SLARNV( IDIST, ISEED, N, D ) * 120 CONTINUE * * If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign * random signs to D * IF( ( MODE.NE.-6 .AND. MODE.NE.0 .AND. MODE.NE.6 ) .AND. $ IRSIGN.EQ.1 ) THEN DO 130 I = 1, N TEMP = SLARAN( ISEED ) IF( TEMP.GT.HALF ) $ D( I ) = -D( I ) 130 CONTINUE END IF * * Reverse if MODE < 0 * IF( MODE.LT.0 ) THEN DO 140 I = 1, N / 2 TEMP = D( I ) D( I ) = D( N+1-I ) D( N+1-I ) = TEMP 140 CONTINUE END IF * END IF * RETURN * * End of SLATM1 * END scalapack-1.8.0/TOOLS/LAPACK/clarot.f0000640000175000017500000002362110604316307016610 0ustar muammarmuammar SUBROUTINE CLAROT( LROWS, LLEFT, LRIGHT, NL, C, S, A, LDA, XLEFT, $ XRIGHT ) * * -- LAPACK auxiliary test routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. LOGICAL LLEFT, LRIGHT, LROWS INTEGER LDA, NL COMPLEX C, S, XLEFT, XRIGHT * .. * .. Array Arguments .. COMPLEX A( * ) * .. * * Purpose * ======= * * CLAROT applies a (Givens) rotation to two adjacent rows or * columns, where one element of the first and/or last column/row * November 2006 * for use on matrices stored in some format other than GE, so * that elements of the matrix may be used or modified for which * no array element is provided. * * One example is a symmetric matrix in SB format (bandwidth=4), for * which UPLO='L': Two adjacent rows will have the format: * * row j: * * * * * . . . . * row j+1: * * * * * . . . . * * '*' indicates elements for which storage is provided, * '.' indicates elements for which no storage is provided, but * are not necessarily zero; their values are determined by * symmetry. ' ' indicates elements which are necessarily zero, * and have no storage provided. * * Those columns which have two '*'s can be handled by SROT. * Those columns which have no '*'s can be ignored, since as long * as the Givens rotations are carefully applied to preserve * symmetry, their values are determined. * Those columns which have one '*' have to be handled separately, * by using separate variables "p" and "q": * * row j: * * * * * p . . . * row j+1: q * * * * * . . . . * * The element p would have to be set correctly, then that column * is rotated, setting p to its new value. The next call to * CLAROT would rotate columns j and j+1, using p, and restore * symmetry. The element q would start out being zero, and be * made non-zero by the rotation. Later, rotations would presumably * be chosen to zero q out. * * Typical Calling Sequences: rotating the i-th and (i+1)-st rows. * ------- ------- --------- * * General dense matrix: * * CALL CLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S, * A(i,1),LDA, DUMMY, DUMMY) * * General banded matrix in GB format: * * j = MAX(1, i-KL ) * NL = MIN( N, i+KU+1 ) + 1-j * CALL CLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S, * A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,KL+1) ] * * Symmetric banded matrix in SY format, bandwidth K, * lower triangle only: * * j = MAX(1, i-K ) * NL = MIN( K+1, i ) + 1 * CALL CLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S, * A(i,j), LDA, XLEFT, XRIGHT ) * * Same, but upper triangle only: * * NL = MIN( K+1, N-i ) + 1 * CALL CLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S, * A(i,i), LDA, XLEFT, XRIGHT ) * * Symmetric banded matrix in SB format, bandwidth K, * lower triangle only: * * [ same as for SY, except:] * . . . . * A(i+1-j,j), LDA-1, XLEFT, XRIGHT ) * * [ note that i+1-j is just MIN(i,K+1) ] * * Same, but upper triangle only: * . . . * A(K+1,i), LDA-1, XLEFT, XRIGHT ) * * Rotating columns is just the transpose of rotating rows, except * for GB and SB: (rotating columns i and i+1) * * GB: * j = MAX(1, i-KU ) * NL = MIN( N, i+KL+1 ) + 1-j * CALL CLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S, * A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * [note that KU+j+1-i is just MAX(1,KU+2-i)] * * SB: (upper triangle) * * . . . . . . * A(K+j+1-i,i),LDA-1, XTOP, XBOTTM ) * * SB: (lower triangle) * * . . . . . . * A(1,i),LDA-1, XTOP, XBOTTM ) * * Arguments * ========= * * LROWS - LOGICAL * If .TRUE., then CLAROT will rotate two rows. If .FALSE., * then it will rotate two columns. * Not modified. * * LLEFT - LOGICAL * If .TRUE., then XLEFT will be used instead of the * corresponding element of A for the first element in the * second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) * If .FALSE., then the corresponding element of A will be * used. * Not modified. * * LRIGHT - LOGICAL * If .TRUE., then XRIGHT will be used instead of the * corresponding element of A for the last element in the * first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If * .FALSE., then the corresponding element of A will be used. * Not modified. * * NL - INTEGER * The length of the rows (if LROWS=.TRUE.) or columns (if * LROWS=.FALSE.) to be rotated. If XLEFT and/or XRIGHT are * used, the columns/rows they are in should be included in * NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at * least 2. The number of rows/columns to be rotated * exclusive of those involving XLEFT and/or XRIGHT may * not be negative, i.e., NL minus how many of LLEFT and * LRIGHT are .TRUE. must be at least zero; if not, XERBLA * will be called. * Not modified. * * C, S - COMPLEX * Specify the Givens rotation to be applied. If LROWS is * true, then the matrix ( c s ) * ( _ _ ) * (-s c ) is applied from the left; * if false, then the transpose (not conjugated) thereof is * applied from the right. Note that in contrast to the * output of CROTG or to most versions of CROT, both C and S * are complex. For a Givens rotation, |C|**2 + |S|**2 should * be 1, but this is not checked. * Not modified. * * A - COMPLEX array. * The array containing the rows/columns to be rotated. The * first element of A should be the upper left element to * be rotated. * Read and modified. * * LDA - INTEGER * The "effective" leading dimension of A. If A contains * a matrix stored in GE, HE, or SY format, then this is just * the leading dimension of A as dimensioned in the calling * routine. If A contains a matrix stored in band (GB, HB, or * SB) format, then this should be *one less* than the leading * dimension used in the calling routine. Thus, if A were * dimensioned A(LDA,*) in CLAROT, then A(1,j) would be the * j-th element in the first of the two rows to be rotated, * and A(2,j) would be the j-th in the second, regardless of * how the array may be stored in the calling routine. [A * cannot, however, actually be dimensioned thus, since for * band format, the row number may exceed LDA, which is not * legal FORTRAN.] * If LROWS=.TRUE., then LDA must be at least 1, otherwise * it must be at least NL minus the number of .TRUE. values * in XLEFT and XRIGHT. * Not modified. * * XLEFT - COMPLEX * If LLEFT is .TRUE., then XLEFT will be used and modified * instead of A(2,1) (if LROWS=.TRUE.) or A(1,2) * (if LROWS=.FALSE.). * Read and modified. * * XRIGHT - COMPLEX * If LRIGHT is .TRUE., then XRIGHT will be used and modified * instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1) * (if LROWS=.FALSE.). * Read and modified. * * ===================================================================== * * .. Local Scalars .. INTEGER IINC, INEXT, IX, IY, IYT, J, NT COMPLEX TEMPX * .. * .. Local Arrays .. COMPLEX XT( 2 ), YT( 2 ) * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG * .. * .. Executable Statements .. * * Set up indices, arrays for ends * IF( LROWS ) THEN IINC = LDA INEXT = 1 ELSE IINC = 1 INEXT = LDA END IF * IF( LLEFT ) THEN NT = 1 IX = 1 + IINC IY = 2 + LDA XT( 1 ) = A( 1 ) YT( 1 ) = XLEFT ELSE NT = 0 IX = 1 IY = 1 + INEXT END IF * IF( LRIGHT ) THEN IYT = 1 + INEXT + ( NL-1 )*IINC NT = NT + 1 XT( NT ) = XRIGHT YT( NT ) = A( IYT ) END IF * * Check for errors * IF( NL.LT.NT ) THEN CALL XERBLA( 'CLAROT', 4 ) RETURN END IF IF( LDA.LE.0 .OR. ( .NOT.LROWS .AND. LDA.LT.NL-NT ) ) THEN CALL XERBLA( 'CLAROT', 8 ) RETURN END IF * * Rotate * * CROT( NL-NT, A(IX),IINC, A(IY),IINC, C, S ) with complex C, S * DO 10 J = 0, NL - NT - 1 TEMPX = C*A( IX+J*IINC ) + S*A( IY+J*IINC ) A( IY+J*IINC ) = -CONJG( S )*A( IX+J*IINC ) + $ CONJG( C )*A( IY+J*IINC ) A( IX+J*IINC ) = TEMPX 10 CONTINUE * * CROT( NT, XT,1, YT,1, C, S ) with complex C, S * DO 20 J = 1, NT TEMPX = C*XT( J ) + S*YT( J ) YT( J ) = -CONJG( S )*XT( J ) + CONJG( C )*YT( J ) XT( J ) = TEMPX 20 CONTINUE * * Stuff values back into XLEFT, XRIGHT, etc. * IF( LLEFT ) THEN A( 1 ) = XT( 1 ) XLEFT = YT( 1 ) END IF * IF( LRIGHT ) THEN XRIGHT = XT( NT ) A( IYT ) = YT( NT ) END IF * RETURN * * End of CLAROT * END scalapack-1.8.0/TOOLS/LAPACK/slarnd.f0000640000175000017500000000427210604316307016610 0ustar muammarmuammar REAL FUNCTION SLARND( IDIST, ISEED ) * * -- LAPACK auxiliary routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * November 2006 * * .. Scalar Arguments .. INTEGER IDIST * .. * .. Array Arguments .. INTEGER ISEED( 4 ) * .. * * Purpose * ======= * * SLARND returns a random real number from a uniform or normal * distribution. * * Arguments * ========= * * IDIST (input) INTEGER * Specifies the distribution of the random numbers: * = 1: uniform (0,1) * = 2: uniform (-1,1) * = 3: normal (0,1) * * ISEED (input/output) INTEGER array, dimension (4) * On entry, the seed of the random number generator; the array * elements must be between 0 and 4095, and ISEED(4) must be * odd. * On exit, the seed is updated. * * Further Details * =============== * * This routine calls the auxiliary routine SLARAN to generate a random * real number from a uniform (0,1) distribution. The Box-Muller method * is used to transform numbers from a uniform to a normal distribution. * * ===================================================================== * * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) REAL TWOPI PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) * .. * .. Local Scalars .. REAL T1, T2 * .. * .. External Functions .. REAL SLARAN EXTERNAL SLARAN * .. * .. Intrinsic Functions .. INTRINSIC COS, LOG, SQRT * .. * .. Executable Statements .. * * Generate a real random number from a uniform (0,1) distribution * T1 = SLARAN( ISEED ) * IF( IDIST.EQ.1 ) THEN * * uniform (0,1) * SLARND = T1 ELSE IF( IDIST.EQ.2 ) THEN * * uniform (-1,1) * SLARND = TWO*T1 - ONE ELSE IF( IDIST.EQ.3 ) THEN * * normal (0,1) * T2 = SLARAN( ISEED ) SLARND = SQRT( -TWO*LOG( T1 ) )*COS( TWOPI*T2 ) END IF RETURN * * End of SLARND * END scalapack-1.8.0/TOOLS/pdlawrite.f0000640000175000017500000001733610604624224016372 0ustar muammarmuammar SUBROUTINE PDLAWRITE( FILNAM, M, N, A, IA, JA, DESCA, IRWRIT, $ ICWRIT, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER IA, ICWRIT, IRWRIT, JA, M, N * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAWRITE writes to a file named FILNAMa distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent to and * written by the process of coordinates (IRWWRITE, ICWRIT). * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 13 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ DGERV2D, DGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN OPEN( NOUT, FILE=FILNAM, STATUS='UNKNOWN' ) WRITE( NOUT, FMT = * ) M, N END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CLOSE( NOUT ) END IF * 9999 FORMAT( D30.18 ) * RETURN * * End of PDLAWRITE * END scalapack-1.8.0/TOOLS/pslaprnt.f0000640000175000017500000002757110363532303016242 0ustar muammarmuammar SUBROUTINE PSLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAPRNT prints to the standard output a distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and * printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory to a * local array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IRPRNT (global input) INTEGER * The row index of the printing process. * * ICPRNT (global input) INTEGER * The column index of the printing process. * * CMATNM (global input) CHARACTER*(*) * Identifier of the distributed matrix to be printed. * * NOUT (global input) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. * * WORK (local workspace) REAL * Working array of minimum size equal to MB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL SGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL SGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT(A,'(',I6,',',I6,')=',E16.8) * RETURN * * End of PSLAPRNT * END scalapack-1.8.0/TOOLS/pzfillpad.f0000640000175000017500000000557010363532303016357 0ustar muammarmuammar SUBROUTINE PZFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX*16 CHKVAL * .. * .. Array Arguments .. COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZFILLPAD surrounds a two dimensional local array with a guard- * zone initialized to the value CHKVAL. The user may later call the * routine PZCHEKPAD to discover if the guardzone has been * violated. There are three guardzones. The first is a buffer of size * IPRE that is before the start of the array. The second is the buffer * of size IPOST which is after the end of the array to be padded. * Finally, there is a guard zone inside every column of the array to * be padded, in the elements of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (local input) INTEGER * The number of rows in the local array. * * N (local input) INTEGER * The number of columns in the local array. * * A (local input/local output) COMPLEX*16, array of * dimension (LDA,N). A location IPRE elements in front of * the matrix to be padded. * * LDA (local input) INTEGER * The leading Dimension of the local array to be padded. * * IPRE (local input) INTEGER * The size of the guard zone to put before the start of * padded array. * * IPOST (local input) INTEGER * The size of the guard zone to put after padded array. * * CHKVAL (local input) COMPLEX*16 * The value to pad matrix with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PZFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no post-guardzone in PZFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PZFILLPAD * END scalapack-1.8.0/TOOLS/pcelget.f0000640000175000017500000001630110363532303016007 0ustar muammarmuammar SUBROUTINE PCELGET( SCOPE, TOP, ALPHA, A, IA, JA, DESCA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 SCOPE, TOP INTEGER IA, JA COMPLEX ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCELGET sets alpha to the distributed matrix entry A( IA, JA ). * The value of alpha is set according to the scope. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCOPE (global input) CHARACTER*1 * The BLACS scope in which alpha is updated. * If SCOPE = 'R', alpha is updated only in the process row * containing A( IA, JA ), * If SCOPE = 'C', alpha is updated only in the process column * containing A( IA, JA ), * If SCOPE = 'A', alpha is updated in all the processes of the * grid, * otherwise alpha is updated only in the process containing * A( IA, JA ). * * TOP (global input) CHARACTER*1 * The topology to be used if broadcast is needed. * * ALPHA (global output) COMPLEX, the scalar alpha. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, IIA, IOFFA, JJA, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, INFOG2L * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * ALPHA = ZERO * IF( LSAME( SCOPE, 'R' ) ) THEN IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL CGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL CGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'C' ) ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL CGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL CGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'A' ) ) THEN IF( ( MYROW.EQ.IAROW ).AND.( MYCOL.EQ.IACOL ) ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL CGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL CGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF ELSE IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ ALPHA = A( IIA+(JJA-1)*DESCA( LLD_ ) ) END IF * RETURN * * End of PCELGET * END scalapack-1.8.0/TOOLS/ilcm.f0000640000175000017500000000234410363532303015312 0ustar muammarmuammar INTEGER FUNCTION ILCM( M, N ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER M, N * .. * * Purpose * ======= * * ILCM computes and returns the Least Common Multiple (LCM) of two * positive integers M and N. In fact the routine computes the greatest * common divisor (GCD) and use the fact that M*N = GCD*LCM. * * Arguments * ========= * * M (input) INTEGER * On entry, M >=0. Unchanged on exit. * * N (input) INTEGER * On entry, N >=0. Unchanged on exit. * * ===================================================================== * * .. Local Scalars .. INTEGER IA, IQ, IR * .. * .. Executable Statements .. * IF( M.GE.N ) THEN IA = M ILCM = N ELSE IA = N ILCM = M ENDIF * 10 CONTINUE IQ = IA / ILCM IR = IA - IQ * ILCM IF( IR.EQ.0 ) THEN ILCM = ( M * N ) / ILCM RETURN END IF IA = ILCM ILCM = IR GO TO 10 * * End of ILCM * END scalapack-1.8.0/TOOLS/pitreecomb.f0000640000175000017500000001324010363532303016514 0ustar muammarmuammar SUBROUTINE PITREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, $ SUBPTR ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SCOPE INTEGER CDEST0, ICTXT, N, RDEST0 * .. * .. Array Arguments .. INTEGER MINE( * ) * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PITREECOMB does a 1-tree parallel combine operation on scalars, * using the subroutine indicated by SUBPTR to perform the required * computation. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * SCOPE (global input) CHARACTER * The scope of the operation: 'Rowwise', 'Columnwise', or * 'All'. * * N (global input) INTEGER * The number of elements in MINE. N = 1 for the norm-2 * computation and 2 for the sum of square. * * MINE (local input/global output) @(typec) array of * dimension at least equal to N. The local data to use in the * combine. * * RDEST0 (global input) INTEGER * The process row to receive the answer. If RDEST0 = -1, * every process in the scope gets the answer. * * CDEST0 (global input) INTEGER * The process column to receive the answer. If CDEST0 = -1, * every process in the scope gets the answer. * * SUBPTR (local input) Pointer to the subroutine to call to perform * the required combine. * * ===================================================================== * * .. Local Scalars .. LOGICAL BCAST, RSCOPE, CSCOPE INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, $ RMSSG, TCDEST, TRDEST * .. * .. Local Arrays .. INTEGER HIS( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, $ IGERV2D, IGESD2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * See if everyone wants the answer (need to broadcast the answer) * BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) IF( BCAST ) THEN TRDEST = 0 TCDEST = 0 ELSE TRDEST = RDEST0 TCDEST = CDEST0 END IF * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure scope-dependant variables, or report illegal scope * RSCOPE = LSAME( SCOPE, 'R' ) CSCOPE = LSAME( SCOPE, 'C' ) * IF( RSCOPE ) THEN IF( BCAST ) THEN TRDEST = MYROW ELSE IF( MYROW.NE.TRDEST ) THEN RETURN END IF NP = NPCOL MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) ELSE IF( CSCOPE ) THEN IF( BCAST ) THEN TCDEST = MYCOL ELSE IF( MYCOL.NE.TCDEST ) THEN RETURN END IF NP = NPROW MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) ELSE IF( LSAME( SCOPE, 'A' ) ) THEN NP = NPROW * NPCOL IAM = MYROW*NPCOL + MYCOL DEST = TRDEST*NPCOL + TCDEST MYDIST = MOD( NP + IAM - DEST, NP ) ELSE RETURN END IF * IF( NP.LT.2 ) $ RETURN * MYDIST2 = MYDIST RMSSG = MYROW CMSSG = MYCOL I = 1 * 10 CONTINUE * IF( MOD( MYDIST, 2 ).NE.0 ) THEN * * If I am process that sends information * DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) * * Figure coordinates of dest of message * IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) END IF * CALL IGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) * GO TO 20 * ELSE * * If I am a process receiving information, figure coordinates * of source of message * DIST = MYDIST2 + I IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) HISDIST = MOD( NP + CMSSG - TCDEST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) HISDIST = MOD( NP + RMSSG - TRDEST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) END IF * IF( MYDIST2.LT.HISDIST ) THEN * * If I have anyone sending to me * CALL IGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) CALL SUBPTR( MINE, HIS ) * END IF MYDIST = MYDIST / 2 * END IF I = I * 2 * IF( I.LT.NP ) $ GO TO 10 * 20 CONTINUE * IF( BCAST ) THEN IF( MYDIST2.EQ.0 ) THEN CALL IGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) ELSE CALL IGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, $ TRDEST, TCDEST ) END IF END IF * RETURN * * End of PITREECOMB * END scalapack-1.8.0/TOOLS/ccdotu.f0000640000175000017500000000156110363532303015647 0ustar muammarmuammar SUBROUTINE CCDOTU( N, DOTU, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX DOTU * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * CCDOTU is a simple FORTRAN wrapper around the BLAS function * CDOTU returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. COMPLEX CDOTU EXTERNAL CDOTU * .. * .. Executable Statements .. * DOTU = CDOTU( N, X, INCX, Y, INCY ) * RETURN * * End of CCDOTU * END scalapack-1.8.0/TOOLS/pchkxmat.f0000640000175000017500000003721710363532303016214 0ustar muammarmuammar SUBROUTINE PCHK1MAT( MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, $ DESCAPOS0, NEXTRA, EX, EXPOS, INFO ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER DESCAPOS0, IA, INFO, JA, MA, MAPOS0, NA, $ NAPOS0, NEXTRA * .. * .. Array Arguments .. INTEGER DESCA( * ), EX( NEXTRA ), EXPOS( NEXTRA ) * .. * * Purpose * ======= * * PCHK1MAT checks that the values associated with one distributed * matrix are consistant across the entire process grid. * * Notes * ===== * * This routine checks that all values are the same across the grid. * It does no local checking; it is therefore legal to abuse the * definitions of the non-descriptor arguments, i.e., if the routine * you are checking does not possess a MA value, you may pass some * other integer that must be global into this argument instead. * * Arguments * ========= * * MA (global input) INTEGER * The global number of matrix rows of A being operated on. * * MAPOS0 (global input) INTEGER * Where in the calling routine's parameter list MA appears. * * NA (global input) INTEGER * The global number of matrix columns of A being operated on. * * NAPOS0 (global input) INTEGER * Where in the calling routine's parameter list NA appears. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * DESCAPOS0 (global input) INTEGER * Where in the calling routine's parameter list DESCA * appears. Note that we assume IA and JA are respectively 2 * and 1 entries behind DESCA. * * NEXTRA (global input) INTEGER * The number of extra parameters (i.e., besides the ones * above) to check. NEXTRA <= LDW - 11. * * EX (local input) INTEGER array of dimension (NEXTRA) * The values of these extra parameters * * EXPOS (local input) INTEGER array of dimension (NEXTRA) * The parameter list positions of these extra values. * * INFO (local input/global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER BIGNUM, DESCMULT, LDW PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT, $ LDW = 25 ) * .. * .. Local Scalars .. INTEGER DESCPOS, K * .. * .. Local Arrays .. INTEGER IWORK( LDW, 2 ), IWORK2( LDW ) * .. * .. External Subroutines .. EXTERNAL GLOBCHK * .. * .. Executable Statements .. * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Pack values and their positions in the parameter list, factoring * in the descriptor multiplier * IWORK( 1, 1 ) = MA IWORK( 1, 2 ) = MAPOS0 * DESCMULT IWORK( 2, 1 ) = NA IWORK( 2, 2 ) = NAPOS0 * DESCMULT IWORK( 3, 1 ) = IA IWORK( 3, 2 ) = (DESCAPOS0-2) * DESCMULT IWORK( 4, 1 ) = JA IWORK( 4, 2 ) = (DESCAPOS0-1) * DESCMULT DESCPOS = DESCAPOS0 * DESCMULT * IWORK( 5, 1 ) = DESCA( DTYPE_ ) IWORK( 5, 2 ) = DESCPOS + DTYPE_ IWORK( 6, 1 ) = DESCA( M_ ) IWORK( 6, 2 ) = DESCPOS + M_ IWORK( 7, 1 ) = DESCA( N_ ) IWORK( 7, 2 ) = DESCPOS + N_ IWORK( 8, 1 ) = DESCA( MB_ ) IWORK( 8, 2 ) = DESCPOS + MB_ IWORK( 9, 1 ) = DESCA( NB_ ) IWORK( 9, 2 ) = DESCPOS + NB_ IWORK( 10, 1 ) = DESCA( RSRC_ ) IWORK( 10, 2 ) = DESCPOS + RSRC_ IWORK( 11, 1 ) = DESCA( CSRC_ ) IWORK( 11, 2 ) = DESCPOS + CSRC_ * IF( NEXTRA.GT.0 ) THEN DO 10 K = 1, NEXTRA IWORK( 11+K, 1 ) = EX( K ) IWORK( 11+K, 2 ) = EXPOS( K ) 10 CONTINUE END IF K = 11 + NEXTRA * * Get the smallest error detected anywhere (BIGNUM if no error) * CALL GLOBCHK( DESCA( CTXT_ ), K, IWORK, LDW, IWORK2, INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT if * error is not in a descriptor entry * IF( INFO .EQ. BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * RETURN * * End of PCHK1MAT * END * SUBROUTINE PCHK2MAT( MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, $ DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, $ DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER DESCAPOS0, DESCBPOS0, IA, IB, INFO, JA, JB, MA, $ MAPOS0, MB, MBPOS0, NA, NAPOS0, NB, NBPOS0, $ NEXTRA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( 8 ), EX( NEXTRA ), $ EXPOS( NEXTRA ) * .. * * Purpose * ======= * * PCHK2MAT checks that the values associated with two distributed * matrices are consistant across the entire process grid. * * Notes * ===== * * This routine checks that all values are the same across the grid. * It does no local checking; it is therefore legal to abuse the * definitions of the non-descriptor arguments, i.e., if the routine * you are checking does not possess a MA value, you may pass some * other integer that must be global into this argument instead. * * Arguments * ========= * * MA (global input) INTEGER * The global number of matrix rows of A being operated on. * * MAPOS0 (global input) INTEGER * Where in the calling routine's parameter list MA appears. * * NA (global input) INTEGER * The global number of matrix columns of A being operated on. * * NAPOS0 (global input) INTEGER * Where in the calling routine's parameter list NA appears. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * DESCAPOS0 (global input) INTEGER * Where in the calling routine's parameter list DESCA * appears. Note that we assume IA and JA are respectively 2 * and 1 entries behind DESCA. * * MB (global input) INTEGER * The global number of matrix rows of B being operated on. * * MBPOS0 (global input) INTEGER * Where in the calling routine's parameter list MB appears. * * NB (global input) INTEGER * The global number of matrix columns of B being operated on. * * NBPOS0 (global input) INTEGER * Where in the calling routine's parameter list NB appears. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * DESCBPOS0 (global input) INTEGER * Where in the calling routine's parameter list DESCB * appears. Note that we assume IB and JB are respectively 2 * and 1 entries behind DESCB. * * NEXTRA (global input) INTEGER * The number of extra parameters (i.e., besides the ones * above) to check. NEXTRA <= LDW - 22. * * EX (local input) INTEGER array of dimension (NEXTRA) * The values of these extra parameters * * EXPOS (local input) INTEGER array of dimension (NEXTRA) * The parameter list positions of these extra values. * * INFO (local input/global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DESCMULT, BIGNUM, LDW PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT, $ LDW = 35 ) * .. * .. Local Scalars .. INTEGER K, DESCPOS * .. * .. Local Arrays .. INTEGER IWORK( LDW, 2 ), IWORK2( LDW ) * .. * .. External Subroutines .. EXTERNAL GLOBCHK * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Pack values and their positions in the parameter list, factoring * in the descriptor multiplier * IWORK( 1, 1 ) = MA IWORK( 1, 2 ) = MAPOS0 * DESCMULT IWORK( 2, 1 ) = NA IWORK( 2, 2 ) = NAPOS0 * DESCMULT IWORK( 3, 1 ) = IA IWORK( 3, 2 ) = (DESCAPOS0-2) * DESCMULT IWORK( 4, 1 ) = JA IWORK( 4, 2 ) = (DESCAPOS0-1) * DESCMULT DESCPOS = DESCAPOS0 * DESCMULT * IWORK( 5, 1 ) = DESCA( DTYPE_ ) IWORK( 5, 2 ) = DESCPOS + DTYPE_ IWORK( 6, 1 ) = DESCA( M_ ) IWORK( 6, 2 ) = DESCPOS + M_ IWORK( 7, 1 ) = DESCA( N_ ) IWORK( 7, 2 ) = DESCPOS + N_ IWORK( 8, 1 ) = DESCA( MB_ ) IWORK( 8, 2 ) = DESCPOS + MB_ IWORK( 9, 1 ) = DESCA( NB_ ) IWORK( 9, 2 ) = DESCPOS + NB_ IWORK( 10, 1 ) = DESCA( RSRC_ ) IWORK( 10, 2 ) = DESCPOS + RSRC_ IWORK( 11, 1 ) = DESCA( CSRC_ ) IWORK( 11, 2 ) = DESCPOS + CSRC_ * IWORK( 12, 1 ) = MB IWORK( 12, 2 ) = MBPOS0 * DESCMULT IWORK( 13, 1 ) = NB IWORK( 13, 2 ) = NBPOS0 * DESCMULT IWORK( 14, 1 ) = IB IWORK( 14, 2 ) = (DESCBPOS0-2) * DESCMULT IWORK( 15, 1 ) = JB IWORK( 15, 2 ) = (DESCBPOS0-1) * DESCMULT DESCPOS = DESCBPOS0 * DESCMULT * IWORK( 16, 1 ) = DESCB( DTYPE_ ) IWORK( 16, 2 ) = DESCPOS + DTYPE_ IWORK( 17, 1 ) = DESCB( M_ ) IWORK( 17, 2 ) = DESCPOS + M_ IWORK( 18, 1 ) = DESCB( N_ ) IWORK( 18, 2 ) = DESCPOS + N_ IWORK( 19, 1 ) = DESCB( MB_ ) IWORK( 19, 2 ) = DESCPOS + MB_ IWORK( 20, 1 ) = DESCB( NB_ ) IWORK( 20, 2 ) = DESCPOS + NB_ IWORK( 21, 1 ) = DESCB( RSRC_ ) IWORK( 21, 2 ) = DESCPOS + RSRC_ IWORK( 22, 1 ) = DESCB( CSRC_ ) IWORK( 22, 2 ) = DESCPOS + CSRC_ * IF( NEXTRA.GT.0 ) THEN DO 10 K = 1, NEXTRA IWORK( 22+K, 1 ) = EX( K ) IWORK( 22+K, 2 ) = EXPOS( K ) 10 CONTINUE END IF K = 22 + NEXTRA * * Get the smallest error detected anywhere (BIGNUM if no error) * CALL GLOBCHK( DESCA( CTXT_ ), K, IWORK, LDW, IWORK2, INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * RETURN * * End of PCHK2MAT * END * SUBROUTINE GLOBCHK( ICTXT, N, X, LDX, IWORK, INFO ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, INFO, LDX, N * .. * .. Array Arguments .. INTEGER IWORK( N ), X( LDX, 2 ) * .. * * Purpose * ======= * * GLOBCHK checks that values in X(i,1) are the same on all processes * in the process grid indicated by ICTXT. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle indicating the context over which * the values are to be the same. * * N (global input) INTEGER * The number of values to be compared. * * X (local input) INTEGER array, dimension (N,2) * The 1st column contains the values which should be the same * on all processes. The 2nd column indicates where in the * calling routine's parameter list the corresponding value * from column 1 came from. * * LDX (local input) INTEGER * The leading dimension of the array X. LDX >= MAX(1,N). * * IWORK (local workspace) INTEGER array, dimension (N) * Used to receive other processes' values for comparing with X. * * INFO (local input/global output) INTEGER * On entry, the smallest error flag so far generated, or BIGNUM * for no error. On exit: * = BIGNUM : no error * < 0: if INFO = -i*100, the i-th argument had an illegal * value, or was different between processes. * * ===================================================================== * * .. Local Scalars .. INTEGER K, MYROW, MYCOL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMN2D, IGEBR2D, IGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, IWORK, K, MYROW, MYCOL ) * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'All', ' ', N, 1, X, N ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', N, 1, IWORK, N, 0, 0 ) DO 10 K = 1, N IF( X( K, 1 ).NE.IWORK( K ) ) $ INFO = MIN( INFO, X( K, 2 ) ) 10 CONTINUE END IF * CALL IGAMN2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, K, K, -1, -1, 0 ) * RETURN * * End GLOBCHK * END scalapack-1.8.0/TOOLS/pclawrite.f0000640000175000017500000002040010604624224016353 0ustar muammarmuammar SUBROUTINE PCLAWRITE( FILNAM, M, N, A, IA, JA, DESCA, IRWRIT, $ ICWRIT, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER IA, ICWRIT, IRWRIT, JA, M, N * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLAWRITE writes to a file named FILNAMa distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent to and * written by the process of coordinates (IRWWRITE, ICWRIT). * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 13 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ CGERV2D, CGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN OPEN( NOUT, FILE=FILNAM, STATUS='UNKNOWN' ) WRITE( NOUT, FMT = * ) M, N END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) REAL(WORK( K )), $ AIMAG(WORK( K )) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ REAL (A( II+K+(JJ+H-1)*LDA )), $ AIMAG (A( II+K+(JJ+H-1)*LDA )) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) REAL (WORK( K )), $ AIMAG (WORK( K )) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ REAL (A( II+K+(JJ+H-1)*LDA )), $ AIMAG (A( II+K+(JJ+H-1)*LDA )) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) REAL (WORK( K )), $ AIMAG (WORK( K)) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ REAL (A( II+K+(JJ+H-1)*LDA )), $ AIMAG (A( II+K+(JJ+H-1)*LDA )) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) REAL (WORK( K )), $ AIMAG (WORK( K )) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CLOSE( NOUT ) END IF * 9999 FORMAT( D30.18,D30.18 ) * RETURN * * End of PCLAWRITE * END scalapack-1.8.0/TOOLS/pichekpad.f0000640000175000017500000001210510363532303016312 0ustar muammarmuammar SUBROUTINE PICHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N INTEGER CHKVAL * .. * .. Array Arguments .. CHARACTER MESS*(*) INTEGER A( * ) * .. * * Purpose * ======= * * PICHEKPAD checks that the padding around a local array has not * been overwritten since the call to PIFILLPAD. 3 types of errors * are reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells you that the 5th element of the IPRE long buffer has been * overwritten with the value 18, where it should still have the value * of CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells you that the 19th element after the end of A was overwritten * with the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells you that the element at the 12th row and 3rd column of A was * overwritten with the value of 22, where it should still have the * value of CHKVAL. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * MESS (local input) CHARACTER*(*) * String containing a user-defined message. * * M (local input) INTEGER * The number of rows in the local array A. * * N (input) INTEGER * The number of columns in the local array A. * * A (local input) @(typec) array of dimension (LDA,N). * A location IPRE elements in front of the array to be checked. * * LDA (local input) INTEGER * The leading Dimension of the local array to be checked. * * IPRE (local input) INTEGER * The size of the guard zone before the start of padded array. * * IPOST (local input) INTEGER * The size of guard zone after the padded array. * * CHKVAL (local input) @(typec) * The value the local array was padded with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ A( I ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PICHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, A( I ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PICHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, A( I ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', I8 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', I8 ) * RETURN * * End of PICHEKPAD * END scalapack-1.8.0/TOOLS/pdelget.f0000640000175000017500000001630710363532303016016 0ustar muammarmuammar SUBROUTINE PDELGET( SCOPE, TOP, ALPHA, A, IA, JA, DESCA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 SCOPE, TOP INTEGER IA, JA DOUBLE PRECISION ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDELGET sets alpha to the distributed matrix entry A( IA, JA ). * The value of alpha is set according to the scope. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCOPE (global input) CHARACTER*1 * The BLACS scope in which alpha is updated. * If SCOPE = 'R', alpha is updated only in the process row * containing A( IA, JA ), * If SCOPE = 'C', alpha is updated only in the process column * containing A( IA, JA ), * If SCOPE = 'A', alpha is updated in all the processes of the * grid, * otherwise alpha is updated only in the process containing * A( IA, JA ). * * TOP (global input) CHARACTER*1 * The topology to be used if broadcast is needed. * * ALPHA (global output) DOUBLE PRECISION, the scalar alpha. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, IIA, IOFFA, JJA, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, INFOG2L * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * ALPHA = ZERO * IF( LSAME( SCOPE, 'R' ) ) THEN IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL DGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL DGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'C' ) ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL DGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL DGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'A' ) ) THEN IF( ( MYROW.EQ.IAROW ).AND.( MYCOL.EQ.IACOL ) ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL DGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL DGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF ELSE IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ ALPHA = A( IIA+(JJA-1)*DESCA( LLD_ ) ) END IF * RETURN * * End of PDELGET * END scalapack-1.8.0/TOOLS/pcchekpad.f0000640000175000017500000001252710363532303016314 0ustar muammarmuammar SUBROUTINE PCCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX CHKVAL * .. * .. Array Arguments .. CHARACTER MESS*(*) COMPLEX A( * ) * .. * * Purpose * ======= * * PCCHEKPAD checks that the padding around a local array has not * been overwritten since the call to PCFILLPAD. 3 types of errors * are reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells you that the 5th element of the IPRE long buffer has been * overwritten with the value 18, where it should still have the value * of CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells you that the 19th element after the end of A was overwritten * with the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells you that the element at the 12th row and 3rd column of A was * overwritten with the value of 22, where it should still have the * value of CHKVAL. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * MESS (local input) CHARACTER*(*) * String containing a user-defined message. * * M (local input) INTEGER * The number of rows in the local array A. * * N (input) INTEGER * The number of columns in the local array A. * * A (local input) COMPLEX array of dimension (LDA,N). * A location IPRE elements in front of the array to be checked. * * LDA (local input) INTEGER * The leading Dimension of the local array to be checked. * * IPRE (local input) INTEGER * The size of the guard zone before the start of padded array. * * IPOST (local input) INTEGER * The size of guard zone after the padded array. * * CHKVAL (local input) COMPLEX * The value the local array was padded with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC REAL, AIMAG * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ REAL( A( I ) ), AIMAG( A( I ) ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PCCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, REAL( A( I ) ), $ AIMAG( A( I ) ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PCCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, REAL( A( I ) ), $ AIMAG( A( I ) ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G11.4, '+ i*', $ G11.4 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G11.4, $ '+ i*', G11.4 ) * RETURN * * End of PCCHEKPAD * END scalapack-1.8.0/TOOLS/chk1mat.f0000640000175000017500000001402710363532303015717 0ustar muammarmuammar SUBROUTINE CHK1MAT( MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, $ DESCAPOS0, INFO ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER DESCAPOS0, IA, INFO, JA, MA, MAPOS0, NA, NAPOS0 * .. * .. Array Arguments .. INTEGER DESCA( * ) * .. * * Purpose * ======= * * CHK1MAT checks that the values associated with one distributed matrix * make sense from a local viewpoint * * Arguments * ========= * * MA (global input) INTEGER * The number or matrix rows of A being operated on. * * MAPOS0 (global input) INTEGER * Where in the calling routine's parameter list MA appears. * * NA (global input) INTEGER * The number of matrix columns of A being operated on. * * NAPOS0 (global input) INTEGER * Where in the calling routine's parameter list NA appears. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * DESCAPOS0 (global input) INTEGER * Where in the calling routine's parameter list DESCA * appears. Note that we assume IA and JA are respectively 2 * and 1 entries behind DESCA. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) * .. * .. Local Scalars .. INTEGER DESCAPOS, IAPOS, JAPOS, MAPOS, NAPOS, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MAX * .. * .. Executable Statements .. * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the des- * criptor multiplier * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Figure where in parameter list each parameter was, factoring in * descriptor multiplier * MAPOS = MAPOS0 * DESCMULT NAPOS = NAPOS0 * DESCMULT IAPOS = (DESCAPOS0-2) * DESCMULT JAPOS = (DESCAPOS0-1) * DESCMULT DESCAPOS = DESCAPOS0 * DESCMULT * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Check that matrix values make sense from local viewpoint * IF( DESCA( DTYPE_ ) .NE. BLOCK_CYCLIC_2D ) THEN INFO = MIN( INFO, DESCAPOS+DTYPE_ ) ELSE IF( MA.LT.0 ) THEN INFO = MIN( INFO, MAPOS ) ELSE IF( NA.LT.0 ) THEN INFO = MIN( INFO, NAPOS ) ELSE IF( IA.LT.1 ) THEN INFO = MIN( INFO, IAPOS ) ELSE IF( JA.LT.1 ) THEN INFO = MIN( INFO, JAPOS ) ELSE IF( DESCA( MB_ ).LT.1 ) THEN INFO = MIN( INFO, DESCAPOS+MB_ ) ELSE IF( DESCA( NB_ ).LT.1 ) THEN INFO = MIN( INFO, DESCAPOS+NB_ ) ELSE IF( DESCA( RSRC_ ).LT.0 .OR. DESCA( RSRC_ ).GE.NPROW ) THEN INFO = MIN( INFO, DESCAPOS+RSRC_ ) ELSE IF( DESCA( CSRC_ ).LT.0 .OR. DESCA( CSRC_ ).GE.NPCOL ) THEN INFO = MIN( INFO, DESCAPOS+CSRC_ ) ELSE IF( DESCA( LLD_ ).LT.1 ) THEN INFO = MIN( INFO, DESCAPOS+LLD_ ) ELSE IF( DESCA( LLD_ ) .LT. $ NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, DESCA(RSRC_), $ NPROW ) ) THEN IF( NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) .GT. 0 ) $ INFO = MIN( INFO, DESCAPOS+LLD_ ) END IF * IF( MA.EQ.0 .OR. NA.EQ.0 ) THEN * * NULL matrix, relax some checks * IF( DESCA(M_).LT.0 ) $ INFO = MIN( INFO, DESCAPOS+M_ ) IF( DESCA(N_).LT.0 ) $ INFO = MIN( INFO, DESCAPOS+N_ ) * ELSE * * more rigorous checks for non-degenerate matrices * IF( DESCA( M_ ).LT.1 ) THEN INFO = MIN( INFO, DESCAPOS+M_ ) ELSE IF( DESCA( N_ ).LT.1 ) THEN INFO = MIN( INFO, DESCAPOS+N_ ) ELSE IF( IA.GT.DESCA( M_ ) ) THEN INFO = MIN( INFO, IAPOS ) ELSE IF( JA.GT.DESCA( N_ ) ) THEN INFO = MIN( INFO, JAPOS ) ELSE IF( IA+MA-1.GT.DESCA( M_ ) ) $ INFO = MIN( INFO, MAPOS ) IF( JA+NA-1.GT.DESCA( N_ ) ) $ INFO = MIN( INFO, NAPOS ) END IF END IF * END IF * * Prepare output: set info = 0 if no error, and divide by * DESCMULT if error is not in a descriptor entry * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * RETURN * * End CHK1MAT * END scalapack-1.8.0/TOOLS/pzlawrite.f0000640000175000017500000002037610604624224016416 0ustar muammarmuammar SUBROUTINE PZLAWRITE( FILNAM, M, N, A, IA, JA, DESCA, IRWRIT, $ ICWRIT, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER IA, ICWRIT, IRWRIT, JA, M, N * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLAWRITE writes to a file named FILNAMa distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent to and * written by the process of coordinates (IRWWRITE, ICWRIT). * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 13 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ ZGERV2D, ZGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN OPEN( NOUT, FILE=FILNAM, STATUS='UNKNOWN' ) WRITE( NOUT, FMT = * ) M, N END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) DBLE(WORK( K )), $ DIMAG(WORK( K )) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ DBLE (A( II+K+(JJ+H-1)*LDA )), $ DIMAG (A( II+K+(JJ+H-1)*LDA )) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) DBLE (WORK( K )), $ DIMAG (WORK( K )) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ DBLE (A( II+K+(JJ+H-1)*LDA )), $ DIMAG (A( II+K+(JJ+H-1)*LDA )) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) DBLE (WORK( K )), $ DIMAG (WORK( K)) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ DBLE (A( II+K+(JJ+H-1)*LDA )), $ DIMAG (A( II+K+(JJ+H-1)*LDA )) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRWRIT, ICWRIT ) ELSE IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CALL ZGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) DBLE (WORK( K )), $ DIMAG (WORK( K )) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * IF( MYROW.EQ.IRWRIT .AND. MYCOL.EQ.ICWRIT ) THEN CLOSE( NOUT ) END IF * 9999 FORMAT( E15.9,E15.9 ) * RETURN * * End of PZLAWRITE * END scalapack-1.8.0/TOOLS/pilaprnt.f0000640000175000017500000002757610363532303016235 0ustar muammarmuammar SUBROUTINE PILAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) INTEGER A( * ), WORK( * ) * .. * * Purpose * ======= * * PILAPRNT prints to the standard output a distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and * printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) @(typec) pointer into the local memory to a * local array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IRPRNT (global input) INTEGER * The row index of the printing process. * * ICPRNT (global input) INTEGER * The column index of the printing process. * * CMATNM (global input) CHARACTER*(*) * Identifier of the distributed matrix to be printed. * * NOUT (global input) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. * * WORK (local workspace) @(typec) * Working array of minimum size equal to MB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ IGERV2D, IGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL IGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL IGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL IGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL IGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL IGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL IGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL IGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL IGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT(A,'(',I6,',',I6,')=',I8) * RETURN * * End of PILAPRNT * END scalapack-1.8.0/TOOLS/pselset.f0000640000175000017500000001176510363532303016054 0ustar muammarmuammar SUBROUTINE PSELSET( A, IA, JA, DESCA, ALPHA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA REAL ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSELSET sets the distributed matrix entry A( IA, JA ) to ALPHA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (local output) REAL pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ALPHA (local input) REAL * The scalar alpha. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, JJA, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ A( IIA+(JJA-1)*DESCA( LLD_ ) ) = ALPHA * RETURN * * End of PSELSET * END scalapack-1.8.0/TOOLS/pcelset2.f0000640000175000017500000001245310363532303016111 0ustar muammarmuammar SUBROUTINE PCELSET2( ALPHA, A, IA, JA, DESCA, BETA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA COMPLEX ALPHA, BETA * .. * .. Array arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCELSET2 sets alpha to the distributed matrix entry A(IA,JA) * and A(IA,JA) to beta. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * ALPHA (local output) COMPLEX * The scalar alpha. * * A (local input/local ouput) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,*) containing * the local pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (local input) COMPLEX * The scalar beta. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, IOFFA, JJA, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) ALPHA = A( IOFFA ) A( IOFFA ) = BETA ELSE ALPHA = ZERO END IF * RETURN * * End of PCELSET2 * END scalapack-1.8.0/TOOLS/pzmatadd.f0000640000175000017500000002705410363532303016177 0ustar muammarmuammar SUBROUTINE PZMATADD( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, $ JC, DESCC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, JA, JC, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ) * .. * * Purpose * ======= * * PZMATADD performs a distributed matrix-matrix addition * * sub( C ) := alpha * sub( A ) + beta * sub( C ), * * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this * routine, the arrays are supposed to be aligned. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and * sub( C ). N >= 0. * * ALPHA (global input) COMPLEX*16 * The scalar ALPHA. * * A (local input) COMPLEX*16 pointer into the local memory * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This * array contains the local pieces of the distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (global input) COMPLEX*16 * The scalar BETA. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * This array contains the local pieces of the distributed * matrix sub( C ). On exit, this array contains the local * pieces of the resulting distributed matrix. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA, $ IIC, IOFFA, IOFFC, IROFF, J, JJA, JJC, LDA, $ LDC, MP, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA(CTXT_), NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR. $ ((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) * IROFF = MOD( IA-1, DESCA(MB_) ) ICOFF = MOD( JA-1, DESCA(NB_) ) MP = NUMROC( M+IROFF, DESCA(MB_), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA(NB_), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF LDA = DESCA(LLD_) LDC = DESCC(LLD_) * IF( NQ.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC + (JJC-1)*LDC DO 10 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 10 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 20 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 30 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 30 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 40 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 50 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 50 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 60 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC+(JJC-1)*LDC DO 80 J = 1, NQ DO 70 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 70 CONTINUE IOFFC = IOFFC + LDC 80 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 100 J = 1, NQ DO 90 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 90 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 120 J = 1, NQ DO 110 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 110 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 120 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 140 J = 1, NQ DO 130 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 130 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 160 J = 1, NQ DO 150 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 150 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 160 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 180 J = 1, NQ DO 170 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 170 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 180 CONTINUE END IF END IF END IF * RETURN * * End of PZMATADD * END scalapack-1.8.0/TOOLS/pcmatadd.f0000640000175000017500000002704010363532303016143 0ustar muammarmuammar SUBROUTINE PCMATADD( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, $ JC, DESCC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, JA, JC, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ) * .. * * Purpose * ======= * * PCMATADD performs a distributed matrix-matrix addition * * sub( C ) := alpha * sub( A ) + beta * sub( C ), * * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this * routine, the arrays are supposed to be aligned. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and * sub( C ). N >= 0. * * ALPHA (global input) COMPLEX * The scalar ALPHA. * * A (local input) COMPLEX pointer into the local memory * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This * array contains the local pieces of the distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (global input) COMPLEX * The scalar BETA. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * This array contains the local pieces of the distributed * matrix sub( C ). On exit, this array contains the local * pieces of the resulting distributed matrix. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA, $ IIC, IOFFA, IOFFC, IROFF, J, JJA, JJC, LDA, $ LDC, MP, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA(CTXT_), NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR. $ ((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) * IROFF = MOD( IA-1, DESCA(MB_) ) ICOFF = MOD( JA-1, DESCA(NB_) ) MP = NUMROC( M+IROFF, DESCA(MB_), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA(NB_), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF LDA = DESCA(LLD_) LDC = DESCC(LLD_) * IF( NQ.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC + (JJC-1)*LDC DO 10 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 10 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 20 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 30 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 30 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 40 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 50 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 50 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 60 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC+(JJC-1)*LDC DO 80 J = 1, NQ DO 70 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 70 CONTINUE IOFFC = IOFFC + LDC 80 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 100 J = 1, NQ DO 90 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 90 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 120 J = 1, NQ DO 110 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 110 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 120 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 140 J = 1, NQ DO 130 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 130 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 160 J = 1, NQ DO 150 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 150 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 160 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 180 J = 1, NQ DO 170 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 170 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 180 CONTINUE END IF END IF END IF * RETURN * * End of PCMATADD * END scalapack-1.8.0/TOOLS/zzdotc.f0000640000175000017500000000156110363532303015703 0ustar muammarmuammar SUBROUTINE ZZDOTC( N, DOTC, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX*16 DOTC * .. * .. Array Arguments .. COMPLEX*16 X( * ), Y( * ) * .. * * Purpose * ======= * * ZZDOTC is a simple FORTRAN wrapper around the BLAS function * ZDOTC returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. COMPLEX*16 ZDOTC EXTERNAL ZDOTC * .. * .. Executable Statements .. * DOTC = ZDOTC( N, X, INCX, Y, INCY ) * RETURN * * End of ZZDOTC * END scalapack-1.8.0/TOOLS/pdelset.f0000640000175000017500000001201510363532303016022 0ustar muammarmuammar SUBROUTINE PDELSET( A, IA, JA, DESCA, ALPHA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA DOUBLE PRECISION ALPHA * .. * .. Array arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDELSET sets the distributed matrix entry A( IA, JA ) to ALPHA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ALPHA (local input) DOUBLE PRECISION * The scalar alpha. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, JJA, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ A( IIA+(JJA-1)*DESCA( LLD_ ) ) = ALPHA * RETURN * * End of PDELSET * END scalapack-1.8.0/TOOLS/dsasum.f0000640000175000017500000000165110363532303015662 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DSASUM( N, X, INCX ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * DSASUM is a simple FORTRAN wrapper around the BLAS function SASUM * returning the result as a double allowing it to be callable by C * programs. * * ===================================================================== * * .. External Functions .. REAL SASUM EXTERNAL SASUM * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * DSASUM = DBLE( SASUM( N, X, INCX ) ) * RETURN * * End of DSASUM * END scalapack-1.8.0/TOOLS/pscol2row.f0000640000175000017500000002155110363532303016321 0ustar muammarmuammar SUBROUTINE PSCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. REAL VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a column of processes, and distribute those rows over a row of * processes. This routine minimizes communication by sending all * information it has that a given process in the RDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process column consisting of P processes, and I want to send all of * that vector that I own to a new vector distributed over Q processes * within a process row, that after I find the process in RDEST that * owns the row of the vector I'm currently looking at, he will want * every ( (LCM(P,Q) / P ) block of my vector (the block being of size * NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) REAL * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST. * * LDVS (local input) INTEGER * The leading dimension of VS, LDVS >= MAX( 1, MP ). * * VD (local output) REAL * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD, LDVD >= MAX( 1, MQ ). * * RSRC (global input) INTEGER * The process row the distributed block of vectors VS begins * on. * * CSRC (global input) INTEGER * The process column VS is distributed over. * * RDEST (global input) INTEGER * The process row to distribute VD over. * * CDEST (global input) INTEGER * The process column that VD begins on. * * WORK (local workspace) REAL * Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGESD2D, SGERV2D, SLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL SLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL SGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL SGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL SLACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL SLACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL SGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL SLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL SGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PSCOL2ROW * END scalapack-1.8.0/TOOLS/slatcpy.f0000640000175000017500000000434210363532303016045 0ustar muammarmuammar SUBROUTINE SLATCPY( UPLO, M, N, A, LDA, B, LDB ) * * * .. Scalar Arguments .. CHARACTER UPLO INTEGER LDA, LDB, M, N * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ) * .. * * Purpose * ======= * * SLATCPY copies all or part of a two-dimensional matrix A to another * matrix B in transpose form. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies the part of the matrix A to be copied to B. * = 'U': Upper triangular part * = 'L': Lower triangular part * Otherwise: All of the matrix A * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The m by n matrix A. If UPLO = 'U', only the upper triangle * or trapezoid is accessed; if UPLO = 'L', only the lower * triangle or trapezoid is accessed. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * B (output) DOUBLE PRECISION array, dimension (LDB,M) * On exit, B = A^T in the locations specified by UPLO. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * ===================================================================== * * .. Local Scalars .. INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * * .. * .. Executable Statements .. * IF( LSAME( UPLO, 'U' ) ) THEN DO 20 J = 1, N DO 10 I = 1, MIN( J, M ) B( J, I ) = A( I, J ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN DO 40 J = 1, N DO 30 I = J, M B( J, I ) = A( I, J ) 30 CONTINUE 40 CONTINUE ELSE DO 60 J = 1, N DO 50 I = 1, M B( J, I ) = A( I, J ) 50 CONTINUE 60 CONTINUE END IF RETURN * * End of SLATCPY * END scalapack-1.8.0/TOOLS/pdfillpad.f0000640000175000017500000000560410363532303016327 0ustar muammarmuammar SUBROUTINE PDFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N DOUBLE PRECISION CHKVAL * .. * .. Array Arguments .. DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDFILLPAD surrounds a two dimensional local array with a guard- * zone initialized to the value CHKVAL. The user may later call the * routine PDCHEKPAD to discover if the guardzone has been * violated. There are three guardzones. The first is a buffer of size * IPRE that is before the start of the array. The second is the buffer * of size IPOST which is after the end of the array to be padded. * Finally, there is a guard zone inside every column of the array to * be padded, in the elements of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (local input) INTEGER * The number of rows in the local array. * * N (local input) INTEGER * The number of columns in the local array. * * A (local input/local output) DOUBLE PRECISION, array of * dimension (LDA,N). A location IPRE elements in front of * the matrix to be padded. * * LDA (local input) INTEGER * The leading Dimension of the local array to be padded. * * IPRE (local input) INTEGER * The size of the guard zone to put before the start of * padded array. * * IPOST (local input) INTEGER * The size of the guard zone to put after padded array. * * CHKVAL (local input) DOUBLE PRECISION * The value to pad matrix with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PDFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no post-guardzone in PDFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PDFILLPAD * END scalapack-1.8.0/TOOLS/pzchekpad.f0000640000175000017500000001253510363532303016342 0ustar muammarmuammar SUBROUTINE PZCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N COMPLEX*16 CHKVAL * .. * .. Array Arguments .. CHARACTER MESS*(*) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZCHEKPAD checks that the padding around a local array has not * been overwritten since the call to PZFILLPAD. 3 types of errors * are reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells you that the 5th element of the IPRE long buffer has been * overwritten with the value 18, where it should still have the value * of CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells you that the 19th element after the end of A was overwritten * with the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells you that the element at the 12th row and 3rd column of A was * overwritten with the value of 22, where it should still have the * value of CHKVAL. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * MESS (local input) CHARACTER*(*) * String containing a user-defined message. * * M (local input) INTEGER * The number of rows in the local array A. * * N (input) INTEGER * The number of columns in the local array A. * * A (local input) COMPLEX*16 array of dimension (LDA,N). * A location IPRE elements in front of the array to be checked. * * LDA (local input) INTEGER * The leading Dimension of the local array to be checked. * * IPRE (local input) INTEGER * The size of the guard zone before the start of padded array. * * IPOST (local input) INTEGER * The size of guard zone after the padded array. * * CHKVAL (local input) COMPLEX*16 * The value the local array was padded with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DIMAG * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ DBLE( A( I ) ), DIMAG( A( I ) ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PZCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, DBLE( A( I ) ), $ DIMAG( A( I ) ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PZCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, DBLE( A( I ) ), $ DIMAG( A( I ) ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G20.7, '+ i*', $ G20.7 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G20.7, $ '+ i*', G20.7 ) * RETURN * * End of PZCHEKPAD * END scalapack-1.8.0/TOOLS/iceil.f0000640000175000017500000000142510363532303015452 0ustar muammarmuammar INTEGER FUNCTION ICEIL( INUM, IDENOM ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IDENOM, INUM * .. * * Purpose * ======= * * ICEIL returns the ceiling of the division of two integers. * * Arguments * ========= * * INUM (local input) INTEGER * The numerator, * * IDENOM (local input) INTEGER * and the denominator of the fraction to be evaluated. * * ===================================================================== * * .. Executable Statements .. * ICEIL = (INUM+IDENOM-1) / IDENOM * RETURN * * End of ICEIL * END scalapack-1.8.0/TOOLS/numroc.f0000640000175000017500000000416010363532303015667 0ustar muammarmuammar INTEGER FUNCTION NUMROC( N, NB, IPROC, ISRCPROC, NPROCS ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IPROC, ISRCPROC, N, NB, NPROCS * .. * * Purpose * ======= * * NUMROC computes the NUMber of Rows Or Columns of a distributed * matrix owned by the process indicated by IPROC. * * Arguments * ========= * * N (global input) INTEGER * The number of rows/columns in distributed matrix. * * NB (global input) INTEGER * Block size, size of the blocks the distributed matrix is * split into. * * IPROC (local input) INTEGER * The coordinate of the process whose local array row or * column is to be determined. * * ISRCPROC (global input) INTEGER * The coordinate of the process that possesses the first * row or column of the distributed matrix. * * NPROCS (global input) INTEGER * The total number processes over which the matrix is * distributed. * * ===================================================================== * * .. Local Scalars .. INTEGER EXTRABLKS, MYDIST, NBLOCKS * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Figure PROC's distance from source process * MYDIST = MOD( NPROCS+IPROC-ISRCPROC, NPROCS ) * * Figure the total number of whole NB blocks N is split up into * NBLOCKS = N / NB * * Figure the minimum number of rows/cols a process can have * NUMROC = (NBLOCKS/NPROCS) * NB * * See if there are any extra blocks * EXTRABLKS = MOD( NBLOCKS, NPROCS ) * * If I have an extra block * IF( MYDIST.LT.EXTRABLKS ) THEN NUMROC = NUMROC + NB * * If I have last block, it may be a partial block * ELSE IF( MYDIST.EQ.EXTRABLKS ) THEN NUMROC = NUMROC + MOD( N, NB ) END IF * RETURN * * End of NUMROC * END scalapack-1.8.0/TOOLS/pdmatadd.f0000640000175000017500000002702110363532303016143 0ustar muammarmuammar SUBROUTINE PDMATADD( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, $ JC, DESCC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IC, JA, JC, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ) * .. * * Purpose * ======= * * PDMATADD performs a distributed matrix-matrix addition * * sub( C ) := alpha * sub( A ) + beta * sub( C ), * * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this * routine, the arrays are supposed to be aligned. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and * sub( C ). N >= 0. * * ALPHA (global input) DOUBLE PRECISION * The scalar ALPHA. * * A (local input) DOUBLE PRECISION pointer into the local memory * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This * array contains the local pieces of the distributed matrix * sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (global input) DOUBLE PRECISION * The scalar BETA. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * This array contains the local pieces of the distributed * matrix sub( C ). On exit, this array contains the local * pieces of the resulting distributed matrix. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA, $ IIC, IOFFA, IOFFC, IROFF, J, JJA, JJC, LDA, $ LDC, MP, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA(CTXT_), NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR. $ ((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC, JJC, ICROW, ICCOL ) * IROFF = MOD( IA-1, DESCA(MB_) ) ICOFF = MOD( JA-1, DESCA(NB_) ) MP = NUMROC( M+IROFF, DESCA(MB_), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA(NB_), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP-IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFF LDA = DESCA(LLD_) LDC = DESCC(LLD_) * IF( NQ.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC + (JJC-1)*LDC DO 10 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 10 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 20 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 30 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 30 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 40 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 50 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 50 CONTINUE ELSE IOFFA = IIA + (JJA-1)*LDA IOFFC = IIC + (JJC-1)*LDC DO 60 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN IOFFC = IIC+(JJC-1)*LDC DO 80 J = 1, NQ DO 70 I = IOFFC, IOFFC+MP-1 C( I ) = ZERO 70 CONTINUE IOFFC = IOFFC + LDC 80 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 100 J = 1, NQ DO 90 I = IOFFC, IOFFC+MP-1 C( I ) = ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 90 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 120 J = 1, NQ DO 110 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 110 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 120 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 140 J = 1, NQ DO 130 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + A( IOFFA ) IOFFA = IOFFA + 1 130 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 160 J = 1, NQ DO 150 I = IOFFC, IOFFC+MP-1 C( I ) = C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 150 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 160 CONTINUE ELSE IOFFA = IIA+(JJA-1)*LDA IOFFC = IIC+(JJC-1)*LDC DO 180 J = 1, NQ DO 170 I = IOFFC, IOFFC+MP-1 C( I ) = BETA * C( I ) + ALPHA * A( IOFFA ) IOFFA = IOFFA + 1 170 CONTINUE IOFFA = IOFFA + LDA - MP IOFFC = IOFFC + LDC 180 CONTINUE END IF END IF END IF * RETURN * * End of PDMATADD * END scalapack-1.8.0/TOOLS/pdtreecomb.f0000640000175000017500000002233110363532303016510 0ustar muammarmuammar SUBROUTINE PDTREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, $ SUBPTR ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SCOPE INTEGER CDEST0, ICTXT, N, RDEST0 * .. * .. Array Arguments .. DOUBLE PRECISION MINE( * ) * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PDTREECOMB does a 1-tree parallel combine operation on scalars, * using the subroutine indicated by SUBPTR to perform the required * computation. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * SCOPE (global input) CHARACTER * The scope of the operation: 'Rowwise', 'Columnwise', or * 'All'. * * N (global input) INTEGER * The number of elements in MINE. N = 1 for the norm-2 * computation and 2 for the sum of square. * * MINE (local input/global output) DOUBLE PRECISION array of * dimension at least equal to N. The local data to use in the * combine. * * RDEST0 (global input) INTEGER * The process row to receive the answer. If RDEST0 = -1, * every process in the scope gets the answer. * * CDEST0 (global input) INTEGER * The process column to receive the answer. If CDEST0 = -1, * every process in the scope gets the answer. * * SUBPTR (local input) Pointer to the subroutine to call to perform * the required combine. * * ===================================================================== * * .. Local Scalars .. LOGICAL BCAST, RSCOPE, CSCOPE INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, $ RMSSG, TCDEST, TRDEST * .. * .. Local Arrays .. DOUBLE PRECISION HIS( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * See if everyone wants the answer (need to broadcast the answer) * BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) IF( BCAST ) THEN TRDEST = 0 TCDEST = 0 ELSE TRDEST = RDEST0 TCDEST = CDEST0 END IF * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure scope-dependant variables, or report illegal scope * RSCOPE = LSAME( SCOPE, 'R' ) CSCOPE = LSAME( SCOPE, 'C' ) * IF( RSCOPE ) THEN IF( BCAST ) THEN TRDEST = MYROW ELSE IF( MYROW.NE.TRDEST ) THEN RETURN END IF NP = NPCOL MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) ELSE IF( CSCOPE ) THEN IF( BCAST ) THEN TCDEST = MYCOL ELSE IF( MYCOL.NE.TCDEST ) THEN RETURN END IF NP = NPROW MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) ELSE IF( LSAME( SCOPE, 'A' ) ) THEN NP = NPROW * NPCOL IAM = MYROW*NPCOL + MYCOL DEST = TRDEST*NPCOL + TCDEST MYDIST = MOD( NP + IAM - DEST, NP ) ELSE RETURN END IF * IF( NP.LT.2 ) $ RETURN * MYDIST2 = MYDIST RMSSG = MYROW CMSSG = MYCOL I = 1 * 10 CONTINUE * IF( MOD( MYDIST, 2 ).NE.0 ) THEN * * If I am process that sends information * DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) * * Figure coordinates of dest of message * IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) END IF * CALL DGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) * GO TO 20 * ELSE * * If I am a process receiving information, figure coordinates * of source of message * DIST = MYDIST2 + I IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) HISDIST = MOD( NP + CMSSG - TCDEST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) HISDIST = MOD( NP + RMSSG - TRDEST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) END IF * IF( MYDIST2.LT.HISDIST ) THEN * * If I have anyone sending to me * CALL DGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) CALL SUBPTR( MINE, HIS ) * END IF MYDIST = MYDIST / 2 * END IF I = I * 2 * IF( I.LT.NP ) $ GO TO 10 * 20 CONTINUE * IF( BCAST ) THEN IF( MYDIST2.EQ.0 ) THEN CALL DGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) ELSE CALL DGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, $ TRDEST, TCDEST ) END IF END IF * RETURN * * End of PDTREECOMB * END * SUBROUTINE DCOMBAMAX( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. DOUBLE PRECISION V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * DCOMBAMAX finds the element having max. absolute value as well * as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) DOUBLE PRECISION array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) DOUBLE PRECISION array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * IF( ABS( V1( 1 ) ).LT.ABS( V2( 1 ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of DCOMBAMAX * END * SUBROUTINE DCOMBSSQ( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. DOUBLE PRECISION V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * DCOMBSSQ does a scaled sum of squares on two scalars. * * Arguments * ========= * * V1 (local input/local output) DOUBLE PRECISION array of * dimension 2. The first scaled sum. V1(1) = SCALE, * V1(2) = SUMSQ. * * V2 (local input) DOUBLE PRECISION array of dimension 2. * The second scaled sum. V2(1) = SCALE, V2(2) = SUMSQ. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Executable Statements .. * IF( V1( 1 ).GE.V2( 1 ) ) THEN IF( V1( 1 ).NE.ZERO ) $ V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) ELSE V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) V1( 1 ) = V2( 1 ) END IF * RETURN * * End of DCOMBSSQ * END * SUBROUTINE DCOMBNRM2( X, Y ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. * * Purpose * ======= * * DCOMBNRM2 combines local norm 2 results, taking care not to cause * unnecessary overflow. * * Arguments * ========= * * X (local input) DOUBLE PRECISION * Y (local input) DOUBLE PRECISION * X and Y specify the values x and y. X and Y are supposed to * be >= 0. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION W, Z * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT * .. * .. Executable Statements .. * W = MAX( X, Y ) Z = MIN( X, Y ) * IF( Z.EQ.ZERO ) THEN X = W ELSE X = W*SQRT( ONE+( Z / W )**2 ) END IF * RETURN * * End of DCOMBNRM2 * END scalapack-1.8.0/TOOLS/dscasum.f0000640000175000017500000000166110363532303016026 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DSCASUM( N, X, INCX ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * DSCASUM is a simple FORTRAN wrapper around the BLAS function SCASUM * returning the result as a double allowing it to be callable by C * programs. * * ===================================================================== * * .. External Functions .. REAL SCASUM EXTERNAL SCASUM * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * DSCASUM = DBLE( SCASUM( N, X, INCX ) ) * RETURN * * End of DSCASUM * END scalapack-1.8.0/TOOLS/pifillpad.f0000640000175000017500000000556410363532303016341 0ustar muammarmuammar SUBROUTINE PIFILLPAD( ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N INTEGER CHKVAL * .. * .. Array Arguments .. INTEGER A( * ) * .. * * Purpose * ======= * * PIFILLPAD surrounds a two dimensional local array with a guard- * zone initialized to the value CHKVAL. The user may later call the * routine PICHEKPAD to discover if the guardzone has been * violated. There are three guardzones. The first is a buffer of size * IPRE that is before the start of the array. The second is the buffer * of size IPOST which is after the end of the array to be padded. * Finally, there is a guard zone inside every column of the array to * be padded, in the elements of A(M+1:LDA, J). * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (local input) INTEGER * The number of rows in the local array. * * N (local input) INTEGER * The number of columns in the local array. * * A (local input/local output) @(typec), array of * dimension (LDA,N). A location IPRE elements in front of * the matrix to be padded. * * LDA (local input) INTEGER * The leading Dimension of the local array to be padded. * * IPRE (local input) INTEGER * The size of the guard zone to put before the start of * padded array. * * IPOST (local input) INTEGER * The size of the guard zone to put after padded array. * * CHKVAL (local input) @(typec) * The value to pad matrix with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, J, K * .. * .. Executable Statements .. * * Put check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE A( I ) = CHKVAL 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PIFILLPAD' END IF * * Put check buffer in back of A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 A( I ) = CHKVAL 20 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no post-guardzone in PIFILLPAD' END IF * * Put check buffer in all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 A( I ) = CHKVAL 30 CONTINUE K = K + LDA 40 CONTINUE END IF * RETURN * * End of PIFILLPAD * END scalapack-1.8.0/TOOLS/pielget.f0000640000175000017500000001620210363532303016015 0ustar muammarmuammar SUBROUTINE PIELGET( SCOPE, TOP, ALPHA, A, IA, JA, DESCA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER*1 SCOPE, TOP INTEGER ALPHA, IA, JA * .. * .. Array arguments .. INTEGER A( * ), DESCA( * ) * .. * * Purpose * ======= * * PIELGET sets alpha to the distributed matrix entry A( IA, JA ). * The value of alpha is set according to the scope. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SCOPE (global input) CHARACTER*1 * The BLACS scope in which alpha is updated. * If SCOPE = 'R', alpha is updated only in the process row * containing A( IA, JA ), * If SCOPE = 'C', alpha is updated only in the process column * containing A( IA, JA ), * If SCOPE = 'A', alpha is updated in all the processes of the * grid, * otherwise alpha is updated only in the process containing * A( IA, JA ). * * TOP (global input) CHARACTER*1 * The topology to be used if broadcast is needed. * * ALPHA (global output) @(typec), the scalar alpha. * * A (local input) @(typec) pointer into the local memory * to an array of dimension (LLD_A,*) containing the local * pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ZERO PARAMETER ( ZERO = 0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, IIA, IOFFA, JJA, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, INFOG2L * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * ALPHA = ZERO * IF( LSAME( SCOPE, 'R' ) ) THEN IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL IGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL IGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'C' ) ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL IGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL IGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF END IF ELSE IF( LSAME( SCOPE, 'A' ) ) THEN IF( ( MYROW.EQ.IAROW ).AND.( MYCOL.EQ.IACOL ) ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) CALL IGEBS2D( ICTXT, SCOPE, TOP, 1, 1, A( IOFFA ), 1 ) ALPHA = A( IOFFA ) ELSE CALL IGEBR2D( ICTXT, SCOPE, TOP, 1, 1, ALPHA, 1, $ IAROW, IACOL ) END IF ELSE IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) $ ALPHA = A( IIA+(JJA-1)*DESCA( LLD_ ) ) END IF * RETURN * * End of PIELGET * END scalapack-1.8.0/TOOLS/pdelset2.f0000640000175000017500000001247210363532303016113 0ustar muammarmuammar SUBROUTINE PDELSET2( ALPHA, A, IA, JA, DESCA, BETA ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA DOUBLE PRECISION ALPHA, BETA * .. * .. Array arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDELSET2 sets alpha to the distributed matrix entry A(IA,JA) * and A(IA,JA) to beta. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * ALPHA (local output) DOUBLE PRECISION * The scalar alpha. * * A (local input/local ouput) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,*) containing * the local pieces of the distributed matrix A. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * BETA (local input) DOUBLE PRECISION * The scalar beta. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, IIA, IOFFA, JJA, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFA = IIA+(JJA-1)*DESCA( LLD_ ) ALPHA = A( IOFFA ) A( IOFFA ) = BETA ELSE ALPHA = ZERO END IF * RETURN * * End of PDELSET2 * END scalapack-1.8.0/TOOLS/pzrow2col.f0000640000175000017500000002150710363532303016331 0ustar muammarmuammar SUBROUTINE PZROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, $ RSRC, CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. COMPLEX*16 VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a row of processes, and distribute those rows over a column of * processes. This routine minimizes communication by sending all * information it has that a given process in the CDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process row consisting of Q processes, and I want to send all of that * vector that I own to a new vector distributed over P processes within * a process column, that after I find the process in RDEST that owns * the row of the vector I'm currently looking at, he will want every * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a row. * * VS (local input) COMPLEX*16 * Array of dimension (LDVS,N), the block of vectors stored on * process row RSRC to be put into memory VD, and stored on * process column CDEST. * * LDVS (local input) INTEGER * The leading dimension of VS. * * VD (local output) COMPLEX*16 * Array of dimension (LDVD,N), on output, the contents of VD * stored on process column CDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD. * * RSRC (global input) INTEGER * The process row VS is distributed over. * * CSRC (global input) INTEGER * The process column the distributed block of vectors VS * begins on. * * RDEST (global input) INTEGER * The process row that VD begins on. * * CDEST (global input) INTEGER * The process column to distribute VD over. * * WORK (local workspace) COMPLEX*16 * Array, dimension (LDW). The required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mq = number of rows of VS on my process. * npcol = number of process columns * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZGESD2D, ZGERV2D, ZLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there is no * copying required * IF( NPROW .NE. NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYROW.EQ.RSRC ) THEN * ISTART = 1 * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my first block * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) * * Loop over all possible destination processes * DO 20 K = 1, RBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ZLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary, NOTE: will send extra if NB > JB * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL ZGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST, $ CDEST ) * * I am both source and destination, save where to start * copying from for later use * ELSE ICPY = ISTART END IF * ISTART = ISTART + NB IRDEST = MOD( IRDEST+NPCOL, NPROW ) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYCOL.EQ.CDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) * * Loop over all sending processes * DO 50 K = 1, CBLKSKIP * * If I don't already possess the required data * IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MP - ISTART + NB) / NB JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL ZGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL ZLACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1), $ LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL ZLACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1), $ LDVD ) JJ = JJ + NB*RBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB ICSRC = MOD( ICSRC+NPROW, NPCOL ) 50 CONTINUE END IF * * if NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYROW.EQ.RSRC ) THEN * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my piece of the vector * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN CALL ZGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST ) ELSE CALL ZLACPY( 'G', MQ, N, VS, LDVS, VD, LDVD ) END IF END IF IF( MYCOL.EQ.CDEST ) THEN * * Figure my distance from RDEST: the process in RSRC the same * distance from CSRC will have my piece of the vector * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) ) $ CALL ZGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC ) END IF END IF * RETURN * * End of PZROW2COL * END scalapack-1.8.0/TOOLS/dsnrm2.f0000640000175000017500000000165110363532303015573 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DSNRM2( N, X, INCX ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. REAL X( * ) * .. * * Purpose * ======= * * DSNRM2 is a simple FORTRAN wrapper around the BLAS function SNRM2 * returning the result as a double allowing it to be callable by C * programs. * * ===================================================================== * * .. External Functions .. REAL SNRM2 EXTERNAL SNRM2 * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * DSNRM2 = DBLE( SNRM2( N, X, INCX ) ) * RETURN * * End of DSNRM2 * END scalapack-1.8.0/TOOLS/cmatadd.f0000640000175000017500000001034610363532303015764 0ustar muammarmuammar SUBROUTINE CMATADD( M, N, ALPHA, A, LDA, BETA, C, LDC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDA, LDC, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. COMPLEX A( LDA, * ), C( LDC, * ) * .. * * Purpose * ======= * * CMATADD performs the following local matrix-matrix operation * * C := alpha * A + beta * C, * * where alpha and beta are scalars, and A and C are m by n arrays. * * Arguments * ========= * * M (local input) INTEGER * The number of rows of the array A. M >= 0. * * N (local input) INTEGER * The number of columns of the array A. N >= 0. * * ALPHA (local input) COMPLEX * The scalar ALPHA. * * A (local input) COMPLEX * Array, dimension (LDA,*), the array A. * * LDA (local input) INTEGER * The leading dimension of the array A, LDA >= MAX(1, M) * * BETA (local input) COMPLEX * The scalar BETA. * * C (local input/local output) COMPLEX * Array, dimension (LDC,*), the array C. * * LDC (local input) INTEGER * The leading dimension of the array C, LDC >= MAX(1, M) * * ===================================================================== * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR.((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * IF( N.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, M C( I, 1 ) = ZERO 10 CONTINUE ELSE DO 20 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 30 I = 1, M C( I, 1 ) = A( I, 1 ) + C( I, 1 ) 30 CONTINUE ELSE DO 40 I = 1, M C( I, 1 ) = A( I, 1 ) + BETA*C( I, 1 ) 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 50 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + C( I, 1 ) 50 CONTINUE ELSE DO 60 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + BETA*C( I, 1 ) 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M C( I, J ) = ZERO 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, M C( I, J ) = ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 120 J = 1, N DO 110 I = 1, M C( I, J ) = A( I, J ) + C( I, J ) 110 CONTINUE 120 CONTINUE ELSE DO 140 J = 1, N DO 130 I = 1, M C( I, J ) = A( I, J ) + BETA * C( I, J ) 130 CONTINUE 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M C( I, J ) = C( I, J ) + ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE ELSE DO 180 J = 1, N DO 170 I = 1, M C( I, J ) = ALPHA * A( I, J ) + BETA * C( I, J ) 170 CONTINUE 180 CONTINUE END IF END IF END IF * RETURN * * End of CMATADD * END scalapack-1.8.0/TOOLS/dscnrm2.f0000640000175000017500000000166110363532303015737 0ustar muammarmuammar DOUBLE PRECISION FUNCTION DSCNRM2( N, X, INCX ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, N * .. * .. Array Arguments .. COMPLEX X( * ) * .. * * Purpose * ======= * * DSCNRM2 is a simple FORTRAN wrapper around the BLAS function SCNRM2 * returning the result as a double allowing it to be callable by C * programs. * * ===================================================================== * * .. External Functions .. REAL SCNRM2 EXTERNAL SCNRM2 * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * DSCNRM2 = DBLE( SCNRM2( N, X, INCX ) ) * RETURN * * End of DSCNRM2 * END scalapack-1.8.0/TOOLS/zmatadd.f0000640000175000017500000001036210363532303016011 0ustar muammarmuammar SUBROUTINE ZMATADD( M, N, ALPHA, A, LDA, BETA, C, LDC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDA, LDC, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), C( LDC, * ) * .. * * Purpose * ======= * * ZMATADD performs the following local matrix-matrix operation * * C := alpha * A + beta * C, * * where alpha and beta are scalars, and A and C are m by n arrays. * * Arguments * ========= * * M (local input) INTEGER * The number of rows of the array A. M >= 0. * * N (local input) INTEGER * The number of columns of the array A. N >= 0. * * ALPHA (local input) COMPLEX*16 * The scalar ALPHA. * * A (local input) COMPLEX*16 * Array, dimension (LDA,*), the array A. * * LDA (local input) INTEGER * The leading dimension of the array A, LDA >= MAX(1, M) * * BETA (local input) COMPLEX*16 * The scalar BETA. * * C (local input/local output) COMPLEX*16 * Array, dimension (LDC,*), the array C. * * LDC (local input) INTEGER * The leading dimension of the array C, LDC >= MAX(1, M) * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR.((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * IF( N.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, M C( I, 1 ) = ZERO 10 CONTINUE ELSE DO 20 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 30 I = 1, M C( I, 1 ) = A( I, 1 ) + C( I, 1 ) 30 CONTINUE ELSE DO 40 I = 1, M C( I, 1 ) = A( I, 1 ) + BETA*C( I, 1 ) 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 50 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + C( I, 1 ) 50 CONTINUE ELSE DO 60 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + BETA*C( I, 1 ) 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M C( I, J ) = ZERO 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, M C( I, J ) = ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 120 J = 1, N DO 110 I = 1, M C( I, J ) = A( I, J ) + C( I, J ) 110 CONTINUE 120 CONTINUE ELSE DO 140 J = 1, N DO 130 I = 1, M C( I, J ) = A( I, J ) + BETA * C( I, J ) 130 CONTINUE 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M C( I, J ) = C( I, J ) + ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE ELSE DO 180 J = 1, N DO 170 I = 1, M C( I, J ) = ALPHA * A( I, J ) + BETA * C( I, J ) 170 CONTINUE 180 CONTINUE END IF END IF END IF * RETURN * * End of ZMATADD * END scalapack-1.8.0/TOOLS/ccdotc.f0000640000175000017500000000156110363532303015625 0ustar muammarmuammar SUBROUTINE CCDOTC( N, DOTC, X, INCX, Y, INCY ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, INCY, N COMPLEX DOTC * .. * .. Array Arguments .. COMPLEX X( * ), Y( * ) * .. * * Purpose * ======= * * CCDOTC is a simple FORTRAN wrapper around the BLAS function * CDOTC returning the result in the parameter list instead. * * ===================================================================== * * .. External Functions .. COMPLEX CDOTC EXTERNAL CDOTC * .. * .. Executable Statements .. * DOTC = CDOTC( N, X, INCX, Y, INCY ) * RETURN * * End of CCDOTC * END scalapack-1.8.0/TOOLS/SL_gridreshape.c0000640000175000017500000000430410363532303017254 0ustar muammarmuammar#include #include int SL_Cgridreshape(ctxt, pstart, row_major_in, row_major_out, P, Q) int ctxt, pstart, row_major_in, row_major_out, P, Q; { int Cblacs_pnum(); int nctxt, P0, Q0, Np, i, *g; Cblacs_gridinfo(ctxt, &P0, &Q0, &i, &Np); Np = P * Q; if (Np+pstart > P0*Q0) { fprintf(stderr, "Illegal reshape command in %s\n",__FILE__); Cblacs_abort(ctxt, -22); } g = (int *) malloc(Np * sizeof(int)); if (!g) { fprintf(stderr, "Cannot allocate memory in %s\n",__FILE__); Cblacs_abort(ctxt, -23); } if (row_major_in) /* Read in in row-major order */ { if (row_major_out) for (i=0; i != Np; i++) g[(i%Q)*P+i/Q] = Cblacs_pnum(ctxt, (pstart+i)/Q0, (pstart+i)%Q0); else for (i=0; i != Np; i++) g[i] = Cblacs_pnum(ctxt, (pstart+i)/Q0, (pstart+i)%Q0); } else /* read in in column-major order */ { if (row_major_out) for (i=0; i != Np; i++) g[(i%Q)*P+i/Q] = Cblacs_pnum(ctxt, (pstart+i)%P0, (pstart+i)/P0); else for (i=0; i != Np; i++) g[i] = Cblacs_pnum(ctxt, (pstart+i)%P0, (pstart+i)/P0); } Cblacs_get(ctxt, 10, &nctxt); Cblacs_gridmap(&nctxt, g, P, P, Q); free(g); return(nctxt); } int sl_gridreshape_(ctxt, pstart, row_major_in, row_major_out, P, Q) int *ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q; { return( SL_Cgridreshape(*ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q) ); } int SL_GRIDRESHAPE(ctxt, pstart, row_major_in, row_major_out, P, Q) int *ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q; { return( SL_Cgridreshape(*ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q) ); } int sl_gridreshape__(ctxt, pstart, row_major_in, row_major_out, P, Q) int *ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q; { return( SL_Cgridreshape(*ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q) ); } int sl_gridreshape(ctxt, pstart, row_major_in, row_major_out, P, Q) int *ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q; { return( SL_Cgridreshape(*ctxt, *pstart, *row_major_in, *row_major_out, *P, *Q) ); } scalapack-1.8.0/TOOLS/pschekpad.f0000640000175000017500000001210310363532303016322 0ustar muammarmuammar SUBROUTINE PSCHEKPAD( ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, $ CHKVAL ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT, IPOST, IPRE, LDA, M, N REAL CHKVAL * .. * .. Array Arguments .. CHARACTER MESS*(*) REAL A( * ) * .. * * Purpose * ======= * * PSCHEKPAD checks that the padding around a local array has not * been overwritten since the call to PSFILLPAD. 3 types of errors * are reported: * * 1) Overwrite in pre-guardzone. This indicates a memory overwrite has * occurred in the first IPRE elements which form a buffer before the * beginning of A. Therefore, the error message: * 'Overwrite in pre-guardzone: loc( 5) = 18.00000' * tells you that the 5th element of the IPRE long buffer has been * overwritten with the value 18, where it should still have the value * of CHKVAL. * * 2) Overwrite in post-guardzone. This indicates a memory overwrite has * occurred in the last IPOST elements which form a buffer after the end * of A. Error reports are refered from the end of A. Therefore, * 'Overwrite in post-guardzone: loc( 19) = 24.00000' * tells you that the 19th element after the end of A was overwritten * with the value 24, where it should still have the value of CHKVAL. * * 3) Overwrite in lda-m gap. Tells you elements between M and LDA were * overwritten. So, * 'Overwrite in lda-m gap: A( 12, 3) = 22.00000' * tells you that the element at the 12th row and 3rd column of A was * overwritten with the value of 22, where it should still have the * value of CHKVAL. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * MESS (local input) CHARACTER*(*) * String containing a user-defined message. * * M (local input) INTEGER * The number of rows in the local array A. * * N (input) INTEGER * The number of columns in the local array A. * * A (local input) REAL array of dimension (LDA,N). * A location IPRE elements in front of the array to be checked. * * LDA (local input) INTEGER * The leading Dimension of the local array to be checked. * * IPRE (local input) INTEGER * The size of the guard zone before the start of padded array. * * IPOST (local input) INTEGER * The size of guard zone after the padded array. * * CHKVAL (local input) REAL * The value the local array was padded with. * * ===================================================================== * * .. Local Scalars .. INTEGER I, IAM, IDUMM, INFO, J, K, MYCOL, MYROW, $ NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW*NPCOL + MYCOL INFO = -1 * * Check buffer in front of A * IF( IPRE.GT.0 ) THEN DO 10 I = 1, IPRE IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, ' pre', I, $ A( I ) INFO = IAM END IF 10 CONTINUE ELSE WRITE( *, FMT = * ) 'WARNING no pre-guardzone in PSCHEKPAD' END IF * * Check buffer after A * IF( IPOST.GT.0 ) THEN J = IPRE+LDA*N+1 DO 20 I = J, J+IPOST-1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9998 ) MYROW, MYCOL, MESS, 'post', $ I-J+1, A( I ) INFO = IAM END IF 20 CONTINUE ELSE WRITE( *, FMT = * ) $ 'WARNING no post-guardzone buffer in PSCHEKPAD' END IF * * Check all (LDA-M) gaps * IF( LDA.GT.M ) THEN K = IPRE + M + 1 DO 40 J = 1, N DO 30 I = K, K + (LDA-M) - 1 IF( A( I ).NE.CHKVAL ) THEN WRITE( *, FMT = 9997 ) MYROW, MYCOL, MESS, $ I-IPRE-LDA*(J-1), J, A( I ) INFO = IAM END IF 30 CONTINUE K = K + LDA 40 CONTINUE END IF * CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMM, IDUMM, -1, $ 0, 0 ) IF( IAM.EQ.0 .AND. INFO.GE.0 ) THEN WRITE( *, FMT = 9999 ) INFO / NPCOL, MOD( INFO, NPCOL ), MESS END IF * 9999 FORMAT( '{', I5, ',', I5, '}: Memory overwrite in ', A ) 9998 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ A4, '-guardzone: loc(', I3, ') = ', G11.4 ) 9997 FORMAT( '{', I5, ',', I5, '}: ', A, ' memory overwrite in ', $ 'lda-m gap: loc(', I3, ',', I3, ') = ', G11.4 ) * RETURN * * End of PSCHEKPAD * END scalapack-1.8.0/TOOLS/dmatadd.f0000640000175000017500000001032710363532303015764 0ustar muammarmuammar SUBROUTINE DMATADD( M, N, ALPHA, A, LDA, BETA, C, LDC ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDA, LDC, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), C( LDC, * ) * .. * * Purpose * ======= * * DMATADD performs the following local matrix-matrix operation * * C := alpha * A + beta * C, * * where alpha and beta are scalars, and A and C are m by n arrays. * * Arguments * ========= * * M (local input) INTEGER * The number of rows of the array A. M >= 0. * * N (local input) INTEGER * The number of columns of the array A. N >= 0. * * ALPHA (local input) DOUBLE PRECISION * The scalar ALPHA. * * A (local input) DOUBLE PRECISION * Array, dimension (LDA,*), the array A. * * LDA (local input) INTEGER * The leading dimension of the array A, LDA >= MAX(1, M) * * BETA (local input) DOUBLE PRECISION * The scalar BETA. * * C (local input/local output) DOUBLE PRECISION * Array, dimension (LDC,*), the array C. * * LDC (local input) INTEGER * The leading dimension of the array C, LDC >= MAX(1, M) * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J * .. * .. Executable Statements .. * * Quick return if possible. * IF( (M.EQ.0).OR.(N.EQ.0).OR.((ALPHA.EQ.ZERO).AND.(BETA.EQ.ONE)) ) $ RETURN * IF( N.EQ.1 ) THEN IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 10 I = 1, M C( I, 1 ) = ZERO 10 CONTINUE ELSE DO 20 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) 20 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 30 I = 1, M C( I, 1 ) = A( I, 1 ) + C( I, 1 ) 30 CONTINUE ELSE DO 40 I = 1, M C( I, 1 ) = A( I, 1 ) + BETA*C( I, 1 ) 40 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 50 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + C( I, 1 ) 50 CONTINUE ELSE DO 60 I = 1, M C( I, 1 ) = ALPHA*A( I, 1 ) + BETA*C( I, 1 ) 60 CONTINUE END IF END IF ELSE IF( BETA.EQ.ZERO ) THEN IF( ALPHA.EQ.ZERO ) THEN DO 80 J = 1, N DO 70 I = 1, M C( I, J ) = ZERO 70 CONTINUE 80 CONTINUE ELSE DO 100 J = 1, N DO 90 I = 1, M C( I, J ) = ALPHA * A( I, J ) 90 CONTINUE 100 CONTINUE END IF ELSE IF( ALPHA.EQ.ONE ) THEN IF( BETA.EQ.ONE ) THEN DO 120 J = 1, N DO 110 I = 1, M C( I, J ) = A( I, J ) + C( I, J ) 110 CONTINUE 120 CONTINUE ELSE DO 140 J = 1, N DO 130 I = 1, M C( I, J ) = A( I, J ) + BETA * C( I, J ) 130 CONTINUE 140 CONTINUE END IF ELSE IF( BETA.EQ.ONE ) THEN DO 160 J = 1, N DO 150 I = 1, M C( I, J ) = C( I, J ) + ALPHA * A( I, J ) 150 CONTINUE 160 CONTINUE ELSE DO 180 J = 1, N DO 170 I = 1, M C( I, J ) = ALPHA * A( I, J ) + BETA * C( I, J ) 170 CONTINUE 180 CONTINUE END IF END IF END IF * RETURN * * End of DMATADD * END scalapack-1.8.0/TOOLS/pdlaprnt.f0000640000175000017500000002762210363532303016220 0ustar muammarmuammar SUBROUTINE PDLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAPRNT prints to the standard output a distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and * printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory to a * local array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IRPRNT (global input) INTEGER * The row index of the printing process. * * ICPRNT (global input) INTEGER * The column index of the printing process. * * CMATNM (global input) CHARACTER*(*) * Identifier of the distributed matrix to be printed. * * NOUT (global input) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. * * WORK (local workspace) DOUBLE PRECISION * Working array of minimum size equal to MB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ DGERV2D, DGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, WORK( K ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, A( II+K+(JJ+H-1)*LDA ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, WORK( K ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, A( II+K+(JJ+H-1)*LDA ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, WORK( K ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, A( II+K+(JJ+H-1)*LDA ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL DGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL DGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, WORK( K ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT(A,'(',I6,',',I6,')=',D30.18) * RETURN * * End of PDLAPRNT * END scalapack-1.8.0/TOOLS/pstreecomb.f0000640000175000017500000002220510363532303016527 0ustar muammarmuammar SUBROUTINE PSTREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, $ SUBPTR ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SCOPE INTEGER CDEST0, ICTXT, N, RDEST0 * .. * .. Array Arguments .. REAL MINE( * ) * .. * .. Subroutine Arguments .. EXTERNAL SUBPTR * .. * * Purpose * ======= * * PSTREECOMB does a 1-tree parallel combine operation on scalars, * using the subroutine indicated by SUBPTR to perform the required * computation. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * SCOPE (global input) CHARACTER * The scope of the operation: 'Rowwise', 'Columnwise', or * 'All'. * * N (global input) INTEGER * The number of elements in MINE. N = 1 for the norm-2 * computation and 2 for the sum of square. * * MINE (local input/global output) REAL array of * dimension at least equal to N. The local data to use in the * combine. * * RDEST0 (global input) INTEGER * The process row to receive the answer. If RDEST0 = -1, * every process in the scope gets the answer. * * CDEST0 (global input) INTEGER * The process column to receive the answer. If CDEST0 = -1, * every process in the scope gets the answer. * * SUBPTR (local input) Pointer to the subroutine to call to perform * the required combine. * * ===================================================================== * * .. Local Scalars .. LOGICAL BCAST, RSCOPE, CSCOPE INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, $ RMSSG, TCDEST, TRDEST * .. * .. Local Arrays .. REAL HIS( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * See if everyone wants the answer (need to broadcast the answer) * BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) IF( BCAST ) THEN TRDEST = 0 TCDEST = 0 ELSE TRDEST = RDEST0 TCDEST = CDEST0 END IF * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure scope-dependant variables, or report illegal scope * RSCOPE = LSAME( SCOPE, 'R' ) CSCOPE = LSAME( SCOPE, 'C' ) * IF( RSCOPE ) THEN IF( BCAST ) THEN TRDEST = MYROW ELSE IF( MYROW.NE.TRDEST ) THEN RETURN END IF NP = NPCOL MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) ELSE IF( CSCOPE ) THEN IF( BCAST ) THEN TCDEST = MYCOL ELSE IF( MYCOL.NE.TCDEST ) THEN RETURN END IF NP = NPROW MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) ELSE IF( LSAME( SCOPE, 'A' ) ) THEN NP = NPROW * NPCOL IAM = MYROW*NPCOL + MYCOL DEST = TRDEST*NPCOL + TCDEST MYDIST = MOD( NP + IAM - DEST, NP ) ELSE RETURN END IF * IF( NP.LT.2 ) $ RETURN * MYDIST2 = MYDIST RMSSG = MYROW CMSSG = MYCOL I = 1 * 10 CONTINUE * IF( MOD( MYDIST, 2 ).NE.0 ) THEN * * If I am process that sends information * DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) * * Figure coordinates of dest of message * IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) END IF * CALL SGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) * GO TO 20 * ELSE * * If I am a process receiving information, figure coordinates * of source of message * DIST = MYDIST2 + I IF( RSCOPE ) THEN CMSSG = MOD( TCDEST + DIST, NP ) HISDIST = MOD( NP + CMSSG - TCDEST, NP ) ELSE IF( CSCOPE ) THEN RMSSG = MOD( TRDEST + DIST, NP ) HISDIST = MOD( NP + RMSSG - TRDEST, NP ) ELSE CMSSG = MOD( DEST + DIST, NP ) RMSSG = CMSSG / NPCOL CMSSG = MOD( CMSSG, NPCOL ) HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) END IF * IF( MYDIST2.LT.HISDIST ) THEN * * If I have anyone sending to me * CALL SGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) CALL SUBPTR( MINE, HIS ) * END IF MYDIST = MYDIST / 2 * END IF I = I * 2 * IF( I.LT.NP ) $ GO TO 10 * 20 CONTINUE * IF( BCAST ) THEN IF( MYDIST2.EQ.0 ) THEN CALL SGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) ELSE CALL SGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, $ TRDEST, TCDEST ) END IF END IF * RETURN * * End of PSTREECOMB * END * SUBROUTINE SCOMBAMAX( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. REAL V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * SCOMBAMAX finds the element having max. absolute value as well * as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) REAL array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) REAL array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * IF( ABS( V1( 1 ) ).LT.ABS( V2( 1 ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of SCOMBAMAX * END * SUBROUTINE SCOMBSSQ( V1, V2 ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. REAL V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * SCOMBSSQ does a scaled sum of squares on two scalars. * * Arguments * ========= * * V1 (local input/local output) REAL array of * dimension 2. The first scaled sum. V1(1) = SCALE, * V1(2) = SUMSQ. * * V2 (local input) REAL array of dimension 2. * The second scaled sum. V2(1) = SCALE, V2(2) = SUMSQ. * * ===================================================================== * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Executable Statements .. * IF( V1( 1 ).GE.V2( 1 ) ) THEN IF( V1( 1 ).NE.ZERO ) $ V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) ELSE V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) V1( 1 ) = V2( 1 ) END IF * RETURN * * End of SCOMBSSQ * END * SUBROUTINE SCOMBNRM2( X, Y ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. REAL X, Y * .. * * Purpose * ======= * * SCOMBNRM2 combines local norm 2 results, taking care not to cause * unnecessary overflow. * * Arguments * ========= * * X (local input) REAL * Y (local input) REAL * X and Y specify the values x and y. X and Y are supposed to * be >= 0. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. REAL W, Z * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT * .. * .. Executable Statements .. * W = MAX( X, Y ) Z = MIN( X, Y ) * IF( Z.EQ.ZERO ) THEN X = W ELSE X = W*SQRT( ONE+( Z / W )**2 ) END IF * RETURN * * End of SCOMBNRM2 * END scalapack-1.8.0/TOOLS/pzlaread.f0000640000175000017500000001114410604624224016170 0ustar muammarmuammar SUBROUTINE PZLAREAD( FILNAM, A, DESCA, IRREAD, ICREAD, WORK ) * * -- ScaLAPACK tools routine (version 1.8) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * * written by Antoine Petitet, August 1995 (petitet@cs.utk.edu) * adapted by Julie Langou, April 2007 (julie@cs.utk.edu) * * .. Scalar Arguments .. INTEGER ICREAD, IRREAD * .. * .. Array Arguments .. CHARACTER*(*) FILNAM INTEGER DESCA( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZLAREAD reads from a file named FILNAM a matrix and distribute * it to the process grid. * * Only the process of coordinates {IRREAD, ICREAD} read the file. * * WORK must be of size >= MB_ = DESCA( MB_ ). * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 11 ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IB, ICTXT, ICURCOL, ICURROW, II, J, JB, $ JJ, K, LDA, M, MYCOL, MYROW, N, NPCOL, NPROW DOUBLE PRECISION REAL_PART, IMAG_PART * .. * .. Local Arrays .. INTEGER IWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZGERV2D, ZGESD2D, $ IGEBS2D, IGEBR2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN OPEN( NIN, FILE=FILNAM, STATUS='OLD' ) READ( NIN, FMT = * ) ( IWORK( I ), I = 1, 2 ) CALL IGEBS2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2 ) ELSE CALL IGEBR2D( ICTXT, 'All', ' ', 2, 1, IWORK, 2, IRREAD, $ ICREAD ) END IF M = IWORK( 1 ) N = IWORK( 2 ) * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * IF( M.GT.DESCA( M_ ).OR. N.GT.DESCA( N_ ) ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WRITE( *, FMT = * ) 'PZLAREAD: Matrix too big to fit in' WRITE( *, FMT = * ) 'Abort ...' END IF CALL BLACS_ABORT( ICTXT, 0 ) END IF * II = 1 JJ = 1 ICURROW = DESCA( RSRC_ ) ICURCOL = DESCA( CSRC_ ) LDA = DESCA( LLD_ ) * * Loop over column blocks * DO 50 J = 1, N, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), N-J+1 ) DO 40 H = 0, JB-1 * * Loop over block of rows * DO 30 I = 1, M, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), M-I+1 ) IF( ICURROW.EQ.IRREAD .AND. ICURCOL.EQ.ICREAD ) THEN IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 10 K = 0, IB-1 READ( NIN , FMT = *) REAL_PART, IMAG_PART A( II+K+(JJ+H-1)*LDA ) = DCMPLX(REAL_PART, IMAG_PART) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL ZGERV2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRREAD, ICREAD ) ELSE IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN DO 20 K = 1, IB READ( NIN, FMT = * ) REAL_PART, IMAG_PART WORK(K)=DCMPLX(REAL_PART,IMAG_PART) 20 CONTINUE CALL ZGESD2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) 30 CONTINUE * II = 1 ICURROW = DESCA( RSRC_ ) 40 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * IF( MYROW.EQ.IRREAD .AND. MYCOL.EQ.ICREAD ) THEN CLOSE( NIN ) END IF * RETURN * * End of PZLAREAD * END scalapack-1.8.0/TOOLS/pclaprnt.f0000640000175000017500000003077710363532303016224 0ustar muammarmuammar SUBROUTINE PCLAPRNT( M, N, A, IA, JA, DESCA, IRPRNT, ICPRNT, $ CMATNM, NOUT, WORK ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, ICPRNT, IRPRNT, JA, M, N, NOUT * .. * .. Array Arguments .. CHARACTER*(*) CMATNM INTEGER DESCA( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCLAPRNT prints to the standard output a distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1). The local pieces are sent and * printed by the process of coordinates (IRPRNT, ICPRNT). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory to a * local array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IRPRNT (global input) INTEGER * The row index of the printing process. * * ICPRNT (global input) INTEGER * The column index of the printing process. * * CMATNM (global input) CHARACTER*(*) * Identifier of the distributed matrix to be printed. * * NOUT (global input) INTEGER * The unit number for output file. NOUT = 6, ouput to screen, * NOUT = 0, output to stderr. * * WORK (local workspace) COMPLEX * Working array of minimum size equal to MB_A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER H, I, IACOL, IAROW, IB, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, J, JB, JJ, JJA, JN, K, $ LDA, MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_BARRIER, BLACS_GRIDINFO, INFOG2L, $ CGERV2D, CGESD2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, MIN, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) ICURROW = IAROW ICURCOL = IACOL II = IIA JJ = JJA LDA = DESCA( LLD_ ) * * Handle the first block of column separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 DO 60 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 10 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, JA+H, $ REAL( A(II+K+(JJ+H-1)*LDA) ), $ AIMAG( A(II+K+(JJ+H-1)*LDA) ) 10 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), LDA, $ IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 20 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, JA+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 20 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 50 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 30 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, JA+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 30 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 40 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, JA+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 40 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 50 CONTINUE * II = IIA ICURROW = IAROW 60 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining column blocks * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) DO 120 H = 0, JB-1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IB = IN-IA+1 IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 70 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K, J+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 70 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 80 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, IA+K-1, J+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 80 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) * * Loop over remaining block of rows * DO 110 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN DO 90 K = 0, IB-1 WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K, J+H, $ REAL( A( II+K+(JJ+H-1)*LDA ) ), $ AIMAG( A( II+K+(JJ+H-1)*LDA ) ) 90 CONTINUE END IF ELSE IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN CALL CGESD2D( ICTXT, IB, 1, A( II+(JJ+H-1)*LDA ), $ LDA, IRPRNT, ICPRNT ) ELSE IF( MYROW.EQ.IRPRNT .AND. MYCOL.EQ.ICPRNT ) THEN CALL CGERV2D( ICTXT, IB, 1, WORK, DESCA( MB_ ), $ ICURROW, ICURCOL ) DO 100 K = 1, IB WRITE( NOUT, FMT = 9999 ) $ CMATNM, I+K-1, J+H, REAL( WORK( K ) ), $ AIMAG( WORK( K ) ) 100 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + IB ICURROW = MOD( ICURROW+1, NPROW ) CALL BLACS_BARRIER( ICTXT, 'All' ) 110 CONTINUE * II = IIA ICURROW = IAROW 120 CONTINUE * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) CALL BLACS_BARRIER( ICTXT, 'All' ) * 130 CONTINUE * 9999 FORMAT(A,'(',I6,',',I6,')=',E16.8, '+i*(',E16.8, ')') * RETURN * * End of PCLAPRNT * END scalapack-1.8.0/TOOLS/pzcol2row.f0000640000175000017500000002157310363532303016334 0ustar muammarmuammar SUBROUTINE PZCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. COMPLEX*16 VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a column of processes, and distribute those rows over a row of * processes. This routine minimizes communication by sending all * information it has that a given process in the RDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process column consisting of P processes, and I want to send all of * that vector that I own to a new vector distributed over Q processes * within a process row, that after I find the process in RDEST that * owns the row of the vector I'm currently looking at, he will want * every ( (LCM(P,Q) / P ) block of my vector (the block being of size * NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) COMPLEX*16 * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST. * * LDVS (local input) INTEGER * The leading dimension of VS, LDVS >= MAX( 1, MP ). * * VD (local output) COMPLEX*16 * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD, LDVD >= MAX( 1, MQ ). * * RSRC (global input) INTEGER * The process row the distributed block of vectors VS begins * on. * * CSRC (global input) INTEGER * The process column VS is distributed over. * * RDEST (global input) INTEGER * The process row to distribute VD over. * * CDEST (global input) INTEGER * The process column that VD begins on. * * WORK (local workspace) COMPLEX*16 * Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZGESD2D, ZGERV2D, ZLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL ZLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL ZGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL ZGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ZLACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL ZLACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL ZGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL ZLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL ZGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PZCOL2ROW * END scalapack-1.8.0/TOOLS/pdrow2col.f0000640000175000017500000002153110363532303016300 0ustar muammarmuammar SUBROUTINE PDROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, $ RSRC, CSRC, RDEST, CDEST, WORK) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. DOUBLE PRECISION VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * .. * * Purpose * ======= * * Take a block of vectors with M total rows which are distributed over * a row of processes, and distribute those rows over a column of * processes. This routine minimizes communication by sending all * information it has that a given process in the CDEST needs at once. * To do this it uses the least common multiple (LCM) concept. This is * simply the realization that if I have part of a vector split over a * process row consisting of Q processes, and I want to send all of that * vector that I own to a new vector distributed over P processes within * a process column, that after I find the process in RDEST that owns * the row of the vector I'm currently looking at, he will want every * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N). * * Arguments * ========= * * Rem: MP, resp. NQ, denotes the number of local rows, resp. local * ==== columns, necessary to store a global vector of dimension M * across P processes, resp. N over Q processes. * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a row. * * VS (local input) DOUBLE PRECISION * Array of dimension (LDVS,N), the block of vectors stored on * process row RSRC to be put into memory VD, and stored on * process column CDEST. * * LDVS (local input) INTEGER * The leading dimension of VS. * * VD (local output) DOUBLE PRECISION * Array of dimension (LDVD,N), on output, the contents of VD * stored on process column CDEST will be here. * * LDVD (local input) INTEGER * The leading dimension of VD. * * RSRC (global input) INTEGER * The process row VS is distributed over. * * CSRC (global input) INTEGER * The process column the distributed block of vectors VS * begins on. * * RDEST (global input) INTEGER * The process row that VD begins on. * * CDEST (global input) INTEGER * The process column to distribute VD over. * * WORK (local workspace) DOUBLE PRECISION * Array, dimension (LDW). The required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mq = number of rows of VS on my process. * npcol = number of process columns * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) ) * end if * * ===================================================================== * * .. Local Scalars .. INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGESD2D, DGERV2D, DLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there is no * copying required * IF( NPROW .NE. NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYROW.EQ.RSRC ) THEN * ISTART = 1 * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my first block * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) * * Loop over all possible destination processes * DO 20 K = 1, RBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL DLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary, NOTE: will send extra if NB > JB * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL DGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST, $ CDEST ) * * I am both source and destination, save where to start * copying from for later use * ELSE ICPY = ISTART END IF * ISTART = ISTART + NB IRDEST = MOD( IRDEST+NPCOL, NPROW ) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYCOL.EQ.CDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) * * Loop over all sending processes * DO 50 K = 1, CBLKSKIP * * If I don't already possess the required data * IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MP - ISTART + NB) / NB JJ = ((NBLOCKS+CBLKSKIP-1) / CBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL DGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC, ICSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL DLACPY( 'G', JB, N, WORK(JJ), JB, VD(II,1), $ LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MP, NB*CBLKSKIP JB = MIN( NB, MP-II+1 ) CALL DLACPY( 'G', JB, N, VS(JJ,1), LDVS, VD(II,1), $ LDVD ) JJ = JJ + NB*RBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB ICSRC = MOD( ICSRC+NPROW, NPCOL ) 50 CONTINUE END IF * * if NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYROW.EQ.RSRC ) THEN * * Figure my distance from CSRC: the process in CDEST the same * distance from RDEST will want my piece of the vector * MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL ) IRDEST = MOD( RDEST+MYDIST, NPROW ) IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN CALL DGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST ) ELSE CALL DLACPY( 'G', MQ, N, VS, LDVS, VD, LDVD ) END IF END IF IF( MYCOL.EQ.CDEST ) THEN * * Figure my distance from RDEST: the process in RSRC the same * distance from CSRC will have my piece of the vector * MYDIST = MOD( NPROW+MYROW-RDEST, NPROW ) MP = NUMROC( M, NB, MYROW, RDEST, NPROW ) ICSRC = MOD( CSRC+MYDIST, NPCOL ) IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE. RSRC) ) $ CALL DGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC ) END IF END IF * RETURN * * End of PDROW2COL * END scalapack-1.8.0/TOOLS/indxg2l.f0000640000175000017500000000274010363532303015735 0ustar muammarmuammar INTEGER FUNCTION INDXG2L( INDXGLOB, NB, IPROC, ISRCPROC, NPROCS ) * * -- ScaLAPACK tools routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INDXGLOB, IPROC, ISRCPROC, NB, NPROCS * .. * * Purpose * ======= * * INDXG2L computes the local index of a distributed matrix entry * pointed to by the global index INDXGLOB. * * Arguments * ========= * * INDXGLOB (global input) INTEGER * The global index of the distributed matrix entry. * * NB (global input) INTEGER * Block size, size of the blocks the distributed matrix is * split into. * * IPROC (local dummy) INTEGER * Dummy argument in this case in order to unify the calling * sequence of the tool-routines. * * ISRCPROC (local dummy) INTEGER * Dummy argument in this case in order to unify the calling * sequence of the tool-routines. * * NPROCS (global input) INTEGER * The total number processes over which the distributed * matrix is distributed. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * INDXG2L = NB*((INDXGLOB-1)/(NB*NPROCS))+MOD(INDXGLOB-1,NB)+1 * RETURN * * End of INDXG2L * END scalapack-1.8.0/SRC/0000750000175000017500000000000010605234746013753 5ustar muammarmuammarscalapack-1.8.0/SRC/pdgeequ.f0000640000175000017500000003252210363532303015550 0ustar muammarmuammar SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N DOUBLE PRECISION AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), C( * ), R( * ) * .. * * Purpose * ======= * * PDGEEQU computes row and column scalings intended to equilibrate an * M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and * reduce its condition number. R returns the row scale factors and C * the column scale factors, chosen to try to make the largest entry in * each row and column of the distributed matrix B with elements * B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1. * * R(i) and C(j) are restricted to be between SMLNUM = smallest safe * number and BIGNUM = largest safe number. Use of these scaling * factors is not guaranteed to reduce the condition number of * sub( A ) but works well in practice. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the * local pieces of the M-by-N distributed matrix whose * equilibration factors are to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local output) DOUBLE PRECISION array, dimension LOCr(M_A) * If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row * scale factors for sub( A ). R is aligned with the distributed * matrix A, and replicated across every process column. R is * tied to the distributed matrix A. * * C (local output) DOUBLE PRECISION array, dimension LOCc(N_A) * If INFO = 0, C(JA:JA+N-1) contains the column scale factors * for sub( A ). C is aligned with the distributed matrix A, and * replicated down every process row. C is tied to the distri- * buted matrix A. * * ROWCND (global output) DOUBLE PRECISION * If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of * the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1). * If ROWCND >= 0.1 and AMAX is neither too large nor too small, * it is not worth scaling by R(IA:IA+M-1). * * COLCND (global output) DOUBLE PRECISION * If INFO = 0, COLCND contains the ratio of the smallest C(j) * to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it * is not worth scaling by C(JA:JA+N-1). * * AMAX (global output) DOUBLE PRECISION * Absolute value of largest distributed matrix element. If * AMAX is very close to overflow or very close to underflow, * the matrix should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, and i is * <= M: the i-th row of the distributed matrix sub( A ) * is exactly zero, * > M: the (i-M)-th column of the distributed * matrix sub( A ) is exactly zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA, $ IOFFA, IROFF, J, JJA, LDA, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM * .. * .. Local Arrays .. INTEGER DESCC( DLEN_ ), DESCR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMN2D, $ DGAMX2D, IGAMX2D, INFOG2L, PCHK1MAT, PB_TOPGET, $ PXERBLA * .. * .. External Functions .. INTEGER INDXL2G, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXL2G, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN ROWCND = ONE COLCND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Get machine constants and local indexes. * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Assign descriptors for R and C arrays * CALL DESCSET( DESCR, M, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Compute row scale factors. * DO 10 I = IIA, IIA+MP-1 R( I ) = ZERO 10 CONTINUE * * Find the maximum element in each row. * IOFFA = (JJA-1)*LDA DO 30 J = JJA, JJA+NQ-1 DO 20 I = IIA, IIA+MP-1 R( I ) = MAX( R( I ), ABS( A( IOFFA + I ) ) ) 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE CALL DGAMX2D( ICTXT, 'Rowwise', ROWCTOP, MP, 1, R( IIA ), $ MAX( 1, MP ), IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 40 I = IIA, IIA+MP-1 RCMAX = MAX( RCMAX, R( I ) ) RCMIN = MIN( RCMIN, R( I ) ) 40 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) AMAX = RCMAX * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 50 I = IIA, IIA+MP-1 IF( R( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = INDXL2G( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) - IA + 1 50 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 60 I = IIA, IIA+MP-1 R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) 60 CONTINUE * * Compute ROWCND = min(R(I)) / max(R(I)) * ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * * Compute column scale factors * DO 70 J = JJA, JJA+NQ-1 C( J ) = ZERO 70 CONTINUE * * Find the maximum element in each column, * assuming the row scaling computed above. * IOFFA = (JJA-1)*LDA DO 90 J = JJA, JJA+NQ-1 DO 80 I = IIA, IIA+MP-1 C( J ) = MAX( C( J ), ABS( A( IOFFA + I ) )*R( I ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, C( JJA ), $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 100 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 100 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 110 J = JJA, JJA+NQ-1 IF( C( J ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = M + INDXL2G( J, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) - JA + 1 110 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 120 J = JJA, JJA+NQ-1 C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) 120 CONTINUE * * Compute COLCND = min(C(J)) / max(C(J)) * COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * RETURN * * End of PDGEEQU * END scalapack-1.8.0/SRC/pstrtrs.f0000640000175000017500000003132310363532303015635 0ustar muammarmuammar SUBROUTINE PSTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSTRTRS solves a triangular system of the form * * sub( A ) * X = sub( B ) or sub( A )**T * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a triangular * distributed matrix of order N, and B(IB:IB+N-1,JB:JB+NRHS-1) is an * N-by-NRHS distributed matrix denoted by sub( B ). A check is made * to verify that sub( A ) is nonsingular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': Solve sub( A ) * X = sub( B ) (No transpose) * = 'T': Solve sub( A )**T * X = sub( B ) (Transpose) * = 'C': Solve sub( A )**T * X = sub( B ) (Transpose) * * DIAG (global input) CHARACTER * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed matrix sub( B ). NRHS >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1) ). This array * contains the local pieces of the distributed triangular * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of sub( A ) contains the lower triangular * matrix, and the strictly upper triangular part of sub( A ) * is not referenced. If DIAG = 'U', the diagonal elements of * sub( A ) are also not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the right hand side distributed matrix * sub( B ). On exit, if INFO = 0, sub( B ) is overwritten by * the solution matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the i-th diagonal element of sub( A ) is * zero, indicating that the submatrix is singular and the * solutions X have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER I, IAROW, IBROW, ICOFFA, ICTXT, ICURCOL, $ ICURROW, IROFFA, IROFFB, IDUM, II, IOFFA, J, $ JBLK, JJ, JN, LDA, LL, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK2MAT, PSTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) NOTRAN = LSAME( TRANS, 'N' ) * CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -8 ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -904 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -1304 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'D' ) END IF IDUM2( 3 ) = 3 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, NRHS, 5, $ IB, JB, DESCB, 13, 3, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTRTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first block separately * JBLK = JN-JA+1 IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 10 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 LL = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JBLK = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 20 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 LL = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Solve A * x = b or A' * x = b. * CALL PSTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * RETURN * * End of PSTRTRS * END scalapack-1.8.0/SRC/psgetrf.f0000640000175000017500000002644310363532303015575 0ustar muammarmuammar SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ) * .. * * Purpose * ======= * * PSGETRF computes an LU factorization of a general M-by-N distributed * matrix sub( A ) = (IA:IA+M-1,JA:JA+N-1) using partial pivoting with * row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal ele- * ments (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). L and U are stored in sub( A ). * * This is the right-looking Parallel Level 3 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be factored. On exit, this * array contains the local pieces of the factors L and U from * the factorization sub( A ) = P*L*U; the unit diagonal ele- * ments of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, COLCTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PSGEMM, PSGETF2, $ PSLASWP, PSTRSM, PXERBLA * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUM1, $ IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGETRF', -INFO ) RETURN END IF * * Quick return if possible * IF( DESCA( M_ ).EQ.1 ) THEN IPIV( 1 ) = 1 RETURN ELSE IF( M.EQ.0 .OR. N.EQ.0 ) THEN RETURN END IF * * Split-ring topology for the communication along process rows * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', ' ' ) * * Handle the first block of columns separately * MN = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PSGETF2( M, JB, A, IA, JA, DESCA, IPIV, INFO ) * IF( JB+1.LE.N ) THEN * * Apply interchanges to columns JN+1:JA+N-1. * CALL PSLASWP( 'Forward', 'Rows', N-JB, A, IA, JN+1, DESCA, $ IA, IN, IPIV ) * * Compute block row of U. * CALL PSTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-JB, ONE, A, IA, JA, DESCA, A, IA, JN+1, DESCA ) * IF( JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PSGEMM( 'No transpose', 'No transpose', M-JB, N-JB, JB, $ -ONE, A, IN+1, JA, DESCA, A, IA, JN+1, DESCA, $ ONE, A, IN+1, JN+1, DESCA ) * END IF END IF * * Loop over the remaining blocks of columns. * DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PSGETF2( M-J+JA, JB, A, I, J, DESCA, IPIV, IINFO ) * IF( INFO.EQ.0 .AND. IINFO.GT.0 ) $ INFO = IINFO + J - JA * * Apply interchanges to columns JA:J-JA. * CALL PSLASWP( 'Forward', 'Rowwise', J-JA, A, IA, JA, DESCA, $ I, I+JB-1, IPIV ) * IF( J-JA+JB+1.LE.N ) THEN * * Apply interchanges to columns J+JB:JA+N-1. * CALL PSLASWP( 'Forward', 'Rowwise', N-J-JB+JA, A, IA, J+JB, $ DESCA, I, I+JB-1, IPIV ) * * Compute block row of U. * CALL PSTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-J-JB+JA, ONE, A, I, J, DESCA, A, I, J+JB, $ DESCA ) * IF( J-JA+JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PSGEMM( 'No transpose', 'No transpose', M-J-JB+JA, $ N-J-JB+JA, JB, -ONE, A, I+JB, J, DESCA, A, $ I, J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * END IF END IF * 10 CONTINUE * IF( INFO.EQ.0 ) $ INFO = MN + 1 CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, INFO, 1, IDUM1, IDUM2, $ -1, -1, MYCOL ) IF( INFO.EQ.MN+1 ) $ INFO = 0 * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * RETURN * * End of PSGETRF * END scalapack-1.8.0/SRC/pclarfb.f0000640000175000017500000010420210363532303015522 0ustar muammarmuammar SUBROUTINE PCLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARFB applies a complex block reflector Q or its conjugate * transpose Q**H to a complex M-by-N distributed matrix sub( C ) * denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * DIRECT (global input) CHARACTER * Indicates how Q is formed from a product of elementary * reflectors * = 'F': Q = H(1) H(2) . . . H(k) (Forward) * = 'B': Q = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * V (local input) COMPLEX pointer into the local memory * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and * SIDE = 'R'. It contains the local pieces of the distributed * vectors V representing the Householder transformation. * See further details. * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1)); * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1)); * if STOREV = 'R', LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) COMPLEX array, dimension MB_V by MB_V * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian- * gular matrix T in the representation of the block reflector. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL FORWARD CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW, $ ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV, $ ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV, $ IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV, $ ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND, $ JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC, $ MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV, $ NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D,CGEMM, $ CGSUM2D, CLACPY, CLASET, CTRBR2D, $ CTRBS2D, CTRMM, INFOG1L, INFOG2L, PB_TOPGET, $ PBCTRAN * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF FORWARD = LSAME( DIRECT, 'F' ) IF( FORWARD ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) IROFFV = MOD( IV-1, MBV ) ICOFFV = MOD( JV-1, NBV ) MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYCOL.EQ.ICCOL ) $ NQC = NQC - ICOFFC IF( MYROW.EQ.ICROW ) $ MPC = MPC - IROFFC JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFC = IIC + ( JJC-1 ) * LDC IOFFV = IIV + ( JJV-1 ) * LDV * IF( LSAME( STOREV, 'C' ) ) THEN * * V is stored columnwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV ) * IPV = 1 IPW = IPV + MPC * K LV = MAX( 1, MPC ) LW = MAX( 1, NQC ) * * Broadcast V to the other process columns. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ V( IOFFV ), LDV ) IF( MYROW.EQ.IVROW ) $ CALL CTRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV ) CALL CLACPY( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ WORK( IPV ), LV, MYROW, IVCOL ) IF( MYROW.EQ.IVROW ) $ CALL CTRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV, MYROW, IVCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPV) = ( V1 ) where V1 is unit lower triangular, * ( V2 ) zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + MPC - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 10 CONTINUE IF( K-ITOP .GT.0 ) THEN CALL CLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 10 END IF * ELSE * * WORK(IPV) = ( V1 ) where V2 is unit upper triangular, * ( V2 ) zeroes lower triangular part of V2 * JJ = JJV IOFF = MOD( IV+M-K-1, MBV ) CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ), $ II, ILASTROW ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 20 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL CLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL CLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPV+II-IIV+ITOP*LV ), LV ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 20 END IF * END IF * * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K * IF( MPC.GT.0 ) THEN CALL CGEMM( 'Conjugate transpose', 'No transpose', NQC, $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), LV, $ ZERO, WORK( IPW ), LW ) ELSE CALL CLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * CALL CTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, NBV, WORK( IPW ), LW ) CALL CGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL CGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC, $ K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form sub( C )*Q or sub( C )*Q' * * ICOFFC = IROFFV is required by the current transposition * routine PBCTRAN * NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NPV = NPV0 - IROFFV ELSE NPV = NPV0 END IF IF( MYCOL.EQ.ICCOL ) THEN NQC0 = NQC + ICOFFC ELSE NQC0 = NQC END IF * * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]' * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]' * WORK( IPT ) is the workspace for PBCTRAN * IPV = 1 IPW = IPV + K * NQC0 IPT = IPW + NPV0 * K LV = MAX( 1, K ) LW = MAX( 1, NPV0 ) * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN CALL CLASET( 'All', IROFFV, K, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + IROFFV CALL CLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL CLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower * triangular, zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + NPV - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 30 CONTINUE IF( ( K-ITOP ).GT.0 ) THEN CALL CLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ), $ LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 30 END IF * ELSE * * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper * triangular, zeroes lower triangular part of V2. * JJ = JJV CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW, $ DESCV( RSRC_ ), II, ILASTROW ) IOFF = MOD( IV+N-K-1, MBV ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 40 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL CLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL CLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPW1+II-IIV+ITOP*LW ), LW ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 40 END IF END IF END IF * CALL PBCTRAN( ICTXT, 'Columnwise', 'Conjugate transpose', $ N+IROFFV, K, MBV, WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, -1, ICCOL, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC * IF( MYCOL.EQ.ICCOL ) $ IPV = IPV + ICOFFC * LV * * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K * LW = MAX( 1, MPC ) * IF( NQC.GT.0 ) THEN CALL CGEMM( 'No transpose', 'Conjugate transpose', MPC, $ K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL CLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN * * Broadcast the block reflector to the other rows. * CALL CTRBS2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV ) ELSE CALL CTRBR2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV, IVROW, MYCOL ) END IF CALL CTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, NBV, WORK( IPW ), LW ) * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V' * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL CGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) END IF * ELSE * * V is stored rowwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC = ICOFFV is required by the current transposition * routine PBCTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW ) THEN MPC0 = MPC + IROFFC ELSE MPC0 = MPC END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBCTRAN * IPV = 1 IPW = IPV + MPC0 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC0 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL CLASET( 'All', K, ICOFFV, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + ICOFFV * LW CALL CLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL CLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + MQV - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 50 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL CLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ), $ LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 50 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL, $ DESCV( CSRC_ ), JJ, ILASTCOL ) IOFF = MOD( JV+M-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 60 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL CLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL CLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 60 END IF END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K * CALL PBCTRAN( ICTXT, 'Rowwise', 'Conjugate transpose', K, $ M+ICOFFV, NBV, WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K * IF( MYROW.EQ.ICROW ) $ IPV = IPV + IROFFC * * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V' * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K * LW = MAX( 1, NQC ) * IF( MPC.GT.0 ) THEN CALL CGEMM( 'Conjugate transpose', 'No transpose', NQC, $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL CLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL CTRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV ) ELSE CALL CTRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL CTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V' * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL CGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC, $ K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC LV = MAX( 1, K ) LW = MAX( 1, MPC ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL CTRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV ) CALL CLACPY( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL CTRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + NQC - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 70 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL CLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ), $ LV ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 70 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ), $ JJ, ILASTCOL ) IOFF = MOD( JV+N-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 80 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL CLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL CLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 80 END IF * END IF * * WORK( IPV ) is K x NQC = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K * IF( NQC.GT.0 ) THEN CALL CGEMM( 'No transpose', 'Conjugate transpose', MPC, $ K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL CLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL CTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL CGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) * END IF * END IF * RETURN * * End of PCLARFB * END scalapack-1.8.0/SRC/pddbtrs.f0000640000175000017500000006410210363532303015557 0ustar muammarmuammar SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDDBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PDDBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Routine PDDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDBTRF and this is stored in AF. If a linear system * is to be solved using PDDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PDDBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -15 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -3 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( MAX( BWL, BWU )*NRHS ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, 'PDDBTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PDDBTRSV( 'L', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PDDBTRSV( 'U', 'T', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF * * Call backsolve routine * IF( ( LSAME( TRANS, 'C' ) ) .OR. ( LSAME( TRANS, 'T' ) ) ) THEN * CALL PDDBTRSV( 'L', 'T', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PDDBTRSV( 'U', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDDBTRS * END scalapack-1.8.0/SRC/psdbtrf.f0000640000175000017500000012677310363532303015576 0ustar muammarmuammar SUBROUTINE PSDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 12, 2002 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), AF( * ), WORK( * ) * .. * * Purpose * ======= * * PSDBTRF computes a LU factorization * of an N-by-N real banded * diagonally dominant-like distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PSDBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDBTRF and this is stored in AF. If a linear system * is to be solved using PSDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * max(bwl,bwu)*max(bwl,bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, I1, I2, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MAX_BW, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, UP_PREV_TRI_SIZE_M, $ UP_PREV_TRI_SIZE_N, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, SAXPY, SDBTRF, $ DESC_CONVERT, SGEMM, SGEMV, SGERV2D, SGESD2D, $ SLACPY, SLATCPY, STBTRS, STRMM, STRRV2D, $ STRSD2D, GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, $ PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX( BWL, BWU ) MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -2 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PSDBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PSDBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = NB*( BWL+BWU ) + 6*MAX( BWL, BWU )*MAX( BWL, BWU ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PSDBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = MAX( BWL, BWU )*MAX( BWL, BWU ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PSDBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 140 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW END IF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Zero out space for work * DO 20 I = 1, WORK_SIZE_MIN WORK( I ) = ZERO 20 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.GT.0 ) THEN UP_PREV_TRI_SIZE_M = MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) UP_PREV_TRI_SIZE_N = MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL STRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+( MY_NUM_COLS-BWL )* $ LLDA+( BWL+BWU+1 ) ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL SDBTRF( ODD_SIZE, ODD_SIZE, BWL, BWU, A( OFST+1 ), LLDA, $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 30 END IF * * IF( MYCOL.LT.NP-1 ) THEN * * Apply factorization to lower connection block BL_i * transpose the connection block in preparation. * Apply factorization to upper connection block BU_i * Move the connection block in preparation. * CALL SLATCPY( 'U', BWL, BWL, A( ( OFST+( BWL+BWU+1 )+ $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW ) CALL SLACPY( 'L', BWU, BWU, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW- $ BWU ), MAX_BW ) * * Perform the triangular system solve {L_i}{{BU'}_i} = {B_i} * CALL STBTRS( 'L', 'N', 'U', BWU, BWL, BWU, $ A( OFST+BWU+1+( ODD_SIZE-BWU )*LLDA ), LLDA, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, INFO ) * * Perform the triangular solve {U_i}^T{BL'}_i^T = {BL_i}^T * CALL STBTRS( 'U', 'T', 'N', BWL, BWU, BWL, $ A( OFST+1+( ODD_SIZE-BWL )*LLDA ), LLDA, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ INFO ) * * transpose resulting block to its location * in main storage. * CALL SLATCPY( 'L', BWL, BWL, AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW- $ BWL ), MAX_BW, A( ( OFST+( BWL+BWU+1 )+ $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1 ) * * Move the resulting block back to its location in main storage. * CALL SLACPY( 'L', BWU, BWU, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+ $ MAX_BW-BWU ), MAX_BW, A( ( OFST+1+ODD_SIZE* $ LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL SGEMM( 'T', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, ONE, $ A( OFST+ODD_SIZE*LLDA+1+BWU ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 30 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bwl, bwu)*bwl, bwu+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL STRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( WORK_U+1 ), BWL, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * * Transpose transmitted triangular matrix $DL_i$ * DO 50 I1 = 1, BWL DO 40 I2 = I1 + 1, BWL AF( WORK_U+I2+( I1-1 )*BWL ) = AF( WORK_U+I1+( I2-1 )* $ BWL ) AF( WORK_U+I1+( I2-1 )*BWL ) = ZERO 40 CONTINUE 50 CONTINUE * DO 60 I1 = 2, ODD_SIZE I2 = MIN( I1-1, BWL ) CALL SGEMV( 'N', BWL, I2, -ONE, $ AF( WORK_U+1+( I1-1-I2 )*BWL ), BWL, $ A( OFST+BWU+1+I2+( I1-1-I2 )*LLDA ), LLDA-1, $ ONE, AF( WORK_U+1+( I1-1 )*BWL ), 1 ) 60 CONTINUE * * * Calculate the "spike" fillin, ${U_i}^T {{GL}_i}^T = {DU_i}^T$ * * * Copy D block into AF storage for solve. * CALL SLACPY( 'L', UP_PREV_TRI_SIZE_N, UP_PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), BWU ) * DO 80 I1 = 1, ODD_SIZE I2 = MIN( BWU, I1-1 ) CALL SGEMV( 'N', BWU, I2, -ONE, AF( ( I1-1-I2 )*BWU+1 ), $ BWU, A( OFST+BWU+1-I2+( I1-1 )*LLDA ), 1, $ ONE, AF( ( I1-1 )*BWU+1 ), 1 ) * DO 70 I = 1, BWU AF( ( I1-1 )*BWU+I ) = AF( ( I1-1 )*BWU+I ) / $ A( ( I1-1 )*LLDA+BWU+1 ) 70 CONTINUE 80 CONTINUE * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * * * Zero out space in case result is smaller than storage block * DO 90 I = 1, MBW2 AF( ODD_SIZE*BWU+2*MBW2+I ) = ZERO 90 CONTINUE * CALL SGEMM( 'N', 'T', BWU, BWL, ODD_SIZE, -ONE, AF( 1 ), $ BWU, AF( WORK_U+1 ), BWL, ZERO, $ AF( 1+MAX( 0, BWL-BWU )+ODD_SIZE*BWU+( 2*MAX_BW+ $ MAX( 0, BWU-BWL ) )*MAX_BW ), MAX_BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * * Copy matrix HU_i (the last bwl rows of GU_i) to AFL storage * as per requirements of BLAS routine DTRMM. * Since we have GU_i stored, * transpose HU_i to HU_i^T. * CALL SLACPY( 'N', BWL, BWL, $ AF( WORK_U+( ODD_SIZE-BWL )*BWL+1 ), BWL, $ AF( ( ODD_SIZE )*BWU+1+( MAX_BW-BWL ) ), $ MAX_BW ) * CALL STRMM( 'R', 'U', 'T', 'N', BWL, BWL, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, AF( ( ODD_SIZE )*BWU+1+ $ ( MAX_BW-BWL ) ), MAX_BW ) * * * Copy matrix HL_i (the last bwu rows of GL_i^T) to AFU store * as per requirements of BLAS routine DTRMM. * Since we have GL_i^T stored, * transpose HL_i^T to HL_i. * CALL SLACPY( 'N', BWU, BWU, AF( ( ODD_SIZE-BWU )*BWU+1 ), $ BWU, AF( WORK_U+( ODD_SIZE )*BWL+1+MAX_BW- $ BWU ), MAX_BW ) * CALL STRMM( 'R', 'L', 'N', 'N', BWU, BWU, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( WORK_U+( ODD_SIZE )*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 130 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 120 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, MYCOL-1 ) * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL SLACPY( 'N', MAX_BW, MAX_BW, A( OFST+ODD_SIZE*LLDA+BWU+1 ), $ LLDA-1, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL SAXPY( MBW2, ONE, AF( ODD_SIZE*BWU+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 100 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 110 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, AF( ODD_SIZE*BWU+MBW2+1 ), $ 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, AF( ODD_SIZE*BWU+MBW2+1 ), $ 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 100 110 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL SDBTRF( MAX_BW, MAX_BW, MIN( MAX_BW-1, BWL ), $ MIN( MAX_BW-1, BWU ), AF( ODD_SIZE*BWU+MBW2+1- $ ( MIN( MAX_BW-1, BWU ) ) ), MAX_BW+1, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL SLACPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW ) * CALL SLACPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, COMM_PROC ) * CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL STBTRS( 'L', 'N', 'U', BWU, MIN( BWL, BWU-1 ), BWU, $ AF( ODD_SIZE*BWU+MBW2+1+( MAX_BW+1 )*( MAX_BW- $ BWU ) ), MAX_BW+1, AF( WORK_U+ODD_SIZE*BWL+1+ $ MAX_BW-BWU ), MAX_BW, INFO ) * * Modify lower off_diagonal block with diagonal block * * CALL STBTRS( 'U', 'T', 'N', BWL, MIN( BWU, BWL-1 ), BWL, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ BWL-1 )+( MAX_BW+1 )*( MAX_BW-BWL ) ), $ MAX_BW+1, AF( ODD_SIZE*BWU+1+MAX_BW-BWL ), $ MAX_BW, INFO ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST / 2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * * * Since STBTRS has no "left-right" option, we must transpose * CALL SLATCPY( 'N', MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW ) * CALL STBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ BWL, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ WORK( 1+MAX_BW*( MAX_BW-BWL ) ), MAX_BW, INFO ) * * Transpose back * CALL SLATCPY( 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW ) * * * * Since STBTRS has no "left-right" option, we must transpose * CALL SLATCPY( 'N', MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW ) * CALL STBTRS( 'U', 'T', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ BWU, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ WORK( 1+MAX_BW*( MAX_BW-BWU ) ), MAX_BW, INFO ) * * Transpose back * CALL SLATCPY( 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'T', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+( ODD_SIZE )*BWL+2*MBW2+1 ), MAX_BW, $ ZERO, WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL SGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ AF( ODD_SIZE*BWU+1 ), MAX_BW, ZERO, WORK( 1 ), $ MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ COMM_PROC ) * CALL SGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 120 CONTINUE * * 130 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 140 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PSDBTRF * END scalapack-1.8.0/SRC/pddtsv.f0000640000175000017500000004620610363532303015426 0ustar muammarmuammar SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION B( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PDDTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal diagonally dominant-like distributed * matrix. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PDDTTRF and PDDTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL+3*NB) * +max(10*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PDDTTRF, PDDTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDDTTRF and PDDTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PDDTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL+3*NB) * * Factor the matrix * CALL PDDTTRF( N, DL, D, DU, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PDDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) RETURN END IF * RETURN * * End of PDDTSV * END scalapack-1.8.0/SRC/pslapiv.f0000640000175000017500000003364110363532303015577 0ustar muammarmuammar SUBROUTINE PSLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ), IWORK( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLAPIV applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), resulting in row or column * pivoting. The pivot vector may be distributed across a process row * or a column. The pivot vector should be aligned with the distributed * matrix A. This routine will transpose the pivot vector if necessary. * For example if the row pivots should be applied to the columns of * sub( A ), pass ROWCOL='C' and PIVROC='C'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Restrictions * ============ * * IPIV must always be a distributed vector (not a matrix). Thus: * IF( ROWPIV .EQ. 'C' ) THEN * JP must be 1 * ELSE * IP must be 1 * END IF * * The following restrictions apply when IPIV must be transposed: * IF( ROWPIV.EQ.'C' .AND. PIVROC.EQ.'C') THEN * DESCIP(MB_) must equal DESCA(NB_) * ELSE IF( ROWPIV.EQ.'R" .AND. PIVROC.EQ.'R') THEN * DESCIP(NB_) must equal DESCA(MB_) * END IF * * Arguments * ========= * * DIREC (global input) CHARACTER*1 * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P*sub( A ). * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P )*sub( A ). * * ROWCOL (global input) CHARACTER*1 * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * PIVROC (global input) CHARACTER*1 * Specifies whether IPIV is distributed over a process row * or column: * = 'R' IPIV distributed over a process row * = 'C' IPIV distributed over a process column * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * distributed submatrix sub( A ) to which the row or column * interchanges will be applied. On exit, the local pieces * of the permuted distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension (LIPIV) where LIPIV is * when ROWCOL='R' or 'r': * >= LOCr( IA+M-1 ) + MB_A if PIVROC='C' or 'c', * >= LOCc( M + MOD(JP-1,NB_P) ) if PIVROC='R' or 'r', and, * when ROWCOL='C' or 'c': * >= LOCr( N + MOD(IP-1,MB_P) ) if PIVROC='C' or 'c', * >= LOCc( JA+N-1 ) + NB_A if PIVROC='R' or 'r'. * This array contains the pivoting information. IPIV(i) is the * global row (column), local row (column) i was swapped with. * When ROWCOL='R' or 'r' and PIVROC='C' or 'c', or ROWCOL='C' * or 'c' and PIVROC='R' or 'r', the last piece of this array of * size MB_A (resp. NB_A) is used as workspace. In those cases, * this array is tied to the distributed matrix A. * * IP (global input) INTEGER * The row index in the global array P indicating the first * row of sub( P ). * * JP (global input) INTEGER * The column index in the global array P indicating the * first column of sub( P ). * * DESCIP (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed vector IPIV. * * IWORK (local workspace) INTEGER array, dimension (LDW) * where LDW is equal to the workspace necessary for * transposition, and the storage of the tranposed IPIV: * * Let LCM be the least common multiple of NPROW and NPCOL. * IF( ROWCOL.EQ.'R' .AND. PIVROC.EQ.'R' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + NB_P * ELSE * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + * NB_P * CEIL( CEIL(LOCc(N_P)/NB_P) / (LCM/NPCOL) ) * END IF * ELSE IF( ROWCOL.EQ.'C' .AND. PIVROC.EQ.'C' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P * ELSE * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * END IF * ELSE * IWORK is not referenced. * END IF * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ROWPVT INTEGER I, ICTXT, ICURCOL, ICURROW, IIP, ITMP, IPT, $ JJP, JPT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCPT( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, $ INFOG2L, PICOL2ROW, PIROW2COL, PSLAPV2 * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC, INDXG2P EXTERNAL LSAME, NUMROC, INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'C' ) ) THEN CALL PSLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PSLAPV2 * ELSE * * Take IPIV distributed over row 0, and store it in * iwork, distributed over column 0 * IPT = MOD( JP-1, DESCA(MB_) ) DESCPT(M_) = M + IPT + NPROW*DESCA(MB_) DESCPT(N_) = 1 DESCPT(MB_) = DESCA(MB_) DESCPT(NB_) = 1 DESCPT(RSRC_) = INDXG2P( IA, DESCA(MB_), IA, DESCA(RSRC_), $ NPROW ) DESCPT(CSRC_) = MYCOL DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = NUMROC( DESCPT(M_), DESCPT(MB_), MYROW, $ DESCPT(RSRC_), NPROW ) ITMP = NUMROC( DESCIP(N_), DESCIP(NB_), MYCOL, $ DESCIP(CSRC_), NPCOL ) CALL INFOG2L( IP, JP-IPT, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) CALL PIROW2COL( ICTXT, M+IPT, 1, DESCIP(NB_), IPIV(JJP), $ ITMP, IWORK, DESCPT(LLD_), 0, ICURCOL, $ DESCPT(RSRC_), $ MYCOL, IWORK(DESCPT(LLD_)-DESCPT(MB_)+1) ) * * Send column-distributed pivots to all columns * ITMP = DESCPT(LLD_) - DESCPT(MB_) IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP, $ MYROW, 0 ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * IPT = IPT + 1 DO 10 I = 1, ITMP IWORK(I) = IWORK(I) - JP + IPT 10 CONTINUE CALL PSLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ IPT, 1, DESCPT ) END IF * * Otherwise, we're pivoting the columns of sub( A ) * ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'R' ) ) THEN CALL PSLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PSLAPV2 * ELSE * * Take IPIV distributed over column 0, and store it in * iwork, distributed over row 0 * JPT = MOD( IP-1, DESCA(NB_) ) DESCPT(M_) = 1 DESCPT(N_) = N + JPT + NPCOL*DESCA(NB_) DESCPT(MB_) = 1 DESCPT(NB_) = DESCA(NB_) DESCPT(RSRC_) = MYROW DESCPT(CSRC_) = INDXG2P( JA, DESCA(NB_), JA, DESCA(CSRC_), $ NPCOL ) DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = 1 CALL INFOG2L( IP-JPT, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) ITMP = NUMROC( N+JPT, DESCPT(NB_), MYCOL, DESCPT(CSRC_), $ NPCOL ) CALL PICOL2ROW( ICTXT, N+JPT, 1, DESCIP(MB_), IPIV(IIP), $ DESCIP(LLD_), IWORK, MAX(1, ITMP), ICURROW, $ 0, 0, DESCPT(CSRC_), IWORK(ITMP+1) ) * * Send row-distributed pivots to all rows * IF( MYROW.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP, 0, MYCOL ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * JPT = JPT + 1 DO 20 I = 1, ITMP IWORK(I) = IWORK(I) - IP + JPT 20 CONTINUE CALL PSLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ 1, JPT, DESCPT ) END IF END IF * RETURN * * End of PSLAPIV * END scalapack-1.8.0/SRC/pstrcon.f0000640000175000017500000004031710363532303015607 0ustar muammarmuammar SUBROUTINE PSTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LIWORK, LWORK, N REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSTRCON estimates the reciprocal of the condition number of a * triangular distributed matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm. * * The norm of A(IA:IA+N-1,JA:JA+N-1) is computed and an estimate is * obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), then the reciprocal * of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm; * = 'I': Infinity-norm. * * UPLO (global input) CHARACTER * = 'U': A(IA:IA+N-1,JA:JA+N-1) is upper triangular; * = 'L': A(IA:IA+N-1,JA:JA+N-1) is lower triangular. * * DIAG (global input) CHARACTER * = 'N': A(IA:IA+N-1,JA:JA+N-1) is non-unit triangular; * = 'U': A(IA:IA+N-1,JA:JA+N-1) is unit triangular. * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the triangular distributed * matrix A(IA:IA+N-1,JA:JA+N-1). If UPLO = 'U', the leading * N-by-N upper triangular part of this distributed matrix con- * tains the upper triangular matrix, and its strictly lower * triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of this ditributed * matrix contains the lower triangular matrix, and the strictly * upper triangular part is not referenced. If DIAG = 'U', the * diagonal elements of A(IA:IA+N-1,JA:JA+N-1) are also not * referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + LOCc(N+MOD(JA-1,NB_A)) * + MAX( 2, MAX( NB_A*MAX( 1, CEIL(NPROW-1,NPCOL) ), * LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX( 1, CEIL(NPCOL-1,NPROW) ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOUNIT, ONENRM, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPN, IPV, IPW, $ IPX, IROFF, IV, IX, IXX, JJA, JV, JX, KASE, $ KASE1, LIWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, $ NPMOD, NPROW, NQ, NQMOD REAL AINVNM, ANORM, SCALE, SMLNUM REAL WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 5 ), $ IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK1MAT, PSAMAX, PSLATRS, PSLACON, $ PSRSCL, PB_TOPGET, PB_TOPSET, PXERBLA, SGEBR2D, $ SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH, PSLANTR EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH, $ PSLANTR * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) NOUNIT = LSAME( DIAG, 'N' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 11 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 13 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 5, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTRCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * RCOND = ZERO SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' )*REAL( MAX( 1, N ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPN = IPV + NP IPW = IPN + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Compute the norm of the triangular matrix A. * ANORM = PSLANTR( NORM, UPLO, DIAG, N, N, A, IA, JA, DESCA, WORK ) * * Continue only if ANORM > 0. * IF( ANORM.GT.ZERO ) THEN * * Estimate the norm of the inverse of A. * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 10 CONTINUE CALL PSLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), $ IX, JX, DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(A). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( UPLO, 'No transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, WORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(A'). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( UPLO, 'Transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, WORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF NORMIN = 'Y' * * Multiply by 1/SCALE if doing so will not cause overflow. * IF( SCALE.NE.ONE ) THEN CALL PSAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, $ DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL SGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1 ) ELSE CALL SGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1, IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / ANORM ) / AINVNM END IF * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PSTRCON * END scalapack-1.8.0/SRC/pclasmsub.f0000640000175000017500000003161510602576752016125 0ustar muammarmuammar SUBROUTINE PCLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, K, L, LWORK REAL SMLNUM * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), BUF( * ) * .. * * Purpose * ======= * * PCLASMSUB looks for a small subdiagonal element from the bottom * of the matrix that it can safely set to zero. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) COMPLEX array, dimension (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * K (global output) INTEGER * On exit, this yields the bottom portion of the unreduced * submatrix. This will satisfy: L <= M <= I-1. * * SMLNUM (global input) REAL * On entry, a "small number" for the given matrix. * Unchanged on exit. * * BUF (local output) COMPLEX array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * This code is basically a parallelization of the following snip * of LAPACK code from CLAHQR: * * Look for a single small subdiagonal element. * * DO 20 K = I, L + 1, -1 * TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) * IF( TST1.EQ.ZERO ) * $ TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) * IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) * $ GO TO 30 * 20 CONTINUE * 30 CONTINUE * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2, $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC, $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA, $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, $ RIGHT, UP REAL TST1, ULP COMPLEX CDUM, H10, H11, H22 * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH EXTERNAL ILCM, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, INFOG1L, INFOG2L, $ CGERV2D, CGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, AIMAG, MAX, MOD * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS * ISTR1 = 0 ISTR2 = ( ( I-L ) / HBL ) IF( ISTR2*HBL.LT.( I-L ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.2*ISTR2 ) THEN * * Error! * RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY * UP THE TRIDIAGONAL * IBUF1 = 0 IBUF2 = 0 IRCV1 = 0 IRCV2 = 0 DO 10 K = I, L + 1, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * ADD UP THE RECEIVES * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT. $ 1 ) ) ) THEN * * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN * * WE MUST RECEIVE H(K ,K-1) FROM LEFT * IRCV2 = IRCV2 + 1 END IF END IF * * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND * IF( IBUF1.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW, $ RIGHT ) END IF * * RECEIVE APPROPRIATE DATA IF THERE IS ANY * IF( IRCV1.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW, $ LEFT ) END IF * * START MAIN LOOP * IBUF1 = 0 IBUF2 = 0 CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT. * * Start loop for subdiagonal search * DO 40 K = I, L + 1, -1 IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN * * Grab information from WORK array * IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H11 = BUF( ISTR1+IBUF1 ) ELSE H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF2 = IBUF2 + 1 H10 = BUF( ISTR2+IBUF2 ) ELSE H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF ELSE * * Information is local * H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF H22 = A( ( ICOL1-1 )*LDA+IROW1 ) TST1 = CABS1( H11 ) + CABS1( H22 ) IF( TST1.EQ.ZERO ) THEN * * FIND SOME NORM OF THE LOCAL H(L:I,L:I) * CALL INFOG1L( L, HBL, NPROW, MYROW, 0, ITMP1, III ) IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW ) CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ITMP2, III ) ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL ) DO 30 III = ITMP1, IROW2 DO 20 JJJ = ITMP2, ICOL2 TST1 = TST1 + CABS1( A( ( JJJ-1 )*LDA+III ) ) 20 CONTINUE 30 CONTINUE END IF IF( CABS1( H10 ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 50 IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN II = MOD( II+NPROW-1, NPROW ) JJ = MOD( JJ+NPCOL-1, NPCOL ) CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) END IF 40 CONTINUE 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) RETURN * * End of PCLASMSUB * END scalapack-1.8.0/SRC/pcgbtrf.f0000640000175000017500000010771510363532303015554 0ustar muammarmuammar SUBROUTINE PCGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PCGBTRF computes a LU factorization * of an N-by-N complex banded * distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PCGBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) Q = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P and Q are permutation matrices. * The matrix Q represents reordering of columns * for parallelism's sake, while P represents * reordering of rows for numerical stability using * classic partial pivoting. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCGBTRF and this is stored in AF. If a linear system * is to be solved using PCGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 1 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian Natonal University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW, $ CSRC, DBPTR, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM3, J, JA_NEW, JPTR, L, LAF_MIN, $ LBWL, LBWU, LDB, LDBB, LLDA, LM, LMJ, LN, LNJ, $ LPTR, MYCOL, MYROW, MY_NUM_COLS, NB, NEICOL, $ NP, NPACT, NPCOL, NPROW, NPSTR, NP_SAVE, NRHS, $ ODD_N, ODD_SIZE, ODPTR, OFST, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, CAXPY, CGEMM, $ CGERV2D, CGESD2D, CLACPY, CLATCPY, CPBTRF, $ CPOTRF, CSYRK, CTBTRS, CTRMM, CTRRV2D, CTRSD2D, $ CTRSM, CTRTRS, DESC_CONVERT, GLOBCHK, PXERBLA, $ RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -11 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -2 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (2*BWL+2*BWU+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * BW = BWU+BWL * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PCGBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.(BWL+BWU+1) )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PCGBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * IF( LAF .LT. LAF_MIN ) THEN INFO = -9 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PCGBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 1 * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -11 * put minimum value of work into work( 1 ) WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, $ 'PCGBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 11 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCGBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * DO 9 J = 1, ODD_SIZE DO 8 I = 1, BW A( I+(J-1)*LLDA ) = CZERO 8 CONTINUE 9 CONTINUE * * Begin main code * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Transfer triangle B_i of local matrix to next processor * for fillin. Overlap the send with the factorization of A_i. * IF (MYCOL .LE. NPCOL-2) THEN * * The last processor does not need to send anything. * BIPTR = location of triangle B_i in memory BIPTR = (NB-BW)*LLDA + 2*BW+1 * CALL CTRSD2D( ICTXT, 'U', 'N', $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0, NPCOL ) ), $ BW, A(BIPTR), LLDA-1, 0, MYCOL+1) * ENDIF * * Factor main partition P_i A_i = L_i U_i on each processor * * LBWL, LBWU: lower and upper bandwidth of local solver * Note that for MYCOL > 0 one has lower triangular blocks! * LM is the number of rows which is usually NB except for * MYCOL = 0 where it is BWU less and MYCOL=NPCOL-1 where it * is NR+BWU where NR is the number of columns on the last processor * Finally APTR is the pointer to the first element of A. As LAPACK * has a slightly different matrix format than Scalapack the pointer * has to be adjusted on processor MYCOL=0. * IF (MYCOL .NE. 0) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1+BWU ENDIF * IF (MYCOL .NE. NPCOL-1) THEN LM = NB - LBWU LN = NB - BW ELSE IF (MYCOL .NE. 0) THEN LM = ODD_SIZE + BWU LN = MAX(ODD_SIZE-BW,0) ELSE LM = N LN = MAX( N-BW, 0 ) ENDIF * IF (LN .GT. 0) THEN * CALL CGBTRF(LM,LN, LBWL,LBWU, A(APTR),LLDA, IPIV, INFO) * IF( INFO.NE.0 ) THEN INFO = INFO + NB*MYCOL GO TO 90 END IF * NRHS = BW LDB = LLDA-1 * * Update the last BW columns of A_i (code modified from CGBTRS) * * Only the eliminations of unknowns > LN-BW have an effect on * the last BW columns. Loop over them... * DO 23 J = MAX(LN-BW+1,1), LN * LMJ = MIN( LBWL, LM-J ) LNJ = MIN( BW, J+BW-LN+APTR-1 ) * L = IPIV( J ) * JPTR = J-(LN+1)+2*BW+1-LBWL + LN*LLDA * IF( L.NE.J ) THEN * * Element (L,LN+1) is swapped with element (J,LN+1) etc * Furthermore, the elements in the same row are LDB=LLDA-1 apart * The complicated formulas are to cope with the banded * data format: * LPTR = L-(LN+1)+2*BW+1-LBWL + LN*LLDA * CALL CSWAP( LNJ, A(LPTR),LDB, A(JPTR), LDB ) * ENDIF * * LPTR is the pointer to the beginning of the * coefficients of L * LPTR = BW+1+APTR + (J-1)*LLDA * CALL CGERU(LMJ,LNJ,-CONE, A(LPTR),1, A(JPTR),LDB, $ A(JPTR+1),LDB) 23 CONTINUE * ENDIF * * Compute spike fill-in, L_i F_i = P_i B_{i-1} * * Receive triangle B_{i-1} from previous processor * IF (MYCOL .GT. 0) THEN * CALL CTRRV2D( ICTXT, 'U', 'N', MIN(BW, LM), BW, AF( 1 ), $ LM, 0, MYCOL-1) * * * Permutation and forward elimination (triang. solve) * DO 24 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L .NE. J ) THEN * CALL CSWAP(NRHS, AF(L), LM, AF(J), LM ) ENDIF * LPTR = BW+1+APTR + (J-1)*LLDA * CALL CGERU( LMJ,NRHS, -CONE, A(LPTR),1, $ AF(J), LM, AF(J+1), LM) * 24 CONTINUE * ENDIF * 90 CONTINUE * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU BN = BW ELSE BM = MIN(BW,ODD_SIZE) + BWU BN = MIN(BW,ODD_SIZE) ENDIF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = (NB+BWU)*BW + 1 LDBB = 2*BW + BWU * * Copy from A and AF into block bidiagonal matrix (tail of AF) * * DBPTR = Pointer to diagonal blocks in A DBPTR = BW+1 + LBWU + LN*LLDA * CALL CLACPY('G',BM,BN, A(DBPTR),LLDA-1, $ AF(BBPTR + BW*LDBB),LDBB) * * Zero out any junk entries that were copied * DO 870 J=1, BM DO 880 I=J+LBWL, BM-1 AF( BBPTR+BW*LDBB+(J-1)*LDBB+I ) = CZERO 880 CONTINUE 870 CONTINUE * IF (MYCOL .NE. 0) THEN * * ODPTR = Pointer to offdiagonal blocks in A * ODPTR = LM-BM+1 CALL CLACPY('G',BM,BW, AF(ODPTR),LM, $ AF(BBPTR +2*BW*LDBB),LDBB) ENDIF * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. CALL CGETRF( N-LN, N-LN, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) * ENDIF * * Loop over levels ... only occurs if npcol > 1 * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) are used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels * 200 IF (NPACT .LE. 1) GOTO 300 * * Test if processor is active * IF (MOD(MYCOL,NPSTR) .EQ. 0) THEN * * Send/Receive blocks * * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * * This node will potentially do more work later * NEICOL = MYCOL + NPSTR * IF (NEICOL/NPSTR .LT. NPACT-1) THEN BMN = BW ELSE IF (NEICOL/NPSTR .EQ. NPACT-1) THEN ODD_N = NUMROC(N, NB, NPCOL-1, 0, NPCOL) BMN = MIN(BW,ODD_N) + BWU ELSE * * Last processor skips to next level GOTO 250 ENDIF * * BM1 = M for 1st block on proc pair, BM2 2nd block * BM1 = BM BM2 = BMN * IF (NEICOL/NPSTR .LE. NPACT-1 )THEN * CALL CGESD2D( ICTXT, BM, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL ) * CALL CGERV2D( ICTXT, BMN, 2*BW, AF(BBPTR+BM), $ LDBB, 0, NEICOL) * IF( NPACT .EQ. 2 ) THEN * * Copy diagonal block to align whole system * CALL CLACPY( 'G', BMN, BW, AF( BBPTR+BM ), $ LDBB, AF( BBPTR+2*BW*LDBB+BM ), LDBB ) ENDIF * ENDIF * ELSE * * This node stops work after this stage -- an extra copy * is required to make the odd and even frontal matrices * look identical * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * BM1 = BMN BM2 = BM * CALL CGESD2D( ICTXT, BM, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL ) * CALL CLACPY('G',BM, 2*BW, AF(BBPTR+BW*LDBB),LDBB, $ AF(BBPTR+BMN),LDBB) * DO 31 J=BBPTR+2*BW*LDBB, BBPTR+3*BW*LDBB-1, LDBB DO 32 I=0, LDBB-1 AF(I+J) = CZERO 32 CONTINUE 31 CONTINUE * CALL CGERV2D( ICTXT, BMN, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL) * IF( NPACT .EQ. 2 ) THEN * * Copy diagonal block to align whole system * CALL CLACPY( 'G', BM, BW, AF( BBPTR+BMN ), $ LDBB, AF( BBPTR+2*BW*LDBB+BMN ), LDBB ) ENDIF * ENDIF * * LU factorization with partial pivoting * IF (NPACT .NE. 2) THEN * CALL CGETRF(BM+BMN, BW, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) * * Backsolve left side * DO 301 J=BBPTR,BBPTR+BW*LDBB-1, LDBB DO 302 I=0, BM1-1 AF(I+J) = CZERO 302 CONTINUE 301 CONTINUE * CALL CLASWP(BW, AF(BBPTR), LDBB, 1, BW, $ IPIV(LN+1), 1) * CALL CTRSM('L','L','N','U', BW, BW, CONE, $ AF(BBPTR+BW*LDBB), LDBB, AF(BBPTR), LDBB) * * Use partial factors to update remainder * CALL CGEMM( 'N', 'N', BM+BMN-BW, BW, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ AF( BBPTR ), LDBB, CONE, $ AF( BBPTR+BW ), LDBB ) * * Backsolve right side * NRHS = BW * CALL CLASWP(NRHS, AF(BBPTR+2*BW*LDBB), LDBB, 1, BW, $ IPIV(LN+1), 1) * CALL CTRSM('L','L','N','U', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, AF(BBPTR+2*BW*LDBB), LDBB) * * Use partial factors to update remainder * CALL CGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB, CONE, $ AF( BBPTR+2*BW*LDBB+BW ), LDBB ) * * * Test if processor is active in next round * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * * Reset BM * BM = BM1+BM2-BW * * Local copying in the block bidiagonal area * * CALL CLACPY('G',BM,BW, $ AF(BBPTR+BW), $ LDBB, AF(BBPTR+BW*LDBB), LDBB) CALL CLACPY('G',BM,BW, $ AF(BBPTR+2*BW*LDBB+BW), $ LDBB, AF(BBPTR+2*BW*LDBB), LDBB) * * Zero out space that held original copy * DO 1020 J=0, BW-1 DO 1021 I=0, BM-1 AF(BBPTR+2*BW*LDBB+BW+J*LDBB+I) = CZERO 1021 CONTINUE 1020 CONTINUE * ENDIF * ELSE * * Factor the final 2 by 2 block matrix * CALL CGETRF(BM+BMN,BM+BMN, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) ENDIF * ENDIF * * Last processor in an odd-sized NPACT skips to here * 250 CONTINUE * NPACT = (NPACT + 1)/2 NPSTR = NPSTR * 2 GOTO 200 * 300 CONTINUE * End loop over levels * 1000 CONTINUE * If error was found in Phase 1, processors jump here. * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 1234 CONTINUE * If this processor did not hold part of the grid it * jumps here. * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PCGBTRF * END * scalapack-1.8.0/SRC/pzlantr.f0000640000175000017500000011064610363532303015614 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLANTR returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * trapezoidal or triangular distributed matrix sub( A ) denoting * A(IA:IA+M-1, JA:JA+N-1). * * PZLANTR returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with ia <= i <= ia+m-1, * ( and ja <= j <= ja+n-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PZLANTR as described * above. * * UPLO (global input) CHARACTER * Specifies whether the matrix sub( A ) is upper or lower * trapezoidal. * = 'U': Upper trapezoidal * = 'L': Lower trapezoidal * Note that sub( A ) is triangular instead of trapezoidal * if M = N. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) has * unit diagonal. * = 'N': Non-unit diagonal * = 'U': Unit diagonal * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PZLANTR is * set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PZLANTR is set to zero. N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL UDIAG INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, IOFFA, $ IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, MP, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, INFOG2L, $ PDTREECOMB, ZLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL LSAME, ICEIL, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * IF( UDIAG ) THEN VALUE = ONE ELSE VALUE = ZERO END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 20 LL = JJ, JJ + JB -1 DO 10 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE DO 40 LL = JJ, JJ + JB -1 DO 30 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE END IF ELSE DO 60 LL = JJ, JJ + JB -1 DO 50 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 80 LL = JJ, JJ + JB -1 DO 70 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 70 CONTINUE IOFFA = IOFFA + LDA 80 CONTINUE ELSE DO 100 LL = JJ, JJ + JB -1 DO 90 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 90 CONTINUE IOFFA = IOFFA + LDA 100 CONTINUE END IF ELSE DO 120 LL = JJ, JJ + JB -1 DO 110 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 110 CONTINUE IOFFA = IOFFA + LDA 120 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 130 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 150 LL = JJ, JJ + JB -1 DO 140 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE ELSE DO 170 LL = JJ, JJ + JB -1 DO 160 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 160 CONTINUE IOFFA = IOFFA + LDA 170 CONTINUE END IF ELSE DO 190 LL = JJ, JJ + JB -1 DO 180 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 180 CONTINUE IOFFA = IOFFA + LDA 190 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 210 LL = JJ, JJ + JB -1 DO 200 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 200 CONTINUE IOFFA = IOFFA + LDA 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 DO 220 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE END IF ELSE DO 250 LL = JJ, JJ + JB -1 DO 240 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 260 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 280 LL = JJ, JJ + JB -1 SUM = ONE DO 270 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 270 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 SUM = ZERO DO 290 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 290 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 300 CONTINUE END IF ELSE DO 320 LL = JJ, JJ + JB -1 SUM = ZERO DO 310 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 310 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 320 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 390 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 340 LL = JJ, JJ + JB -1 SUM = ONE DO 330 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 330 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 340 CONTINUE ELSE DO 360 LL = JJ, JJ + JB -1 SUM = ZERO DO 350 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 360 CONTINUE END IF ELSE DO 380 LL = JJ, JJ + JB -1 SUM = ZERO DO 370 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 380 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 410 LL = JJ, JJ + JB -1 SUM = ONE DO 400 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 400 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 410 CONTINUE ELSE DO 430 LL = JJ, JJ + JB -1 SUM = ZERO DO 420 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 420 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 430 CONTINUE END IF ELSE DO 450 LL = JJ, JJ + JB -1 SUM = ZERO DO 440 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 440 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 450 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 520 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 470 LL = JJ, JJ + JB -1 SUM = ONE DO 460 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 460 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 470 CONTINUE ELSE DO 490 LL = JJ, JJ + JB -1 SUM = ZERO DO 480 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 480 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 490 CONTINUE END IF ELSE DO 510 LL = JJ, JJ + JB -1 SUM = ZERO DO 500 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 500 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 510 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 520 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * IF( LSAME( UPLO, 'U' ) ) THEN IF( UDIAG ) THEN DO 530 KK = IIA, IIA+MP-1 WORK( KK ) = ONE 530 CONTINUE ELSE DO 540 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 540 CONTINUE END IF ELSE IF( UDIAG ) THEN NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF DO 550 KK = IIA, IIA+NP-1 WORK( KK ) = ONE 550 CONTINUE DO 560 KK = IIA+NP, IIA+MP-1 WORK( KK ) = ZERO 560 CONTINUE ELSE DO 570 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 570 CONTINUE END IF END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 590 LL = JJ, JJ + JB -1 DO 580 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 580 CONTINUE IOFFA = IOFFA + LDA 590 CONTINUE ELSE DO 610 LL = JJ, JJ + JB -1 DO 600 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 600 CONTINUE IOFFA = IOFFA + LDA 610 CONTINUE END IF ELSE DO 630 LL = JJ, JJ + JB -1 DO 620 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 620 CONTINUE IOFFA = IOFFA + LDA 630 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 700 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 650 LL = JJ, JJ + JB -1 DO 640 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 640 CONTINUE IOFFA = IOFFA + LDA 650 CONTINUE ELSE DO 670 LL = JJ, JJ + JB -1 DO 660 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 660 CONTINUE IOFFA = IOFFA + LDA 670 CONTINUE END IF ELSE DO 690 LL = JJ, JJ + JB -1 DO 680 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 680 CONTINUE IOFFA = IOFFA + LDA 690 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 700 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 720 LL = JJ, JJ + JB -1 DO 710 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 710 CONTINUE IOFFA = IOFFA + LDA 720 CONTINUE ELSE DO 740 LL = JJ, JJ + JB -1 DO 730 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 730 CONTINUE IOFFA = IOFFA + LDA 740 CONTINUE END IF ELSE DO 760 LL = JJ, JJ + JB -1 DO 750 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 750 CONTINUE IOFFA = IOFFA + LDA 760 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 830 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 780 LL = JJ, JJ + JB -1 DO 770 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 770 CONTINUE IOFFA = IOFFA + LDA 780 CONTINUE ELSE DO 800 LL = JJ, JJ + JB -1 DO 790 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 790 CONTINUE IOFFA = IOFFA + LDA 800 CONTINUE END IF ELSE DO 820 LL = JJ, JJ + JB -1 DO 810 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 810 CONTINUE IOFFA = IOFFA + LDA 820 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 830 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( IDAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * IF( UDIAG ) THEN SCALE = ONE SUM = DBLE( MIN( M, N ) ) / DBLE( NPROW*NPCOL ) ELSE SCALE = ZERO SUM = ONE END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 840 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 840 CONTINUE ELSE DO 850 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 850 CONTINUE END IF ELSE DO 860 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 860 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 900 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 870 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 870 CONTINUE ELSE DO 880 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 880 CONTINUE END IF ELSE DO 890 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 890 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 900 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 910 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 910 CONTINUE ELSE DO 920 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 920 CONTINUE END IF ELSE DO 930 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 930 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 970 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 940 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 940 CONTINUE ELSE DO 950 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 950 CONTINUE END IF ELSE DO 960 LL = JJ, JJ + JB -1 CALL ZLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 960 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 970 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to every process in the grid. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PZLANTR = VALUE * RETURN * * End of PZLANTR * END scalapack-1.8.0/SRC/pzpttrf.f0000640000175000017500000010515710363532303015634 0ustar muammarmuammar SUBROUTINE PZPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 AF( * ), E( * ), WORK( * ) DOUBLE PRECISION D( * ) * .. * * * Purpose * ======= * * PZPTTRF computes a Cholesky factorization * of an N-by-N complex tridiagonal * symmetric positive definite distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PZPTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' D U or * * P A(1:N, JA:JA+N-1) P^T = L D L', * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPTTRF and this is stored in AF. If a linear system * is to be solved using PZPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, INT_TEMP, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZAXPY, $ ZGEMM, ZGERV2D, ZGESD2D, ZLACPY, ZLATCPY, $ ZPBTRF, ZPOTRF, ZSYRK, ZTBTRS, ZTRMM, ZTRRV2D, $ ZTRSD2D, ZTRSM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 5*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -9 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 5*100 + 6 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 5*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PZPTTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPTTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (12*NPCOL + 3*NB) * IF( LAF .LT. LAF_MIN ) THEN INFO = -7 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PZPTTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -9 CALL PXERBLA( ICTXT, $ 'PZPTTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA(5) PARAM_CHECK( 6, 1 ) = DESCA(4) PARAM_CHECK( 5, 1 ) = DESCA(3) PARAM_CHECK( 4, 1 ) = DESCA(1) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 505 PARAM_CHECK( 6, 2 ) = 504 PARAM_CHECK( 5, 2 ) = 503 PARAM_CHECK( 4, 2 ) = 501 PARAM_CHECK( 3, 2 ) = 4 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 9 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL ZTRSD2D( ICTXT, 'U', 'N', 1, 1, $ E( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {L_i}^C in each processor * Or A_i = {U_i}^C {U_i} if E is the upper superdiagonal * CALL ZPTTRF( ODD_SIZE, D( PART_OFFSET+1 ), E( PART_OFFSET+1 ), $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * * Perform the triangular system solve {L_i}{{B'}_i}^C = {B_i}^C * by dividing B_i by diagonal element * E( PART_OFFSET+ODD_SIZE ) = E( PART_OFFSET+ODD_SIZE )/ $ D( PART_OFFSET+ODD_SIZE ) * * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^C * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 )- $ D( PART_OFFSET+ODD_SIZE )*DBLE( E( PART_OFFSET+ODD_SIZE )* $ DCONJG( E( PART_OFFSET+ODD_SIZE ) ) ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL ZTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * CALL ZPTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * Divide by D * DO 20 I=1, ODD_SIZE AF( I ) = AF( I )/D( PART_OFFSET+I ) 20 CONTINUE * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * * * Since there is no element-by-element vector multiplication in * the BLAS, this loop must be hardwired in without a BLAS call * INT_TEMP = ODD_SIZE*INT_ONE+2+1 AF( INT_TEMP ) = 0 * DO 30 I=1, ODD_SIZE AF( INT_TEMP ) = AF( INT_TEMP )-D( PART_OFFSET+I )* $ ( AF( I )*DCONJG( AF( I ) ) ) 30 CONTINUE * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * AF( ODD_SIZE+1 ) = $ - D( PART_OFFSET+ODD_SIZE ) $ * DCONJG( E( PART_OFFSET+ODD_SIZE ) $ * AF( ODD_SIZE ) ) * * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = $ DCMPLX( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+AF( ODD_SIZE+3 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ) .EQ. CZERO ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 )/AF( ODD_SIZE+2 ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*AF( ODD_SIZE+1 )*AF( ODD_SIZE+2 ) $ *DCONJG( AF( ODD_SIZE+1 ) ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = ( AF( ODD_SIZE+3 ) ) $ /AF( ODD_SIZE+2 ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 ) $ *DCONJG( AF( ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ * AF( ODD_SIZE+2 ) $ * AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PZPTTRF * END scalapack-1.8.0/SRC/dlasorte.f0000640000175000017500000001003710602576752015744 0ustar muammarmuammar SUBROUTINE DLASORTE( S, LDS, J, OUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER INFO, J, LDS * .. * .. Array Arguments .. DOUBLE PRECISION OUT( J, * ), S( LDS, * ) * .. * * Purpose * ======= * * DLASORTE sorts eigenpairs so that real eigenpairs are together and * complex are together. This way one can employ 2x2 shifts easily * since every 2nd subdiagonal is guaranteed to be zero. * This routine does no parallel work. * * Arguments * ========= * * S (local input/output) DOUBLE PRECISION array, dimension LDS * On entry, a matrix already in Schur form. * On exit, the diagonal blocks of S have been rewritten to pair * the eigenvalues. The resulting matrix is no longer * similar to the input. * * LDS (local input) INTEGER * On entry, the leading dimension of the local array S. * Unchanged on exit. * * J (local input) INTEGER * On entry, the order of the matrix S. * Unchanged on exit. * * OUT (local input/output) DOUBLE PRECISION array, dimension Jx2 * This is the work buffer required by this routine. * * INFO (local input) INTEGER * This is set if the input matrix had an odd number of real * eigenvalues and things couldn't be paired or if the input * matrix S was not originally in Schur form. * 0 indicates successful completion. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER BOT, I, LAST, TOP * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * LAST = J TOP = 1 BOT = J INFO = 0 DO 10 I = J - 1, 1, -1 IF( S( I+1, I ).EQ.ZERO ) THEN IF( LAST-I.EQ.2 ) THEN OUT( BOT-1, 1 ) = S( I+1, I+1 ) OUT( BOT, 2 ) = S( I+2, I+2 ) OUT( BOT-1, 2 ) = S( I+1, I+2 ) OUT( BOT, 1 ) = S( I+2, I+1 ) BOT = BOT - 2 END IF IF( LAST-I.EQ.1 ) THEN IF( MOD( TOP, 2 ).EQ.1 ) THEN * * FIRST OF A PAIR * IF( ( I.EQ.J-1 ) .OR. ( I.EQ.1 ) ) THEN OUT( TOP, 1 ) = S( I+1, I+1 ) ELSE OUT( TOP, 1 ) = S( I+1, I+1 ) END IF OUT( TOP, 2 ) = ZERO ELSE * * SECOND OF A PAIR * IF( ( I.EQ.J-1 ) .OR. ( I.EQ.1 ) ) THEN OUT( TOP, 2 ) = S( I+1, I+1 ) ELSE OUT( TOP, 2 ) = S( I+1, I+1 ) END IF OUT( TOP, 1 ) = ZERO END IF TOP = TOP + 1 END IF IF( LAST-I.GT.2 ) THEN INFO = I RETURN END IF LAST = I END IF 10 CONTINUE IF( LAST.EQ.2 ) THEN * * GRAB LAST DOUBLE PAIR * OUT( BOT-1, 1 ) = S( 1, 1 ) OUT( BOT, 2 ) = S( 2, 2 ) OUT( BOT-1, 2 ) = S( 1, 2 ) OUT( BOT, 1 ) = S( 2, 1 ) BOT = BOT - 2 END IF IF( LAST.EQ.1 .and. mod(top, 2) .eq. 0 ) THEN * * GRAB SECOND PART OF LAST PAIR * OUT(TOP, 2) = s(1,1) OUT(TOP, 1) = zero TOP = TOP + 1 END IF IF( TOP-1.NE.BOT ) THEN INFO = -BOT RETURN END IF * * Overwrite the S diagonals * DO 20 I = 1, J, 2 S( I, I ) = OUT( I, 1 ) S( I+1, I ) = OUT( I+1, 1 ) S( I, I+1 ) = OUT( I, 2 ) S( I+1, I+1 ) = OUT( I+1, 2 ) 20 CONTINUE * RETURN * * End of DLASORTE * END scalapack-1.8.0/SRC/pzungql.f0000640000175000017500000002704210363532303015617 0ustar muammarmuammar SUBROUTINE PZUNGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGQL generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PZGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PZGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JN, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZLASET, PZUNG2L * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Set A(ia+m-n+jn-ja+1:ia-m+1,ja:jn) to zero. * CALL PZLASET( 'All', N-JN+JA-1, JN-JA+1, ZERO, ZERO, A, $ IA+M-N+JN-JA+1, JA, DESCA ) * * Use unblocked code for the first or only block. * CALL PZUNG2L( M-N+JN-JA+1, JN-JA+1, JN-JA-N+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PZLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the left * CALL PZLARFB( 'Left', 'No transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H to rows ia:m-k+i+ib-1 of current block * CALL PZUNG2L( M-N+J+JB-JA, JB, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia+m-n+j+jb-ja:ia+m-1,j:j+jb-1 of current block to * zero * CALL PZLASET( 'All', N-J-JB+JA, JB, ZERO, ZERO, A, $ IA+M-N+J+JB-JA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGQL * END scalapack-1.8.0/SRC/psgerfs.f0000640000175000017500000010177410363532303015575 0ustar muammarmuammar SUBROUTINE PSGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, $ JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, $ LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX, $ LIWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ),IPIV( * ), IWORK( * ) REAL A( * ), AF( * ), B( * ), BERR( * ), FERR( * ), $ WORK( * ), X( * ) * .. * * Purpose * ======= * * PSGERFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**T * sub( X ) = sub( B ) * (Conjugate transpose = Transpose) * * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) REAL pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) REAL pointer into the local * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * factors of the matrix sub( A ) = P * L * U as computed by * PSGETRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF. * This array contains the pivoting information as computed * by PSGETRF. IPIV(i) -> The global row local row i * was swapped with. This array is tied to the distributed * matrix A. * * B (local input) REAL pointer into the local * memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local * pieces of the distributed matrix of right hand sides * sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input and output) REAL pointer into the * local memory to an array of local dimension * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains * the local pieces of the distributed matrix solution * sub( X ). On exit, the improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD(IA-1,MB_A) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD(IB-1,MB_B) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) REAL TWO, THREE PARAMETER ( TWO = 2.0E+0, THREE = 3.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN CHARACTER TRANST INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK2MAT, PSAGEMV, PSAXPY, PSCOPY, $ PSGEMV, PSGETRS, PSLACON, PXERBLA, $ SGAMX2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * NOTRAN = LSAME( TRANS, 'N' ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 20, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3 * NPMOD LIWMIN = NPMOD WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( ( .NOT.NOTRAN ) .AND. ( .NOT.LSAME( TRANS, 'T' ) ) .AND. $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -14 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1600 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1600 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 2000 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -18 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 2000 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -19 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 2000 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -24 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -26 END IF END IF * IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 24 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 26 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 5, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, N, 2, NRHS, 3, $ IX, JX, DESCX, 20, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGERFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PSCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, IX, $ JX+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PSAGEMV( TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PSGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PSAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PSGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PSGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PSCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, $ IX, J+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) + * abs(sub(B)))(i)) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PSAGEMV( TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ J+K, DESCX, 1, ONE, WORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PSGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PSAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PSGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PSGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, $ INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, $ 1, IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSGERFS * END scalapack-1.8.0/SRC/pdormrq.f0000640000175000017500000004240010363532303015576 0ustar muammarmuammar SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMRQ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, LCM, $ LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, NI, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDLARFB, $ PDLARFT, PDORMR2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PDORMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PDLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-IA, IB, $ A, I, JA, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+i+ib-ia-1,jc:jc+n-1) * MI = M - K + I + IB - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+i+ib-ia-1) * NI = N - K + I + IB - IA END IF * * Apply H or H' * CALL PDLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, $ IB, A, I, JA, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PDORMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMRQ * END scalapack-1.8.0/SRC/pslatra.f0000640000175000017500000001544210363532303015566 0ustar muammarmuammar REAL FUNCTION PSLATRA( N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLATRA computes the trace of an N-by-N distributed matrix sub( A ) * denoting A( IA:IA+N-1, JA:JA+N-1 ). The result is left on every * process of the grid. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix the trace * is to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER ICURCOL, ICURROW, II, IOFFA, J, JB, JJ, JN, $ LDA, LL, MYCOL, MYROW, NPCOL, NPROW REAL TRACE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SGSUM2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * TRACE = ZERO IF( N.EQ.0 ) THEN PSLATRA = TRACE RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ ICURROW, ICURCOL ) * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first diagonal block separately * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over the remaining block of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * CALL SGSUM2D( DESCA( CTXT_ ), 'All', ' ', 1, 1, TRACE, 1, -1, $ MYCOL ) * PSLATRA = TRACE * RETURN * * End of PSLATRA * END scalapack-1.8.0/SRC/pzgbtrf.f0000640000175000017500000010772610363532303015605 0ustar muammarmuammar SUBROUTINE PZGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PZGBTRF computes a LU factorization * of an N-by-N complex banded * distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PZGBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) Q = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P and Q are permutation matrices. * The matrix Q represents reordering of columns * for parallelism's sake, while P represents * reordering of rows for numerical stability using * classic partial pivoting. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZGBTRF and this is stored in AF. If a linear system * is to be solved using PZGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 1 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian Natonal University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW, $ CSRC, DBPTR, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM3, J, JA_NEW, JPTR, L, LAF_MIN, $ LBWL, LBWU, LDB, LDBB, LLDA, LM, LMJ, LN, LNJ, $ LPTR, MYCOL, MYROW, MY_NUM_COLS, NB, NEICOL, $ NP, NPACT, NPCOL, NPROW, NPSTR, NP_SAVE, NRHS, $ ODD_N, ODD_SIZE, ODPTR, OFST, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE, ZAXPY, ZGEMM, $ ZGERV2D, ZGESD2D, ZLACPY, ZLATCPY, ZPBTRF, $ ZPOTRF, ZSYRK, ZTBTRS, ZTRMM, ZTRRV2D, ZTRSD2D, $ ZTRSM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -11 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -2 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (2*BWL+2*BWU+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * BW = BWU+BWL * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PZGBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.(BWL+BWU+1) )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PZGBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * IF( LAF .LT. LAF_MIN ) THEN INFO = -9 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PZGBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 1 * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -11 * put minimum value of work into work( 1 ) WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, $ 'PZGBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 11 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZGBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * DO 9 J = 1, ODD_SIZE DO 8 I = 1, BW A( I+(J-1)*LLDA ) = CZERO 8 CONTINUE 9 CONTINUE * * Begin main code * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Transfer triangle B_i of local matrix to next processor * for fillin. Overlap the send with the factorization of A_i. * IF (MYCOL .LE. NPCOL-2) THEN * * The last processor does not need to send anything. * BIPTR = location of triangle B_i in memory BIPTR = (NB-BW)*LLDA + 2*BW+1 * CALL ZTRSD2D( ICTXT, 'U', 'N', $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0, NPCOL ) ), $ BW, A(BIPTR), LLDA-1, 0, MYCOL+1) * ENDIF * * Factor main partition P_i A_i = L_i U_i on each processor * * LBWL, LBWU: lower and upper bandwidth of local solver * Note that for MYCOL > 0 one has lower triangular blocks! * LM is the number of rows which is usually NB except for * MYCOL = 0 where it is BWU less and MYCOL=NPCOL-1 where it * is NR+BWU where NR is the number of columns on the last processor * Finally APTR is the pointer to the first element of A. As LAPACK * has a slightly different matrix format than Scalapack the pointer * has to be adjusted on processor MYCOL=0. * IF (MYCOL .NE. 0) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1+BWU ENDIF * IF (MYCOL .NE. NPCOL-1) THEN LM = NB - LBWU LN = NB - BW ELSE IF (MYCOL .NE. 0) THEN LM = ODD_SIZE + BWU LN = MAX(ODD_SIZE-BW,0) ELSE LM = N LN = MAX( N-BW, 0 ) ENDIF * IF (LN .GT. 0) THEN * CALL ZGBTRF(LM,LN, LBWL,LBWU, A(APTR),LLDA, IPIV, INFO) * IF( INFO.NE.0 ) THEN INFO = INFO + NB*MYCOL GO TO 90 END IF * NRHS = BW LDB = LLDA-1 * * Update the last BW columns of A_i (code modified from ZGBTRS) * * Only the eliminations of unknowns > LN-BW have an effect on * the last BW columns. Loop over them... * DO 23 J = MAX(LN-BW+1,1), LN * LMJ = MIN( LBWL, LM-J ) LNJ = MIN( BW, J+BW-LN+APTR-1 ) * L = IPIV( J ) * JPTR = J-(LN+1)+2*BW+1-LBWL + LN*LLDA * IF( L.NE.J ) THEN * * Element (L,LN+1) is swapped with element (J,LN+1) etc * Furthermore, the elements in the same row are LDB=LLDA-1 apart * The complicated formulas are to cope with the banded * data format: * LPTR = L-(LN+1)+2*BW+1-LBWL + LN*LLDA * CALL ZSWAP( LNJ, A(LPTR),LDB, A(JPTR), LDB ) * ENDIF * * LPTR is the pointer to the beginning of the * coefficients of L * LPTR = BW+1+APTR + (J-1)*LLDA * CALL ZGERU(LMJ,LNJ,-CONE, A(LPTR),1, A(JPTR),LDB, $ A(JPTR+1),LDB) 23 CONTINUE * ENDIF * * Compute spike fill-in, L_i F_i = P_i B_{i-1} * * Receive triangle B_{i-1} from previous processor * IF (MYCOL .GT. 0) THEN * CALL ZTRRV2D( ICTXT, 'U', 'N', MIN(BW, LM), BW, AF( 1 ), $ LM, 0, MYCOL-1) * * * Permutation and forward elimination (triang. solve) * DO 24 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L .NE. J ) THEN * CALL ZSWAP(NRHS, AF(L), LM, AF(J), LM ) ENDIF * LPTR = BW+1+APTR + (J-1)*LLDA * CALL ZGERU( LMJ,NRHS, -CONE, A(LPTR),1, $ AF(J), LM, AF(J+1), LM) * 24 CONTINUE * ENDIF * 90 CONTINUE * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU BN = BW ELSE BM = MIN(BW,ODD_SIZE) + BWU BN = MIN(BW,ODD_SIZE) ENDIF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = (NB+BWU)*BW + 1 LDBB = 2*BW + BWU * * Copy from A and AF into block bidiagonal matrix (tail of AF) * * DBPTR = Pointer to diagonal blocks in A DBPTR = BW+1 + LBWU + LN*LLDA * CALL ZLACPY('G',BM,BN, A(DBPTR),LLDA-1, $ AF(BBPTR + BW*LDBB),LDBB) * * Zero out any junk entries that were copied * DO 870 J=1, BM DO 880 I=J+LBWL, BM-1 AF( BBPTR+BW*LDBB+(J-1)*LDBB+I ) = CZERO 880 CONTINUE 870 CONTINUE * IF (MYCOL .NE. 0) THEN * * ODPTR = Pointer to offdiagonal blocks in A * ODPTR = LM-BM+1 CALL ZLACPY('G',BM,BW, AF(ODPTR),LM, $ AF(BBPTR +2*BW*LDBB),LDBB) ENDIF * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. CALL ZGETRF( N-LN, N-LN, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) * ENDIF * * Loop over levels ... only occurs if npcol > 1 * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) are used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels * 200 IF (NPACT .LE. 1) GOTO 300 * * Test if processor is active * IF (MOD(MYCOL,NPSTR) .EQ. 0) THEN * * Send/Receive blocks * * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * * This node will potentially do more work later * NEICOL = MYCOL + NPSTR * IF (NEICOL/NPSTR .LT. NPACT-1) THEN BMN = BW ELSE IF (NEICOL/NPSTR .EQ. NPACT-1) THEN ODD_N = NUMROC(N, NB, NPCOL-1, 0, NPCOL) BMN = MIN(BW,ODD_N) + BWU ELSE * * Last processor skips to next level GOTO 250 ENDIF * * BM1 = M for 1st block on proc pair, BM2 2nd block * BM1 = BM BM2 = BMN * IF (NEICOL/NPSTR .LE. NPACT-1 )THEN * CALL ZGESD2D( ICTXT, BM, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL ) * CALL ZGERV2D( ICTXT, BMN, 2*BW, AF(BBPTR+BM), $ LDBB, 0, NEICOL) * IF( NPACT .EQ. 2 ) THEN * * Copy diagonal block to align whole system * CALL ZLACPY( 'G', BMN, BW, AF( BBPTR+BM ), $ LDBB, AF( BBPTR+2*BW*LDBB+BM ), LDBB ) ENDIF * ENDIF * ELSE * * This node stops work after this stage -- an extra copy * is required to make the odd and even frontal matrices * look identical * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * BM1 = BMN BM2 = BM * CALL ZGESD2D( ICTXT, BM, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL ) * CALL ZLACPY('G',BM, 2*BW, AF(BBPTR+BW*LDBB),LDBB, $ AF(BBPTR+BMN),LDBB) * DO 31 J=BBPTR+2*BW*LDBB, BBPTR+3*BW*LDBB-1, LDBB DO 32 I=0, LDBB-1 AF(I+J) = CZERO 32 CONTINUE 31 CONTINUE * CALL ZGERV2D( ICTXT, BMN, 2*BW, AF(BBPTR+BW*LDBB), $ LDBB, 0, NEICOL) * IF( NPACT .EQ. 2 ) THEN * * Copy diagonal block to align whole system * CALL ZLACPY( 'G', BM, BW, AF( BBPTR+BMN ), $ LDBB, AF( BBPTR+2*BW*LDBB+BMN ), LDBB ) ENDIF * ENDIF * * LU factorization with partial pivoting * IF (NPACT .NE. 2) THEN * CALL ZGETRF(BM+BMN, BW, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) * * Backsolve left side * DO 301 J=BBPTR,BBPTR+BW*LDBB-1, LDBB DO 302 I=0, BM1-1 AF(I+J) = CZERO 302 CONTINUE 301 CONTINUE * CALL ZLASWP(BW, AF(BBPTR), LDBB, 1, BW, $ IPIV(LN+1), 1) * CALL ZTRSM('L','L','N','U', BW, BW, CONE, $ AF(BBPTR+BW*LDBB), LDBB, AF(BBPTR), LDBB) * * Use partial factors to update remainder * CALL ZGEMM( 'N', 'N', BM+BMN-BW, BW, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ AF( BBPTR ), LDBB, CONE, $ AF( BBPTR+BW ), LDBB ) * * Backsolve right side * NRHS = BW * CALL ZLASWP(NRHS, AF(BBPTR+2*BW*LDBB), LDBB, 1, BW, $ IPIV(LN+1), 1) * CALL ZTRSM('L','L','N','U', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, AF(BBPTR+2*BW*LDBB), LDBB) * * Use partial factors to update remainder * CALL ZGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB, CONE, $ AF( BBPTR+2*BW*LDBB+BW ), LDBB ) * * * Test if processor is active in next round * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * * Reset BM * BM = BM1+BM2-BW * * Local copying in the block bidiagonal area * * CALL ZLACPY('G',BM,BW, $ AF(BBPTR+BW), $ LDBB, AF(BBPTR+BW*LDBB), LDBB) CALL ZLACPY('G',BM,BW, $ AF(BBPTR+2*BW*LDBB+BW), $ LDBB, AF(BBPTR+2*BW*LDBB), LDBB) * * Zero out space that held original copy * DO 1020 J=0, BW-1 DO 1021 I=0, BM-1 AF(BBPTR+2*BW*LDBB+BW+J*LDBB+I) = CZERO 1021 CONTINUE 1020 CONTINUE * ENDIF * ELSE * * Factor the final 2 by 2 block matrix * CALL ZGETRF(BM+BMN,BM+BMN, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), INFO) ENDIF * ENDIF * * Last processor in an odd-sized NPACT skips to here * 250 CONTINUE * NPACT = (NPACT + 1)/2 NPSTR = NPSTR * 2 GOTO 200 * 300 CONTINUE * End loop over levels * 1000 CONTINUE * If error was found in Phase 1, processors jump here. * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 1234 CONTINUE * If this processor did not hold part of the grid it * jumps here. * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PZGBTRF * END * scalapack-1.8.0/SRC/zlahqr2.f0000640000175000017500000003452710363532303015510 0ustar muammarmuammar SUBROUTINE ZLAHQR2( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, $ IHIZ, Z, LDZ, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 22, 2000 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N * .. * .. Array Arguments .. COMPLEX*16 H( LDH, * ), W( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * ZLAHQR2 is an auxiliary routine called by ZHSEQR to update the * eigenvalues and Schur decomposition already computed by ZHSEQR, by * dealing with the Hessenberg submatrix in rows and columns ILO to IHI. * This version of ZLAHQR (not the standard LAPACK version) uses a * double-shift algorithm (like LAPACK's DLAHQR). * Unlike the standard LAPACK convention, this does not assume the * subdiagonal is real, nor does it work to preserve this quality if * given. * * Arguments * ========= * * WANTT (input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (input) INTEGER * The order of the matrix H. N >= 0. * * ILO (input) INTEGER * IHI (input) INTEGER * It is assumed that H is already upper triangular in rows and * columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). * ZLAHQR works primarily with the Hessenberg submatrix in rows * and columns ILO to IHI, but applies transformations to all of * H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * H (input/output) COMPLEX*16 array, dimension (LDH,N) * On entry, the upper Hessenberg matrix H. * On exit, if WANTT is .TRUE., H is upper triangular in rows * and columns ILO:IHI. If WANTT is .FALSE., the contents of H * are unspecified on exit. * * LDH (input) INTEGER * The leading dimension of the array H. LDH >= max(1,N). * * W (output) COMPLEX*16 array, dimension (N) * The computed eigenvalues ILO to IHI are stored in the * corresponding elements of W. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in H, with W(i) = H(i,i). * * ILOZ (input) INTEGER * IHIZ (input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (input/output) COMPLEX*16 array, dimension (LDZ,N) * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations, and on exit Z has been updated; * transformations are applied only to the submatrix * Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not * referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * > 0: if INFO = i, ZLAHQR failed to compute all the * eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) * iterations; elements i+1:ihi of W contain those * eigenvalues which have been successfully computed. * * Further Details * =============== * * Modified by Mark R. Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0 ) DOUBLE PRECISION DAT1, DAT2 PARAMETER ( DAT1 = 0.75D+0, DAT2 = -0.4375D+0 ) * .. * .. Local Scalars .. INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ DOUBLE PRECISION CS, OVFL, S, SMLNUM, TST1, ULP, UNFL COMPLEX*16 CDUM, H00, H10, H11, H12, H21, H22, H33, H33S, $ H43H34, H44, H44S, SN, SUM, T1, T2, T3, V1, V2, $ V3 * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 1 ) COMPLEX*16 V( 3 ) * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, ZLANHS EXTERNAL DLAMCH, ZLANHS * .. * .. External Subroutines .. EXTERNAL DLABAD, ZCOPY, ZLANV2, ZLARFG, ZROT * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN IF( ILO.EQ.IHI ) THEN W( ILO ) = H( ILO, ILO ) RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * * Set machine-dependent constants for the stopping criterion. * If norm(H) <= sqrt(OVFL), overflow should not occur. * UNFL = DLAMCH( 'Safe minimum' ) OVFL = RONE / UNFL CALL DLABAD( UNFL, OVFL ) ULP = DLAMCH( 'Precision' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = 30*NH * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of 1 or 2. Each iteration of the loop works * with the active submatrix in rows and columns L to I. * Eigenvalues I+1 to IHI have already converged. Either L = ILO, or * H(L,L-1) is negligible so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 150 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 130 ITS = 0, ITN * * Look for a single small subdiagonal element. * DO 20 K = I, L + 1, -1 TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) IF( TST1.EQ.RZERO ) $ TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE L = K IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * H( L, L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * IF( L.GE.I-1 ) $ GO TO 140 * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN * * Exceptional shift. * * S = ABS( DBLE( H( I,I-1 ) ) ) + ABS( DBLE( H( I-1,I-2 ) ) ) S = CABS1( H( I, I-1 ) ) + CABS1( H( I-1, I-2 ) ) H44 = DAT1*S H33 = H44 H43H34 = DAT2*S*S ELSE * * Prepare to use Wilkinson's shift. * H44 = H( I, I ) H33 = H( I-1, I-1 ) H43H34 = H( I, I-1 )*H( I-1, I ) END IF * * Look for two consecutive small subdiagonal elements. * DO 40 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S = CABS1( V1 ) + CABS1( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 IF( M.EQ.L ) $ GO TO 50 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 ) $ GO TO 50 40 CONTINUE 50 CONTINUE * * Double-shift QR step * DO 120 K = M, I - 1 * * The first iteration of this loop determines a reflection G * from the vector V and applies it from left and right to H, * thus creating a nonzero bulge below the subdiagonal. * * Each subsequent iteration determines a reflection G to * restore the Hessenberg form in the (K-1)th column, and thus * chases the bulge one step toward the bottom of the active * submatrix. NR is the order of G * NR = MIN( 3, I-K+1 ) IF( K.GT.M ) $ CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.I-1 ) $ H( K+2, K-1 ) = ZERO ELSE IF( M.GT.L ) THEN * The real double-shift code uses H( K, K-1 ) = -H( K, K-1 ) * instead of the following. H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 * * Apply G from the left to transform the rows of the matrix * in columns K to I2. * DO 60 J = K, I2 SUM = DCONJG( T1 )*H( K, J ) + $ DCONJG( T2 )*H( K+1, J ) + $ DCONJG( T3 )*H( K+2, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 H( K+2, J ) = H( K+2, J ) - SUM*V3 60 CONTINUE * * Apply G from the right to transform the columns of the * matrix in rows I1 to min(K+3,I). * DO 70 J = I1, MIN( K+3, I ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 ) H( J, K+2 ) = H( J, K+2 ) - SUM*DCONJG( V3 ) 70 CONTINUE * IF( WANTZ ) THEN * * Accumulate transformations in the matrix Z * DO 80 J = ILOZ, IHIZ SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + $ T3*Z( J, K+2 ) Z( J, K ) = Z( J, K ) - SUM Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 ) Z( J, K+2 ) = Z( J, K+2 ) - SUM*DCONJG( V3 ) 80 CONTINUE END IF ELSE IF( NR.EQ.2 ) THEN * * Apply G from the left to transform the rows of the matrix * in columns K to I2. * DO 90 J = K, I2 SUM = DCONJG( T1 )*H( K, J ) + $ DCONJG( T2 )*H( K+1, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 90 CONTINUE * * Apply G from the right to transform the columns of the * matrix in rows I1 to min(K+2,I). * DO 100 J = I1, MIN( K+2, I ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 ) 100 CONTINUE * IF( WANTZ ) THEN * * Accumulate transformations in the matrix Z * DO 110 J = ILOZ, IHIZ SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) Z( J, K ) = Z( J, K ) - SUM Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 ) 110 CONTINUE END IF END IF * * Since at the start of the QR step we have for M > L * H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 ) * then we don't need to do the following * IF( K.EQ.M .AND. M.GT.L ) THEN * If the QR step was started at row M > L because two * consecutive small subdiagonals were found, then H(M,M-1) * must also be updated by a factor of (1-T1). * TEMP = ONE - T1 * H( m, m-1 ) = H( m, m-1 )*DCONJG( TEMP ) * END IF 120 CONTINUE * * Ensure that H(I,I-1) is real. * 130 CONTINUE * * Failure to converge in remaining number of iterations * INFO = I RETURN * 140 CONTINUE * IF( L.EQ.I ) THEN * * H(I,I-1) is negligible: one eigenvalue has converged. * W( I ) = H( I, I ) * ELSE IF( L.EQ.I-1 ) THEN * * H(I-1,I-2) is negligible: a pair of eigenvalues have converged. * * Transform the 2-by-2 submatrix to standard Schur form, * and compute and store the eigenvalues. * CALL ZLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), $ H( I, I ), W( I-1 ), W( I ), CS, SN ) * IF( WANTT ) THEN * * Apply the transformation to the rest of H. * IF( I2.GT.I ) $ CALL ZROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, $ CS, SN ) CALL ZROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, $ DCONJG( SN ) ) END IF IF( WANTZ ) THEN * * Apply the transformation to Z. * CALL ZROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, $ DCONJG( SN ) ) END IF * END IF * * Decrement number of remaining iterations, and return to start of * the main loop with new value of I. * ITN = ITN - ITS I = L - 1 GO TO 10 * 150 CONTINUE RETURN * * End of ZLAHQR2 * END scalapack-1.8.0/SRC/pdlared2d.f0000640000175000017500000001513110367447133015766 0ustar muammarmuammar SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 12, 2005 * * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESC( * ) DOUBLE PRECISION BYALL( * ), BYROW( * ), WORK( LWORK ) * .. * * Purpose * ======= * * PDLARED2D redistributes a 1D array * * It assumes that the input array, BYROW, is distributed across * columns and that all process rows contain the same copy of * BYROW. The output array, BYALL, will be identical on all processes * and will contain the entire array. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NP = Number of local rows in BYROW() * * N (global input) INTEGER * The number of elements to be redistributed. N >= 0. * * IA (global input) INTEGER * IA must be equal to 1 * * JA (global input) INTEGER * JA must be equal to 1 * * DESC (global/local input) INTEGER Array of dimension DLEN_ * A 2D array descriptor, which describes BYROW * * BYROW (local input) distributed block cyclic DOUBLE PRECISION array * global dimension (N), local dimension (NP) * BYROW is distributed across the process columns * All process rows are assumed to contain the same value * * BYALL (global output) DOUBLE PRECISION global dimension( N ) * local dimension (N) * BYALL is exactly duplicated on all processes * It contains the same values as BYROW, but it is replicated * across all processes rather than being distributed * * BYALL(i) = BYROW( NUMROC(i,DESC( MB_ ),MYCOL,0,NPCOL ) on the procs * whose MYCOL == mod((i-1)/DESC( MB_ ),NPCOL) * * WORK (local workspace) DOUBLE PRECISION dimension (LWORK) * Used to hold the buffers sent from one process to another * * LWORK (local input) INTEGER size of WORK array * LWORK >= NUMROC(N, DESC( MB_ ), 0, 0, NPROW) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ALLI, BUFLEN, I, II, MB, MYCOL, MYROW, NPCOL, $ NPROW, PROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) MB = DESC( MB_ ) * DO 30 PROW = 0, NPROW - 1 BUFLEN = NUMROC( N, MB, PROW, 0, NPROW ) IF( MYROW.EQ.PROW ) THEN CALL DCOPY( BUFLEN, BYROW, 1, WORK, 1 ) CALL DGEBS2D( DESC( CTXT_ ), 'C', ' ', BUFLEN, 1, WORK, $ BUFLEN ) ELSE CALL DGEBR2D( DESC( CTXT_ ), 'C', ' ', BUFLEN, 1, WORK, $ BUFLEN, PROW, MYCOL ) END IF * ALLI = PROW*MB DO 20 II = 1, BUFLEN, MB DO 10 I = 1, MIN( MB, BUFLEN-II+1 ) BYALL( ALLI+I ) = WORK( II-1+I ) 10 CONTINUE ALLI = ALLI + MB*NPROW 20 CONTINUE 30 CONTINUE * RETURN * * End of PSLARED2D * END scalapack-1.8.0/SRC/pzaxpy.c0000640000175000017500000003655210363532303015455 0ustar muammarmuammar/* --------------------------------------------------------------------- * * Mark R. Fahey * August 2000 * This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0 * which fixes a bug in the incx=1 and incy=1 case. * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pzaxpy_( n, alpha, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy ) /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; complex16 * alpha; /* .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex16 X[], Y[]; { /* * Purpose * ======= * * PZAXPY adds one distributed vector to another, * * sub( Y ) := sub( Y ) + alpha * sub( X ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * Parameters * ========== * * N (global input) pointer to INTEGER. * The length of the distributed vectors to be added. N >= 0. * * ALPHA (global input) pointer to COMPLEX*16 * The scalar used to multiply each component of sub( X ). * * X (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input/local output) COMPLEX*16 array * containing the local pieces of a distributed matrix of * dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * On exit sub( Y ) is overwritten by sub( Y ) + alpha*sub( X ). * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * ===================================================================== * * .. Local Scalars .. */ int ictxt, info, iix, iiy, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, lcmq, mycol, myrow, nn, np, np0, nprow, npcol, nq, nq0, nz, ione=1, tmp1, wksz; complex16 one, tmp, zero; /* .. * .. PBLAS Buffer .. */ complex16 * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void zgerv2d_(); void zgesd2d_(); void pbchkvect(); void pberror_(); char * getpbbuf(); F_VOID_FCT zaxpy_(); F_VOID_FCT zcopy_(); F_VOID_FCT pbztrnv_(); F_INTG_FCT ilcm_(); F_INTG_FCT numroc_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(600+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -10; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -9; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1100+MB_+1); } else { info = -12; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -10; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -9; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1100+MB_+1); } else { info = -12; } } else { info = -7; } } if( ictxt != desc_Y[CTXT_] ) info = -(1100+CTXT_+1); } } if( info ) { pberror_( &ictxt, "PZAXPY", &info ); return; } /* * Quick return if possible. */ if( *n == 0 ) return; /* * y <- y + alpha * x */ if( *n == 1 ) { if( ( myrow == iyrow ) && ( mycol == iycol ) ) { if( ( myrow != ixrow ) || ( mycol != ixcol ) ) zgerv2d_( &ictxt, n, n, &tmp, n, &ixrow, &ixcol ); else tmp = X[iix-1+(jjx-1)*desc_X[LLD_]]; zaxpy_( n, alpha, &tmp, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n ); } else if( ( myrow == ixrow ) && ( mycol == ixcol ) ) zgesd2d_( &ictxt, n, n, &X[iix-1+(jjx-1)*desc_X[LLD_]], n, &iyrow, &iycol ); return; } one.re = ONE; one.im = ZERO; zero.re = ZERO; zero.im = ZERO; if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) zaxpy_( &nq, alpha, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); } else { if( myrow == ixrow ) zgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); else if( myrow == iyrow ) { buff = (complex16 *)getpbbuf( "PZAXPY", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol ); zaxpy_( &nq, alpha, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) zaxpy_( &np, alpha, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); } else { if( mycol == ixcol ) zgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); else if( mycol == iycol ) { buff = (complex16 *)getpbbuf( "PZAXPY", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &ixcol ); zaxpy_( &np, alpha, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmq = lcm / npcol; nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[NB_]; nq0 = MYROC0( tmp1, nn, desc_Y[NB_], npcol ); tmp1 = nq0 / desc_Y[NB_]; wksz = np + MYROC0( tmp1, nq0, desc_Y[NB_], lcmq ); buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) ); if( myrow == ixrow ) np -= nz; if( mycol == ixcol ) { zcopy_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, incx ); zscal_( &np, alpha, buff, incx ); } pbztrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n, &desc_X[MB_], &nz, buff, incx, &one, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &ixcol, &iyrow, &iycol, buff+np ); } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*iy-1) % desc_Y[MB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_Y[MB_], &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) ); pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { if( myrow == iyrow ) np -= nz; zaxpy_( &np, alpha, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); } } } } scalapack-1.8.0/SRC/pdgecon.f0000640000175000017500000003717310363532303015544 0ustar muammarmuammar SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, INFO, JA, LIWORK, LWORK, N DOUBLE PRECISION ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDGECON estimates the reciprocal of the condition number of a general * distributed real matrix A(IA:IA+N-1,JA:JA+N-1), in either the 1-norm * or the infinity-norm, using the LU factorization computed by PDGETRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm * = 'I': Infinity-norm * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, * this array contains the local pieces of the factors L and U * from the factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U; the * unit diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) DOUBLE PRECISION * If NORM = '1' or 'O', the 1-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * If NORM = 'I', the infinity-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + 2*LOCc(N+MOD(JA-1,NB_A)) * + MAX( 2, MAX( NB_A*MAX( 1, CEIL(NPROW-1,NPCOL) ), * LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX( 1, CEIL(NPCOL-1,NPROW) ) ). * * LOCr and LOCc values can be computed using the ScaLAPACK * tool function NUMROC; NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, ONENRM CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, JX, $ KASE, KASE1, LIWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPMOD, NPROW, NQ, NQMOD DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU, WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGEBR2D, $ DGEBS2D, INFOG2L, PCHK1MAT, PDAMAX, $ PDLATRS, PDLACON, PDRSCL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600 + CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + 2*NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LIWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGECON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPNL = IPV + NP IPNU = IPNL + NQ IPW = IPNU + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the norm of inv(A). * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 * 10 CONTINUE CALL PDLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Divide X by 1/(SL*SU) if doing so will not cause overflow. * SCALE = SL*SU NORMIN = 'Y' IF( SCALE.NE.ONE ) THEN CALL PDAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL DGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PDGECON * END scalapack-1.8.0/SRC/pdorgql.f0000640000175000017500000002701010363532303015562 0ustar muammarmuammar SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGQL generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PDGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PDGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JN, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDLARFB, $ PDLARFT, PDLASET, PDORG2L, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Set A(ia+m-n+jn-ja+1:ia-m+1,ja:jn) to zero. * CALL PDLASET( 'All', N-JN+JA-1, JN-JA+1, ZERO, ZERO, A, $ IA+M-N+JN-JA+1, JA, DESCA ) * * Use unblocked code for the first or only block. * CALL PDORG2L( M-N+JN-JA+1, JN-JA+1, JN-JA-N+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PDLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the left * CALL PDLARFB( 'Left', 'No transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H to rows ia:m-k+i+ib-1 of current block * CALL PDORG2L( M-N+J+JB-JA, JB, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia+m-n+j+jb-ja:ia+m-1,j:j+jb-1 of current block to * zero * CALL PDLASET( 'All', N-J-JB+JA, JB, ZERO, ZERO, A, $ IA+M-N+J+JB-JA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGQL * END scalapack-1.8.0/SRC/pzggqrf.f0000640000175000017500000003633010363532303015577 0ustar muammarmuammar SUBROUTINE PZGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PZGGQRF computes a generalized QR factorization of * an N-by-M matrix sub( A ) = A(IA:IA+N-1,JA:JA+M-1) and * an N-by-P matrix sub( B ) = B(IB:IB+N-1,JB:JB+P-1): * * sub( A ) = Q*R, sub( B ) = Q*T*Z, * * where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, * and R and T assume one of the forms: * * if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, * ( 0 ) N-M N M-N * M * * where R11 is upper triangular, and * * if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, * P-N N ( T21 ) P * P * * where T12 or T21 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GQR * factorization of sub( A ) and sub( B ) implicitly gives the QR * factorization of inv( sub( B ) )* sub( A ): * * inv( sub( B ) )*sub( A )= Z'*(inv(T)*R) * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the conjugate transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( B ). N >= 0. * * M (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( B ). P >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+M-1)). * On entry, the local pieces of the N-by-M distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(N,M) by M * upper trapezoidal matrix R (R is upper triangular if N >= M); * the elements below the diagonal, with the array TAUA, * represent the unitary matrix Q as a product of min(N,M) * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) COMPLEX*16, array, dimension * LOCc(JA+MIN(N,M)-1). This array contains the scalar factors * TAUA of the elementary reflectors which represent the unitary * matrix Q. TAUA is tied to the distributed matrix A. (see * Further Details). * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+P-1)). * On entry, the local pieces of the N-by-P distributed matrix * sub( B ) which is to be factored. On exit, if N <= P, the * upper triangle of B(IB:IB+N-1,JB+P-N:JB+P-1) contains the * N by N upper triangular matrix T; if N > P, the elements on * and above the (N-P)-th subdiagonal contain the N by P upper * trapezoidal matrix T; the remaining elements, with the array * TAUB, represent the unitary matrix Z as a product of * elementary reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) COMPLEX*16, array, dimension LOCr(IB+N-1) * This array contains the scalar factors of the elementary * reflectors which represent the unitary matrix Z. TAUB is * tied to the distributed matrix B (see Further Details). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB_A * ( NpA0 + MqA0 + NB_A ), * MAX( (NB_A*(NB_A-1))/2, (PqB0 + NpB0)*NB_A ) + * NB_A * NB_A, * MB_B * ( NpB0 + PqB0 + MB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * PqB0 = NUMROC( P+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(n,m). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in * A(ia+i:ia+n-1,ja+i-1), and taua in TAUA(ja+i-1). * To form Q explicitly, use ScaLAPACK subroutine PZUNGQR. * To use Q to update another matrix, use ScaLAPACK subroutine PZUNMQR. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(ib)' H(ib+1)' . . . H(ib+k-1)', where k = min(n,p). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a complex scalar, and v is a complex vector with * v(p-k+i+1:p) = 0 and v(p-k+i) = 1; conjg(v(1:p-k+i-1)) is stored on * exit in B(ib+n-k+i-1,jb:jb+p-k+i-2), and taub in TAUB(ib+n-k+i-1). * To form Z explicitly, use ScaLAPACK subroutine PZUNGRQ. * To use Z to update another matrix, use ScaLAPACK subroutine PZUNMRQ. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( MB_A.EQ.MB_B .AND. IROFFA.EQ.IROFFB .AND. IAROW.EQ.IBROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MQA0, MYCOL, $ MYROW, NPA0, NPB0, NPCOL, NPROW, PQB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZGEQRF, PZGERQF, PZUNMQR * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, INT, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( N, 1, M, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 1, P, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) PQB0 = NUMROC( P+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( NB_ ) * ( NPA0 + MQA0 + DESCA( NB_ ) ), $ MAX( MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( PQB0 + NPB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ), $ DESCB( MB_ ) * ( NPB0 + PQB0 + DESCB( MB_ ) ) ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IAROW.NE.IBROW .OR. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -1203 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( N, 1, M, 2, IA, JA, DESCA, 7, N, 1, P, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGGQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * QR factorization of N-by-M matrix sub( A ): sub( A ) = Q*R * CALL PZGEQRF( N, M, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := Q'*sub( B ). * CALL PZUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, $ IA, JA, DESCA, TAUA, B, IB, JB, DESCB, WORK, LWORK, $ INFO ) LWMIN = MIN( LWMIN, INT( WORK( 1 ) ) ) * * RQ factorization of N-by-P matrix sub( B ): sub( B ) = T*Z. * CALL PZGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DCMPLX( DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) ) * RETURN * * End of PZGGQRF * END scalapack-1.8.0/SRC/pdpotf2.f0000640000175000017500000003055210363532303015475 0ustar muammarmuammar SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDPOTF2 computes the Cholesky factorization of a real symmetric * positive definite distributed matrix sub( A )=A(IA:IA+N-1,JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURR, IDIAG, IIA, $ IOFFA, IROFF, J, JJA, LDA, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DGEMV, $ DSCAL, IGEBR2D, IGEBS2D, INFOG2L, PB_TOPGET, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD, SQRT * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DDOT EXTERNAL LSAME, DDOT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Compute local information * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF ( UPPER ) THEN * * Process (IAROW, IACOL) owns block to be factorized * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN * * Compute the Cholesky factorization A = U'*U. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 10 J = JA, JA+N-1 * * Compute U(J,J) and test for non-positive-definiteness. * AJJ = A( IDIAG ) - $ DDOT( J-JA, A( IOFFA ), 1, A( IOFFA ), 1 ) IF( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 20 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of row J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + LDA CALL DGEMV( 'Transpose', J-JA, JA+N-J-1, -ONE, $ A( IOFFA+LDA ), LDA, A( IOFFA ), 1, $ ONE, A( ICURR ), LDA ) CALL DSCAL( N-J+JA-1, ONE / AJJ, A( ICURR ), LDA ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE * 20 CONTINUE * * Broadcast INFO to all processes in my IAROW. * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) END IF * * IAROW bcasts along columns so that everyone has INFO * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1, $ IAROW, MYCOL ) * END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Compute the Cholesky factorization A = L*L'. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 30 J = JA, JA+N-1 * * Compute L(J,J) and test for non-positive-definiteness. * AJJ = A( IDIAG ) - $ DDOT( J-JA, A( IOFFA ), LDA, A( IOFFA ), LDA ) IF ( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 40 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of column J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + 1 CALL DGEMV( 'No transpose', JA+N-J-1, J-JA, -ONE, $ A( IOFFA+1 ), LDA, A( IOFFA ), LDA, $ ONE, A( ICURR ), 1 ) CALL DSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), 1 ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 30 CONTINUE * 40 CONTINUE * * Broadcast INFO to everyone in IACOL * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1, IAROW, MYCOL ) * END IF * * IACOL bcasts INFO along rows so that everyone has it * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) * END IF * END IF * RETURN * * End of PDPOTF2 * END scalapack-1.8.0/SRC/pzgebd2.f0000640000175000017500000004361510363532303015460 0ustar muammarmuammar SUBROUTINE PZGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEBD2 reduces a complex general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an unitary transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * unitary matrix Q as a product of elementary reflectors, and * the elements above the first superdiagonal, with the array * TAUP, represent the orthogonal matrix P as a product of * elementary reflectors. If M < N, the diagonal and the first * subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the unitary matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX*16 array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX*16 array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MpA0, NqA0 ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ) * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J, $ JJ, K, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 COMPLEX*16 ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DESCSET, $ DGEBR2D, DGEBS2D, INFOG2L, PXERBLA, $ PDELSET, PZELSET, PZLACGV, PZLARF, $ PZLARFC, PZLARFG, ZGEBR2D, ZGEBS2D, $ ZLARFG * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = MAX( MPA0, NQA0 ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( M.EQ.1 .AND. N.EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN I = II+(JJ-1)*DESCA( LLD_ ) CALL ZLARFG( 1, A( I ), A( I ), 1, TAUQ( JJ ) ) D( JJ ) = DBLE( A( I ) ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1 ) CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1, IAROW, IACOL ) CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1, IAROW, IACOL ) END IF END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO RETURN END IF * ALPHA = ZERO * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, N I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector H(j) to annihilate * A(ia+i:ia+m-1,j) * CALL PZLARFG( M-K+1, ALPHA, I, J, A, MIN( I+1, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PDELSET( D, 1, J, DESCD, DBLE( ALPHA ) ) CALL PZELSET( A, I, J, DESCA, ONE ) * * Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left * CALL PZLARFC( 'Left', M-K+1, N-K, A, I, J, DESCA, 1, TAUQ, $ A, I, J+1, DESCA, WORK ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( ALPHA ) ) ) * IF( K.LT.N ) THEN * * Generate elementary reflector G(i) to annihilate * A(i,ja+j+1:ja+n-1) * CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, JA+N-1 ), DESCA, DESCA( M_ ), $ TAUP ) CALL PDELSET( E, I, 1, DESCE, DBLE( ALPHA ) ) CALL PZELSET( A, I, J+1, DESCA, ONE ) * * Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right * CALL PZLARF( 'Right', M-K, N-K, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP, A, I+1, J+1, DESCA, $ WORK ) CALL PZELSET( A, I, J+1, DESCA, DCMPLX( DBLE( ALPHA ) ) ) CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) ELSE CALL PZELSET( TAUP, I, 1, DESCE, ZERO ) END IF 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, M I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector G(i) to annihilate * A(i,ja+j:ja+n-1) * CALL PZLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) CALL PZLARFG( N-K+1, ALPHA, I, J, A, I, $ MIN( J+1, JA+N-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PDELSET( D, I, 1, DESCD, DBLE( ALPHA ) ) CALL PZELSET( A, I, J, DESCA, ONE ) * * Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right * CALL PZLARF( 'Right', M-K, N-K+1, A, I, J, DESCA, $ DESCA( M_ ), TAUP, A, MIN( I+1, IA+M-1 ), J, $ DESCA, WORK ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( ALPHA ) ) ) CALL PZLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) * IF( K.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:ia+m-1,j) * CALL PZLARFG( M-K, ALPHA, I+1, J, A, $ MIN( I+2, IA+M-1 ), J, DESCA, 1, TAUQ ) CALL PDELSET( E, 1, J, DESCE, DBLE( ALPHA ) ) CALL PZELSET( A, I+1, J, DESCA, ONE ) * * Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left * CALL PZLARFC( 'Left', M-K, N-K, A, I+1, J, DESCA, 1, $ TAUQ, A, I+1, J+1, DESCA, WORK ) CALL PZELSET( A, I+1, J, DESCA, DCMPLX( DBLE( ALPHA ) ) ) ELSE CALL PZELSET( TAUQ, 1, J, DESCE, ZERO ) END IF 20 CONTINUE END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEBD2 * END scalapack-1.8.0/SRC/pdsygvx.f0000640000175000017500000010470310377154001015623 0ustar muammarmuammar SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, $ WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, $ GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, $ LIWORK, LWORK, M, N, NZ DOUBLE PRECISION ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCZ( * ), $ ICLUSTR( * ), IFAIL( * ), IWORK( * ) DOUBLE PRECISION A( * ), B( * ), GAP( * ), W( * ), WORK( * ), $ Z( * ) * .. * * Purpose * * ======= * * PDSYGVX computes all the eigenvalues, and optionally, * the eigenvectors * of a real generalized SY-definite eigenproblem, of the form * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or * sub( B )*sub( A )*x=(lambda)*x. * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be * SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed * to be symmetric positive definite. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * JOBZ (global input) CHARACTER*1 * = 'N': Compute eigenvalues only; * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored; * = 'L': Lower triangles of sub( A ) and sub( B ) are stored. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix. * * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains * the distributed matrix Z of eigenvectors. The eigenvectors * are normalized as follows: * if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I; * if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I. * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') * or the lower triangle (if UPLO='L') of sub( A ), including * the diagonal, is destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PDSYGVX cannot guarantee * correct error reporting. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( B ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( B ) contains * the lower triangular part of the matrix. * * On exit, if INFO <= N, the part of sub( B ) containing the * matrix is overwritten by the triangular factor U or L from * the Cholesky factorization sub( B ) = U**T*U or * sub( B ) = L*L**T. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * DESCB( CTXT_ ) must equal DESCA( CTXT_ ) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) DOUBLE PRECISION * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PDLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PDLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PDSYGVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PDSYGVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) DOUBLE PRECISION * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * The row index in the global array Z indicating the first * row of sub( Z ). * * JZ (global input) INTEGER * The column index in the global array Z indicating the * first column of sub( Z ). * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension max(3,LWORK) * if JOBZ='N' WORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' WORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = * DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LWORK is too small: * If LWORK is too small to guarantee orthogonality, * PDSYGVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-23 * is returned. Note that when RANGE='V', PDSYGVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LWORK is large enough to allow PDSYGVX to * compute the eigenvalues, PDSYGVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * Greater performance can be achieved if adequate workspace * is provided. On the other hand, in some situations, * performance can decrease as the workspace provided * increases above the workspace amount shown below: * * For optimal performance, greater workspace may be * needed, i.e. * LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT, * NSYGST_LWOPT ) * Where: * LWORK, as defined previously, depends upon the number * of eigenvectors requested, and * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 3 ) * NPS * NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB * * ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L', * 0, 0, 0, 0) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * NB = DESCA( MB_ ) * NP0 = NUMROC( N, NB, 0, 0, NPROW ) * NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * For large N, no extra workspace is needed, however the * biggest boost in performance comes for small N, so it * is wise to provide the extra workspace (typically less * than a Megabyte per process). * * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PDSTEIN will perform no better than DSTEIN on 1 processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance on all work arrays. * Each of these values is returned in the first entry of the * corresponding work array, and no error message is issued by * PXERBLA. * * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (output) INTEGER array, dimension (N) * IFAIL provides additional information when INFO .NE. 0 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * * If neither of the above error conditions hold and JOBZ = 'V', * then the first M elements of IFAIL are set to zero. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PDSYGVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to * compute eigenvalues. * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/16,2).NE.0), then B was not positive * definite. IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1), * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties, * namely the following expressions should be true: * * DESCA(MB_) = DESCA(NB_) * IA = IB = IZ * JA = IB = JZ * DESCA(M_) = DESCB(M_) =DESCZ(M_) * DESCA(N_) = DESCB(N_)= DESCZ(N_) * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_) * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_) * MOD( IA-1, DESCA( MB_ ) ) = 0 * MOD( JA-1, DESCA( NB_ ) ) = 0 * MOD( IB-1, DESCB( MB_ ) ) = 0 * MOD( JB-1, DESCB( NB_ ) ) = 0 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION FIVE, ZERO PARAMETER ( FIVE = 5.0D+0, ZERO = 0.0D+0 ) INTEGER IERRNPD PARAMETER ( IERRNPD = 16 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ CHARACTER TRANS INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA, $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN, $ LWOPT, MQ0, MYCOL, MYROW, NB, NEIG, NN, NP0, $ NPCOL, NPROW, NPS, NQ0, NSYGST_LWOPT, $ NSYTRD_LWOPT, SQNPC DOUBLE PRECISION EPS, SCALE * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DSCAL, PCHK1MAT, PCHK2MAT, PDPOTRF, PDSYEVX, $ PDSYNGST, PDTRMM, PDTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2600+CTXT_ ) ELSE * * Get machine constants. * EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) * WANTZ = LSAME( JOBZ, 'V' ) UPPER = LSAME( UPLO, 'U' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( VALEIG ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, $ 0, 0 ) END IF IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) $ LQUERY = .TRUE. * LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) * NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) ELSE LWOPT = LWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD and GST algorithms * ANB = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS NB = DESCA( MB_ ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB LWOPT = MAX( LWOPT, N+NSYTRD_LWOPT, NSYGST_LWOPT ) * * Version 1.0 Limitations * IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -2 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -3 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN INFO = -( 1300+M_ ) ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN INFO = -( 1300+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -( 1300+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN INFO = -( 1300+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN INFO = -( 1300+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2200+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2200+CTXT_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -11 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -12 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -15 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -16 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -17 ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -14 ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -15 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) $ THEN INFO = -18 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( WANTZ ) THEN IDUM1( 2 ) = ICHAR( 'V' ) ELSE IDUM1( 2 ) = ICHAR( 'N' ) END IF IDUM2( 2 ) = 2 IF( UPPER ) THEN IDUM1( 3 ) = ICHAR( 'U' ) ELSE IDUM1( 3 ) = ICHAR( 'L' ) END IF IDUM2( 3 ) = 3 IF( ALLEIG ) THEN IDUM1( 4 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 4 ) = ICHAR( 'I' ) ELSE IDUM1( 4 ) = ICHAR( 'V' ) END IF IDUM2( 4 ) = 4 IF( LQUERY ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 5 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, $ JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, $ INFO ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = DBLE( LWOPT ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGVX ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Form a Cholesky factorization of sub( B ). * CALL PDPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) IF( INFO.NE.0 ) THEN IWORK( 1 ) = LIWMIN WORK( 1 ) = DBLE( LWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD RETURN END IF * * Transform problem to standard eigenvalue problem and solve. * CALL PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) CALL PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, $ IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, $ LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO ) * IF( WANTZ ) THEN * * Backtransform eigenvectors to the original problem. * NEIG = M IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN * * For sub( A )*x=(lambda)*sub( B )*x and * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors: * x = inv(L)'*y or inv(U)*y * IF( UPPER ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF * CALL PDTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * For sub( B )*sub( A )*x=(lambda)*x; * backtransform eigenvectors: x = L*y or U'*y * IF( UPPER ) THEN TRANS = 'T' ELSE TRANS = 'N' END IF * CALL PDTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) END IF END IF * IF( SCALE.NE.ONE ) THEN CALL DSCAL( N, SCALE, W, 1 ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = DBLE( LWOPT ) RETURN * * End of PDSYGVX * END scalapack-1.8.0/SRC/pctzrzf.f0000640000175000017500000003107610363532303015623 0ustar muammarmuammar SUBROUTINE PCTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper triangular form by means * of unitary transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N unitary matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements M+1 to N of the first M rows of * sub( A ), with the array TAU, represent the unitary matrix Z * as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), whose conjugate transpose is used to * introduce zeros into the (m - k + 1)th row of sub( A ), is given in * the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IL, IN, IPW, $ IROFFA, J, JM1, L, LWMIN, MP0, MYCOL, MYROW, $ NPCOL, NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG1L, PCHK1MAT, $ PCLATRZ, PCLARZB, PCLARZT, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -9 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTZRZF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ IIA, IAROW ) IF( MYROW.EQ.IAROW ) $ MP0 = MP0 - IROFFA DO 10 I = IIA, IIA+MP0-1 TAU( I ) = ZERO 10 CONTINUE * ELSE * L = N-M JM1 = JA + MIN( M+1, N ) - 1 IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * * Use blocked code initially * DO 20 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) J = JA + I - IA * * Compute the complete orthogonal factorization of the current * block A(i:i+ib-1,j:ja+n-1) * CALL PCLATRZ( IB, JA+N-J, L, A, I, J, DESCA, TAU, WORK ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PCLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,j:ja+n-1) from the right * CALL PCLARZB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, JA+N-J, IB, L, A, I, JM1, $ DESCA, WORK, A, IA, J, DESCA, WORK( IPW ) ) END IF * 20 CONTINUE * * Use unblocked code to factor the last or only block * CALL PCLATRZ( IN-IA+1, N, N-M, A, IA, JA, DESCA, TAU, WORK ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCTZRZF * END scalapack-1.8.0/SRC/pztzrzf.f0000640000175000017500000003111210363532303015641 0ustar muammarmuammar SUBROUTINE PZTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper triangular form by means * of unitary transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N unitary matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements M+1 to N of the first M rows of * sub( A ), with the array TAU, represent the unitary matrix Z * as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), whose conjugate transpose is used to * introduce zeros into the (m - k + 1)th row of sub( A ), is given in * the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IL, IN, IPW, $ IROFFA, J, JM1, L, LWMIN, MP0, MYCOL, MYROW, $ NPCOL, NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG1L, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZLATRZ, $ PZLARZB, PZLARZT * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -9 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTZRZF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ IIA, IAROW ) IF( MYROW.EQ.IAROW ) $ MP0 = MP0 - IROFFA DO 10 I = IIA, IIA+MP0-1 TAU( I ) = ZERO 10 CONTINUE * ELSE * L = N-M JM1 = JA + MIN( M+1, N ) - 1 IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * * Use blocked code initially * DO 20 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) J = JA + I - IA * * Compute the complete orthogonal factorization of the current * block A(i:i+ib-1,j:ja+n-1) * CALL PZLATRZ( IB, JA+N-J, L, A, I, J, DESCA, TAU, WORK ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PZLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,j:ja+n-1) from the right * CALL PZLARZB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, JA+N-J, IB, L, A, I, JM1, $ DESCA, WORK, A, IA, J, DESCA, WORK( IPW ) ) END IF * 20 CONTINUE * * Use unblocked code to factor the last or only block * CALL PZLATRZ( IN-IA+1, N, N-M, A, IA, JA, DESCA, TAU, WORK ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZTZRZF * END scalapack-1.8.0/SRC/pslarft.f0000640000175000017500000004430010363532303015566 0ustar muammarmuammar SUBROUTINE PSLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) REAL TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARFT forms the triangular factor T of a real block reflector H * of order n, which is defined as a product of k elementary reflectors. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the distributed matrix V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the distributed matrix V, and * * H = I - V' * T * V * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER*1 * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER*1 * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise * = 'R': rowwise * * N (global input) INTEGER * The order of the block reflector H. N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) REAL pointer into the local memory * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1)) * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if * STOREV = 'R'. The distributed matrix V contains the * Householder vectors. See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) REAL, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) REAL array, dimension (NB_V,NB_V) * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains * the k-by-k triangular factor of the block reflector asso- * ciated with V. If DIRECT = 'F', T is upper triangular; * if DIRECT = 'B', T is lower triangular. * * WORK (local workspace) REAL array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 ) * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 ) * ( v1 v2 1 ) ( 1 v3 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 ) * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 ) * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) * ( 1 v3 ) * ( 1 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL FORWARD INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJ, JJV, LDV, MICOL, MIROW, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL VII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SCOPY, SGEMV, $ SGSUM2D, SLASET, STRMV * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 .OR. K.LE.0 ) $ RETURN * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( LSAME( STOREV, 'C' ) .AND. MYCOL.EQ.IVCOL ) THEN * IW = 1 LDV = DESCV( LLD_ ) IROFF = MOD( IV-1, DESCV( MB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NP = NP - IROFF II = IIV + 1 ELSE II = IIV END IF IF( IROFF+1.EQ.DESCV( MB_ ) ) THEN MIROW = MOD( IVROW+1, NPROW ) ELSE MIROW = IVROW END IF ITMP0 = 0 * DO 10 JJ = JJV+1, JJV+K-1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( NP-II+IIV.GT.0 ) THEN CALL SGEMV( 'Transpose', NP-II+IIV, ITMP0, $ -TAU( JJ ), V( II+(JJV-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II + 1 END IF * IF( MOD( IV+ITMP0, DESCV( MB_ ) ).EQ.0 ) $ MIROW = MOD( MIROW+1, NPROW ) * 10 CONTINUE * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( JJV ) * DO 20 JJ = JJV+1, JJV+K-1 * * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( NB_ ) CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL STRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( NB_ ), T( ITMP1 ), 1 ) T(ITMP1+ITMP0) = TAU( JJ ) * 20 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF-1, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ NP = NP - IROFF MIROW = INDXG2P( IV+N-2, DESCV( MB_ ), MYROW, $ DESCV( RSRC_ ), NPROW ) II = IIV + NP - 1 ITMP0 = 0 * DO 30 JJ = JJV+K-2, JJV, -1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( II-IIV+1.GT.0 ) THEN CALL SGEMV( 'Transpose', II-IIV+1, ITMP0, -TAU( JJ ), $ V( IIV+JJ*LDV ), LDV, $ V( IIV+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II - 1 END IF * IF( MOD( IV+N-ITMP0-2, DESCV(MB_) ).EQ.0 ) $ MIROW = MOD( MIROW+NPROW-1, NPROW ) * 30 CONTINUE * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( NB_ ) * T( ITMP1-1 ) = TAU( JJV+K-1 ) * DO 40 JJ = JJV+K-2, JJV, -1 * * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( NB_ ) - 1 CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL STRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( NB_ ) ), $ DESCV( NB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( JJ ) * 40 CONTINUE * END IF * END IF * ELSE IF( LSAME( STOREV, 'R' ) .AND. MYROW.EQ.IVROW ) THEN * IW = 1 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN NQ = NQ - ICOFF JJ = JJV + 1 ELSE JJ = JJV END IF IF( ICOFF+1.EQ.DESCV( NB_ ) ) THEN MICOL = MOD( IVCOL+1, NPCOL ) ELSE MICOL = IVCOL END IF ITMP0 = 0 * DO 50 II = IIV+1, IIV+K-1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( iv+i-1 ) * * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ-JJ+JJV.GT.0 ) THEN CALL SGEMV( 'No transpose', ITMP0, NQ-JJ+JJV, $ -TAU(II), V( IIV+(JJ-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) ELSE CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ + 1 END IF * IF( MOD( JV+ITMP0, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+1, NPCOL ) * 50 CONTINUE * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( IIV ) * DO 60 II = IIV+1, IIV+K-1 * * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( MB_ ) CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL STRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1+ITMP0 ) = TAU( II ) * 60 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF-1, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF MICOL = INDXG2P( JV+N-2, DESCV( NB_ ), MYCOL, $ DESCV( CSRC_ ), NPCOL ) JJ = JJV + NQ - 1 ITMP0 = 0 * DO 70 II = IIV+K-2, IIV, -1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)' * ITMP0 = ITMP0 + 1 IF( JJ-JJV+1.GT.0 ) THEN CALL SGEMV( 'No transpose', ITMP0, JJ-JJV+1, $ -TAU( II ), V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) ELSE CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ - 1 END IF * IF( MOD( JV+N-ITMP0-2, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+NPCOL-1, NPCOL ) * 70 CONTINUE * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 80 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL STRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( MB_ ) ), $ DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 80 CONTINUE * END IF * END IF * END IF * RETURN * * End of PSLARFT * END scalapack-1.8.0/SRC/pzlarfg.f0000640000175000017500000002440110363532303015560 0ustar muammarmuammar SUBROUTINE PZLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N COMPLEX*16 ALPHA * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 TAU( * ), X( * ) * .. * * Purpose * ======= * * PZLARFG generates a complex elementary reflector H of order n, such * that * * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I. * ( x ) ( 0 ) * * where alpha is a real scalar, and sub( X ) is an (N-1)-element * complex distributed vector X(IX:IX+N-2,JX) if INCX = 1 and * X(IX,JX:JX+N-2) if INCX = DESCX(M_). H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v' ) , * ( v ) * * where tau is a complex scalar and v is a complex (N-1)-element * vector. Note that H is not Hermitian. * * If the elements of sub( X ) are all zero and X(IAX,JAX) is real, * then tau = 0 and H is taken to be the unit matrix. * * Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The global order of the elementary reflector. N >= 0. * * ALPHA (local output) COMPLEX*16 * On exit, alpha is computed in the process scope having the * vector sub( X ). * * IAX (global input) INTEGER * The global row index in X of X(IAX,JAX). * * JAX (global input) INTEGER * The global column index in X of X(IAX,JAX). * * X (local input/local output) COMPLEX*16, pointer into the * local memory to an array of dimension (LLD_X,*). This array * contains the local pieces of the distributed vector sub( X ). * Before entry, the incremented array sub( X ) must contain * the vector x. On exit, it is overwritten with the vector v. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * TAU (local output) COMPLEX*16, array, dimension LOCc(JX) * if INCX = 1, and LOCr(IX) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX, $ KNT, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PDZNRM2, $ ZGEBR2D, ZGEBS2D, PZSCAL, $ PZDSCAL * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY3 COMPLEX*16 ZLADIV EXTERNAL DLAMCH, DLAPY3, ZLADIV * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is distributed across a process row. * CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYROW.NE.IXROW ) $ RETURN * * Broadcast X(IAX,JAX) across the process row. * IF( MYCOL.EQ.IXCOL ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1, $ MYROW, IXCOL ) END IF * INDXTAU = IIAX * ELSE * * sub( X ) is distributed across a process column. * CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYCOL.NE.IXCOL ) $ RETURN * * Broadcast X(IAX,JAX) across the process column. * IF( MYROW.EQ.IXROW ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, ALPHA, 1, $ IXROW, MYCOL ) END IF * INDXTAU = JJAX * END IF * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO RETURN END IF * CALL PDZNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) ALPHR = DBLE( ALPHA ) ALPHI = DIMAG( ALPHA ) * IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN * * H = I * TAU( INDXTAU ) = ZERO * ELSE * * General case * BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) SAFMIN = DLAMCH( 'S' ) RSAFMN = ONE / SAFMIN IF( ABS( BETA ).LT.SAFMIN ) THEN * * XNORM, BETA may be inaccurate; scale X and recompute them * KNT = 0 10 CONTINUE KNT = KNT + 1 CALL PZDSCAL( N-1, RSAFMN, X, IX, JX, DESCX, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN * CALL PDZNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) ALPHA = DCMPLX( ALPHR, ALPHI ) BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) TAU( INDXTAU ) = DCMPLX( ( BETA-ALPHR ) / BETA, $ -ALPHI / BETA ) ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) CALL PZSCAL( N-1, ALPHA, X, IX, JX, DESCX, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * ALPHA = BETA DO 20 J = 1, KNT ALPHA = ALPHA*SAFMIN 20 CONTINUE ELSE TAU( INDXTAU ) = DCMPLX( ( BETA-ALPHR ) / BETA, $ -ALPHI / BETA ) ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) CALL PZSCAL( N-1, ALPHA, X, IX, JX, DESCX, INCX ) ALPHA = BETA END IF END IF * RETURN * * End of PZLARFG * END scalapack-1.8.0/SRC/pcheevx.f0000640000175000017500000011654310377154001015566 0ustar muammarmuammar SUBROUTINE PCHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, IFAIL, ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK, $ LWORK, M, N, NZ REAL ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ), $ IFAIL( * ), IWORK( * ) REAL GAP( * ), RWORK( * ), W( * ) COMPLEX A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCHEEVX computes selected eigenvalues and, optionally, eigenvectors * of a complex hermitian matrix A by calling the recommended sequence * of ScaLAPACK routines. Eigenvalues/vectors can be selected by * specifying a range of values or a range of indices for the desired * eigenvalues. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * PCHEEVX assumes IEEE 754 standard compliant arithmetic. To port * to a system which does not have IEEE 754 arithmetic, modify * the appropriate SLmake.inc file to include the compiler switch * -DNO_IEEE. This switch only affects the compilation of pslaiect.c. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ) * * On entry, the Hermitian matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the Hermitian matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * Hermitian matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PCHEEVX cannot guarantee * correct error reporting. * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) REAL * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PSLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PSLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PCHEEVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PCHEEVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) REAL * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX array, * dimension (LWORK) * WORK(1) returns workspace adequate workspace to allow * optimal performance. * * LWORK (local input) INTEGER * Size of WORK array. If only eigenvalues are requested: * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) * If eigenvectors are requested: * LWORK >= N + ( NP0 + MQ0 + NB ) * NB * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). * * For optimal performance, greater workspace is needed, i.e. * LWORK >= MAX( LWORK, NHETRD_LWORK ) * Where LWORK is as defined above, and * NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 1 ) * NPS * * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/output) REAL array, * dimension max(3,LRWORK) * On return, WORK(1) contains the optimal amount of * workspace required for efficient execution. * if JOBZ='N' RWORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' RWORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LRWORK (local input) INTEGER * Size of RWORK * See below for definitions of variables used to define LRWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LRWORK >= 5 * NN + 4 * N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LRWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LRWORK is too small: * If LRWORK is too small to guarantee orthogonality, * PCHEEVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LRWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-25 * is returned. Note that when RANGE='V', PCHEEVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LRWORK is large enough to allow PCHEEVX to * compute the eigenvalues, PCHEEVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PCSTEIN will perform no better than CSTEIN on 1 * processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then * IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) REAL array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PCHEEVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to compute * eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Differences between PCHEEVX and CHEEVX * ====================================== * * A, LDA -> A, IA, JA, DESCA * Z, LDZ -> Z, IZ, JZ, DESCZ * WORKSPACE needs are larger for PCHEEVX. * LIWORK parameter added * * ORFAC, ICLUSTER() and GAP() parameters added * meaning of INFO is changed * * Functional differences: * PCHEEVX does not promise orthogonality for eigenvectors associated * with tighly clustered eigenvalues. * PCHEEVX does not reorthogonalize eigenvectors * that are on different processes. The extent of reorthogonalization * is controlled by the input parameter LWORK. * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, TEN, FIVE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0, $ FIVE = 5.0E+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN, $ VALEIG, WANTZ CHARACTER ORDER INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO, $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP, $ INDRWORK, INDTAU, INDWORK, IROFFA, IROFFZ, $ ISCALE, ISIZESTEBZ, ISIZESTEIN, IZROW, $ LALLWORK, LIWMIN, LLRWORK, LLWORK, LRWMIN, $ LRWOPT, LWMIN, LWOPT, MAXEIGS, MB_A, MQ0, $ MYCOL, MYROW, NB, NB_A, NEIG, NHETRD_LWOPT, NN, $ NNP, NP0, NPCOL, NPROCS, NPROW, NPS, NQ0, $ NSPLIT, NZZ, OFFSET, RSRC_A, RSRC_Z, SIZEHEEVX, $ SIZESTEIN, SQNPC REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, $ SIGMA, SMLNUM, VLL, VUU * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV REAL PCLANHE, PSLAMCH EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, $ PCLANHE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCELGET, $ PCHENTRD, PCHK1MAT, PCHK2MAT, PCLASCL, PCSTEIN, $ PCUNMTR, PSLARED1D, PSSTEBZ, PXERBLA, SGEBR2D, $ SGEBS2D, SLASRT, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, DBLE, ICHAR, INT, MAX, MIN, MOD, $ REAL, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 800+CTXT_ ) ELSE IF( WANTZ ) THEN IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) EPS = PSLAMCH( ICTXT, 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL LOWER = LSAME( UPLO, 'L' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) * * Set up pointers into the WORK array * INDTAU = 1 INDWORK = INDTAU + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDE = 1 INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDRWORK = INDE2 + N LLRWORK = LRWORK - INDRWORK + 1 * * Set up pointers into the IWORK array * ISIZESTEIN = 3*N + NPROCS + 1 ISIZESTEBZ = MAX( 4*N, 14, NPROCS ) INDIBL = ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 INDISP = INDIBL + N * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) $ LQUERY = .TRUE. * NNP = MAX( N, NPROCS+1, 4 ) LIWMIN = 6*NNP * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A NN = MAX( N, NB, 2 ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) NP0 = NUMROC( N+IROFFA, NB, 0, 0, NPROW ) MQ0 = NUMROC( N+ICOFFA, NB, 0, 0, NPCOL ) IF( WANTZ ) THEN RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = N + MAX( NB*( NP0+1 ), 3 ) LWOPT = LWMIN LRWMIN = 5*NN + 4*N IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LRWOPT = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( N, NPROW*NPCOL )*NN ELSE LRWOPT = LRWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+NQ0+NB )*NB LRWMIN = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LRWOPT = LRWMIN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD code * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS LWOPT = MAX( LWOPT, N+NHETRD_LWOPT ) * END IF IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN RWORK( 1 ) = ABSTOL IF( VALEIG ) THEN RWORK( 2 ) = VL RWORK( 3 ) = VU ELSE RWORK( 2 ) = ZERO RWORK( 3 ) = ZERO END IF CALL SGEBS2D( ICTXT, 'ALL', ' ', 3, 1, RWORK, 3 ) ELSE CALL SGEBR2D( ICTXT, 'ALL', ' ', 3, 1, RWORK, 3, 0, 0 ) END IF IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -2 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -3 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -10 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -11 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -12 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -23 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -25 ELSE IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN INFO = -27 ELSE IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -9 ELSE IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -10 ELSE IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* $ ABS( ABSTOL ) ) THEN INFO = -13 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -19 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -19 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2100+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2100+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2100+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2100+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2100+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2100+CSRC_ ) ELSE IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( ALLEIG ) THEN IDUM1( 3 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 3 ) = ICHAR( 'I' ) ELSE IDUM1( 3 ) = ICHAR( 'V' ) END IF IDUM2( 3 ) = 3 IF( LQUERY ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 4 IF( WANTZ ) THEN CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, $ JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 4, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = CMPLX( LWOPT ) RWORK( 1 ) = REAL( LRWOPT ) IWORK( 1 ) = LIWMIN END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHEEVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 ICLUSTR( 1 ) = 0 END IF M = 0 WORK( 1 ) = CMPLX( LWOPT ) RWORK( 1 ) = REAL( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN END IF * * Scale matrix to allowable range, if necessary. * ABSTLL = ABSTOL ISCALE = 0 IF( VALEIG ) THEN VLL = VL VUU = VU ELSE VLL = ZERO VUU = ZERO END IF * ANRM = PCLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ANRM = ANRM*SIGMA ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM ANRM = ANRM*SIGMA END IF * IF( ISCALE.EQ.1 ) THEN CALL PCLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) IF( ABSTOL.GT.0 ) $ ABSTLL = ABSTOL*SIGMA IF( VALEIG ) THEN VLL = VL*SIGMA VUU = VU*SIGMA IF( VUU.EQ.VLL ) THEN VUU = VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) END IF END IF END IF * * Call PCHENTRD to reduce Hermitian matrix to tridiagonal form. * LALLWORK = LLRWORK * CALL PCHENTRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, RWORK( INDRWORK ), LLRWORK, IINFO ) * * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet work with arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PSLARED1D( N, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDD2 ), RWORK( INDRWORK ), LLRWORK ) * CALL PSLARED1D( N, IA, JA, DESCA, RWORK( INDE ), $ RWORK( INDE2 ), RWORK( INDRWORK ), LLRWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PCELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, $ I+JA-1, DESCA ) RWORK( INDD2+I-1 ) = REAL( WORK( INDD2+I-1 ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, $ I+JA, DESCA ) RWORK( INDE2+I-1 ) = REAL( WORK( INDE2+I-1 ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, $ I+JA-1, DESCA ) RWORK( INDE2+I-1 ) = REAL( WORK( INDE2+I-1 ) ) 30 CONTINUE END IF END IF * * Call PSSTEBZ and, if eigenvectors are desired, PCSTEIN. * IF( WANTZ ) THEN ORDER = 'B' ELSE ORDER = 'E' END IF * CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, $ RWORK( INDD2 ), RWORK( INDE2+OFFSET ), M, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWORK ), $ LLRWORK, IWORK( 1 ), ISIZESTEBZ, IINFO ) * * * IF PSSTEBZ fails, the error propogates to INFO, but * we do not propogate the eigenvalue(s) which failed because: * 1) This should never happen if the user specifies * ABSTOL = 2 * PSLAMCH( 'U' ) * 2) PSSTEIN will confirm/deny whether the eigenvalues are * close enough. * IF( IINFO.NE.0 ) THEN INFO = INFO + IERREBZ DO 40 I = 1, M IWORK( INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) 40 CONTINUE END IF IF( WANTZ ) THEN * IF( VALEIG ) THEN * * Compute the maximum number of eigenvalues that we can * compute in the * workspace that we have, and that we can store in Z. * * Loop through the possibilities looking for the largest * NZ that we can feed to PCSTEIN and PCUNMTR * * Since all processes must end up with the same value * of NZ, we first compute the minimum of LALLWORK * CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LALLWORK, 1, 1, 1, -1, $ -1, -1 ) * MAXEIGS = DESCZ( N_ ) * DO 50 NZ = MIN( MAXEIGS, M ), 0, -1 MQ0 = NUMROC( NZ, NB, 0, 0, NPCOL ) SIZESTEIN = ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) SIZEHEEVX = SIZESTEIN IF( SIZEHEEVX.LE.LALLWORK ) $ GO TO 60 50 CONTINUE 60 CONTINUE ELSE NZ = M END IF NZ = MAX( NZ, 0 ) IF( NZ.NE.M ) THEN INFO = INFO + IERRSPC * DO 70 I = 1, M IFAIL( I ) = 0 70 CONTINUE * * The following code handles a rare special case * - NZ .NE. M means that we don't have enough room to store * all the vectors. * - NSPLIT .GT. 1 means that the matrix split * In this case, we cannot simply take the first NZ eigenvalues * because PSSTEBZ sorts the eigenvalues by block when * a split occurs. So, we have to make another call to * PSSTEBZ with a new upper limit - VUU. * IF( NSPLIT.GT.1 ) THEN CALL SLASRT( 'I', M, W, IINFO ) NZZ = 0 IF( NZ.GT.0 ) THEN * VUU = W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) IF( VLL.GE.VUU ) THEN NZZ = 0 ELSE CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, $ IU, ABSTLL, RWORK( INDD2 ), $ RWORK( INDE2+OFFSET ), NZZ, NSPLIT, $ W, IWORK( INDIBL ), IWORK( INDISP ), $ RWORK( INDRWORK ), LLRWORK, $ IWORK( 1 ), ISIZESTEBZ, IINFO ) END IF * IF( MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN IF( NZZ.GT.NZ .OR. IINFO.NE.0 ) THEN INFO = INFO + IERREBZ END IF END IF END IF NZ = MIN( NZ, NZZ ) * END IF END IF CALL PCSTEIN( N, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), NZ, W, $ IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, $ JZ, DESCZ, RWORK( INDRWORK ), LALLWORK, $ IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, $ IINFO ) * IF( IINFO.GE.NZ+1 ) $ INFO = INFO + IERRCLS IF( MOD( IINFO, NZ+1 ).NE.0 ) $ INFO = INFO + IERREIN * * Z = Q * Z * * IF( NZ.GT.0 ) THEN CALL PCUNMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) END IF * END IF * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL SSCAL( M, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = CMPLX( LWOPT ) RWORK( 1 ) = REAL( LRWOPT ) IWORK( 1 ) = LIWMIN * RETURN * * End of PCHEEVX * END scalapack-1.8.0/SRC/pdlasrt.f0000640000175000017500000002072610363532303015572 0ustar muammarmuammar SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 22, 2000 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, IQ, JQ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) DOUBLE PRECISION D( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PDLASRT Sort the numbers in D in increasing order and the * corresponding vectors in Q. * * Arguments * ========= * * ID (global input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. (NOT IMPLEMENTED YET) * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( Q ). N >= 0. * * D (global input/output) DOUBLE PRECISION array, dimmension (N) * On exit, the number in D are sorted in increasing order. * * Q (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_Q, LOCc(JQ+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IQ (global input) INTEGER * The row index in the global array A indicating the first * row of sub( Q ). * * JQ (global input) INTEGER * The column index in the global array A indicating the * first column of sub( Q ). * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK = MAX( N, NP * ( NB + NQ )) * where * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK = N + 2*NB + 2*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CL, COL, DUMMY, I, ICTXT, IID, IIQ, INDCOL, $ INDX, INDXC, INDXG, IPQ, IPQ2, IPW, IPWORK, J, $ JJQ, K, L, LDQ, LEND, LIWMIN, LWMIN, MYCOL, $ MYROW, NB, ND, NP, NPCOL, NPROW, NQ, PSQ, QCOL, $ QTOT, SBUF * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, DCOPY, $ DGERV2D, DGESD2D, DLACPY, DLAPST * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IQ, JQ, DESCQ, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) LWMIN = MAX( N, NP*( NB+NQ ) ) LIWMIN = N + 2*( NB+NPCOL ) IF( .NOT.LSAME( ID, 'I' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -9 ELSE IF( LIWORK.LT.LIWMIN ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASRT', -INFO ) RETURN END IF * * Set Pointers * INDXC = 1 INDX = INDXC + N INDXG = INDX INDCOL = INDXG + NB QTOT = INDCOL + NB PSQ = QTOT + NPCOL * IID = 1 IPQ2 = 1 IPW = IPQ2 + NP*NQ * DUMMY = 0 IIQ = INDXG2L( IQ, NB, DUMMY, DUMMY, NPROW ) * * Sort the eigenvalues in D * CALL DLAPST( 'I', N, D, IWORK( INDX ), INFO ) * DO 10 L = 0, N - 1 WORK( IID+L ) = D( IWORK( INDX+L ) ) IWORK( INDXC-1+IWORK( INDX+L ) ) = IID + L 10 CONTINUE CALL DCOPY( N, WORK, 1, D, 1 ) * ND = 0 20 CONTINUE IF( ND.LT.N ) THEN LEND = MIN( NB, N-ND ) J = JQ + ND QCOL = INDXG2P( J, NB, DUMMY, DESCQ( CSRC_ ), NPCOL ) K = 0 DO 30 L = 0, LEND - 1 I = JQ - 1 + IWORK( INDXC+ND+L ) CL = INDXG2P( I, NB, DUMMY, DESCQ( CSRC_ ), NPCOL ) IWORK( INDCOL+L ) = CL IF( MYCOL.EQ.CL ) THEN IWORK( INDXG+K ) = IWORK( INDXC+ND+L ) K = K + 1 END IF 30 CONTINUE * IF( MYCOL.EQ.QCOL ) THEN DO 40 CL = 0, NPCOL - 1 IWORK( QTOT+CL ) = 0 40 CONTINUE DO 50 L = 0, LEND - 1 IWORK( QTOT+IWORK( INDCOL+L ) ) = IWORK( QTOT+ $ IWORK( INDCOL+L ) ) + 1 50 CONTINUE IWORK( PSQ ) = 1 DO 60 CL = 1, NPCOL - 1 IWORK( PSQ+CL ) = IWORK( PSQ+CL-1 ) + IWORK( QTOT+CL-1 ) 60 CONTINUE DO 70 L = 0, LEND - 1 CL = IWORK( INDCOL+L ) I = JQ + ND + L JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = IIQ + ( JJQ-1 )*LDQ IPWORK = IPW + ( IWORK( PSQ+CL )-1 )*NP CALL DCOPY( NP, Q( IPQ ), 1, WORK( IPWORK ), 1 ) IWORK( PSQ+CL ) = IWORK( PSQ+CL ) + 1 70 CONTINUE IWORK( PSQ ) = 1 DO 80 CL = 1, NPCOL - 1 IWORK( PSQ+CL ) = IWORK( PSQ+CL-1 ) + IWORK( QTOT+CL-1 ) 80 CONTINUE DO 90 L = 0, K - 1 I = IWORK( INDXG+L ) JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = IPQ2 + ( JJQ-1 )*NP IPWORK = IPW + ( IWORK( PSQ+MYCOL )-1 )*NP CALL DCOPY( NP, WORK( IPWORK ), 1, WORK( IPQ ), 1 ) IWORK( PSQ+MYCOL ) = IWORK( PSQ+MYCOL ) + 1 90 CONTINUE DO 100 CL = 1, NPCOL - 1 COL = MOD( MYCOL+CL, NPCOL ) SBUF = IWORK( QTOT+COL ) IF( SBUF.NE.0 ) THEN IPWORK = IPW + ( IWORK( PSQ+COL )-1 )*NP CALL DGESD2D( DESCQ( CTXT_ ), NP, SBUF, $ WORK( IPWORK ), NP, MYROW, COL ) END IF 100 CONTINUE * ELSE * IF( K.NE.0 ) THEN CALL DGERV2D( DESCQ( CTXT_ ), NP, K, WORK( IPW ), NP, $ MYROW, QCOL ) DO 110 L = 0, K - 1 I = JQ - 1 + IWORK( INDXG+L ) JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = 1 + ( JJQ-1 )*NP IPWORK = IPW + L*NP CALL DCOPY( NP, WORK( IPWORK ), 1, WORK( IPQ ), 1 ) 110 CONTINUE END IF END IF ND = ND + NB GO TO 20 END IF CALL DLACPY( 'Full', NP, NQ, WORK, NP, Q( IIQ ), LDQ ) * * End of PDLASRT * END scalapack-1.8.0/SRC/pzpbtrf.f0000640000175000017500000014172210363532303015610 0ustar muammarmuammar SUBROUTINE PZPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PZPBTRF computes a Cholesky factorization * of an N-by-N complex banded * symmetric positive definite distributed matrix * with bandwidth BW: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PZPBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' U , if UPLO = 'U', or * * P A(1:N, JA:JA+N-1) P^T = L L', if UPLO = 'L' * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPBTRF and this is stored in AF. If a linear system * is to be solved using PZPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * bw*bw * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZAXPY, $ ZGEMM, ZGERV2D, ZGESD2D, ZLACPY, ZLATCPY, $ ZPBTRF, ZPOTRF, ZSYRK, ZTBTRS, ZTRMM, ZTRRV2D, $ ZTRSD2D, ZTRSM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW * BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZPBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (NB+2*BW)*BW * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PZPBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = BW*BW * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PZPBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 10 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Zero out space for work * DO 20 I=1, WORK_SIZE_MIN WORK( I ) = CZERO 20 CONTINUE * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL ZTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+ODD_SIZE*LLDA+(BW+1) ), $ LLDA-1, 0, MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {L_i}^C in each processor * CALL ZPBTRF( UPLO, ODD_SIZE, BW, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * conjugate transpose the connection block in preparation. * CALL ZLATCPY( 'U', BW, BW, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * Perform the triangular system solve {L_i}{{B'}_i}^C = {B_i}^C * CALL ZTRTRS( 'L', 'N', 'N', BW, BW, $ A( OFST+1+(ODD_SIZE-BW)*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * * conjugate transpose resulting block to its location * in main storage. * CALL ZLATCPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), $ LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^C * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL ZHERK( UPLO, 'C', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+1+ODD_SIZE*LLDA ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL ZTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * CALL ZTBTRS( 'L', 'N', 'N', ODD_SIZE, BW, BW, A( OFST + 1 ), $ LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * CALL ZHERK( 'L', 'C', BW, ODD_SIZE, $ -ONE, AF( 1 ), ODD_SIZE, ZERO, $ AF( 1 + (ODD_SIZE+2*BW)*BW), BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL ZGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine ZTRMM. * Since we have G_i^C stored, conjugate transpose * H_i^C to H_i. * CALL ZLATCPY( 'N', BW, BW, $ AF( ODD_SIZE-BW+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BW+1), BW ) * CALL ZTRMM( 'R', 'U', 'C', 'N', BW, BW, -CONE, $ A( ( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA ) ), LLDA-1, $ AF( (ODD_SIZE)*BW+1 ), BW ) * * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL ZGESD2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL ZLACPY( 'N', BW, BW, $ A( OFST+ODD_SIZE*LLDA+1 ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), $ BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL ZAXPY( MBW2, CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL ZGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL ZPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL ZLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL ZTRSM( 'L', 'L', 'N', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL ZHERK( 'L', 'C', BW, BW, -ONE, $ AF( (ODD_SIZE)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL ZTRSM( 'R', 'L', 'C', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL ZHERK( 'L', 'N', BW, BW, -ONE, $ AF( (ODD_SIZE+2*BW)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL ZGEMM( 'N', 'N', BW, BW, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, CZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * * CASE UPLO = 'U' * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * * * Factor main partition A_i^C = U_i {U_i}^C in each processor * CALL ZPBTRF( UPLO, ODD_SIZE, BW, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1600 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * Move the connection block in preparation. * CALL ZLACPY( 'L', BW, BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * * Perform the triangular solve {L_i}{{B'}_i}^C = {B_i}^C * CALL ZTRTRS( 'U', 'C', 'N', BW, BW, $ A( OFST+BW+1+(ODD_SIZE-BW)*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * Move the resulting block back to its location in main storage. * CALL ZLACPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i^C = {C_i}^C-{{B'}_i}^C{{B'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL ZHERK( UPLO, 'C', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+BW+1+ODD_SIZE*LLDA ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1600 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * * * Copy D block into AF storage for solve. * CALL ZLATCPY( 'L', PREV_TRI_SIZE_N, PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * IF ( INFO.EQ.0 ) THEN * CALL ZTBTRS( 'U', 'C', 'N', ODD_SIZE, BW, BW, $ A( OFST + 1 ), LLDA, $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * CALL ZHERK( 'L', 'C', BW, ODD_SIZE, $ -ONE, AF( 1 ), ODD_SIZE, ZERO, $ AF( 1 + (ODD_SIZE+2*BW)*BW), BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL ZGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine ZTRMM. * Since we have G_i^C stored, conjugate transpose * H_i^C to H_i. * CALL ZLATCPY( 'N', BW, BW, $ AF( ODD_SIZE-BW+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BW+1), BW ) * CALL ZTRMM( 'R', 'L', 'N', 'N', BW, BW, -CONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( (ODD_SIZE)*BW+1 ), BW ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL ZGESD2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, MYCOL-1 ) * ENDIF * * Transpose last diagonal block into AF storage for subsequent * operations. * CALL ZLATCPY( 'U', BW, BW, $ A( OFST+ ODD_SIZE*LLDA+1+BW ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), $ BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL ZAXPY( MBW2, CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 22 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 21 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL ZGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 22 21 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL ZPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL ZLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL ZTRSM( 'L', 'L', 'N', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL ZHERK( 'L', 'C', BW, BW, -ONE, $ AF( (ODD_SIZE)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL ZTRSM( 'R', 'L', 'C', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL ZHERK( 'L', 'N', BW, BW, -ONE, $ AF( (ODD_SIZE+2*BW)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL ZGEMM( 'N', 'N', BW, BW, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, CZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 24 CONTINUE * ENDIF * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PZPBTRF * END scalapack-1.8.0/SRC/pzrot.c0000640000175000017500000004230510602576752015305 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- Mark R. Fahey * June 28, 2000 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pzrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s ) /* * Mark Fahey * June 22, 2000 */ /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; double * c; complex16 * s; /* * .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex16 X[], Y[]; { /* * Purpose * ======= * * PZROT applies a plane rotation, where the cos (C) is real and the * sin (S) is complex, and the vectors CX and CY are complex, i.e., * * [ sub( X ) ] := [ C S ] [ sub( X ) ] * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ] * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y, * * and where C*C + S*CONJG(S) = 1.0. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * Arguments * ========= * * N (input) INTEGER * The number of elements in the vectors CX and CY. * * X (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * On output, CX is overwritten with C*X + S*Y. * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * On output, CY is overwritten with -CONJG(S)*X + C*Y. * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * C (input) pointer to DOUBLE * S (input) pointer COMPLEX * C and S define a rotation * [ C S ] * [ -conjg(S) C ] * where C*C + S*CONJG(S) = 1.0. * * ===================================================================== * * .. Local Scalars .. */ int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, mycol, myrow, nn, np, np0, nprow, npcol, nq, nz, ione=1, tmp1, wksz; complex16 xwork[1], ywork[1], zero; /* .. * .. PBLAS Buffer .. */ complex16 * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void zgerv2d_(); void zgesd2d_(); void pbchkvect(); void PB_Cabort(); char * getpbbuf(); F_INTG_FCT pbztrnv_(); F_INTG_FCT zrot_(); F_INTG_FCT ilcm_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(500+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -9; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -8; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1000+MB_+1); } else { info = -11; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -9; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -8; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1000+MB_+1); } else { info = -11; } } else { info = -6; } } if( ictxt != desc_Y[CTXT_] ) info = -(1000+CTXT_+1); } } if( info ) { PB_Cabort( ictxt, "PZROT", info ); return; } /* if( info ) { pberror_( &ictxt, "PZROT", &info ); return; } */ /* * Quick return if possible. */ zero.re = ZERO; zero.im = ZERO; if( *n == 0 ) return; /* * rotation */ if( *n == 1 ) { if( ( myrow == ixrow ) && ( mycol == ixcol ) ) { buff = &X[iix-1+(jjx-1)*desc_X[LLD_]]; if( ( myrow != iyrow ) || ( mycol != iycol ) ) { zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol ); zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol ); } else *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]]; zrot_( n, buff, n, ywork, n, c, s ); X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff; if( ( myrow == iyrow ) && ( mycol == iycol ) ) Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork; } else if( ( myrow == iyrow ) && ( mycol == iycol ) ) { zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, &ixrow, &ixcol ); zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol ); zrot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s ); } return; } if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) { zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } else { if( myrow == ixrow ) { zgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol ); zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], buff, &ione, c, s ); } else if( myrow == iyrow ) { zgesd2d_( &ictxt, &ione, &nq, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &mycol ); buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol ); zrot_( &nq, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) { zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } else { if( mycol == ixcol ) { zgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol ); zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } else if( mycol == iycol ) { zgesd2d_( &ictxt, &np, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &myrow, &ixcol ); buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol ); zrot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmp = lcm / nprow; nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow ); tmp1 = np0 / desc_X[MB_]; wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) ); if( mycol == iycol ) jjy -= nz; if( myrow == ixrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol, &ixrow, &ixcol, buff+np ); if( mycol == ixcol ) { zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, buff, &ione, &zero, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &ixcol, &iyrow, &iycol, buff+np ); } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*jx-1) % desc_X[NB_]; nn = *n + nz; tmp1 = nn / desc_X[MB_]; np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) ); if( myrow == iyrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { zrot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, buff, &ione, &zero, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &iycol, &ixrow, &ixcol, buff+np ); } } } scalapack-1.8.0/SRC/psgerqf.f0000640000175000017500000002714710363532303015574 0ustar muammarmuammar SUBROUTINE PSGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGERQF computes a RQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSGERQ2, $ PSLARFB, PSLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGERQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( IL.GE.IN+1 ) THEN * * Use blocked code initially * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute the RQ factorization of the current block * A(i:i+ib-1,ja:ja+n-m+i+ib-ia-1) * CALL PSGERQ2( IB, N-M+I+IB-IA, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PSLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, $ I, JA, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the * right * CALL PSLARFB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = IN - IA + 1 NU = N - M + IN - IA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PSGERQ2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGERQF * END scalapack-1.8.0/SRC/psormlq.f0000640000175000017500000004174110363532303015616 0ustar muammarmuammar SUBROUTINE PSORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMLQ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PSGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JCC, $ LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSLARFB, $ PSLARFT, PSORML2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PSORML2( SIDE, TRANS, M, N, I1-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PSLARFT( 'Forward', 'Rowwise', NQ-I+IA, IB, A, I, JA+I-IA, $ DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PSLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, $ A, I, JA+I-IA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PSORML2( SIDE, TRANS, M, N, I2-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMLQ * END scalapack-1.8.0/SRC/pdormlq.f0000640000175000017500000004202010363532303015566 0ustar muammarmuammar SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMLQ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PDGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JCC, $ LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDLARFB, $ PDLARFT, PDORML2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PDORML2( SIDE, TRANS, M, N, I1-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PDLARFT( 'Forward', 'Rowwise', NQ-I+IA, IB, A, I, JA+I-IA, $ DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PDLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, $ A, I, JA+I-IA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PDORML2( SIDE, TRANS, M, N, I2-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMLQ * END scalapack-1.8.0/SRC/pdpbtrf.f0000640000175000017500000014154410363532303015564 0ustar muammarmuammar SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPBTRF computes a Cholesky factorization * of an N-by-N real banded * symmetric positive definite distributed matrix * with bandwidth BW: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PDPBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' U , if UPLO = 'U', or * * P A(1:N, JA:JA+N-1) P^T = L L', if UPLO = 'L' * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPBTRF and this is stored in AF. If a linear system * is to be solved using PDPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * bw*bw * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DAXPY, $ DESC_CONVERT, DGEMM, DGERV2D, DGESD2D, DLACPY, $ DLATCPY, DPBTRF, DPOTRF, DSYRK, DTBTRS, DTRMM, $ DTRRV2D, DTRSD2D, DTRSM, DTRTRS, GLOBCHK, $ IGAMX2D, IGEBR2D, IGEBS2D, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( NB+2*BW )*BW * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = BW*BW * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PDPBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 10 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 120 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW END IF * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Zero out space for work * DO 20 I = 1, WORK_SIZE_MIN WORK( I ) = ZERO 20 CONTINUE * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL DTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+ODD_SIZE*LLDA+( BW+ $ 1 ) ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {L_i}^T in each processor * CALL DPBTRF( UPLO, ODD_SIZE, BW, A( OFST+1 ), LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 30 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * transpose the connection block in preparation. * CALL DLATCPY( 'U', BW, BW, A( ( OFST+( BW+1 )+( ODD_SIZE- $ BW )*LLDA ) ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * Perform the triangular system solve {L_i}{{B'}_i}^T = {B_i}^T * CALL DTRTRS( 'L', 'N', 'N', BW, BW, $ A( OFST+1+( ODD_SIZE-BW )*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * * transpose resulting block to its location * in main storage. * CALL DLATCPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A( ( OFST+( BW+1 )+( ODD_SIZE-BW )* $ LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^T * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL DSYRK( UPLO, 'T', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+1+ODD_SIZE*LLDA ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 30 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL DTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * CALL DTBTRS( 'L', 'N', 'N', ODD_SIZE, BW, BW, $ A( OFST+1 ), LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * CALL DSYRK( 'L', 'T', BW, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, ZERO, AF( 1+( ODD_SIZE+2*BW )*BW ), $ BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL DGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine DTRMM. * Since we have G_i^T stored, transpose * H_i^T to H_i. * CALL DLATCPY( 'N', BW, BW, AF( ODD_SIZE-BW+1 ), $ ODD_SIZE, AF( ( ODD_SIZE )*BW+1 ), BW ) * CALL DTRMM( 'R', 'U', 'T', 'N', BW, BW, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )* $ LLDA ) ), LLDA-1, AF( ( ODD_SIZE )*BW+1 ), $ BW ) * * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, $ 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 110 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 60 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL DGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL DLACPY( 'N', BW, BW, A( OFST+ODD_SIZE*LLDA+1 ), LLDA-1, $ AF( ODD_SIZE*BW+MBW2+1 ), BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL DAXPY( MBW2, ONE, AF( ODD_SIZE*BW+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL DGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL DPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL DLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL DTRSM( 'L', 'L', 'N', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL DSYRK( 'L', 'T', BW, BW, -ONE, AF( ( ODD_SIZE )*BW+1 ), $ BW, ZERO, WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL DTRSM( 'R', 'L', 'T', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL DSYRK( 'L', 'N', BW, BW, -ONE, $ AF( ( ODD_SIZE+2*BW )*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL DGEMM( 'N', 'N', BW, BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, ZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 60 CONTINUE * ELSE * * CASE UPLO = 'U' * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * * * * Factor main partition A_i^T = U_i {U_i}^T in each processor * CALL DPBTRF( UPLO, ODD_SIZE, BW, A( OFST+1 ), LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 70 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * Move the connection block in preparation. * CALL DLACPY( 'L', BW, BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * * Perform the triangular solve {L_i}{{B'}_i}^T = {B_i}^T * CALL DTRTRS( 'U', 'T', 'N', BW, BW, $ A( OFST+BW+1+( ODD_SIZE-BW )*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * Move the resulting block back to its location in main storage. * CALL DLACPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i^T = {C_i}^T-{{B'}_i}^T{{B'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL DSYRK( UPLO, 'T', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+BW+1+ODD_SIZE*LLDA ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 70 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * * * Copy D block into AF storage for solve. * CALL DLATCPY( 'L', PREV_TRI_SIZE_N, PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * IF( INFO.EQ.0 ) THEN * CALL DTBTRS( 'U', 'T', 'N', ODD_SIZE, BW, BW, $ A( OFST+1 ), LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * CALL DSYRK( 'L', 'T', BW, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, ZERO, AF( 1+( ODD_SIZE+2*BW )*BW ), $ BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL DGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine DTRMM. * Since we have G_i^T stored, transpose * H_i^T to H_i. * CALL DLATCPY( 'N', BW, BW, AF( ODD_SIZE-BW+1 ), $ ODD_SIZE, AF( ( ODD_SIZE )*BW+1 ), BW ) * CALL DTRMM( 'R', 'L', 'N', 'N', BW, BW, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( ( ODD_SIZE )*BW+1 ), BW ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, $ 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 110 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 100 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL DGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ MYCOL-1 ) * END IF * * Transpose last diagonal block into AF storage for subsequent * operations. * CALL DLATCPY( 'U', BW, BW, A( OFST+ODD_SIZE*LLDA+1+BW ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL DAXPY( MBW2, ONE, AF( ODD_SIZE*BW+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 80 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 90 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL DGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 80 90 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL DPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL DLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL DTRSM( 'L', 'L', 'N', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL DSYRK( 'L', 'T', BW, BW, -ONE, AF( ( ODD_SIZE )*BW+1 ), $ BW, ZERO, WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL DTRSM( 'R', 'L', 'T', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL DSYRK( 'L', 'N', BW, BW, -ONE, $ AF( ( ODD_SIZE+2*BW )*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL DGEMM( 'N', 'N', BW, BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, ZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 100 CONTINUE * END IF * 110 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 120 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PDPBTRF * END scalapack-1.8.0/SRC/pclaset.f0000640000175000017500000002170010363532303015545 0ustar muammarmuammar SUBROUTINE PCLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASET initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) COMPLEX * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) COMPLEX * The constant to which the diagonal elements are to be set. * * A (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBLK, IN, ITMP, J, JAA, JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PCLASE2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PCLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) CALL PCLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) JAA = JA + ITMP CALL PCLASE2( UPLO, IBLK, N-ITMP, ALPHA, BETA, $ A, I, JAA, DESCA ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL PCLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IAA = IA + JTMP CALL PCLASE2( UPLO, M-JTMP, JBLK, ALPHA, BETA, A, IAA, $ J, DESCA ) 20 CONTINUE ELSE IF( M.LE.N ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), $ IA+M-1 ) CALL PCLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) CALL PCLASE2( UPLO, IBLK, I-IA, ALPHA, ALPHA, A, I, $ JA, DESCA ) CALL PCLASE2( UPLO, IBLK, N-I+IA, ALPHA, BETA, A, I, $ JA+I-IA, DESCA ) 30 CONTINUE ELSE JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA+N-1 ) CALL PCLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) CALL PCLASE2( UPLO, J-JA, JBLK, ALPHA, ALPHA, A, IA, $ J, DESCA ) CALL PCLASE2( UPLO, M-J+JA, JBLK, ALPHA, BETA, A, $ IA+J-JA, J, DESCA ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PCLASET * END scalapack-1.8.0/SRC/pcpocon.f0000640000175000017500000003664610363532303015572 0ustar muammarmuammar SUBROUTINE PCPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LRWORK, LWORK, N REAL ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL RWORK( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a complex Hermitian positive definite distributed matrix * using the Cholesky factorization A = U**H*U or A = L*L**H computed by * PCPOTRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the factor stored in * A(IA:IA+N-1,JA:JA+N-1) is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX pointer into the local memory to * an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, this * array contains the local pieces of the factors L or U from * the Cholesky factorization A(IA:IA+N-1,JA:JA+N-1) = U'*U or * L*L', as computed by PCPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) REAL * The 1-norm (or infinity-norm) of the hermitian distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*MAX(1,CEIL(P-1,Q)),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX(1,CEIL(Q-1,P))) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 2*LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, $ JX, KASE, LRWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPROW, NPMOD, NQ, NQMOD REAL AINVNM, SCALE, SL, SU, SMLNUM COMPLEX WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT, $ DESCSET, INFOG2L, PCAMAX, PCHK1MAT, PCLATRS, $ PCLACON, PCSRSCL, PB_TOPGET, PB_TOPSET, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, ICHAR, MAX, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LRWMIN = 2*NQMOD RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPNL = 1 IPNU = IPNL + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the 1-norm (or I-norm) of inv(A). * AINVNM = ZERO KASE = 0 NORMIN = 'N' * 10 CONTINUE CALL PCLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( UPPER ) THEN * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SL, RWORK( IPNL ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, RWORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SU, RWORK( IPNU ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Multiply by 1/SCALE if doing so will not cause overflow. * SCALE = SL*SU IF( SCALE.NE.ONE ) THEN CALL PCAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL CGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL CGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PCSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PCPOCON * END scalapack-1.8.0/SRC/pzdotu.c0000640000175000017500000005211110363532303015434 0ustar muammarmuammar/* --------------------------------------------------------------------- * * Mark R. Fahey * August 2000 * This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0 * which fixes a bug in the incx=1 and incy=1 case. * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pzdotu_( n, dotu, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy ) /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; complex16 * dotu; /* .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex16 X[], Y[]; { /* * Purpose * ======= * * PZDOTU forms the dot product of two distributed vectors, * * dotu := sub( X )**T * sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * * Parameters * ========== * * N (global input) pointer to INTEGER * The length of the distributed vectors to be multiplied. * N >= 0. * * DOTU (local output) pointer to COMPLEX*16 * The dot product of sub( X ) and sub( Y ) only in their scope. * * X (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * ===================================================================== * * .. Local Scalars .. */ char * cbtop, * cctop, * rbtop, * rctop; int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, mone=-1, mycol, myrow, nn, np, np0, nprow, npcol, nq, nz, ione=1, tmp1, wksz; complex16 xwork[1], ywork[1], zero; /* .. * .. PBLAS Buffer .. */ complex16 * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void zgebr2d_(); void zgebs2d_(); void zgerv2d_(); void zgesd2d_(); void zgsum2d_(); void pbchkvect(); void pberror_(); char * getpbbuf(); char * ptop(); F_VOID_FCT pbztrnv_(); F_VOID_FCT zzdotu_(); F_INTG_FCT ilcm_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(600+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -10; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -9; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1100+MB_+1); } else { info = -12; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -10; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -9; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1100+MB_+1); } else { info = -12; } } else { info = -7; } } if( ictxt != desc_Y[CTXT_] ) info = -(1100+CTXT_+1); } } if( info ) { pberror_( &ictxt, "PZDOTU", &info ); return; } /* * Quick return if possible. */ dotu->re = ZERO; dotu->im = ZERO; zero.re = ZERO; zero.im = ZERO; if( *n == 0 ) return; /* * dot <- x^{t} * y */ if( *n == 1 ) { if( ( myrow == ixrow ) && ( mycol == ixcol ) ) { buff = &X[iix-1+(jjx-1)*desc_X[LLD_]]; if( ( myrow != iyrow ) || ( mycol != iycol ) ) { zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol ); zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol ); } else *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]]; zzdotu_( n, dotu, buff, n, ywork, n ); } else if( ( myrow == iyrow ) && ( mycol == iycol ) ) { zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, &ixrow, &ixcol ); zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol ); zzdotu_( n, dotu, xwork, n, &Y[iiy-1+(jjx-1)*desc_X[LLD_]], n ); } if( ( *incx == desc_X[M_] ) && ( desc_X[M_] != 1 ) ) { if( myrow == ixrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == ixcol ) { zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione, &myrow, &ixcol ); } } } else if( ( *incx == 1 ) && ( desc_X[M_] != 1 ) ) { if( mycol == ixcol ) { cbtop = ptop( BROADCAST, COLUMN, TOPGET ); if( myrow == ixrow ) { zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotu, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotu, &ione, &ixrow, &mycol ); } } } if( ( *incy == desc_Y[M_] ) && ( desc_Y[M_] != 1 ) ) { if( myrow == iyrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == iycol ) { zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione, &myrow, &iycol ); } } } else if( ( *incy == 1 ) && ( desc_Y[M_] != 1 ) ) { if( mycol == iycol ) { cbtop = ptop( BROADCAST, COLUMN, TOPGET ); if( myrow == iyrow ) { zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotu, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotu, &ione, &iyrow, &mycol ); } } } return; } if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zzdotu_( &nq, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } } else { if( myrow == ixrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); buff = (complex16 *)getpbbuf( "PZDOTU", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol ); zzdotu_( &nq, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } else if( myrow == iyrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zgesd2d_( &ictxt, &ione, &nq, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &mycol ); buff = (complex16 *)getpbbuf( "PZDOTU", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol ); zzdotu_( &nq, dotu, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotu_( &np, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } } else { if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); buff = (complex16 *)getpbbuf( "PZDOTU", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &iycol ); zzdotu_( &np, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } else if( mycol == iycol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zgesd2d_( &ictxt, &np, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &myrow, &ixcol ); buff = (complex16 *)getpbbuf( "PZDOTU", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &ixcol ); zzdotu_( &np, dotu, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmp = lcm / nprow; nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow ); tmp1 = np0 / desc_X[MB_]; wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZDOTU", wksz*sizeof(complex16) ); if( mycol == iycol ) jjy -= nz; if( myrow == ixrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol, &ixrow, &ixcol, buff+np ); if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotu_( &np, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } if( myrow == iyrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == ixcol ) zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione ); else zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione, &myrow, &ixcol ); } } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*jx-1) % desc_X[NB_]; nn = *n + nz; tmp1 = nn / desc_X[MB_]; np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZDOTU", wksz*sizeof(complex16) ); if( myrow == iyrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotu_( &np, dotu, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotu, &ione, &mone, &mycol ); } if( myrow == ixrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == iycol ) zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione ); else zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotu, &ione, &myrow, &iycol ); } } } } scalapack-1.8.0/SRC/pdstedc.f0000640000175000017500000002302310363532303015540 0ustar muammarmuammar SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, IQ, JQ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) DOUBLE PRECISION D( * ), E( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * PDSTEDC computes all eigenvalues and eigenvectors of a * symmetric tridiagonal matrix in parallel, using the divide and * conquer algorithm. * * This code makes very mild assumptions about floating point * arithmetic. It will work on machines with a guard digit in * add/subtract, or on those binary machines without guard digits * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. * It could conceivably fail on hexadecimal or decimal machines * without guard digits, but we know of none. See DLAED3 for details. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. (NOT IMPLEMENTED YET) * = 'I': Compute eigenvectors of tridiagonal matrix also. * = 'V': Compute eigenvectors of original dense symmetric * matrix also. On entry, Z contains the orthogonal * matrix used to reduce the original matrix to * tridiagonal form. (NOT IMPLEMENTED YET) * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input/output) DOUBLE PRECISION array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in descending order. * * E (global input/output) DOUBLE PRECISION array, dimension (N-1) * On entry, the subdiagonal elements of the tridiagonal matrix. * On exit, E has been destroyed. * * Q (local output) DOUBLE PRECISION array, * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * On output, Q is distributed across the P processes in block * cyclic format. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed. * * LWORK (local input/output) INTEGER, * the dimension of the array WORK. * LWORK = 6*N + 2*NP*NQ * NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) * NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 2 + 7*N + 8*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER ICOFFQ, IIQ, IPQ, IQCOL, IQROW, IROFFQ, JJQ, $ LDQ, LIWMIN, LWMIN, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW, NQ DOUBLE PRECISION ORGNRM * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION DLANST EXTERNAL INDXG2P, LSAME, NUMROC, DLANST * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DLASCL, DSTEDC, $ INFOG2L, PDLAED0, PDLASRT, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) LDQ = DESCQ( LLD_ ) NB = DESCQ( NB_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IQ, JQ, DESCQ, 8, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCQ( NB_ ) IROFFQ = MOD( IQ-1, DESCQ( MB_ ) ) ICOFFQ = MOD( JQ-1, DESCQ( NB_ ) ) IQROW = INDXG2P( IQ, NB, MYROW, DESCQ( RSRC_ ), NPROW ) IQCOL = INDXG2P( JQ, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) LWMIN = 6*N + 2*NP*NQ LIWMIN = 2 + 7*N + 8*NPCOL * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( .NOT.LSAME( COMPZ, 'I' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IROFFQ.NE.ICOFFQ .OR. ICOFFQ.NE.0 ) THEN INFO = -5 ELSE IF( DESCQ( MB_ ).NE.DESCQ( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDSTEDC', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return * IF( N.EQ.0 ) $ GO TO 10 CALL INFOG2L( IQ, JQ, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) IF( N.EQ.1 ) THEN IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) $ Q( 1 ) = ONE GO TO 10 END IF * * If N is smaller than the minimum divide size NB, then * solve the problem with the serial divide and conquer * code locally. * IF( N.LE.NB ) THEN IF( ( MYROW.EQ.IQROW ) .AND. ( MYCOL.EQ.IQCOL ) ) THEN IPQ = IIQ + ( JJQ-1 )*LDQ CALL DSTEDC( 'I', N, D, E, Q( IPQ ), LDQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) IF( INFO.NE.0 ) THEN INFO = ( N+1 ) + N GO TO 10 END IF END IF GO TO 10 END IF * * If P=NPROW*NPCOL=1, solve the problem with DSTEDC. * IF( NPCOL*NPROW.EQ.1 ) THEN IPQ = IIQ + ( JJQ-1 )*LDQ CALL DSTEDC( 'I', N, D, E, Q( IPQ ), LDQ, WORK, LWORK, IWORK, $ LIWORK, INFO ) GO TO 10 END IF * * Scale matrix to allowable range, if necessary. * ORGNRM = DLANST( 'M', N, D, E ) IF( ORGNRM.NE.ZERO ) THEN CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N-1, 1, E, N-1, INFO ) END IF * CALL PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * Sort eigenvalues and corresponding eigenvectors * CALL PDLASRT( 'I', N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * * Scale back. * IF( ORGNRM.NE.ZERO ) $ CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) * 10 CONTINUE * IF( LWORK.GT.0 ) $ WORK( 1 ) = DBLE( LWMIN ) IF( LIWORK.GT.0 ) $ IWORK( 1 ) = LIWMIN RETURN * * End of PDSTEDC * END scalapack-1.8.0/SRC/pdtrtrs.f0000640000175000017500000003135310363532303015621 0ustar muammarmuammar SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDTRTRS solves a triangular system of the form * * sub( A ) * X = sub( B ) or sub( A )**T * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a triangular * distributed matrix of order N, and B(IB:IB+N-1,JB:JB+NRHS-1) is an * N-by-NRHS distributed matrix denoted by sub( B ). A check is made * to verify that sub( A ) is nonsingular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': Solve sub( A ) * X = sub( B ) (No transpose) * = 'T': Solve sub( A )**T * X = sub( B ) (Transpose) * = 'C': Solve sub( A )**T * X = sub( B ) (Transpose) * * DIAG (global input) CHARACTER * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed matrix sub( B ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1) ). This array * contains the local pieces of the distributed triangular * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of sub( A ) contains the lower triangular * matrix, and the strictly upper triangular part of sub( A ) * is not referenced. If DIAG = 'U', the diagonal elements of * sub( A ) are also not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the right hand side distributed matrix * sub( B ). On exit, if INFO = 0, sub( B ) is overwritten by * the solution matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the i-th diagonal element of sub( A ) is * zero, indicating that the submatrix is singular and the * solutions X have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER I, IAROW, IBROW, ICOFFA, ICTXT, ICURCOL, $ ICURROW, IROFFA, IROFFB, IDUM, II, IOFFA, J, $ JBLK, JJ, JN, LDA, LL, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK2MAT, PDTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) NOTRAN = LSAME( TRANS, 'N' ) * CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -8 ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -904 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -1304 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'D' ) END IF IDUM2( 3 ) = 3 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, NRHS, 5, $ IB, JB, DESCB, 13, 3, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first block separately * JBLK = JN-JA+1 IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 10 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 LL = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JBLK = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 20 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 LL = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Solve A * x = b or A' * x = b. * CALL PDTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * RETURN * * End of PDTRTRS * END scalapack-1.8.0/SRC/pdporfs.f0000640000175000017500000010062710363532303015575 0ustar muammarmuammar SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, $ LIWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IWORK( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), $ BERR( * ), FERR( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PDPORFS improves the computed solution to a system of linear * equations when the coefficient matrix is symmetric positive definite * and provides error bounds and backward error estimates for the * solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ). * This array contains the local pieces of the N-by-N symmetric * distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * On entry, this array contains the factors L or U from the * Cholesky factorization sub( A ) = L*L**T or U**T*U, as * computed by PDPOTRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). On exit, it contains the * improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD( IA-1, MB_A ) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) DOUBLE PRECISION TWO, THREE PARAMETER ( TWO = 2.0D+0, THREE = 3.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ DGEBR2D, DGEBS2D, INFOG2L, PCHK2MAT, $ PDASYMV, PDAXPY, PDCOPY, PDSYMV, $ PDPOTRS, PDLACON, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 19, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3 * NPMOD LIWMIN = NPMOD WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -13 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1500 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1500 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1900 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -17 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1900 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -18 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1900 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -25 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 23 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 25 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 0, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, N, 2, NRHS, 3, $ IX, JX, DESCX, 19, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPORFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X) * CALL PDCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDSYMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to * the i-th components of the numerator and denominator * before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PDASYMV( UPLO, N, ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J) is larger than machine epsilon, and * 2) BERR(J) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PDAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub( B ) - sub( A )*sub( X ). * CALL PDCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDSYMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PDASYMV( UPLO, N, ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PDAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + NZ*EPS*( * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X)) * +abs(sub(B))) is incremented by SAFE1 if the i-th component * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PDPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDPORFS * END scalapack-1.8.0/SRC/pdsyngst.f0000640000175000017500000004147610363532303016001 0ustar muammarmuammar SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ) * .. * * Purpose * * ======= * * PDSYNGST reduces a complex Hermitian-definite generalized * eigenproblem to standard form. * * PDSYNGST performs the same function as PDHEGST, but is based on * rank 2K updates, which are faster and more scalable than * triangular solves (the basis of PDSYNGST). * * PDSYNGST calls PDHEGST when UPLO='U', hence PDHENGST provides * improved performance only when UPLO='L', IBTYPE=1. * * PDSYNGST also calls PDHEGST when insufficient workspace is * provided, hence PDSYNGST provides improved * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PDPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PDPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) DOUBLE PRECISION * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB ) * * When IBTYPE = 1 and UPLO = 'L', PDSYNGST provides improved * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * where NB = MB_A = NB_A, * NP0 = NUMROC( N, NB, 0, 0, NPROW ), * NQ0 = NUMROC( N, NB, 0, 0, NPROW ), * * NUMROC ia a ScaLAPACK tool functions * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * * .. Parameters .. DOUBLE PRECISION ONEHALF, ONE, MONE PARAMETER ( ONEHALF = 0.5D0, ONE = 1.0D0, MONE = -1.0D0 ) INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( DLEN_ = 9, CTXT_ = 2, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, INDAA, INDG, INDR, INDRT, IROFFA, $ IROFFB, J, K, KB, LWMIN, LWOPT, MYCOL, MYROW, $ NB, NP0, NPCOL, NPK, NPROW, NQ0, POSTK * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ), $ DESCR( DLEN_ ), DESCRT( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL LSAME, INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PDGEMM, PDLACPY, PDSYGST, PDSYMM, PDSYR2K, $ PDTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) SCALE = 1.0D0 * NB = DESCA( MB_ ) * * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) LWMIN = MAX( NB*( NP0+1 ), 3*NB ) IF( IBTYPE.EQ.1 .AND. .NOT.UPPER ) THEN LWOPT = 2*NP0*NB + NQ0*NB + NB*NB ELSE LWOPT = LWMIN END IF WORK( 1 ) = DBLE( LWOPT ) LQUERY = ( LWORK.EQ.-1 ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNGST', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * IF( IBTYPE.NE.1 .OR. UPPER .OR. LWORK.LT.LWOPT ) THEN CALL PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) RETURN END IF * CALL DESCSET( DESCG, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCR, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCRT, NB, N, NB, NB, IAROW, IACOL, ICTXT, NB ) CALL DESCSET( DESCAA, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * INDG = 1 INDR = INDG + DESCG( LLD_ )*NB INDAA = INDR + DESCR( LLD_ )*NB INDRT = INDAA + DESCAA( LLD_ )*NB * DO 30 K = 1, N, NB * KB = MIN( N-K+1, NB ) POSTK = K + KB NPK = N - POSTK + 1 * * CALL PDLACPY( 'A', N-POSTK+1, KB, B, POSTK+IB-1, K+JB-1, DESCB, $ WORK( INDG ), POSTK, 1, DESCG ) CALL PDLACPY( 'A', N-POSTK+1, KB, A, POSTK+IA-1, K+JA-1, DESCA, $ WORK( INDR ), POSTK, 1, DESCR ) CALL PDLACPY( 'A', KB, K-1, A, K+IA-1, JA, DESCA, $ WORK( INDRT ), 1, 1, DESCRT ) * CALL PDLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDR ), K, 1, DESCR ) CALL PDTRSM( 'Right', 'L', 'N', 'N', NPK, KB, MONE, B, K+IB-1, $ K+JB-1, DESCB, WORK( INDG ), POSTK, 1, DESCG ) * CALL PDSYMM( 'Right', 'L', NPK, KB, ONEHALF, A, K+IA-1, K+JA-1, $ DESCA, WORK( INDG ), POSTK, 1, DESCG, ONE, $ WORK( INDR ), POSTK, 1, DESCR ) * CALL PDSYR2K( 'Lower', 'No T', NPK, KB, ONE, WORK( INDG ), $ POSTK, 1, DESCG, WORK( INDR ), POSTK, 1, DESCR, $ ONE, A, POSTK+IA-1, POSTK+JA-1, DESCA ) * CALL PDGEMM( 'No T', 'No Conj', NPK, K-1, KB, ONE, $ WORK( INDG ), POSTK, 1, DESCG, WORK( INDRT ), 1, $ 1, DESCRT, ONE, A, POSTK+IA-1, JA, DESCA ) * CALL PDSYMM( 'Right', 'L', NPK, KB, ONE, WORK( INDR ), K, 1, $ DESCR, WORK( INDG ), POSTK, 1, DESCG, ONE, A, $ POSTK+IA-1, K+JA-1, DESCA ) * CALL PDTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, K-1, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, K+IA-1, JA, $ DESCA ) * CALL PDLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDAA ), 1, 1, DESCAA ) * IF( MYROW.EQ.DESCAA( RSRC_ ) .AND. MYCOL.EQ.DESCAA( CSRC_ ) ) $ THEN DO 20 I = 1, KB DO 10 J = 1, I WORK( INDAA+J-1+( I-1 )*DESCAA( LLD_ ) ) $ = WORK( INDAA+I-1+( J-1 )*DESCAA( LLD_ ) ) 10 CONTINUE 20 CONTINUE END IF * CALL PDTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, $ 1, DESCAA ) * CALL PDTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', KB, KB, ONE, $ B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, 1, $ DESCAA ) * CALL PDLACPY( 'L', KB, KB, WORK( INDAA ), 1, 1, DESCAA, A, $ K+IA-1, K+JA-1, DESCA ) * CALL PDTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', NPK, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, POSTK+IA-1, $ K+JA-1, DESCA ) * DESCR( CSRC_ ) = MOD( DESCR( CSRC_ )+1, NPCOL ) DESCG( CSRC_ ) = MOD( DESCG( CSRC_ )+1, NPCOL ) DESCRT( RSRC_ ) = MOD( DESCRT( RSRC_ )+1, NPROW ) DESCAA( RSRC_ ) = MOD( DESCAA( RSRC_ )+1, NPROW ) DESCAA( CSRC_ ) = MOD( DESCAA( CSRC_ )+1, NPCOL ) 30 CONTINUE * WORK( 1 ) = DBLE( LWOPT ) * RETURN END scalapack-1.8.0/SRC/pcungr2.f0000640000175000017500000002617510363532303015505 0ustar muammarmuammar SUBROUTINE PCUNGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGR2 generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PCGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, LWMIN, MP, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLACGV, PCLARFC, PCLASET, PCSCAL, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia:ia+m-k-1 to rows of the unit matrix * CALL PCLASET( 'All', M-K, N-M, ZERO, ZERO, A, IA, JA, DESCA ) CALL PCLASET( 'All', M-K, M, ZERO, ONE, A, IA, JA+N-M, DESCA ) * END IF * TAUI = ZERO MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+M-K, IA+M-1 * * Apply H(i)' to A(ia:i,ja:ja+n-m+i-ia) from the right * CALL PCLACGV( I-IA+N-M, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCELSET( A, I, JA+N-M+I-IA, DESCA, ONE ) CALL PCLARFC( 'Right', I-IA, I-IA+N-M+1, A, I, JA, DESCA, $ DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, MP ) ) CALL PCSCAL( I-IA+N-M, -TAUI, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCLACGV( I-IA+N-M, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCELSET( A, I, JA+N-M+I-IA, DESCA, ONE-CONJG( TAUI ) ) * * Set A(i,ja+n-m+i-ia+1:ja+n-1) to zero * CALL PCLASET( 'All', 1, IA+M-1-I, ZERO, ZERO, A, I, $ JA+N-M+I-IA+1, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGR2 * END scalapack-1.8.0/SRC/psggqrf.f0000640000175000017500000003620510363532303015571 0ustar muammarmuammar SUBROUTINE PSGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PSGGQRF computes a generalized QR factorization of * an N-by-M matrix sub( A ) = A(IA:IA+N-1,JA:JA+M-1) and * an N-by-P matrix sub( B ) = B(IB:IB+N-1,JB:JB+P-1): * * sub( A ) = Q*R, sub( B ) = Q*T*Z, * * where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal * matrix, and R and T assume one of the forms: * * if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, * ( 0 ) N-M N M-N * M * * where R11 is upper triangular, and * * if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, * P-N N ( T21 ) P * P * * where T12 or T21 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GQR * factorization of sub( A ) and sub( B ) implicitly gives the QR * factorization of inv( sub( B ) )* sub( A ): * * inv( sub( B ) )*sub( A )= Z'*(inv(T)*R) * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( B ). N >= 0. * * M (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( B ). P >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+M-1)). * On entry, the local pieces of the N-by-M distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(N,M) by M * upper trapezoidal matrix R (R is upper triangular if N >= M); * the elements below the diagonal, with the array TAUA, * represent the orthogonal matrix Q as a product of min(N,M) * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) REAL, array, dimension * LOCc(JA+MIN(N,M)-1). This array contains the scalar factors * TAUA of the elementary reflectors which represent the * orthogonal matrix Q. TAUA is tied to the distributed matrix * A. (see Further Details). * * B (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+P-1)). * On entry, the local pieces of the N-by-P distributed matrix * sub( B ) which is to be factored. On exit, if N <= P, the * upper triangle of B(IB:IB+N-1,JB+P-N:JB+P-1) contains the * N by N upper triangular matrix T; if N > P, the elements on * and above the (N-P)-th subdiagonal contain the N by P upper * trapezoidal matrix T; the remaining elements, with the array * TAUB, represent the orthogonal matrix Z as a product of * elementary reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) REAL, array, dimension LOCr(IB+N-1) * This array contains the scalar factors of the elementary * reflectors which represent the orthogonal unitary matrix Z. * TAUB is tied to the distributed matrix B (see Further * Details). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB_A * ( NpA0 + MqA0 + NB_A ), * MAX( (NB_A*(NB_A-1))/2, (PqB0 + NpB0)*NB_A ) + * NB_A * NB_A, * MB_B * ( NpB0 + PqB0 + MB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * PqB0 = NUMROC( P+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(n,m). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a real scalar, and v is a real vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in * A(ia+i:ia+n-1,ja+i-1), and taua in TAUA(ja+i-1). * To form Q explicitly, use ScaLAPACK subroutine PSORGQR. * To use Q to update another matrix, use ScaLAPACK subroutine PSORMQR. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(ib) H(ib+1) . . . H(ib+k-1), where k = min(n,p). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a real scalar, and v is a real vector with * v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in * B(ib+n-k+i-1,jb:jb+p-k+i-2), and taub in TAUB(ib+n-k+i-1). * To form Z explicitly, use ScaLAPACK subroutine PSORGRQ. * To use Z to update another matrix, use ScaLAPACK subroutine PSORMRQ. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( MB_A.EQ.MB_B .AND. IROFFA.EQ.IROFFB .AND. IAROW.EQ.IBROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MQA0, MYCOL, $ MYROW, NPA0, NPB0, NPCOL, NPROW, PQB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSGEQRF, $ PSGERQF, PSORMQR, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( N, 1, M, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 1, P, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) PQB0 = NUMROC( P+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( NB_ ) * ( NPA0 + MQA0 + DESCA( NB_ ) ), $ MAX( MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( PQB0 + NPB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ), $ DESCB( MB_ ) * ( NPB0 + PQB0 + DESCB( MB_ ) ) ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IAROW.NE.IBROW .OR. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -1203 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( N, 1, M, 2, IA, JA, DESCA, 7, N, 1, P, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGGQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * QR factorization of N-by-M matrix sub( A ): sub( A ) = Q*R * CALL PSGEQRF( N, M, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := Q'*sub( B ). * CALL PSORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, IA, JA, $ DESCA, TAUA, B, IB, JB, DESCB, WORK, LWORK, INFO ) LWMIN = MIN( LWMIN, INT( WORK( 1 ) ) ) * * RQ factorization of N-by-P matrix sub( B ): sub( B ) = T*Z. * CALL PSGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = REAL( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * RETURN * * End of PSGGQRF * END scalapack-1.8.0/SRC/pssygst.f0000640000175000017500000004177410363532303015643 0ustar muammarmuammar* * SUBROUTINE PSSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSSYGST reduces a real symmetric-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**T or L**T*sub( A )*L. * * sub( B ) must have been previously factorized as U**T*U or L*L**T by * PSPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T); * = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**T*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**T. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PSPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) REAL * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, HALF PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, K, KB, MYCOL, MYROW, NB, $ NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSSYGS2, $ PSSYMM, PSSYR2K, PSTRMM, PSTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * SCALE = ONE * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYGST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN * * Compute inv(U')*sub( A )*inv(U) * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 10 CONTINUE * * Update the upper triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PSSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, IB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PSTRSM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, $ N-K-KB+1, ONE, B, IB+K-1, JB+K-1, DESCB, A, $ IA+K-1, JA+K+KB-1, DESCA ) CALL PSSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, ONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PSSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, A, $ IA+K-1, JA+K+KB-1, DESCA, B, IB+K-1, $ JB+K+KB-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PSSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, ONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PSTRSM( 'Right', UPLO, 'No transpose', 'Non-unit', $ KB, N-K-KB+1, ONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 10 * ELSE * * Compute inv(L)*sub( A )*inv(L') * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 20 CONTINUE * * Update the lower triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PSSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PSTRSM( 'Right', UPLO, 'Transpose', 'Non-unit', $ N-K-KB+1, KB, ONE, B, IB+K-1, JB+K-1, DESCB, $ A, IA+K+KB-1, JA+K-1, DESCA ) CALL PSSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, ONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PSSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, -ONE, $ A, IA+K+KB-1, JA+K-1, DESCA, B, IB+K+KB-1, $ JB+K-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PSSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, ONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PSTRSM( 'Left', UPLO, 'No transpose', 'Non-unit', $ N-K-KB+1, KB, ONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 20 * END IF * ELSE * IF( UPPER ) THEN * * Compute U*sub( A )*U' * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 30 CONTINUE * * Update the upper triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PSTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', K-1, $ KB, ONE, B, IB, JB, DESCB, A, IA, JA+K-1, $ DESCA ) CALL PSSYMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA+K-1, DESCA ) CALL PSSYR2K( UPLO, 'No transpose', K-1, KB, ONE, A, IA, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA, DESCA ) CALL PSSYMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA+K-1, DESCA ) CALL PSTRMM( 'Right', UPLO, 'Transpose', 'Non-unit', K-1, $ KB, ONE, B, IB+K-1, JB+K-1, DESCB, A, IA, $ JA+K-1, DESCA ) CALL PSSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 30 * ELSE * * Compute L'*sub( A )*L * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 40 CONTINUE * * Update the lower triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PSTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', KB, $ K-1, ONE, B, IB, JB, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PSSYMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA+K-1, $ JA, DESCA ) CALL PSSYR2K( UPLO, 'Transpose', K-1, KB, ONE, A, IA+K-1, $ JA, DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA, $ JA, DESCA ) CALL PSSYMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA+K-1, $ JA, DESCA ) CALL PSTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, K-1, $ ONE, B, IB+K-1, JB+K-1, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PSSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 40 * END IF * END IF * RETURN * * End of PSSYGST * END scalapack-1.8.0/SRC/pclanhe.f0000640000175000017500000010243710363532303015533 0ustar muammarmuammar REAL FUNCTION PCLANHE( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLANHE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * complex hermitian distributed matrix sub(A) = A(IA:IA+N-1,JA:JA+N-1). * * PCLANHE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PCLANHE as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * hermitian matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PCLANHE is set to zero. N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the hermitian distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL ABSA, SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASSQ, PSCOL2ROW, $ PSTREECOMB, SAXPY, SCOMBSSQ, $ SGAMX2D, SGSUM2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL ICEIL, ISAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is Hermitian, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( MYCOL.EQ.IACOL ) THEN IF( JJ.LE.JJA+NQ-1 ) THEN VALUE = MAX( VALUE, $ ABS( REAL( A( K+(JJ-1)*LDA ) ) ) ) DO 30 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF ELSE IF( JJ.LE.JJA+NQ-1 ) THEN DO 35 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 35 CONTINUE END IF END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( MYCOL.EQ.ICURCOL ) THEN IF( JJ.LE.JJA+NQ-1 ) THEN VALUE = MAX( VALUE, $ ABS( REAL( A( K+(JJ-1)*LDA ) ) ) ) DO 70 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF ELSE IF( JJ.LE.JJA+NQ-1 ) THEN DO 75 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 75 CONTINUE END IF END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.IAROW ) THEN IF( II.LE.IIA+NP-1 ) THEN VALUE = MAX( VALUE, ABS( REAL( A( II+K ) ) ) ) DO 100 LL = II+1, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF ELSE IF( II.LE.IIA+NP-1 ) THEN DO 105 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 105 CONTINUE END IF END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.ICURROW ) THEN IF( II.LE.IIA+NP-1 ) THEN VALUE = MAX( VALUE, $ ABS( REAL( A( II+K ) ) ) ) DO 140 LL = II+1, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF ELSE IF( II.LE.IIA+NP-1 ) THEN DO 145 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 145 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * hermitian). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( MYCOL.EQ.IACOL ) THEN IF( JJA+NQ.GT.JJ ) THEN SUM = ABS( REAL( A( K+(JJ-1)*LDA ) ) ) DO 210 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF ELSE IF( JJA+NQ.GT.JJ ) THEN DO 215 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 215 CONTINUE END IF END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( JJA+NQ.GT.JJ ) THEN SUM = ABS( REAL( A( K+(JJ-1)*LDA ) ) ) DO 250 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF ELSE IF( JJA+NQ.GT.JJ ) THEN DO 255 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 255 CONTINUE END IF END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( MYROW.EQ.IAROW ) THEN IF( IIA+NP.GT.II ) THEN SUM = ABS( REAL( A( IOFFA+II ) ) ) DO 280 LL = II+1, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF ELSE DO 285 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 285 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( MYROW.EQ.ICURROW ) THEN IF( IIA+NP.GT.II ) THEN SUM = ABS( REAL( A( II+IOFFA ) ) ) DO 320 LL = II+1, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE ELSE IF( II.EQ.IIA+NP-1 ) THEN SUM = ABS( REAL( A( II+IOFFA ) ) ) END IF ELSE DO 325 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 325 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to SGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PSCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL SAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( ISAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) THEN IF( REAL( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( REAL( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) THEN IF( REAL( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( REAL( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM*( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.IAROW ) THEN IF( REAL( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( REAL( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.ICURROW ) THEN IF( REAL( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( REAL( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PCLANHE = VALUE * RETURN * * End of PCLANHE * END scalapack-1.8.0/SRC/pzgeequ.f0000640000175000017500000003312010363532303015571 0ustar muammarmuammar SUBROUTINE PZGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N DOUBLE PRECISION AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION C( * ), R( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZGEEQU computes row and column scalings intended to equilibrate an * M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and * reduce its condition number. R returns the row scale factors and C * the column scale factors, chosen to try to make the largest entry in * each row and column of the distributed matrix B with elements * B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1. * * R(i) and C(j) are restricted to be between SMLNUM = smallest safe * number and BIGNUM = largest safe number. Use of these scaling * factors is not guaranteed to reduce the condition number of * sub( A ) but works well in practice. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the * local pieces of the M-by-N distributed matrix whose * equilibration factors are to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local output) DOUBLE PRECISION array, dimension LOCr(M_A) * If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row * scale factors for sub( A ). R is aligned with the distributed * matrix A, and replicated across every process column. R is * tied to the distributed matrix A. * * C (local output) DOUBLE PRECISION array, dimension LOCc(N_A) * If INFO = 0, C(JA:JA+N-1) contains the column scale factors * for sub( A ). C is aligned with the distributed matrix A, and * replicated down every process row. C is tied to the distri- * buted matrix A. * * ROWCND (global output) DOUBLE PRECISION * If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of * the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1). * If ROWCND >= 0.1 and AMAX is neither too large nor too small, * it is not worth scaling by R(IA:IA+M-1). * * COLCND (global output) DOUBLE PRECISION * If INFO = 0, COLCND contains the ratio of the smallest C(j) * to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it * is not worth scaling by C(JA:JA+N-1). * * AMAX (global output) DOUBLE PRECISION * Absolute value of largest distributed matrix element. If * AMAX is very close to overflow or very close to underflow, * the matrix should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, and i is * <= M: the i-th row of the distributed matrix sub( A ) * is exactly zero, * > M: the (i-M)-th column of the distributed * matrix sub( A ) is exactly zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA, $ IOFFA, IROFF, J, JJA, LDA, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM COMPLEX*16 ZDUM * .. * .. Local Arrays .. INTEGER DESCC( DLEN_ ), DESCR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMN2D, $ DGAMX2D, IGAMX2D, INFOG2L, PCHK1MAT, PB_TOPGET, $ PXERBLA * .. * .. External Functions .. INTEGER INDXL2G, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXL2G, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, MOD * .. * .. Statement Functions .. DOUBLE PRECISION ZABS1 * .. * .. Statement Function definitions .. ZABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN ROWCND = ONE COLCND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Get machine constants and local indexes. * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Assign descriptors for R and C arrays * CALL DESCSET( DESCR, M, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Compute row scale factors. * DO 10 I = IIA, IIA+MP-1 R( I ) = ZERO 10 CONTINUE * * Find the maximum element in each row. * IOFFA = (JJA-1)*LDA DO 30 J = JJA, JJA+NQ-1 DO 20 I = IIA, IIA+MP-1 R( I ) = MAX( R( I ), ZABS1( A( IOFFA + I ) ) ) 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE CALL DGAMX2D( ICTXT, 'Rowwise', ROWCTOP, MP, 1, R( IIA ), $ MAX( 1, MP ), IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 40 I = IIA, IIA+MP-1 RCMAX = MAX( RCMAX, R( I ) ) RCMIN = MIN( RCMIN, R( I ) ) 40 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) AMAX = RCMAX * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 50 I = IIA, IIA+MP-1 IF( R( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = INDXL2G( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) - IA + 1 50 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 60 I = IIA, IIA+MP-1 R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) 60 CONTINUE * * Compute ROWCND = min(R(I)) / max(R(I)) * ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * * Compute column scale factors * DO 70 J = JJA, JJA+NQ-1 C( J ) = ZERO 70 CONTINUE * * Find the maximum element in each column, * assuming the row scaling computed above. * IOFFA = (JJA-1)*LDA DO 90 J = JJA, JJA+NQ-1 DO 80 I = IIA, IIA+MP-1 C( J ) = MAX( C( J ), ZABS1( A( IOFFA + I ) )*R( I ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, C( JJA ), $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 100 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 100 CONTINUE CALL DGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 110 J = JJA, JJA+NQ-1 IF( C( J ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = M + INDXL2G( J, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) - JA + 1 110 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 120 J = JJA, JJA+NQ-1 C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) 120 CONTINUE * * Compute COLCND = min(C(J)) / max(C(J)) * COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * RETURN * * End of PZGEEQU * END scalapack-1.8.0/SRC/pcgebd2.f0000640000175000017500000004354210363532303015430 0ustar muammarmuammar SUBROUTINE PCGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEBD2 reduces a complex general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an unitary transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * unitary matrix Q as a product of elementary reflectors, and * the elements above the first superdiagonal, with the array * TAUP, represent the orthogonal matrix P as a product of * elementary reflectors. If M < N, the diagonal and the first * subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the unitary matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MpA0, NqA0 ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ) * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J, $ JJ, K, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 COMPLEX ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CGEBR2D, $ CGEBS2D, CHK1MAT, CLARFG, DESCSET, INFOG2L, $ PCELSET, PCLACGV, PCLARF, PCLARFC, $ PCLARFG, PSELSET, PXERBLA, SGEBR2D, $ SGEBS2D * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Test the input parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = MAX( MPA0, NQA0 ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( M.EQ.1 .AND. N.EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN I = II+(JJ-1)*DESCA( LLD_ ) CALL CLARFG( 1, A( I ), A( I ), 1, TAUQ( JJ ) ) D( JJ ) = REAL( A( I ) ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1 ) CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1, IAROW, IACOL ) CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1, IAROW, IACOL ) END IF END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO RETURN END IF * ALPHA = ZERO * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, N I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector H(j) to annihilate * A(ia+i:ia+m-1,j) * CALL PCLARFG( M-K+1, ALPHA, I, J, A, MIN( I+1, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PSELSET( D, 1, J, DESCD, REAL( ALPHA ) ) CALL PCELSET( A, I, J, DESCA, ONE ) * * Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left * CALL PCLARFC( 'Left', M-K+1, N-K, A, I, J, DESCA, 1, TAUQ, $ A, I, J+1, DESCA, WORK ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( ALPHA ) ) ) * IF( K.LT.N ) THEN * * Generate elementary reflector G(i) to annihilate * A(i,ja+j+1:ja+n-1) * CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, JA+N-1 ), DESCA, DESCA( M_ ), $ TAUP ) CALL PSELSET( E, I, 1, DESCE, REAL( ALPHA ) ) CALL PCELSET( A, I, J+1, DESCA, ONE ) * * Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right * CALL PCLARF( 'Right', M-K, N-K, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP, A, I+1, J+1, DESCA, $ WORK ) CALL PCELSET( A, I, J+1, DESCA, CMPLX( REAL( ALPHA ) ) ) CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) ELSE CALL PCELSET( TAUP, I, 1, DESCE, ZERO ) END IF 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, M I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector G(i) to annihilate * A(i,ja+j:ja+n-1) * CALL PCLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) CALL PCLARFG( N-K+1, ALPHA, I, J, A, I, $ MIN( J+1, JA+N-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PSELSET( D, I, 1, DESCD, REAL( ALPHA ) ) CALL PCELSET( A, I, J, DESCA, ONE ) * * Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right * CALL PCLARF( 'Right', M-K, N-K+1, A, I, J, DESCA, $ DESCA( M_ ), TAUP, A, MIN( I+1, IA+M-1 ), J, $ DESCA, WORK ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( ALPHA ) ) ) CALL PCLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) * IF( K.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:ia+m-1,j) * CALL PCLARFG( M-K, ALPHA, I+1, J, A, $ MIN( I+2, IA+M-1 ), J, DESCA, 1, TAUQ ) CALL PSELSET( E, 1, J, DESCE, REAL( ALPHA ) ) CALL PCELSET( A, I+1, J, DESCA, ONE ) * * Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left * CALL PCLARFC( 'Left', M-K, N-K, A, I+1, J, DESCA, 1, $ TAUQ, A, I+1, J+1, DESCA, WORK ) CALL PCELSET( A, I+1, J, DESCA, CMPLX( REAL( ALPHA ) ) ) ELSE CALL PCELSET( TAUQ, 1, J, DESCE, ZERO ) END IF 20 CONTINUE END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEBD2 * END scalapack-1.8.0/SRC/slasrt2.f0000640000175000017500000001612710363532303015513 0ustar muammarmuammar* * SUBROUTINE SLASRT2( ID, N, D, KEY, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, N * .. * .. Array Arguments .. INTEGER KEY( * ) REAL D( * ) * .. * * Purpose * ======= * * Sort the numbers in D in increasing order (if ID = 'I') or * in decreasing order (if ID = 'D' ). * * Use Quick Sort, reverting to Insertion sort on arrays of * size <= 20. Dimension of STACK limits N to about 2**32. * * Arguments * ========= * * ID (input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. * * N (input) INTEGER * The length of the array D. * * D (input/output) REAL array, dimension (N) * On entry, the array to be sorted. * On exit, D has been sorted into increasing order * (D(1) <= ... <= D(N) ) or into decreasing order * (D(1) >= ... >= D(N) ), depending on ID. * * KEY (input/output) INTEGER array, dimension (N) * On entry, KEY contains a key to each of the entries in D() * Typically, KEY(I) = I for all I * On exit, KEY is permuted in exactly the same manner as * D() was permuted from input to output * Therefore, if KEY(I) = I for all I upon input, then * D_out(I) = D_in(KEY(I)) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. INTEGER SELECT PARAMETER ( SELECT = 20 ) * .. * .. Local Scalars .. INTEGER DIR, ENDD, I, J, START, STKPNT, TMPKEY REAL D1, D2, D3, DMNMX, TMP * .. * .. Local Arrays .. INTEGER STACK( 2, 32 ) * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Executable Statements .. * * Test the input paramters. * * INFO = 0 DIR = -1 IF( LSAME( ID, 'D' ) ) THEN DIR = 0 ELSE IF( LSAME( ID, 'I' ) ) THEN DIR = 1 END IF IF( DIR.EQ.-1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SLASRT2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.LE.1 ) $ RETURN * STKPNT = 1 STACK( 1, 1 ) = 1 STACK( 2, 1 ) = N 10 CONTINUE START = STACK( 1, STKPNT ) ENDD = STACK( 2, STKPNT ) STKPNT = STKPNT - 1 IF( ENDD-START.GT.0 ) THEN * * Do Insertion sort on D( START:ENDD ) * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * DO 30 I = START + 1, ENDD DO 20 J = I, START + 1, -1 IF( D( J ).GT.D( J-1 ) ) THEN DMNMX = D( J ) D( J ) = D( J-1 ) D( J-1 ) = DMNMX TMPKEY = KEY( J ) KEY( J ) = KEY( J-1 ) KEY( J-1 ) = TMPKEY ELSE GO TO 30 END IF 20 CONTINUE 30 CONTINUE * ELSE * * Sort into increasing order * DO 50 I = START + 1, ENDD DO 40 J = I, START + 1, -1 IF( D( J ).LT.D( J-1 ) ) THEN DMNMX = D( J ) D( J ) = D( J-1 ) D( J-1 ) = DMNMX TMPKEY = KEY( J ) KEY( J ) = KEY( J-1 ) KEY( J-1 ) = TMPKEY ELSE GO TO 50 END IF 40 CONTINUE 50 CONTINUE * END IF * ELSE IF( ENDD-START.GT.SELECT ) THEN * * Partition D( START:ENDD ) and stack parts, largest one first * * Choose partition entry as median of 3 * D1 = D( START ) D2 = D( ENDD ) I = ( START+ENDD ) / 2 D3 = D( I ) IF( D1.LT.D2 ) THEN IF( D3.LT.D1 ) THEN DMNMX = D1 ELSE IF( D3.LT.D2 ) THEN DMNMX = D3 ELSE DMNMX = D2 END IF ELSE IF( D3.LT.D2 ) THEN DMNMX = D2 ELSE IF( D3.LT.D1 ) THEN DMNMX = D3 ELSE DMNMX = D1 END IF END IF * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * I = START - 1 J = ENDD + 1 60 CONTINUE 70 CONTINUE J = J - 1 IF( D( J ).LT.DMNMX ) $ GO TO 70 80 CONTINUE I = I + 1 IF( D( I ).GT.DMNMX ) $ GO TO 80 IF( I.LT.J ) THEN TMP = D( I ) D( I ) = D( J ) D( J ) = TMP TMPKEY = KEY( J ) KEY( J ) = KEY( I ) KEY( I ) = TMPKEY GO TO 60 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF ELSE * * Sort into increasing order * I = START - 1 J = ENDD + 1 90 CONTINUE 100 CONTINUE J = J - 1 IF( D( J ).GT.DMNMX ) $ GO TO 100 110 CONTINUE I = I + 1 IF( D( I ).LT.DMNMX ) $ GO TO 110 IF( I.LT.J ) THEN TMP = D( I ) D( I ) = D( J ) D( J ) = TMP TMPKEY = KEY( J ) KEY( J ) = KEY( I ) KEY( I ) = TMPKEY GO TO 90 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF END IF END IF IF( STKPNT.GT.0 ) $ GO TO 10 * * RETURN * * End of SLASRT2 * END scalapack-1.8.0/SRC/pcdtsv.f0000640000175000017500000004636010363532303015426 0ustar muammarmuammar SUBROUTINE PCDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX B( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal diagonally dominant-like distributed * matrix. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PCDTTRF and PCDTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL+3*NB) * +max(10*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PCDTTRF, PCDTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PCDTTRF and PCDTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PCDTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL+3*NB) * * Factor the matrix * CALL PCDTTRF( N, DL, D, DU, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PCDTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PCDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCDTSV', -INFO ) RETURN END IF * RETURN * * End of PCDTSV * END scalapack-1.8.0/SRC/pzgerfs.f0000640000175000017500000010273610363532303015603 0ustar muammarmuammar SUBROUTINE PZGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, $ JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, $ LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX, $ LRWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ) DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PZGERFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**H * sub( X ) = sub( B ) * (Conjugate transpose) * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) COMPLEX*16 pointer into the local * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * factors of the matrix sub( A ) = P * L * U as computed by * PZGETRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF. * This array contains the pivoting information as computed * by PZGETRF. IPIV(i) -> The global row local row i * was swapped with. This array is tied to the distributed * matrix A. * * B (local input) COMPLEX*16 pointer into the local * memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local * pieces of the distributed matrix of right hand sides * sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input and output) COMPLEX*16 pointer into the * local memory to an array of local dimension * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains * the local pieces of the distributed matrix solution * sub( X ). On exit, the improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD(IA-1,MB_A) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD(IB-1,MB_B) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ZERO, RONE, TWO, THREE PARAMETER ( ZERO = 0.0D+0, RONE = 1.0D+0, TWO = 2.0D+0, $ THREE = 3.0D+0 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN CHARACTER TRANSN, TRANST INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX*16 ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ INFOG2L, PCHK2MAT, PXERBLA, PZAGEMV, PZAXPY, $ PZCOPY, PZGEMV, PZGETRS, PZLACON, $ ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, ICHAR, MAX, MIN, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * NOTRAN = LSAME( TRANS, 'N' ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 20, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2 * NPMOD LRWMIN = NPMOD WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( ( .NOT.NOTRAN ) .AND. ( .NOT.LSAME( TRANS, 'T' ) ) .AND. $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -14 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1600 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1600 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 2000 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -18 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 2000 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -19 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 2000 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -24 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -26 END IF END IF * IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 24 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 26 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 5, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, N, 2, NRHS, 3, $ IX, JX, DESCX, 20, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGERFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANSN = 'N' TRANST = 'C' ELSE TRANSN = 'C' TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PZCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, IX, $ JX+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PZAGEMV( TRANS, N, N, RONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PZGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PZAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PZGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PZGETRS( TRANSN, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PZCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, $ IX, J+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) + * abs(sub(B)))(i)) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PZAGEMV( TRANS, N, N, RONE, A, IA, JA, DESCA, X, IX, $ J+K, DESCX, 1, RONE, RWORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PZGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PZAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PZGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PZGETRS( TRANSN, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, $ INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, $ 1, IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) * RETURN * * End of PZGERFS * END scalapack-1.8.0/SRC/pslapv2.f0000640000175000017500000003672510363532303015516 0ustar muammarmuammar SUBROUTINE PSLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLAPV2 applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1), resulting in row or column pivoting. The * pivot vector should be aligned with the distributed matrix A. For * pivoting the rows of sub( A ), IPIV should be distributed along a * process column and replicated over all process rows. Similarly, * IPIV should be distributed along a process row and replicated over * all process columns for column pivoting. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P * sub( A ); * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P ) * sub( A ). * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this local array contains the local pieces of the * distributed matrix sub( A ) to which the row or columns * interchanges will be applied. On exit, this array contains * the local pieces of the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (input) INTEGER array, dimension >= LOCr(M_A)+MB_A if * ROWCOL = 'R', LOCc(N_A)+NB_A otherwise. It contains * the pivoting information. IPIV(i) is the global row (column), * local row (column) i was swapped with. The last piece of the * array of size MB_A (resp. NB_A) is used as workspace. IPIV is * tied to the distributed matrix A. * * IP (global input) INTEGER * IPIV's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JP (global input) INTEGER * IPIV's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCIP (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix IPIV. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL FORWRD, ROWPVT INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIP, IP1, ITMP, $ IPVWRK, J, JB, JJP, JP1, K, MA, MBA, MYCOL, $ MYROW, NBA, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBS2D, IGEBR2D, INFOG2L, $ PSSWAP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN END IF FORWRD = LSAME( DIREC, 'F' ) * * * Get grid and matrix parameters * MA = DESCA( M_ ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * IF( FORWRD ) THEN CALL INFOG2L( IP, JP, DESCIP, NPROW, NPCOL, MYROW, MYCOL, $ IIP, JJP, ICURROW, ICURCOL ) * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * Loop over rows of sub( A ) * I = IA IB = MIN( M, ICEIL( IA, MBA ) * MBA - IA + 1 ) 10 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP ), IB ) ITMP = IIP IIP = IIP + IB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( ITMP ), IB, ICURROW, MYCOL ) END IF * * Pivot the block of rows * DO 20 K = I, I+IB-1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PSSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP + 1 20 CONTINUE * * Go on to next row of processes, increment row counter, * and figure number of rows to pivot next * ICURROW = MOD( ICURROW+1, NPROW ) I = I + IB IB = MIN( MBA, M-I+IA ) IF( IB .GT. 0 ) GOTO 10 * * If I am pivoting the columns of sub( A ) * ELSE IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * Loop over columns of sub( A ) * J = JA JB = MIN( N, ICEIL( JA, NBA ) * NBA - JA + 1 ) 30 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP ), JB ) ITMP = JJP JJP = JJP + JB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( ITMP ), JB, MYROW, ICURCOL ) END IF * * Pivot the block of columns * DO 40 K = J, J+JB-1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PSSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP + 1 40 CONTINUE * * Go on to next column of processes, increment column * counter, and figure number of columns to pivot next * ICURCOL = MOD( ICURCOL+1, NPCOL ) J = J + JB JB = MIN( NBA, N-J+JA ) IF( JB .GT. 0 ) GOTO 30 END IF * * If I want to apply pivots in reverse order, i.e. reversing * pivoting done earlier. Thus this section computes * inv( P ) * sub( A ). * ELSE * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN CALL INFOG2L( IP+M-1, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) * IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * If I'm not in the current process row, my IIP points out * past end of pivot vector (since I don't own a piece of the * last row). Adjust IIP so it points at last pivot entry. * IF( MYROW.NE.ICURROW ) IIP = IIP - 1 * * Loop over rows in reverse order, starting at last row * I = IA + M - 1 IB = MOD( I, MBA ) IF( IB .EQ. 0 ) IB = MBA IB = MIN( IB, M ) 50 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN ITMP = IIP IIP = IIP - IB CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP+1 ), IB ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IPVWRK ), IB, ICURROW, MYCOL ) ITMP = IPVWRK + IB - 1 END IF * * Pivot the block of rows * DO 60 K = I, I-IB+1, -1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PSSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP - 1 60 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURROW = MOD( NPROW+ICURROW-1, NPROW ) I = I - IB IB = MIN( MBA, I-IA+1 ) IF( IB .GT. 0 ) GOTO 50 * * Otherwise, I'm pivoting the columns of sub( A ) * ELSE CALL INFOG2L( IP, JP+N-1, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * If I'm not in the current process column, my JJP points out * past end of pivot vector (since I don't own a piece of the * last column). Adjust JJP so it points at last pivot entry. * IF( MYCOL.NE.ICURCOL ) JJP = JJP - 1 * * Loop over columns in reverse order starting at last column * J = JA + N - 1 JB = MOD( J, NBA ) IF( JB .EQ. 0 ) JB = NBA JB = MIN( JB, N ) 70 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN ITMP = JJP JJP = JJP - JB CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP+1 ), JB ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( IPVWRK ), JB, MYROW, ICURCOL ) ITMP = IPVWRK + JB - 1 END IF * * Pivot a block of columns * DO 80 K = J, J-JB+1, -1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PSSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP - 1 80 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL ) J = J - JB JB = MIN( NBA, J-JA+1 ) IF( JB .GT. 0 ) GOTO 70 END IF * END IF * RETURN * * End PSLAPV2 * END scalapack-1.8.0/SRC/pclase2.f0000640000175000017500000003736310363532303015457 0ustar muammarmuammar SUBROUTINE PCLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N COMPLEX ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASE2 initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. PCLASE2 requires that only dimension of the matrix * operand is distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) COMPLEX * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) COMPLEX * The constant to which the diagonal elements are to be set. * * A (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, ICOFFA, II, IIA, $ IIBEG, IIEND, IINXT, ILEFT, IRIGHT, IROFFA, $ ITOP, JJ, JJA, JJBEG, JJEND, JJNXT, LDA, MBA, $ MP, MPA, MYCOL, MYDIST, MYROW, NBA, NPCOL, $ NPROW, NQ, NQA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASET, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MB_A * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MPA.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEG = IIA IIEND = IIA + MPA - 1 IINXT = MIN( ICEIL( IIBEG, MBA ) * MBA, IIEND ) * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL CLASET( UPLO, IINXT-IIBEG+1, N-ITOP, ALPHA, BETA, $ A( IIBEG+(JJA+ITOP-1)*LDA ), LDA ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEG = IINXT +1 IINXT = MIN( IINXT+MBA, IIEND ) GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL CLASET( 'All', MP, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL CLASET( UPLO, MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 30 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL CLASET( 'All', MPA, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( IIA+(JJ-1)*LDA ), LDA ) CALL CLASET( 'All', MPA-MP, HEIGHT, ALPHA, ALPHA, $ A( IIA+(JJA+ITOP-1)*LDA ), LDA ) CALL CLASET( 'All', MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 30 END IF * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQA.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEG = JJA JJEND = JJA + NQA - 1 JJNXT = MIN( ICEIL( JJBEG, NBA ) * NBA, JJEND ) * 40 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL CLASET( UPLO, M-ILEFT, JJNXT-JJBEG+1, ALPHA, $ BETA, A( IIA+ILEFT+(JJBEG-1)*LDA ), LDA ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBA, JJEND ) GO TO 40 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 50 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL CLASET( 'All', ILEFT-II+IIA, NQ, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL CLASET( UPLO, WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 50 END IF * ELSE * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 60 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL CLASET( 'All', ILEFT-II+IIA, NQA, ALPHA, ALPHA, $ A( II+(JJA-1)*LDA ), LDA ) CALL CLASET( 'All', WIDE, NQA-NQ, ALPHA, ALPHA, $ A( IIA+ILEFT+(JJA-1)*LDA ), LDA ) CALL CLASET( 'All', WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 60 END IF * END IF * END IF * END IF * RETURN * * End of PCLASE2 * END scalapack-1.8.0/SRC/pcpbsv.f0000640000175000017500000004527410363532303015423 0ustar muammarmuammar SUBROUTINE PCPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCPBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded symmetric positive definite distributed * matrix with bandwidth BW. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PCPBTRF and PCPBTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+2*bw)*bw * +max((bw*NRHS), bw*bw) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PCPBTRF, PCPBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PCPBTRF and PCPBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PCPBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+2*BW)*BW * * Factor the matrix * CALL PCPBTRF( UPLO, N, BW, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PCPBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PCPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPBSV', -INFO ) RETURN END IF * RETURN * * End of PCPBSV * END scalapack-1.8.0/SRC/zsteqr2.f0000640000175000017500000004457710363532303015545 0ustar muammarmuammar SUBROUTINE ZSTEQR2( COMPZ, N, D, E, Z, LDZ, NR, WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * November 15, 1997 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, LDZ, N, NR * .. * .. Array Arguments .. DOUBLE PRECISION D( * ), E( * ), WORK( * ) COMPLEX*16 Z( LDZ, * ) * .. * * Purpose * ======= * * ZSTEQR2 is a modified version of LAPACK routine ZSTEQR. * ZSTEQR2 computes all eigenvalues and, optionally, eigenvectors of a * symmetric tridiagonal matrix using the implicit QL or QR method. * ZSTEQR2 is modified from ZSTEQR to allow each ScaLAPACK process * running ZSTEQR2 to perform updates on a distributed matrix Q. * Proper usage of ZSTEQR2 can be gleaned from * examination of ScaLAPACK's * PZHEEV. * ZSTEQR2 incorporates changes attributed to Greg Henry. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. * = 'I': Compute eigenvalues and eigenvectors of the * tridiagonal matrix. Z must be initialized to the * identity matrix by PZLASET or ZLASET prior * to entering this subroutine. * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input/output) DOUBLE PRECISION array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in ascending order. * * E (input/output) DOUBLE PRECISION array, dimension (N-1) * On entry, the (n-1) subdiagonal elements of the tridiagonal * matrix. * On exit, E has been destroyed. * * Z (local input/local output) COMPLEX*16 array, global * dimension (N, N), local dimension (LDZ, NR). * On entry, if COMPZ = 'V', then Z contains the orthogonal * matrix used in the reduction to tridiagonal form. * On exit, if INFO = 0, then if COMPZ = 'V', Z contains the * orthonormal eigenvectors of the original symmetric matrix, * and if COMPZ = 'I', Z contains the orthonormal eigenvectors * of the symmetric tridiagonal matrix. * If COMPZ = 'N', then Z is not referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= 1, and if * eigenvectors are desired, then LDZ >= max(1,N). * * NR (input) INTEGER * NR = MAX(1, NUMROC( N, NB, MYPROW, 0, NPROCS ) ). * If COMPZ = 'N', then NR is not referenced. * * WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) * If COMPZ = 'N', then WORK is not referenced. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: the algorithm has failed to find all the eigenvalues in * a total of 30*N iterations; if INFO = i, then i * elements of E have not converged to zero; on exit, D * and E contain the elements of a symmetric tridiagonal * matrix which is orthogonally similar to the original * matrix. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, TWO, THREE, HALF PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, $ THREE = 3.0D0, HALF = 0.5D0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D0, 1.0D0 ) ) INTEGER MAXIT, NMAXLOOK PARAMETER ( MAXIT = 30, NMAXLOOK = 15 ) * .. * .. Local Scalars .. INTEGER I, ICOMPZ, II, ILAST, ISCALE, J, JTOT, K, L, $ L1, LEND, LENDM1, LENDP1, LENDSV, LM1, LSV, M, $ MM, MM1, NLOOK, NM1, NMAXIT DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, GP, OLDEL, OLDGP, $ OLDRP, P, R, RP, RT1, RT2, S, SAFMAX, SAFMIN, $ SSFMAX, SSFMIN, TST, TST1 * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 * .. * .. External Subroutines .. EXTERNAL DLAEV2, DLARTG, DLASCL, DSTERF, XERBLA, ZLASR, $ ZSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * ILAST = 0 INFO = 0 * IF( LSAME( COMPZ, 'N' ) ) THEN ICOMPZ = 0 ELSEIF( LSAME( COMPZ, 'I' ) ) THEN ICOMPZ = 1 ELSE ICOMPZ = -1 ENDIF IF( ICOMPZ.LT.0 ) THEN INFO = -1 ELSEIF( N.LT.0 ) THEN INFO = -2 ELSEIF( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, NR ) ) THEN INFO = -6 ENDIF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZSTEQR2', -INFO ) RETURN ENDIF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * If eigenvectors aren't not desired, this is faster * IF( ICOMPZ.EQ.0 ) THEN CALL DSTERF( N, D, E, INFO ) RETURN ENDIF * IF( N.EQ.1 ) THEN Z( 1, 1 ) = CONE RETURN ENDIF * * Determine the unit roundoff and over/underflow thresholds. * EPS = DLAMCH( 'E' ) EPS2 = EPS**2 SAFMIN = DLAMCH( 'S' ) SAFMAX = ONE / SAFMIN SSFMAX = SQRT( SAFMAX ) / THREE SSFMIN = SQRT( SAFMIN ) / EPS2 * * Compute the eigenvalues and eigenvectors of the tridiagonal * matrix. * NMAXIT = N*MAXIT JTOT = 0 * * Determine where the matrix splits and choose QL or QR iteration * for each block, according to whether top or bottom diagonal * element is smaller. * L1 = 1 NM1 = N - 1 * 10 CONTINUE IF( L1.GT.N ) $ GOTO 220 IF( L1.GT.1 ) $ E( L1-1 ) = ZERO IF( L1.LE.NM1 ) THEN DO 20 M = L1, NM1 TST = ABS( E( M ) ) IF( TST.EQ.ZERO ) $ GOTO 30 IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ $ 1 ) ) ) )*EPS ) THEN E( M ) = ZERO GOTO 30 ENDIF 20 CONTINUE ENDIF M = N * 30 CONTINUE L = L1 LSV = L LEND = M LENDSV = LEND L1 = M + 1 IF( LEND.EQ.L ) $ GOTO 10 * * Scale submatrix in rows and columns L to LEND * ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) ISCALE = 0 IF( ANORM.EQ.ZERO ) $ GOTO 10 IF( ANORM.GT.SSFMAX ) THEN ISCALE = 1 CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, $ INFO ) CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, $ INFO ) ELSEIF( ANORM.LT.SSFMIN ) THEN ISCALE = 2 CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, $ INFO ) CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, $ INFO ) ENDIF * * Choose between QL and QR iteration * IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN LEND = LSV L = LENDSV ENDIF * IF( LEND.GT.L ) THEN * * QL Iteration * * Look for small subdiagonal element. * 40 CONTINUE IF( L.NE.LEND ) THEN LENDM1 = LEND - 1 DO 50 M = L, LENDM1 TST = ABS( E( M ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ $ SAFMIN )GOTO 60 50 CONTINUE ENDIF * M = LEND * 60 CONTINUE IF( M.LT.LEND ) $ E( M ) = ZERO P = D( L ) IF( M.EQ.L ) $ GOTO 110 * * If remaining matrix is 2-by-2, use DLAE2 or DLAEV2 * to compute its eigensystem. * IF( M.EQ.L+1 ) THEN CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) WORK( L ) = C WORK( N-1+L ) = S CALL ZLASR( 'R', 'V', 'B', NR, 2, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) D( L ) = RT1 D( L+1 ) = RT2 E( L ) = ZERO L = L + 2 IF( L.LE.LEND ) $ GOTO 40 GOTO 200 ENDIF * IF( JTOT.EQ.NMAXIT ) $ GOTO 200 JTOT = JTOT + 1 * * Form shift. * G = ( D( L+1 )-P ) / ( TWO*E( L ) ) R = DLAPY2( G, ONE ) G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) * IF( ICOMPZ.EQ.0 ) THEN * Do not do a lookahead! GOTO 90 ENDIF * OLDEL = ABS( E( L ) ) GP = G RP = R TST = ABS( E( L ) )**2 TST = TST / ( ( EPS2*ABS( D( L ) ) )*ABS( D( L+1 ) )+SAFMIN ) * NLOOK = 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) THEN 70 CONTINUE * * This is the lookahead loop, going until we have * convergence or too many steps have been taken. * S = ONE C = ONE P = ZERO MM1 = M - 1 DO 80 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL DLARTG( GP, F, C, S, RP ) GP = D( I+1 ) - P RP = ( D( I )-GP )*S + TWO*C*B P = S*RP IF( I.NE.L ) $ GP = C*RP - B 80 CONTINUE OLDGP = GP OLDRP = RP * Find GP & RP for the next iteration IF( ABS( C*OLDRP-B ).GT.SAFMIN ) THEN GP = ( ( OLDGP+P )-( D( L )-P ) ) / ( TWO*( C*OLDRP-B ) ) ELSE * * Goto put in by G. Henry to fix ALPHA problem * GOTO 90 * GP = ( ( OLDGP+P )-( D( L )-P ) ) / * $ ( TWO*( C*OLDRP-B )+SAFMIN ) ENDIF RP = DLAPY2( GP, ONE ) GP = D( M ) - ( D( L )-P ) + $ ( ( C*OLDRP-B ) / ( GP+SIGN( RP, GP ) ) ) TST1 = TST TST = ABS( C*OLDRP-B )**2 TST = TST / ( ( EPS2*ABS( D( L )-P ) )*ABS( OLDGP+P )+ $ SAFMIN ) * Make sure that we are making progress IF( ABS( C*OLDRP-B ).GT.0.9D0*OLDEL ) THEN IF( ABS( C*OLDRP-B ).GT.OLDEL ) THEN GP = G RP = R ENDIF TST = HALF ELSE OLDEL = ABS( C*OLDRP-B ) ENDIF NLOOK = NLOOK + 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) $ GOTO 70 ENDIF * IF( ( TST.LE.ONE ) .AND. ( TST.NE.HALF ) .AND. $ ( ABS( P ).LT.EPS*ABS( D( L ) ) ) .AND. $ ( ILAST.EQ.L ) .AND. ( ABS( E( L ) )**2.LE.10000.0D0* $ ( ( EPS2*ABS( D( L ) ) )*ABS( D( L+1 ) )+SAFMIN ) ) ) THEN * * Skip the current step: the subdiagonal info is just noise. * M = L E( M ) = ZERO P = D( L ) JTOT = JTOT - 1 GOTO 110 ENDIF G = GP R = RP * * Lookahead over * 90 CONTINUE * S = ONE C = ONE P = ZERO * * Inner loop * MM1 = M - 1 DO 100 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL DLARTG( G, F, C, S, R ) IF( I.NE.M-1 ) $ E( I+1 ) = R G = D( I+1 ) - P R = ( D( I )-G )*S + TWO*C*B P = S*R D( I+1 ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * WORK( I ) = C WORK( N-1+I ) = -S * 100 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * MM = M - L + 1 CALL ZLASR( 'R', 'V', 'B', NR, MM, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) * D( L ) = D( L ) - P E( L ) = G ILAST = L GOTO 40 * * Eigenvalue found. * 110 CONTINUE D( L ) = P * L = L + 1 IF( L.LE.LEND ) $ GOTO 40 GOTO 200 * ELSE * * QR Iteration * * Look for small superdiagonal element. * 120 CONTINUE IF( L.NE.LEND ) THEN LENDP1 = LEND + 1 DO 130 M = L, LENDP1, -1 TST = ABS( E( M-1 ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ $ SAFMIN )GOTO 140 130 CONTINUE ENDIF * M = LEND * 140 CONTINUE IF( M.GT.LEND ) $ E( M-1 ) = ZERO P = D( L ) IF( M.EQ.L ) $ GOTO 190 * * If remaining matrix is 2-by-2, use DLAE2 or DLAEV2 * to compute its eigensystem. * IF( M.EQ.L-1 ) THEN CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) WORK( M ) = C WORK( N-1+M ) = S CALL ZLASR( 'R', 'V', 'F', NR, 2, WORK( M ), WORK( N-1+M ), $ Z( 1, L-1 ), LDZ ) D( L-1 ) = RT1 D( L ) = RT2 E( L-1 ) = ZERO L = L - 2 IF( L.GE.LEND ) $ GOTO 120 GOTO 200 ENDIF * IF( JTOT.EQ.NMAXIT ) $ GOTO 200 JTOT = JTOT + 1 * * Form shift. * G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) R = DLAPY2( G, ONE ) G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) * IF( ICOMPZ.EQ.0 ) THEN * Do not do a lookahead! GOTO 170 ENDIF * OLDEL = ABS( E( L-1 ) ) GP = G RP = R TST = ABS( E( L-1 ) )**2 TST = TST / ( ( EPS2*ABS( D( L ) ) )*ABS( D( L-1 ) )+SAFMIN ) NLOOK = 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) THEN 150 CONTINUE * * This is the lookahead loop, going until we have * convergence or too many steps have been taken. * S = ONE C = ONE P = ZERO * * Inner loop * LM1 = L - 1 DO 160 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL DLARTG( GP, F, C, S, RP ) GP = D( I ) - P RP = ( D( I+1 )-GP )*S + TWO*C*B P = S*RP IF( I.LT.LM1 ) $ GP = C*RP - B 160 CONTINUE OLDGP = GP OLDRP = RP * Find GP & RP for the next iteration IF( ABS( C*OLDRP-B ).GT.SAFMIN ) THEN GP = ( ( OLDGP+P )-( D( L )-P ) ) / ( TWO*( C*OLDRP-B ) ) ELSE * * Goto put in by G. Henry to fix ALPHA problem * GOTO 170 * GP = ( ( OLDGP+P )-( D( L )-P ) ) / * $ ( TWO*( C*OLDRP-B )+SAFMIN ) ENDIF RP = DLAPY2( GP, ONE ) GP = D( M ) - ( D( L )-P ) + $ ( ( C*OLDRP-B ) / ( GP+SIGN( RP, GP ) ) ) TST1 = TST TST = ABS( ( C*OLDRP-B ) )**2 TST = TST / ( ( EPS2*ABS( D( L )-P ) )*ABS( OLDGP+P )+ $ SAFMIN ) * Make sure that we are making progress IF( ABS( C*OLDRP-B ).GT.0.9D0*OLDEL ) THEN IF( ABS( C*OLDRP-B ).GT.OLDEL ) THEN GP = G RP = R ENDIF TST = HALF ELSE OLDEL = ABS( C*OLDRP-B ) ENDIF NLOOK = NLOOK + 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) $ GOTO 150 ENDIF IF( ( TST.LE.ONE ) .AND. ( TST.NE.HALF ) .AND. $ ( ABS( P ).LT.EPS*ABS( D( L ) ) ) .AND. $ ( ILAST.EQ.L ) .AND. ( ABS( E( L-1 ) )**2.LE.10000.0D0* $ ( ( EPS2*ABS( D( L-1 ) ) )*ABS( D( L ) )+SAFMIN ) ) ) THEN * * Skip the current step: the subdiagonal info is just noise. * M = L E( M-1 ) = ZERO P = D( L ) JTOT = JTOT - 1 GOTO 190 ENDIF * G = GP R = RP * * Lookahead over * 170 CONTINUE * S = ONE C = ONE P = ZERO DO 180 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL DLARTG( G, F, C, S, R ) IF( I.NE.M ) $ E( I-1 ) = R G = D( I ) - P R = ( D( I+1 )-G )*S + TWO*C*B P = S*R D( I ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * WORK( I ) = C WORK( N-1+I ) = S * 180 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * MM = L - M + 1 CALL ZLASR( 'R', 'V', 'F', NR, MM, WORK( M ), WORK( N-1+M ), $ Z( 1, M ), LDZ ) * D( L ) = D( L ) - P E( LM1 ) = G ILAST = L GOTO 120 * * Eigenvalue found. * 190 CONTINUE D( L ) = P * L = L - 1 IF( L.GE.LEND ) $ GOTO 120 GOTO 200 * ENDIF * * Undo scaling if necessary * 200 CONTINUE IF( ISCALE.EQ.1 ) THEN CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ELSEIF( ISCALE.EQ.2 ) THEN CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ENDIF * * Check for no convergence to an eigenvalue after a total * of N*MAXIT iterations. * IF( JTOT.LT.NMAXIT ) $ GOTO 10 DO 210 I = 1, N - 1 IF( E( I ).NE.ZERO ) $ INFO = INFO + 1 210 CONTINUE GOTO 250 * * Order eigenvalues and eigenvectors. * 220 CONTINUE * * Use Selection Sort to minimize swaps of eigenvectors * DO 240 II = 2, N I = II - 1 K = I P = D( I ) DO 230 J = II, N IF( D( J ).LT.P ) THEN K = J P = D( J ) ENDIF 230 CONTINUE IF( K.NE.I ) THEN D( K ) = D( I ) D( I ) = P CALL ZSWAP( NR, Z( 1, I ), 1, Z( 1, K ), 1 ) ENDIF 240 CONTINUE * 250 CONTINUE * WRITE( *, FMT = * )'JTOT', JTOT RETURN * * End of DSTEQR2 * END scalapack-1.8.0/SRC/pcdttrsv.f0000640000175000017500000014216210363532303015771 0ustar muammarmuammar SUBROUTINE PCDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PCDTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Gaussian elimination code PC@(dom_pre)TTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PCDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDTTRF and this is stored in AF. If a linear system * is to be solved using PCDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CGEMM, CGERV2D, CGESD2D, CLACPY, CMATADD, $ CTBTRS, CTRMM, CTRTRS, DESC_CONVERT, GLOBCHK, $ PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX CDOTC EXTERNAL CDOTC, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 12*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 12*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 12*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 12*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 12*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 12*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -4 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -8 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 9*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PCDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PCDTTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1205 PARAM_CHECK( 15, 2 ) = 1204 PARAM_CHECK( 14, 2 ) = 1203 PARAM_CHECK( 13, 2 ) = 1202 PARAM_CHECK( 12, 2 ) = 1201 PARAM_CHECK( 11, 2 ) = 11 PARAM_CHECK( 10, 2 ) = 905 PARAM_CHECK( 9, 2 ) = 904 PARAM_CHECK( 8, 2 ) = 903 PARAM_CHECK( 7, 2 ) = 901 PARAM_CHECK( 6, 2 ) = 8 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -DL( PART_OFFSET+ODD_SIZE+1 ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE, $ AF( 1 ), ODD_SIZE, B( PART_OFFSET+1 ), LLDB, $ CZERO, WORK( 1+INT_ONE-INT_ONE ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTBTRS( 'L', 'N', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTBTRS( 'L', 'C', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-INT_ONE ), INT_ONE, $ CONE, B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -CONJG( DL( PART_OFFSET+ODD_SIZE+1 ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -CONJG( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, CZERO, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTBTRS( 'U', 'C', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTBTRS( 'U', 'N', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, $ WORK( 1+INT_ONE-INT_ONE ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DU( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCDTTRSV * END scalapack-1.8.0/SRC/pctrcon.f0000640000175000017500000004041010363532303015561 0ustar muammarmuammar SUBROUTINE PCTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LRWORK, LWORK, N REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL RWORK( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCTRCON estimates the reciprocal of the condition number of a * triangular distributed matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm. * * The norm of A(IA:IA+N-1,JA:JA+N-1) is computed and an estimate is * obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), then the reciprocal * of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm; * = 'I': Infinity-norm. * * UPLO (global input) CHARACTER * = 'U': A(IA:IA+N-1,JA:JA+N-1) is upper triangular; * = 'L': A(IA:IA+N-1,JA:JA+N-1) is lower triangular. * * DIAG (global input) CHARACTER * = 'N': A(IA:IA+N-1,JA:JA+N-1) is non-unit triangular; * = 'U': A(IA:IA+N-1,JA:JA+N-1) is unit triangular. * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the triangular distributed * matrix A(IA:IA+N-1,JA:JA+N-1). If UPLO = 'U', the leading * N-by-N upper triangular part of this distributed matrix con- * tains the upper triangular matrix, and its strictly lower * triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of this ditributed * matrix contains the lower triangular matrix, and the strictly * upper triangular part is not referenced. If DIAG = 'U', the * diagonal elements of A(IA:IA+N-1,JA:JA+N-1) are also not * referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*CEIL(P-1,Q),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(Q-1,P)) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOUNIT, ONENRM, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPN, IPV, IPW, $ IPX, IROFF, IV, IX, IXX, JJA, JV, JX, KASE, $ KASE1, LRWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, $ NPMOD, NPROW, NQMOD REAL AINVNM, ANORM, SCALE, SMLNUM COMPLEX WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 5 ), $ IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT, $ DESCSET, INFOG2L, PCAMAX, PCHK1MAT, PCLATRS, $ PCLACON, PCSRSCL, PB_TOPGET, PB_TOPSET, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PCLANTR, PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PCLANTR, $ PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, ICHAR, MAX, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) NOUNIT = LSAME( DIAG, 'N' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LRWMIN = NQMOD RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 11 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 13 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 5, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTRCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * RCOND = ZERO SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' )*REAL( MAX( 1, N ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPN = 1 * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Compute the norm of the triangular matrix A. * ANORM = PCLANTR( NORM, UPLO, DIAG, N, N, A, IA, JA, DESCA, RWORK ) * * Continue only if ANORM > 0. * IF( ANORM.GT.ZERO ) THEN * * Estimate the norm of the inverse of A. * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 10 CONTINUE CALL PCLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), $ IX, JX, DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(A). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( UPLO, 'No transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, RWORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(A'). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, RWORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF NORMIN = 'Y' * * Multiply by 1/SCALE if doing so will not cause overflow. * IF( SCALE.NE.ONE ) THEN CALL PCAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, $ DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL CGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1 ) ELSE CALL CGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1, IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PCSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / ANORM ) / AINVNM END IF * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PCTRCON * END scalapack-1.8.0/SRC/pzptsv.f0000640000175000017500000004605510363532303015472 0ustar muammarmuammar SUBROUTINE PZPTSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 B( * ), E( * ), WORK( * ) DOUBLE PRECISION D( * ) * .. * * * Purpose * ======= * * PZPTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal symmetric positive definite distributed * matrix. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PZPTTRF and PZPTTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL + 3*NB) * +max((10+2*min(100,NRHS))*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PXERBLA, PZPTTRF, PZPTTRS * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PZPTTRF and PZPTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 5*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PZPTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL + 3*NB) * * Factor the matrix * CALL PZPTTRF( N, D, E, JA, DESCA, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR ), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PZPTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PZPTTRS( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPTSV', -INFO ) RETURN END IF * RETURN * * End of PZPTSV * END scalapack-1.8.0/SRC/pcgehd2.f0000640000175000017500000002677310363532303015445 0ustar muammarmuammar SUBROUTINE PCGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEHD2 reduces a complex general distributed matrix sub( A ) * to upper Hessenberg form H by an unitary similarity transformation: * Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+JLO-2 * and JA+JHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB + MAX( NpA0, NB ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on * exit in A(ia+ilo+i:ia+ihi-1,ja+ilo+i-2), and tau in TAU(ja+ilo+i-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follo- * wing example, with n = 7, ilo = 2 and ihi = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), h denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(ja+ilo+i-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IAROW, ICOFFA, ICTXT, IROFFA, J, K, LWMIN, $ MYCOL, MYROW, NPA0, NPCOL, NPROW COMPLEX AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLARF, PCLARFC, PCLARFG, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NPA0 = NUMROC( IHI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = DESCA( NB_ ) + MAX( NPA0, DESCA( NB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * DO 10 K = ILO, IHI-1 I = IA + K - 1 J = JA + K - 1 * * Compute elementary reflector H(j) to annihilate * A(i+2:ihi+ia-1,j) * CALL PCLARFG( IHI-K, AII, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PCELSET( A, I+1, J, DESCA, ONE ) * * Apply H(k) to A(ia:ihi+ia-1,j+1:ihi+ja-1) from the right * CALL PCLARF( 'Right', IHI, IHI-K, A, I+1, J, DESCA, 1, TAU, A, $ IA, J+1, DESCA, WORK ) * * Apply H(j) to A(i+1:ia+ihi-1,j+1:ja+n-1) from the left * CALL PCLARFC( 'Left', IHI-K, N-K, A, I+1, J, DESCA, 1, TAU, A, $ I+1, J+1, DESCA, WORK ) * CALL PCELSET( A, I+1, J, DESCA, AII ) 10 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEHD2 * END scalapack-1.8.0/SRC/pdpttrs.f0000640000175000017500000006433310363532303015623 0ustar muammarmuammar SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, $ LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION AF( * ), B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PDPTTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal symmetric positive definite distributed * matrix. * * Routine PDPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * IMPORTANT NOTE: The current version of this code supports * only IB=JA * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPTTRF and this is stored in AF. If a linear system * is to be solved using PDPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, PART_OFFSET, PART_SIZE, $ RETURN_CODE, STORE_M_B, STORE_N_A, TEMP, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 14, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ DSCAL, GLOBCHK, PDPTTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 5*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 8*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 8*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 8*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -12 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 5*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 8*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 8*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -2 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -4 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 5*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( 10+2*MIN( 100, NRHS ) )*NPCOL + 4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -12 CALL PXERBLA( ICTXT, 'PDPTTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 14, 1 ) = DESCB( 5 ) PARAM_CHECK( 13, 1 ) = DESCB( 4 ) PARAM_CHECK( 12, 1 ) = DESCB( 3 ) PARAM_CHECK( 11, 1 ) = DESCB( 2 ) PARAM_CHECK( 10, 1 ) = DESCB( 1 ) PARAM_CHECK( 9, 1 ) = IB PARAM_CHECK( 8, 1 ) = DESCA( 5 ) PARAM_CHECK( 7, 1 ) = DESCA( 4 ) PARAM_CHECK( 6, 1 ) = DESCA( 3 ) PARAM_CHECK( 5, 1 ) = DESCA( 1 ) PARAM_CHECK( 4, 1 ) = JA PARAM_CHECK( 3, 1 ) = NRHS PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 14, 2 ) = 905 PARAM_CHECK( 13, 2 ) = 904 PARAM_CHECK( 12, 2 ) = 903 PARAM_CHECK( 11, 2 ) = 902 PARAM_CHECK( 10, 2 ) = 901 PARAM_CHECK( 9, 2 ) = 8 PARAM_CHECK( 8, 2 ) = 505 PARAM_CHECK( 7, 2 ) = 504 PARAM_CHECK( 6, 2 ) = 503 PARAM_CHECK( 5, 2 ) = 501 PARAM_CHECK( 4, 2 ) = 4 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 12 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 14, PARAM_CHECK, 14, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 30 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * * CALL PDPTTRSV( 'L', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * * Divide by the main diagonal: B <- D^{-1} B * * The main partition is first * DO 10 I = PART_OFFSET + 1, PART_OFFSET + ODD_SIZE CALL DSCAL( NRHS, DBLE( ONE / D( I ) ), B( I ), LLDB ) 10 CONTINUE * * Reduced system is next * IF( MYCOL.LT.NPCOL-1 ) THEN I = PART_OFFSET + ODD_SIZE + 1 CALL DSCAL( NRHS, ONE / AF( ODD_SIZE+2 ), B( I ), LLDB ) END IF * * Call backsolve routine * * CALL PDPTTRSV( 'U', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) 20 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 30 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDPTTRS * END scalapack-1.8.0/SRC/pdsygs2.f0000640000175000017500000003634510363532303015520 0ustar muammarmuammar* * SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDSYGS2 reduces a real symmetric-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**T or L**T*sub( A )*L. * * sub( B ) must have been previously factorized as U**T*U or L*L**T by * PDPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T); * = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**T*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**T. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PDPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, HALF PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IIA, IIB, IOFFA, IOFFB, IROFFA, IROFFB, $ JJA, JJB, K, LDA, LDB, MYCOL, MYROW, NPCOL, $ NPROW DOUBLE PRECISION AKK, BKK, CT * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GRIDINFO, CHK1MAT, DAXPY, $ DSCAL, DSYR2, DTRMV, DTRSV, INFOG2L, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( N+ICOFFA.GT.DESCA( NB_ ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) LDB = DESCB( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * IF( IBTYPE.EQ.1 ) THEN * IF( UPPER ) THEN * IOFFA = IIA + JJA*LDA IOFFB = IIB + JJB*LDB * * Compute inv(U')*sub( A )*inv(U) * DO 10 K = 1, N * * Update the upper triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = A( IOFFA-LDA ) BKK = B( IOFFB-LDB ) AKK = AKK / BKK**2 A( IOFFA-LDA ) = AKK IF( K.LT.N ) THEN CALL DSCAL( N-K, ONE / BKK, A( IOFFA ), LDA ) CT = -HALF*AKK CALL DAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL DSYR2( UPLO, N-K, -ONE, A( IOFFA ), LDA, $ B( IOFFB ), LDB, A( IOFFA+1 ), LDA ) CALL DAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K, $ B( IOFFB+1 ), LDB, A( IOFFA ), LDA ) END IF * * A( IOFFA ) -> A( K, K+1 ) * B( IOFFB ) -> B( K, K+1 ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 10 CONTINUE * ELSE * IOFFA = IIA + 1 + ( JJA-1 )*LDA IOFFB = IIB + 1 + ( JJB-1 )*LDB * * Compute inv(L)*sub( A )*inv(L') * DO 20 K = 1, N * * Update the lower triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = A( IOFFA-1 ) BKK = B( IOFFB-1 ) AKK = AKK / BKK**2 A( IOFFA-1 ) = AKK * IF( K.LT.N ) THEN CALL DSCAL( N-K, ONE / BKK, A( IOFFA ), 1 ) CT = -HALF*AKK CALL DAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL DSYR2( UPLO, N-K, -ONE, A( IOFFA ), 1, $ B( IOFFB ), 1, A( IOFFA+LDA ), LDA ) CALL DAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K, $ B( IOFFB+LDB ), LDB, A( IOFFA ), 1 ) END IF * * A( IOFFA ) -> A( K+1, K ) * B( IOFFB ) -> B( K+1, K ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 20 CONTINUE * END IF * ELSE * IF( UPPER ) THEN * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute U*sub( A )*U' * DO 30 K = 1, N * * Update the upper triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = A( IOFFA+K-1 ) BKK = B( IOFFB+K-1 ) CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), 1 ) CT = HALF*AKK CALL DAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL DSYR2( UPLO, K-1, ONE, A( IOFFA ), 1, B( IOFFB ), 1, $ A( IIA+( JJA-1 )*LDA ), LDA ) CALL DAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL DSCAL( K-1, BKK, A( IOFFA ), 1 ) A( IOFFA+K-1 ) = AKK*BKK**2 * * A( IOFFA ) -> A( 1, K ) * B( IOFFB ) -> B( 1, K ) * IOFFA = IOFFA + LDA IOFFB = IOFFB + LDB * 30 CONTINUE * ELSE * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute L'*sub( A )*L * DO 40 K = 1, N * * Update the lower triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = A( IOFFA+( K-1 )*LDA ) BKK = B( IOFFB+( K-1 )*LDB ) CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), $ LDA ) CT = HALF*AKK CALL DAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL DSYR2( UPLO, K-1, ONE, A( IOFFA ), LDA, B( IOFFB ), $ LDB, A( IIA+( JJA-1 )*LDA ), LDA ) CALL DAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL DSCAL( K-1, BKK, A( IOFFA ), LDA ) A( IOFFA+( K-1 )*LDA ) = AKK*BKK**2 * * A( IOFFA ) -> A( K, 1 ) * B( IOFFB ) -> B( K, 1 ) * IOFFA = IOFFA + 1 IOFFB = IOFFB + 1 * 40 CONTINUE * END IF * END IF * RETURN * * End of PDSYGS2 * END scalapack-1.8.0/SRC/pzpotrf.f0000640000175000017500000003165010363532303015623 0ustar muammarmuammar SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZPOTRF computes the Cholesky factorization of an N-by-N complex * hermitian positive definite distributed matrix sub( A ) denoting * A(IA:IA+N-1, JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZPOTF2, PZHERK, $ PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( UPPER ) THEN * * Split-ring topology for the communication along process * columns, 1-tree topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * * A is upper triangular, compute Cholesky factorization A = U'*U. * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA(NB_), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PZPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PZTRSM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-Unit', JB, N-JB, CONE, A, IA, JA, DESCA, $ A, IA, JA+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PZHERK( UPLO, 'Conjugate transpose', N-JB, JB, -ONE, A, $ IA, JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PZPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PZTRSM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-Unit', JB, N-J-JB+JA, CONE, A, I, J, $ DESCA, A, I, J+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PZHERK( UPLO, 'Conjugate transpose', N-J-JB+JA, JB, $ -ONE, A, I, J+JB, DESCA, ONE, A, I+JB, $ J+JB, DESCA ) END IF 10 CONTINUE * ELSE * * 1-tree topology for the communication along process columns, * Split-ring topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * A is lower triangular, compute Cholesky factorization A = L*L' * (right-looking) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PZPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PZTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-Unit', N-JB, JB, CONE, A, IA, JA, DESCA, $ A, IA+JB, JA, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PZHERK( UPLO, 'No Transpose', N-JB, JB, -ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * END IF * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PZPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PZTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-Unit', N-J-JB+JA, JB, CONE, A, I, J, $ DESCA, A, I+JB, J, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PZHERK( UPLO, 'No Transpose', N-J-JB+JA, JB, -ONE, $ A, I+JB, J, DESCA, ONE, A, I+JB, J+JB, $ DESCA ) * END IF 20 CONTINUE * END IF * 30 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PZPOTRF * END scalapack-1.8.0/SRC/pcunglq.f0000640000175000017500000003076710363532303015600 0ustar muammarmuammar SUBROUTINE PCUNGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGLQ generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PCGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PCGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ J, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCLARFB, $ PCLARFT, PCLASET, PCUNGL2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL PCLASET( 'All', IA+M-IL, IL-IA, ZERO, ZERO, A, IL, JA, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PCUNGL2( IA+M-IL, N-IL+IA, IA+K-IL, A, IL, JA+IL-IA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of rows to loop over ? * IF( IL.GT.IN+1 ) THEN * * Use blocked code * DO 10 I = IL-DESCA( MB_ ), IN+1, -DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) J = JA + I - IA * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PCLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-I-IB+IA, N-I+IA, IB, A, I, J, $ DESCA, WORK, A, I+IB, J, DESCA, $ WORK( IPW ) ) END IF * * Apply H' to columns j:ja+n-1 of current block * CALL PCUNGL2( IB, N-I+IA, IB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set columns ia:i-1 of current block to zero * CALL PCLASET( 'All', IB, I-IA, ZERO, ZERO, A, I, JA, DESCA ) 10 CONTINUE * END IF * * Handle first block separately * IF( IL.GT.IA ) THEN * IB = IN - IA + 1 * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PCLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-IB, N, IB, A, IA, JA, DESCA, WORK, $ A, IA+IB, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-1 of current block * CALL PCUNGL2( IB, N, IB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGLQ * END scalapack-1.8.0/SRC/pdormhr.f0000640000175000017500000003650210363532303015573 0ustar muammarmuammar SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMHR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of IHI-ILO elementary reflectors, as returned by PDGEHRD: * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * ILO and IHI must have the same values as in the previous call * of PDGEHRD. Q is equal to the unit matrix except in the * distributed submatrix Q(ia+ilo:ia+ihi-1,ia+ilo:ja+ihi-1). * If SIDE = 'L', 1 <= ILO <= IHI <= max(1,M); * if SIDE = 'R', 1 <= ILO <= IHI <= max(1,N); * ILO and IHI are relative indexes. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PDGEHRD. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+M-2) * if SIDE = 'L', and LOCc(JA+N-2) if SIDE = 'R'. This array * contains the scalar factors TAU(j) of the elementary * reflectors H(j) as returned by PDGEHRD. TAU is tied to * the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * IAA = IA + ILO; JAA = JA+ILO-1; * If SIDE = 'L', * MI = IHI-ILO; NI = N; ICC = IC + ILO; JCC = JC; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; NI = IHI-ILO; ICC = IC; JCC = JC + ILO; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NH, NI, NPA0, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDORMQR, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 NH = IHI - ILO IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) IAA = IA + ILO JAA = JA + ILO - 1 * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = NH NI = N ICC = IC + ILO JCC = JC CALL CHK1MAT( M, 3, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N MI = M NI = NH ICC = IC JCC = JC + ILO CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN INFO = -5 ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN INFO = -6 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = ILO IDUM2( 3 ) = 5 IDUM1( 4 ) = IHI IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMHR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) $ RETURN * CALL PDORMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMHR * END scalapack-1.8.0/SRC/psgeequ.f0000640000175000017500000003241210363532303015565 0ustar muammarmuammar SUBROUTINE PSGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N REAL AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), C( * ), R( * ) * .. * * Purpose * ======= * * PSGEEQU computes row and column scalings intended to equilibrate an * M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and * reduce its condition number. R returns the row scale factors and C * the column scale factors, chosen to try to make the largest entry in * each row and column of the distributed matrix B with elements * B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1. * * R(i) and C(j) are restricted to be between SMLNUM = smallest safe * number and BIGNUM = largest safe number. Use of these scaling * factors is not guaranteed to reduce the condition number of * sub( A ) but works well in practice. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the * local pieces of the M-by-N distributed matrix whose * equilibration factors are to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local output) REAL array, dimension LOCr(M_A) * If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row * scale factors for sub( A ). R is aligned with the distributed * matrix A, and replicated across every process column. R is * tied to the distributed matrix A. * * C (local output) REAL array, dimension LOCc(N_A) * If INFO = 0, C(JA:JA+N-1) contains the column scale factors * for sub( A ). C is aligned with the distributed matrix A, and * replicated down every process row. C is tied to the distri- * buted matrix A. * * ROWCND (global output) REAL * If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of * the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1). * If ROWCND >= 0.1 and AMAX is neither too large nor too small, * it is not worth scaling by R(IA:IA+M-1). * * COLCND (global output) REAL * If INFO = 0, COLCND contains the ratio of the smallest C(j) * to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it * is not worth scaling by C(JA:JA+N-1). * * AMAX (global output) REAL * Absolute value of largest distributed matrix element. If * AMAX is very close to overflow or very close to underflow, * the matrix should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, and i is * <= M: the i-th row of the distributed matrix sub( A ) * is exactly zero, * > M: the (i-M)-th column of the distributed * matrix sub( A ) is exactly zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA, $ IOFFA, IROFF, J, JJA, LDA, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ REAL BIGNUM, RCMAX, RCMIN, SMLNUM * .. * .. Local Arrays .. INTEGER DESCC( DLEN_ ), DESCR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGAMX2D, $ INFOG2L, PCHK1MAT, PB_TOPGET, PXERBLA, SGAMN2D, $ SGAMX2D * .. * .. External Functions .. INTEGER INDXL2G, NUMROC REAL PSLAMCH EXTERNAL INDXL2G, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN ROWCND = ONE COLCND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Get machine constants and local indexes. * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Assign descriptors for R and C arrays * CALL DESCSET( DESCR, M, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Compute row scale factors. * DO 10 I = IIA, IIA+MP-1 R( I ) = ZERO 10 CONTINUE * * Find the maximum element in each row. * IOFFA = (JJA-1)*LDA DO 30 J = JJA, JJA+NQ-1 DO 20 I = IIA, IIA+MP-1 R( I ) = MAX( R( I ), ABS( A( IOFFA + I ) ) ) 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE CALL SGAMX2D( ICTXT, 'Rowwise', ROWCTOP, MP, 1, R( IIA ), $ MAX( 1, MP ), IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 40 I = IIA, IIA+MP-1 RCMAX = MAX( RCMAX, R( I ) ) RCMIN = MIN( RCMIN, R( I ) ) 40 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) AMAX = RCMAX * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 50 I = IIA, IIA+MP-1 IF( R( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = INDXL2G( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) - IA + 1 50 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 60 I = IIA, IIA+MP-1 R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) 60 CONTINUE * * Compute ROWCND = min(R(I)) / max(R(I)) * ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * * Compute column scale factors * DO 70 J = JJA, JJA+NQ-1 C( J ) = ZERO 70 CONTINUE * * Find the maximum element in each column, * assuming the row scaling computed above. * IOFFA = (JJA-1)*LDA DO 90 J = JJA, JJA+NQ-1 DO 80 I = IIA, IIA+MP-1 C( J ) = MAX( C( J ), ABS( A( IOFFA + I ) )*R( I ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, C( JJA ), $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 100 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 100 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 110 J = JJA, JJA+NQ-1 IF( C( J ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = M + INDXL2G( J, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) - JA + 1 110 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 120 J = JJA, JJA+NQ-1 C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) 120 CONTINUE * * Compute COLCND = min(C(J)) / max(C(J)) * COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * RETURN * * End of PSGEEQU * END scalapack-1.8.0/SRC/pzlase2.f0000640000175000017500000003737410363532303015510 0ustar muammarmuammar SUBROUTINE PZLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASE2 initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. PZLASE2 requires that only dimension of the matrix * operand is distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) COMPLEX*16 * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) COMPLEX*16 * The constant to which the diagonal elements are to be set. * * A (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, ICOFFA, II, IIA, $ IIBEG, IIEND, IINXT, ILEFT, IRIGHT, IROFFA, $ ITOP, JJ, JJA, JJBEG, JJEND, JJNXT, LDA, MBA, $ MP, MPA, MYCOL, MYDIST, MYROW, NBA, NPCOL, $ NPROW, NQ, NQA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MB_A * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MPA.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEG = IIA IIEND = IIA + MPA - 1 IINXT = MIN( ICEIL( IIBEG, MBA ) * MBA, IIEND ) * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL ZLASET( UPLO, IINXT-IIBEG+1, N-ITOP, ALPHA, BETA, $ A( IIBEG+(JJA+ITOP-1)*LDA ), LDA ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEG = IINXT +1 IINXT = MIN( IINXT+MBA, IIEND ) GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL ZLASET( 'All', MP, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL ZLASET( UPLO, MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 30 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL ZLASET( 'All', MPA, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( IIA+(JJ-1)*LDA ), LDA ) CALL ZLASET( 'All', MPA-MP, HEIGHT, ALPHA, ALPHA, $ A( IIA+(JJA+ITOP-1)*LDA ), LDA ) CALL ZLASET( 'All', MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 30 END IF * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQA.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEG = JJA JJEND = JJA + NQA - 1 JJNXT = MIN( ICEIL( JJBEG, NBA ) * NBA, JJEND ) * 40 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL ZLASET( UPLO, M-ILEFT, JJNXT-JJBEG+1, ALPHA, $ BETA, A( IIA+ILEFT+(JJBEG-1)*LDA ), LDA ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBA, JJEND ) GO TO 40 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 50 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL ZLASET( 'All', ILEFT-II+IIA, NQ, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL ZLASET( UPLO, WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 50 END IF * ELSE * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 60 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL ZLASET( 'All', ILEFT-II+IIA, NQA, ALPHA, ALPHA, $ A( II+(JJA-1)*LDA ), LDA ) CALL ZLASET( 'All', WIDE, NQA-NQ, ALPHA, ALPHA, $ A( IIA+ILEFT+(JJA-1)*LDA ), LDA ) CALL ZLASET( 'All', WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 60 END IF * END IF * END IF * END IF * RETURN * * End of PZLASE2 * END scalapack-1.8.0/SRC/Makefile0000640000175000017500000002345710370215332015414 0ustar muammarmuammar############################################################################ # # Program: ScaLAPACK # # Module: Makefile # # Purpose: SRC Makefile # # Creation date: March 20, 1995 # # Modified: # # Send bug reports, comments or suggestions to scalapack@cs.utk.edu # ############################################################################ include ../SLmake.inc ####################################################################### # This is the makefile to create a library for ScaLAPACK. # The files are organized as follows: # ALLAUX -- Auxiliary routines called from all precisions # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX # DZLAUX -- Auxiliary routines called from both DOUBLE PRECISION # and COMPLEX*16 # SLASRC -- Single precision real ScaLAPACK routines # CLASRC -- Single precision complex ScaLAPACK routines # DLASRC -- Double precision real ScaLAPACK routines # ZLASRC -- Double precision complex ScaLAPACK routines # # The library can be set up to include routines for any combination # of the four precisions. First, modify the ARCH, ARCHFLAGS, RANLIB, # F77 and F77FLAGS definitions in ../SLmake.inc to match your library # archiver, compiler and the options to be used. # Then to create or add to the library, enter make followed by one or # more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # The name of the library is defined by $(SCALAPACKLIB) in # ../SLmake.inc and is created at the next higher directory level. # # To remove the object files after the library is created, enter # make clean # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC # ####################################################################### ALLAUX = pjlaenv.o SCLAUX = \ pslabad.o pslaed0.o pslaed1.o pslaed2.o pslaed3.o pslaedz.o pslaiect.o \ pslamch.o pslared1d.o pslasrt.o psstebz.o psstedc.o slapst.o slasrt2.o \ sstein2.o pbchkvect.o getpbbuf.o DZLAUX = \ pdlabad.o pdlaed0.o pdlaed1.o pdlaed2.o pdlaed3.o pdlaedz.o pdlaiect.o \ pdlamch.o pdlared1d.o pdlasrt.o pdstebz.o pdstedc.o dlapst.o dlasrt2.o \ dstein2.o pbchkvect.o getpbbuf.o SLASRC = \ psdbsv.o psdbtrf.o psdbtrs.o psdbtrsv.o \ psdtsv.o psdttrf.o psdttrs.o psdttrsv.o \ psgbsv.o psgbtrf.o psgbtrs.o \ psgebd2.o psgebrd.o psgecon.o psgeequ.o psgehd2.o psgehrd.o \ psgelq2.o psgelqf.o psgels.o psgeql2.o psgeqlf.o psgeqpf.o psgeqr2.o \ psgeqrf.o psgerfs.o psgerq2.o psgerqf.o \ psgesv.o psgesvd.o psgesvx.o psgetf2.o psgetrf.o psgetri.o psgetrs.o \ psggqrf.o psggrqf.o pslabrd.o \ pslacon.o pslacp2.o pslacpy.o pslahrd.o pslange.o pslanhs.o pslansy.o \ pslantr.o pslapiv.o pslapv2.o pslaqge.o pslaqsy.o pslarf.o pslarfb.o \ pslarfg.o pslarft.o pslase2.o pslaset.o pslascl.o pslassq.o pslaswp.o \ pslatra.o pslatrd.o pslatrs.o pslauu2.o pslauum.o psorg2l.o psorg2r.o \ psorgl2.o psorglq.o psorgql.o psorgqr.o psorgr2.o \ psorgrq.o psorm2l.o psorm2r.o psormbr.o psormhr.o psorml2.o \ psormlq.o psormql.o psormqr.o psormr2.o psormrq.o psormtr.o pspocon.o \ pspbsv.o pspbtrf.o pspbtrs.o pspbtrsv.o \ psptsv.o pspttrf.o pspttrs.o pspttrsv.o \ pspoequ.o psporfs.o psposv.o psposvx.o pspotf2.o pspotrf.o pspotri.o \ pspotrs.o psrscl.o psstein.o pssyev.o pssyevd.o pssyevx.o \ pssygs2.o pssygst.o \ pssygvx.o pssyngst.o pssyntrd.o pssyttrd.o pssytd2.o pssytrd.o \ pstrti2.o pstrtri.o pstrtrs.o \ pslaevswp.o \ pslarzb.o pslarzt.o pslarz.o pslatrz.o pstzrzf.o psormr3.o psormrz.o \ pslahqr.o pslaconsb.o pslacp3.o pslawil.o \ pslasmsub.o pslared2d.o pslamr1d.o slaref.o slamsh.o slasorte.o ssteqr2.o \ sdbtf2.o sdbtrf.o sdttrf.o sdttrsv.o spttrsv.o strmvt.o CLASRC = \ pcdbsv.o pcdbtrf.o pcdbtrs.o pcdbtrsv.o \ pcdtsv.o pcdttrf.o pcdttrs.o pcdttrsv.o \ pcgbsv.o pcgbtrf.o pcgbtrs.o \ pcgebd2.o pcgebrd.o pcgecon.o pcgeequ.o pcgehd2.o pcgehrd.o \ pcgelq2.o pcgelqf.o pcgels.o pcgeql2.o pcgeqlf.o pcgeqpf.o pcgeqr2.o \ pcgeqrf.o pcgerfs.o pcgerq2.o pcgerqf.o \ pcgesv.o pcgesvd.o pcgesvx.o pcgetf2.o pcgetrf.o pcgetri.o pcgetrs.o \ pcggqrf.o \ pcggrqf.o pcheev.o pcheevd.o pcheevx.o pchegs2.o pchegst.o pchegvx.o \ pchengst.o pchentrd.o pchettrd.o pchetd2.o \ pchetrd.o pclabrd.o pclacon.o pclacgv.o pclacp2.o pclacpy.o pclahrd.o \ pclahqr.o pclaconsb.o pclasmsub.o pclacp3.o pclawil.o pcrot.o \ pclange.o pclanhe.o pclanhs.o pclansy.o pclantr.o pclapiv.o pclapv2.o \ pclaqge.o pclaqsy.o pclarf.o pclarfb.o pclarfc.o pclarfg.o pclarft.o \ pclascl.o pclase2.o pclaset.o pclassq.o pclaswp.o pclatra.o pclatrd.o \ pclatrs.o pclauu2.o pclauum.o pcpocon.o pcpoequ.o pcporfs.o pcposv.o \ pcpbsv.o pcpbtrf.o pcpbtrs.o pcpbtrsv.o \ pcptsv.o pcpttrf.o pcpttrs.o pcpttrsv.o \ pcposvx.o pcpotf2.o pcpotrf.o pcpotri.o pcpotrs.o pcsrscl.o pcstein.o \ pctrevc.o pctrti2.o pctrtri.o pctrtrs.o pcung2l.o pcung2r.o \ pcungl2.o pcunglq.o pcungql.o pcungqr.o pcungr2.o pcungrq.o \ pcunm2l.o pcunm2r.o pcunmbr.o pcunmhr.o pcunml2.o pcunmlq.o \ pcunmql.o pcunmqr.o pcunmr2.o pcunmrq.o pcunmtr.o \ pclaevswp.o \ pclarzb.o pclarzt.o pclarz.o pclarzc.o pclatrz.o pctzrzf.o \ pclattrs.o \ pcunmr3.o pcunmrz.o pcmax1.o pscsum1.o pclamr1d.o \ cdbtf2.o cdbtrf.o cdttrf.o cdttrsv.o cpttrsv.o csteqr2.o ctrmvt.o \ clamsh.o claref.o clanv2.o clahqr2.o DLASRC = \ pddbsv.o pddbtrf.o pddbtrs.o pddbtrsv.o \ pddtsv.o pddttrf.o pddttrs.o pddttrsv.o \ pdgbsv.o pdgbtrf.o pdgbtrs.o \ pdgebd2.o pdgebrd.o pdgecon.o pdgeequ.o pdgehd2.o pdgehrd.o \ pdgelq2.o pdgelqf.o pdgels.o pdgeql2.o pdgeqlf.o pdgeqpf.o pdgeqr2.o \ pdgeqrf.o pdgerfs.o pdgerq2.o pdgerqf.o \ pdgesv.o pdgesvd.o pdgesvx.o pdgetf2.o pdgetrf.o pdgetri.o pdgetrs.o \ pdggqrf.o pdggrqf.o pdlabrd.o \ pdlacon.o pdlacp2.o pdlacpy.o pdlahrd.o pdlange.o pdlanhs.o pdlansy.o \ pdlantr.o pdlapiv.o pdlapv2.o pdlaqge.o pdlaqsy.o pdlarf.o pdlarfb.o \ pdlarfg.o pdlarft.o pdlase2.o pdlaset.o pdlascl.o pdlassq.o pdlaswp.o \ pdlatra.o pdlatrd.o pdlatrs.o pdlauu2.o pdlauum.o pdorg2l.o pdorg2r.o \ pdorgl2.o pdorglq.o pdorgql.o pdorgqr.o pdorgr2.o \ pdorgrq.o pdorm2l.o pdorm2r.o pdormbr.o pdormhr.o pdorml2.o \ pdormlq.o pdormql.o pdormqr.o pdormr2.o pdormrq.o pdormtr.o pdpocon.o \ pdpbsv.o pdpbtrf.o pdpbtrs.o pdpbtrsv.o \ pdptsv.o pdpttrf.o pdpttrs.o pdpttrsv.o \ pdpoequ.o pdporfs.o pdposv.o pdposvx.o pdpotf2.o pdpotrf.o pdpotri.o \ pdpotrs.o pdrscl.o pdstein.o pdsyev.o pdsyevd.o pdsyevx.o \ pdsygs2.o pdsygst.o \ pdsygvx.o pdsyngst.o pdsyntrd.o pdsyttrd.o pdsytd2.o pdsytrd.o pdtrti2.o \ pdtrtri.o pdtrtrs.o \ pdlaevswp.o \ pdlarzb.o pdlarzt.o pdlarz.o pdlatrz.o pdtzrzf.o pdormr3.o pdormrz.o \ pdlahqr.o pdlaconsb.o pdlacp3.o pdlawil.o \ pdlasmsub.o pdlared2d.o pdlamr1d.o dlaref.o dlamsh.o dlasorte.o dsteqr2.o \ ddbtf2.o ddbtrf.o ddttrf.o ddttrsv.o dpttrsv.o dtrmvt.o ZLASRC = \ pzdbsv.o pzdbtrf.o pzdbtrs.o pzdbtrsv.o \ pzdtsv.o pzdttrf.o pzdttrs.o pzdttrsv.o \ pzgbsv.o pzgbtrf.o pzgbtrs.o \ pzgebd2.o pzgebrd.o pzgecon.o pzgeequ.o pzgehd2.o pzgehrd.o \ pzgelq2.o pzgelqf.o pzgels.o pzgeql2.o pzgeqlf.o pzgeqpf.o pzgeqr2.o \ pzgeqrf.o pzgerfs.o pzgerq2.o pzgerqf.o \ pzgesv.o pzgesvd.o pzgesvx.o pzgetf2.o pzgetrf.o pzgetri.o pzgetrs.o \ pzggqrf.o \ pzggrqf.o pzheev.o pzheevd.o pzheevx.o pzhegs2.o pzhegst.o pzhegvx.o \ pzhengst.o pzhentrd.o pzhettrd.o pzhetd2.o \ pzhetrd.o pzlabrd.o pzlacon.o pzlacgv.o pzlacp2.o pzlacpy.o pzlahrd.o \ pzlahqr.o pzlaconsb.o pzlasmsub.o pzlacp3.o pzlawil.o pzrot.o \ pzlange.o pzlanhe.o pzlanhs.o pzlansy.o pzlantr.o pzlapiv.o pzlapv2.o \ pzlaqge.o pzlaqsy.o pzlarf.o pzlarfb.o pzlarfc.o pzlarfg.o pzlarft.o \ pzlascl.o pzlase2.o pzlaset.o pzlassq.o pzlaswp.o pzlatra.o pzlatrd.o \ pzlattrs.o \ pzlatrs.o pzlauu2.o pzlauum.o pzpocon.o pzpoequ.o pzporfs.o pzposv.o \ pzpbsv.o pzpbtrf.o pzpbtrs.o pzpbtrsv.o \ pzptsv.o pzpttrf.o pzpttrs.o pzpttrsv.o \ pzposvx.o pzpotf2.o pzpotrf.o pzpotri.o pzpotrs.o pzdrscl.o pzstein.o \ pztrevc.o pztrti2.o pztrtri.o pztrtrs.o pzung2l.o pzung2r.o \ pzungl2.o pzunglq.o pzungql.o pzungqr.o pzungr2.o pzungrq.o \ pzunm2l.o pzunm2r.o pzunmbr.o pzunmhr.o pzunml2.o pzunmlq.o \ pzunmql.o pzunmqr.o pzunmr2.o pzunmrq.o pzunmtr.o \ pzlaevswp.o \ pzlarzb.o pzlarzt.o pzlarz.o pzlarzc.o pzlatrz.o pztzrzf.o \ pzunmr3.o pzunmrz.o pzmax1.o pdzsum1.o pzlamr1d.o \ zdbtf2.o zdbtrf.o zdttrf.o zdttrsv.o zpttrsv.o zsteqr2.o ztrmvt.o \ zlamsh.o zlaref.o zlanv2.o zlahqr2.o all: single complex double complex16 single: $(SLASRC) $(SCLAUX) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(SLASRC) $(SCLAUX) \ $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) complex: $(CLASRC) $(SCLAUX) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(CLASRC) $(SCLAUX) \ $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) double: $(DLASRC) $(DZLAUX) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(DLASRC) $(DZLAUX) \ $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) complex16: $(ZLASRC) $(DZLAUX) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(SCALAPACKLIB) $(ZLASRC) $(DZLAUX) \ $(ALLAUX) $(RANLIB) $(SCALAPACKLIB) $(SCLAUX): $(FRC) $(DZLAUX): $(FRC) $(SLASRC): $(FRC) $(CLASRC): $(FRC) $(DLASRC): $(FRC) $(ZLASRC): $(FRC) FRC: @FRC=$(FRC) clean : rm -f *.o .f.o : ; $(F77) -c $(F77FLAGS) $*.f .c.o : ; $(CC) -c $(CCFLAGS) $(SRCFLAG) $(CDEFS) $*.c scalapack-1.8.0/SRC/ddttrf.f0000640000175000017500000000626510363532303015412 0ustar muammarmuammar SUBROUTINE DDTTRF( N, DL, D, DU, INFO ) * * Written by Andrew J. Cleary, November 1996. * Modified from DGTTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. INTEGER INFO, N * .. * .. Array Arguments .. DOUBLE PRECISION D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * DDTTRF computes an LU factorization of a complex tridiagonal matrix A * using elimination without partial pivoting. * * The factorization has the form * A = L * U * where L is a product of unit lower bidiagonal * matrices and U is upper triangular with nonzeros in only the main * diagonal and first superdiagonal. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * DL (input/output) COMPLEX array, dimension (N-1) * On entry, DL must contain the (n-1) subdiagonal elements of * A. * On exit, DL is overwritten by the (n-1) multipliers that * define the matrix L from the LU factorization of A. * * D (input/output) COMPLEX array, dimension (N) * On entry, D must contain the diagonal elements of A. * On exit, D is overwritten by the n diagonal elements of the * upper triangular matrix U from the LU factorization of A. * * DU (input/output) COMPLEX array, dimension (N-1) * On entry, DU must contain the (n-1) superdiagonal elements * of A. * On exit, DU is overwritten by the (n-1) elements of the first * superdiagonal of U. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Local Scalars .. INTEGER I DOUBLE PRECISION FACT * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Executable Statements .. * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 CALL XERBLA( 'DDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * DO 20 I = 1, N - 1 IF( DL( I ).EQ.ZERO ) THEN * * Subdiagonal is zero, no elimination is required. * IF( D( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I ELSE * FACT = DL( I ) / D( I ) DL( I ) = FACT D( I+1 ) = D( I+1 ) - FACT*DU( I ) END IF 20 CONTINUE IF( D( N ).EQ.ZERO .AND. INFO.EQ.0 ) THEN INFO = N RETURN END IF * RETURN * * End of DDTTRF * END scalapack-1.8.0/SRC/psposvx.f0000640000175000017500000006556710363532303015657 0ustar muammarmuammar SUBROUTINE PSPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, $ LWORK, N, NRHS REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IWORK( * ) REAL A( * ), AF( * ), $ B( * ), BERR( * ), FERR( * ), $ SC( * ), SR( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PSPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to * compute the solution to a real system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. In the following comments Y denotes Y(IY:IY+M-1,JY:JY+K-1) * a M-by-K matrix where Y can be A, AF, B and X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * diag(SR) * A * diag(SC) * inv(diag(SC)) * X = diag(SR) * B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(SR)*A*diag(SC) and B by diag(SR)*B. * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as * A = U**T* U, if UPLO = 'U', or * A = L * L**T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If equilibration was used, the matrix X is premultiplied by * diag(SR) so that it solves the original system before * equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix A is * supplied on entry, and if not, whether the matrix A should be * equilibrated before it is factored. * = 'F': On entry, AF contains the factored form of A. * If EQUED = 'Y', the matrix A has been equilibrated * with scaling factors given by S. A and AF will not * be modified. * = 'N': The matrix A will be copied to AF and factored. * = 'E': The matrix A will be equilibrated if necessary, then * copied to AF and factored. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A is stored; * = 'L': Lower triangle of A is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices B and X. NRHS >= 0. * * A (local input/local output) REAL pointer into * the local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). * On entry, the symmetric matrix A, except if FACT = 'F' and * EQUED = 'Y', then A must contain the equilibrated matrix * diag(SR)*A*diag(SC). If UPLO = 'U', the leading * N-by-N upper triangular part of A contains the upper * triangular part of the matrix A, and the strictly lower * triangular part of A is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper * triangular part of A is not referenced. A is not modified if * FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. * * On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by * diag(SR)*A*diag(SC). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) REAL pointer * into the local memory to an array of local dimension * ( LLD_AF, LOCc(JA+N-1)). * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(SR)*A*diag(SC). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the original * matrix A. * * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * EQUED (global input/global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'Y': Equilibration was done, i.e., A has been replaced by * diag(SR) * A * diag(SC). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * SR (local input/local output) REAL array, * dimension (LLD_A) * The scale factors for A distributed across process rows; * not accessed if EQUED = 'N'. SR is an input variable if * FACT = 'F'; otherwise, SR is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SR must be * positive. * * SC (local input/local output) REAL array, * dimension (LOC(N_A)) * The scale factors for A distributed across * process columns; not accessed if EQUED = 'N'. SC is an input * variable if FACT = 'F'; otherwise, SC is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SC must be * positive. * * B (local input/local output) REAL pointer into * the local memory to an array of local dimension * ( LLD_B, LOCc(JB+NRHS-1) ). * On entry, the N-by-NRHS right-hand side matrix B. * On exit, if EQUED = 'N', B is not modified; if TRANS = 'N' * and EQUED = 'R' or 'B', B is overwritten by diag(R)*B; if * TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is overwritten * by diag(C)*B. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) REAL pointer into * the local memory to an array of local dimension * ( LLD_X, LOCc(JX+NRHS-1) ). * If INFO = 0, the N-by-NRHS solution matrix X to the original * system of equations. Note that A and B are modified on exit * if EQUED .ne. 'N', and the solution to the equilibrated * system is inv(diag(SC))*X if TRANS = 'N' and EQUED = 'C' or * 'B', or inv(diag(SR))*X if TRANS = 'T' or 'C' and EQUED = 'R' * or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) REAL * The estimate of the reciprocal condition number of the matrix * A after equilibration (if done). If RCOND is less than the * machine precision (in particular, if RCOND = 0), the matrix * is singular to working precision. This condition is * indicated by a return code of INFO > 0, and the solution and * error bounds are not computed. * * FERR (local output) REAL array, dimension (LOC(N_B)) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution, FERR(j) bounds the magnitude * of the largest entry in (X(j) - XTRUE) divided by * the magnitude of the largest entry in X(j). The quality of * the error bound depends on the quality of the estimate of * norm(inv(A)) computed in the code; if the estimate of * norm(inv(A)) is accurate, the error bound is guaranteed. * * BERR (local output) REAL array, dimension (LOC(N_B)) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A or B that makes X(j) an exact solution). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PSPOCON( LWORK ), PSPORFS( LWORK ) ) * + LOCr( N_A ). * LWORK = 3*DESCA( LLD_ ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK = DESCA( LLD_ ) * LIWORK = LOCr(N_A). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: if INFO = i, the leading minor of order i of A * is not positive definite, so the factorization * could not be completed, and the solution and error * bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the matrix * is singular to working precision, and the solution * and error bounds have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL EQUIL, LQUERY, NOFACT, RCEQU INTEGER I, IACOL, IAROW, IAFROW, IBROW, IBCOL, ICOFF, $ ICOFFA, ICTXT, IDUMM, IIA, IIB, IIX, INFEQU, $ IROFF, IROFFA, IROFFAF, IROFFB, IROFFX, IXCOL, $ IXROW, J, JJA, JJB, JJX, LDB, LDX, LIWMIN, $ LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NRHSQ, $ NQ REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, INFOG2L, $ PSPOCON, PSPOEQU, PSPORFS, $ PSPOTRF, PSPOTRS, $ PSLACPY, PSLAQSY, PXERBLA, $ SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PSLANSY, PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PSLANSY, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA LWMIN = 3*DESCA( LLD_ ) LIWMIN = NP NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' RCEQU = .FALSE. ELSE RCEQU = LSAME( EQUED, 'Y' ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( RCEQU ) THEN * SMIN = BIGNUM SMAX = ZERO DO 10 J = IIA, IIA + NP - 1 SMIN = MIN( SMIN, SR( J ) ) SMAX = MAX( SMAX, SR( J ) ) 10 CONTINUE CALL SGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, SMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, SMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( SMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) ELSE SCOND = ONE END IF END IF END IF END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( UPLO ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 13 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 28 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 5, IDUM1, IDUM2, $ INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 28 IF( LIWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 4, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOSVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PSPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFEQU ) * IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PSLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) RCEQU = LSAME( EQUED, 'Y' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) LDB = DESCB( LLD_ ) IROFF = MOD( IB-1, DESCB( MB_ ) ) ICOFF = MOD( JB-1, DESCB( NB_ ) ) NP = NUMROC( N+IROFF, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 30 J = JJB, JJB+NRHSQ-1 DO 20 I = IIB, IIB+NP-1 B( I + ( J-1 )*LDB ) = SR( I )*B( I + ( J-1 )*LDB ) 20 CONTINUE 30 CONTINUE END IF * IF( NOFACT .OR. EQUIL ) THEN * * Compute the Cholesky factorization A = U'*U or A = L*L'. * CALL PSLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PSPOTRF( UPLO, N, AF, IAF, JAF, DESCAF, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * ANORM = PSLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * * Compute the reciprocal of the condition number of A. * CALL PSPOCON( UPLO, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PSLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PSLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PSPOTRS( UPLO, N, NRHS, AF, IAF, JAF, DESCAF, X, IX, JX, $ DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PSPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX( MB_ ) ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 50 J = JJX, JJX+NRHSQ-1 DO 40 I = IIX, IIX+NP-1 X( I + ( J-1 )*LDX ) = SR( I )*X( I + ( J-1 )*LDX ) 40 CONTINUE 50 CONTINUE DO 60 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / SCOND 60 CONTINUE END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN RETURN * * End of PSPOSVX * END scalapack-1.8.0/SRC/pdgelq2.f0000640000175000017500000002460010363532303015452 0ustar muammarmuammar SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGELQ2 computes a LQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1) H(ia+k-2) . . . H(ia), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1)=0 * and v(i) = 1; v(i+1:n) is stored on exit in A(ia+i-1,ja+i:ja+n-1), * and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLARFG, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * K = MIN( M, N ) DO 10 I = IA, IA+K-1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i,j+1:ja+n-1) * CALL PDLARFG( N-J+JA, AII, I, J, A, I, MIN( J+1, JA+N-1 ), $ DESCA, DESCA( M_ ), TAU ) * IF( I.LT.IA+M-1 ) THEN * * Apply H(i) to A(i+1:ia+m-1,j:ja+n-1) from the right * CALL PDELSET( A, I, J, DESCA, ONE ) CALL PDLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PDELSET( A, I, J, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGELQ2 * END scalapack-1.8.0/SRC/pslawil.f0000640000175000017500000002350610363532303015573 0ustar muammarmuammar SUBROUTINE PSLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER II, JJ, M REAL H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), V( * ) * .. * * Purpose * ======= * * PSLAWIL gets the transform given by H44,H33, & H43H34 into V * starting at row M. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * II (global input) INTEGER * Row owner of H(M+2,M+2) * * JJ (global input) INTEGER * Column owner of H(M+2,M+2) * * M (global input) INTEGER * On entry, this is where the transform starts (row M.) * Unchanged on exit. * * A (global input) REAL array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * Unchanged on exit. * * H44 * H33 * H43H34 (global input) REAL * These three values are for the double shift QR iteration. * Unchanged on exit. * * V (global output) REAL array of size 3. * Contains the transform on output. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, ICOL, IROW, JSRC, LDA, LEFT, $ MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, $ RSRC, UP REAL H11, H12, H21, H22, H33S, H44S, S, V1, V2, V3 * .. * .. Local Arrays .. REAL BUF( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGERV2D, SGESD2D, INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * On node (II,JJ) collect all DIA,SUP,SUB info from M, M+1 * MODKM1 = MOD( M+1, HBL ) IF( MODKM1.EQ.0 ) THEN IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) CALL SGESD2D( CONTXT, 1, 1, BUF, 1, II, JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) BUF( 2 ) = A( ( ICOL-1 )*LDA+IROW+1 ) BUF( 3 ) = A( ICOL*LDA+IROW ) BUF( 4 ) = A( ICOL*LDA+IROW+1 ) CALL SGESD2D( CONTXT, 4, 1, BUF, 4, II, JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NPCOL.GT.1 ) THEN CALL SGERV2D( CONTXT, 1, 1, V3, 1, MYROW, LEFT ) ELSE V3 = A( ( ICOL-2 )*LDA+IROW ) END IF IF( NUM.GT.1 ) THEN CALL SGERV2D( CONTXT, 4, 1, BUF, 4, UP, LEFT ) H11 = BUF( 1 ) H21 = BUF( 2 ) H12 = BUF( 3 ) H22 = BUF( 4 ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) END IF END IF END IF IF( MODKM1.EQ.1 ) THEN IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) CALL SGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. ( NPROW.GT.1 ) ) $ THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL SGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL SGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NUM.GT.1 ) THEN CALL SGERV2D( CONTXT, 1, 1, H11, 1, UP, LEFT ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) END IF IF( NPROW.GT.1 ) THEN CALL SGERV2D( CONTXT, 1, 1, H12, 1, UP, MYCOL ) ELSE H12 = A( ( ICOL-2 )*LDA+IROW-2 ) END IF IF( NPCOL.GT.1 ) THEN CALL SGERV2D( CONTXT, 1, 1, H21, 1, MYROW, LEFT ) ELSE H21 = A( ( ICOL-3 )*LDA+IROW-1 ) END IF H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF END IF IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) $ RETURN * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF * H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 * RETURN * * End of PSLAWIL * END scalapack-1.8.0/SRC/pdorglq.f0000640000175000017500000003066510363532303015574 0ustar muammarmuammar SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGLQ generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k) . . . H(2) H(1) * * as returned by PDGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PDGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ J, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDLARFB, $ PDLARFT, PDLASET, PDORGL2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL PDLASET( 'All', IA+M-IL, IL-IA, ZERO, ZERO, A, IL, JA, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PDORGL2( IA+M-IL, N-IL+IA, IA+K-IL, A, IL, JA+IL-IA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of rows to loop over ? * IF( IL.GT.IN+1 ) THEN * * Use blocked code * DO 10 I = IL-DESCA( MB_ ), IN+1, -DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) J = JA + I - IA * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PDLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-I+IA, IB, A, I, J, DESCA, $ WORK, A, I+IB, J, DESCA, WORK( IPW ) ) END IF * * Apply H' to columns j:ja+n-1 of current block * CALL PDORGL2( IB, N-I+IA, IB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set columns ia:i-1 of current block to zero * CALL PDLASET( 'All', IB, I-IA, ZERO, ZERO, A, I, JA, DESCA ) 10 CONTINUE * END IF * * Handle first block separately * IF( IL.GT.IA ) THEN * IB = IN - IA + 1 * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PDLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PDLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M-IB, $ N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, JA, $ DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-1 of current block * CALL PDORGL2( IB, N, IB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGLQ * END scalapack-1.8.0/SRC/pzlange.f0000640000175000017500000002666610363532303015572 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLANGE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * distributed matrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1). * * PZLANGE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+M-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PZLANGE as described * above. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PZLANGE * is set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PZLANGE is set to zero. N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, ICOFF, IOFFA, $ IROFF, J, JJ, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, INFOG2L, $ PDTREECOMB, ZLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER IDAMAX, NUMROC EXTERNAL LSAME, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO IF( NQ.GT.0 .AND. MP.GT.0 ) THEN IOFFA = (JJ-1)*LDA DO 20 J = JJ, JJ+NQ-1 DO 10 I = II, MP+II-1 VALUE = MAX( VALUE, ABS( A( IOFFA+I ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE END IF CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, J, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * * Find norm1( sub( A ) ). * IF( NQ.GT.0 ) THEN IOFFA = ( JJ - 1 ) * LDA DO 40 J = JJ, JJ+NQ-1 SUM = ZERO IF( MP.GT.0 ) THEN DO 30 I = II, MP+II-1 SUM = SUM + ABS( A( IOFFA+I ) ) 30 CONTINUE END IF IOFFA = IOFFA + LDA WORK( J-JJ+1 ) = SUM 40 CONTINUE END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, J, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * * Find normI( sub( A ) ). * IF( MP.GT.0 ) THEN IOFFA = II + ( JJ - 1 ) * LDA DO 60 I = II, II+MP-1 SUM = ZERO IF( NQ.GT.0 ) THEN DO 50 J = IOFFA, IOFFA + NQ*LDA - 1, LDA SUM = SUM + ABS( A( J ) ) 50 CONTINUE END IF WORK( I-II+1 ) = SUM IOFFA = IOFFA + 1 60 CONTINUE END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for supnorm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( IDAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, I, $ J, -1, 0, 0 ) END IF * ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE IOFFA = II + ( JJ - 1 ) * LDA IF( NQ.GT.0 ) THEN DO 70 J = IOFFA, IOFFA + NQ*LDA - 1, LDA CALL ZLASSQ( MP, A( J ), 1, SCALE, SUM ) 70 CONTINUE END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PZLANGE = VALUE * RETURN * * End of PZLANGE * END scalapack-1.8.0/SRC/dtrmvt.f0000640000175000017500000001121710363532303015434 0ustar muammarmuammar SUBROUTINE DTRMVT( UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, $ INCZ ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER INCW, INCX, INCY, INCZ, LDT, N * .. * .. Array Arguments .. DOUBLE PRECISION T( LDT, * ), W( * ), X( * ), Y( * ), Z( * ) * .. * * Purpose * ======= * * DTRMVT performs the matrix-vector operations * * x := T' *y, and w := T *z, * * where x is an n element vector and T is an n by n * upper or lower triangular matrix. * * Arguments * ========= * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix A. * N must be at least zero. * Unchanged on exit. * * T - DOUBLE PRECISION array of DIMENSION ( LDT, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array T must contain the upper * triangular matrix and the strictly lower triangular part of * T is not referenced. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array T must contain the lower * triangular matrix and the strictly upper triangular part of * T is not referenced. * * LDT - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least * max( 1, n ). * Unchanged on exit. * * X - DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). * On exit, X = T' * y * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * Unchanged on exit. * * Y - DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). * Before entry, the incremented array Y must contain the n * element vector y. Unchanged on exit. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * W - DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCW ) ). * On exit, W = T * z * * INCW - INTEGER. * On entry, INCW specifies the increment for the elements of * W. INCW must not be zero. * Unchanged on exit. * * Z - DOUBLE PRECISION array of dimension at least * ( 1 + ( n - 1 )*abs( INCZ ) ). * Before entry, the incremented array Z must contain the n * element vector z. Unchanged on exit. * * INCY - INTEGER. * On entrz, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * * Level 2 Blas routine. * * * .. Local Scalars .. INTEGER INFO * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL DCOPY, DTRMV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( LDT.LT.MAX( 1, N ) ) THEN INFO = 4 ELSE IF( INCW.EQ.0 ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 10 ELSE IF( INCZ.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DTRMVT', INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * * IF( INCX.NE.1 .OR. INCY.NE.1 .OR. INCW.NE.1 .OR. INCZ.NE.1 .OR. $ .TRUE. ) THEN CALL DCOPY( N, Y, INCY, X, INCX ) CALL DTRMV( UPLO, 'C', 'N', N, T, LDT, X, INCX ) CALL DCOPY( N, Z, INCZ, W, INCW ) CALL DTRMV( UPLO, 'N', 'N', N, T, LDT, W, INCW ) RETURN END IF * RETURN * * End of DTRMVT. * END scalapack-1.8.0/SRC/pzlaevswp.f0000640000175000017500000002576710363532303016166 0ustar muammarmuammar* * SUBROUTINE PZLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ RWORK, LRWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LRWORK, N * .. * .. Array Arguments .. INTEGER DESCZ( * ), KEY( * ), NVS( * ) DOUBLE PRECISION RWORK( * ), ZIN( LDZI, * ) COMPLEX*16 Z( * ) * .. * * Purpose * ======= * * PZLAEVSWP moves the eigenvectors (potentially unsorted) from * where they are computed, to a ScaLAPACK standard block cyclic * array, sorted so that the corresponding eigenvalues are sorted. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * ZIN (local input) DOUBLE PRECISION array, * dimension ( LDZI, NVS(iam) ) * The eigenvectors on input. Each eigenvector resides entirely * in one process. Each process holds a contiguous set of * NVS(iam) eigenvectors. The first eigenvector which the * process holds is: sum for i=[0,iam-1) of NVS(i) * * LDZI (locl input) INTEGER * leading dimension of the ZIN array * * Z (local output) COMPLEX*16 array * global dimension (N, N), local dimension (DESCZ(DLEN_), NQ) * The eigenvectors on output. The eigenvectors are distributed * in a block cyclic manner in both dimensions, with a * block size of NB. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * NVS (global input) INTEGER array, dimension( nprocs+1 ) * nvs(i) = number of processes * number of eigenvectors held by processes [0,i-1) * nvs(1) = number of eigen vectors held by [0,1-1) == 0 * nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) == * total number of eigenvectors * * KEY (global input) INTEGER array, dimension( N ) * Indicates the actual index (after sorting) for each of the * eigenvectors. * * RWORK (local workspace) DOUBLE PRECISION array, dimension (LRWORK) * * LRWORK (local input) INTEGER dimension of RWORK * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J, $ MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB, $ NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL, $ RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P EXTERNAL INDXG2L, INDXG2P * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGERV2D, DGESD2D * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * NB = DESCZ( MB_ ) * NPROCS = NPROW*NPCOL * * If PxSTEIN operates on a sub-matrix of a global matrix, the * key [] that contains the indicies of the eigenvectors is refe- * renced to the dimensions of the sub-matrix and not the global * distrubited matrix. Because of this, PxLAEVSWP will incorrectly * map the eigenvectors to the global eigenvector matrix, Z, unless * the key[] elements are shifted as below. * DO 10 J = DESCZ( N_ ), 1, -1 KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 ) 10 CONTINUE * DO 110 DIST = 0, NPROCS - 1 * SENDTO = MOD( IAM+DIST, NPROCS ) RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS ) * SENDROW = MOD( SENDTO, NPROW ) SENDCOL = SENDTO / NPROW RECVROW = MOD( RECVFROM, NPROW ) RECVCOL = RECVFROM / NPROW * SENDROW = SENDTO / NPCOL SENDCOL = MOD( SENDTO, NPCOL ) RECVROW = RECVFROM / NPCOL RECVCOL = MOD( RECVFROM, NPCOL ) * * Figure out what I have that process "sendto" wants * NBUFSIZE = 0 * * We are looping through the eigenvectors that I presently own. * DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( SENDCOL.EQ.PCOL ) THEN MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )* $ DESCZ( MB_ ) + 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 30 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 ) DO 20 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 RWORK( NBUFSIZE ) = ZIN( I+1-IZ, $ J-NVS( 1+IAM )+1-JZ ) 20 CONTINUE 30 CONTINUE END IF 40 CONTINUE * * IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL ) $ CALL DGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, RWORK, NBUFSIZE, $ SENDROW, SENDCOL ) * * * Figure out what process "recvfrom" has that I want * NBUFSIZE = 0 DO 70 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 60 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 50 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 50 CONTINUE 60 CONTINUE END IF 70 CONTINUE * * * IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL ) $ CALL DGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, RWORK, 1, $ RECVROW, RECVCOL ) * NBUFSIZE = 0 DO 100 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1, $ NPCOL ) CYCLIC_I = 1 MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 90 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1, $ NPROW ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 80 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) ) $ = DCMPLX( RWORK( NBUFSIZE ) ) CYCLIC_I = CYCLIC_I + 1 80 CONTINUE 90 CONTINUE END IF 100 CONTINUE * 110 CONTINUE RETURN * * End of PZLAEVSWP * END scalapack-1.8.0/SRC/pcdbtrf.f0000640000175000017500000012615210363532303015545 0ustar muammarmuammar SUBROUTINE PCDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDBTRF computes a LU factorization * of an N-by-N complex banded * diagonally dominant-like distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PCDBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDBTRF and this is stored in AF. If a linear system * is to be solved using PCDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * max(bwl,bwu)*max(bwl,bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MAX_BW, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, UP_PREV_TRI_SIZE_M, $ UP_PREV_TRI_SIZE_N, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CAXPY, CGEMM, CGERV2D, CGESD2D, CLACPY, $ CLATCPY, CPBTRF, CPOTRF, CSYRK, CTBTRS, CTRMM, $ CTRRV2D, CTRSD2D, CTRSM, CTRTRS, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) MBW2 = MAX_BW * MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -2 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PCDBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PCDBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = MAX(BWL,BWU)*MAX(BWL,BWU) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PCDBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Zero out space for work * DO 20 I=1, WORK_SIZE_MIN WORK( I ) = CZERO 20 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .GT. 0 ) THEN UP_PREV_TRI_SIZE_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) UP_PREV_TRI_SIZE_N=MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL CTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, $ A( OFST+(MY_NUM_COLS-BWL)*LLDA+(BWL+BWU+1) ), $ LLDA-1, 0, MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL CDBTRF( ODD_SIZE, ODD_SIZE, BWL, BWU, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * * Apply factorization to lower connection block BL_i * conjugate transpose the connection block in preparation. * Apply factorization to upper connection block BU_i * Move the connection block in preparation. * CALL CLATCPY( 'U', BWL, BWL, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), $ MAX_BW ) CALL CLACPY( 'L', BWU, BWU, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW ) * * Perform the triangular system solve {L_i}{{BU'}_i} = {B_i} * CALL CTBTRS( 'L', 'N', 'U', BWU, BWL, BWU, $ A( OFST+BWU+1+(ODD_SIZE-BWU )*LLDA ), LLDA, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, INFO ) * * Perform the triangular solve {U_i}^C{BL'}_i^C = {BL_i}^C * CALL CTBTRS( 'U', 'C', 'N', BWL, BWU, BWL, $ A( OFST+1+(ODD_SIZE-BWL)*LLDA ), LLDA, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ INFO ) * * conjugate transpose resulting block to its location * in main storage. * CALL CLATCPY( 'L', BWL, BWL, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1 ) * * Move the resulting block back to its location in main storage. * CALL CLACPY( 'L', BWU, BWU, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL CGEMM( 'C', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE , $ AF( ODD_SIZE*BWU+2*MBW2+1), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1), MAX_BW, CONE, $ A( OFST+ODD_SIZE*LLDA+1+BWU ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bwl, bwu)*bwl, bwu+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL CTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( WORK_U+1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL CTBTRS( 'L', 'N', 'U', ODD_SIZE, BWL, BWL, $ A( OFST + BWU+1 ), LLDA, AF( WORK_U+1 ), $ ODD_SIZE, INFO ) * * * Calculate the "spike" fillin, ${U_i}^C {{GL}_i}^C = {DU_i}^C$ * * * Copy D block into AF storage for solve. * CALL CLATCPY( 'L', UP_PREV_TRI_SIZE_N, UP_PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * CALL CTBTRS( 'U', 'C', 'N', ODD_SIZE, BWU, BWU, $ A( OFST + 1 ), LLDA, $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * * * Zero out space in case result is smaller than storage block * DO 30 I=1, MBW2 AF( ODD_SIZE*BWU+2*MBW2+I ) = CZERO 30 CONTINUE * CALL CGEMM( 'C', 'N', BWU, BWL, ODD_SIZE, $ -CONE, AF( 1 ), ODD_SIZE, $ AF( WORK_U+1 ), ODD_SIZE, CZERO, $ AF( 1+MAX(0,BWL-BWU)+ODD_SIZE*BWU+ $ (2*MAX_BW+MAX(0,BWU-BWL))*MAX_BW), $ MAX_BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * * Copy matrix HU_i (the last bwl rows of GU_i) to AFL storage * as per requirements of BLAS routine CTRMM. * Since we have GU_i stored, * conjugate transpose HU_i to HU_i^C. * CALL CLATCPY( 'N', BWL, BWL, $ AF( WORK_U+ODD_SIZE-BWL+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ), $ MAX_BW ) * CALL CTRMM( 'R', 'U', 'C', 'N', BWL, BWL, -CONE, $ A( ( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA ) ), $ LLDA-1, AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ), $ MAX_BW ) * * * Copy matrix HL_i (the last bwu rows of GL_i^C) to AFU store * as per requirements of BLAS routine CTRMM. * Since we have GL_i^C stored, * conjugate transpose HL_i^C to HL_i. * CALL CLATCPY( 'N', BWU, BWU, $ AF( ODD_SIZE-BWU+1 ), ODD_SIZE, $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * CALL CTRMM( 'R', 'L', 'N', 'N', BWU, BWU, -CONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, MYCOL-1 ) * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL CLACPY( 'N', MAX_BW, MAX_BW, $ A( OFST+ODD_SIZE*LLDA+BWU+1 ), $ LLDA-1, AF( ODD_SIZE*BWU+MBW2+1 ), $ MAX_BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL CAXPY( MBW2, CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL CDBTRF( MAX_BW, MAX_BW, MIN( MAX_BW-1, BWL ), $ MIN( MAX_BW-1, BWU ), AF( ODD_SIZE*BWU+MBW2+1 $ -( MIN( MAX_BW-1, BWU ))), MAX_BW+1, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL CLACPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW ) * CALL CLACPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, COMM_PROC ) * CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL CTBTRS( $ 'L', 'N', 'U', BWU, MIN( BWL, BWU-1 ), BWU, $ AF( ODD_SIZE*BWU+ $ MBW2+1+(MAX_BW+1)*(MAX_BW-BWU)), MAX_BW+1, $ AF( WORK_U+ODD_SIZE*BWL+1+MAX_BW-BWU), MAX_BW, INFO ) * * Modify lower off_diagonal block with diagonal block * * CALL CTBTRS( $ 'U', 'C', 'N', BWL, MIN( BWU, BWL-1 ), BWL, $ AF( ODD_SIZE*BWU+ $ MBW2+1-MIN( BWU, BWL-1 )+(MAX_BW+1)*(MAX_BW-BWL)), MAX_BW+1, $ AF( ODD_SIZE*BWU+1+MAX_BW-BWL), MAX_BW, INFO ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), MAX_BW, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * * * Since CTBTRS has no "left-right" option, we must transpose * CALL CLATCPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+ $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW ) * CALL CTBTRS( $ 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), BWL, $ AF( ODD_SIZE*BWU+MBW2+1), MAX_BW+1, $ WORK( 1+MAX_BW*(MAX_BW-BWL) ), MAX_BW, INFO ) * * Transpose back * CALL CLATCPY( $ 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+ $ 2*MBW2+1), MAX_BW ) * * * * Since CTBTRS has no "left-right" option, we must transpose * CALL CLATCPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+ $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW ) * CALL CTBTRS( $ 'U', 'C', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), BWU, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 )), MAX_BW+1, $ WORK( 1+MAX_BW*(MAX_BW-BWU) ), MAX_BW, INFO ) * * Transpose back * CALL CLATCPY( $ 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( ODD_SIZE*BWU+ $ 2*MBW2+1), MAX_BW ) * * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'C', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+(ODD_SIZE)*BWL+2*MBW2+1 ), MAX_BW, $ CZERO, WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL CGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ AF( ODD_SIZE*BWU+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, COMM_PROC ) * CALL CGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PCDBTRF * END scalapack-1.8.0/SRC/pdgetf2.f0000640000175000017500000002264610363532303015457 0ustar muammarmuammar SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDGETF2 computes an LU factorization of a general M-by-N * distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using * partial pivoting with row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal * elements (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). * * This is the right-looking Parallel Level 2 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). * NB_A-MOD(JA-1, NB_A) >= N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ). On exit, this array contains * the local pieces of the factors L and U from the factoriza- * tion sub( A ) = P*L*U; the unit diagonal elements of L are * not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IROFF, J, $ JJA, MN, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION GMAX * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PDAMAX, PDGER, $ PDSCAL, PDSWAP, PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * MN = MIN( M, N ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) * IF( MYCOL.EQ.IACOL ) THEN DO 10 J = JA, JA+MN-1 I = IA + J - JA * * Find pivot and test for singularity. * CALL PDAMAX( M-J+JA, GMAX, IPIV( IIA+J-JA ), A, I, J, $ DESCA, 1 ) IF( GMAX.NE.ZERO ) THEN * * Apply the row interchanges to columns JA:JA+N-1 * CALL PDSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, $ IPIV( IIA+J-JA ), JA, DESCA, DESCA( M_ ) ) * * Compute elements I+1:IA+M-1 of J-th column. * IF( J-JA+1.LT.M ) $ CALL PDSCAL( M-J+JA-1, ONE / GMAX, A, I+1, J, $ DESCA, 1 ) ELSE IF( INFO.EQ.0 ) THEN INFO = J - JA + 1 END IF * * Update trailing submatrix * IF( J-JA+1.LT.MN ) THEN CALL PDGER( M-J+JA-1, N-J+JA-1, -ONE, A, I+1, J, DESCA, $ 1, A, I, J+1, DESCA, DESCA( M_ ), A, I+1, $ J+1, DESCA ) END IF 10 CONTINUE * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN, MYROW, IACOL ) * END IF * RETURN * * End of PDGETF2 * END scalapack-1.8.0/SRC/pdgesv.f0000640000175000017500000002312710367447133015421 0ustar muammarmuammar SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * Jan 30, 2006 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDGESV computes the solution to a real system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed * matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS * distributed matrices. * * The LU decomposition with partial pivoting and row interchanges is * used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu- * tation matrix, L is unit lower triangular, and U is upper triangular. * L and U are stored in sub( A ). The factored form of sub( A ) is then * used to solve the system of equations sub( A ) * X = sub( B ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the N-by-N distributed matrix * sub( A ) to be factored. On exit, this array contains the * local pieces of the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand side * distributed matrix sub( B ). On exit, if INFO = 0, sub( B ) * is overwritten by the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, so the solution could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDGETRF, $ PDGETRS, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P EXTERNAL INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( N, 1, NRHS, 2, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( IBROW.NE.IAROW .OR. ICOFFA.NE.IROFFB ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1100+CTXT_) END IF END IF CALL PCHK2MAT( N, 1, N, 1, IA, JA, DESCA, 6, N, 1, NRHS, 2, $ IB, JB, DESCB, 11, 0, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESV', -INFO ) RETURN END IF * * Compute the LU factorization of sub( A ). * CALL PDGETRF( N, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ), overwriting sub( B ) * with X. * CALL PDGETRS( 'No transpose', N, NRHS, A, IA, JA, DESCA, IPIV, $ B, IB, JB, DESCB, INFO ) * END IF * RETURN * * End of PDGESV * END scalapack-1.8.0/SRC/pddbtrf.f0000640000175000017500000012703510363532303015547 0ustar muammarmuammar SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), AF( * ), WORK( * ) * .. * * Purpose * ======= * * PDDBTRF computes a LU factorization * of an N-by-N real banded * diagonally dominant-like distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PDDBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDBTRF and this is stored in AF. If a linear system * is to be solved using PDDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * max(bwl,bwu)*max(bwl,bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, I1, I2, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MAX_BW, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, UP_PREV_TRI_SIZE_M, $ UP_PREV_TRI_SIZE_N, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DAXPY, DDBTRF, $ DESC_CONVERT, DGEMM, DGEMV, DGERV2D, DGESD2D, $ DLACPY, DLATCPY, DTBTRS, DTRMM, DTRRV2D, $ DTRSD2D, GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, $ PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX( BWL, BWU ) MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -2 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = NB*( BWL+BWU ) + 6*MAX( BWL, BWU )*MAX( BWL, BWU ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = MAX( BWL, BWU )*MAX( BWL, BWU ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PDDBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 140 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW END IF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Zero out space for work * DO 20 I = 1, WORK_SIZE_MIN WORK( I ) = ZERO 20 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.GT.0 ) THEN UP_PREV_TRI_SIZE_M = MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) UP_PREV_TRI_SIZE_N = MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL DTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+( MY_NUM_COLS-BWL )* $ LLDA+( BWL+BWU+1 ) ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL DDBTRF( ODD_SIZE, ODD_SIZE, BWL, BWU, A( OFST+1 ), LLDA, $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 30 END IF * * IF( MYCOL.LT.NP-1 ) THEN * * Apply factorization to lower connection block BL_i * transpose the connection block in preparation. * Apply factorization to upper connection block BU_i * Move the connection block in preparation. * CALL DLATCPY( 'U', BWL, BWL, A( ( OFST+( BWL+BWU+1 )+ $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW ) CALL DLACPY( 'L', BWU, BWU, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW- $ BWU ), MAX_BW ) * * Perform the triangular system solve {L_i}{{BU'}_i} = {B_i} * CALL DTBTRS( 'L', 'N', 'U', BWU, BWL, BWU, $ A( OFST+BWU+1+( ODD_SIZE-BWU )*LLDA ), LLDA, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, INFO ) * * Perform the triangular solve {U_i}^T{BL'}_i^T = {BL_i}^T * CALL DTBTRS( 'U', 'T', 'N', BWL, BWU, BWL, $ A( OFST+1+( ODD_SIZE-BWL )*LLDA ), LLDA, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ INFO ) * * transpose resulting block to its location * in main storage. * CALL DLATCPY( 'L', BWL, BWL, AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW- $ BWL ), MAX_BW, A( ( OFST+( BWL+BWU+1 )+ $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1 ) * * Move the resulting block back to its location in main storage. * CALL DLACPY( 'L', BWU, BWU, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+ $ MAX_BW-BWU ), MAX_BW, A( ( OFST+1+ODD_SIZE* $ LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL DGEMM( 'T', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, ONE, $ A( OFST+ODD_SIZE*LLDA+1+BWU ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 30 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bwl, bwu)*bwl, bwu+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL DTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( WORK_U+1 ), BWL, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * * Transpose transmitted triangular matrix $DL_i$ * DO 50 I1 = 1, BWL DO 40 I2 = I1 + 1, BWL AF( WORK_U+I2+( I1-1 )*BWL ) = AF( WORK_U+I1+( I2-1 )* $ BWL ) AF( WORK_U+I1+( I2-1 )*BWL ) = ZERO 40 CONTINUE 50 CONTINUE * DO 60 I1 = 2, ODD_SIZE I2 = MIN( I1-1, BWL ) CALL DGEMV( 'N', BWL, I2, -ONE, $ AF( WORK_U+1+( I1-1-I2 )*BWL ), BWL, $ A( OFST+BWU+1+I2+( I1-1-I2 )*LLDA ), LLDA-1, $ ONE, AF( WORK_U+1+( I1-1 )*BWL ), 1 ) 60 CONTINUE * * * Calculate the "spike" fillin, ${U_i}^T {{GL}_i}^T = {DU_i}^T$ * * * Copy D block into AF storage for solve. * CALL DLACPY( 'L', UP_PREV_TRI_SIZE_N, UP_PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), BWU ) * DO 80 I1 = 1, ODD_SIZE I2 = MIN( BWU, I1-1 ) CALL DGEMV( 'N', BWU, I2, -ONE, AF( ( I1-1-I2 )*BWU+1 ), $ BWU, A( OFST+BWU+1-I2+( I1-1 )*LLDA ), 1, $ ONE, AF( ( I1-1 )*BWU+1 ), 1 ) * DO 70 I = 1, BWU AF( ( I1-1 )*BWU+I ) = AF( ( I1-1 )*BWU+I ) / $ A( ( I1-1 )*LLDA+BWU+1 ) 70 CONTINUE 80 CONTINUE * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * * * Zero out space in case result is smaller than storage block * DO 90 I = 1, MBW2 AF( ODD_SIZE*BWU+2*MBW2+I ) = ZERO 90 CONTINUE * CALL DGEMM( 'N', 'T', BWU, BWL, ODD_SIZE, -ONE, AF( 1 ), $ BWU, AF( WORK_U+1 ), BWL, ZERO, $ AF( 1+MAX( 0, BWL-BWU )+ODD_SIZE*BWU+( 2*MAX_BW+ $ MAX( 0, BWU-BWL ) )*MAX_BW ), MAX_BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * * Copy matrix HU_i (the last bwl rows of GU_i) to AFL storage * as per requirements of BLAS routine DTRMM. * Since we have GU_i stored, * transpose HU_i to HU_i^T. * CALL DLACPY( 'N', BWL, BWL, $ AF( WORK_U+( ODD_SIZE-BWL )*BWL+1 ), BWL, $ AF( ( ODD_SIZE )*BWU+1+( MAX_BW-BWL ) ), $ MAX_BW ) * CALL DTRMM( 'R', 'U', 'T', 'N', BWL, BWL, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, AF( ( ODD_SIZE )*BWU+1+ $ ( MAX_BW-BWL ) ), MAX_BW ) * * * Copy matrix HL_i (the last bwu rows of GL_i^T) to AFU store * as per requirements of BLAS routine DTRMM. * Since we have GL_i^T stored, * transpose HL_i^T to HL_i. * CALL DLACPY( 'N', BWU, BWU, AF( ( ODD_SIZE-BWU )*BWU+1 ), $ BWU, AF( WORK_U+( ODD_SIZE )*BWL+1+MAX_BW- $ BWU ), MAX_BW ) * CALL DTRMM( 'R', 'L', 'N', 'N', BWU, BWU, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( WORK_U+( ODD_SIZE )*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 130 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 120 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, MYCOL-1 ) * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL DLACPY( 'N', MAX_BW, MAX_BW, A( OFST+ODD_SIZE*LLDA+BWU+1 ), $ LLDA-1, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL DAXPY( MBW2, ONE, AF( ODD_SIZE*BWU+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 100 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 110 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, AF( ODD_SIZE*BWU+MBW2+1 ), $ 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DAXPY( MBW2, ONE, WORK( 1 ), 1, AF( ODD_SIZE*BWU+MBW2+1 ), $ 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 100 110 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL DDBTRF( MAX_BW, MAX_BW, MIN( MAX_BW-1, BWL ), $ MIN( MAX_BW-1, BWU ), AF( ODD_SIZE*BWU+MBW2+1- $ ( MIN( MAX_BW-1, BWU ) ) ), MAX_BW+1, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL DLACPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW ) * CALL DLACPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, COMM_PROC ) * CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL DTBTRS( 'L', 'N', 'U', BWU, MIN( BWL, BWU-1 ), BWU, $ AF( ODD_SIZE*BWU+MBW2+1+( MAX_BW+1 )*( MAX_BW- $ BWU ) ), MAX_BW+1, AF( WORK_U+ODD_SIZE*BWL+1+ $ MAX_BW-BWU ), MAX_BW, INFO ) * * Modify lower off_diagonal block with diagonal block * * CALL DTBTRS( 'U', 'T', 'N', BWL, MIN( BWU, BWL-1 ), BWL, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ BWL-1 )+( MAX_BW+1 )*( MAX_BW-BWL ) ), $ MAX_BW+1, AF( ODD_SIZE*BWU+1+MAX_BW-BWL ), $ MAX_BW, INFO ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST / 2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * * * Since DTBTRS has no "left-right" option, we must transpose * CALL DLATCPY( 'N', MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW ) * CALL DTBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ BWL, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ WORK( 1+MAX_BW*( MAX_BW-BWL ) ), MAX_BW, INFO ) * * Transpose back * CALL DLATCPY( 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW ) * * * * Since DTBTRS has no "left-right" option, we must transpose * CALL DLATCPY( 'N', MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW ) * CALL DTBTRS( 'U', 'T', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ BWU, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ WORK( 1+MAX_BW*( MAX_BW-BWU ) ), MAX_BW, INFO ) * * Transpose back * CALL DLATCPY( 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'T', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+( ODD_SIZE )*BWL+2*MBW2+1 ), MAX_BW, $ ZERO, WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL DGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ AF( ODD_SIZE*BWU+1 ), MAX_BW, ZERO, WORK( 1 ), $ MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ COMM_PROC ) * CALL DGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 120 CONTINUE * * 130 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 140 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PDDBTRF * END scalapack-1.8.0/SRC/pdsygst.f0000640000175000017500000004204010363532303015607 0ustar muammarmuammar* * SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDSYGST reduces a real symmetric-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**T or L**T*sub( A )*L. * * sub( B ) must have been previously factorized as U**T*U or L*L**T by * PDPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T); * = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**T*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**T. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PDPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) DOUBLE PRECISION * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, HALF PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, K, KB, MYCOL, MYROW, NB, $ NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDSYGS2, $ PDSYMM, PDSYR2K, PDTRMM, PDTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * SCALE = ONE * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN * * Compute inv(U')*sub( A )*inv(U) * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 10 CONTINUE * * Update the upper triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PDSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, IB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PDTRSM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, $ N-K-KB+1, ONE, B, IB+K-1, JB+K-1, DESCB, A, $ IA+K-1, JA+K+KB-1, DESCA ) CALL PDSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, ONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PDSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, A, $ IA+K-1, JA+K+KB-1, DESCA, B, IB+K-1, $ JB+K+KB-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PDSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, ONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PDTRSM( 'Right', UPLO, 'No transpose', 'Non-unit', $ KB, N-K-KB+1, ONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 10 * ELSE * * Compute inv(L)*sub( A )*inv(L') * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 20 CONTINUE * * Update the lower triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PDSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PDTRSM( 'Right', UPLO, 'Transpose', 'Non-unit', $ N-K-KB+1, KB, ONE, B, IB+K-1, JB+K-1, DESCB, $ A, IA+K+KB-1, JA+K-1, DESCA ) CALL PDSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, ONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PDSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, -ONE, $ A, IA+K+KB-1, JA+K-1, DESCA, B, IB+K+KB-1, $ JB+K-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PDSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, ONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PDTRSM( 'Left', UPLO, 'No transpose', 'Non-unit', $ N-K-KB+1, KB, ONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 20 * END IF * ELSE * IF( UPPER ) THEN * * Compute U*sub( A )*U' * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 30 CONTINUE * * Update the upper triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PDTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', K-1, $ KB, ONE, B, IB, JB, DESCB, A, IA, JA+K-1, $ DESCA ) CALL PDSYMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA+K-1, DESCA ) CALL PDSYR2K( UPLO, 'No transpose', K-1, KB, ONE, A, IA, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA, DESCA ) CALL PDSYMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA+K-1, DESCA ) CALL PDTRMM( 'Right', UPLO, 'Transpose', 'Non-unit', K-1, $ KB, ONE, B, IB+K-1, JB+K-1, DESCB, A, IA, $ JA+K-1, DESCA ) CALL PDSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 30 * ELSE * * Compute L'*sub( A )*L * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 40 CONTINUE * * Update the lower triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PDTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', KB, $ K-1, ONE, B, IB, JB, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PDSYMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA+K-1, $ JA, DESCA ) CALL PDSYR2K( UPLO, 'Transpose', K-1, KB, ONE, A, IA+K-1, $ JA, DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA, $ JA, DESCA ) CALL PDSYMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, ONE, A, IA+K-1, $ JA, DESCA ) CALL PDTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, K-1, $ ONE, B, IB+K-1, JB+K-1, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PDSYGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 40 * END IF * END IF * RETURN * * End of PDSYGST * END scalapack-1.8.0/SRC/pdlahqr.f0000640000175000017500000026577010602576752015602 0ustar muammarmuammar SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, $ ILWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.3) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * 1.7.3: March 22, 2006 * modification suggested by Mark Fahey and Greg Henry * 1.7.1: January 30, 2006 * 1.7.0: December 31, 1998 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) DOUBLE PRECISION A( * ), WI( * ), WORK( * ), WR( * ), Z( * ) * .. * * Purpose * ======= * * PDLAHQR is an auxiliary routine used to find the Schur decomposition * and or eigenvalues of a matrix already in Hessenberg form from * cols ILO to IHI. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * WANTT (global input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (global input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (global input) INTEGER * The order of the Hessenberg matrix A (and Z if WANTZ). * N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that A is already upper quasi-triangular in * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless * ILO = 1). PDLAHQR works primarily with the Hessenberg * submatrix in rows and columns ILO to IHI, but applies * transformations to all of H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * A (global input/output) DOUBLE PRECISION array, dimension * (DESCA(LLD_),*) * On entry, the upper Hessenberg matrix A. * On exit, if WANTT is .TRUE., A is upper quasi-triangular in * rows and columns ILO:IHI, with any 2-by-2 or larger diagonal * blocks not yet in standard form. If WANTT is .FALSE., the * contents of A are unspecified on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WR (global replicated output) DOUBLE PRECISION array, * dimension (N) * WI (global replicated output) DOUBLE PRECISION array, * dimension (N) * The real and imaginary parts, respectively, of the computed * eigenvalues ILO to IHI are stored in the corresponding * elements of WR and WI. If two eigenvalues are computed as a * complex conjugate pair, they are stored in consecutive * elements of WR and WI, say the i-th and (i+1)th, with * WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in A. A may be returned with * larger diagonal blocks until the next release. * * ILOZ (global input) INTEGER * IHIZ (global input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (global input/output) DOUBLE PRECISION array. * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations accumulated by PDHSEQR, and on * exit Z has been updated; transformations are applied only to * the submatrix Z(ILOZ:IHIZ,ILO:IHI). * If WANTZ is .FALSE., Z is not referenced. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local output) DOUBLE PRECISION array of size LWORK * * LWORK (local input) INTEGER * WORK(LWORK) is a local array and LWORK is assumed big enough * so that LWORK >= 3*N + * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N), * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) ) * * IWORK (global and local input) INTEGER array of size ILWORK * * ILWORK (local input) INTEGER * This holds the some of the IBLK integer arrays. This is held * as a place holder for the next release. * * INFO (global output) INTEGER * < 0: parameter number -INFO incorrect or inconsistent * = 0: successful exit * > 0: PDLAHQR failed to compute all the eigenvalues ILO to IHI * in a total of 30*(IHI-ILO+1) iterations; if INFO = i, * elements i+1:ihi of WR and WI contain those eigenvalues * which have been successfully computed. * * Logic: * This algorithm is very similar to _LAHQR. Unlike _LAHQR, * instead of sending one double shift through the largest * unreduced submatrix, this algorithm sends multiple double shifts * and spaces them apart so that there can be parallelism across * several processor row/columns. Another critical difference is * that this algorithm aggregrates multiple transforms together in * order to apply them in a block fashion. * * Important Local Variables: * IBLK = The maximum number of bulges that can be computed. * Currently fixed. Future releases this won't be fixed. * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) * ROTN = The number of transforms to block together * NBULGE = The number of bulges that will be attempted on the * current submatrix. * IBULGE = The current number of bulges started. * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). * * Subroutines: * This routine calls: * PDLACONSB -> To determine where to start each iteration * PDLAWIL -> Given the shift, get the transformation * DLASORTE -> Pair up eigenvalues so that reals are paired. * PDLACP3 -> Parallel array to local replicated array copy & * back. * DLAREF -> Row/column reflector applier. Core routine * here. * PDLASMSUB -> Finds negligible subdiagonal elements. * * Current Notes and/or Restrictions: * 1.) This code requires the distributed block size to be square * and at least six (6); unlike simpler codes like LU, this * algorithm is extremely sensitive to block size. Unwise * choices of too small a block size can lead to bad * performance. * 2.) This code requires A and Z to be distributed identically * and have identical contxts. * 3.) This release currently does not have a routine for * resolving the Schur blocks into regular 2x2 form after * this code is completed. Because of this, a significant * performance impact is required while the deflation is done * by sometimes a single column of processors. * 4.) This code does not currently block the initial transforms * so that none of the rows or columns for any bulge are * completed until all are started. To offset pipeline * start-up it is recommended that at least 2*LCM(NPROW,NPCOL) * bulges are used (if possible) * 5.) The maximum number of bulges currently supported is fixed at * 32. In future versions this will be limited only by the * incoming WORK array. * 6.) The matrix A must be in upper Hessenberg form. If elements * below the subdiagonal are nonzero, the resulting transforms * may be nonsimilar. This is also true with the LAPACK * routine. * 7.) For this release, it is assumed RSRC_=CSRC_=0 * 8.) Currently, all the eigenvalues are distributed to all the * nodes. Future releases will probably distribute the * eigenvalues by the column partitioning. * 9.) The internals of this routine are subject to change. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 ) DOUBLE PRECISION CONST PARAMETER ( CONST = 1.50D+0 ) INTEGER IBLK PARAMETER ( IBLK = 32 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE, $ ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II, $ IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART, $ ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP, $ ITERMAX, ITMP1, ITMP2, ITN, ITS, J, JAFIRST, $ JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ, LEFT, $ LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1, LOCALI2, $ LOCALK, LOCALM, M, MODKM1, MYCOL, MYROW, $ NBULGE, NH, NODE, NPCOL, NPROW, NR, NUM, NZ, $ RIGHT, ROTN, UP, VECSIDX DOUBLE PRECISION AVE, DISC, H00, H10, H11, H12, H21, H22, H33, $ H43H34, H44, OVFL, S, SMLNUM, SUM, T1, T1COPY, $ T2, T3, ULP, UNFL, V1SAVE, V2, V2SAVE, V3, $ V3SAVE * .. * .. Local Arrays .. INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ), $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ), $ KP2ROW( IBLK ), KROW( IBLK ), LOCALK2( IBLK ) DOUBLE PRECISION S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ), $ VCOPY( 3 ) * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, DGSUM2D, DLAHQR, DLAREF, $ DLARFG, DLASORTE, IGAMN2D, INFOG1L, INFOG2L, $ PDLABAD, PDLACONSB, PDLACP3, PDLASMSUB, $ PDLAWIL, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SIGN, SQRT * .. * .. Executable Statements .. * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) * ITERMAX = 0 IF( N.EQ.0 ) $ RETURN * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) IAFIRST = DESCA( RSRC_ ) JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) LCMRC = ILCM( NPROW, NPCOL ) * * Determine the number of columns we have so we can check workspace * LOCALK = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL ) JJ = N / HBL IF( JJ*HBL.LT.N ) $ JJ = JJ + 1 JJ = 7*JJ / LCMRC IF( LWORK.LT.3*N+MAX( 2*MAX( LDA, LDZ )+2*LOCALK, JJ ) ) THEN INFO = -15 END IF IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) END IF IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1300+NB_ ) END IF IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1300+MB_ ) END IF IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN INFO = -( 700+RSRC_ ) END IF IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN INFO = -( 1300+RSRC_ ) END IF IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN INFO = -4 END IF IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN INFO = -5 END IF IF( HBL.LT.5 ) THEN INFO = -( 700+MB_ ) END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PDLAHQR', -INFO ) RETURN END IF * * Set work array indices * VECSIDX = 0 IDIA = 3*N ISUB = 3*N ISUP = 3*N IRBUF = 3*N ICBUF = 3*N * * Find a value for ROTN * ROTN = HBL / 3 ROTN = MAX( ROTN, HBL-2 ) ROTN = MIN( ROTN, 1 ) * IF( ILO.EQ.IHI ) THEN CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN WR( ILO ) = A( ( ICOL-1 )*LDA+IROW ) ELSE WR( ILO ) = ZERO END IF WI( ILO ) = ZERO RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, 0, LILOZ, LIHIZ ) LIHIZ = NUMROC( IHIZ, HBL, MYROW, 0, NPROW ) * * Set machine-dependent constants for the stopping criterion. * If NORM(H) <= SQRT(OVFL), overflow should not occur. * UNFL = PDLAMCH( CONTXT, 'SAFE MINIMUM' ) OVFL = ONE / UNFL CALL PDLABAD( CONTXT, UNFL, OVFL ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = ITERMAX * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of our schur block size (<=2*IBLK). Each * iteration of the loop works with the active submatrix in rows * and columns L to I. Eigenvalues I+1 to IHI have already * converged. Either L = ILO or the global A(L,L-1) is negligible * so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 450 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 420 ITS = 0, ITN * * Look for a single small subdiagonal element. * CALL PDLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ), $ LWORK-IRBUF ) L = K * IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN A( ( ICOL-1 )*LDA+IROW ) = ZERO END IF WORK( ISUB+L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * M = L - 10 * IF ( L .GE. I - (2*IBLK-1) ) * IF ( L .GE. I - MAX(2*IBLK-1,HBL) ) IF( L.GE.I-1 ) $ GO TO 430 * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * * Copy submatrix of size 2*JBLK and prepare to do generalized * Wilkinson shift or an exceptional shift * JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 ) IF( JBLK.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * JBLK = JBLK - MOD( JBLK, LCMRC ) END IF JBLK = MIN( JBLK, 2*LCMRC ) JBLK = MAX( JBLK, 1 ) * CALL PDLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1, $ 0 ) IF( ITS.EQ.20 .OR. ITS.EQ.40 ) THEN * * Exceptional shift. * DO 20 II = 2*JBLK, 2, -1 S1( II, II ) = CONST*( ABS( S1( II, II ) )+ $ ABS( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE S1( 1, 1 ) = CONST*ABS( S1( 1, 1 ) ) ELSE CALL DLAHQR( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1, $ 2*IBLK, WORK( IRBUF+1 ), WORK( ICBUF+1 ), 1, $ 2*JBLK, Z, LDZ, IERR ) * * Prepare to use Wilkinson's double shift * H44 = S1( 2*JBLK, 2*JBLK ) H33 = S1( 2*JBLK-1, 2*JBLK-1 ) H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 ) IF( ( JBLK.GT.1 ) .AND. ( ITS.GT.30 ) ) THEN S = S1( 2*JBLK-1, 2*JBLK-2 ) DISC = ( H33-H44 )*HALF DISC = DISC*DISC + H43H34 IF( DISC.GT.ZERO ) THEN * * Real roots: Use Wilkinson's shift twice * DISC = SQRT( DISC ) AVE = HALF*( H33+H44 ) IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN H33 = H33*H44 - H43H34 H44 = H33 / ( SIGN( DISC, AVE )+AVE ) ELSE H44 = SIGN( DISC, AVE ) + AVE END IF H33 = H44 H43H34 = ZERO END IF END IF END IF * * Look for two consecutive small subdiagonal elements: * PDLACONSB is the routine that does this. * CALL PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, $ WORK( IRBUF+1 ), LWORK-IRBUF ) * * Skip small submatrices * * IF ( M .GE. I - 5 ) * $ GO TO 80 * * Double-shift QR step * * NBULGE is the number of bulges that will be attempted * ISTOP = MIN( M+ROTN-MOD( M, ROTN ), I-2 ) ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) ) ISTOP = MIN( ISTOP, I2-2 ) ISTOP = MAX( ISTOP, M ) NBULGE = ( I-1-ISTOP ) / HBL * * Do not exceed maximum determined. * NBULGE = MIN( NBULGE, JBLK ) IF( NBULGE.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * NBULGE = NBULGE - MOD( NBULGE, LCMRC ) END IF NBULGE = MAX( NBULGE, 1 ) * IF( ( ITS.NE.20 ) .AND. ( ITS.NE.40 ) .AND. ( NBULGE.GT.1 ) ) $ THEN * * sort the eigenpairs so that they are in twos for double * shifts. only call if several need sorting * CALL DLASORTE( S1( 2*( JBLK-NBULGE )+1, $ 2*( JBLK-NBULGE )+1 ), 2*IBLK, 2*NBULGE, $ WORK( IRBUF+1 ), IERR ) END IF * * IBULGE is the number of bulges going so far * IBULGE = 1 * * "A" row defs : main row transforms from LOCALK to LOCALI2 * CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, ITMP1, LOCALK ) LOCALK = NUMROC( N, HBL, MYCOL, 0, NPCOL ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ICOL1, LOCALI2 ) LOCALI2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL ) * * "A" col defs : main col transforms from LOCALI1 to LOCALM * CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, LOCALI1, ICOL1 ) ICOL1 = NUMROC( N, HBL, MYROW, 0, NPROW ) CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, LOCALM, ICOL1 ) ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, 0, NPROW ) * * Which row & column will start the bulges * ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST * CALL INFOG1L( M, HBL, NPROW, MYROW, 0, II, ITMP2 ) ITMP2 = NUMROC( N, HBL, MYROW, 0, NPROW ) CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, JJ, ITMP2 ) ITMP2 = NUMROC( N, HBL, MYCOL, 0, NPCOL ) CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, ISTOP, KP2ROW( 1 ) ) KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, 0, NPROW ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ISTOP, KP2COL( 1 ) ) KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, 0, NPCOL ) * * Set all values for bulges. All bulges are stored in * intermediate steps as loops over KI. Their current "task" * over the global M to I-1 values is always K1(KI) to K2(KI). * However, because there are many bulges, K1(KI) & K2(KI) might * go past that range while later bulges (KI+1,KI+2,etc..) are * finishing up. * * Rules: * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPTTRF and this is stored in AF. If a linear system * is to be solved using PCPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CGEMM, CGERV2D, CGESD2D, CLACPY, CMATADD, $ CTBTRS, CTRMM, CTRTRS, DESC_CONVERT, GLOBCHK, $ PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -4 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PCPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PCPTTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1105 PARAM_CHECK( 15, 2 ) = 1104 PARAM_CHECK( 14, 2 ) = 1103 PARAM_CHECK( 13, 2 ) = 1102 PARAM_CHECK( 12, 2 ) = 1101 PARAM_CHECK( 11, 2 ) = 10 PARAM_CHECK( 10, 2 ) = 805 PARAM_CHECK( 9, 2 ) = 804 PARAM_CHECK( 8, 2 ) = 803 PARAM_CHECK( 7, 2 ) = 801 PARAM_CHECK( 6, 2 ) = 7 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CPTTRSV( UPLO, 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', 1, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTRTRS( 'L', 'C', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, 1, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-1 ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -CONJG( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CPTTRSV( UPLO, 'C', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CPTTRSV( UPLO, 'C', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -CONJG( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'T', 'N', 1, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTRTRS( 'L', 'C', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'C', 'N', ODD_SIZE, NRHS, 1, -CONE, AF( 1 ), $ INT_ONE, WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL CAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CPTTRSV( UPLO, 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCPTTRSV * END scalapack-1.8.0/SRC/pdposvx.f0000640000175000017500000006575710363532303015641 0ustar muammarmuammar SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, $ LWORK, N, NRHS DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IWORK( * ) DOUBLE PRECISION A( * ), AF( * ), $ B( * ), BERR( * ), FERR( * ), $ SC( * ), SR( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PDPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to * compute the solution to a real system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. In the following comments Y denotes Y(IY:IY+M-1,JY:JY+K-1) * a M-by-K matrix where Y can be A, AF, B and X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * diag(SR) * A * diag(SC) * inv(diag(SC)) * X = diag(SR) * B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(SR)*A*diag(SC) and B by diag(SR)*B. * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as * A = U**T* U, if UPLO = 'U', or * A = L * L**T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If equilibration was used, the matrix X is premultiplied by * diag(SR) so that it solves the original system before * equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix A is * supplied on entry, and if not, whether the matrix A should be * equilibrated before it is factored. * = 'F': On entry, AF contains the factored form of A. * If EQUED = 'Y', the matrix A has been equilibrated * with scaling factors given by S. A and AF will not * be modified. * = 'N': The matrix A will be copied to AF and factored. * = 'E': The matrix A will be equilibrated if necessary, then * copied to AF and factored. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A is stored; * = 'L': Lower triangle of A is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices B and X. NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). * On entry, the symmetric matrix A, except if FACT = 'F' and * EQUED = 'Y', then A must contain the equilibrated matrix * diag(SR)*A*diag(SC). If UPLO = 'U', the leading * N-by-N upper triangular part of A contains the upper * triangular part of the matrix A, and the strictly lower * triangular part of A is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper * triangular part of A is not referenced. A is not modified if * FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. * * On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by * diag(SR)*A*diag(SC). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) DOUBLE PRECISION pointer * into the local memory to an array of local dimension * ( LLD_AF, LOCc(JA+N-1)). * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(SR)*A*diag(SC). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the original * matrix A. * * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * EQUED (global input/global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'Y': Equilibration was done, i.e., A has been replaced by * diag(SR) * A * diag(SC). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * SR (local input/local output) DOUBLE PRECISION array, * dimension (LLD_A) * The scale factors for A distributed across process rows; * not accessed if EQUED = 'N'. SR is an input variable if * FACT = 'F'; otherwise, SR is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SR must be * positive. * * SC (local input/local output) DOUBLE PRECISION array, * dimension (LOC(N_A)) * The scale factors for A distributed across * process columns; not accessed if EQUED = 'N'. SC is an input * variable if FACT = 'F'; otherwise, SC is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SC must be * positive. * * B (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of local dimension * ( LLD_B, LOCc(JB+NRHS-1) ). * On entry, the N-by-NRHS right-hand side matrix B. * On exit, if EQUED = 'N', B is not modified; if TRANS = 'N' * and EQUED = 'R' or 'B', B is overwritten by diag(R)*B; if * TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is overwritten * by diag(C)*B. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of local dimension * ( LLD_X, LOCc(JX+NRHS-1) ). * If INFO = 0, the N-by-NRHS solution matrix X to the original * system of equations. Note that A and B are modified on exit * if EQUED .ne. 'N', and the solution to the equilibrated * system is inv(diag(SC))*X if TRANS = 'N' and EQUED = 'C' or * 'B', or inv(diag(SR))*X if TRANS = 'T' or 'C' and EQUED = 'R' * or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) DOUBLE PRECISION * The estimate of the reciprocal condition number of the matrix * A after equilibration (if done). If RCOND is less than the * machine precision (in particular, if RCOND = 0), the matrix * is singular to working precision. This condition is * indicated by a return code of INFO > 0, and the solution and * error bounds are not computed. * * FERR (local output) DOUBLE PRECISION array, dimension (LOC(N_B)) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution, FERR(j) bounds the magnitude * of the largest entry in (X(j) - XTRUE) divided by * the magnitude of the largest entry in X(j). The quality of * the error bound depends on the quality of the estimate of * norm(inv(A)) computed in the code; if the estimate of * norm(inv(A)) is accurate, the error bound is guaranteed. * * BERR (local output) DOUBLE PRECISION array, dimension (LOC(N_B)) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A or B that makes X(j) an exact solution). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PDPOCON( LWORK ), PDPORFS( LWORK ) ) * + LOCr( N_A ). * LWORK = 3*DESCA( LLD_ ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK = DESCA( LLD_ ) * LIWORK = LOCr(N_A). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: if INFO = i, the leading minor of order i of A * is not positive definite, so the factorization * could not be completed, and the solution and error * bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the matrix * is singular to working precision, and the solution * and error bounds have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL EQUIL, LQUERY, NOFACT, RCEQU INTEGER I, IACOL, IAROW, IAFROW, IBROW, IBCOL, ICOFF, $ ICOFFA, ICTXT, IDUMM, IIA, IIB, IIX, INFEQU, $ IROFF, IROFFA, IROFFAF, IROFFB, IROFFX, IXCOL, $ IXROW, J, JJA, JJB, JJX, LDB, LDX, LIWMIN, $ LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NRHSQ, $ NQ DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, $ DGAMN2D, DGAMX2D, INFOG2L, $ PDPOCON, PDPOEQU, PDPORFS, $ PDPOTRF, $ PDPOTRS, PDLACPY, PDLAQSY, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL PDLAMCH, INDXG2P, LSAME, NUMROC, PDLANSY * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA LWMIN = 3*DESCA( LLD_ ) LIWMIN = NP NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' RCEQU = .FALSE. ELSE RCEQU = LSAME( EQUED, 'Y' ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( RCEQU ) THEN * SMIN = BIGNUM SMAX = ZERO DO 10 J = IIA, IIA + NP - 1 SMIN = MIN( SMIN, SR( J ) ) SMAX = MAX( SMAX, SR( J ) ) 10 CONTINUE CALL DGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, SMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, SMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( SMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) ELSE SCOND = ONE END IF END IF END IF END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( UPLO ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 13 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 28 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 5, IDUM1, IDUM2, $ INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 28 IF( LIWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 4, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFEQU ) * IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) RCEQU = LSAME( EQUED, 'Y' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) LDB = DESCB( LLD_ ) IROFF = MOD( IB-1, DESCB( MB_ ) ) ICOFF = MOD( JB-1, DESCB( NB_ ) ) NP = NUMROC( N+IROFF, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 30 J = JJB, JJB+NRHSQ-1 DO 20 I = IIB, IIB+NP-1 B( I + ( J-1 )*LDB ) = SR( I )*B( I + ( J-1 )*LDB ) 20 CONTINUE 30 CONTINUE END IF * IF( NOFACT .OR. EQUIL ) THEN * * Compute the Cholesky factorization A = U'*U or A = L*L'. * CALL PDLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PDPOTRF( UPLO, N, AF, IAF, JAF, DESCAF, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * ANORM = PDLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK ) * * Compute the reciprocal of the condition number of A. * CALL PDPOCON( UPLO, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PDLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PDPOTRS( UPLO, N, NRHS, AF, IAF, JAF, DESCAF, X, IX, JX, $ DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX( MB_ ) ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 50 J = JJX, JJX+NRHSQ-1 DO 40 I = IIX, IIX+NP-1 X( I + ( J-1 )*LDX ) = SR( I )*X( I + ( J-1 )*LDX ) 40 CONTINUE 50 CONTINUE DO 60 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / SCOND 60 CONTINUE END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN RETURN * * End of PDPOSVX * END scalapack-1.8.0/SRC/pcpotf2.f0000640000175000017500000003133410363532303015473 0ustar muammarmuammar SUBROUTINE PCPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCPOTF2 computes the Cholesky factorization of a complex hermitian * positive definite distributed matrix sub( A )=A(IA:IA+N-1,JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) COMPLEX CONE PARAMETER ( CONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURR, IDIAG, IIA, $ IOFFA, IROFF, J, JJA, LDA, MYCOL, MYROW, $ NPCOL, NPROW REAL AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, CGEMV, $ CLACGV, CSSCAL, IGEBR2D, IGEBS2D, $ INFOG2L, PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL, SQRT * .. * .. External Functions .. LOGICAL LSAME COMPLEX CDOTC EXTERNAL LSAME, CDOTC * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Compute local information * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF ( UPPER ) THEN * * Process (IAROW, IACOL) owns block to be factorized * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN * * Compute the Cholesky factorization A = U'*U. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 10 J = JA, JA+N-1 * * Compute U(J,J) and test for non-positive-definiteness. * AJJ = REAL( A( IDIAG ) ) - $ CDOTC( J-JA, A( IOFFA ), 1, A( IOFFA ), 1 ) IF( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 20 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of row J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + LDA CALL CLACGV( J-JA, A( IOFFA ), 1 ) CALL CGEMV( 'Transpose', J-JA, JA+N-J-1, -CONE, $ A( IOFFA+LDA ), LDA, A( IOFFA ), 1, $ CONE, A( ICURR ), LDA ) CALL CLACGV( J-JA, A( IOFFA ), 1 ) CALL CSSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), $ LDA ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE * 20 CONTINUE * * Broadcast INFO to all processes in my IAROW. * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) END IF * * IAROW bcasts along columns so that everyone has INFO * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1, $ IAROW, MYCOL ) * END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Compute the Cholesky factorization A = L*L'. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 30 J = JA, JA+N-1 * * Compute L(J,J) and test for non-positive-definiteness. * AJJ = REAL( A( IDIAG ) ) - $ CDOTC( J-JA, A( IOFFA ), LDA, A( IOFFA ), LDA ) IF ( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 40 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of column J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + 1 CALL CLACGV( J-JA, A( IOFFA ), LDA ) CALL CGEMV( 'No transpose', JA+N-J-1, J-JA, -CONE, $ A( IOFFA+1 ), LDA, A( IOFFA ), LDA, $ CONE, A( ICURR ), 1 ) CALL CLACGV( J-JA, A( IOFFA ), LDA ) CALL CSSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), 1 ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 30 CONTINUE * 40 CONTINUE * * Broadcast INFO to everyone in IACOL * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1, IAROW, MYCOL ) * END IF * * IACOL bcasts INFO along rows so that everyone has it * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) * END IF * END IF * RETURN * * End of PCPOTF2 * END scalapack-1.8.0/SRC/psdbtrsv.f0000640000175000017500000014452010363532303015767 0ustar muammarmuammar SUBROUTINE PSDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSDBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Gaussian elimination code PD@(dom_pre)BTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PSDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDBTRF and this is stored in AF. If a linear system * is to be solved using PSDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MAX_BW, MBW2, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 18, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ SGEMM, SGERV2D, SGESD2D, SLACPY, SMATADD, $ STBTRS, STRMM, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 9*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 12*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 12*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 12*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 12*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX( BWL, BWU ) MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 9*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -4 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -5 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 9*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 9*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 12*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 12*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -6 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -8 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 9*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PSDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PSDBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = MAX( BWL, BWU )*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PSDBTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 18, 1 ) = DESCB( 5 ) PARAM_CHECK( 17, 1 ) = DESCB( 4 ) PARAM_CHECK( 16, 1 ) = DESCB( 3 ) PARAM_CHECK( 15, 1 ) = DESCB( 2 ) PARAM_CHECK( 14, 1 ) = DESCB( 1 ) PARAM_CHECK( 13, 1 ) = IB PARAM_CHECK( 12, 1 ) = DESCA( 5 ) PARAM_CHECK( 11, 1 ) = DESCA( 4 ) PARAM_CHECK( 10, 1 ) = DESCA( 3 ) PARAM_CHECK( 9, 1 ) = DESCA( 1 ) PARAM_CHECK( 8, 1 ) = JA PARAM_CHECK( 7, 1 ) = NRHS PARAM_CHECK( 6, 1 ) = BWU PARAM_CHECK( 5, 1 ) = BWL PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 18, 2 ) = 1205 PARAM_CHECK( 17, 2 ) = 1204 PARAM_CHECK( 16, 2 ) = 1203 PARAM_CHECK( 15, 2 ) = 1202 PARAM_CHECK( 14, 2 ) = 1201 PARAM_CHECK( 13, 2 ) = 11 PARAM_CHECK( 12, 2 ) = 905 PARAM_CHECK( 11, 2 ) = 904 PARAM_CHECK( 10, 2 ) = 903 PARAM_CHECK( 9, 2 ) = 901 PARAM_CHECK( 8, 2 ) = 8 PARAM_CHECK( 7, 2 ) = 6 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 18, PARAM_CHECK, 18, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 200 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW END IF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'N', 'U', ODD_SIZE, BWL, NRHS, $ A( OFST+1+BWU ), LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BWL, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWL+1 ), LLDB, $ WORK( 1 ), MAX_BW ) * CALL STRMM( 'L', 'U', 'N', 'N', BWL, NRHS, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, WORK( 1 ), MAX_BW ) * CALL SMATADD( BWL, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Clear garbage out of workspace block * DO 10 IDUM1 = 1, WORK_SIZE_MIN WORK( IDUM1 ) = 0.0 10 CONTINUE * * IF( MYCOL.NE.0 ) THEN * * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'N', 'N', BWU, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ BWU, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+MAX_BW-BWU ), MAX_BW ) * END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 40 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 20 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 30 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 20 30 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 40 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 90 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 50 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 60 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 50 60 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, WORK( 1 ), $ MAX_BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STBTRS( 'L', 'T', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * ***Modification Loop ******* * 70 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 80 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 70 80 CONTINUE * [End of GOTO Loop] * 90 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'T', 'N', ODD_SIZE, NRHS, BWU, -ONE, AF( 1 ), $ BWU, WORK( 1+MAX_BW-BWU ), MAX_BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BWL, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL STRMM( 'L', 'U', 'T', 'N', BWL, NRHS, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, WORK( 1+MAX_BW-BWL ), $ MAX_BW ) * CALL SMATADD( BWL, NRHS, ONE, WORK( 1+MAX_BW-BWL ), $ MAX_BW, ONE, B( PART_OFFSET+ODD_SIZE-BWL+ $ 1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'T', 'U', ODD_SIZE, BWL, NRHS, $ A( OFST+1+BWU ), LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'T', 'N', ODD_SIZE, BWU, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BWU, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWU+1 ), LLDB, $ WORK( 1 ), MAX_BW ) * CALL STRMM( 'L', 'L', 'T', 'N', BWU, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1 ), MAX_BW ) * CALL SMATADD( BWU, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Clear garbage out of workspace block * DO 100 IDUM1 = 1, WORK_SIZE_MIN WORK( IDUM1 ) = 0.0 100 CONTINUE * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'N', 'N', BWL, NRHS, ODD_SIZE, -ONE, $ AF( WORK_U+1 ), BWL, B( PART_OFFSET+1 ), $ LLDB, ZERO, WORK( 1+MAX_BW-BWL ), MAX_BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 130 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 110 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 120 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 110 120 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STBTRS( 'U', 'T', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 130 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 180 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 140 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 140 150 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STBTRS( 'U', 'N', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * ***Modification Loop ******* * 160 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 170 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 160 170 CONTINUE * [End of GOTO Loop] * 180 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'T', 'N', ODD_SIZE, NRHS, BWL, -ONE, $ AF( WORK_U+1 ), BWL, WORK( 1+MAX_BW-BWL ), $ MAX_BW, ONE, B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BWU, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL STRMM( 'L', 'L', 'N', 'N', BWU, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL SMATADD( BWU, NRHS, ONE, WORK( 1+MAX_BW-BWU ), $ MAX_BW+BWL, ONE, B( PART_OFFSET+ODD_SIZE- $ BWU+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'N', 'N', ODD_SIZE, BWU, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 190 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 200 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSDBTRSV * END scalapack-1.8.0/SRC/pdormr3.f0000640000175000017500000003512610363532303015507 0ustar muammarmuammar SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMR3 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JAA, JCC, LCM, $ LCMP, LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDLARZ, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( L.LT.0 .OR. L.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic+i-ia:icc+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * CALL PDLARZ( SIDE, MI, NI, L, A, I, JAA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMR3 * END scalapack-1.8.0/SRC/pdlase2.f0000640000175000017500000003741610363532303015457 0ustar muammarmuammar SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASE2 initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. PDLASE2 requires that only dimension of the matrix * operand is distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) DOUBLE PRECISION * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) DOUBLE PRECISION * The constant to which the diagonal elements are to be set. * * A (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, ICOFFA, II, IIA, $ IIBEG, IIEND, IINXT, ILEFT, IRIGHT, IROFFA, $ ITOP, JJ, JJA, JJBEG, JJEND, JJNXT, LDA, MBA, $ MP, MPA, MYCOL, MYDIST, MYROW, NBA, NPCOL, $ NPROW, NQ, NQA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DLASET, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MB_A * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MPA.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEG = IIA IIEND = IIA + MPA - 1 IINXT = MIN( ICEIL( IIBEG, MBA ) * MBA, IIEND ) * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL DLASET( UPLO, IINXT-IIBEG+1, N-ITOP, ALPHA, BETA, $ A( IIBEG+(JJA+ITOP-1)*LDA ), LDA ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEG = IINXT +1 IINXT = MIN( IINXT+MBA, IIEND ) GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL DLASET( 'All', MP, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL DLASET( UPLO, MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 30 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL DLASET( 'All', MPA, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( IIA+(JJ-1)*LDA ), LDA ) CALL DLASET( 'All', MPA-MP, HEIGHT, ALPHA, ALPHA, $ A( IIA+(JJA+ITOP-1)*LDA ), LDA ) CALL DLASET( 'All', MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 30 END IF * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQA.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEG = JJA JJEND = JJA + NQA - 1 JJNXT = MIN( ICEIL( JJBEG, NBA ) * NBA, JJEND ) * 40 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL DLASET( UPLO, M-ILEFT, JJNXT-JJBEG+1, ALPHA, $ BETA, A( IIA+ILEFT+(JJBEG-1)*LDA ), LDA ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBA, JJEND ) GO TO 40 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 50 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL DLASET( 'All', ILEFT-II+IIA, NQ, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL DLASET( UPLO, WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 50 END IF * ELSE * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 60 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL DLASET( 'All', ILEFT-II+IIA, NQA, ALPHA, ALPHA, $ A( II+(JJA-1)*LDA ), LDA ) CALL DLASET( 'All', WIDE, NQA-NQ, ALPHA, ALPHA, $ A( IIA+ILEFT+(JJA-1)*LDA ), LDA ) CALL DLASET( 'All', WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 60 END IF * END IF * END IF * END IF * RETURN * * End of PDLASE2 * END scalapack-1.8.0/SRC/pzgeqr2.f0000640000175000017500000002756410363532303015522 0ustar muammarmuammar SUBROUTINE PZGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQR2 computes a QR factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1), and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, II, IACOL, IAROW, ICTXT, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX*16 AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLARFC, PZLARFG, ZGEBR2D, ZGEBS2D, $ ZLARFG, ZSCAL * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) I = II+(JJ-1)*DESCA( LLD_ ) IF( MYCOL.EQ.IACOL ) THEN AJJ = A( I ) CALL ZLARFG( 1, AJJ, A( I ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - DCONJG( TAU( JJ ) ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL ZSCAL( NQ-JJ, ALPHA, A( I+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL ZSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), 1, $ IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA, JA+K-1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(i+1:ia+m-1,j) * CALL PZLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(j)' to A(i:ia+m-1,j+1:ja+n-1) from the left * CALL PZELSET( A, I, J, DESCA, ONE ) * CALL PZLARFC( 'Left', M-J+JA, N-J+JA-1, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK ) END IF CALL PZELSET( A, I, J, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEQR2 * END scalapack-1.8.0/SRC/pdgerqf.f0000640000175000017500000002721210363532303015546 0ustar muammarmuammar SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGERQF computes a RQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDGERQ2, $ PDLARFB, PDLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( IL.GE.IN+1 ) THEN * * Use blocked code initially * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute the RQ factorization of the current block * A(i:i+ib-1,ja:ja+n-m+i+ib-ia-1) * CALL PDGERQ2( IB, N-M+I+IB-IA, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PDLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, $ I, JA, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the * right * CALL PDLARFB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = IN - IA + 1 NU = N - M + IN - IA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PDGERQ2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGERQF * END scalapack-1.8.0/SRC/pdormr2.f0000640000175000017500000003452610363532303015511 0ustar muammarmuammar SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMR2 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IROFFC, LCM, LCMP, LWMIN, MI, $ MPC0, MYCOL, MYROW, NI, NPCOL, NPROW, NQ, NQC0 DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDELSET2, PDLARF, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic:ic+m-k+i-ia,jc:jc+n-1) * MI = M - K + I - IA + 1 ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc:jc+n-k+i-ia+1) * NI = N - K + I - IA + 1 END IF * * Apply H(i) or H(i)' * CALL PDELSET2( AII, A, I, JA+NQ-K+I-IA, DESCA, ONE ) CALL PDLARF( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) CALL PDELSET( A, I, JA+NQ-K+I-IA, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMR2 * END scalapack-1.8.0/SRC/pdrscl.f0000640000175000017500000001630610363532303015407 0ustar muammarmuammar SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SA * .. * .. Array Arguments .. INTEGER DESCX( * ) DOUBLE PRECISION SX( * ) * .. * * Purpose * ======= * * PDRSCL multiplies an N-element real distributed vector sub( X ) by * the real scalar 1/a. This is done without overflow or underflow as * long as the final result sub( X )/a does not overflow or underflow. * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * SA (global input) DOUBLE PRECISION * The scalar a which is used to divide each component of * sub( X ). SA must be >= 0, or the subroutine will divide by * zero. * * SX (local input/local output) DOUBLE PRECISION array * containing the local pieces of a distributed matrix of * dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER ICTXT, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PDLABAD, PDSCAL * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) * * Initialize the denominator to SA and the numerator to 1. * CDEN = SA CNUM = ONE * 10 CONTINUE CDEN1 = CDEN*SMLNUM CNUM1 = CNUM / BIGNUM IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN * * Pre-multiply sub( X ) by SMLNUM if CDEN is large compared to * CNUM. * MUL = SMLNUM DONE = .FALSE. CDEN = CDEN1 ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN * * Pre-multiply sub( X ) by BIGNUM if CDEN is small compared to * CNUM. * MUL = BIGNUM DONE = .FALSE. CNUM = CNUM1 ELSE * * Multiply sub( X ) by CNUM / CDEN and return. * MUL = CNUM / CDEN DONE = .TRUE. END IF * * Scale the vector sub( X ) by MUL * CALL PDSCAL( N, MUL, SX, IX, JX, DESCX, INCX ) * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PDRSCL * END scalapack-1.8.0/SRC/pdgbtrs.f0000640000175000017500000011321710363532303015564 0ustar muammarmuammar SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PDGBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PDGBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded distributed * matrix with bandwidth BWL, BWU. * * Routine PDGBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDGBTRF and this is stored in AF. If a linear system * is to be solved using PDGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NRHS*(NB+2*bwl+4*bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * ===================================================================== * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian National University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC, $ FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, J, JA_NEW, L, LBWL, LBWU, LDBB, $ LDW, LLDA, LLDB, LM, LMJ, LN, LPTR, MYCOL, $ MYROW, NB, NEICOL, NP, NPACT, NPCOL, NPROW, $ NPSTR, NP_SAVE, ODD_SIZE, PART_OFFSET, $ RECOVERY_VAL, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WPTR * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DCOPY, $ DESC_CONVERT, DGEMM, DGEMV, DGER, DGERV2D, $ DGESD2D, DGETRS, DLACPY, DLASWP, DSCAL, DSWAP, $ DTRSM, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -3 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( 2*BWL+2*BWU+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * BW = BWU + BWL * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check worksize * WORK_SIZE_MIN = NRHS*( NB+2*BWL+4*BWU ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PDGBTRS: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 16 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 100 END IF * * * * Begin main code * * Move data into workspace - communicate/copy (overlap) * IF( MYCOL.LT.NPCOL-1 ) THEN CALL DGESD2D( ICTXT, BWU, NRHS, B( NB-BWU+1 ), LLDB, 0, $ MYCOL+1 ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN LM = NB - BWU ELSE LM = NB END IF * IF( MYCOL.GT.0 ) THEN WPTR = BWU + 1 ELSE WPTR = 1 END IF * LDW = NB + BWU + 2*BW + BWU * CALL DLACPY( 'G', LM, NRHS, B( 1 ), LLDB, WORK( WPTR ), LDW ) * * Zero out rest of work * DO 20 J = 1, NRHS DO 10 L = WPTR + LM, LDW WORK( ( J-1 )*LDW+L ) = ZERO 10 CONTINUE 20 CONTINUE * IF( MYCOL.GT.0 ) THEN CALL DGERV2D( ICTXT, BWU, NRHS, WORK( 1 ), LDW, 0, MYCOL-1 ) END IF * ******************************************************************** * PHASE 1: Local computation phase -- Solve L*X = B ******************************************************************** * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( MYCOL.NE.0 ) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1 + BWU END IF * IF( MYCOL.NE.NPCOL-1 ) THEN LM = NB - LBWU LN = NB - BW ELSE IF( MYCOL.NE.0 ) THEN LM = ODD_SIZE + BWU LN = MAX( ODD_SIZE-BW, 0 ) ELSE LM = N LN = MAX( N-BW, 0 ) END IF * DO 30 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L.NE.J ) THEN CALL DSWAP( NRHS, WORK( L ), LDW, WORK( J ), LDW ) END IF * LPTR = BW + 1 + ( J-1 )*LLDA + APTR * CALL DGER( LMJ, NRHS, -ONE, A( LPTR ), 1, WORK( J ), LDW, $ WORK( J+1 ), LDW ) * 30 CONTINUE * ******************************************************************** * PHASE 2: Global computation phase -- Solve L*X = B ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU BN = BW ELSE BM = MIN( BW, ODD_SIZE ) + BWU BN = MIN( BW, ODD_SIZE ) END IF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = ( NB+BWU )*BW + 1 LDBB = 2*BW + BWU * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. CALL DGETRS( 'N', N-LN, NRHS, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), WORK( LN+1 ), LDW, INFO ) * END IF * * Loop over levels ... * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) is used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels 40 CONTINUE IF( NPACT.LE.1 ) $ GO TO 50 * * Test if processor is active IF( MOD( MYCOL, NPSTR ).EQ.0 ) THEN * * Send/Receive blocks * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW ELSE BMN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) + $ BWU END IF * CALL DGESD2D( ICTXT, BM, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * IF( NPACT.NE.2 ) THEN * * Receive answers back from partner processor * CALL DGERV2D( ICTXT, BM+BMN-BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * BM = BM + BMN - BW * END IF * END IF * ELSE * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * CALL DLACPY( 'G', BM, NRHS, WORK( LN+1 ), LDW, $ WORK( NB+BWU+BMN+1 ), LDW ) * CALL DGERV2D( ICTXT, BMN, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ NEICOL ) * * and do the permutations and eliminations * IF( NPACT.NE.2 ) THEN * * Solve locally for BW variables * CALL DLASWP( NRHS, WORK( NB+BWU+1 ), LDW, 1, BW, $ IPIV( LN+1 ), 1 ) * CALL DTRSM( 'L', 'L', 'N', 'U', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) * * Use soln just calculated to update RHS * CALL DGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, $ WORK( NB+BWU+1 ), LDW, ONE, $ WORK( NB+BWU+1+BW ), LDW ) * * Give answers back to partner processor * CALL DGESD2D( ICTXT, BM+BMN-BW, NRHS, $ WORK( NB+BWU+1+BW ), LDW, 0, NEICOL ) * ELSE * * Finish up calculations for final level * CALL DLASWP( NRHS, WORK( NB+BWU+1 ), LDW, 1, BM+BMN, $ IPIV( LN+1 ), 1 ) * CALL DTRSM( 'L', 'L', 'N', 'U', BM+BMN, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) END IF * END IF * NPACT = ( NPACT+1 ) / 2 NPSTR = NPSTR*2 GO TO 40 * END IF * 50 CONTINUE * * ************************************** * BACKSOLVE ******************************************************************** * PHASE 2: Global computation phase -- Solve U*Y = X ******************************************************************** * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. * In fact, the backsolve portion was done in the call to * DGETRS in the frontsolve. * END IF * * Compute variable needed to reverse loop structure in * reduced system. * RECOVERY_VAL = NPACT*NPSTR - NPCOL * * Loop over levels * Terminal values of NPACT and NPSTR from frontsolve are used * 60 CONTINUE IF( NPACT.GE.NPCOL ) $ GO TO 80 * NPSTR = NPSTR / 2 * NPACT = NPACT*2 * * Have to adjust npact for non-power-of-2 * NPACT = NPACT - MOD( ( RECOVERY_VAL / NPSTR ), 2 ) * * Find size of submatrix in this proc at this level * IF( MYCOL / NPSTR.LT.NPACT-1 ) THEN BN = BW ELSE BN = MIN( BW, NUMROC( N, NB, NPCOL-1, 0, NPCOL ) ) END IF * * If this processor is even in this level... * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW BNN = BW ELSE BMN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) + BWU BNN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) END IF * IF( NPACT.GT.2 ) THEN * CALL DGESD2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * CALL DGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * ELSE * CALL DGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * END IF * END IF * ELSE * This processor is odd on this level * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * IF( NEICOL.LT.NPCOL-1 ) THEN BNN = BW ELSE BNN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) END IF * IF( NPACT.GT.2 ) THEN * * Move RHS to make room for received solutions * CALL DLACPY( 'G', BW, NRHS, WORK( NB+BWU+1 ), LDW, $ WORK( NB+BWU+BW+1 ), LDW ) * CALL DGERV2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * CALL DGEMM( 'N', 'N', BW, NRHS, BN, -ONE, AF( BBPTR ), LDBB, $ WORK( LN+1 ), LDW, ONE, WORK( NB+BWU+BW+1 ), $ LDW ) * * IF( MYCOL.GT.NPSTR ) THEN * CALL DGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( BBPTR+2*BW*LDBB ), LDBB, WORK( LN+BW+1 ), $ LDW, ONE, WORK( NB+BWU+BW+1 ), LDW ) * END IF * CALL DTRSM( 'L', 'U', 'N', 'N', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+BW+1 ), $ LDW ) * * Send new solution to neighbor * CALL DGESD2D( ICTXT, BW, NRHS, WORK( NB+BWU+BW+1 ), LDW, 0, $ NEICOL ) * * Copy new solution into expected place * CALL DLACPY( 'G', BW, NRHS, WORK( NB+BWU+1+BW ), LDW, $ WORK( LN+BW+1 ), LDW ) * ELSE * * Solve with local diagonal block * CALL DTRSM( 'L', 'U', 'N', 'N', BN+BNN, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) * * Send new solution to neighbor * CALL DGESD2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ NEICOL ) * * Shift solutions into expected positions * CALL DLACPY( 'G', BNN+BN-BW, NRHS, WORK( NB+BWU+1+BW ), LDW, $ WORK( LN+1 ), LDW ) * * IF( ( NB+BWU+1 ).NE.( LN+1+BW ) ) THEN * * Copy one row at a time since spaces may overlap * DO 70 J = 1, BW CALL DCOPY( NRHS, WORK( NB+BWU+J ), LDW, $ WORK( LN+BW+J ), LDW ) 70 CONTINUE * END IF * END IF * END IF * GO TO 60 * 80 CONTINUE * End of loop over levels * ******************************************************************** * PHASE 1: (Almost) Local computation phase -- Solve U*Y = X ******************************************************************** * * Reset BM to value it had before reduced system frontsolve... * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU ELSE BM = MIN( BW, ODD_SIZE ) + BWU END IF * * First metastep is to account for the fillin blocks AF * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, WORK( NB-BW+1 ), LDW, 0, $ MYCOL+1 ) * END IF * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ MYCOL-1 ) * * Modify local right hand sides with received rhs's * CALL DGEMM( 'T', 'N', LM-BM, NRHS, BW, -ONE, AF( 1 ), BW, $ WORK( NB+BWU+1 ), LDW, ONE, WORK( 1 ), LDW ) * END IF * DO 90 J = LN, 1, -1 * LMJ = MIN( BW, ODD_SIZE-1 ) * LPTR = BW - 1 + J*LLDA + APTR * * In the following, the TRANS=T option is used to reverse * the order of multiplication, not as a true transpose * CALL DGEMV( 'T', LMJ, NRHS, -ONE, WORK( J+1 ), LDW, A( LPTR ), $ LLDA-1, ONE, WORK( J ), LDW ) * * Divide by diagonal element * CALL DSCAL( NRHS, ONE / A( LPTR-LLDA+1 ), WORK( J ), LDW ) 90 CONTINUE * * * CALL DLACPY( 'G', ODD_SIZE, NRHS, WORK( 1 ), LDW, B( 1 ), LLDB ) * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 100 CONTINUE * * Restore saved input parameters * NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * RETURN * * End of PDGBTRS * END scalapack-1.8.0/SRC/pzgecon.f0000640000175000017500000003750310363532303015567 0ustar muammarmuammar SUBROUTINE PZGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, INFO, JA, LRWORK, LWORK, N DOUBLE PRECISION ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZGECON estimates the reciprocal of the condition number of a general * distributed complex matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm, using the LU factorization computed by * PZGETRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm * = 'I': Infinity-norm * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, * this array contains the local pieces of the factors L and U * from the factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U; the * unit diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) DOUBLE PRECISION * If NORM = '1' or 'O', the 1-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * If NORM = 'I', the infinity-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*CEIL(NPROW-1,NPCOL),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(NPCOL-1,NPROW)) ). * * LOCr and LOCc values can be computed using the ScaLAPACK * tool function NUMROC; NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 2*LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, ONENRM CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, JX, $ KASE, KASE1, LRWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPMOD, NPROW, NQ, NQMOD DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU COMPLEX*16 WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK1MAT, PB_TOPGET, PB_TOPSET, PXERBLA, $ PZAMAX, PZLATRS, PZLACON, PZDRSCL, $ ZGEBR2D, ZGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, ICHAR, MAX, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600 + CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LRWMIN = 2*NQMOD RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGECON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPNL = 1 IPNU = IPNL + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the norm of inv(A). * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 * 10 CONTINUE CALL PZLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, RWORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), IX, $ JX, DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Lower', 'Conjugate transpose', 'Unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SL, RWORK( IPNL ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Divide X by 1/(SL*SU) if doing so will not cause overflow. * SCALE = SL*SU NORMIN = 'Y' IF( SCALE.NE.ONE ) THEN CALL PZAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL ZGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL ZGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PZDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PZGECON * END scalapack-1.8.0/SRC/pzgebrd.f0000640000175000017500000004025410363532303015554 0ustar muammarmuammar SUBROUTINE PZGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEBRD reduces a complex general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an unitary transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * unitary matrix Q as a product of elementary reflectors, and * the elements above the first superdiagonal, with the array * TAUP, represent the orthogonal matrix P as a product of * elementary reflectors. If M < N, the diagonal and the first * subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the unitary matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX*16 array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX*16 array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0 * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY, $ IW, J, JB, JS, JW, K, L, LWMIN, MN, MP, MYCOL, $ MYROW, NB, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ), $ IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZGEBD2, PZGEMM, PZLABRD * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( MB_ ) IOFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) LWMIN = NB*( MP+NQ+1 ) + NQ * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IOFF.NE.MOD( JA-1, DESCA( NB_ ) ) ) THEN INFO = -5 ELSE IF( NB.NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEBRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * MN = MIN( M, N ) IF( MN.EQ.0 ) $ RETURN * * Initialize parameters. * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPY = MP * NB + 1 IPW = NQ * NB + IPY * CALL DESCSET( DESCWX, M+IOFF, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCWY, NB, N+IOFF, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * MP = NUMROC( M+IA-1, NB, MYROW, DESCA( RSRC_ ), NPROW ) NQ = NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) K = 1 JB = NB - IOFF IW = IOFF + 1 JW = IOFF + 1 * DO 10 L = 1, MN+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce rows and columns i:i+nb-1 to bidiagonal form and return * the matrices X and Y which are needed to update the unreduced * part of the matrix. * CALL PZLABRD( M-K+1, N-K+1, JB, A, I, J, DESCA, D, E, TAUQ, $ TAUP, WORK, IW, JW, DESCWX, WORK( IPY ), IW, $ JW, DESCWY, WORK( IPW ) ) * * Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using * an update of the form A := A - V*Y' - X*U'. * CALL PZGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, A, I+JB, J, DESCA, $ WORK( IPY ), IW, JW+JB, DESCWY, ONE, A, I+JB, $ J+JB, DESCA ) CALL PZGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, WORK, IW+JB, JW, DESCWX, A, I, $ J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * * Copy last off-diagonal elements of B back into sub( A ). * IF( M.GE.N ) THEN JS = MIN( INDXG2L( I+JB-1, NB, 0, DESCA( RSRC_ ), NPROW ), $ MP ) IF( JS.GT.0 ) $ CALL PZELSET( A, I+JB-1, J+JB, DESCA, DCMPLX( E( JS ) ) ) ELSE JS = MIN( INDXG2L( J+JB-1, NB, 0, DESCA( CSRC_ ), NPCOL ), $ NQ ) IF( JS.GT.0 ) $ CALL PZELSET( A, I+JB, J+JB-1, DESCA, DCMPLX( E( JS ) ) ) END IF * K = K + JB JB = NB IW = 1 JW = 1 DESCWX( M_ ) = DESCWX( M_ ) - JB DESCWX( RSRC_ ) = MOD( DESCWX( RSRC_ ) + 1, NPROW ) DESCWX( CSRC_ ) = MOD( DESCWX( CSRC_ ) + 1, NPCOL ) DESCWY( N_ ) = DESCWY( N_ ) - JB DESCWY( RSRC_ ) = MOD( DESCWY( RSRC_ ) + 1, NPROW ) DESCWY( CSRC_ ) = MOD( DESCWY( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the remainder of the matrix. * CALL PZGEBD2( M-K+1, N-K+1, A, IA+K-1, JA+K-1, DESCA, D, E, TAUQ, $ TAUP, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEBRD * END scalapack-1.8.0/SRC/pdggqrf.f0000640000175000017500000003627710363532303015563 0ustar muammarmuammar SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PDGGQRF computes a generalized QR factorization of * an N-by-M matrix sub( A ) = A(IA:IA+N-1,JA:JA+M-1) and * an N-by-P matrix sub( B ) = B(IB:IB+N-1,JB:JB+P-1): * * sub( A ) = Q*R, sub( B ) = Q*T*Z, * * where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal * matrix, and R and T assume one of the forms: * * if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, * ( 0 ) N-M N M-N * M * * where R11 is upper triangular, and * * if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, * P-N N ( T21 ) P * P * * where T12 or T21 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GQR * factorization of sub( A ) and sub( B ) implicitly gives the QR * factorization of inv( sub( B ) )* sub( A ): * * inv( sub( B ) )*sub( A )= Z'*(inv(T)*R) * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( B ). N >= 0. * * M (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( B ). P >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+M-1)). * On entry, the local pieces of the N-by-M distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(N,M) by M * upper trapezoidal matrix R (R is upper triangular if N >= M); * the elements below the diagonal, with the array TAUA, * represent the orthogonal matrix Q as a product of min(N,M) * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(N,M)-1). This array contains the scalar factors * TAUA of the elementary reflectors which represent the * orthogonal matrix Q. TAUA is tied to the distributed matrix * A. (see Further Details). * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+P-1)). * On entry, the local pieces of the N-by-P distributed matrix * sub( B ) which is to be factored. On exit, if N <= P, the * upper triangle of B(IB:IB+N-1,JB+P-N:JB+P-1) contains the * N by N upper triangular matrix T; if N > P, the elements on * and above the (N-P)-th subdiagonal contain the N by P upper * trapezoidal matrix T; the remaining elements, with the array * TAUB, represent the orthogonal matrix Z as a product of * elementary reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) DOUBLE PRECISION array, dimension LOCr(IB+N-1) * This array contains the scalar factors of the elementary * reflectors which represent the orthogonal unitary matrix Z. * TAUB is tied to the distributed matrix B (see Further * Details). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB_A * ( NpA0 + MqA0 + NB_A ), * MAX( (NB_A*(NB_A-1))/2, (PqB0 + NpB0)*NB_A ) + * NB_A * NB_A, * MB_B * ( NpB0 + PqB0 + MB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * PqB0 = NUMROC( P+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(n,m). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a real scalar, and v is a real vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in * A(ia+i:ia+n-1,ja+i-1), and taua in TAUA(ja+i-1). * To form Q explicitly, use ScaLAPACK subroutine PDORGQR. * To use Q to update another matrix, use ScaLAPACK subroutine PDORMQR. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(ib) H(ib+1) . . . H(ib+k-1), where k = min(n,p). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a real scalar, and v is a real vector with * v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in * B(ib+n-k+i-1,jb:jb+p-k+i-2), and taub in TAUB(ib+n-k+i-1). * To form Z explicitly, use ScaLAPACK subroutine PDORGRQ. * To use Z to update another matrix, use ScaLAPACK subroutine PDORMRQ. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( MB_A.EQ.MB_B .AND. IROFFA.EQ.IROFFB .AND. IAROW.EQ.IBROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MQA0, MYCOL, $ MYROW, NPA0, NPB0, NPCOL, NPROW, PQB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDGEQRF, $ PDGERQF, PDORMQR, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( N, 1, M, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 1, P, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) PQB0 = NUMROC( P+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( NB_ ) * ( NPA0 + MQA0 + DESCA( NB_ ) ), $ MAX( MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( PQB0 + NPB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ), $ DESCB( MB_ ) * ( NPB0 + PQB0 + DESCB( MB_ ) ) ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IAROW.NE.IBROW .OR. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -1203 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( N, 1, M, 2, IA, JA, DESCA, 7, N, 1, P, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * QR factorization of N-by-M matrix sub( A ): sub( A ) = Q*R * CALL PDGEQRF( N, M, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := Q'*sub( B ). * CALL PDORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, IA, JA, $ DESCA, TAUA, B, IB, JB, DESCB, WORK, LWORK, INFO ) LWMIN = MIN( LWMIN, INT( WORK( 1 ) ) ) * * RQ factorization of N-by-P matrix sub( B ): sub( B ) = T*Z. * CALL PDGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * RETURN * * End of PDGGQRF * END scalapack-1.8.0/SRC/pcposv.f0000640000175000017500000002432210363532303015427 0ustar muammarmuammar SUBROUTINE PCPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCPOSV computes the solution to a complex system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is an N-by-N * hermitian distributed positive definite matrix and X and sub( B ) * denoting B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS distributed * matrices. * * The Cholesky decomposition is used to factor sub( A ) as * * sub( A ) = U**H * U, if UPLO = 'U', or * * sub( A ) = L * L**H, if UPLO = 'L', * * where U is an upper triangular matrix and L is a lower triangular * matrix. The factored form of sub( A ) is then used to solve the * system of equations. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if INFO = 0, this array contains the * local pieces of the factor U or L from the Cholesky factori- * zation sub( A ) = U**H*U or L*L**H. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_B,LOC(JB+NRHS-1)). * On entry, the local pieces of the right hand sides distribu- * ted matrix sub( B ). On exit, if INFO = 0, sub( B ) is over- * written with the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed, and the * solution has not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCPOTRF, $ PCPOTRS, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1000+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOSV', -INFO ) RETURN END IF * * Compute the Cholesky factorization sub( A ) = U'*U or L*L'. * CALL PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ) overwriting sub( B ) * with X. * CALL PCPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * END IF * RETURN * * End of PCPOSV * END scalapack-1.8.0/SRC/pzgbsv.f0000640000175000017500000004563010363532303015435 0ustar muammarmuammar SUBROUTINE PZGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX*16 A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZGBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination with pivoting * is used to factor a reordering * of the matrix into P L U. * * See PZGBTRF and PZGBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * +max(NRHS*(NB+2*bwl+4*bwu), 1) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PXERBLA, PZGBTRF, PZGBTRS * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PZGBTRF and PZGBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PZGBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Factor the matrix * CALL PZGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PZGBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PZGBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGBSV', -INFO ) RETURN END IF * RETURN * * End of PZGBSV * END scalapack-1.8.0/SRC/pzgelq2.f0000640000175000017500000002510510363532303015501 0ustar muammarmuammar SUBROUTINE PZGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGELQ2 computes a LQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1)' H(ia+k-2)' . . . H(ia)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in * A(ia+i-1,ja+i:ja+n-1), and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ COMPLEX*16 AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLACGV, PZLARF, PZLARFG * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * K = MIN( M, N ) DO 10 I = IA, IA+K-1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i,j+1:ja+n-1) * CALL PZLACGV( N-J+JA, A, I, J, DESCA, DESCA( M_ ) ) CALL PZLARFG( N-J+JA, AII, I, J, A, I, MIN( J+1, JA+N-1 ), $ DESCA, DESCA( M_ ), TAU ) * IF( I.LT.IA+M-1 ) THEN * * Apply H(i) to A(i+1:ia+m-1,j:ja+n-1) from the right * CALL PZELSET( A, I, J, DESCA, ONE ) CALL PZLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PZELSET( A, I, J, DESCA, AII ) CALL PZLACGV( N-J+JA, A, I, J, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGELQ2 * END scalapack-1.8.0/SRC/psrscl.f0000640000175000017500000001625610363532303015432 0ustar muammarmuammar SUBROUTINE PSRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N REAL SA * .. * .. Array Arguments .. INTEGER DESCX( * ) REAL SX( * ) * .. * * Purpose * ======= * * PSRSCL multiplies an N-element real distributed vector sub( X ) by * the real scalar 1/a. This is done without overflow or underflow as * long as the final result sub( X )/a does not overflow or underflow. * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * SA (global input) REAL * The scalar a which is used to divide each component of * sub( X ). SA must be >= 0, or the subroutine will divide by * zero. * * SX (local input/local output) REAL array * containing the local pieces of a distributed matrix of * dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER ICTXT, MYCOL, MYROW, NPCOL, NPROW REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSLABAD, PSSCAL * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) * * Initialize the denominator to SA and the numerator to 1. * CDEN = SA CNUM = ONE * 10 CONTINUE CDEN1 = CDEN*SMLNUM CNUM1 = CNUM / BIGNUM IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN * * Pre-multiply sub( X ) by SMLNUM if CDEN is large compared to * CNUM. * MUL = SMLNUM DONE = .FALSE. CDEN = CDEN1 ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN * * Pre-multiply sub( X ) by BIGNUM if CDEN is small compared to * CNUM. * MUL = BIGNUM DONE = .FALSE. CNUM = CNUM1 ELSE * * Multiply sub( X ) by CNUM / CDEN and return. * MUL = CNUM / CDEN DONE = .TRUE. END IF * * Scale the vector sub( X ) by MUL * CALL PSSCAL( N, MUL, SX, IX, JX, DESCX, INCX ) * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PSRSCL * END scalapack-1.8.0/SRC/pzungl2.f0000640000175000017500000002643210363532303015522 0ustar muammarmuammar SUBROUTINE PZUNGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGL2 generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PZGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PZGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, J, KP, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX*16 TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLACGV, PZLARFC, PZLASET, PZSCAL * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia+k:ia+m-1 to rows of the unit matrix * CALL PZLASET( 'All', M-K, K, ZERO, ZERO, A, IA+K, JA, DESCA ) CALL PZLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, $ DESCA ) * END IF * TAUI = ZERO KP = NUMROC( IA+K-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+K-1, IA, -1 * * Apply H(i)' to A(i:ia+m-1,ja+i-ia:ja+n-1) from the right * J = JA + I - IA II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, KP ) ) IF( J.LT.JA+N-1 ) THEN CALL PZLACGV( N-J+JA-1, A, I, J+1, DESCA, DESCA( M_ ) ) IF( I.LT.IA+M-1 ) THEN CALL PZELSET( A, I, J, DESCA, ONE ) CALL PZLARFC( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PZSCAL( N-J+JA-1, -TAUI, A, I, J+1, DESCA, $ DESCA( M_ ) ) CALL PZLACGV( N-J+JA-1, A, I, J+1, DESCA, DESCA( M_ ) ) END IF CALL PZELSET( A, I, J, DESCA, ONE-DCONJG( TAUI ) ) * * Set A(i,ja:j-1) to zero * CALL PZLASET( 'All', 1, J-JA, ZERO, ZERO, A, I, JA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGL2 * END scalapack-1.8.0/SRC/zpttrsv.f0000640000175000017500000001120710363532303015647 0ustar muammarmuammar SUBROUTINE ZPTTRSV( UPLO, TRANS, N, NRHS, D, E, B, LDB, $ INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * November, 1996. * Modified from ZPTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. DOUBLE PRECISION D( * ) COMPLEX*16 B( LDB, * ), E( * ) * .. * * Purpose * ======= * * ZPTTRSV solves one of the triangular systems * L * X = B, or L**H * X = B, * U * X = B, or U**H * X = B, * where L or U is the Cholesky factor of a Hermitian positive * definite tridiagonal matrix A such that * A = U**H*D*U or A = L*D*L**H (computed by ZPTTRF). * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether the superdiagonal or the subdiagonal * of the tridiagonal matrix A is stored and the form of the * factorization: * = 'U': E is the superdiagonal of U, and A = U'*D*U; * = 'L': E is the subdiagonal of L, and A = L*D*L'. * (The two forms are equivalent if A is real.) * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': L * X = B (No transpose) * = 'N': L * X = B (No transpose) * = 'C': U**H * X = B (Conjugate transpose) * = 'C': L**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the tridiagonal matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * D (input) REAL array, dimension (N) * The n diagonal elements of the diagonal matrix D from the * factorization computed by ZPTTRF. * * E (input) COMPLEX array, dimension (N-1) * The (n-1) off-diagonal elements of the unit bidiagonal * factor U or L from the factorization computed by ZPTTRF * (see UPLO). * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL NOTRAN, UPPER INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX * .. * .. Executable Statements .. * * Test the input arguments. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -8 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( UPPER ) THEN * IF( .NOT.NOTRAN ) THEN * DO 30 J = 1, NRHS * * Solve U**T (or H) * x = b. * DO 10 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) ) 10 CONTINUE 30 CONTINUE * ELSE * DO 35 J = 1, NRHS * * Solve U * x = b. * DO 20 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) 20 CONTINUE 35 CONTINUE ENDIF * ELSE * IF( NOTRAN ) THEN * DO 60 J = 1, NRHS * * Solve L * x = b. * DO 40 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) 40 CONTINUE 60 CONTINUE * ELSE * DO 65 J = 1, NRHS * * Solve L**H * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - $ B( I+1, J )*DCONJG( E( I ) ) 50 CONTINUE 65 CONTINUE ENDIF * END IF * RETURN * * End of ZPTTRS * END scalapack-1.8.0/SRC/pslaedz.f0000640000175000017500000001225210363532303015556 0ustar muammarmuammar SUBROUTINE PSLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER ID, IQ, JQ, LDQ, N, N1 * .. * .. Array Arguments .. INTEGER DESCQ( * ) REAL Q( LDQ, * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSLAEDZ Form the z-vector which consists of the last row of Q_1 * and the first row of Q_2. * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. * INTEGER COL, I, IBUF, ICTXT, IIQ, IIZ1, IIZ2, IQCOL, $ IQROW, IZ, IZ1, IZ1COL, IZ1ROW, IZ2, IZ2COL, $ IZ2ROW, J, JJQ, JJZ1, JJZ2, MYCOL, MYROW, N2, $ NB, NBLOC, NPCOL, NPROW, NQ1, NQ2, ZSIZ * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SCOPY, SGEBR2D, $ SGEBS2D, SGERV2D, SGESD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( ID, ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) N2 = N - N1 * * Form z1 which consist of the last row of Q1 * CALL INFOG2L( IQ-1+( ID+N1-1 ), JQ-1+ID, DESCQ, NPROW, NPCOL, $ MYROW, MYCOL, IIZ1, JJZ1, IZ1ROW, IZ1COL ) NQ1 = NUMROC( N1, NB, MYCOL, IZ1COL, NPCOL ) IF( ( MYROW.EQ.IZ1ROW ) .AND. ( NQ1.NE.0 ) ) THEN CALL SCOPY( NQ1, Q( IIZ1, JJZ1 ), LDQ, WORK, 1 ) IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) $ CALL SGESD2D( ICTXT, NQ1, 1, WORK, NQ1, IQROW, IQCOL ) END IF * * Proc (IQROW, IQCOL) receive the parts of z1 * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN COL = IZ1COL DO 20 I = 0, NPCOL - 1 NQ1 = NUMROC( N1, NB, COL, IZ1COL, NPCOL ) IF( NQ1.GT.0 ) THEN IF( IZ1ROW.NE.IQROW .OR. COL.NE.IQCOL ) THEN IBUF = N1 + 1 CALL SGERV2D( ICTXT, NQ1, 1, WORK( IBUF ), NQ1, $ IZ1ROW, COL ) ELSE IBUF = 1 END IF IZ1 = 0 IZ = I*NB + 1 NBLOC = ( NQ1-1 ) / NB + 1 DO 10 J = 1, NBLOC ZSIZ = MIN( NB, NQ1-IZ1 ) CALL SCOPY( ZSIZ, WORK( IBUF+IZ1 ), 1, Z( IZ ), 1 ) IZ1 = IZ1 + NB IZ = IZ + NB*NPCOL 10 CONTINUE END IF COL = MOD( COL+1, NPCOL ) 20 CONTINUE END IF * * Form z2 which consist of the first row of Q2 * CALL INFOG2L( IQ-1+( ID+N1 ), JQ-1+( ID+N1 ), DESCQ, NPROW, NPCOL, $ MYROW, MYCOL, IIZ2, JJZ2, IZ2ROW, IZ2COL ) NQ2 = NUMROC( N2, NB, MYCOL, IZ2COL, NPCOL ) IF( ( MYROW.EQ.IZ2ROW ) .AND. ( NQ2.NE.0 ) ) THEN CALL SCOPY( NQ2, Q( IIZ2, JJZ2 ), LDQ, WORK, 1 ) IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) $ CALL SGESD2D( ICTXT, NQ2, 1, WORK, NQ2, IQROW, IQCOL ) END IF * * Proc (IQROW, IQCOL) receive the parts of z2 * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN COL = IZ2COL DO 40 I = 0, NPCOL - 1 NQ2 = NUMROC( N2, NB, COL, IZ2COL, NPCOL ) IF( NQ2.GT.0 ) THEN IF( IQROW.NE.IZ2ROW .OR. IQCOL.NE.COL ) THEN IBUF = 1 + N2 CALL SGERV2D( ICTXT, NQ2, 1, WORK( IBUF ), NQ2, $ IZ2ROW, COL ) ELSE IBUF = 1 END IF IZ2 = 0 IZ = NB*I + N1 + 1 NBLOC = ( NQ2-1 ) / NB + 1 DO 30 J = 1, NBLOC ZSIZ = MIN( NB, NQ2-IZ2 ) CALL SCOPY( ZSIZ, WORK( IBUF+IZ2 ), 1, Z( IZ ), 1 ) IZ2 = IZ2 + NB IZ = IZ + NB*NPCOL 30 CONTINUE END IF COL = MOD( COL+1, NPCOL ) 40 CONTINUE END IF * * proc(IQROW,IQCOL) broadcast Z=(Z1,Z2) * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', N, 1, Z, N ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', N, 1, Z, N, IQROW, IQCOL ) END IF * RETURN * * End of PSLAEDZ * * END scalapack-1.8.0/SRC/pdgebrd.f0000640000175000017500000004020410363532303015521 0ustar muammarmuammar SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PDGEBRD reduces a real general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an orthogonal transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * orthogonal matrix Q as a product of elementary reflectors, * and the elements above the first superdiagonal, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. If M < N, the diagonal and the * first subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the orthogonal matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) DOUBLE PRECISION array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0 * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY, $ IW, J, JB, JS, JW, K, L, LWMIN, MN, MP, MYCOL, $ MYROW, NB, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ), $ IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PDELSET, PDGEBD2, PDGEMM, PDLABRD, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( MB_ ) IOFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) LWMIN = NB*( MP+NQ+1 ) + NQ * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IOFF.NE.MOD( JA-1, DESCA( NB_ ) ) ) THEN INFO = -5 ELSE IF( NB.NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * MN = MIN( M, N ) IF( MN.EQ.0 ) $ RETURN * * Initialize parameters. * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPY = MP * NB + 1 IPW = NQ * NB + IPY * CALL DESCSET( DESCWX, M+IOFF, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCWY, NB, N+IOFF, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * MP = NUMROC( M+IA-1, NB, MYROW, DESCA( RSRC_ ), NPROW ) NQ = NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) K = 1 JB = NB - IOFF IW = IOFF + 1 JW = IOFF + 1 * DO 10 L = 1, MN+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce rows and columns i:i+nb-1 to bidiagonal form and return * the matrices X and Y which are needed to update the unreduced * part of the matrix. * CALL PDLABRD( M-K+1, N-K+1, JB, A, I, J, DESCA, D, E, TAUQ, $ TAUP, WORK, IW, JW, DESCWX, WORK( IPY ), IW, $ JW, DESCWY, WORK( IPW ) ) * * Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using * an update of the form A := A - V*Y' - X*U'. * CALL PDGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, A, I+JB, J, DESCA, $ WORK( IPY ), IW, JW+JB, DESCWY, ONE, A, I+JB, $ J+JB, DESCA ) CALL PDGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, WORK, IW+JB, JW, DESCWX, A, I, $ J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * * Copy last off-diagonal elements of B back into sub( A ). * IF( M.GE.N ) THEN JS = MIN( INDXG2L( I+JB-1, NB, 0, DESCA( RSRC_ ), NPROW ), $ MP ) IF( JS.GT.0 ) $ CALL PDELSET( A, I+JB-1, J+JB, DESCA, E( JS ) ) ELSE JS = MIN( INDXG2L( J+JB-1, NB, 0, DESCA( CSRC_ ), NPCOL ), $ NQ ) IF( JS.GT.0 ) $ CALL PDELSET( A, I+JB, J+JB-1, DESCA, E( JS ) ) END IF * K = K + JB JB = NB IW = 1 JW = 1 DESCWX( M_ ) = DESCWX( M_ ) - JB DESCWX( RSRC_ ) = MOD( DESCWX( RSRC_ ) + 1, NPROW ) DESCWX( CSRC_ ) = MOD( DESCWX( CSRC_ ) + 1, NPCOL ) DESCWY( N_ ) = DESCWY( N_ ) - JB DESCWY( RSRC_ ) = MOD( DESCWY( RSRC_ ) + 1, NPROW ) DESCWY( CSRC_ ) = MOD( DESCWY( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the remainder of the matrix. * CALL PDGEBD2( M-K+1, N-K+1, A, IA+K-1, JA+K-1, DESCA, D, E, TAUQ, $ TAUP, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEBRD * END scalapack-1.8.0/SRC/pcggqrf.f0000640000175000017500000003630610363532303015553 0ustar muammarmuammar SUBROUTINE PCGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PCGGQRF computes a generalized QR factorization of * an N-by-M matrix sub( A ) = A(IA:IA+N-1,JA:JA+M-1) and * an N-by-P matrix sub( B ) = B(IB:IB+N-1,JB:JB+P-1): * * sub( A ) = Q*R, sub( B ) = Q*T*Z, * * where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, * and R and T assume one of the forms: * * if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, * ( 0 ) N-M N M-N * M * * where R11 is upper triangular, and * * if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, * P-N N ( T21 ) P * P * * where T12 or T21 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GQR * factorization of sub( A ) and sub( B ) implicitly gives the QR * factorization of inv( sub( B ) )* sub( A ): * * inv( sub( B ) )*sub( A )= Z'*(inv(T)*R) * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the conjugate transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrices sub( A ) and sub( B ). N >= 0. * * M (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( B ). P >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+M-1)). * On entry, the local pieces of the N-by-M distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(N,M) by M * upper trapezoidal matrix R (R is upper triangular if N >= M); * the elements below the diagonal, with the array TAUA, * represent the unitary matrix Q as a product of min(N,M) * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) COMPLEX, array, dimension * LOCc(JA+MIN(N,M)-1). This array contains the scalar factors * TAUA of the elementary reflectors which represent the unitary * matrix Q. TAUA is tied to the distributed matrix A. (see * Further Details). * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+P-1)). * On entry, the local pieces of the N-by-P distributed matrix * sub( B ) which is to be factored. On exit, if N <= P, the * upper triangle of B(IB:IB+N-1,JB+P-N:JB+P-1) contains the * N by N upper triangular matrix T; if N > P, the elements on * and above the (N-P)-th subdiagonal contain the N by P upper * trapezoidal matrix T; the remaining elements, with the array * TAUB, represent the unitary matrix Z as a product of * elementary reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) COMPLEX, array, dimension LOCr(IB+N-1) * This array contains the scalar factors of the elementary * reflectors which represent the unitary matrix Z. TAUB is * tied to the distributed matrix B (see Further Details). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB_A * ( NpA0 + MqA0 + NB_A ), * MAX( (NB_A*(NB_A-1))/2, (PqB0 + NpB0)*NB_A ) + * NB_A * NB_A, * MB_B * ( NpB0 + PqB0 + MB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * PqB0 = NUMROC( P+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(n,m). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in * A(ia+i:ia+n-1,ja+i-1), and taua in TAUA(ja+i-1). * To form Q explicitly, use ScaLAPACK subroutine PCUNGQR. * To use Q to update another matrix, use ScaLAPACK subroutine PCUNMQR. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(ib)' H(ib+1)' . . . H(ib+k-1)', where k = min(n,p). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a complex scalar, and v is a complex vector with * v(p-k+i+1:p) = 0 and v(p-k+i) = 1; conjg(v(1:p-k+i-1)) is stored on * exit in B(ib+n-k+i-1,jb:jb+p-k+i-2), and taub in TAUB(ib+n-k+i-1). * To form Z explicitly, use ScaLAPACK subroutine PCUNGRQ. * To use Z to update another matrix, use ScaLAPACK subroutine PCUNMRQ. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( MB_A.EQ.MB_B .AND. IROFFA.EQ.IROFFB .AND. IAROW.EQ.IBROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MQA0, MYCOL, $ MYROW, NPA0, NPB0, NPCOL, NPROW, PQB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCGEQRF, PCGERQF, $ PCHK2MAT, PCUNMQR, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, INT, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( N, 1, M, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 1, P, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) PQB0 = NUMROC( P+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( NB_ ) * ( NPA0 + MQA0 + DESCA( NB_ ) ), $ MAX( MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( PQB0 + NPB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ), $ DESCB( MB_ ) * ( NPB0 + PQB0 + DESCB( MB_ ) ) ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IAROW.NE.IBROW .OR. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -1203 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( N, 1, M, 2, IA, JA, DESCA, 7, N, 1, P, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGGQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * QR factorization of N-by-M matrix sub( A ): sub( A ) = Q*R * CALL PCGEQRF( N, M, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := Q'*sub( B ). * CALL PCUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, $ IA, JA, DESCA, TAUA, B, IB, JB, DESCB, WORK, LWORK, $ INFO ) LWMIN = MIN( LWMIN, INT( WORK( 1 ) ) ) * * RQ factorization of N-by-P matrix sub( B ): sub( B ) = T*Z. * CALL PCGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = CMPLX( REAL( MAX( LWMIN, INT( WORK( 1 ) ) ) ) ) * RETURN * * End of PCGGQRF * END scalapack-1.8.0/SRC/pdgesvd.f0000640000175000017500000005530010377355407015566 0ustar muammarmuammar SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, + VT,IVT,JVT,DESCVT,WORK,LWORK,INFO) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Oak Ridge National Laboratory * and Univ. of California Berkeley. * Jan 2006 * * .. Scalar Arguments .. CHARACTER JOBU,JOBVT INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N * .. * .. Array Arguments .. INTEGER DESCA(*),DESCU(*),DESCVT(*) DOUBLE PRECISION A(*),U(*),VT(*),WORK(*) DOUBLE PRECISION S(*) * .. * * Purpose * ======= * * PDGESVD computes the singular value decomposition (SVD) of an * M-by-N matrix A, optionally computing the left and/or right * singular vectors. The SVD is written as * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A and the columns of U and V are the * corresponding right and left singular vectors, respectively. The * singular values are returned in array S in decreasing order and * only the first min(M,N) columns of U and rows of VT = V**T are * computed. * * Notes * ===== * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, and * assume that its process grid has dimension r x c. LOCr( K ) denotes * the number of elements of K that a process would receive if K were * distributed over the r processes of its process column. Similarly, * LOCc( K ) denotes the number of elements of K that a process would * receive if K were distributed over the c processes of its process * row. The values of LOCr() and LOCc() may be determined via a call * to the ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZE = min( M, N ) * SIZEQ = number of local columns in U * SIZEP = number of local rows in VT * * JOBU (global input) CHARACTER*1 * Specifies options for computing U: * = 'V': the first SIZE columns of U (the left singular * vectors) are returned in the array U; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (global input) CHARACTER*1 * Specifies options for computing V**T: * = 'V': the first SIZE rows of V**T (the right singular * vectors) are returned in the array VT; * = 'N': no rows of V**T (no right singular vectors) are * computed. * * M (global input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (global input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (local input/workspace) block cyclic DOUBLE PRECISION * array, * global dimension (M, N), local dimension (MP, NQ) * On exit, the contents of A are destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * S (global output) DOUBLE PRECISION array, dimension SIZE * The singular values of A, sorted so that S(i) >= S(i+1). * * U (local output) DOUBLE PRECISION array, local dimension * (MP, SIZEQ), global dimension (M, SIZE) * if JOBU = 'V', U contains the first min(m,n) columns of U * if JOBU = 'N', U is not referenced. * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local output) DOUBLE PRECISION array, local dimension * (SIZEP, NQ), global dimension (SIZE, N). * If JOBVT = 'V', VT contains the first SIZE rows of * V**T. If JOBVT = 'N', VT is not referenced. * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * WORK (local workspace/output) DOUBLE PRECISION array, dimension * (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * LWORK >= 1 + 6*SIZEB + MAX(WATOBD, WBDTOSVD), * * where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer, * respectively, to the workspace required to bidiagonalize * the matrix A and to go from the bidiagonal matrix to the * singular value decomposition U*S*VT. * * For WATOBD, the following holds: * * WATOBD = MAX(MAX(WPDLANGE,WPDGEBRD), * MAX(WPDLARED2D,WP(pre)LARED1D)), * * where WPDLANGE, WPDLARED1D, WPDLARED2D, WPDGEBRD are the * workspaces required respectively for the subprograms * PDLANGE, PDLARED1D, PDLARED2D, PDGEBRD. Using the * standard notation * * MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW), * NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL), * * the workspaces required for the above subprograms are * * WPDLANGE = MP, * WPDLARED1D = NQ0, * WPDLARED2D = MP0, * WPDGEBRD = NB*(MP + NQ + 1) + NQ, * * where NQ0 and MP0 refer, respectively, to the values obtained * at MYCOL = 0 and MYROW = 0. In general, the upper limit for * the workspace is given by a workspace required on * processor (0,0): * * WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0. * * In case of a homogeneous process grid this upper limit can * be used as an estimate of the minimum workspace for every * processor. * * For WBDTOSVD, the following holds: * * WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) + * MAX(WDBDSQR, * MAX(WANTU*WPDORMBRQLN, WANTVT*WPDORMBRPRT)), * * where * * 1, if left(right) singular vectors are wanted * WANTU(WANTVT) = * 0, otherwise * * and WDBDSQR, WPDORMBRQLN and WPDORMBRPRT refer respectively * to the workspace required for the subprograms DBDSQR, * PDORMBR(QLN), and PDORMBR(PRT), where QLN and PRT are the * values of the arguments VECT, SIDE, and TRANS in the call * to PDORMBR. NRU is equal to the local number of rows of * the matrix U when distributed 1-dimensional "column" of * processes. Analogously, NCVT is equal to the local number * of columns of the matrix VT when distributed across * 1-dimensional "row" of processes. Calling the LAPACK * procedure DBDSQR requires * * WDBDSQR = MAX(1, 4*SIZE ) * * on every processor. Finally, * * WPDORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB, * WPDORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB, * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the work array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if DBDSQR did not converge * If INFO = MIN(M,N) + 1, then PDGESVD has detected * heterogeneity by finding that eigenvalues were not * identical across the process grid. In this case, the * accuracy of the results from PDGESVD cannot be * guaranteed. * * ===================================================================== * * The results of PDGEBRD, and therefore PDGESVD, may vary slightly * from run to run with the same input data. If repeatability is an * issue, call BLACS_SET with the appropriate option after defining * the process grid. * * Alignment requirements * ====================== * * The routine PDGESVD inherits the same alignement requirement as * the routine PDGEBRD, namely: * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * * ===================================================================== * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_, + CSRC_,LLD_,ITHVAL PARAMETER (BLOCK_CYCLIC_2D=1,DLEN_=9,DTYPE_=1,CTXT_=2,M_=3,N_=4, + MB_=5,NB_=6,RSRC_=7,CSRC_=8,LLD_=9,ITHVAL=10) DOUBLE PRECISION ZERO,ONE PARAMETER (ZERO= (0.0D+0),ONE= (1.0D+0)) * .. * .. Local Scalars .. CHARACTER UPLO INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ, + INDU,INDV,INDWORK,IOFFD,IOFFE,ISCALE,J,K,LDU,LDVT,LLWORK, + LWMIN,MAXIM,MB,MP,MYPCOL,MYPCOLC,MYPCOLR,MYPROW,MYPROWC, + MYPROWR,NB,NCVT,NPCOL,NPCOLC,NPCOLR,NPROCS,NPROW,NPROWC, + NPROWR,NQ,NRU,SIZE,SIZEB,SIZEP,SIZEPOS,SIZEQ,WANTU,WANTVT, + WATOBD,WBDTOSVD,WDBDSQR,WPDGEBRD,WPDLANGE,WPDORMBRPRT, + WPDORMBRQLN DOUBLE PRECISION ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM * .. * .. Local Arrays .. INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3) DOUBLE PRECISION C(1,1) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH,PDLANGE EXTERNAL LSAME,NUMROC,PDLAMCH,PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GET,BLACS_GRIDEXIT,BLACS_GRIDINFO,BLACS_GRIDINIT, + CHK1MAT,DBDSQR,DESCINIT,DGAMN2D,DGAMX2D,DSCAL,IGAMX2D, + IGEBR2D,IGEBS2D,PCHK1MAT,PDGEBRD,PDGEMR2D,PDLARED1D, + PDLARED2D,PDLASCL,PDLASET,PDORMBR,PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,DBLE * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) ISCALE = 0 INFO = 0 * IF (NPROW.EQ.-1) THEN INFO = - (800+CTXT_) ELSE * SIZE = MIN(M,N) SIZEB = MAX(M,N) NPROCS = NPROW*NPCOL IF (M.GE.N) THEN IOFFD = JA - 1 IOFFE = IA - 1 SIZEPOS = 1 ELSE IOFFD = IA - 1 IOFFE = JA - 1 SIZEPOS = 3 END IF * IF (LSAME(JOBU,'V')) THEN WANTU = 1 ELSE WANTU = 0 END IF IF (LSAME(JOBVT,'V')) THEN WANTVT = 1 ELSE WANTVT = 0 END IF * CALL CHK1MAT(M,3,N,4,IA,JA,DESCA,8,INFO) IF (WANTU.EQ.1) THEN CALL CHK1MAT(M,3,SIZE,SIZEPOS,IU,JU,DESCU,13,INFO) END IF IF (WANTVT.EQ.1) THEN CALL CHK1MAT(SIZE,SIZEPOS,N,4,IVT,JVT,DESCVT,17,INFO) END IF CALL IGAMX2D(DESCA(CTXT_),'A',' ',1,1,INFO,1,1,1,-1,-1,0) * IF (INFO.EQ.0) THEN * * Set up pointers into the WORK array. * INDD = 2 INDE = INDD + SIZEB + IOFFD INDD2 = INDE + SIZEB + IOFFE INDE2 = INDD2 + SIZEB + IOFFD * INDTAUQ = INDE2 + SIZEB + IOFFE INDTAUP = INDTAUQ + SIZEB + JA - 1 INDWORK = INDTAUP + SIZEB + IA - 1 LLWORK = LWORK - INDWORK + 1 * * Initialize contexts for "column" and "row" process matrices. * CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTC) CALL BLACS_GRIDINIT(CONTEXTC,'R',NPROCS,1) CALL BLACS_GRIDINFO(CONTEXTC,NPROWC,NPCOLC,MYPROWC, + MYPCOLC) CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTR) CALL BLACS_GRIDINIT(CONTEXTR,'R',1,NPROCS) CALL BLACS_GRIDINFO(CONTEXTR,NPROWR,NPCOLR,MYPROWR, + MYPCOLR) * * Set local dimensions of matrices (this is for MB=NB=1). * NRU = NUMROC(M,1,MYPROWC,0,NPROCS) NCVT = NUMROC(N,1,MYPCOLR,0,NPROCS) NB = DESCA(NB_) MB = DESCA(MB_) MP = NUMROC(M,MB,MYPROW,DESCA(RSRC_),NPROW) NQ = NUMROC(N,NB,MYPCOL,DESCA(CSRC_),NPCOL) IF (WANTVT.EQ.1) THEN SIZEP = NUMROC(SIZE,DESCVT(MB_),MYPROW,DESCVT(RSRC_), + NPROW) ELSE SIZEP = 0 END IF IF (WANTU.EQ.1) THEN SIZEQ = NUMROC(SIZE,DESCU(NB_),MYPCOL,DESCU(CSRC_), + NPCOL) ELSE SIZEQ = 0 END IF * * Transmit MAX(NQ0, MP0). * IF (MYPROW.EQ.0 .AND. MYPCOL.EQ.0) THEN MAXIM = MAX(NQ,MP) CALL IGEBS2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1) ELSE CALL IGEBR2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1,0,0) END IF * WPDLANGE = MP WPDGEBRD = NB* (MP+NQ+1) + NQ WATOBD = MAX(MAX(WPDLANGE,WPDGEBRD),MAXIM) * WDBDSQR = MAX(1,4*SIZE) WPDORMBRQLN = MAX((NB* (NB-1))/2, (SIZEQ+MP)*NB) + NB*NB WPDORMBRPRT = MAX((MB* (MB-1))/2, (SIZEP+NQ)*MB) + MB*MB WBDTOSVD = SIZE* (WANTU*NRU+WANTVT*NCVT) + + MAX(WDBDSQR,MAX(WANTU*WPDORMBRQLN, + WANTVT*WPDORMBRPRT)) * * Finally, calculate required workspace. * LWMIN = 1 + 6*SIZEB + MAX(WATOBD,WBDTOSVD) WORK(1) = DBLE(LWMIN) * IF (WANTU.NE.1 .AND. .NOT. (LSAME(JOBU,'N'))) THEN INFO = -1 ELSE IF (WANTVT.NE.1 .AND. .NOT. (LSAME(JOBVT,'N'))) THEN INFO = -2 ELSE IF (LWORK.LT.LWMIN .AND. LWORK.NE.-1) THEN INFO = -19 END IF * END IF * IDUM1(1) = WANTU IDUM1(2) = WANTVT IF (LWORK.EQ.-1) THEN IDUM1(3) = -1 ELSE IDUM1(3) = 1 END IF IDUM2(1) = 1 IDUM2(2) = 2 IDUM2(3) = 19 CALL PCHK1MAT(M,3,N,4,IA,JA,DESCA,8,3,IDUM1,IDUM2,INFO) IF (INFO.EQ.0) THEN IF (WANTU.EQ.1) THEN CALL PCHK1MAT(M,3,SIZE,4,IU,JU,DESCU,13,0,IDUM1,IDUM2, + INFO) END IF IF (WANTVT.EQ.1) THEN CALL PCHK1MAT(SIZE,3,N,4,IVT,JVT,DESCVT,17,0,IDUM1, + IDUM2,INFO) END IF END IF * END IF * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PDGESVD',-INFO) RETURN ELSE IF (LWORK.EQ.-1) THEN GO TO 40 END IF * * Quick return if possible. * IF (M.LE.0 .OR. N.LE.0) GO TO 40 * * Get machine constants. * SAFMIN = PDLAMCH(DESCA(CTXT_),'Safe minimum') EPS = PDLAMCH(DESCA(CTXT_),'Precision') SMLNUM = SAFMIN/EPS BIGNUM = ONE/SMLNUM RMIN = SQRT(SMLNUM) RMAX = MIN(SQRT(BIGNUM),ONE/SQRT(SQRT(SAFMIN))) * * Scale matrix to allowable range, if necessary. * ANRM = PDLANGE('1',M,N,A,IA,JA,DESCA,WORK(INDWORK)) IF (ANRM.GT.ZERO .AND. ANRM.LT.RMIN) THEN ISCALE = 1 SIGMA = RMIN/ANRM ELSE IF (ANRM.GT.RMAX) THEN ISCALE = 1 SIGMA = RMAX/ANRM END IF * IF (ISCALE.EQ.1) THEN CALL PDLASCL('G',ONE,SIGMA,M,N,A,IA,JA,DESCA,INFO) END IF * CALL PDGEBRD(M,N,A,IA,JA,DESCA,WORK(INDD),WORK(INDE), + WORK(INDTAUQ),WORK(INDTAUP),WORK(INDWORK),LLWORK, + INFO) * * Copy D and E to all processes. * Array D is in local array of dimension: * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * Array E is in local array of dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * IF (M.GE.N) THEN * Distribute D CALL PDLARED1D(N+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PDLARED2D(M+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2), + WORK(INDWORK),LLWORK) ELSE * Distribute D CALL PDLARED2D(M+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PDLARED1D(N+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2), + WORK(INDWORK),LLWORK) END IF * * Prepare for calling PDBDSQR. * IF (M.GE.N) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * INDU = INDWORK INDV = INDU + SIZE*NRU*WANTU INDWORK = INDV + SIZE*NCVT*WANTVT * LDU = MAX(1,NRU) LDVT = MAX(1,SIZE) * CALL DESCINIT(DESCTU,M,SIZE,1,1,0,0,CONTEXTC,LDU,INFO) CALL DESCINIT(DESCTVT,SIZE,N,1,1,0,0,CONTEXTR,LDVT,INFO) * IF (WANTU.EQ.1) THEN CALL PDLASET('Full',M,SIZE,ZERO,ONE,WORK(INDU),1,1,DESCTU) ELSE NRU = 0 END IF * IF (WANTVT.EQ.1) THEN CALL PDLASET('Full',SIZE,N,ZERO,ONE,WORK(INDV),1,1,DESCTVT) ELSE NCVT = 0 END IF * CALL DBDSQR(UPLO,SIZE,NCVT,NRU,0,WORK(INDD2+IOFFD), + WORK(INDE2+IOFFE),WORK(INDV),SIZE,WORK(INDU),LDU,C,1, + WORK(INDWORK),INFO) * * Redistribute elements of U and VT in the block-cyclic fashion. * IF (WANTU.EQ.1) CALL PDGEMR2D(M,SIZE,WORK(INDU),1,1,DESCTU,U,IU, + JU,DESCU,DESCU(CTXT_)) * IF (WANTVT.EQ.1) CALL PDGEMR2D(SIZE,N,WORK(INDV),1,1,DESCTVT,VT, + IVT,JVT,DESCVT,DESCVT(CTXT_)) * * Set to ZERO "non-square" elements of the larger matrices U, VT. * IF (M.GT.N .AND. WANTU.EQ.1) THEN CALL PDLASET('Full',M-SIZE,SIZE,ZERO,ZERO,U,IA+SIZE,JU,DESCU) ELSE IF (N.GT.M .AND. WANTVT.EQ.1) THEN CALL PDLASET('Full',SIZE,N-SIZE,ZERO,ZERO,VT,IVT,JVT+SIZE, + DESCVT) END IF * * Multiply Householder rotations from bidiagonalized matrix. * IF (WANTU.EQ.1) CALL PDORMBR('Q','L','N',M,SIZE,N,A,IA,JA,DESCA, + WORK(INDTAUQ),U,IU,JU,DESCU, + WORK(INDWORK),LLWORK,INFO) * IF (WANTVT.EQ.1) CALL PDORMBR('P','R','T',SIZE,N,M,A,IA,JA,DESCA, + WORK(INDTAUP),VT,IVT,JVT,DESCVT, + WORK(INDWORK),LLWORK,INFO) * * Copy singular values into output array S. * DO 10 I = 1,SIZE S(I) = WORK(INDD2+IOFFD+I-1) 10 CONTINUE * * If matrix was scaled, then rescale singular values appropriately. * IF (ISCALE.EQ.1) THEN CALL DSCAL(SIZE,ONE/SIGMA,S,1) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF (SIZE.LE.ITHVAL) THEN J = SIZE K = 1 ELSE J = SIZE/ITHVAL K = ITHVAL END IF * DO 20 I = 1,J WORK(I+INDE) = S((I-1)*K+1) WORK(I+INDD2) = S((I-1)*K+1) 20 CONTINUE * CALL DGAMN2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDE),J,1,1,-1,-1,0) CALL DGAMX2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDD2),J,1,1,-1,-1,0) * DO 30 I = 1,J IF ((WORK(I+INDE)-WORK(I+INDD2)).NE.ZERO) THEN INFO = SIZE + 1 END IF 30 CONTINUE * 40 CONTINUE * CALL BLACS_GRIDEXIT(CONTEXTC) CALL BLACS_GRIDEXIT(CONTEXTR) * * End of PDGESVD * RETURN END scalapack-1.8.0/SRC/pdstebz.f0000640000175000017500000014600210363532303015570 0ustar muammarmuammar SUBROUTINE PDSTEBZ( ICTXT, RANGE, ORDER, N, VL, VU, IL, IU, $ ABSTOL, D, E, M, NSPLIT, W, IBLOCK, ISPLIT, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER ORDER, RANGE INTEGER ICTXT, IL, INFO, IU, LIWORK, LWORK, M, N, $ NSPLIT DOUBLE PRECISION ABSTOL, VL, VU * .. * .. Array Arguments .. INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PDSTEBZ computes the eigenvalues of a symmetric tridiagonal matrix in * parallel. The user may ask for all eigenvalues, all eigenvalues in * the interval [VL, VU], or the eigenvalues indexed IL through IU. A * static partitioning of work is done at the beginning of PDSTEBZ which * results in all processes finding an (almost) equal number of * eigenvalues. * * NOTE : It is assumed that the user is on an IEEE machine. If the user * is not on an IEEE mchine, set the compile time flag NO_IEEE * to 1 (in SLmake.inc). The features of IEEE arithmetic that * are needed for the "fast" Sturm Count are : (a) infinity * arithmetic (b) the sign bit of a single precision floating * point number is assumed be in the 32nd bit position * (c) the sign of negative zero. * * See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal * Matrix", Report CS41, Computer Science Dept., Stanford * University, July 21, 1966. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle. * * RANGE (global input) CHARACTER * Specifies which eigenvalues are to be found. * = 'A': ("All") all eigenvalues will be found. * = 'V': ("Value") all eigenvalues in the interval * [VL, VU] will be found. * = 'I': ("Index") the IL-th through IU-th eigenvalues (of the * entire matrix) will be found. * * ORDER (global input) CHARACTER * Specifies the order in which the eigenvalues and their block * numbers are stored in W and IBLOCK. * = 'B': ("By Block") the eigenvalues will be grouped by * split-off block (see IBLOCK, ISPLIT) and * ordered from smallest to largest within * the block. * = 'E': ("Entire matrix") * the eigenvalues for the entire matrix * will be ordered from smallest to largest. * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Eigenvalues less than VL will not be * returned. Not referenced if RANGE='A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Eigenvalues greater than VU will not be * returned. VU must be greater than VL. Not referenced if * RANGE='A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL must be at least 1. * Not referenced if RANGE='A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. IU must be at least IL * and no greater than N. Not referenced if RANGE='A' or 'V'. * * ABSTOL (global input) DOUBLE PRECISION * The absolute tolerance for the eigenvalues. An eigenvalue * (or cluster) is considered to be located if it has been * determined to lie in an interval whose width is ABSTOL or * less. If ABSTOL is less than or equal to zero, then ULP*|T| * will be used, where |T| means the 1-norm of T. * Eigenvalues will be computed most accurately when ABSTOL is * set to the underflow threshold DLAMCH('U'), not zero. * Note : If eigenvectors are desired later by inverse iteration * ( PDSTEIN ), ABSTOL should be set to 2*PDLAMCH('S'). * * D (global input) DOUBLE PRECISION array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. To * avoid overflow, the matrix must be scaled so that its largest * entry is no greater than overflow**(1/2) * underflow**(1/4) * in absolute value, and for greatest accuracy, it should not * be much smaller than that. * * E (global input) DOUBLE PRECISION array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * To avoid overflow, the matrix must be scaled so that its * largest entry is no greater than overflow**(1/2) * * underflow**(1/4) in absolute value, and for greatest * accuracy, it should not be much smaller than that. * * M (global output) INTEGER * The actual number of eigenvalues found. 0 <= M <= N. * (See also the description of INFO=2) * * NSPLIT (global output) INTEGER * The number of diagonal blocks in the matrix T. * 1 <= NSPLIT <= N. * * W (global output) DOUBLE PRECISION array, dimension (N) * On exit, the first M elements of W contain the eigenvalues * on all processes. * * IBLOCK (global output) INTEGER array, dimension (N) * At each row/column j where E(j) is zero or small, the * matrix T is considered to split into a block diagonal * matrix. On exit IBLOCK(i) specifies which block (from 1 * to the number of blocks) the eigenvalue W(i) belongs to. * NOTE: in the (theoretically impossible) event that bisection * does not converge for some or all eigenvalues, INFO is set * to 1 and the ones for which it did not are identified by a * negative block number. * * ISPLIT (global output) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. * (Only the first NSPLIT elements will actually be used, but * since the user cannot know a priori what value NSPLIT will * have, N words must be reserved for ISPLIT.) * * WORK (local workspace) DOUBLE PRECISION array, * dimension ( MAX( 5*N, 7 ) ) * * LWORK (local input) INTEGER * size of array WORK must be >= MAX( 5*N, 7 ) * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace) INTEGER array, dimension ( MAX( 4*N, 14 ) ) * * LIWORK (local input) INTEGER * size of array IWORK must be >= MAX( 4*N, 14, NPROCS ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0 : successful exit * < 0 : if INFO = -i, the i-th argument had an illegal value * > 0 : some or all of the eigenvalues failed to converge or * were not computed: * = 1 : Bisection failed to converge for some eigenvalues; * these eigenvalues are flagged by a negative block * number. The effect is that the eigenvalues may not * be as accurate as the absolute and relative * tolerances. This is generally caused by arithmetic * which is less accurate than PDLAMCH says. * = 2 : There is a mismatch between the number of * eigenvalues output and the number desired. * = 3 : RANGE='i', and the Gershgorin interval initially * used was incorrect. No eigenvalues were computed. * Probable cause: your machine has sloppy floating * point arithmetic. * Cure: Increase the PARAMETER "FUDGE", recompile, * and try again. * * Internal Parameters * =================== * * RELFAC DOUBLE PRECISION, default = 2.0 * The relative tolerance. An interval [a,b] lies within * "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), * where "ulp" is the machine precision (distance from 1 to * the next larger floating point number.) * * FUDGE DOUBLE PRECISION, default = 2.0 * A "fudge factor" to widen the Gershgorin intervals. Ideally, * a value of 1 should work, but on machines with sloppy * arithmetic, this needs to be larger. The default for * publicly released versions should be large enough to handle * the worst machine around. Note that this has no effect * on the accuracy of the solution. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER BLACS_PNUM DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, BLACS_PNUM, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_FREEBUFF, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDMAP, DGEBR2D, $ DGEBS2D, DGERV2D, DGESD2D, DLASRT2, GLOBCHK, $ IGEBR2D, IGEBS2D, IGERV2D, IGESD2D, IGSUM2D, $ PDLAEBZ, PDLAIECTB, PDLAIECTL, PDLAPDCT, $ PDLASNBT, PXERBLA * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER BIGNUM, DESCMULT PARAMETER ( BIGNUM = 10000, DESCMULT = 100 ) DOUBLE PRECISION ZERO, ONE, TWO, FIVE, HALF PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, $ FIVE = 5.0D+0, HALF = 1.0D+0 / TWO ) DOUBLE PRECISION FUDGE, RELFAC PARAMETER ( FUDGE = 2.0D+0, RELFAC = 2.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER BLKNO, FOUND, I, IBEGIN, IEFLAG, IEND, IFRST, $ IINFO, ILAST, ILOAD, IM, IMYLOAD, IN, INDRIW1, $ INDRIW2, INDRW1, INDRW2, INXTLOAD, IOFF, $ IORDER, IOUT, IRANGE, IRECV, IREM, ITMP1, $ ITMP2, J, JB, K, LAST, LEXTRA, LREQ, MYCOL, $ MYROW, NALPHA, NBETA, NCMP, NEIGINT, NEXT, NGL, $ NGLOB, NGU, NINT, NPCOL, NPROW, OFFSET, $ ONEDCONTEXT, P, PREV, REXTRA, RREQ, SELF, $ TORECV DOUBLE PRECISION ALPHA, ATOLI, BETA, BNORM, DRECV, DSEND, GL, $ GU, INITVL, INITVU, LSAVE, MID, PIVMIN, RELTOL, $ SAFEMN, TMP1, TMP2, TNORM, ULP * .. * .. Local Arrays .. INTEGER IDUM( 5, 2 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Set up process grid * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 M = 0 * * Decode RANGE * IF( LSAME( RANGE, 'A' ) ) THEN IRANGE = 1 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IRANGE = 2 ELSE IF( LSAME( RANGE, 'I' ) ) THEN IRANGE = 3 ELSE IRANGE = 0 END IF * * Decode ORDER * IF( LSAME( ORDER, 'B' ) ) THEN IORDER = 2 ELSE IF( LSAME( ORDER, 'E' ) .OR. LSAME( ORDER, 'A' ) ) THEN IORDER = 1 ELSE IORDER = 0 END IF * * Check for Errors * IF( NPROW.EQ.-1 ) THEN INFO = -1 ELSE * * Get machine constants * SAFEMN = PDLAMCH( ICTXT, 'S' ) ULP = PDLAMCH( ICTXT, 'P' ) RELTOL = ULP*RELFAC IDUM( 1, 1 ) = ICHAR( RANGE ) IDUM( 1, 2 ) = 2 IDUM( 2, 1 ) = ICHAR( ORDER ) IDUM( 2, 2 ) = 3 IDUM( 3, 1 ) = N IDUM( 3, 2 ) = 4 NGLOB = 5 IF( IRANGE.EQ.3 ) THEN IDUM( 4, 1 ) = IL IDUM( 4, 2 ) = 7 IDUM( 5, 1 ) = IU IDUM( 5, 2 ) = 8 ELSE IDUM( 4, 1 ) = 0 IDUM( 4, 2 ) = 0 IDUM( 5, 1 ) = 0 IDUM( 5, 2 ) = 0 END IF IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( IRANGE.EQ.2 ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL DGEBS2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL DGEBR2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3, 0, 0 ) END IF LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IRANGE.EQ.0 ) THEN INFO = -2 ELSE IF( IORDER.EQ.0 ) THEN INFO = -3 ELSE IF( IRANGE.EQ.2 .AND. VL.GE.VU ) THEN INFO = -5 ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, $ N ) ) ) THEN INFO = -6 ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, $ IL ) .OR. IU.GT.N ) ) THEN INFO = -7 ELSE IF( LWORK.LT.MAX( 5*N, 7 ) .AND. .NOT.LQUERY ) THEN INFO = -18 ELSE IF( LIWORK.LT.MAX( 4*N, 14, NPROW*NPCOL ) .AND. .NOT. $ LQUERY ) THEN INFO = -20 ELSE IF( IRANGE.EQ.2 .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE* $ ULP*ABS( VL ) ) ) THEN INFO = -5 ELSE IF( IRANGE.EQ.2 .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE* $ ULP*ABS( VU ) ) ) THEN INFO = -6 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*ULP*ABS( ABSTOL ) ) $ THEN INFO = -9 END IF END IF IF( INFO.EQ.0 ) $ INFO = BIGNUM CALL GLOBCHK( ICTXT, NGLOB, IDUM, 5, IWORK, INFO ) IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF END IF WORK( 1 ) = DBLE( MAX( 5*N, 7 ) ) IWORK( 1 ) = MAX( 4*N, 14, NPROW*NPCOL ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSTEBZ', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .AND. LIWORK.EQ.-1 ) THEN RETURN END IF * * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * K = 1 DO 20 I = 0, NPROW - 1 DO 10 J = 0, NPCOL - 1 IWORK( K ) = BLACS_PNUM( ICTXT, I, J ) K = K + 1 10 CONTINUE 20 CONTINUE * P = NPROW*NPCOL NPROW = 1 NPCOL = P * CALL BLACS_GET( ICTXT, 10, ONEDCONTEXT ) CALL BLACS_GRIDMAP( ONEDCONTEXT, IWORK, NPROW, NPROW, NPCOL ) CALL BLACS_GRIDINFO( ONEDCONTEXT, I, J, K, SELF ) * * Simplifications: * IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) $ IRANGE = 1 * NEXT = MOD( SELF+1, P ) PREV = MOD( P+SELF-1, P ) * * Compute squares of off-diagonals, splitting points and pivmin. * Interleave diagonals and off-diagonals. * INDRW1 = MAX( 2*N, 4 ) INDRW2 = INDRW1 + 2*N INDRIW1 = MAX( 2*N, 8 ) NSPLIT = 1 WORK( INDRW1+2*N ) = ZERO PIVMIN = ONE * DO 30 I = 1, N - 1 TMP1 = E( I )**2 J = 2*I WORK( INDRW1+J-1 ) = D( I ) IF( ABS( D( I+1 )*D( I ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN ISPLIT( NSPLIT ) = I NSPLIT = NSPLIT + 1 WORK( INDRW1+J ) = ZERO ELSE WORK( INDRW1+J ) = TMP1 PIVMIN = MAX( PIVMIN, TMP1 ) END IF 30 CONTINUE WORK( INDRW1+2*N-1 ) = D( N ) ISPLIT( NSPLIT ) = N PIVMIN = PIVMIN*SAFEMN * * Compute Gershgorin interval [gl,gu] for entire matrix * GU = D( 1 ) GL = D( 1 ) TMP1 = ZERO * DO 40 I = 1, N - 1 TMP2 = ABS( E( I ) ) GU = MAX( GU, D( I )+TMP1+TMP2 ) GL = MIN( GL, D( I )-TMP1-TMP2 ) TMP1 = TMP2 40 CONTINUE GU = MAX( GU, D( N )+TMP1 ) GL = MIN( GL, D( N )-TMP1 ) TNORM = MAX( ABS( GL ), ABS( GU ) ) GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN * IF( ABSTOL.LE.ZERO ) THEN ATOLI = ULP*TNORM ELSE ATOLI = ABSTOL END IF * * Find out if on an IEEE machine, the sign bit is the * 32nd bit (Big Endian) or the 64th bit (Little Endian) * IF( IRANGE.EQ.1 .OR. NSPLIT.EQ.1 ) THEN CALL PDLASNBT( IEFLAG ) ELSE IEFLAG = 0 END IF LEXTRA = 0 REXTRA = 0 * * Form Initial Interval containing desired eigenvalues * IF( IRANGE.EQ.1 ) THEN INITVL = GL INITVU = GU WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = 0 IWORK( 2 ) = N IFRST = 1 ILAST = N ELSE IF( IRANGE.EQ.2 ) THEN IF( VL.GT.GL ) THEN IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( VL, N, WORK( INDRW1+1 ), PIVMIN, IFRST ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( VL, N, WORK( INDRW1+1 ), IFRST ) ELSE CALL PDLAIECTL( VL, N, WORK( INDRW1+1 ), IFRST ) END IF IFRST = IFRST + 1 INITVL = VL ELSE INITVL = GL IFRST = 1 END IF IF( VU.LT.GU ) THEN IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( VU, N, WORK( INDRW1+1 ), PIVMIN, ILAST ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( VU, N, WORK( INDRW1+1 ), ILAST ) ELSE CALL PDLAIECTL( VU, N, WORK( INDRW1+1 ), ILAST ) END IF INITVU = VU ELSE INITVU = GU ILAST = N END IF WORK( 1 ) = INITVL WORK( 2 ) = INITVU IWORK( 1 ) = IFRST - 1 IWORK( 2 ) = ILAST ELSE IF( IRANGE.EQ.3 ) THEN WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = 0 IWORK( 2 ) = N IWORK( 5 ) = IL - 1 IWORK( 6 ) = IU CALL PDLAEBZ( 0, N, 2, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+1 ), IWORK( 5 ), WORK, IWORK, NINT, $ LSAVE, IEFLAG, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 3 GO TO 230 END IF IF( NINT.GT.1 ) THEN IF( IWORK( 5 ).EQ.IL-1 ) THEN WORK( 2 ) = WORK( 4 ) IWORK( 2 ) = IWORK( 4 ) ELSE WORK( 1 ) = WORK( 3 ) IWORK( 1 ) = IWORK( 3 ) END IF IF( IWORK( 1 ).LT.0 .OR. IWORK( 1 ).GT.IL-1 .OR. $ IWORK( 2 ).LE.MIN( IU-1, IWORK( 1 ) ) .OR. $ IWORK( 2 ).GT.N ) THEN INFO = 3 GO TO 230 END IF END IF LEXTRA = IL - 1 - IWORK( 1 ) REXTRA = IWORK( 2 ) - IU INITVL = WORK( 1 ) INITVU = WORK( 2 ) IFRST = IL ILAST = IU END IF * NVL = IFRST - 1 * NVU = ILAST GL = INITVL GU = INITVU NGL = IWORK( 1 ) NGU = IWORK( 2 ) IM = 0 FOUND = 0 INDRIW2 = INDRIW1 + NGU - NGL IEND = 0 IF( IFRST.GT.ILAST ) $ GO TO 100 IF( IFRST.EQ.1 .AND. ILAST.EQ.N ) $ IRANGE = 1 * * Find Eigenvalues -- Loop Over Blocks * DO 90 JB = 1, NSPLIT IOFF = IEND IBEGIN = IOFF + 1 IEND = ISPLIT( JB ) IN = IEND - IOFF IF( JB.NE.1 ) THEN IF( IRANGE.NE.1 ) THEN FOUND = IM * * Find total number of eigenvalues found thus far * CALL IGSUM2D( ONEDCONTEXT, 'All', ' ', 1, 1, FOUND, 1, $ -1, -1 ) ELSE FOUND = IOFF END IF END IF * IF( SELF.GE.P ) * $ GO TO 30 IF( IN.NE.N ) THEN * * Compute Gershgorin interval [gl,gu] for split matrix * GU = D( IBEGIN ) GL = D( IBEGIN ) TMP1 = ZERO * DO 50 J = IBEGIN, IEND - 1 TMP2 = ABS( E( J ) ) GU = MAX( GU, D( J )+TMP1+TMP2 ) GL = MIN( GL, D( J )-TMP1-TMP2 ) TMP1 = TMP2 50 CONTINUE * GU = MAX( GU, D( IEND )+TMP1 ) GL = MIN( GL, D( IEND )-TMP1 ) BNORM = MAX( ABS( GL ), ABS( GU ) ) GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN * * Compute ATOLI for the current submatrix * IF( ABSTOL.LE.ZERO ) THEN ATOLI = ULP*BNORM ELSE ATOLI = ABSTOL END IF * IF( GL.LT.INITVL ) THEN GL = INITVL IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( GL, IN, WORK( INDRW1+2*IOFF+1 ), $ PIVMIN, NGL ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( GL, IN, WORK( INDRW1+2*IOFF+1 ), NGL ) ELSE CALL PDLAIECTL( GL, IN, WORK( INDRW1+2*IOFF+1 ), NGL ) END IF ELSE NGL = 0 END IF IF( GU.GT.INITVU ) THEN GU = INITVU IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( GU, IN, WORK( INDRW1+2*IOFF+1 ), $ PIVMIN, NGU ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( GU, IN, WORK( INDRW1+2*IOFF+1 ), NGU ) ELSE CALL PDLAIECTL( GU, IN, WORK( INDRW1+2*IOFF+1 ), NGU ) END IF ELSE NGU = IN END IF IF( NGL.GE.NGU ) $ GO TO 90 WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = NGL IWORK( 2 ) = NGU END IF OFFSET = FOUND - NGL BLKNO = JB * * Do a static partitioning of work so that each process * has to find an (almost) equal number of eigenvalues * NCMP = NGU - NGL ILOAD = NCMP / P IREM = NCMP - ILOAD*P ITMP1 = MOD( SELF-FOUND, P ) IF( ITMP1.LT.0 ) $ ITMP1 = ITMP1 + P IF( ITMP1.LT.IREM ) THEN IMYLOAD = ILOAD + 1 ELSE IMYLOAD = ILOAD END IF IF( IMYLOAD.EQ.0 ) THEN GO TO 90 ELSE IF( IN.EQ.1 ) THEN WORK( INDRW2+IM+1 ) = WORK( INDRW1+2*IOFF+1 ) IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = OFFSET + 1 IM = IM + 1 GO TO 90 ELSE INXTLOAD = ILOAD ITMP2 = MOD( SELF+1-FOUND, P ) IF( ITMP2.LT.0 ) $ ITMP2 = ITMP2 + P IF( ITMP2.LT.IREM ) $ INXTLOAD = INXTLOAD + 1 LREQ = NGL + ITMP1*ILOAD + MIN( IREM, ITMP1 ) RREQ = LREQ + IMYLOAD IWORK( 5 ) = LREQ IWORK( 6 ) = RREQ TMP1 = WORK( 1 ) ITMP1 = IWORK( 1 ) CALL PDLAEBZ( 1, IN, 1, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+2*IOFF+1 ), IWORK( 5 ), WORK, $ IWORK, NINT, LSAVE, IEFLAG, IINFO ) ALPHA = WORK( 1 ) BETA = WORK( 2 ) NALPHA = IWORK( 1 ) NBETA = IWORK( 2 ) DSEND = BETA IF( NBETA.GT.RREQ+INXTLOAD ) THEN NBETA = RREQ DSEND = ALPHA END IF LAST = MOD( FOUND+MIN( NGU-NGL, P )-1, P ) IF( LAST.LT.0 ) $ LAST = LAST + P IF( SELF.NE.LAST ) THEN CALL DGESD2D( ONEDCONTEXT, 1, 1, DSEND, 1, 0, NEXT ) CALL IGESD2D( ONEDCONTEXT, 1, 1, NBETA, 1, 0, NEXT ) END IF IF( SELF.NE.MOD( FOUND, P ) ) THEN CALL DGERV2D( ONEDCONTEXT, 1, 1, DRECV, 1, 0, PREV ) CALL IGERV2D( ONEDCONTEXT, 1, 1, IRECV, 1, 0, PREV ) ELSE DRECV = TMP1 IRECV = ITMP1 END IF WORK( 1 ) = MAX( LSAVE, DRECV ) IWORK( 1 ) = IRECV ALPHA = MAX( ALPHA, WORK( 1 ) ) NALPHA = MAX( NALPHA, IRECV ) IF( BETA-ALPHA.LE.MAX( ATOLI, RELTOL*MAX( ABS( ALPHA ), $ ABS( BETA ) ) ) ) THEN MID = HALF*( ALPHA+BETA ) DO 60 J = OFFSET + NALPHA + 1, OFFSET + NBETA WORK( INDRW2+IM+1 ) = MID IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = J IM = IM + 1 60 CONTINUE WORK( 2 ) = ALPHA IWORK( 2 ) = NALPHA END IF END IF NEIGINT = IWORK( 2 ) - IWORK( 1 ) IF( NEIGINT.LE.0 ) $ GO TO 90 * * Call the main computational routine * CALL PDLAEBZ( 2, IN, NEIGINT, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+2*IOFF+1 ), IWORK, WORK, IWORK, $ IOUT, LSAVE, IEFLAG, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 END IF DO 80 I = 1, IOUT MID = HALF*( WORK( 2*I-1 )+WORK( 2*I ) ) IF( I.GT.IOUT-IINFO ) $ BLKNO = -BLKNO DO 70 J = OFFSET + IWORK( 2*I-1 ) + 1, $ OFFSET + IWORK( 2*I ) WORK( INDRW2+IM+1 ) = MID IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = J IM = IM + 1 70 CONTINUE 80 CONTINUE 90 CONTINUE * * Find out total number of eigenvalues computed * 100 CONTINUE M = IM CALL IGSUM2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, M, 1, -1, -1 ) * * Move the eigenvalues found to their final destinations * DO 130 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, IM, 1 ) IF( IM.NE.0 ) THEN CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ IWORK( INDRIW2+1 ), IM ) CALL DGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ WORK( INDRW2+1 ), IM ) CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ IWORK( INDRIW1+1 ), IM ) DO 110 J = 1, IM W( IWORK( INDRIW2+J ) ) = WORK( INDRW2+J ) IBLOCK( IWORK( INDRIW2+J ) ) = IWORK( INDRIW1+J ) 110 CONTINUE END IF ELSE CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, TORECV, 1, 0, $ I-1 ) IF( TORECV.NE.0 ) THEN CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, IWORK, $ TORECV, 0, I-1 ) CALL DGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, WORK, $ TORECV, 0, I-1 ) CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, $ IWORK( N+1 ), TORECV, 0, I-1 ) DO 120 J = 1, TORECV W( IWORK( J ) ) = WORK( J ) IBLOCK( IWORK( J ) ) = IWORK( N+J ) 120 CONTINUE END IF END IF 130 CONTINUE IF( NSPLIT.GT.1 .AND. IORDER.EQ.1 ) THEN * * Sort the eigenvalues * * DO 140 I = 1, M IWORK( M+I ) = I 140 CONTINUE CALL DLASRT2( 'I', M, W, IWORK( M+1 ), IINFO ) DO 150 I = 1, M IWORK( I ) = IBLOCK( I ) 150 CONTINUE DO 160 I = 1, M IBLOCK( I ) = IWORK( IWORK( M+I ) ) 160 CONTINUE END IF IF( IRANGE.EQ.3 .AND. ( LEXTRA.GT.0 .OR. REXTRA.GT.0 ) ) THEN * * Discard unwanted eigenvalues (occurs only when RANGE = 'I', * and eigenvalues IL, and/or IU are in a cluster) * DO 170 I = 1, M WORK( I ) = W( I ) IWORK( I ) = I IWORK( M+I ) = I 170 CONTINUE DO 190 I = 1, LEXTRA ITMP1 = I DO 180 J = I + 1, M IF( WORK( J ).LT.WORK( ITMP1 ) ) THEN ITMP1 = J END IF 180 CONTINUE TMP1 = WORK( I ) WORK( I ) = WORK( ITMP1 ) WORK( ITMP1 ) = TMP1 IWORK( IWORK( M+ITMP1 ) ) = I IWORK( IWORK( M+I ) ) = ITMP1 ITMP2 = IWORK( M+I ) IWORK( M+I ) = IWORK( M+ITMP1 ) IWORK( M+ITMP1 ) = ITMP2 190 CONTINUE DO 210 I = 1, REXTRA ITMP1 = M - I + 1 DO 200 J = M - I, LEXTRA + 1, -1 IF( WORK( J ).GT.WORK( ITMP1 ) ) THEN ITMP1 = J END IF 200 CONTINUE TMP1 = WORK( M-I+1 ) WORK( M-I+1 ) = WORK( ITMP1 ) WORK( ITMP1 ) = TMP1 IWORK( IWORK( M+ITMP1 ) ) = M - I + 1 IWORK( IWORK( 2*M-I+1 ) ) = ITMP1 ITMP2 = IWORK( 2*M-I+1 ) IWORK( 2*M-I+1 ) = IWORK( M+ITMP1 ) IWORK( M+ITMP1 ) = ITMP2 * IWORK( ITMP1 ) = 1 210 CONTINUE J = 0 DO 220 I = 1, M IF( IWORK( I ).GT.LEXTRA .AND. IWORK( I ).LE.M-REXTRA ) THEN J = J + 1 W( J ) = WORK( IWORK( I ) ) IBLOCK( J ) = IBLOCK( I ) END IF 220 CONTINUE M = M - LEXTRA - REXTRA END IF IF( M.NE.ILAST-IFRST+1 ) THEN INFO = 2 END IF * 230 CONTINUE CALL BLACS_FREEBUFF( ONEDCONTEXT, 1 ) CALL BLACS_GRIDEXIT( ONEDCONTEXT ) RETURN * * End of PDSTEBZ * END * SUBROUTINE PDLAEBZ( IJOB, N, MMAX, MINP, ABSTOL, RELTOL, PIVMIN, $ D, NVAL, INTVL, INTVLCT, MOUT, LSAVE, IEFLAG, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER IEFLAG, IJOB, INFO, MINP, MMAX, MOUT, N DOUBLE PRECISION ABSTOL, LSAVE, PIVMIN, RELTOL * .. * .. Array Arguments .. INTEGER INTVLCT( * ), NVAL( * ) DOUBLE PRECISION D( * ), INTVL( * ) * .. * * Purpose * ======= * * PDLAEBZ contains the iteration loop which computes the eigenvalues * contained in the input intervals [ INTVL(2*j-1), INTVL(2*j) ] where * j = 1,...,MINP. It uses and computes the function N(w), which is * the count of eigenvalues of a symmetric tridiagonal matrix less than * or equal to its argument w. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * IJOB (input) INTEGER * Specifies the computation done by PDLAEBZ * = 0 : Find an interval with desired values of N(w) at the * endpoints of the interval. * = 1 : Find a floating point number contained in the initial * interval with a desired value of N(w). * = 2 : Perform bisection iteration to find eigenvalues of T. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * MMAX (input) INTEGER * The maximum number of intervals that may be generated. If * more than MMAX intervals are generated, then PDLAEBZ will * quit with INFO = MMAX+1. * * MINP (input) INTEGER * The initial number of intervals. MINP <= MMAX. * * ABSTOL (input) DOUBLE PRECISION * The minimum (absolute) width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * This must be at least zero. * * RELTOL (input) DOUBLE PRECISION * The minimum relative width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This should be at least radix*machine epsilon. * * PIVMIN (input) DOUBLE PRECISION * The minimum absolute of a "pivot" in the "paranoid" * implementation of the Sturm sequence loop. This must be at * least max_j |e(j)^2| *safe_min, and at least safe_min, where * safe_min is at least the smallest number that can divide 1.0 * without overflow. * See PDLAPDCT for the "paranoid" implementation of the Sturm * sequence loop. * * D (input) DOUBLE PRECISION array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * NVAL (input/output) INTEGER array, dimension (4) * If IJOB = 0, the desired values of N(w) are in NVAL(1) and * NVAL(2). * If IJOB = 1, NVAL(2) is the desired value of N(w). * If IJOB = 2, not referenced. * This array will, in general, be reordered on output. * * INTVL (input/output) DOUBLE PRECISION array, dimension (2*MMAX) * The endpoints of the intervals. INTVL(2*j-1) is the left * endpoint of the j-th interval, and INTVL(2*j) is the right * endpoint of the j-th interval. The input intervals will, * in general, be modified, split and reordered by the * calculation. * On input, INTVL contains the MINP input intervals. * On output, INTVL contains the converged intervals. * * INTVLCT (input/output) INTEGER array, dimension (2*MMAX) * The counts at the endpoints of the intervals. INTVLCT(2*j-1) * is the count at the left endpoint of the j-th interval, i.e., * the function value N(INTVL(2*j-1)), and INTVLCT(2*j) is the * count at the right endpoint of the j-th interval. * On input, INTVLCT contains the counts at the endpoints of * the MINP input intervals. * On output, INTVLCT contains the counts at the endpoints of * the converged intervals. * * MOUT (output) INTEGER * The number of intervals output. * * LSAVE (output) DOUBLE PRECISION * If IJOB = 0 or 2, not referenced. * If IJOB = 1, this is the largest floating point number * encountered which has count N(w) = NVAL(1). * * IEFLAG (input) INTEGER * A flag which indicates whether N(w) should be speeded up by * exploiting IEEE Arithmetic. * * INFO (output) INTEGER * = 0 : All intervals converged. * = 1 - MMAX : The last INFO intervals did not converge. * = MMAX + 1 : More than MMAX intervals were generated. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, INT, LOG, MAX, MIN * .. * .. External Subroutines .. EXTERNAL PDLAECV, PDLAIECTB, PDLAIECTL, PDLAPDCT * .. * .. Parameters .. DOUBLE PRECISION ZERO, TWO, HALF PARAMETER ( ZERO = 0.0D+0, TWO = 2.0D+0, $ HALF = 1.0D+0 / TWO ) * .. * .. Local Scalars .. INTEGER I, ITMAX, J, K, KF, KL, KLNEW, L, LCNT, LREQ, $ NALPHA, NBETA, NMID, RCNT, RREQ DOUBLE PRECISION ALPHA, BETA, MID * .. * .. Executable Statements .. * KF = 1 KL = MINP + 1 INFO = 0 IF( INTVL( 2 )-INTVL( 1 ).LE.ZERO ) THEN INFO = MINP MOUT = KF RETURN END IF IF( IJOB.EQ.0 ) THEN * * Check if some input intervals have "converged" * CALL PDLAECV( 0, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 * * Compute upper bound on number of iterations needed * ITMAX = INT( ( LOG( INTVL( 2 )-INTVL( 1 )+PIVMIN )- $ LOG( PIVMIN ) ) / LOG( TWO ) ) + 2 * * Iteration Loop * DO 20 I = 1, ITMAX KLNEW = KL DO 10 J = KF, KL - 1 K = 2*J * * Bisect the interval and find the count at that point * MID = HALF*( INTVL( K-1 )+INTVL( K ) ) IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( MID, N, D, NMID ) ELSE CALL PDLAIECTL( MID, N, D, NMID ) END IF LREQ = NVAL( K-1 ) RREQ = NVAL( K ) IF( KL.EQ.1 ) $ NMID = MIN( INTVLCT( K ), $ MAX( INTVLCT( K-1 ), NMID ) ) IF( NMID.LE.NVAL( K-1 ) ) THEN INTVL( K-1 ) = MID INTVLCT( K-1 ) = NMID END IF IF( NMID.GE.NVAL( K ) ) THEN INTVL( K ) = MID INTVLCT( K ) = NMID END IF IF( NMID.GT.LREQ .AND. NMID.LT.RREQ ) THEN L = 2*KLNEW INTVL( L-1 ) = MID INTVL( L ) = INTVL( K ) INTVLCT( L-1 ) = NVAL( K ) INTVLCT( L ) = INTVLCT( K ) INTVL( K ) = MID INTVLCT( K ) = NVAL( K-1 ) NVAL( L-1 ) = NVAL( K ) NVAL( L ) = NVAL( L-1 ) NVAL( K ) = NVAL( K-1 ) KLNEW = KLNEW + 1 END IF 10 CONTINUE KL = KLNEW CALL PDLAECV( 0, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 20 CONTINUE ELSE IF( IJOB.EQ.1 ) THEN ALPHA = INTVL( 1 ) BETA = INTVL( 2 ) NALPHA = INTVLCT( 1 ) NBETA = INTVLCT( 2 ) LSAVE = ALPHA LREQ = NVAL( 1 ) RREQ = NVAL( 2 ) 30 CONTINUE IF( NBETA.NE.RREQ .AND. BETA-ALPHA.GT. $ MAX( ABSTOL, RELTOL*MAX( ABS( ALPHA ), ABS( BETA ) ) ) ) $ THEN * * Bisect the interval and find the count at that point * MID = HALF*( ALPHA+BETA ) IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( MID, N, D, NMID ) ELSE CALL PDLAIECTL( MID, N, D, NMID ) END IF NMID = MIN( NBETA, MAX( NALPHA, NMID ) ) IF( NMID.GE.RREQ ) THEN BETA = MID NBETA = NMID ELSE ALPHA = MID NALPHA = NMID IF( NMID.EQ.LREQ ) $ LSAVE = ALPHA END IF GO TO 30 END IF KL = KF INTVL( 1 ) = ALPHA INTVL( 2 ) = BETA INTVLCT( 1 ) = NALPHA INTVLCT( 2 ) = NBETA ELSE IF( IJOB.EQ.2 ) THEN * * Check if some input intervals have "converged" * CALL PDLAECV( 1, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 * * Compute upper bound on number of iterations needed * ITMAX = INT( ( LOG( INTVL( 2 )-INTVL( 1 )+PIVMIN )- $ LOG( PIVMIN ) ) / LOG( TWO ) ) + 2 * * Iteration Loop * DO 50 I = 1, ITMAX KLNEW = KL DO 40 J = KF, KL - 1 K = 2*J MID = HALF*( INTVL( K-1 )+INTVL( K ) ) IF( IEFLAG.EQ.0 ) THEN CALL PDLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE IF( IEFLAG.EQ.1 ) THEN CALL PDLAIECTB( MID, N, D, NMID ) ELSE CALL PDLAIECTL( MID, N, D, NMID ) END IF LCNT = INTVLCT( K-1 ) RCNT = INTVLCT( K ) NMID = MIN( RCNT, MAX( LCNT, NMID ) ) * * Form New Interval(s) * IF( NMID.EQ.LCNT ) THEN INTVL( K-1 ) = MID ELSE IF( NMID.EQ.RCNT ) THEN INTVL( K ) = MID ELSE IF( KLNEW.LT.MMAX+1 ) THEN L = 2*KLNEW INTVL( L-1 ) = MID INTVL( L ) = INTVL( K ) INTVLCT( L-1 ) = NMID INTVLCT( L ) = INTVLCT( K ) INTVL( K ) = MID INTVLCT( K ) = NMID KLNEW = KLNEW + 1 ELSE INFO = MMAX + 1 RETURN END IF 40 CONTINUE KL = KLNEW CALL PDLAECV( 1, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 50 CONTINUE END IF 60 CONTINUE INFO = MAX( KL-KF, 0 ) MOUT = KL - 1 RETURN * * End of PDLAEBZ * END * * SUBROUTINE PDLAECV( IJOB, KF, KL, INTVL, INTVLCT, NVAL, ABSTOL, $ RELTOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER IJOB, KF, KL DOUBLE PRECISION ABSTOL, RELTOL * .. * .. Array Arguments .. INTEGER INTVLCT( * ), NVAL( * ) DOUBLE PRECISION INTVL( * ) * .. * * Purpose * ======= * * PDLAECV checks if the input intervals [ INTVL(2*i-1), INTVL(2*i) ], * i = KF, ... , KL-1, have "converged". * PDLAECV modifies KF to be the index of the last converged interval, * i.e., on output, all intervals [ INTVL(2*i-1), INTVL(2*i) ], i < KF, * have converged. Note that the input intervals may be reordered by * PDLAECV. * * This is a SCALAPACK internal procedure and arguments are not checked * for unreasonable values. * * Arguments * ========= * * IJOB (input) INTEGER * Specifies the criterion for "convergence" of an interval. * = 0 : When an interval is narrower than ABSTOL, or than * RELTOL times the larger (in magnitude) endpoint, then * it is considered to have "converged". * = 1 : When an interval is narrower than ABSTOL, or than * RELTOL times the larger (in magnitude) endpoint, or if * the counts at the endpoints are identical to the counts * specified by NVAL ( see NVAL ) then the interval is * considered to have "converged". * * KF (input/output) INTEGER * On input, the index of the first input interval is 2*KF-1. * On output, the index of the last converged interval * is 2*KF-3. * * KL (input) INTEGER * The index of the last input interval is 2*KL-3. * * INTVL (input/output) DOUBLE PRECISION array, dimension (2*(KL-KF)) * The endpoints of the intervals. INTVL(2*j-1) is the left * oendpoint f the j-th interval, and INTVL(2*j) is the right * endpoint of the j-th interval. The input intervals will, * in general, be reordered on output. * On input, INTVL contains the KL-KF input intervals. * On output, INTVL contains the converged intervals, 1 thru' * KF-1, and the unconverged intervals, KF thru' KL-1. * * INTVLCT (input/output) INTEGER array, dimension (2*(KL-KF)) * The counts at the endpoints of the intervals. INTVLCT(2*j-1) * is the count at the left endpoint of the j-th interval, i.e., * the function value N(INTVL(2*j-1)), and INTVLCT(2*j) is the * count at the right endpoint of the j-th interval. This array * will, in general, be reordered on output. * See the comments in PDLAEBZ for more on the function N(w). * * NVAL (input/output) INTEGER array, dimension (2*(KL-KF)) * The desired counts, N(w), at the endpoints of the * corresponding intervals. This array will, in general, * be reordered on output. * * ABSTOL (input) DOUBLE PRECISION * The minimum (absolute) width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This must be at least zero. * * RELTOL (input) DOUBLE PRECISION * The minimum relative width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This should be at least radix*machine epsilon. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Local Scalars .. LOGICAL CONDN INTEGER I, ITMP1, ITMP2, J, K, KFNEW DOUBLE PRECISION TMP1, TMP2, TMP3, TMP4 * .. * .. Executable Statements .. * KFNEW = KF DO 10 I = KF, KL - 1 K = 2*I TMP3 = INTVL( K-1 ) TMP4 = INTVL( K ) TMP1 = ABS( TMP4-TMP3 ) TMP2 = MAX( ABS( TMP3 ), ABS( TMP4 ) ) CONDN = TMP1.LT.MAX( ABSTOL, RELTOL*TMP2 ) IF( IJOB.EQ.0 ) $ CONDN = CONDN .OR. ( ( INTVLCT( K-1 ).EQ.NVAL( K-1 ) ) .AND. $ INTVLCT( K ).EQ.NVAL( K ) ) IF( CONDN ) THEN IF( I.GT.KFNEW ) THEN * * Reorder Intervals * J = 2*KFNEW TMP1 = INTVL( K-1 ) TMP2 = INTVL( K ) ITMP1 = INTVLCT( K-1 ) ITMP2 = INTVLCT( K ) INTVL( K-1 ) = INTVL( J-1 ) INTVL( K ) = INTVL( J ) INTVLCT( K-1 ) = INTVLCT( J-1 ) INTVLCT( K ) = INTVLCT( J ) INTVL( J-1 ) = TMP1 INTVL( J ) = TMP2 INTVLCT( J-1 ) = ITMP1 INTVLCT( J ) = ITMP2 IF( IJOB.EQ.0 ) THEN ITMP1 = NVAL( K-1 ) NVAL( K-1 ) = NVAL( J-1 ) NVAL( J-1 ) = ITMP1 ITMP1 = NVAL( K ) NVAL( K ) = NVAL( J ) NVAL( J ) = ITMP1 END IF END IF KFNEW = KFNEW + 1 END IF 10 CONTINUE KF = KFNEW RETURN * * End of PDLAECV * END * SUBROUTINE PDLAPDCT( SIGMA, N, D, PIVMIN, COUNT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER COUNT, N DOUBLE PRECISION PIVMIN, SIGMA * .. * .. Array Arguments .. DOUBLE PRECISION D( * ) * .. * * Purpose * ======= * * PDLAPDCT counts the number of negative eigenvalues of (T - SIGMA I). * This implementation of the Sturm Sequence loop has conditionals in * the innermost loop to avoid overflow and determine the sign of a * floating point number. PDLAPDCT will be referred to as the "paranoid" * implementation of the Sturm Sequence loop. * * This is a SCALAPACK internal procedure and arguments are not checked * for unreasonable values. * * Arguments * ========= * * SIGMA (input) DOUBLE PRECISION * The shift. PDLAPDCT finds the number of eigenvalues of T less * than or equal to SIGMA. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * D (input) DOUBLE PRECISION array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * PIVMIN (input) DOUBLE PRECISION * The minimum absolute of a "pivot" in this "paranoid" * implementation of the Sturm sequence loop. This must be at * least max_j |e(j)^2| *safe_min, and at least safe_min, where * safe_min is at least the smallest number that can divide 1.0 * without overflow. * * COUNT (output) INTEGER * The count of the number of eigenvalues of T less than or * equal to SIGMA. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I DOUBLE PRECISION TMP * .. * .. Executable Statements .. * TMP = D( 1 ) - SIGMA IF( ABS( TMP ).LE.PIVMIN ) $ TMP = -PIVMIN COUNT = 0 IF( TMP.LE.ZERO ) $ COUNT = 1 DO 10 I = 3, 2*N - 1, 2 TMP = D( I ) - D( I-1 ) / TMP - SIGMA IF( ABS( TMP ).LE.PIVMIN ) $ TMP = -PIVMIN IF( TMP.LE.ZERO ) $ COUNT = COUNT + 1 10 CONTINUE * RETURN * * End of PDLAPDCT * END scalapack-1.8.0/SRC/pdorg2r.f0000640000175000017500000002567710363532303015512 0ustar muammarmuammar SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORG2R generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PDGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PDGEQRF in the K columns of its array * argument A(IA:*,JA:JA+K-1). On exit, this array contains * the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, KQ, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 DOUBLE PRECISION TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLASET, PDSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja+k:ja+n-1 to columns of the unit matrix * CALL PDLASET( 'All', K, N-K, ZERO, ZERO, A, IA, JA+K, DESCA ) CALL PDLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, DESCA ) * TAUJ = ZERO KQ = MAX( 1, NUMROC( JA+K-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) DO 10 J = JA+K-1, JA, -1 * * Apply H(j) to A(ia+j-ja:ia+m-1,j:ja+n-1) from the left * IF( J.LT.JA+N-1 ) THEN CALL PDELSET( A, IA+J-JA, J, DESCA, ONE ) CALL PDLARF( 'Left', M-J+JA, JA+N-J-1, A, IA+J-JA, J, DESCA, $ 1, TAU, A, IA+J-JA, J+1, DESCA, WORK ) END IF * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, KQ ) ) IF( J-JA.LT.M-1 ) $ CALL PDSCAL( M-J+JA-1, -TAUJ, A, IA+J-JA+1, J, DESCA, 1 ) CALL PDELSET( A, IA+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia:ia+j-ja-1,j) to zero * CALL PDLASET( 'All', J-JA, 1, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORG2R * END scalapack-1.8.0/SRC/pzpbtrsv.f0000640000175000017500000014173710363532303016021 0ustar muammarmuammar SUBROUTINE PZPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, $ IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZPBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Cholesky factorization code PZPBTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PZPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPBTRF and this is stored in AF. If a linear system * is to be solved using PZPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MBW2, MYCOL, MYROW, MY_NUM_COLS, NB, NP, $ NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZGEMM, $ ZGERV2D, ZGESD2D, ZLACPY, ZMATADD, ZTBTRS, $ ZTRMM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW * BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PZPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ BW*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, $ 'PZPBTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BW PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1), LLDB, $ WORK( 1 ), BW ) * CALL ZTRMM( 'L', 'U', 'N', 'N', BW, NRHS, -CONE, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ WORK( 1 ), BW ) * CALL ZMATADD( BW, NRHS, CONE, WORK( 1 ), BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', BW, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+BW-BW ), BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTRTRS( 'L', 'N', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTRTRS( 'L', 'C', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+BW-BW ), BW ) * CALL ZTRMM( 'L', 'U', 'C', 'N', BW, NRHS, -CONE, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL ZMATADD( BW, NRHS, CONE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1), LLDB, $ WORK( 1 ), BW ) * CALL ZTRMM( 'L', 'L', 'C', 'N', BW, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, WORK( 1 ), $ BW ) * CALL ZMATADD( BW, NRHS, CONE, WORK( 1 ), BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', BW, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+BW-BW ), BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTRTRS( 'L', 'N', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTRTRS( 'L', 'C', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+BW-BW ), BW ) * CALL ZTRMM( 'L', 'L', 'N', 'N', BW, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL ZMATADD( BW, NRHS, CONE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZPBTRSV * END scalapack-1.8.0/SRC/pchetd2.f0000640000175000017500000004306310363532303015451 0ustar muammarmuammar SUBROUTINE PCHETD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCHETD2 reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*N. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J, $ JJ, JK, JN, LDA, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW COMPLEX ALPHA, TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CAXPY, CGEBR2D, $ CGEBS2D, CHK1MAT, CHEMV, $ CHER2, CLARFG, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME COMPLEX CDOTC EXTERNAL LSAME, CDOTC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) LWMIN = 3 * N * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHETD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( UPPER ) THEN * * Process(IAROW, IACOL) owns block to be reduced * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the upper triangle of sub( A ) * IK = II+N-1+(JJ+N-2)*LDA A( IK ) = REAL( A( IK ) ) DO 10 J = N-1, 1, -1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA:IA+J-1,JA:JA+J-1) * ALPHA = A( IK+JK*LDA ) CALL CLARFG( J, ALPHA, A( II+JK*LDA ), 1, TAUI ) E( JK+1 ) = REAL( ALPHA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA:IA+J-1,JA:JA+J-1) * A( IK+JK*LDA ) = ONE * * Compute x := tau * A * v storing x in TAU(1:i) * CALL CHEMV( UPLO, J, TAUI, A( II+(JJ-1)*LDA ), $ LDA, A( II+JK*LDA ), 1, ZERO, $ TAU( JJ ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*CDOTC( J, TAU( JJ ), 1, $ A( II+JK*LDA ), 1 ) CALL CAXPY( J, ALPHA, A( II+JK*LDA ), 1, $ TAU( JJ ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL CHER2( UPLO, J, -ONE, A( II+JK*LDA ), 1, $ TAU( JJ ), 1, A( II+(JJ-1)*LDA ), $ LDA ) END IF * * Copy D, E, TAU to broadcast them columnwise. * A( IK+JK*LDA ) = CMPLX( E( JK+1 ) ) D( JK+1 ) = REAL( A( IK+1+JK*LDA ) ) WORK( J+1 ) = CMPLX( D( JK+1 ) ) WORK( N+J+1 ) = CMPLX( E( JK+1 ) ) TAU( JK+1 ) = TAUI WORK( 2*N+J+1 ) = TAU( JK+1 ) * 10 CONTINUE D( JJ ) = REAL( A( II+(JJ-1)*LDA ) ) WORK( 1 ) = CMPLX( D( JJ ) ) WORK( N+1 ) = ZERO WORK( 2*N+1 ) = ZERO * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1 ) * ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1, $ IAROW, IACOL ) DO 20 J = 2, N JN = JJ + J - 1 D( JN ) = REAL( WORK( J ) ) E( JN ) = REAL( WORK( N+J ) ) TAU( JN ) = WORK( 2*N+J ) 20 CONTINUE D( JJ ) = REAL( WORK( 1 ) ) END IF END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the lower triangle of sub( A ) * A( II+(JJ-1)*LDA ) = REAL( A( II+(JJ-1)*LDA ) ) DO 30 J = 1, N - 1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1) * ALPHA = A( IK+1+(JK-1)*LDA ) CALL CLARFG( N-J, ALPHA, A( IK+2+(JK-1)*LDA ), 1, $ TAUI ) E( JK ) = REAL( ALPHA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1) * A( IK+1+(JK-1)*LDA ) = ONE * * Compute x := tau * A * v storing y in TAU(i:n-1) * CALL CHEMV( UPLO, N-J, TAUI, A( IK+1+JK*LDA ), $ LDA, A( IK+1+(JK-1)*LDA ), 1, $ ZERO, TAU( JK ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*CDOTC( N-J, TAU( JK ), 1, $ A( IK+1+(JK-1)*LDA ), 1 ) CALL CAXPY( N-J, ALPHA, A( IK+1+(JK-1)*LDA ), $ 1, TAU( JK ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL CHER2( UPLO, N-J, -ONE, $ A( IK+1+(JK-1)*LDA ), 1, $ TAU( JK ), 1, A( IK+1+JK*LDA ), $ LDA ) END IF * * Copy D(JK), E(JK), TAU(JK) to broadcast them * columnwise. * A( IK+1+(JK-1)*LDA ) = CMPLX( E( JK ) ) D( JK ) = REAL( A( IK+(JK-1)*LDA ) ) WORK( J ) = CMPLX( D( JK ) ) WORK( N+J ) = CMPLX( E( JK ) ) TAU( JK ) = TAUI WORK( 2*N+J ) = TAU( JK ) 30 CONTINUE JN = JJ + N - 1 D( JN ) = REAL( A( II+N-1+(JN-1)*LDA ) ) WORK( N ) = CMPLX( D( JN ) ) TAU( JN ) = ZERO WORK( 2*N ) = ZERO * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1 ) * ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1, IAROW, IACOL ) DO 40 J = 1, N - 1 JN = JJ + J - 1 D( JN ) = REAL( WORK( J ) ) E( JN ) = REAL( WORK( N+J ) ) TAU( JN ) = WORK( 2*N+J ) 40 CONTINUE JN = JJ + N - 1 D( JN ) = REAL( WORK( N ) ) TAU( JN ) = ZERO END IF END IF END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCHETD2 * END scalapack-1.8.0/SRC/pdlaiect.c0000640000175000017500000002210010363532303015667 0ustar muammarmuammar /* --------------------------------------------------------------------- * * -- ScaLAPACK routine (version 1.5) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * --------------------------------------------------------------------- */ /* * Include Files */ #include "pxsyevx.h" #include #include #define proto(x) () void pdlasnbt_( ieflag ) /* * .. Scalar Arguments .. */ int *ieflag; { /* * * Purpose * ======= * * pdalsnbt finds the position of the signbit of a double * double precision floating point number. This routine assumes IEEE * arithmetic, and hence, tests only the 32nd and 64th bits as * possibilities for the sign bit. * * Note : For this release, we assume that sizeof(int) is 4 bytes. * * Note : If a compile time flag (NO_IEEE) indicates that the * machine does not have IEEE arithmetic, IEFLAG = 0 is returned. * * Arguments * ========= * * IEFLAG (output) INTEGER * This indicates the position of the signbit of any double * precision floating point number. * IEFLAG = 0 if the compile time flag, NO_IEEE, indicates * that the machine does not have IEEE arithmetic, or if * sizeof(int) is different from 4 bytes. * IEFLAG = 1 indicates that the sign bit is the 32nd * bit ( Big Endian ). * IEFLAG = 2 indicates that the sign bit is the 64th * bit ( Little Endian ). * * ===================================================================== * * .. Local Scalars .. */ double x; int negone=-1, errornum; unsigned int *ix; /* .. * .. Executable Statements .. */ #ifdef NO_IEEE *ieflag = 0; #else if(sizeof(int) != 4){ *ieflag = 0; return; } x = (double) -1.0; ix = (unsigned int *) &x; if(( *ix == 0xbff00000) && ( *(ix+1) == 0x0) ) { *ieflag = 1; } else if(( *(ix+1) == 0xbff00000) && ( *ix == 0x0) ) { *ieflag = 2; } else { *ieflag = 0; } #endif } void pdlaiectb_( sigma, n, d, count ) /* * .. Scalar Arguments .. */ double *sigma, *d; int *n, *count; { /* * * Purpose * ======= * * pdlaiectb computes the number of negative eigenvalues of (A- SIGMA I). * This implementation of the Sturm Sequence loop exploits IEEE Arithmetic * and has no conditionals in the innermost loop. To extract the signbit, * this routine assumes that the double precision word is stored in * "Big Endian" word order, i.e, the signbit is assumed to be bit 32. * * Note that all arguments are call-by-reference so that this routine * can be directly called from Fortran code. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * SIGMA (input) DOUBLE PRECISION * The shift. pdlaiectb finds the number of eigenvalues * less than equal to SIGMA. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * D (input) DOUBLE PRECISION array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * COUNT (output) INTEGER * The count of the number of eigenvalues of T less than or * equal to SIGMA. * * ===================================================================== * * .. Local Scalars .. */ double lsigma, tmp, *pd, *pe2; int i; /* .. * .. Executable Statements .. */ lsigma = *sigma; pd = d; pe2 = d+1; tmp = *pd - lsigma; pd += 2; *count = (*((int *)&tmp) >> 31) & 1; for(i = 1;i < *n;i++){ tmp = *pd - *pe2/tmp - lsigma; pd += 2; pe2 += 2; *count += ((*((int *)&tmp)) >> 31) & 1; } } void pdlaiectl_( sigma, n, d, count ) /* * .. Scalar Arguments .. */ double *sigma, *d; int *n, *count; { /* * * Purpose * ======= * * pdlaiectl computes the number of negative eigenvalues of (A- SIGMA I). * This implementation of the Sturm Sequence loop exploits IEEE Arithmetic * and has no conditionals in the innermost loop. To extract the signbit, * this routine assumes that the double precision word is stored in * "Little Endian" word order, i.e, the signbit is assumed to be bit 64. * * Note that all arguments are call-by-reference so that this routine * can be directly called from Fortran code. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * SIGMA (input) DOUBLE PRECISION * The shift. pdlaiectl finds the number of eigenvalues * less than equal to SIGMA. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * D (input) DOUBLE PRECISION array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * COUNT (output) INTEGER * The count of the number of eigenvalues of T less than or * equal to SIGMA. * * ===================================================================== * * .. Local Scalars .. */ double lsigma, tmp, *pd, *pe2; int i; /* .. * .. Executable Statements .. */ lsigma = *sigma; pd = d; pe2 = d+1; tmp = *pd - lsigma; pd += 2; *count = (*(((int *)&tmp)+1) >> 31) & 1; for(i = 1;i < *n;i++){ tmp = *pd - *pe2/tmp - lsigma; pd += 2; pe2 += 2; *count += (*(((int *)&tmp)+1) >> 31) & 1; } } pdlachkieee_( isieee, rmax, rmin ) /* * .. Scalar Arguments .. */ double *rmax, *rmin; int *isieee; { /* * * Purpose * ======= * * pdlachkieee performs a simple check to make sure that the features * of the IEEE standard that we rely on are implemented. In some * implementations, pdlachkieee may not return. * * Note that all arguments are call-by-reference so that this routine * can be directly called from Fortran code. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * ISIEEE (local output) INTEGER * On exit, ISIEEE = 1 implies that all the features of the * IEEE standard that we rely on are implemented. * On exit, ISIEEE = 0 implies that some the features of the * IEEE standard that we rely on are missing. * * RMAX (local input) DOUBLE PRECISION * The overflow threshold ( = DLAMCH('O') ). * * RMIN (local input) DOUBLE PRECISION * The underflow threshold ( = DLAMCH('U') ). * * ===================================================================== * * .. Local Scalars .. */ double x, pinf, pzero, ninf, nzero; int ieflag, *ix, sbit1, sbit2, negone=-1, errornum; /* .. * .. Executable Statements .. */ pdlasnbt_( &ieflag ); pinf = *rmax / *rmin; pzero = 1.0 / pinf; pinf = 1.0 / pzero; if( pzero != 0.0 ){ printf("pzero = %g should be zero\n",pzero); *isieee = 0; return ; } if( ieflag == 1 ){ sbit1 = (*((int *)&pzero) >> 31) & 1; sbit2 = (*((int *)&pinf) >> 31) & 1; }else if(ieflag == 2){ sbit1 = (*(((int *)&pzero)+1) >> 31) & 1; sbit2 = (*(((int *)&pinf)+1) >> 31) & 1; } if( sbit1 == 1 ){ printf("Sign of positive infinity is incorrect\n"); *isieee = 0; } if( sbit2 == 1 ){ printf("Sign of positive zero is incorrect\n"); *isieee = 0; } ninf = -pinf; nzero = 1.0 / ninf; ninf = 1.0 / nzero; if( nzero != 0.0 ){ printf("nzero = %g should be zero\n",nzero); *isieee = 0; } if( ieflag == 1 ){ sbit1 = (*((int *)&nzero) >> 31) & 1; sbit2 = (*((int *)&ninf) >> 31) & 1; }else if(ieflag == 2){ sbit1 = (*(((int *)&nzero)+1) >> 31) & 1; sbit2 = (*(((int *)&ninf)+1) >> 31) & 1; } if( sbit1 == 0 ){ printf("Sign of negative infinity is incorrect\n"); *isieee = 0; } if( sbit2 == 0 ){ printf("Sign of negative zero is incorrect\n"); *isieee = 0; } } scalapack-1.8.0/SRC/pdlahrd.f0000640000175000017500000002413010602576752015544 0ustar muammarmuammar SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCY( * ) DOUBLE PRECISION A( * ), T( * ), TAU( * ), WORK( * ), Y( * ) * .. * * Purpose * ======= * * PDLAHRD reduces the first NB columns of a real general N-by-(N-K+1) * distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that elements below the * k-th subdiagonal are zero. The reduction is performed by an orthogo- * nal similarity transformation Q' * A * Q. The routine returns the * matrices V and T which determine Q as a block reflector I - V*T*V', * and also the matrix Y = A * V * T. * * This is an auxiliary routine called by PDGEHRD. In the following * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). * N >= 0. * * K (global input) INTEGER * The offset for the reduction. Elements below the k-th * subdiagonal in the first NB columns are reduced to zero. * * NB (global input) INTEGER * The number of columns to be reduced. * * A (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of dimension (LLD_A, * LOCc(JA+N-K)). On entry, this array contains the the local * pieces of the N-by-(N-K+1) general distributed matrix * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above * the k-th subdiagonal in the first NB columns are overwritten * with the corresponding elements of the reduced distributed * matrix; the elements below the k-th subdiagonal, with the * array TAU, represent the matrix Q as a product of elementary * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are * unchanged. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). TAU is tied to the distributed matrix A. * * T (local output) DOUBLE PRECISION array, dimension (NB_A,NB_A) * The upper triangular matrix T. * * Y (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_Y,NB_A). On exit, this array * contains the local pieces of the N-by-NB distributed * matrix Y. LLD_Y >= LOCr(IA+N-1). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) DOUBLE PRECISION array, dimension (NB) * * Further Details * =============== * * The matrix Q is represented as a product of nb elementary reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the (n-k+1)-by-nb matrix * V which is needed, with T and Y, to apply the transformation to the * unreduced part of the matrix, using an update of the form: * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V'). * * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the * following example with n = 7, k = 3 and nb = 2: * * ( a h a a a ) * ( a h a a a ) * ( a h a a a ) * ( h h a a a ) * ( v1 h a a a ) * ( v1 v2 a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper * Hessenberg matrix H, and vi denotes an element of the vector * defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL IPROC INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL, $ JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ DOUBLE PRECISION EI * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DAXPY, DESCSET, DCOPY, $ DSCAL, DTRMV, INFOG2L, PDELSET, $ PDGEMV, PDLARFG, PDSCAL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.1 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) * IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - IOFF * EI = ZERO JW = IOFF + 1 CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW, $ IACOL, ICTXT, 1 ) * DO 10 L = 1, NB I = IA + K + L - 2 J = JA + L - 1 * IF( L.GT.1 ) THEN * * Update A(ia:ia+n-1,j) * * Compute i-th column of A - Y * V' * CALL PDGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J, $ DESCA, 1 ) * * Apply I - V * T' * V' to this column (call it b) from the * left, using the last column of T as workspace * * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) * ( V2 ) ( b2 ) * * where V1 is unit lower triangular * * w := V1' * b1 * IF( IPROC ) THEN CALL DCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1, $ WORK( JW ), 1 ) CALL DTRMV( 'Lower', 'Transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) END IF * * w := w + V2'*b2 * CALL PDGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA, $ DESCA, A, I+1, J, DESCA, 1, ONE, WORK, 1, JW, $ DESCW, DESCW( M_ ) ) * * w := T'*w * IF( IPROC ) $ CALL DTRMV( 'Upper', 'Transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), WORK( JW ), 1 ) * * b2 := b2 - V2*w * CALL PDGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE, $ A, I+1, J, DESCA, 1 ) * * b1 := b1 - V1*w * IF( IPROC ) THEN CALL DTRMV( 'Lower', 'No transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) CALL DAXPY( L-1, -ONE, WORK( JW ), 1, $ A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 ) END IF CALL PDELSET( A, I, J-1, DESCA, EI ) END IF * * Generate the elementary reflector H(i) to annihilate * A(ia+k+i:ia+n-1,j) * CALL PDLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PDELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(iy:y+n-1,jy+l-1) * CALL PDGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1, $ DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1, $ DESCY, 1 ) CALL PDGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW, DESCW, $ DESCW( M_ ) ) CALL PDGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY, $ JY+L-1, DESCY, 1 ) JL = MIN( JJ+L-1, JA+NQ-1 ) CALL PDSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 ) * * Compute T(1:i,i) * IF( IPROC ) THEN JT = ( L-1 ) * DESCA( NB_ ) CALL DSCAL( L-1, -TAU( JL ), WORK( JW ), 1 ) CALL DCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 ) CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), T( JT+1 ), 1 ) T( JT+L ) = TAU( JL ) END IF 10 CONTINUE * CALL PDELSET( A, K+NB+IA-1, J, DESCA, EI ) * RETURN * * End of PDLAHRD * END scalapack-1.8.0/SRC/pclauu2.f0000640000175000017500000002041310363532303015465 0ustar muammarmuammar SUBROUTINE PCLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAUU2 computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the unblocked form of the algorithm, calling Level 2 BLAS. * No communication is performed by this routine, the matrix to operate * on should be strictly local to one process. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the matrix * sub( A ) is upper or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the order of the triangular factor U or L. N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICURR, IDIAG, IIA, IOFFA, JJA, $ LDA, MYCOL, MYROW, NA, NPCOL, NPROW REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEMV, CLACGV, $ CSSCAL, INFOG2L * .. * .. External Functions .. LOGICAL LSAME COMPLEX CDOTC EXTERNAL CDOTC, LSAME * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * DO 10 NA = N-1, 1, -1 AII = A( IDIAG ) ICURR = IDIAG + LDA A( IDIAG ) = AII*AII + REAL( CDOTC( NA, A( ICURR ), LDA, $ A( ICURR ), LDA ) ) CALL CLACGV( NA, A( ICURR ), LDA ) CALL CGEMV( 'No transpose', N-NA-1, NA, ONE, $ A( IOFFA+LDA ), LDA, A( ICURR ), LDA, $ CMPLX( AII ), A( IOFFA ), 1 ) CALL CLACGV( NA, A( ICURR ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE AII = A( IDIAG ) CALL CSSCAL( N, AII, A( IOFFA ), 1 ) * ELSE * * Compute the product L' * L. * DO 20 NA = 1, N-1 AII = A( IDIAG ) ICURR = IDIAG + 1 A(IDIAG) = AII*AII + REAL( CDOTC( N-NA, A( ICURR ), 1, $ A( ICURR ), 1 ) ) CALL CLACGV( NA-1, A( IOFFA ), LDA ) CALL CGEMV( 'Conjugate transpose', N-NA, NA-1, ONE, $ A( IOFFA+1 ), LDA, A( ICURR ), 1, $ CMPLX( AII ), A( IOFFA ), LDA ) CALL CLACGV( NA-1, A( IOFFA ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 20 CONTINUE AII = A( IDIAG ) CALL CSSCAL( N, AII, A( IOFFA ), LDA ) * END IF * END IF * RETURN * * End of PCLAUU2 * END scalapack-1.8.0/SRC/pdorg2l.f0000640000175000017500000002563010363532303015471 0ustar muammarmuammar SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORG2L generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PDGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PDGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0 DOUBLE PRECISION TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLASET, PDSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja:ja+n-k-1 to columns of the unit matrix * CALL PDLASET( 'All', M-N, N-K, ZERO, ZERO, A, IA, JA, DESCA ) CALL PDLASET( 'All', N, N-K, ZERO, ONE, A, IA+M-N, JA, DESCA ) * TAUJ = ZERO NQA0 = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) ) DO 10 J = JA+N-K, JA+N-1 * * Apply H(j) to A(ia:ia+m-n+j-ja,ja:j) from the left * CALL PDELSET( A, IA+M-N+J-JA, J, DESCA, ONE ) CALL PDLARF( 'Left', M-N+J-JA+1, J-JA, A, IA, J, DESCA, 1, TAU, $ A, IA, JA, DESCA, WORK ) * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, NQA0 ) ) CALL PDSCAL( M-N+J-JA, -TAUJ, A, IA, J, DESCA, 1 ) CALL PDELSET( A, IA+M-N+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia+m-n+j-ja+1:ia+m-1,j) to zero * CALL PDLASET( 'All', JA+N-1-J, 1, ZERO, ZERO, A, IA+M-N+J-JA+1, $ J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORG2L * END scalapack-1.8.0/SRC/ddbtrf.f0000640000175000017500000002470210363532303015364 0ustar muammarmuammar SUBROUTINE DDBTRF( M, N, KL, KU, AB, LDAB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from DGBTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. DOUBLE PRECISION AB( LDAB, * ) * .. * * Purpose * ======= * * Ddbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting or row interchanges. * * This is the blocked version of the algorithm, calling Level 3 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) REAL array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER NBMAX, LDWORK PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) * .. * .. Local Scalars .. INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP, $ JU, KM, KV, NB, NW * .. * .. Local Arrays .. DOUBLE PRECISION WORK13( LDWORK, NBMAX ), $ WORK31( LDWORK, NBMAX ) * .. * .. External Functions .. INTEGER ILAENV, ISAMAX EXTERNAL ILAENV, ISAMAX * .. * .. External Subroutines .. EXTERNAL DCOPY, DDBTF2, DGEMM, DGER, DSCAL, $ DSWAP, DTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U * KV = KU * * Test the input parameters. * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 ) THEN INFO = -3 ELSE IF( KU.LT.0 ) THEN INFO = -4 ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Determine the block size for this environment * NB = ILAENV( 1, 'DDBTRF', ' ', M, N, KL, KU ) * * The block size must not exceed the limit set by the size of the * local arrays WORK13 and WORK31. * NB = MIN( NB, NBMAX ) * IF( NB.LE.1 .OR. NB.GT.KL ) THEN * * Use unblocked code * CALL DDBTF2( M, N, KL, KU, AB, LDAB, INFO ) ELSE * * Use blocked code * * Zero the superdiagonal elements of the work array WORK13 * DO 20 J = 1, NB DO 10 I = 1, J - 1 WORK13( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * * Zero the subdiagonal elements of the work array WORK31 * DO 40 J = 1, NB DO 30 I = J + 1, NB WORK31( I, J ) = ZERO 30 CONTINUE 40 CONTINUE * * JU is the index of the last column affected by the current * stage of the factorization * JU = 1 * DO 180 J = 1, MIN( M, N ), NB JB = MIN( NB, MIN( M, N )-J+1 ) * * The active part of the matrix is partitioned * * A11 A12 A13 * A21 A22 A23 * A31 A32 A33 * * Here A11, A21 and A31 denote the current block of JB columns * which is about to be factorized. The number of rows in the * partitioning are JB, I2, I3 respectively, and the numbers * of columns are JB, J2, J3. The superdiagonal elements of A13 * and the subdiagonal elements of A31 lie outside the band. * I2 = MIN( KL-JB, M-J-JB+1 ) I3 = MIN( JB, M-J-KL+1 ) * * J2 and J3 are computed after JU has been updated. * * Factorize the current block of JB columns * DO 80 JJ = J, J + JB - 1 * * Find pivot and test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-JJ ) JP = 1 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) * * Compute multipliers * CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), $ 1 ) * * Update trailing submatrix within the band and within * the current block. JM is the index of the last column * which needs to be updated. * JM = MIN( JU, J+JB-1 ) IF( JM.GT.JJ ) THEN CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, $ AB( KV, JJ+1 ), LDAB-1, $ AB( KV+1, JJ+1 ), LDAB-1 ) END IF END IF * * Copy current column of A31 into the work array WORK31 * NW = MIN( JJ-J+1, I3 ) IF( NW.GT.0 ) $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, $ WORK31( 1, JJ-J+1 ), 1 ) 80 CONTINUE IF( J+JB.LE.N ) THEN * * Apply the row interchanges to the other blocks. * J2 = MIN( JU-J+1, KV ) - JB J3 = MAX( 0, JU-J-KV+1 ) * * Update the relevant part of the trailing submatrix * IF( J2.GT.0 ) THEN * * Update A12 * CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1 ) * IF( I2.GT.0 ) THEN * * Update A22 * CALL DGEMM( 'No transpose', 'No transpose', I2, J2, $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, $ AB( KV+1, J+JB ), LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A32 * CALL DGEMM( 'No transpose', 'No transpose', I3, J2, $ JB, -ONE, WORK31, LDWORK, $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) END IF END IF * IF( J3.GT.0 ) THEN * * Copy the lower triangle of A13 into the work array * WORK13 * DO 130 JJ = 1, J3 DO 120 II = JJ, JB WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 120 CONTINUE 130 CONTINUE * * Update A13 in the work array * CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, $ WORK13, LDWORK ) * IF( I2.GT.0 ) THEN * * Update A23 * CALL DGEMM( 'No transpose', 'No transpose', I2, J3, $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), $ LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A33 * CALL DGEMM( 'No transpose', 'No transpose', I3, J3, $ JB, -ONE, WORK31, LDWORK, WORK13, $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) END IF * * Copy the lower triangle of A13 back into place * DO 150 JJ = 1, J3 DO 140 II = JJ, JB AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 140 CONTINUE 150 CONTINUE END IF ELSE END IF * * copy the upper triangle of A31 back into place * DO 170 JJ = J + JB - 1, J, -1 * * Copy the current column of A31 back into place * NW = MIN( I3, JJ-J+1 ) IF( NW.GT.0 ) $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1, $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 170 CONTINUE 180 CONTINUE END IF * RETURN * * End of DDBTRF * END scalapack-1.8.0/SRC/pcgerqf.f0000640000175000017500000002723110363532303015546 0ustar muammarmuammar SUBROUTINE PCGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGERQF computes a RQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCGERQ2, $ PCLARFB, PCLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGERQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( IL.GE.IN+1 ) THEN * * Use blocked code initially * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute the RQ factorization of the current block * A(i:i+ib-1,ja:ja+n-m+i+ib-ia-1) * CALL PCGERQ2( IB, N-M+I+IB-IA, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PCLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, $ I, JA, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the * right * CALL PCLARFB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = IN - IA + 1 NU = N - M + IN - IA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PCGERQ2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGERQF * END scalapack-1.8.0/SRC/pdlanhs.f0000640000175000017500000006250310363532303015551 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLANHS returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * Hessenberg distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PDLANHS returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PDLANHS as described * above. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PDLANHS is set to zero. N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, INXTROW, $ IOFFA, IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, DLASSQ, $ INFOG2L, PDTREECOMB * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL LSAME, ICEIL, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * VALUE = ZERO * * Find max(abs(A(i,j))). * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 20 LL = JJ, JJ+JB-1 DO 10 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 40 LL = JJ, JJ+JB-1 DO 30 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 50 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 70 LL = JJ, JJ + JB -1 DO 60 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE ELSE DO 90 LL = JJ, JJ+JB-1 DO 80 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 140 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 110 LL = JJ, JJ + JB -1 DO 100 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE ELSE DO 130 LL = JJ, JJ + JB -1 DO 120 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, $ ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 140 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 160 LL = JJ, JJ+JB-1 SUM = ZERO DO 150 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 150 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 160 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 190 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 180 LL = JJ, JJ+JB-1 SUM = ZERO DO 170 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 170 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 180 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 190 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 210 LL = JJ, JJ + JB -1 SUM = ZERO DO 200 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 200 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 SUM = ZERO DO 220 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 220 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 230 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 280 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 250 LL = JJ, JJ + JB -1 SUM = ZERO DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 240 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 250 CONTINUE ELSE DO 270 LL = JJ, JJ + JB -1 SUM = ZERO DO 260 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 260 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 270 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 280 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * DO 290 KK = IIA, IIA+NP-1 WORK( KK ) = ZERO 290 CONTINUE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 310 LL = JJ, JJ+JB-1 DO 300 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 300 CONTINUE IOFFA = IOFFA + LDA 310 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 340 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 330 LL = JJ, JJ+JB-1 DO 320 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 320 CONTINUE IOFFA = IOFFA + LDA 330 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 340 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 360 LL = JJ, JJ + JB -1 DO 350 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA 360 CONTINUE ELSE DO 380 LL = JJ, JJ + JB -1 DO 370 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA 380 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 430 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 400 LL = JJ, JJ + JB -1 DO 390 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 390 CONTINUE IOFFA = IOFFA + LDA 400 CONTINUE ELSE DO 420 LL = JJ, JJ + JB -1 DO 410 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS(A(IOFFA+KK)) 410 CONTINUE IOFFA = IOFFA + LDA 420 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 430 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK, MAX( 1, NP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( NP.GT.0 ) THEN VALUE = WORK( IDAMAX( NP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * SCALE = ZERO SUM = ONE II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 440 LL = JJ, JJ+JB-1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 440 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 460 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 450 LL = JJ, JJ+JB-1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 450 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 460 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 470 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 470 CONTINUE ELSE DO 480 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 480 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL DLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 510 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 490 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 490 CONTINUE ELSE DO 500 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 500 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL DLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 510 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PDLANHS = VALUE * RETURN * * End of PDLANHS * END scalapack-1.8.0/SRC/pzgeqpf.f0000640000175000017500000005243210363532303015574 0ustar muammarmuammar SUBROUTINE PZGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. INTEGER IA, JA, INFO, LRWORK, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQPF computes a QR factorization with column pivoting of a * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1): * * sub( A ) * P = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX(3,Mp0 + Nq0). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCc(JA+N-1)+Nq0. * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * LOCc(JA+N-1) = NUMROC( JA+N-1, NB_A, MYCOL, CSRC_A, NPCOL ) * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(1) H(2) . . . H(n) * * Each H(i) has the form * * H = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1). * * The matrix P is represented in jpvt as follows: If * jpvt(j) = i * then the jth column of P is the ith canonical unit vector. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, ICURROW, $ ICURCOL, II, IIA, IOFFA, IPCOL, IROFF, ITEMP, $ J, JB, JJ, JJA, JJPVT, JN, KB, K, KK, KSTART, $ KSTEP, LDA, LL, LRWMIN, LWMIN, MN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ, NQ0, PVT DOUBLE PRECISION TEMP, TEMP2 COMPLEX*16 AJJ, ALPHA * .. * .. Local Arrays .. INTEGER DESCN( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGERV2D, $ IGESD2D, INFOG1L, INFOG2L, PCHK1MAT, PDAMAX, $ PDZNRM2, PXERBLA, PZELSET, $ PZLARFC, PZLARFG, ZCOPY, ZGEBR2D, $ ZGEBS2D, ZGERV2D, ZGESD2D, ZLARFG, $ ZSWAP * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DCMPLX, DCONJG, IDINT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NQ0 = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) LWMIN = MAX( 3, MP + NQ ) LRWMIN = NQ0 + NQ * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEQPF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF MN = MIN( M, N ) * * Initialize the array of pivots * LDA = DESCA( LLD_ ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) KSTEP = NPCOL * DESCA( NB_ ) * IF( MYCOL.EQ.IACOL ) THEN * * Handle first block separately * JB = JN - JA + 1 DO 10 LL = JJA, JJA+JB-1 IPIV( LL ) = JA + LL - JJA 10 CONTINUE KSTART = JN + KSTEP - DESCA( NB_ ) * * Loop over remaining block of columns * DO 30 KK = JJA+JB, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 20 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 20 CONTINUE KSTART = KSTART + KSTEP 30 CONTINUE ELSE KSTART = JN + ( MOD( MYCOL-IACOL+NPCOL, NPCOL )-1 )* $ DESCA( NB_ ) DO 50 KK = JJA, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 40 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 40 CONTINUE KSTART = KSTART + KSTEP 50 CONTINUE END IF * * Initialize partial column norms, handle first block separately * CALL DESCSET( DESCN, 1, DESCA( N_ ), 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), ICTXT, 1 ) * JJ = JJA IF( MYCOL.EQ.IACOL ) THEN DO 60 KK = 0, JB-1 CALL PDZNRM2( M, RWORK( JJ+KK ), A, IA, JA+KK, DESCA, 1 ) RWORK( NQ+JJ+KK ) = RWORK( JJ+KK ) 60 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining blocks of columns * DO 80 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 70 KK = 0, JB-1 CALL PDZNRM2( M, RWORK( JJ+KK ), A, IA, J+KK, DESCA, 1 ) RWORK( NQ+JJ+KK ) = RWORK( JJ+KK ) 70 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) 80 CONTINUE * * Compute factorization * DO 120 J = JA, JA+MN-1 I = IA + J - JA * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, ICURCOL ) K = JA + N - J IF( K.GT.1 ) THEN CALL PDAMAX( K, TEMP, PVT, RWORK, 1, J, DESCN, $ DESCN( M_ ) ) ELSE PVT = J END IF IF( J.NE.PVT ) THEN CALL INFOG1L( PVT, DESCA( NB_ ), NPCOL, MYCOL, $ DESCA( CSRC_ ), JJPVT, IPCOL ) IF( ICURCOL.EQ.IPCOL ) THEN IF( MYCOL.EQ.ICURCOL ) THEN CALL ZSWAP( MP, A( IIA+(JJ-1)*LDA ), 1, $ A( IIA+(JJPVT-1)*LDA ), 1 ) ITEMP = IPIV( JJPVT ) IPIV( JJPVT ) = IPIV( JJ ) IPIV( JJ ) = ITEMP RWORK( JJPVT ) = RWORK( JJ ) RWORK( NQ+JJPVT ) = RWORK( NQ+JJ ) END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN * CALL ZGESD2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) WORK( 1 ) = DCMPLX( DBLE( IPIV( JJ ) ) ) WORK( 2 ) = DCMPLX( RWORK( JJ ) ) WORK( 3 ) = DCMPLX( RWORK( JJ + NQ ) ) CALL ZGESD2D( ICTXT, 3, 1, WORK, 3, MYROW, IPCOL ) * CALL ZGERV2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) CALL IGERV2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, $ IPCOL ) * ELSE IF( MYCOL.EQ.IPCOL ) THEN * CALL ZGESD2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL IGESD2D( ICTXT, 1, 1, IPIV( JJPVT ), 1, MYROW, $ ICURCOL ) * CALL ZGERV2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL ZGERV2D( ICTXT, 3, 1, WORK, 3, MYROW, ICURCOL ) IPIV( JJPVT ) = IDINT( DBLE( WORK( 1 ) ) ) RWORK( JJPVT ) = DBLE( WORK( 2 ) ) RWORK( JJPVT+NQ ) = DBLE( WORK( 3 ) ) * END IF * END IF * END IF * * Generate elementary reflector H(i) * CALL INFOG1L( I, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, ICURROW ) IF( DESCA( M_ ).EQ.1 ) THEN IF( MYROW.EQ.ICURROW ) THEN IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*DESCA( LLD_ ) AJJ = A( IOFFA ) CALL ZLARFG( 1, AJJ, A( IOFFA ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = CMPLX( ONE ) - DCONJG( TAU( JJ ) ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1 ) CALL ZSCAL( NQ-JJ, ALPHA, A( IOFFA+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ ), 1 ) A( IOFFA ) = AJJ ELSE IF( N.GT.1 ) THEN CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, ICURROW, ICURCOL ) CALL ZSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1, ICURROW, ICURCOL ) END IF * ELSE * CALL PZLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(i) to A(ia+j-ja:ia+m-1,j+1:ja+n-1) from the left * CALL PZELSET( A, I, J, DESCA, DCMPLX( ONE ) ) CALL PZLARFC( 'Left', M-J+JA, JA+N-1-J, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK ) END IF CALL PZELSET( A, I, J, DESCA, AJJ ) * END IF * * Update partial columns norms * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 IF( MOD( J, DESCA( NB_ ) ).EQ.0 ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) IF( (JJA+NQ-JJ).GT.0 ) THEN IF( MYROW.EQ.ICURROW ) THEN CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, JJA+NQ-JJ, $ A( II+( MIN( JJA+NQ-1, JJ )-1 )*LDA ), $ LDA ) CALL ZCOPY( JJA+NQ-JJ, A( II+( MIN( JJA+NQ-1, JJ ) $ -1)*LDA ), LDA, WORK( MIN( JJA+NQ-1, JJ ) ), $ 1 ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', JJA+NQ-JJ, 1, $ WORK( MIN( JJA+NQ-1, JJ ) ), MAX( 1, NQ ), $ ICURROW, MYCOL ) END IF END IF * JN = MIN( ICEIL( J+1, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA + N - 1 ) IF( MYCOL.EQ.ICURCOL ) THEN DO 90 LL = JJ, JJ + JN - J - 1 IF( RWORK( LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( LL ) ) / RWORK( LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05D+0*TEMP* $ ( RWORK( LL ) / RWORK( NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PDZNRM2( IA+M-I-1, RWORK( LL ), A, $ I+1, J+LL-JJ, DESCA, 1 ) RWORK( NQ+LL ) = RWORK( LL ) ELSE RWORK( LL ) = ZERO RWORK( NQ+LL ) = ZERO END IF ELSE RWORK( LL ) = RWORK( LL ) * SQRT( TEMP ) END IF END IF 90 CONTINUE JJ = JJ + JN - J END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 110 K = JN+1, JA+N-1, DESCA( NB_ ) KB = MIN( JA+N-K, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 100 LL = JJ, JJ+KB-1 IF( RWORK(LL).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( LL ) ) / RWORK( LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05D+0*TEMP* $ ( RWORK( LL ) / RWORK( NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PDZNRM2( IA+M-I-1, RWORK( LL ), A, $ I+1, K+LL-JJ, DESCA, 1 ) RWORK( NQ+LL ) = RWORK( LL ) ELSE RWORK( LL ) = ZERO RWORK( NQ+LL ) = ZERO END IF ELSE RWORK( LL ) = RWORK( LL ) * SQRT( TEMP ) END IF END IF 100 CONTINUE JJ = JJ + KB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * 110 CONTINUE * 120 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) * RETURN * * End of PZGEQPF * END scalapack-1.8.0/SRC/pcgesvd.f0000640000175000017500000005574010377355407015575 0ustar muammarmuammar SUBROUTINE PCGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, + VT,IVT,JVT,DESCVT,WORK,LWORK,RWORK,INFO) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Oak Ridge National Laboratory * and Univ. of California Berkeley. * Jan 2006 * * .. Scalar Arguments .. CHARACTER JOBU,JOBVT INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N * .. * .. Array Arguments .. INTEGER DESCA(*),DESCU(*),DESCVT(*) COMPLEX A(*),U(*),VT(*),WORK(*) REAL S(*) REAL RWORK(*) * .. * * Purpose * ======= * * PCGESVD computes the singular value decomposition (SVD) of an * M-by-N matrix A, optionally computing the left and/or right * singular vectors. The SVD is written as * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A and the columns of U and V are the * corresponding right and left singular vectors, respectively. The * singular values are returned in array S in decreasing order and * only the first min(M,N) columns of U and rows of VT = V**T are * computed. * * Notes * ===== * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, and * assume that its process grid has dimension r x c. LOCr( K ) denotes * the number of elements of K that a process would receive if K were * distributed over the r processes of its process column. Similarly, * LOCc( K ) denotes the number of elements of K that a process would * receive if K were distributed over the c processes of its process * row. The values of LOCr() and LOCc() may be determined via a call * to the ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZE = min( M, N ) * SIZEQ = number of local columns in U * SIZEP = number of local rows in VT * * JOBU (global input) CHARACTER*1 * Specifies options for computing U: * = 'V': the first SIZE columns of U (the left singular * vectors) are returned in the array U; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (global input) CHARACTER*1 * Specifies options for computing V**T: * = 'V': the first SIZE rows of V**T (the right singular * vectors) are returned in the array VT; * = 'N': no rows of V**T (no right singular vectors) are * computed. * * M (global input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (global input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX * array, * global dimension (M, N), local dimension (MP, NQ) * On exit, the contents of A are destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * S (global output) REAL array, dimension SIZE * The singular values of A, sorted so that S(i) >= S(i+1). * * U (local output) COMPLEX array, local dimension * (MP, SIZEQ), global dimension (M, SIZE) * if JOBU = 'V', U contains the first min(m,n) columns of U * if JOBU = 'N', U is not referenced. * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local output) COMPLEX array, local dimension * (SIZEP, NQ), global dimension (SIZE, N). * If JOBVT = 'V', VT contains the first SIZE rows of * V**T. If JOBVT = 'N', VT is not referenced. * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * WORK (local workspace/output) COMPLEX array, dimension * (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * LWORK >= 1 + 2*SIZEB + MAX(WATOBD, WBDTOSVD), * * where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer, * respectively, to the workspace required to bidiagonalize * the matrix A and to go from the bidiagonal matrix to the * singular value decomposition U*S*VT. * * For WATOBD, the following holds: * * WATOBD = MAX(MAX(WPCLANGE,WPCGEBRD), * MAX(WPCLARED2D,WP(pre)LARED1D)), * * where WPCLANGE, WPCLARED1D, WPCLARED2D, WPCGEBRD are the * workspaces required respectively for the subprograms * PCLANGE, PSLARED1D, PSLARED2D, PCGEBRD. Using the * standard notation * * MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW), * NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL), * * the workspaces required for the above subprograms are * * WPCLANGE = MP, * WPSLARED1D = NQ0, * WPSLARED2D = MP0, * WPCGEBRD = NB*(MP + NQ + 1) + NQ, * * where NQ0 and MP0 refer, respectively, to the values obtained * at MYCOL = 0 and MYROW = 0. In general, the upper limit for * the workspace is given by a workspace required on * processor (0,0): * * WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0. * * In case of a homogeneous process grid this upper limit can * be used as an estimate of the minimum workspace for every * processor. * * For WBDTOSVD, the following holds: * * WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) + * MAX(WCBDSQR, * MAX(WANTU*WPCORMBRQLN, WANTVT*WPCORMBRPRT)), * * where * * 1, if left(right) singular vectors are wanted * WANTU(WANTVT) = * 0, otherwise * * and WCBDSQR, WPCORMBRQLN and WPCORMBRPRT refer respectively * to the workspace required for the subprograms CBDSQR, * PCUNMBR(QLN), and PCUNMBR(PRT), where QLN and PRT are the * values of the arguments VECT, SIDE, and TRANS in the call * to PCUNMBR. NRU is equal to the local number of rows of * the matrix U when distributed 1-dimensional "column" of * processes. Analogously, NCVT is equal to the local number * of columns of the matrix VT when distributed across * 1-dimensional "row" of processes. Calling the LAPACK * procedure CBDSQR requires * * WCBDSQR = MAX(1, 4*SIZE ) * * on every processor. Finally, * * WPCORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB, * WPCORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB, * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the work array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * RWORK (workspace) REAL array, dimension (1+4*SIZEB) * On exit, if INFO = 0, RWORK(1) returns the necessary size * for RWORK. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if CBDSQR did not converge * If INFO = MIN(M,N) + 1, then PCGESVD has detected * heterogeneity by finding that eigenvalues were not * identical across the process grid. In this case, the * accuracy of the results from PCGESVD cannot be * guaranteed. * * ===================================================================== * * The results of PCGEBRD, and therefore PCGESVD, may vary slightly * from run to run with the same input data. If repeatability is an * issue, call BLACS_SET with the appropriate option after defining * the process grid. * * Alignment requirements * ====================== * * The routine PCGESVD inherits the same alignement requirement as * the routine PCGEBRD, namely: * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * * ===================================================================== * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_, + CSRC_,LLD_,ITHVAL PARAMETER (BLOCK_CYCLIC_2D=1,DLEN_=9,DTYPE_=1,CTXT_=2,M_=3,N_=4, + MB_=5,NB_=6,RSRC_=7,CSRC_=8,LLD_=9,ITHVAL=10) COMPLEX ZERO,ONE PARAMETER (ZERO= ((0.0E+0,0.0E+0)),ONE= ((1.0E+0,0.0E+0))) REAL DZERO,DONE PARAMETER (DZERO=0.0D+0,DONE=1.0D+0) * .. * .. Local Scalars .. CHARACTER UPLO INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ, + INDU,INDV,INDWORK,IOFFD,IOFFE,ISCALE,J,K,LDU,LDVT,LLWORK, + LWMIN,MAXIM,MB,MP,MYPCOL,MYPCOLC,MYPCOLR,MYPROW,MYPROWC, + MYPROWR,NB,NCVT,NPCOL,NPCOLC,NPCOLR,NPROCS,NPROW,NPROWC, + NPROWR,NQ,NRU,SIZE,SIZEB,SIZEP,SIZEPOS,SIZEQ,WANTU,WANTVT, + WATOBD,WBDTOSVD,WCBDSQR,WPCGEBRD,WPCLANGE,WPCORMBRPRT, + WPCORMBRQLN REAL ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM * .. * .. Local Arrays .. INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3) REAL C(1,1) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH,PCLANGE EXTERNAL LSAME,NUMROC,PDLAMCH,PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GET,BLACS_GRIDEXIT,BLACS_GRIDINFO,BLACS_GRIDINIT, + CHK1MAT,CBDSQR,DESCINIT,SGAMN2D,SGAMX2D,SSCAL,IGAMX2D, + IGEBR2D,IGEBS2D,PCHK1MAT,PCGEBRD,PCGEMR2D,PSLARED1D, + PSLARED2D,PCLASCL,PCLASET,PCUNMBR,PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,REAL INTRINSIC CMPLX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) ISCALE = 0 INFO = 0 * IF (NPROW.EQ.-1) THEN INFO = - (800+CTXT_) ELSE * SIZE = MIN(M,N) SIZEB = MAX(M,N) NPROCS = NPROW*NPCOL IF (M.GE.N) THEN IOFFD = JA - 1 IOFFE = IA - 1 SIZEPOS = 1 ELSE IOFFD = IA - 1 IOFFE = JA - 1 SIZEPOS = 3 END IF * IF (LSAME(JOBU,'V')) THEN WANTU = 1 ELSE WANTU = 0 END IF IF (LSAME(JOBVT,'V')) THEN WANTVT = 1 ELSE WANTVT = 0 END IF * CALL CHK1MAT(M,3,N,4,IA,JA,DESCA,8,INFO) IF (WANTU.EQ.1) THEN CALL CHK1MAT(M,3,SIZE,SIZEPOS,IU,JU,DESCU,13,INFO) END IF IF (WANTVT.EQ.1) THEN CALL CHK1MAT(SIZE,SIZEPOS,N,4,IVT,JVT,DESCVT,17,INFO) END IF CALL IGAMX2D(DESCA(CTXT_),'A',' ',1,1,INFO,1,1,1,-1,-1,0) * IF (INFO.EQ.0) THEN * * Set up pointers into the WORK array. * INDD = 2 INDE = INDD + SIZEB + IOFFD INDD2 = INDE + SIZEB + IOFFE INDE2 = INDD2 + SIZEB + IOFFD * INDTAUQ = 2 INDTAUP = INDTAUQ + SIZEB + JA - 1 INDWORK = INDTAUP + SIZEB + IA - 1 LLWORK = LWORK - INDWORK + 1 * * Initialize contexts for "column" and "row" process matrices. * CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTC) CALL BLACS_GRIDINIT(CONTEXTC,'R',NPROCS,1) CALL BLACS_GRIDINFO(CONTEXTC,NPROWC,NPCOLC,MYPROWC, + MYPCOLC) CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTR) CALL BLACS_GRIDINIT(CONTEXTR,'R',1,NPROCS) CALL BLACS_GRIDINFO(CONTEXTR,NPROWR,NPCOLR,MYPROWR, + MYPCOLR) * * Set local dimensions of matrices (this is for MB=NB=1). * NRU = NUMROC(M,1,MYPROWC,0,NPROCS) NCVT = NUMROC(N,1,MYPCOLR,0,NPROCS) NB = DESCA(NB_) MB = DESCA(MB_) MP = NUMROC(M,MB,MYPROW,DESCA(RSRC_),NPROW) NQ = NUMROC(N,NB,MYPCOL,DESCA(CSRC_),NPCOL) IF (WANTVT.EQ.1) THEN SIZEP = NUMROC(SIZE,DESCVT(MB_),MYPROW,DESCVT(RSRC_), + NPROW) ELSE SIZEP = 0 END IF IF (WANTU.EQ.1) THEN SIZEQ = NUMROC(SIZE,DESCU(NB_),MYPCOL,DESCU(CSRC_), + NPCOL) ELSE SIZEQ = 0 END IF * * Transmit MAX(NQ0, MP0). * IF (MYPROW.EQ.0 .AND. MYPCOL.EQ.0) THEN MAXIM = MAX(NQ,MP) CALL IGEBS2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1) ELSE CALL IGEBR2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1,0,0) END IF * WPCLANGE = MP WPCGEBRD = NB* (MP+NQ+1) + NQ WATOBD = MAX(MAX(WPCLANGE,WPCGEBRD),MAXIM) * WCBDSQR = MAX(1,4*SIZE) WPCORMBRQLN = MAX((NB* (NB-1))/2, (SIZEQ+MP)*NB) + NB*NB WPCORMBRPRT = MAX((MB* (MB-1))/2, (SIZEP+NQ)*MB) + MB*MB WBDTOSVD = SIZE* (WANTU*NRU+WANTVT*NCVT) + + MAX(WCBDSQR,MAX(WANTU*WPCORMBRQLN, + WANTVT*WPCORMBRPRT)) * * Finally, calculate required workspace. * LWMIN = 1 + 2*SIZEB + MAX(WATOBD,WBDTOSVD) WORK(1) = CMPLX(LWMIN,0D+00) RWORK(1) = REAL(1+4*SIZEB) * IF (WANTU.NE.1 .AND. .NOT. (LSAME(JOBU,'N'))) THEN INFO = -1 ELSE IF (WANTVT.NE.1 .AND. .NOT. (LSAME(JOBVT,'N'))) THEN INFO = -2 ELSE IF (LWORK.LT.LWMIN .AND. LWORK.NE.-1) THEN INFO = -19 END IF * END IF * IDUM1(1) = WANTU IDUM1(2) = WANTVT IF (LWORK.EQ.-1) THEN IDUM1(3) = -1 ELSE IDUM1(3) = 1 END IF IDUM2(1) = 1 IDUM2(2) = 2 IDUM2(3) = 19 CALL PCHK1MAT(M,3,N,4,IA,JA,DESCA,8,3,IDUM1,IDUM2,INFO) IF (INFO.EQ.0) THEN IF (WANTU.EQ.1) THEN CALL PCHK1MAT(M,3,SIZE,4,IU,JU,DESCU,13,0,IDUM1,IDUM2, + INFO) END IF IF (WANTVT.EQ.1) THEN CALL PCHK1MAT(SIZE,3,N,4,IVT,JVT,DESCVT,17,0,IDUM1, + IDUM2,INFO) END IF END IF * END IF * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PCGESVD',-INFO) RETURN ELSE IF (LWORK.EQ.-1) THEN GO TO 40 END IF * * Quick return if possible. * IF (M.LE.0 .OR. N.LE.0) GO TO 40 * * Get machine constants. * SAFMIN = PSLAMCH(DESCA(CTXT_),'Safe minimum') EPS = PSLAMCH(DESCA(CTXT_),'Precision') SMLNUM = SAFMIN/EPS BIGNUM = DONE/SMLNUM RMIN = SQRT(SMLNUM) RMAX = MIN(SQRT(BIGNUM),DONE/SQRT(SQRT(SAFMIN))) * * Scale matrix to allowable range, if necessary. * ANRM = PCLANGE('1',M,N,A,IA,JA,DESCA,WORK(INDWORK)) IF (ANRM.GT.DZERO .AND. ANRM.LT.RMIN) THEN ISCALE = 1 SIGMA = RMIN/ANRM ELSE IF (ANRM.GT.RMAX) THEN ISCALE = 1 SIGMA = RMAX/ANRM END IF * IF (ISCALE.EQ.1) THEN CALL PCLASCL('G',DONE,SIGMA,M,N,A,IA,JA,DESCA,INFO) END IF * CALL PCGEBRD(M,N,A,IA,JA,DESCA,RWORK(INDD),RWORK(INDE), + WORK(INDTAUQ),WORK(INDTAUP),WORK(INDWORK),LLWORK, + INFO) * * Copy D and E to all processes. * Array D is in local array of dimension: * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * Array E is in local array of dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * IF (M.GE.N) THEN * Distribute D CALL PSLARED1D(N+IOFFD,IA,JA,DESCA,RWORK(INDD),RWORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PSLARED2D(M+IOFFE,IA,JA,DESCA,RWORK(INDE),RWORK(INDE2), + WORK(INDWORK),LLWORK) ELSE * Distribute D CALL PSLARED2D(M+IOFFD,IA,JA,DESCA,RWORK(INDD),RWORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PSLARED1D(N+IOFFE,IA,JA,DESCA,RWORK(INDE),RWORK(INDE2), + WORK(INDWORK),LLWORK) END IF * * Prepare for calling PCBDSQR. * IF (M.GE.N) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * INDU = INDWORK INDV = INDU + SIZE*NRU*WANTU INDWORK = INDV + SIZE*NCVT*WANTVT * LDU = MAX(1,NRU) LDVT = MAX(1,SIZE) * CALL DESCINIT(DESCTU,M,SIZE,1,1,0,0,CONTEXTC,LDU,INFO) CALL DESCINIT(DESCTVT,SIZE,N,1,1,0,0,CONTEXTR,LDVT,INFO) * IF (WANTU.EQ.1) THEN CALL PCLASET('Full',M,SIZE,ZERO,ONE,WORK(INDU),1,1,DESCTU) ELSE NRU = 0 END IF * IF (WANTVT.EQ.1) THEN CALL PCLASET('Full',SIZE,N,ZERO,ONE,WORK(INDV),1,1,DESCTVT) ELSE NCVT = 0 END IF * CALL CBDSQR(UPLO,SIZE,NCVT,NRU,0,RWORK(INDD2+IOFFD), + RWORK(INDE2+IOFFE),WORK(INDV),SIZE,WORK(INDU),LDU,C,1, + WORK(INDWORK),INFO) * * Redistribute elements of U and VT in the block-cyclic fashion. * IF (WANTU.EQ.1) CALL PCGEMR2D(M,SIZE,WORK(INDU),1,1,DESCTU,U,IU, + JU,DESCU,DESCU(CTXT_)) * IF (WANTVT.EQ.1) CALL PCGEMR2D(SIZE,N,WORK(INDV),1,1,DESCTVT,VT, + IVT,JVT,DESCVT,DESCVT(CTXT_)) * * Set to ZERO "non-square" elements of the larger matrices U, VT. * IF (M.GT.N .AND. WANTU.EQ.1) THEN CALL PCLASET('Full',M-SIZE,SIZE,ZERO,ZERO,U,IA+SIZE,JU,DESCU) ELSE IF (N.GT.M .AND. WANTVT.EQ.1) THEN CALL PCLASET('Full',SIZE,N-SIZE,ZERO,ZERO,VT,IVT,JVT+SIZE, + DESCVT) END IF * * Multiply Householder rotations from bidiagonalized matrix. * IF (WANTU.EQ.1) CALL PCUNMBR('Q','L','N',M,SIZE,N,A,IA,JA,DESCA, + WORK(INDTAUQ),U,IU,JU,DESCU, + WORK(INDWORK),LLWORK,INFO) * IF (WANTVT.EQ.1) CALL PCUNMBR('P','R','C',SIZE,N,M,A,IA,JA,DESCA, + WORK(INDTAUP),VT,IVT,JVT,DESCVT, + WORK(INDWORK),LLWORK,INFO) * * Copy singular values into output array S. * DO 10 I = 1,SIZE S(I) = RWORK(INDD2+IOFFD+I-1) 10 CONTINUE * * If matrix was scaled, then rescale singular values appropriately. * IF (ISCALE.EQ.1) THEN CALL SSCAL(SIZE,ONE/SIGMA,S,1) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF (SIZE.LE.ITHVAL) THEN J = SIZE K = 1 ELSE J = SIZE/ITHVAL K = ITHVAL END IF * DO 20 I = 1,J RWORK(I+INDE) = S((I-1)*K+1) RWORK(I+INDD2) = S((I-1)*K+1) 20 CONTINUE * CALL SGAMN2D(DESCA(CTXT_),'a',' ',J,1,RWORK(1+INDE),J,1,1,-1,-1,0) CALL SGAMX2D(DESCA(CTXT_),'a',' ',J,1,RWORK(1+INDD2),J,1,1,-1,-1, + 0) * DO 30 I = 1,J IF ((RWORK(I+INDE)-RWORK(I+INDD2)).NE.DZERO) THEN INFO = SIZE + 1 END IF 30 CONTINUE * 40 CONTINUE * CALL BLACS_GRIDEXIT(CONTEXTC) CALL BLACS_GRIDEXIT(CONTEXTR) * * End of PCGESVD * RETURN END scalapack-1.8.0/SRC/pzunglq.f0000640000175000017500000003100310363532303015607 0ustar muammarmuammar SUBROUTINE PZUNGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGLQ generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PZGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PZGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ J, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZLASET, PZUNGL2 * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL PZLASET( 'All', IA+M-IL, IL-IA, ZERO, ZERO, A, IL, JA, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PZUNGL2( IA+M-IL, N-IL+IA, IA+K-IL, A, IL, JA+IL-IA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of rows to loop over ? * IF( IL.GT.IN+1 ) THEN * * Use blocked code * DO 10 I = IL-DESCA( MB_ ), IN+1, -DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) J = JA + I - IA * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PZLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-I-IB+IA, N-I+IA, IB, A, I, J, $ DESCA, WORK, A, I+IB, J, DESCA, $ WORK( IPW ) ) END IF * * Apply H' to columns j:ja+n-1 of current block * CALL PZUNGL2( IB, N-I+IA, IB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set columns ia:i-1 of current block to zero * CALL PZLASET( 'All', IB, I-IA, ZERO, ZERO, A, I, JA, DESCA ) 10 CONTINUE * END IF * * Handle first block separately * IF( IL.GT.IA ) THEN * IB = IN - IA + 1 * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PZLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Forward', $ 'Rowwise', M-IB, N, IB, A, IA, JA, DESCA, WORK, $ A, IA+IB, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-1 of current block * CALL PZUNGL2( IB, N, IB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGLQ * END scalapack-1.8.0/SRC/pzheevx.f0000640000175000017500000011667610377154001015624 0ustar muammarmuammar SUBROUTINE PZHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, IFAIL, ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK, $ LWORK, M, N, NZ DOUBLE PRECISION ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ), $ IFAIL( * ), IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), W( * ) COMPLEX*16 A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZHEEVX computes selected eigenvalues and, optionally, eigenvectors * of a complex hermitian matrix A by calling the recommended sequence * of ScaLAPACK routines. Eigenvalues/vectors can be selected by * specifying a range of values or a range of indices for the desired * eigenvalues. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * PZHEEVX assumes IEEE 754 standard compliant arithmetic. To port * to a system which does not have IEEE 754 arithmetic, modify * the appropriate SLmake.inc file to include the compiler switch * -DNO_IEEE. This switch only affects the compilation of pdlaiect.c. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX*16 array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ) * * On entry, the Hermitian matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the Hermitian matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * Hermitian matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PZHEEVX cannot guarantee * correct error reporting. * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) DOUBLE PRECISION * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PDLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PDLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PZHEEVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PZHEEVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) DOUBLE PRECISION * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX*16 array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX*16 array, * dimension (LWORK) * WORK(1) returns workspace adequate workspace to allow * optimal performance. * * LWORK (local input) INTEGER * Size of WORK array. If only eigenvalues are requested: * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) * If eigenvectors are requested: * LWORK >= N + ( NP0 + MQ0 + NB ) * NB * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). * * For optimal performance, greater workspace is needed, i.e. * LWORK >= MAX( LWORK, NHETRD_LWORK ) * Where LWORK is as defined above, and * NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 1 ) * NPS * * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/output) DOUBLE PRECISION array, * dimension max(3,LRWORK) * On return, WORK(1) contains the optimal amount of * workspace required for efficient execution. * if JOBZ='N' RWORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' RWORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LRWORK (local input) INTEGER * Size of RWORK * See below for definitions of variables used to define LRWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LRWORK >= 5 * NN + 4 * N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LRWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LRWORK is too small: * If LRWORK is too small to guarantee orthogonality, * PZHEEVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LRWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-25 * is returned. Note that when RANGE='V', PZHEEVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LRWORK is large enough to allow PZHEEVX to * compute the eigenvalues, PZHEEVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PZSTEIN will perform no better than ZSTEIN on 1 * processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then * IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PZHEEVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to compute * eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Differences between PZHEEVX and ZHEEVX * ====================================== * * A, LDA -> A, IA, JA, DESCA * Z, LDZ -> Z, IZ, JZ, DESCZ * WORKSPACE needs are larger for PZHEEVX. * LIWORK parameter added * * ORFAC, ICLUSTER() and GAP() parameters added * meaning of INFO is changed * * Functional differences: * PZHEEVX does not promise orthogonality for eigenvectors associated * with tighly clustered eigenvalues. * PZHEEVX does not reorthogonalize eigenvectors * that are on different processes. The extent of reorthogonalization * is controlled by the input parameter LWORK. * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, TEN, FIVE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0, $ FIVE = 5.0D+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN, $ VALEIG, WANTZ CHARACTER ORDER INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO, $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP, $ INDRWORK, INDTAU, INDWORK, IROFFA, IROFFZ, $ ISCALE, ISIZESTEBZ, ISIZESTEIN, IZROW, $ LALLWORK, LIWMIN, LLRWORK, LLWORK, LRWMIN, $ LRWOPT, LWMIN, LWOPT, MAXEIGS, MB_A, MQ0, $ MYCOL, MYROW, NB, NB_A, NEIG, NHETRD_LWOPT, NN, $ NNP, NP0, NPCOL, NPROCS, NPROW, NPS, NQ0, $ NSPLIT, NZZ, OFFSET, RSRC_A, RSRC_Z, SIZEHEEVX, $ SIZESTEIN, SQNPC DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, $ SIGMA, SMLNUM, VLL, VUU * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV DOUBLE PRECISION PDLAMCH, PZLANHE EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, $ PDLAMCH, PZLANHE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DLASRT, DSCAL, IGAMN2D, PCHK1MAT, PCHK2MAT, $ PDLARED1D, PDSTEBZ, PXERBLA, PZELGET, PZHENTRD, $ PZLASCL, PZSTEIN, PZUNMTR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, ICHAR, INT, MAX, MIN, MOD, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 800+CTXT_ ) ELSE IF( WANTZ ) THEN IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) EPS = PDLAMCH( ICTXT, 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL LOWER = LSAME( UPLO, 'L' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) * * Set up pointers into the WORK array * INDTAU = 1 INDWORK = INDTAU + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDE = 1 INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDRWORK = INDE2 + N LLRWORK = LRWORK - INDRWORK + 1 * * Set up pointers into the IWORK array * ISIZESTEIN = 3*N + NPROCS + 1 ISIZESTEBZ = MAX( 4*N, 14, NPROCS ) INDIBL = ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 INDISP = INDIBL + N * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) $ LQUERY = .TRUE. * NNP = MAX( N, NPROCS+1, 4 ) LIWMIN = 6*NNP * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A NN = MAX( N, NB, 2 ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) NP0 = NUMROC( N+IROFFA, NB, 0, 0, NPROW ) MQ0 = NUMROC( N+ICOFFA, NB, 0, 0, NPCOL ) IF( WANTZ ) THEN RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = N + MAX( NB*( NP0+1 ), 3 ) LWOPT = LWMIN LRWMIN = 5*NN + 4*N IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LRWOPT = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( N, NPROW*NPCOL )*NN ELSE LRWOPT = LRWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+NQ0+NB )*NB LRWMIN = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LRWOPT = LRWMIN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD code * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS LWOPT = MAX( LWOPT, N+NHETRD_LWOPT ) * END IF IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN RWORK( 1 ) = ABSTOL IF( VALEIG ) THEN RWORK( 2 ) = VL RWORK( 3 ) = VU ELSE RWORK( 2 ) = ZERO RWORK( 3 ) = ZERO END IF CALL DGEBS2D( ICTXT, 'ALL', ' ', 3, 1, RWORK, 3 ) ELSE CALL DGEBR2D( ICTXT, 'ALL', ' ', 3, 1, RWORK, 3, 0, 0 ) END IF IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -2 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -3 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -10 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -11 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -12 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -23 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -25 ELSE IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN INFO = -27 ELSE IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -9 ELSE IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -10 ELSE IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* $ ABS( ABSTOL ) ) THEN INFO = -13 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -19 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -19 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2100+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2100+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2100+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2100+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2100+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2100+CSRC_ ) ELSE IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( ALLEIG ) THEN IDUM1( 3 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 3 ) = ICHAR( 'I' ) ELSE IDUM1( 3 ) = ICHAR( 'V' ) END IF IDUM2( 3 ) = 3 IF( LQUERY ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 4 IF( WANTZ ) THEN CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, $ JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 4, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = DCMPLX( LWOPT ) RWORK( 1 ) = DBLE( LRWOPT ) IWORK( 1 ) = LIWMIN END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHEEVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 ICLUSTR( 1 ) = 0 END IF M = 0 WORK( 1 ) = DCMPLX( LWOPT ) RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN END IF * * Scale matrix to allowable range, if necessary. * ABSTLL = ABSTOL ISCALE = 0 IF( VALEIG ) THEN VLL = VL VUU = VU ELSE VLL = ZERO VUU = ZERO END IF * ANRM = PZLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ANRM = ANRM*SIGMA ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM ANRM = ANRM*SIGMA END IF * IF( ISCALE.EQ.1 ) THEN CALL PZLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) IF( ABSTOL.GT.0 ) $ ABSTLL = ABSTOL*SIGMA IF( VALEIG ) THEN VLL = VL*SIGMA VUU = VU*SIGMA IF( VUU.EQ.VLL ) THEN VUU = VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) END IF END IF END IF * * Call PZHENTRD to reduce Hermitian matrix to tridiagonal form. * LALLWORK = LLRWORK * CALL PZHENTRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, RWORK( INDRWORK ), LLRWORK, IINFO ) * * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet work with arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PDLARED1D( N, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDD2 ), RWORK( INDRWORK ), LLRWORK ) * CALL PDLARED1D( N, IA, JA, DESCA, RWORK( INDE ), $ RWORK( INDE2 ), RWORK( INDRWORK ), LLRWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PZELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, $ I+JA-1, DESCA ) RWORK( INDD2+I-1 ) = DBLE( WORK( INDD2+I-1 ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, $ I+JA, DESCA ) RWORK( INDE2+I-1 ) = DBLE( WORK( INDE2+I-1 ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, $ I+JA-1, DESCA ) RWORK( INDE2+I-1 ) = DBLE( WORK( INDE2+I-1 ) ) 30 CONTINUE END IF END IF * * Call PDSTEBZ and, if eigenvectors are desired, PZSTEIN. * IF( WANTZ ) THEN ORDER = 'B' ELSE ORDER = 'E' END IF * CALL PDSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, $ RWORK( INDD2 ), RWORK( INDE2+OFFSET ), M, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWORK ), $ LLRWORK, IWORK( 1 ), ISIZESTEBZ, IINFO ) * * * IF PDSTEBZ fails, the error propogates to INFO, but * we do not propogate the eigenvalue(s) which failed because: * 1) This should never happen if the user specifies * ABSTOL = 2 * PDLAMCH( 'U' ) * 2) PDSTEIN will confirm/deny whether the eigenvalues are * close enough. * IF( IINFO.NE.0 ) THEN INFO = INFO + IERREBZ DO 40 I = 1, M IWORK( INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) 40 CONTINUE END IF IF( WANTZ ) THEN * IF( VALEIG ) THEN * * Compute the maximum number of eigenvalues that we can * compute in the * workspace that we have, and that we can store in Z. * * Loop through the possibilities looking for the largest * NZ that we can feed to PZSTEIN and PZUNMTR * * Since all processes must end up with the same value * of NZ, we first compute the minimum of LALLWORK * CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LALLWORK, 1, 1, 1, -1, $ -1, -1 ) * MAXEIGS = DESCZ( N_ ) * DO 50 NZ = MIN( MAXEIGS, M ), 0, -1 MQ0 = NUMROC( NZ, NB, 0, 0, NPCOL ) SIZESTEIN = ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) SIZEHEEVX = SIZESTEIN IF( SIZEHEEVX.LE.LALLWORK ) $ GO TO 60 50 CONTINUE 60 CONTINUE ELSE NZ = M END IF NZ = MAX( NZ, 0 ) IF( NZ.NE.M ) THEN INFO = INFO + IERRSPC * DO 70 I = 1, M IFAIL( I ) = 0 70 CONTINUE * * The following code handles a rare special case * - NZ .NE. M means that we don't have enough room to store * all the vectors. * - NSPLIT .GT. 1 means that the matrix split * In this case, we cannot simply take the first NZ eigenvalues * because PDSTEBZ sorts the eigenvalues by block when * a split occurs. So, we have to make another call to * PDSTEBZ with a new upper limit - VUU. * IF( NSPLIT.GT.1 ) THEN CALL DLASRT( 'I', M, W, IINFO ) NZZ = 0 IF( NZ.GT.0 ) THEN * VUU = W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) IF( VLL.GE.VUU ) THEN NZZ = 0 ELSE CALL PDSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, $ IU, ABSTLL, RWORK( INDD2 ), $ RWORK( INDE2+OFFSET ), NZZ, NSPLIT, $ W, IWORK( INDIBL ), IWORK( INDISP ), $ RWORK( INDRWORK ), LLRWORK, $ IWORK( 1 ), ISIZESTEBZ, IINFO ) END IF * IF( MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN IF( NZZ.GT.NZ .OR. IINFO.NE.0 ) THEN INFO = INFO + IERREBZ END IF END IF END IF NZ = MIN( NZ, NZZ ) * END IF END IF CALL PZSTEIN( N, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), NZ, W, $ IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, $ JZ, DESCZ, RWORK( INDRWORK ), LALLWORK, $ IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, $ IINFO ) * IF( IINFO.GE.NZ+1 ) $ INFO = INFO + IERRCLS IF( MOD( IINFO, NZ+1 ).NE.0 ) $ INFO = INFO + IERREIN * * Z = Q * Z * * IF( NZ.GT.0 ) THEN CALL PZUNMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) END IF * END IF * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL DSCAL( M, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = DCMPLX( LWOPT ) RWORK( 1 ) = DBLE( LRWOPT ) IWORK( 1 ) = LIWMIN * RETURN * * End of PZHEEVX * END scalapack-1.8.0/SRC/pztrevc.f0000640000175000017500000005265310602576752015636 0ustar muammarmuammar SUBROUTINE PZTREVC( SIDE, HOWMNY, SELECT, N, T, DESCT, VL, DESCVL, $ VR, DESCVR, MM, M, WORK, RWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE INTEGER INFO, M, MM, N * .. * .. Array Arguments .. LOGICAL SELECT( * ) INTEGER DESCT( * ), DESCVL( * ), DESCVR( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 T( * ), VL( * ), VR( * ), WORK( * ) * .. * * Purpose * ======= * * PZTREVC computes some or all of the right and/or left eigenvectors of * a complex upper triangular matrix T in parallel. * * The right eigenvector x and the left eigenvector y of T corresponding * to an eigenvalue w are defined by: * * T*x = w*x, y'*T = w*y' * * where y' denotes the conjugate transpose of the vector y. * * If all eigenvectors are requested, the routine may either return the * matrices X and/or Y of right or left eigenvectors of T, or the * products Q*X and/or Q*Y, where Q is an input unitary * matrix. If T was obtained from the Schur factorization of an * original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of * right or left eigenvectors of A. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * = 'R': compute right eigenvectors only; * = 'L': compute left eigenvectors only; * = 'B': compute both right and left eigenvectors. * * HOWMNY (global input) CHARACTER*1 * = 'A': compute all right and/or left eigenvectors; * = 'B': compute all right and/or left eigenvectors, * and backtransform them using the input matrices * supplied in VR and/or VL; * = 'S': compute selected right and/or left eigenvectors, * specified by the logical array SELECT. * * SELECT (global input) LOGICAL array, dimension (N) * If HOWMNY = 'S', SELECT specifies the eigenvectors to be * computed. * If HOWMNY = 'A' or 'B', SELECT is not referenced. * To select the eigenvector corresponding to the j-th * eigenvalue, SELECT(j) must be set to .TRUE.. * * N (global input) INTEGER * The order of the matrix T. N >= 0. * * T (global input/output) COMPLEX*16 array, dimension * (DESCT(LLD_),*) * The upper triangular matrix T. T is modified, but restored * on exit. * * DESCT (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix T. * * VL (global input/output) COMPLEX*16 array, dimension * (DESCVL(LLD_),MM) * On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must * contain an N-by-N matrix Q (usually the unitary matrix Q of * Schur vectors returned by ZHSEQR). * On exit, if SIDE = 'L' or 'B', VL contains: * if HOWMNY = 'A', the matrix Y of left eigenvectors of T; * if HOWMNY = 'B', the matrix Q*Y; * if HOWMNY = 'S', the left eigenvectors of T specified by * SELECT, stored consecutively in the columns * of VL, in the same order as their * eigenvalues. * If SIDE = 'R', VL is not referenced. * * DESCVL (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix VL. * * VR (global input/output) COMPLEX*16 array, dimension * (DESCVR(LLD_),MM) * On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must * contain an N-by-N matrix Q (usually the unitary matrix Q of * Schur vectors returned by ZHSEQR). * On exit, if SIDE = 'R' or 'B', VR contains: * if HOWMNY = 'A', the matrix X of right eigenvectors of T; * if HOWMNY = 'B', the matrix Q*X; * if HOWMNY = 'S', the right eigenvectors of T specified by * SELECT, stored consecutively in the columns * of VR, in the same order as their * eigenvalues. * If SIDE = 'L', VR is not referenced. * * DESCVR (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix VR. * * MM (global input) INTEGER * The number of columns in the arrays VL and/or VR. MM >= M. * * M (global output) INTEGER * The number of columns in the arrays VL and/or VR actually * used to store the eigenvectors. If HOWMNY = 'A' or 'B', M * is set to N. Each selected eigenvector occupies one * column. * * WORK (local workspace) COMPLEX*16 array, * dimension ( 2*DESCT(LLD_) ) * Additional workspace may be required if PZLATTRS is updated * to use WORK. * * RWORK (local workspace) DOUBLE PRECISION array, * dimension ( DESCT(LLD_) ) * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Further Details * =============== * * The algorithm used in this program is basically backward (forward) * substitution. It is the hope that scaling would be used to make the * the code robust against possible overflow. But scaling has not yet * been implemented in PZLATTRS which is called by this routine to solve * the triangular systems. PZLATTRS just calls PZTRSV. * * Each eigenvector is normalized so that the element of largest * magnitude has magnitude 1; here the magnitude of a complex number * (x,y) is taken to be |x| + |y|. * * Further Details * =============== * * Implemented by Mark R. Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV INTEGER CONTXT, CSRC, I, ICOL, II, IROW, IS, ITMP1, $ ITMP2, J, K, KI, LDT, LDVL, LDVR, LDW, MB, $ MYCOL, MYROW, NB, NPCOL, NPROW, RSRC REAL SELF DOUBLE PRECISION OVFL, REMAXD, SCALE, SMIN, SMLNUM, ULP, UNFL COMPLEX*16 CDUM, REMAXC, SHIFT * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, DGSUM2D, IGAMN2D, $ INFOG2L, PDLABAD, PDZASUM, PXERBLA, PZAMAX, $ PZCOPY, PZDSCAL, PZGEMV, PZLASET, PZLATTRS, $ ZGSUM2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CONTXT = DESCT( CTXT_ ) RSRC = DESCT( RSRC_ ) CSRC = DESCT( CSRC_ ) MB = DESCT( MB_ ) NB = DESCT( NB_ ) LDT = DESCT( LLD_ ) LDW = LDT LDVR = DESCVR( LLD_ ) LDVL = DESCVL( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Decode and test the input parameters * BOTHV = LSAME( SIDE, 'B' ) RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV * ALLV = LSAME( HOWMNY, 'A' ) OVER = LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'O' ) SOMEV = LSAME( HOWMNY, 'S' ) * * Set M to the number of columns required to store the selected * eigenvectors. * IF( SOMEV ) THEN M = 0 DO 10 J = 1, N IF( SELECT( J ) ) $ M = M + 1 10 CONTINUE ELSE M = N END IF * INFO = 0 IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN INFO = -1 ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( MM.LT.M ) THEN INFO = -11 END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PZTREVC', -INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * Set the constants to control overflow. * UNFL = PDLAMCH( CONTXT, 'Safe minimum' ) OVFL = ONE / UNFL CALL PDLABAD( CONTXT, UNFL, OVFL ) ULP = PDLAMCH( CONTXT, 'Precision' ) SMLNUM = UNFL*( N / ULP ) * * Store the diagonal elements of T in working array WORK( LDW+1 ). * DO 20 I = 1, N CALL INFOG2L( I, I, DESCT, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( LDW+IROW ) = T( ( ICOL-1 )*LDT+IROW ) END IF 20 CONTINUE * * Compute 1-norm of each column of strictly upper triangular * part of T to control overflow in triangular solver. Computed, * but not used. For use in PZLATTRS. * RWORK( 1 ) = ZERO DO 30 J = 2, N CALL PDZASUM( J-1, RWORK( J ), T, 1, J, DESCT, 1 ) 30 CONTINUE * I replicate the norms in RWORK. Should they be distributed * over the process rows? CALL DGSUM2D( CONTXT, 'Row', ' ', N, 1, RWORK, N, -1, -1 ) * IF( RIGHTV ) THEN * * Compute right eigenvectors. * * Need to set the distribution pattern of WORK * CALL DESCINIT( DESCW, N, 1, NB, 1, RSRC, CSRC, CONTXT, LDW, $ INFO ) * IS = M DO 70 KI = N, 1, -1 * IF( SOMEV ) THEN IF( .NOT.SELECT( KI ) ) $ GO TO 70 END IF * SMIN = ZERO SHIFT = CZERO CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN SHIFT = T( ( ICOL-1 )*LDT+IROW ) SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM ) END IF CALL DGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 ) CALL ZGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 ) * CALL INFOG2L( 1, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( 1 ) = CONE END IF * * Form right-hand side. Distribute rhs onto first column * of processor grid. * IF( KI.GT.1 ) THEN CALL PZCOPY( KI-1, T, 1, KI, DESCT, 1, WORK, 1, 1, DESCW, $ 1 ) END IF DO 40 K = 1, KI - 1 CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = -WORK( IROW ) END IF 40 CONTINUE * * Solve the triangular system: * (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. * DO 50 K = 1, KI - 1 CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = T( ( ICOL-1 )*LDT+IROW ) - $ SHIFT IF( CABS1( T( ( ICOL-1 )*LDT+IROW ) ).LT.SMIN ) THEN T( ( ICOL-1 )*LDT+IROW ) = DCMPLX( SMIN ) END IF END IF 50 CONTINUE * IF( KI.GT.1 ) THEN CALL PZLATTRS( 'Upper', 'No transpose', 'Non-unit', 'Y', $ KI-1, T, 1, 1, DESCT, WORK, 1, 1, DESCW, $ SCALE, RWORK, INFO ) CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = DCMPLX( SCALE ) END IF END IF * * Copy the vector x or Q*x to VR and normalize. * IF( .NOT.OVER ) THEN CALL PZCOPY( KI, WORK, 1, 1, DESCW, 1, VR, 1, IS, DESCVR, $ 1 ) * CALL PZAMAX( KI, REMAXC, II, VR, 1, IS, DESCVR, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PZDSCAL( KI, REMAXD, VR, 1, IS, DESCVR, 1 ) * CALL PZLASET( ' ', N-KI, 1, CZERO, CZERO, VR, KI+1, IS, $ DESCVR ) ELSE IF( KI.GT.1 ) $ CALL PZGEMV( 'N', N, KI-1, CONE, VR, 1, 1, DESCVR, $ WORK, 1, 1, DESCW, 1, DCMPLX( SCALE ), $ VR, 1, KI, DESCVR, 1 ) * CALL PZAMAX( N, REMAXC, II, VR, 1, KI, DESCVR, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PZDSCAL( N, REMAXD, VR, 1, KI, DESCVR, 1 ) END IF * * Set back the original diagonal elements of T. * DO 60 K = 1, KI - 1 CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = WORK( LDW+IROW ) END IF 60 CONTINUE * IS = IS - 1 70 CONTINUE END IF * IF( LEFTV ) THEN * * Compute left eigenvectors. * * Need to set the distribution pattern of WORK * CALL DESCINIT( DESCW, N, 1, MB, 1, RSRC, CSRC, CONTXT, LDW, $ INFO ) * IS = 1 DO 110 KI = 1, N * IF( SOMEV ) THEN IF( .NOT.SELECT( KI ) ) $ GO TO 110 END IF * SMIN = ZERO SHIFT = CZERO CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN SHIFT = T( ( ICOL-1 )*LDT+IROW ) SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM ) END IF CALL DGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 ) CALL ZGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 ) * CALL INFOG2L( N, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( IROW ) = CONE END IF * * Form right-hand side. * IF( KI.LT.N ) THEN CALL PZCOPY( N-KI, T, KI, KI+1, DESCT, N, WORK, KI+1, 1, $ DESCW, 1 ) END IF DO 80 K = KI + 1, N CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = -DCONJG( WORK( IROW ) ) END IF 80 CONTINUE * * Solve the triangular system: * (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. * DO 90 K = KI + 1, N CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = T( ( ICOL-1 )*LDT+IROW ) - $ SHIFT IF( CABS1( T( ( ICOL-1 )*LDT+IROW ) ).LT.SMIN ) $ T( ( ICOL-1 )*LDT+IROW ) = DCMPLX( SMIN ) END IF 90 CONTINUE * IF( KI.LT.N ) THEN CALL PZLATTRS( 'Upper', 'Conjugate transpose', 'Nonunit', $ 'Y', N-KI, T, KI+1, KI+1, DESCT, WORK, $ KI+1, 1, DESCW, SCALE, RWORK, INFO ) CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = DCMPLX( SCALE ) END IF END IF * * Copy the vector x or Q*x to VL and normalize. * IF( .NOT.OVER ) THEN CALL PZCOPY( N-KI+1, WORK, KI, 1, DESCW, 1, VL, KI, IS, $ DESCVL, 1 ) * CALL PZAMAX( N-KI+1, REMAXC, II, VL, KI, IS, DESCVL, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PZDSCAL( N-KI+1, REMAXD, VL, KI, IS, DESCVL, 1 ) * CALL PZLASET( ' ', KI-1, 1, CZERO, CZERO, VL, 1, IS, $ DESCVL ) ELSE IF( KI.LT.N ) $ CALL PZGEMV( 'N', N, N-KI, CONE, VL, 1, KI+1, DESCVL, $ WORK, KI+1, 1, DESCW, 1, DCMPLX( SCALE ), $ VL, 1, KI, DESCVL, 1 ) * CALL PZAMAX( N, REMAXC, II, VL, 1, KI, DESCVL, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PZDSCAL( N, REMAXD, VL, 1, KI, DESCVL, 1 ) END IF * * Set back the original diagonal elements of T. * DO 100 K = KI + 1, N CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = WORK( LDW+IROW ) END IF 100 CONTINUE * IS = IS + 1 110 CONTINUE END IF * RETURN * * End of PZTREVC * END scalapack-1.8.0/SRC/pcpotri.f0000640000175000017500000001715310363532303015601 0ustar muammarmuammar SUBROUTINE PCPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCPOTRI computes the inverse of a complex Hermitian positive definite * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the * Cholesky factorization sub( A ) = U**H*U or L*L**H computed by * PCPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor U or L * from the Cholesky factorization of the distributed matrix * sub( A ) = U**H*U or L*L**H, as computed by PCPOTRF. * On exit, the local pieces of the upper or lower triangle of * the (Hermitian) inverse of sub( A ), overwriting the input * factor U or L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER ICOFF, ICTXT, IROFF, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCLAUUM, $ PCTRTRI, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.NE.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Invert the triangular Cholesky factor U or L. * CALL PCTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * IF( INFO.GT.0 ) $ RETURN * * Form inv(U)*inv(U)' or inv(L)'*inv(L). * CALL PCLAUUM( UPLO, N, A, IA, JA, DESCA ) * RETURN * * End of PCPOTRI * END scalapack-1.8.0/SRC/pslasrt.f0000640000175000017500000002073210363532303015606 0ustar muammarmuammar SUBROUTINE PSLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * February 22, 2000 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, IQ, JQ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) REAL D( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PSLASRT Sort the numbers in D in increasing order and the * corresponding vectors in Q. * * Arguments * ========= * * ID (global input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. (NOT IMPLEMENTED YET) * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( Q ). N >= 0. * * D (global input/output) REAL array, dimmension (N) * On exit, the number in D are sorted in increasing order. * * Q (local input) REAL pointer into the local memory * to an array of dimension (LLD_Q, LOCc(JQ+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IQ (global input) INTEGER * The row index in the global array A indicating the first * row of sub( Q ). * * JQ (global input) INTEGER * The column index in the global array A indicating the * first column of sub( Q ). * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK = MAX( N, NP * ( NB + NQ )) * where * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK = N + 2*NB + 2*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CL, COL, DUMMY, I, ICTXT, IID, IIQ, INDCOL, $ INDX, INDXC, INDXG, IPQ, IPQ2, IPW, IPWORK, J, $ JJQ, K, L, LDQ, LEND, LIWMIN, LWMIN, MYCOL, $ MYROW, NB, ND, NP, NPCOL, NPROW, NQ, PSQ, QCOL, $ QTOT, SBUF * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, LSAME, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PXERBLA, SCOPY, $ SGERV2D, SGESD2D, SLACPY, SLAPST * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 1, N, 1, IQ, JQ, DESCQ, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) LWMIN = MAX( N, NP*( NB+NQ ) ) LIWMIN = N + 2*( NB+NPCOL ) IF( .NOT.LSAME( ID, 'I' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -9 ELSE IF( LIWORK.LT.LIWMIN ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLASRT', -INFO ) RETURN END IF * * Set Pointers * INDXC = 1 INDX = INDXC + N INDXG = INDX INDCOL = INDXG + NB QTOT = INDCOL + NB PSQ = QTOT + NPCOL * IID = 1 IPQ2 = 1 IPW = IPQ2 + NP*NQ * DUMMY = 0 IIQ = INDXG2L( IQ, NB, DUMMY, DUMMY, NPROW ) * * Sort the eigenvalues in D * CALL SLAPST( 'I', N, D, IWORK( INDX ), INFO ) * DO 10 L = 0, N - 1 WORK( IID+L ) = D( IWORK( INDX+L ) ) IWORK( INDXC-1+IWORK( INDX+L ) ) = IID + L 10 CONTINUE CALL SCOPY( N, WORK, 1, D, 1 ) * ND = 0 20 CONTINUE IF( ND.LT.N ) THEN LEND = MIN( NB, N-ND ) J = JQ + ND QCOL = INDXG2P( J, NB, DUMMY, DESCQ( CSRC_ ), NPCOL ) K = 0 DO 30 L = 0, LEND - 1 I = JQ - 1 + IWORK( INDXC+ND+L ) CL = INDXG2P( I, NB, DUMMY, DESCQ( CSRC_ ), NPCOL ) IWORK( INDCOL+L ) = CL IF( MYCOL.EQ.CL ) THEN IWORK( INDXG+K ) = IWORK( INDXC+ND+L ) K = K + 1 END IF 30 CONTINUE * IF( MYCOL.EQ.QCOL ) THEN DO 40 CL = 0, NPCOL - 1 IWORK( QTOT+CL ) = 0 40 CONTINUE DO 50 L = 0, LEND - 1 IWORK( QTOT+IWORK( INDCOL+L ) ) = IWORK( QTOT+ $ IWORK( INDCOL+L ) ) + 1 50 CONTINUE IWORK( PSQ ) = 1 DO 60 CL = 1, NPCOL - 1 IWORK( PSQ+CL ) = IWORK( PSQ+CL-1 ) + IWORK( QTOT+CL-1 ) 60 CONTINUE DO 70 L = 0, LEND - 1 CL = IWORK( INDCOL+L ) I = JQ + ND + L JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = IIQ + ( JJQ-1 )*LDQ IPWORK = IPW + ( IWORK( PSQ+CL )-1 )*NP CALL SCOPY( NP, Q( IPQ ), 1, WORK( IPWORK ), 1 ) IWORK( PSQ+CL ) = IWORK( PSQ+CL ) + 1 70 CONTINUE IWORK( PSQ ) = 1 DO 80 CL = 1, NPCOL - 1 IWORK( PSQ+CL ) = IWORK( PSQ+CL-1 ) + IWORK( QTOT+CL-1 ) 80 CONTINUE DO 90 L = 0, K - 1 I = IWORK( INDXG+L ) JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = IPQ2 + ( JJQ-1 )*NP IPWORK = IPW + ( IWORK( PSQ+MYCOL )-1 )*NP CALL SCOPY( NP, WORK( IPWORK ), 1, WORK( IPQ ), 1 ) IWORK( PSQ+MYCOL ) = IWORK( PSQ+MYCOL ) + 1 90 CONTINUE DO 100 CL = 1, NPCOL - 1 COL = MOD( MYCOL+CL, NPCOL ) SBUF = IWORK( QTOT+COL ) IF( SBUF.NE.0 ) THEN IPWORK = IPW + ( IWORK( PSQ+COL )-1 )*NP CALL SGESD2D( DESCQ( CTXT_ ), NP, SBUF, $ WORK( IPWORK ), NP, MYROW, COL ) END IF 100 CONTINUE * ELSE * IF( K.NE.0 ) THEN CALL SGERV2D( DESCQ( CTXT_ ), NP, K, WORK( IPW ), NP, $ MYROW, QCOL ) DO 110 L = 0, K - 1 I = JQ - 1 + IWORK( INDXG+L ) JJQ = INDXG2L( I, NB, DUMMY, DUMMY, NPCOL ) IPQ = 1 + ( JJQ-1 )*NP IPWORK = IPW + L*NP CALL SCOPY( NP, WORK( IPWORK ), 1, WORK( IPQ ), 1 ) 110 CONTINUE END IF END IF ND = ND + NB GO TO 20 END IF CALL SLACPY( 'Full', NP, NQ, WORK, NP, Q( IIQ ), LDQ ) * * End of PSLASRT * END scalapack-1.8.0/SRC/pdorgrq.f0000640000175000017500000002701310363532303015573 0ustar muammarmuammar SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGRQ generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1) H(2) . . . H(k) * * as returned by PDGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. * N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PDGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDLARFB, $ PDLARFT, PDLASET, PDORGR2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Set A(ia:in,ja+n-m+in-ia+1:ja-n+1) to zero. * CALL PDLASET( 'All', IN-IA+1, M-IN+IA-1, ZERO, ZERO, A, IA, $ JA+N-M+IN-IA+1, DESCA ) * * Use unblocked code for the first or only block. * CALL PDORGR2( IN-IA+1, N-M+IN-IA+1, IN-IA-M+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PDLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the right * CALL PDLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I-IA, N-M+I+IB-IA, IB, A, I, JA, DESCA, WORK, A, $ IA, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-m+i+ib-ia-1 of current block * CALL PDORGR2( IB, N-M+I+IB-IA, IB, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows i:i+ib-1,ja+n-m+i+ib-ia:ja+n-1 of current block to * zero * CALL PDLASET( 'All', IB, M-I-IB+IA, ZERO, ZERO, A, I, $ JA+N-M+I+IB-IA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGRQ * END scalapack-1.8.0/SRC/pzdbtrf.f0000640000175000017500000012616210363532303015575 0ustar muammarmuammar SUBROUTINE PZDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDBTRF computes a LU factorization * of an N-by-N complex banded * diagonally dominant-like distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PZDBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDBTRF and this is stored in AF. If a linear system * is to be solved using PZDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * max(bwl,bwu)*max(bwl,bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MAX_BW, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, UP_PREV_TRI_SIZE_M, $ UP_PREV_TRI_SIZE_N, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZAXPY, $ ZGEMM, ZGERV2D, ZGESD2D, ZLACPY, ZLATCPY, $ ZPBTRF, ZPOTRF, ZSYRK, ZTBTRS, ZTRMM, ZTRRV2D, $ ZTRSD2D, ZTRSM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) MBW2 = MAX_BW * MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -2 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PZDBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PZDBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = MAX(BWL,BWU)*MAX(BWL,BWU) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PZDBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Zero out space for work * DO 20 I=1, WORK_SIZE_MIN WORK( I ) = CZERO 20 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .GT. 0 ) THEN UP_PREV_TRI_SIZE_M= MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) UP_PREV_TRI_SIZE_N=MIN( BWU, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BWL, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL ZTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, $ A( OFST+(MY_NUM_COLS-BWL)*LLDA+(BWL+BWU+1) ), $ LLDA-1, 0, MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL ZDBTRF( ODD_SIZE, ODD_SIZE, BWL, BWU, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * * Apply factorization to lower connection block BL_i * conjugate transpose the connection block in preparation. * Apply factorization to upper connection block BU_i * Move the connection block in preparation. * CALL ZLATCPY( 'U', BWL, BWL, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), $ MAX_BW ) CALL ZLACPY( 'L', BWU, BWU, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW ) * * Perform the triangular system solve {L_i}{{BU'}_i} = {B_i} * CALL ZTBTRS( 'L', 'N', 'U', BWU, BWL, BWU, $ A( OFST+BWU+1+(ODD_SIZE-BWU )*LLDA ), LLDA, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, INFO ) * * Perform the triangular solve {U_i}^C{BL'}_i^C = {BL_i}^C * CALL ZTBTRS( 'U', 'C', 'N', BWL, BWU, BWL, $ A( OFST+1+(ODD_SIZE-BWL)*LLDA ), LLDA, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ INFO ) * * conjugate transpose resulting block to its location * in main storage. * CALL ZLATCPY( 'L', BWL, BWL, $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1 ) * * Move the resulting block back to its location in main storage. * CALL ZLACPY( 'L', BWU, BWU, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ), $ MAX_BW, A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL ZGEMM( 'C', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE , $ AF( ODD_SIZE*BWU+2*MBW2+1), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1), MAX_BW, CONE, $ A( OFST+ODD_SIZE*LLDA+1+BWU ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bwl, bwu)*bwl, bwu+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL ZTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( WORK_U+1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL ZTBTRS( 'L', 'N', 'U', ODD_SIZE, BWL, BWL, $ A( OFST + BWU+1 ), LLDA, AF( WORK_U+1 ), $ ODD_SIZE, INFO ) * * * Calculate the "spike" fillin, ${U_i}^C {{GL}_i}^C = {DU_i}^C$ * * * Copy D block into AF storage for solve. * CALL ZLATCPY( 'L', UP_PREV_TRI_SIZE_N, UP_PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * CALL ZTBTRS( 'U', 'C', 'N', ODD_SIZE, BWU, BWU, $ A( OFST + 1 ), LLDA, $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * * * Zero out space in case result is smaller than storage block * DO 30 I=1, MBW2 AF( ODD_SIZE*BWU+2*MBW2+I ) = CZERO 30 CONTINUE * CALL ZGEMM( 'C', 'N', BWU, BWL, ODD_SIZE, $ -CONE, AF( 1 ), ODD_SIZE, $ AF( WORK_U+1 ), ODD_SIZE, CZERO, $ AF( 1+MAX(0,BWL-BWU)+ODD_SIZE*BWU+ $ (2*MAX_BW+MAX(0,BWU-BWL))*MAX_BW), $ MAX_BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0, $ MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * * Copy matrix HU_i (the last bwl rows of GU_i) to AFL storage * as per requirements of BLAS routine ZTRMM. * Since we have GU_i stored, * conjugate transpose HU_i to HU_i^C. * CALL ZLATCPY( 'N', BWL, BWL, $ AF( WORK_U+ODD_SIZE-BWL+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ), $ MAX_BW ) * CALL ZTRMM( 'R', 'U', 'C', 'N', BWL, BWL, -CONE, $ A( ( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA ) ), $ LLDA-1, AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ), $ MAX_BW ) * * * Copy matrix HL_i (the last bwu rows of GL_i^C) to AFU store * as per requirements of BLAS routine ZTRMM. * Since we have GL_i^C stored, * conjugate transpose HL_i^C to HL_i. * CALL ZLATCPY( 'N', BWU, BWU, $ AF( ODD_SIZE-BWU+1 ), ODD_SIZE, $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * CALL ZTRMM( 'R', 'L', 'N', 'N', BWU, BWU, -CONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ), $ MAX_BW ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, MYCOL-1 ) * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL ZLACPY( 'N', MAX_BW, MAX_BW, $ A( OFST+ODD_SIZE*LLDA+BWU+1 ), $ LLDA-1, AF( ODD_SIZE*BWU+MBW2+1 ), $ MAX_BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL ZAXPY( MBW2, CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL ZDBTRF( MAX_BW, MAX_BW, MIN( MAX_BW-1, BWL ), $ MIN( MAX_BW-1, BWU ), AF( ODD_SIZE*BWU+MBW2+1 $ -( MIN( MAX_BW-1, BWU ))), MAX_BW+1, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL ZLACPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ), $ MAX_BW, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW ) * CALL ZLACPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+1 ), $ MAX_BW, 0, COMM_PROC ) * CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), $ MAX_BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL ZTBTRS( $ 'L', 'N', 'U', BWU, MIN( BWL, BWU-1 ), BWU, $ AF( ODD_SIZE*BWU+ $ MBW2+1+(MAX_BW+1)*(MAX_BW-BWU)), MAX_BW+1, $ AF( WORK_U+ODD_SIZE*BWL+1+MAX_BW-BWU), MAX_BW, INFO ) * * Modify lower off_diagonal block with diagonal block * * CALL ZTBTRS( $ 'U', 'C', 'N', BWL, MIN( BWU, BWL-1 ), BWL, $ AF( ODD_SIZE*BWU+ $ MBW2+1-MIN( BWU, BWL-1 )+(MAX_BW+1)*(MAX_BW-BWL)), MAX_BW+1, $ AF( ODD_SIZE*BWU+1+MAX_BW-BWL), MAX_BW, INFO ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), MAX_BW, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * * * Since ZTBTRS has no "left-right" option, we must transpose * CALL ZLATCPY( 'N', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+ $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW ) * CALL ZTBTRS( $ 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), BWL, $ AF( ODD_SIZE*BWU+MBW2+1), MAX_BW+1, $ WORK( 1+MAX_BW*(MAX_BW-BWL) ), MAX_BW, INFO ) * * Transpose back * CALL ZLATCPY( $ 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+ $ 2*MBW2+1), MAX_BW ) * * * * Since ZTBTRS has no "left-right" option, we must transpose * CALL ZLATCPY( 'N', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+ $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW ) * CALL ZTBTRS( $ 'U', 'C', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), BWU, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 )), MAX_BW+1, $ WORK( 1+MAX_BW*(MAX_BW-BWU) ), MAX_BW, INFO ) * * Transpose back * CALL ZLATCPY( $ 'N', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ AF( ODD_SIZE*BWU+ $ 2*MBW2+1), MAX_BW ) * * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'C', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+(ODD_SIZE)*BWL+2*MBW2+1 ), MAX_BW, $ CZERO, WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL ZGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ AF( ODD_SIZE*BWU+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, COMM_PROC ) * CALL ZGEMM( 'N', 'N', MAX_BW, MAX_BW, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, CZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PZDBTRF * END scalapack-1.8.0/SRC/psgeqpf.f0000640000175000017500000005051710363532303015567 0ustar muammarmuammar SUBROUTINE PSGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. INTEGER IA, JA, INFO, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQPF computes a QR factorization with column pivoting of a * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1): * * sub( A ) * P = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX(3,Mp0 + Nq0) + LOCc(JA+N-1)+Nq0. * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * LOCc(JA+N-1) = NUMROC( JA+N-1, NB_A, MYCOL, CSRC_A, NPCOL ) * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(1) H(2) . . . H(n) * * Each H(i) has the form * * H = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i-1:ia+m-1,ja+i-1). * * The matrix P is represented in jpvt as follows: If * jpvt(j) = i * then the jth column of P is the ith canonical unit vector. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, ICURROW, $ ICURCOL, II, IIA, IOFFA, IPN, IPCOL, IPW, $ IROFF, ITEMP, J, JB, JJ, JJA, JJPVT, JN, KB, $ K, KK, KSTART, KSTEP, LDA, LL, LWMIN, MN, MP, $ MYCOL, MYROW, NPCOL, NPROW, NQ, NQ0, PVT REAL AJJ, ALPHA, TEMP, TEMP2 * .. * .. Local Arrays .. INTEGER DESCN( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGERV2D, $ IGESD2D, INFOG1L, INFOG2L, PCHK1MAT, PSAMAX, $ PSELSET, PSLARF, PSLARFG, PSNRM2, $ PXERBLA, SCOPY, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D, SLARFG, SSWAP * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, IFIX, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NQ0 = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) LWMIN = MAX( 3, MP + NQ ) + NQ0 + NQ * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -10 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEQPF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF MN = MIN( M, N ) * * Initialize the array of pivots * LDA = DESCA( LLD_ ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) KSTEP = NPCOL * DESCA( NB_ ) * IF( MYCOL.EQ.IACOL ) THEN * * Handle first block separately * JB = JN - JA + 1 DO 10 LL = JJA, JJA+JB-1 IPIV( LL ) = JA + LL - JJA 10 CONTINUE KSTART = JN + KSTEP - DESCA( NB_ ) * * Loop over remaining block of columns * DO 30 KK = JJA+JB, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 20 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 20 CONTINUE KSTART = KSTART + KSTEP 30 CONTINUE ELSE KSTART = JN + ( MOD( MYCOL-IACOL+NPCOL, NPCOL )-1 )* $ DESCA( NB_ ) DO 50 KK = JJA, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 40 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 40 CONTINUE KSTART = KSTART + KSTEP 50 CONTINUE END IF * * Initialize partial column norms, handle first block separately * CALL DESCSET( DESCN, 1, DESCA( N_ ), 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), ICTXT, 1 ) * IPN = 1 IPW = IPN + NQ0 + NQ JJ = IPN + JJA - 1 IF( MYCOL.EQ.IACOL ) THEN DO 60 KK = 0, JB-1 CALL PSNRM2( M, WORK( JJ+KK ), A, IA, JA+KK, DESCA, 1 ) WORK( NQ+JJ+KK ) = WORK( JJ+KK ) 60 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining blocks of columns * DO 80 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 70 KK = 0, JB-1 CALL PSNRM2( M, WORK( JJ+KK ), A, IA, J+KK, DESCA, 1 ) WORK( NQ+JJ+KK ) = WORK( JJ+KK ) 70 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) 80 CONTINUE * * Compute factorization * DO 120 J = JA, JA+MN-1 I = IA + J - JA * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, ICURCOL ) K = JA + N - J IF( K.GT.1 ) THEN CALL PSAMAX( K, TEMP, PVT, WORK( IPN ), 1, J, DESCN, $ DESCN( M_ ) ) ELSE PVT = J END IF IF( J.NE.PVT ) THEN CALL INFOG1L( PVT, DESCA( NB_ ), NPCOL, MYCOL, $ DESCA( CSRC_ ), JJPVT, IPCOL ) IF( ICURCOL.EQ.IPCOL ) THEN IF( MYCOL.EQ.ICURCOL ) THEN CALL SSWAP( MP, A( IIA+(JJ-1)*LDA ), 1, $ A( IIA+(JJPVT-1)*LDA ), 1 ) ITEMP = IPIV( JJPVT ) IPIV( JJPVT ) = IPIV( JJ ) IPIV( JJ ) = ITEMP WORK( IPN+JJPVT-1 ) = WORK( IPN+JJ-1 ) WORK( IPN+NQ+JJPVT-1 ) = WORK( IPN+NQ+JJ-1 ) END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN * CALL SGESD2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) WORK( IPW ) = REAL( IPIV( JJ ) ) WORK( IPW+1 ) = WORK( IPN + JJ - 1 ) WORK( IPW+2 ) = WORK( IPN + NQ + JJ - 1 ) CALL SGESD2D( ICTXT, 3, 1, WORK( IPW ), 3, MYROW, $ IPCOL ) * CALL SGERV2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) CALL IGERV2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, $ IPCOL ) * ELSE IF( MYCOL.EQ.IPCOL ) THEN * CALL SGESD2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL IGESD2D( ICTXT, 1, 1, IPIV( JJPVT ), 1, MYROW, $ ICURCOL ) * CALL SGERV2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL SGERV2D( ICTXT, 3, 1, WORK( IPW ), 3, MYROW, $ ICURCOL ) IPIV( JJPVT ) = IFIX( WORK( IPW ) ) WORK( IPN+JJPVT-1 )= WORK( IPW+1 ) WORK( IPN+NQ+JJPVT-1 ) = WORK( IPW+2 ) * END IF * END IF * END IF * * Generate elementary reflector H(i) * CALL INFOG1L( I, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, ICURROW ) IF( DESCA( M_ ).EQ.1 ) THEN IF( MYROW.EQ.ICURROW ) THEN IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*DESCA( LLD_ ) AJJ = A( IOFFA ) CALL SLARFG( 1, AJJ, A( IOFFA ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1 ) CALL SSCAL( NQ-JJ, ALPHA, A( IOFFA+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ ), 1 ) A( IOFFA ) = AJJ ELSE IF( N.GT.1 ) THEN CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, ICURROW, ICURCOL ) CALL SSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1, ICURROW, ICURCOL ) END IF * ELSE * CALL PSLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(i) to A(ia+j-ja:ia+m-1,j+1:ja+n-1) from the left * CALL PSELSET( A, I, J, DESCA, ONE ) CALL PSLARF( 'Left', M-J+JA, JA+N-1-J, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK( IPW ) ) END IF CALL PSELSET( A, I, J, DESCA, AJJ ) * END IF * * Update partial columns norms * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 IF( MOD( J, DESCA( NB_ ) ).EQ.0 ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) IF( (JJA+NQ-JJ).GT.0 ) THEN IF( MYROW.EQ.ICURROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, JJA+NQ-JJ, $ A( II+( MIN( JJA+NQ-1, JJ )-1 )*LDA ), $ LDA ) CALL SCOPY( JJA+NQ-JJ, A( II+( MIN( JJA+NQ-1, JJ ) $ -1)*LDA ), LDA, WORK( IPW+MIN( JJA+NQ-1, $ JJ )-1 ), 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', JJA+NQ-JJ, 1, $ WORK( IPW+MIN( JJA+NQ-1, JJ )-1 ), $ MAX( 1, NQ ), ICURROW, MYCOL ) END IF END IF * JN = MIN( ICEIL( J+1, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA + N - 1 ) IF( MYCOL.EQ.ICURCOL ) THEN DO 90 LL = JJ-1, JJ + JN - J - 2 IF( WORK( IPN+LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( IPW+LL ) ) / $ WORK( IPN+LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05E+0*TEMP* $ ( WORK( IPN+LL ) / WORK( IPN+NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PSNRM2( IA+M-I-1, WORK( IPN+LL ), A, I+1, $ J+LL-JJ+2, DESCA, 1 ) WORK( IPN+NQ+LL ) = WORK( IPN+LL ) ELSE WORK( IPN+LL ) = ZERO WORK( IPN+NQ+LL ) = ZERO END IF ELSE WORK( IPN+LL ) = WORK( IPN+LL ) * SQRT( TEMP ) END IF END IF 90 CONTINUE JJ = JJ + JN - J END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 110 K = JN+1, JA+N-1, DESCA( NB_ ) KB = MIN( JA+N-K, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 100 LL = JJ-1, JJ+KB-2 IF( WORK( IPN+LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( IPW+LL ) ) / $ WORK( IPN+LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05E+0*TEMP* $ ( WORK( IPN+LL ) / WORK( IPN+NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PSNRM2( IA+M-I-1, WORK( IPN+LL ), A, $ I+1, K+LL-JJ+1, DESCA, 1 ) WORK( IPN+NQ+LL ) = WORK( IPN+LL ) ELSE WORK( IPN+LL ) = ZERO WORK( IPN+NQ+LL ) = ZERO END IF ELSE WORK( IPN+LL ) = WORK( IPN+LL ) * SQRT( TEMP ) END IF END IF 100 CONTINUE JJ = JJ + KB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * 110 CONTINUE * 120 CONTINUE * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEQPF * END scalapack-1.8.0/SRC/pclacgv.f0000640000175000017500000001540410363532303015535 0ustar muammarmuammar SUBROUTINE PCLACGV( N, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX X( * ) * .. * * Purpose * ======= * * PCLACGV conjugates a complex vector of length N, sub( X ), where * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = DESCX( M_ ) and * X(IX:IX+N-1,JX) if INCX = 1, and * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_X,*). * On entry the vector to be conjugated * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= N. * On exit the conjugated vector. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICOFFX, ICTXT, IIX, IOFFX, IROFFX, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFFX = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFFX * IF( NQ.GT.0 ) THEN IOFFX = IIX+(JJX-1)*LDX DO 10 I = 1, NQ X( IOFFX ) = CONJG( X( IOFFX ) ) IOFFX = IOFFX + LDX 10 CONTINUE END IF * ELSE IF( INCX.EQ.1 ) THEN * * sub( X ) is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFFX = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFFX * IF( NP.GT.0 ) THEN IOFFX = IIX+(JJX-1)*LDX DO 20 I = IOFFX, IOFFX+NP-1 X( I ) = CONJG( X( I ) ) 20 CONTINUE END IF * END IF * RETURN * * End of PCLACGV * END scalapack-1.8.0/SRC/pcheevd.f0000640000175000017500000003726210363532303015542 0ustar muammarmuammar SUBROUTINE PCHEEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 25, 2002 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) REAL RWORK( * ), W( * ) COMPLEX A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCHEEVD computes all the eigenvalues and eigenvectors of a Hermitian * matrix A by using a divide and conquer algorithm. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only; (NOT IMPLEMENTED YET) * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PCHEEVD cannot guarantee * correct error reporting. * * W (global output) REAL array, dimension (N) * If INFO=0, the eigenvalues in ascending order. * * Z (local output) COMPLEX array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * Z contains the orthonormal eigenvectors of the matrix A. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed for the * computation. * * LWORK (local input) INTEGER * If eigenvectors are requested: * LWORK = N + ( NP0 + MQ0 + NB ) * NB, * with NP0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine calculates the size for all * work arrays. Each of these values is returned in the first * entry of the corresponding work array, and no error message * is issued by PXERBLA. * * RWORK (local workspace/output) REAL array, * dimension (LRWORK) * On output RWORK(1) returns the real workspace needed to * guarantee completion. If the input parameters are incorrect, * RWORK(1) may also be incorrect. * * LRWORK (local input) INTEGER * Size of RWORK array. * LRWORK >= 1 + 9*N + 3*NP*NQ, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On output IWORK(1) returns the integer workspace needed. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in PSLAED3. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ), sub( Z ) must verify * some alignment properties, namely the following expression * should be true: * ( MB_A.EQ.NB_A.EQ.MB_Z.EQ.NB_Z .AND. IROFFA.EQ.ICOFFA .AND. * IROFFA.EQ.0 .AND.IROFFA.EQ.IROFFZ. AND. IAROW.EQ.IZROW) * with IROFFA = MOD( IA-1, MB_A ) * and ICOFFA = MOD( JA-1, NB_A ). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LOWER, LQUERY INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, IINFO, IIZ, $ INDD, INDE, INDE2, INDRWORK, INDTAU, INDWORK, $ INDZ, IPR, IPZ, IROFFA, IROFFZ, ISCALE, IZCOL, $ IZROW, J, JJZ, LDR, LDZ, LIWMIN, LLRWORK, $ LLWORK, LRWMIN, LWMIN, MB_A, MYCOL, MYROW, NB, $ NB_A, NN, NP0, NPCOL, NPROW, NQ, NQ0, OFFSET, $ RSRC_A REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCRZ( 9 ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC REAL PCLANHE, PSLAMCH EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PCLANHE, $ PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCINIT, INFOG2L, $ PCELGET, PCHETRD, PCHK2MAT, PCLASCL, PCLASET, $ PCUNMTR, PSLARED1D, PSLASET, PSSTEDC, PXERBLA, $ SSCAL * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN LOWER = LSAME( UPLO, 'L' ) NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( IA, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, MB_A, MYCOL, CSRC_A, NPCOL ) NP0 = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ0 = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) IROFFZ = MOD( IZ-1, MB_A ) CALL INFOG2L( IZ, JZ, DESCZ, NPROW, NPCOL, MYROW, MYCOL, $ IIZ, JJZ, IZROW, IZCOL ) LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * * Compute the total amount of space needed * NN = MAX( N, NB, 2 ) NQ = NUMROC( NN, NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+NQ+NB )*NB LRWMIN = 1 + 9*N + 3*NP0*NQ0 LIWMIN = 7*N + 8*NPCOL + 2 WORK( 1 ) = CMPLX( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) IWORK( 1 ) = LIWMIN IF( .NOT.LSAME( JOBZ, 'V' ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -16 ELSE IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( LOWER ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'U' ) END IF IDUM2( 1 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, $ JZ, DESCZ, 12, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCHEEVD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Get machine constants. * SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * * Set up pointers into the WORK array * INDTAU = 1 INDWORK = INDTAU + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDE = 1 INDD = INDE + N INDE2 = INDD + N INDRWORK = INDE2 + N LLRWORK = LRWORK - INDRWORK + 1 * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PCLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PCLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce Hermitian matrix to tridiagonal form. * CALL PCHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE2 ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet work with arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PSLARED1D( N, IA, JA, DESCA, RWORK( INDD ), W, $ RWORK( INDRWORK ), LLRWORK ) * CALL PSLARED1D( N, IA, JA, DESCA, RWORK( INDE2 ), $ RWORK( INDE ), RWORK( INDRWORK ), LLRWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PCELGET( 'A', ' ', WORK( INDWORK ), A, I+IA-1, I+JA-1, $ DESCA ) W( I ) = REAL( WORK( INDWORK ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDWORK ), A, I+IA-1, I+JA, $ DESCA ) RWORK( INDE+I-1 ) = REAL( WORK( INDWORK ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDWORK ), A, I+IA, I+JA-1, $ DESCA ) RWORK( INDE+I-1 ) = REAL( WORK( INDWORK ) ) 30 CONTINUE END IF END IF * * Call PSSTEDC to compute eigenvalues and eigenvectors. * INDZ = INDE + N INDRWORK = INDZ + NP0*NQ0 LLRWORK = LRWORK - INDRWORK + 1 LDR = MAX( 1, NP0 ) CALL DESCINIT( DESCRZ, DESCZ( M_ ), DESCZ( N_ ), DESCZ( MB_ ), $ DESCZ( NB_ ), DESCZ( RSRC_ ), DESCZ( CSRC_ ), $ DESCZ( CTXT_ ), LDR, INFO ) CALL PCLASET( 'Full', N, N, CZERO, CONE, Z, IZ, JZ, DESCZ ) CALL PSLASET( 'Full', N, N, ZERO, ONE, RWORK( INDZ ), 1, 1, $ DESCRZ ) CALL PSSTEDC( 'I', N, W, RWORK( INDE+OFFSET ), RWORK( INDZ ), IZ, $ JZ, DESCRZ, RWORK( INDRWORK ), LLRWORK, IWORK, $ LIWORK, IINFO ) * LDZ = DESCZ( LLD_ ) LDR = DESCRZ( LLD_ ) IIZ = INDXG2L( IZ, NB, MYROW, MYROW, NPROW ) JJZ = INDXG2L( JZ, NB, MYCOL, MYCOL, NPCOL ) IPZ = IIZ + ( JJZ-1 )*LDZ IPR = INDZ - 1 + IIZ + ( JJZ-1 )*LDR DO 50 J = 0, NQ0 - 1 DO 40 I = 0, NP0 - 1 Z( IPZ+I+J*LDZ ) = RWORK( IPR+I+J*LDR ) 40 CONTINUE 50 CONTINUE * * Z = Q * Z * CALL PCUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, WORK( INDWORK ), $ LLWORK, IINFO ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL SSCAL( N, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = CMPLX( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PCHEEVD * END scalapack-1.8.0/SRC/pdlawil.f0000640000175000017500000002355110363532303015554 0ustar muammarmuammar SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER II, JJ, M DOUBLE PRECISION H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), V( * ) * .. * * Purpose * ======= * * PDLAWIL gets the transform given by H44,H33, & H43H34 into V * starting at row M. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * II (global input) INTEGER * Row owner of H(M+2,M+2) * * JJ (global input) INTEGER * Column owner of H(M+2,M+2) * * M (global input) INTEGER * On entry, this is where the transform starts (row M.) * Unchanged on exit. * * A (global input) DOUBLE PRECISION array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * Unchanged on exit. * * H44 * H33 * H43H34 (global input) DOUBLE PRECISION * These three values are for the double shift QR iteration. * Unchanged on exit. * * V (global output) DOUBLE PRECISION array of size 3. * Contains the transform on ouput. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, ICOL, IROW, JSRC, LDA, LEFT, $ MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, $ RSRC, UP DOUBLE PRECISION H11, H12, H21, H22, H33S, H44S, S, V1, V2, V3 * .. * .. Local Arrays .. DOUBLE PRECISION BUF( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGERV2D, DGESD2D, INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * On node (II,JJ) collect all DIA,SUP,SUB info from M, M+1 * MODKM1 = MOD( M+1, HBL ) IF( MODKM1.EQ.0 ) THEN IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) CALL DGESD2D( CONTXT, 1, 1, BUF, 1, II, JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) BUF( 2 ) = A( ( ICOL-1 )*LDA+IROW+1 ) BUF( 3 ) = A( ICOL*LDA+IROW ) BUF( 4 ) = A( ICOL*LDA+IROW+1 ) CALL DGESD2D( CONTXT, 4, 1, BUF, 4, II, JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NPCOL.GT.1 ) THEN CALL DGERV2D( CONTXT, 1, 1, V3, 1, MYROW, LEFT ) ELSE V3 = A( ( ICOL-2 )*LDA+IROW ) END IF IF( NUM.GT.1 ) THEN CALL DGERV2D( CONTXT, 4, 1, BUF, 4, UP, LEFT ) H11 = BUF( 1 ) H21 = BUF( 2 ) H12 = BUF( 3 ) H22 = BUF( 4 ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) END IF END IF END IF IF( MODKM1.EQ.1 ) THEN IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) CALL DGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. ( NPROW.GT.1 ) ) $ THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL DGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL DGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NUM.GT.1 ) THEN CALL DGERV2D( CONTXT, 1, 1, H11, 1, UP, LEFT ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) END IF IF( NPROW.GT.1 ) THEN CALL DGERV2D( CONTXT, 1, 1, H12, 1, UP, MYCOL ) ELSE H12 = A( ( ICOL-2 )*LDA+IROW-2 ) END IF IF( NPCOL.GT.1 ) THEN CALL DGERV2D( CONTXT, 1, 1, H21, 1, MYROW, LEFT ) ELSE H21 = A( ( ICOL-3 )*LDA+IROW-1 ) END IF H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF END IF IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) $ RETURN * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF * H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 * RETURN * * End of PDLAWIL * END scalapack-1.8.0/SRC/pcgehrd.f0000640000175000017500000003556410363532303015543 0ustar muammarmuammar SUBROUTINE PCGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEHRD reduces a complex general distributed matrix sub( A ) * to upper Hessenberg form H by an unitary similarity transformation: * Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+ILO-2 * and JA+IHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*NB + NB*MAX( IHIP+1, IHLP+INLQ ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * ICOFFA = MOD( JA-1, NB ), IOFF = MOD( IA+ILO-2, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:I) = 0, v(I+1) = 1 and v(IHI+1:N) = 0; v(I+2:IHI) is stored on * exit in A(IA+ILO+I:IA+IHI-1,JA+ILO+I-2), and tau in TAU(JA+ILO+I-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follow- * ing example, with N = 7, ILO = 2 and IHI = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), H denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(JA+ILO+I-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, IB, ICOFFA, ICTXT, IHIP, $ IHLP, IIA, IINFO, ILCOL, ILROW, IMCOL, INLQ, $ IOFF, IPT, IPW, IPY, IROFFA, J, JJ, JJA, JY, $ K, L, LWMIN, MYCOL, MYROW, NB, NPCOL, NPROW, $ NQ COMPLEX EI * .. * .. Local Arrays .. INTEGER DESCY( DLEN_ ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG1L, $ INFOG2L, PCGEMM, PCGEHD2, PCHK1MAT, $ PCLAHRD, PCLARFB, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, NB ) ICOFFA = MOD( JA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ) IOFF = MOD( IA+ILO-2, NB ) ILROW = INDXG2P( IA+ILO-1, NB, MYROW, DESCA( RSRC_ ), $ NPROW ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ) ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWMIN = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = ILO IDUM2( 1 ) = 2 IDUM1( 2 ) = IHI IDUM2( 2 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEHRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set elements JA:JA+ILO-2 and JA+JHI-1:JA+N-2 of TAU to zero. * NQ = NUMROC( JA+N-2, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL INFOG1L( JA+ILO-2, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 10 J = JJA, MIN( JJ, NQ ) TAU( J ) = ZERO 10 CONTINUE * CALL INFOG1L( JA+IHI-1, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 20 J = JJ, NQ TAU( J ) = ZERO 20 CONTINUE * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPT = 1 IPY = IPT + NB * NB IPW = IPY + IHIP * NB CALL DESCSET( DESCY, IHI+IROFFA, NB, NB, NB, IAROW, ILCOL, ICTXT, $ MAX( 1, IHIP ) ) * K = ILO IB = NB - IOFF JY = IOFF + 1 * * Loop over remaining block of columns * DO 30 L = 1, IHI-ILO+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns j:j+ib-1 to Hessenberg form, returning the * matrices V and T of the block reflector H = I - V*T*V' * which performs the reduction, and also the matrix Y = A*V*T * CALL PCLAHRD( IHI, K, IB, A, IA, J, DESCA, TAU, WORK( IPT ), $ WORK( IPY ), 1, JY, DESCY, WORK( IPW ) ) * * Apply the block reflector H to A(ia:ia+ihi-1,j+ib:ja+ihi-1) * from the right, computing A := A - Y * V'. * V(i+ib,ib-1) must be set to 1. * CALL PCELSET2( EI, A, I+IB, J+IB-1, DESCA, ONE ) CALL PCGEMM( 'No transpose', 'Conjugate transpose', IHI, $ IHI-K-IB+1, IB, -ONE, WORK( IPY ), 1, JY, DESCY, $ A, I+IB, J, DESCA, ONE, A, IA, J+IB, DESCA ) CALL PCELSET( A, I+IB, J+IB-1, DESCA, EI ) * * Apply the block reflector H to A(i+1:ia+ihi-1,j+ib:ja+n-1) from * the left * CALL PCLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI-K, N-K-IB+1, IB, A, I+1, J, $ DESCA, WORK( IPT ), A, I+1, J+IB, DESCA, $ WORK( IPY ) ) * K = K + IB IB = NB JY = 1 DESCY( CSRC_ ) = MOD( DESCY( CSRC_ ) + 1, NPCOL ) * 30 CONTINUE * * Use unblocked code to reduce the rest of the matrix * CALL PCGEHD2( N, K, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEHRD * END scalapack-1.8.0/SRC/pdlantr.f0000640000175000017500000011062410363532303015562 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLANTR returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * trapezoidal or triangular distributed matrix sub( A ) denoting * A(IA:IA+M-1, JA:JA+N-1). * * PDLANTR returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with ia <= i <= ia+m-1, * ( and ja <= j <= ja+n-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PDLANTR as described * above. * * UPLO (global input) CHARACTER * Specifies whether the matrix sub( A ) is upper or lower * trapezoidal. * = 'U': Upper trapezoidal * = 'L': Lower trapezoidal * Note that sub( A ) is triangular instead of trapezoidal * if M = N. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) has * unit diagonal. * = 'N': Non-unit diagonal * = 'U': Unit diagonal * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PDLANTR is * set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PDLANTR is set to zero. N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL UDIAG INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, IOFFA, $ IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, MP, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, DLASSQ, $ INFOG2L, PDTREECOMB * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL LSAME, ICEIL, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * IF( UDIAG ) THEN VALUE = ONE ELSE VALUE = ZERO END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 20 LL = JJ, JJ + JB -1 DO 10 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE DO 40 LL = JJ, JJ + JB -1 DO 30 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE END IF ELSE DO 60 LL = JJ, JJ + JB -1 DO 50 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 80 LL = JJ, JJ + JB -1 DO 70 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 70 CONTINUE IOFFA = IOFFA + LDA 80 CONTINUE ELSE DO 100 LL = JJ, JJ + JB -1 DO 90 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 90 CONTINUE IOFFA = IOFFA + LDA 100 CONTINUE END IF ELSE DO 120 LL = JJ, JJ + JB -1 DO 110 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 110 CONTINUE IOFFA = IOFFA + LDA 120 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 130 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 150 LL = JJ, JJ + JB -1 DO 140 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE ELSE DO 170 LL = JJ, JJ + JB -1 DO 160 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 160 CONTINUE IOFFA = IOFFA + LDA 170 CONTINUE END IF ELSE DO 190 LL = JJ, JJ + JB -1 DO 180 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 180 CONTINUE IOFFA = IOFFA + LDA 190 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 210 LL = JJ, JJ + JB -1 DO 200 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 200 CONTINUE IOFFA = IOFFA + LDA 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 DO 220 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE END IF ELSE DO 250 LL = JJ, JJ + JB -1 DO 240 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 260 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 280 LL = JJ, JJ + JB -1 SUM = ONE DO 270 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 270 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 SUM = ZERO DO 290 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 290 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 300 CONTINUE END IF ELSE DO 320 LL = JJ, JJ + JB -1 SUM = ZERO DO 310 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 310 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 320 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 390 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 340 LL = JJ, JJ + JB -1 SUM = ONE DO 330 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 330 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 340 CONTINUE ELSE DO 360 LL = JJ, JJ + JB -1 SUM = ZERO DO 350 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 360 CONTINUE END IF ELSE DO 380 LL = JJ, JJ + JB -1 SUM = ZERO DO 370 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 380 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 410 LL = JJ, JJ + JB -1 SUM = ONE DO 400 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 400 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 410 CONTINUE ELSE DO 430 LL = JJ, JJ + JB -1 SUM = ZERO DO 420 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 420 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 430 CONTINUE END IF ELSE DO 450 LL = JJ, JJ + JB -1 SUM = ZERO DO 440 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 440 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 450 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 520 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 470 LL = JJ, JJ + JB -1 SUM = ONE DO 460 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 460 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 470 CONTINUE ELSE DO 490 LL = JJ, JJ + JB -1 SUM = ZERO DO 480 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 480 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 490 CONTINUE END IF ELSE DO 510 LL = JJ, JJ + JB -1 SUM = ZERO DO 500 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 500 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 510 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 520 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * IF( LSAME( UPLO, 'U' ) ) THEN IF( UDIAG ) THEN DO 530 KK = IIA, IIA+MP-1 WORK( KK ) = ONE 530 CONTINUE ELSE DO 540 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 540 CONTINUE END IF ELSE IF( UDIAG ) THEN NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF DO 550 KK = IIA, IIA+NP-1 WORK( KK ) = ONE 550 CONTINUE DO 560 KK = IIA+NP, IIA+MP-1 WORK( KK ) = ZERO 560 CONTINUE ELSE DO 570 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 570 CONTINUE END IF END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 590 LL = JJ, JJ + JB -1 DO 580 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 580 CONTINUE IOFFA = IOFFA + LDA 590 CONTINUE ELSE DO 610 LL = JJ, JJ + JB -1 DO 600 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 600 CONTINUE IOFFA = IOFFA + LDA 610 CONTINUE END IF ELSE DO 630 LL = JJ, JJ + JB -1 DO 620 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 620 CONTINUE IOFFA = IOFFA + LDA 630 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 700 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 650 LL = JJ, JJ + JB -1 DO 640 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 640 CONTINUE IOFFA = IOFFA + LDA 650 CONTINUE ELSE DO 670 LL = JJ, JJ + JB -1 DO 660 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 660 CONTINUE IOFFA = IOFFA + LDA 670 CONTINUE END IF ELSE DO 690 LL = JJ, JJ + JB -1 DO 680 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 680 CONTINUE IOFFA = IOFFA + LDA 690 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 700 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 720 LL = JJ, JJ + JB -1 DO 710 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 710 CONTINUE IOFFA = IOFFA + LDA 720 CONTINUE ELSE DO 740 LL = JJ, JJ + JB -1 DO 730 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 730 CONTINUE IOFFA = IOFFA + LDA 740 CONTINUE END IF ELSE DO 760 LL = JJ, JJ + JB -1 DO 750 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 750 CONTINUE IOFFA = IOFFA + LDA 760 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 830 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 780 LL = JJ, JJ + JB -1 DO 770 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 770 CONTINUE IOFFA = IOFFA + LDA 780 CONTINUE ELSE DO 800 LL = JJ, JJ + JB -1 DO 790 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 790 CONTINUE IOFFA = IOFFA + LDA 800 CONTINUE END IF ELSE DO 820 LL = JJ, JJ + JB -1 DO 810 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 810 CONTINUE IOFFA = IOFFA + LDA 820 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 830 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( IDAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * IF( UDIAG ) THEN SCALE = ONE SUM = DBLE( MIN( M, N ) ) / DBLE( NPROW*NPCOL ) ELSE SCALE = ZERO SUM = ONE END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 840 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 840 CONTINUE ELSE DO 850 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 850 CONTINUE END IF ELSE DO 860 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 860 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 900 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 870 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 870 CONTINUE ELSE DO 880 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II+LL-JJ, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 880 CONTINUE END IF ELSE DO 890 LL = JJ, JJ + JB -1 CALL DLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 890 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 900 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 910 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 910 CONTINUE ELSE DO 920 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 920 CONTINUE END IF ELSE DO 930 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 930 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 970 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 940 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 940 CONTINUE ELSE DO 950 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 950 CONTINUE END IF ELSE DO 960 LL = JJ, JJ + JB -1 CALL DLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 960 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 970 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to every process in the grid. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PDLANTR = VALUE * RETURN * * End of PDLANTR * END scalapack-1.8.0/SRC/pcgesv.f0000640000175000017500000002311010367447133015410 0ustar muammarmuammar SUBROUTINE PCGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * Jan 30, 2006 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCGESV computes the solution to a complex system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed * matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS * distributed matrices. * * The LU decomposition with partial pivoting and row interchanges is * used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu- * tation matrix, L is unit lower triangular, and U is upper triangular. * L and U are stored in sub( A ). The factored form of sub( A ) is then * used to solve the system of equations sub( A ) * X = sub( B ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the N-by-N distributed matrix * sub( A ) to be factored. On exit, this array contains the * local pieces of the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand side * distributed matrix sub( B ). On exit, if INFO = 0, sub( B ) * is overwritten by the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, so the solution could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCGETRF, $ PCGETRS, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P EXTERNAL INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( N, 1, NRHS, 2, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( IBROW.NE.IAROW .OR. ICOFFA.NE.IROFFB ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1100+CTXT_) END IF END IF CALL PCHK2MAT( N, 1, N, 1, IA, JA, DESCA, 6, N, 1, NRHS, 2, $ IB, JB, DESCB, 11, 0, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGESV', -INFO ) RETURN END IF * * Compute the LU factorization of sub( A ). * CALL PCGETRF( N, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ), overwriting sub( B ) * with X. * CALL PCGETRS( 'No transpose', N, NRHS, A, IA, JA, DESCA, IPIV, $ B, IB, JB, DESCB, INFO ) * END IF * RETURN * * End of PCGESV * END scalapack-1.8.0/SRC/pcgeqrf.f0000640000175000017500000002751710363532303015555 0ustar muammarmuammar SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQRF computes a QR factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1), and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IINFO, IPW, J, $ JB, JN, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCGEQR2, $ PCLARFB, PCLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JB = JN - JA + 1 * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) * CALL PCGEQR2( M, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( JA+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ja) H(ja+1) . . . H(jn) * CALL PCLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PCLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', M, N-JB, JB, A, IA, JA, DESCA, $ WORK, A, IA, JA+JB, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of columns * DO 10 J = JN+1, JA+K-1, DESCA( NB_ ) JB = MIN( K-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) * CALL PCGEQR2( M-J+JA, JB, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PCLARFT( 'Forward', 'Columnwise', M-J+JA, JB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PCLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', M-J+JA, N-J-JB+JA, JB, A, I, J, $ DESCA, WORK, A, I, J+JB, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEQRF * END scalapack-1.8.0/SRC/pzggrqf.f0000640000175000017500000003633610363532303015605 0ustar muammarmuammar SUBROUTINE PZGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PZGGRQF computes a generalized RQ factorization of * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1): * * sub( A ) = R*Q, sub( B ) = Z*T*Q, * * where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, * and R and T assume one of the forms: * * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, * N-M M ( R21 ) N * N * * where R12 or R21 is upper triangular, and * * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, * ( 0 ) P-N P N-P * N * * where T11 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GRQ * factorization of sub( A ) and sub( B ) implicitly gives the RQ * factorization of sub( A )*inv( sub( B ) ): * * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z' * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the conjugate transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( B ). P >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and sub( B ). * N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAUA, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUA is * tied to the distributed matrix A (see Further Details). * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, the local pieces of the P-by-N distributed matrix * sub( B ) which is to be factored. On exit, the elements on * and above the diagonal of sub( B ) contain the min(P,N) by N * upper trapezoidal matrix T (T is upper triangular if P >= N); * the elements below the diagonal, with the array TAUB, * represent the unitary matrix Z as a product of elementary * reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) COMPLEX*16, array, dimension * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors * TAUB of the elementary reflectors which represent the unitary * matrix Z. TAUB is tied to the distributed matrix B (see * Further Details). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ), * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) + * MB_A * MB_A, * NB_B * ( PpB0 + NqB0 + NB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1). * To form Q explicitly, use ScaLAPACK subroutine PZUNGRQ. * To use Q to update another matrix, use ScaLAPACK subroutine PZUNMRQ. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1). * To form Z explicitly, use ScaLAPACK subroutine PZUNGQR. * To use Z to update another matrix, use ScaLAPACK subroutine PZUNMQR. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0, NQB0, PPB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZGEQRF, PZGERQF, PZUNMRQ * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, INT, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( M, 1, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( P, 2, N, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) PPB0 = NUMROC( P+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NQB0 = NUMROC( N+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ), $ MAX( MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( PPB0 + NQB0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ), $ DESCB( NB_ ) * ( PPB0 + NQB0 + DESCB( NB_ ) ) ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IACOL.NE.IBCOL .OR. ICOFFA.NE.ICOFFB ) THEN INFO = -11 ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -1204 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( M, 1, N, 3, IA, JA, DESCA, 7, P, 2, N, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGGRQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q * CALL PZGERQF( M, N, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := sub( B )*Q' * CALL PZUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), $ A, MAX( IA, IA+M-N ), JA, DESCA, TAUA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) LWMIN = MAX( LWMIN, INT( WORK( 1 ) ) ) * * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T * CALL PZGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DCMPLX( DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) ) * RETURN * * End of PZGGRQF * END scalapack-1.8.0/SRC/pcgelq2.f0000640000175000017500000002507110363532303015454 0ustar muammarmuammar SUBROUTINE PCGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGELQ2 computes a LQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1)' H(ia+k-2)' . . . H(ia)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in * A(ia+i-1,ja+i:ja+n-1), and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ COMPLEX AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLACGV, PCLARF, PCLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * K = MIN( M, N ) DO 10 I = IA, IA+K-1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i,j+1:ja+n-1) * CALL PCLACGV( N-J+JA, A, I, J, DESCA, DESCA( M_ ) ) CALL PCLARFG( N-J+JA, AII, I, J, A, I, MIN( J+1, JA+N-1 ), $ DESCA, DESCA( M_ ), TAU ) * IF( I.LT.IA+M-1 ) THEN * * Apply H(i) to A(i+1:ia+m-1,j:ja+n-1) from the right * CALL PCELSET( A, I, J, DESCA, ONE ) CALL PCLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PCELSET( A, I, J, DESCA, AII ) CALL PCLACGV( N-J+JA, A, I, J, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGELQ2 * END scalapack-1.8.0/SRC/pslabrd.f0000640000175000017500000005264110363532303015551 0ustar muammarmuammar SUBROUTINE PSLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL A( * ), D( * ), E( * ), TAUP( * ), $ TAUQ( * ), X( * ), Y( * ), WORK( * ) * .. * * Purpose * ======= * * PSLABRD reduces the first NB rows and columns of a real general * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper * or lower bidiagonal form by an orthogonal transformation Q' * A * P, * and returns the matrices X and Y which are needed to apply the * transformation to the unreduced part of sub( A ). * * If M >= N, sub( A ) is reduced to upper bidiagonal form; if M < N, to * lower bidiagonal form. * * This is an auxiliary routine called by PSGEBRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of leading rows and columns of sub( A ) to be * reduced. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ) to be reduced. On exit, * the first NB rows and columns of the matrix are overwritten; * the rest of the distributed matrix sub( A ) is unchanged. * If m >= n, elements on and below the diagonal in the first NB * columns, with the array TAUQ, represent the orthogonal * matrix Q as a product of elementary reflectors; and * elements above the diagonal in the first NB rows, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. * If m < n, elements below the diagonal in the first NB * columns, with the array TAUQ, represent the orthogonal * matrix Q as a product of elementary reflectors, and * elements on and above the diagonal in the first NB rows, * with the array TAUP, represent the orthogonal matrix P as * a product of elementary reflectors. * See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(ia+i-1,ja+i-1). D is tied to the distributed * matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(ia+i-1,ja+i) for i = 1,2,...,n-1; * if m < n, E(i) = A(ia+i,ja+i-1) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) REAL array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * X (local output) REAL pointer into the local memory * to an array of dimension (LLD_X,NB). On exit, the local * pieces of the distributed M-by-NB matrix * X(IX:IX+M-1,JX:JX+NB-1) required to update the unreduced * part of sub( A ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * Y (local output) REAL pointer into the local memory * to an array of dimension (LLD_Y,NB). On exit, the local * pieces of the distributed N-by-NB matrix * Y(IY:IY+N-1,JY:JY+NB-1) required to update the unreduced * part of sub( A ). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= NB_A + NQ, with * * NQ = NUMROC( N+MOD( IA-1, NB_Y ), NB_Y, MYCOL, IACOL, NPCOL ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors. * * If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); u(1:i-1) = 0, u(i) = 1, and u(i:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The elements of the vectors v and u together form the m-by-nb matrix * V and the nb-by-n matrix U' which are needed, with X and Y, to apply * the transformation to the unreduced part of the matrix, using a block * update of the form: sub( A ) := sub( A ) - V*Y' - X*U'. * * The contents of sub( A ) on exit are illustrated by the following * examples with nb = 2: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) * ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) * ( v1 v2 a a a ) ( v1 1 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix which is unchanged, * vi denotes an element of the vector defining H(i), and ui an element * of the vector defining G(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, IPY, IW, J, JJ, $ JWY, K, MYCOL, MYROW, NPCOL, NPROW REAL ALPHA, TAU INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), $ DESCTP( DLEN_ ), DESCTQ( DLEN_ ), $ DESCW( DLEN_ ), DESCWY( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSCOPY, $ PSELGET, PSELSET, PSGEMV, PSLARFG, $ PSSCAL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 IW = MOD( IA-1, DESCA( NB_ ) ) + 1 ALPHA = ZERO * CALL DESCSET( DESCWY, 1, N+MOD( IA-1, DESCY( NB_ ) ), 1, $ DESCA( NB_ ), IAROW, IACOL, ICTXT, 1 ) CALL DESCSET( DESCW, DESCA( MB_ ), 1, DESCA( MB_ ), 1, IAROW, $ IACOL, ICTXT, DESCA( MB_ ) ) CALL DESCSET( DESCTQ, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), IAROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCTP, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), IACOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i:ia+m-1,j) * IF( K.GT.1 ) THEN CALL PSGEMV( 'No transpose', M-K+1, K-1, -ONE, A, I, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I, $ J, DESCA, 1 ) CALL PSGEMV( 'No transpose', M-K+1, K-1, -ONE, X, IX+K-1, $ JX, DESCX, A, IA, J, DESCA, 1, ONE, A, I, J, $ DESCA, 1 ) CALL PSELSET( A, I-1, J, DESCA, ALPHA ) END IF * * Generate reflection Q(i) to annihilate A(i+1:ia+m-1,j) * CALL PSLARFG( M-K+1, ALPHA, I, J, A, I+1, J, DESCA, 1, $ TAUQ ) CALL PSELSET( D, 1, J, DESCD, ALPHA ) CALL PSELSET( A, I, J, DESCA, ONE ) * * Compute Y(IA+I:IA+N-1,J) * CALL PSGEMV( 'Transpose', M-K+1, N-K, ONE, A, I, J+1, DESCA, $ A, I, J, DESCA, 1, ZERO, WORK( IPY ), 1, JWY, $ DESCWY, DESCWY( M_ ) ) CALL PSGEMV( 'Transpose', M-K+1, K-1, ONE, A, I, JA, DESCA, $ A, I, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PSGEMV( 'Transpose', K-1, N-K, -ONE, Y, IY, JY+K, $ DESCY, WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), $ 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PSGEMV( 'Transpose', M-K+1, K-1, ONE, X, IX+K-1, JX, $ DESCX, A, I, J, DESCA, 1, ZERO, WORK, IW, 1, $ DESCW, 1 ) CALL PSGEMV( 'Transpose', K-1, N-K, -ONE, A, IA, J+1, DESCA, $ WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) * CALL PSELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PSSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PSCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) * * Update A(i,j+1:ja+n-1) * CALL PSGEMV( 'Transpose', K, N-K, -ONE, Y, IY, JY+K, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, I, J+1, $ DESCA, DESCA( M_ ) ) CALL PSGEMV( 'Transpose', K-1, N-K, -ONE, A, IA, J+1, DESCA, $ X, IX+K-1, JX, DESCX, DESCX( M_ ), ONE, A, I, $ J+1, DESCA, DESCA( M_ ) ) CALL PSELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection P(i) to annihilate A(i,j+2:ja+n-1) * CALL PSLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, N+JA-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PSELSET( E, I, 1, DESCE, ALPHA ) CALL PSELSET( A, I, J+1, DESCA, ONE ) * * Compute X(I+1:IA+M-1,J) * CALL PSGEMV( 'No transpose', M-K, N-K, ONE, A, I+1, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PSGEMV( 'No transpose', K, N-K, ONE, Y, IY, JY+K, $ DESCY, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PSGEMV( 'No transpose', M-K, K, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PSGEMV( 'No transpose', K-1, N-K, ONE, A, IA, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PSGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PSELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PSSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i,j:ja+n-1) * IF( K.GT.1 ) THEN CALL PSGEMV( 'Transpose', K-1, N-K+1, -ONE, Y, IY, $ JY+K-1, DESCY, A, I, JA, DESCA, DESCA( M_ ), $ ONE, A, I, J, DESCA, DESCA( M_ ) ) CALL PSGEMV( 'Transpose', K-1, N-K+1, -ONE, A, IA, J, $ DESCA, X, IX+K-1, JX, DESCX, DESCX( M_ ), $ ONE, A, I, J, DESCA, DESCA( M_ ) ) CALL PSELSET( A, I, J-1, DESCA, ALPHA ) END IF * * Generate reflection P(i) to annihilate A(i,j+1:ja+n-1) * CALL PSLARFG( N-K+1, ALPHA, I, J, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP ) CALL PSELSET( D, I, 1, DESCD, ALPHA ) CALL PSELSET( A, I, J, DESCA, ONE ) * * Compute X(i+1:ia+m-1,j) * CALL PSGEMV( 'No transpose', M-K, N-K+1, ONE, A, I+1, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PSGEMV( 'No transpose', K-1, N-K+1, ONE, Y, IY, JY+K-1, $ DESCY, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PSGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PSGEMV( 'No transpose', K-1, N-K+1, ONE, A, IA, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PSGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PSELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PSSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) * * Update A(i+1:ia+m-1,j) * CALL PSGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PSGEMV( 'No transpose', M-K, K, -ONE, X, IX+K, JX, $ DESCX, A, IA, J, DESCA, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PSELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection Q(i) to annihilate A(i+2:ia+m-1,j) * CALL PSLARFG( M-K, ALPHA, I+1, J, A, MIN( I+2, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PSELSET( E, 1, J, DESCE, ALPHA ) CALL PSELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(ia+i:ia+n-1,j) * CALL PSGEMV( 'Transpose', M-K, N-K, ONE, A, I+1, J+1, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) CALL PSGEMV( 'Transpose', M-K, K-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PSGEMV( 'Transpose', K-1, N-K, -ONE, Y, IY, JY+K, $ DESCY, WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), $ 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PSGEMV( 'Transpose', M-K, K, ONE, X, IX+K, JX, DESCX, $ A, I+1, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PSGEMV( 'Transpose', K, N-K, -ONE, A, IA, J+1, DESCA, $ WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) * CALL PSELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PSSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PSCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) 20 CONTINUE END IF * RETURN * * End of PSLABRD * END scalapack-1.8.0/SRC/pzgerq2.f0000640000175000017500000002517410363532303015515 0ustar muammarmuammar SUBROUTINE PZGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGERQ2 computes a RQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ COMPLEX*16 AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLACGV, PZLARF, PZLARFG * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * K = MIN( M, N ) DO 10 I = IA+K-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i+m-k,ja:j+n-k-1) * CALL PZLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) ) CALL PZLARFG( N-K+J-JA+1, AII, I+M-K, J+N-K, A, I+M-K, JA, $ DESCA, DESCA( M_ ), TAU ) * * Apply H(i) to A(ia:i+m-k-1,ja:j+n-k) from the right * CALL PZELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PZLARF( 'Right', M-K+I-IA, N-K+J-JA+1, A, M-K+I, JA, $ DESCA, DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) CALL PZELSET( A, I+M-K, J+N-K, DESCA, AII ) CALL PZLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGERQ2 * END scalapack-1.8.0/SRC/pcunmqr.f0000640000175000017500000004207210363532303015604 0ustar muammarmuammar SUBROUTINE PCUNMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMQR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PCGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PCGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, $ IPW, IROFFA, IROFFC, J, J1, J2, J3, JB, JCC, $ LCM, LCMQ, LWMIN, MI, MPC0, MYCOL, MYROW, NI, $ NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCLARFB, $ PCLARFT, PCUNM2R, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PCUNM2R( SIDE, TRANS, M, N, J1-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PCLARFT( 'Forward', 'Columnwise', NQ-J+JA, JB, A, $ IA+J-JA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H or H' * CALL PCLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, $ JB, A, IA+J-JA, J, DESCA, WORK, C, ICC, JCC, $ DESCC, WORK( IPW ) ) 10 CONTINUE * * Use unblocked code for the last block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PCUNM2R( SIDE, TRANS, M, N, J2-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMQR * END scalapack-1.8.0/SRC/pslauum.f0000640000175000017500000002063410363532303015605 0ustar muammarmuammar SUBROUTINE PSLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLAUUM computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the blocked form of the algorithm, calling Level 3 PBLAS. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the * distributed matrix sub( A ) is upper or lower triangular: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the triangular factor U or L. N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J, JB, JN * .. * .. External Subroutines .. EXTERNAL PSGEMM, PSLAUU2, PSTRMM, PSSYRK * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * * Handle first block separately * JB = JN-JA+1 CALL PSLAUU2( 'Upper', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PSSYRK( 'Upper', 'No transpose', JB, N-JB, ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PSTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', $ J-JA, JB, ONE, A, I, J, DESCA, A, IA, J, $ DESCA ) CALL PSLAUU2( 'Upper', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PSGEMM( 'No transpose', 'Transpose', J-JA, JB, $ N-J-JB+JA, ONE, A, IA, J+JB, DESCA, A, I, $ J+JB, DESCA, ONE, A, IA, J, DESCA ) CALL PSSYRK( 'Upper', 'No transpose', JB, N-J-JB+JA, ONE, $ A, I, J+JB, DESCA, ONE, A, I, J, DESCA ) END IF 10 CONTINUE ELSE * * Compute the product L' * L. * * Handle first block separately * JB = JN-JA+1 CALL PSLAUU2( 'Lower', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PSSYRK( 'Lower', 'Transpose', JB, N-JB, ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PSTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', JB, $ J-JA, ONE, A, I, J, DESCA, A, I, JA, DESCA ) CALL PSLAUU2( 'Lower', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PSGEMM( 'Transpose', 'No transpose', JB, J-JA, $ N-J-JB+JA, ONE, A, I+JB, J, DESCA, A, I+JB, $ JA, DESCA, ONE, A, I, JA, DESCA ) CALL PSSYRK( 'Lower', 'Transpose', JB, N-J-JB+JA, ONE, $ A, I+JB, J, DESCA, ONE, A, I, J, DESCA ) END IF 20 CONTINUE END IF * RETURN * * End of PSLAUUM * END scalapack-1.8.0/SRC/dlamsh.f0000640000175000017500000002123710363532303015367 0ustar muammarmuammar SUBROUTINE DLAMSH ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDS, NBULGE, JBLK, LDH, N DOUBLE PRECISION ULP * .. * .. Array Arguments .. DOUBLE PRECISION S(LDS,*), H(LDH,*) * .. * * Purpose * ======= * * DLAMSH sends multiple shifts through a small (single node) matrix to * see how consecutive small subdiagonal elements are modified by * subsequent shifts in an effort to maximize the number of bulges * that can be sent through. * DLAMSH should only be called when there are multiple shifts/bulges * (NBULGE > 1) and the first shift is starting in the middle of an * unreduced Hessenberg matrix because of two or more consecutive small * subdiagonal elements. * * Arguments * ========= * * S (local input/output) DOUBLE PRECISION array, (LDS,*) * On entry, the matrix of shifts. Only the 2x2 diagonal of S is * referenced. It is assumed that S has JBLK double shifts * (size 2). * On exit, the data is rearranged in the best order for * applying. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * 1 < NBULGE <= JBLK <= LDS/2 * * NBULGE (local input/output) INTEGER * On entry, the number of bulges to send through H ( >1 ). * NBULGE should be less than the maximum determined (JBLK). * 1 < NBULGE <= JBLK <= LDS/2 * On exit, the maximum number of bulges that can be sent * through. * * JBLK (local input) INTEGER * On entry, the number of shifts determined for S. * Unchanged on exit. * * H (local input/output) DOUBLE PRECISION array (LDH,N) * On entry, the local matrix to apply the shifts on. * H should be aligned so that the starting row is 2. * On exit, the data is destroyed. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * * N (local input) INTEGER * On entry, the size of H. If all the bulges are expected to * go through, N should be at least 4*NBULGE+2. * Otherwise, NBULGE may be reduced by this routine. * * ULP (local input) DOUBLE PRECISION * On entry, machine precision * Unchanged on exit. * * Implemented by: G. Henry, May 1, 1997 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, TEN PARAMETER ( ZERO = 0.0D+0, TEN = 10.0D+0 ) * .. * .. Local Scalars .. INTEGER K, IBULGE, M, NR, J, IVAL, I DOUBLE PRECISION H44, H33, H43H34, H11, H22, H21, H12, H44S, $ H33S, V1, V2, V3, H00, H10, TST1, T1, T2, T3, $ SUM, S1, DVAL * .. * .. Local Arrays .. DOUBLE PRECISION V(3) * .. * .. External Subroutines .. EXTERNAL DLARFG, DCOPY * .. * .. Intrinsic Functions .. INTRINSIC MAX, ABS * .. * .. Executable Statements .. * M = 2 DO 10 IBULGE = 1, NBULGE H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)* $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.ULP*TST1 ) THEN * Find minimum DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1) IVAL = IBULGE DO 15 I = IBULGE+1, NBULGE H44 = S(2*JBLK-2*I+2, 2*JBLK-2*I+2) H33 = S(2*JBLK-2*I+1,2*JBLK-2*I+1) H43H34 = S(2*JBLK-2*I+1,2*JBLK-2*I+2)* $ S(2*JBLK-2*I+2, 2*JBLK-2*I+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF ( (DVAL.GT.(ABS(H10)*(ABS(V2)+ABS(V3)))/(ULP*TST1)) $ .AND. ( DVAL .GT. 1.D0 ) ) THEN DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1) IVAL = I END IF 15 CONTINUE IF ( (DVAL .LT. TEN) .AND. (IVAL .NE. IBULGE) ) THEN H44 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2) H33 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) H43H34 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) H10 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) S(2*JBLK-2*IVAL+2,2*JBLK-2*IVAL+2) = $ S(2*JBLK-2*IBULGE+2,2*JBLK-2*IBULGE+2) S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) = $ S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) = $ S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) = $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) = H44 S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) = H33 S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) = H43H34 S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) = H10 END IF H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)* $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) END IF IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.TEN*ULP*TST1 ) THEN * IBULGE better not be 1 here or we have a bug! NBULGE = MAX(IBULGE -1,1) RETURN END IF DO 120 K = M, N - 1 NR = MIN( 3, N-K+1 ) IF( K.GT.M ) $ CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL DLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.N-1 ) $ H( K+2, K-1 ) = ZERO ELSE H( K, K-1 ) = -H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 DO 60 J = K, N SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) H( K, J ) = H( K, J ) - SUM*T1 H( K+1, J ) = H( K+1, J ) - SUM*T2 H( K+2, J ) = H( K+2, J ) - SUM*T3 60 CONTINUE DO 70 J = 1, MIN( K+3, N ) SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM*T1 H( J, K+1 ) = H( J, K+1 ) - SUM*T2 H( J, K+2 ) = H( J, K+2 ) - SUM*T3 70 CONTINUE END IF 120 CONTINUE 10 CONTINUE * RETURN END scalapack-1.8.0/SRC/pdlaqge.f0000640000175000017500000002336610363532303015541 0ustar muammarmuammar SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED INTEGER IA, JA, M, N DOUBLE PRECISION AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), C( * ), R( * ) * .. * * Purpose * ======= * * PDLAQGE equilibrates a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using the row and scaling * factors in the vectors R and C. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)) * containing on entry the M-by-N matrix sub( A ). On exit, * the equilibrated distributed matrix. See EQUED for the * form of the equilibrated distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local input) DOUBLE PRECISION array, dimension LOCr(M_A) * The row scale factors for sub( A ). R is aligned with the * distributed matrix A, and replicated across every process * column. R is tied to the distributed matrix A. * * C (local input) DOUBLE PRECISION array, dimension LOCc(N_A) * The column scale factors of sub( A ). C is aligned with the * distributed matrix A, and replicated down every process * row. C is tied to the distributed matrix A. * * ROWCND (global input) DOUBLE PRECISION * The global ratio of the smallest R(i) to the largest R(i), * IA <= i <= IA+M-1. * * COLCND (global input) DOUBLE PRECISION * The global ratio of the smallest C(i) to the largest C(i), * JA <= j <= JA+N-1. * * AMAX (global input) DOUBLE PRECISION * Absolute value of largest distributed submatrix entry. * * EQUED (global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration * = 'R': Row equilibration, i.e., sub( A ) has been pre- * multiplied by diag(R(IA:IA+M-1)), * = 'C': Column equilibration, i.e., sub( A ) has been post- * multiplied by diag(C(JA:JA+N-1)), * = 'B': Both row and column equilibration, i.e., sub( A ) * has been replaced by * diag(R(IA:IA+M-1)) * sub( A ) * diag(C(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if row or column scaling * should be done based on the ratio of the row or column scaling * factors. If ROWCND < THRESH, row scaling is done, and if * COLCND < THRESH, column scaling is done. * * LARGE and SMALL are threshold values used to decide if row scaling * should be done based on the absolute size of the largest matrix * element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, THRESH PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, $ IROFF, J, JJA, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PDLAMCH( ICTXT, 'Safe minimum' ) / $ PDLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) $ THEN * * No row scaling * IF( COLCND.GE.THRESH ) THEN * * No column scaling * EQUED = 'N' * ELSE * * Column scaling * IOFFA = (JJA-1)*LDA DO 20 J = JJA, JJA+NQ-1 CJ = C( J ) DO 10 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*A( IOFFA + I ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE EQUED = 'C' END IF * ELSE IF( COLCND.GE.THRESH ) THEN * * Row scaling, no column scaling * IOFFA = (JJA-1)*LDA DO 40 J = JJA, JJA+NQ-1 DO 30 I = IIA, IIA+MP-1 A( IOFFA + I ) = R( I )*A( IOFFA + I ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE EQUED = 'R' * ELSE * * Row and column scaling * IOFFA = (JJA-1)*LDA DO 60 J = JJA, JJA+NQ-1 CJ = C( J ) DO 50 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*R( I )*A( IOFFA + I ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE EQUED = 'B' * END IF * RETURN * * End of PDLAQGE * END scalapack-1.8.0/SRC/pdlassq.f0000640000175000017500000002244310363532303015566 0ustar muammarmuammar SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SCALE, SUMSQ * .. * .. Array Arguments .. INTEGER DESCX( * ) DOUBLE PRECISION X( * ) * .. * * Purpose * ======= * * PDLASSQ returns the values scl and smsq such that * * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, * * where x( i ) = sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ). * The value of sumsq is assumed to be non-negative and scl returns the * value * * scl = max( scale, abs( x( i ) ) ). * * scale and sumsq must be supplied in SCALE and SUMSQ respectively. * SCALE and SUMSQ are overwritten by scl and ssq respectively. * * The routine makes only one pass through the vector sub( X ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * The result are only available in the scope of sub( X ), i.e if * sub( X ) is distributed along a process row, the correct results are * only available in this process row of the grid. Similarly if sub( X ) * is distributed along a process column, the correct results are only * available in this process column of the grid. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (input) DOUBLE PRECISION * The vector for which a scaled sum of squares is computed. * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= n. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * SCALE (local input/local output) DOUBLE PRECISION * On entry, the value scale in the equation above. * On exit, SCALE is overwritten with scl , the scaling factor * for the sum of squares. * * SUMSQ (local input/local output) DOUBLE PRECISION * On entry, the value sumsq in the equation above. * On exit, SUMSQ is overwritten with smsq , the basic sum of * squares from which scl has been factored out. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, ICOFF, ICTXT, IIX, IOFF, IROFF, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ DOUBLE PRECISION TEMP1 * .. * .. Local Arrays .. DOUBLE PRECISION WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, INFOG2L, PDTREECOMB * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFF * * Code direct from LAPACK's DLASSQ, (save subroutine call) * IF( NQ.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 10 I = 1, NQ IF( X( IOFF ).NE.ZERO ) THEN TEMP1 = ABS( X( IOFF ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ * ( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + LDX 10 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PDTREECOMB( ICTXT, 'Rowwise', 2, WORK, -1, IXCOL, $ DCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * ELSE IF( INCX.EQ.1 ) THEN * * X is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF * * Code direct from LAPACK's DLASSQ, (save subroutine call) * IF( NP.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 20 I = 1, NP IF( X( IOFF ).NE.ZERO ) THEN TEMP1 = ABS( X( IOFF ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + 1 20 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PDTREECOMB( ICTXT, 'Columnwise', 2, WORK, -1, IXCOL, $ DCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * END IF * RETURN * * End of PDLASSQ * END scalapack-1.8.0/SRC/pdlascl.f0000640000175000017500000004166310363532303015546 0ustar muammarmuammar SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N DOUBLE PRECISION CFROM, CTO * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASCL multiplies the M-by-N real distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1) by the real scalar CTO/CFROM. This * is done without over/underflow as long as the final result * CTO * A(I,J) / CFROM does not over/underflow. TYPE specifies that * sub( A ) may be full, upper triangular, lower triangular or upper * Hessenberg. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TYPE (global input) CHARACTER * TYPE indices the storage type of the input distributed * matrix. * = 'G': sub( A ) is a full matrix, * = 'L': sub( A ) is a lower triangular matrix, * = 'U': sub( A ) is an upper triangular matrix, * = 'H': sub( A ) is an upper Hessenberg matrix. * * CFROM (global input) DOUBLE PRECISION * CTO (global input) DOUBLE PRECISION * The distributed matrix sub( A ) is multiplied by CTO/CFROM. * A(I,J) is computed without over/underflow if the final * result CTO * A(I,J) / CFROM can be represented without * over/underflow. CFROM must be nonzero. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). On exit, this array contains the local * pieces of the distributed matrix multiplied by CTO/CFROM. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER IACOL, IAROW, ICOFFA, ICTXT, ICURCOL, ICURROW, $ IIA, II, INXTROW, IOFFA, IROFFA, ITYPE, J, JB, $ JJA, JJ, JN, KK, LDA, LL, MYCOL, MYROW, MP, $ NPCOL, NPROW, NQ DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE INFO = 0 CALL CHK1MAT( M, 4, N, 6, IA, JA, DESCA, 9, INFO ) IF( INFO.EQ.0 ) THEN IF( LSAME( TYPE, 'G' ) ) THEN ITYPE = 0 ELSE IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 ELSE ITYPE = -1 END IF IF( ITYPE.EQ.-1 ) THEN INFO = -1 ELSE IF( CFROM.EQ.ZERO ) THEN INFO = -4 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASCL', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM * CFROMC = CFROM CTOC = CTO * * Compute local indexes * LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * 10 CONTINUE CFROM1 = CFROMC*SMLNUM CTO1 = CTOC / BIGNUM IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN MUL = SMLNUM DONE = .FALSE. CFROMC = CFROM1 ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN MUL = BIGNUM DONE = .FALSE. CTOC = CTO1 ELSE MUL = CTOC / CFROMC DONE = .TRUE. END IF * IOFFA = ( JJA - 1 ) * LDA ICURROW = IAROW ICURCOL = IACOL * IF( ITYPE.EQ.0 ) THEN * * Full matrix * DO 30 JJ = JJA, JJA+NQ-1 DO 20 II = IIA, IIA+MP-1 A( IOFFA+II ) = A( IOFFA+II ) * MUL 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE * ELSE IF( ITYPE.EQ.1 ) THEN * * Lower triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 50 LL = JJ, JJ + JB -1 DO 40 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 40 CONTINUE IOFFA = IOFFA + LDA 50 CONTINUE ELSE DO 70 LL = JJ, JJ + JB -1 DO 60 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 120 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 90 LL = JJ, JJ + JB -1 DO 80 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 DO 100 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 120 CONTINUE * ELSE IF( ITYPE.EQ.2 ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 140 LL = JJ, JJ + JB -1 DO 130 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 130 CONTINUE IOFFA = IOFFA + LDA 140 CONTINUE ELSE DO 160 LL = JJ, JJ + JB -1 DO 150 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 150 CONTINUE IOFFA = IOFFA + LDA 160 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 210 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 180 LL = JJ, JJ + JB -1 DO 170 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 170 CONTINUE IOFFA = IOFFA + LDA 180 CONTINUE ELSE DO 200 LL = JJ, JJ + JB -1 DO 190 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 190 CONTINUE IOFFA = IOFFA + LDA 200 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 210 CONTINUE * ELSE IF( ITYPE.EQ.3 ) THEN * * Upper Hessenberg matrix * II = IIA JJ = JJA JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.ICURCOL ) THEN DO 230 LL = JJ, JJ+JB-1 DO 220 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 250 LL = JJ, JJ+JB-1 DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE JJ = JJ + JB END IF * II = II + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 260 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( ICURROW+1, NPROW ) IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 280 LL = JJ, JJ + JB -1 DO 270 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 270 CONTINUE IOFFA = IOFFA + LDA 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 DO 290 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 290 CONTINUE IOFFA = IOFFA + LDA 300 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 350 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 320 LL = JJ, JJ + JB -1 DO 310 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 310 CONTINUE IOFFA = IOFFA + LDA 320 CONTINUE ELSE DO 340 LL = JJ, JJ + JB -1 DO 330 KK = IIA, MIN( II-1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 330 CONTINUE IOFFA = IOFFA + LDA 340 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * $ MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 350 CONTINUE * END IF * END IF * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PDLASCL * END scalapack-1.8.0/SRC/pdormql.f0000640000175000017500000004225410363532303015577 0ustar muammarmuammar SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMQL overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PDGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PDGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, IPW, $ IROFFA, IROFFC, J, J1, J2, J3, JB, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDLARFB, $ PDLARFT, PDORM2L, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN JB = J1 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PDORM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PDLARFT( 'Backward', 'Columnwise', NQ-K+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+j+jb-ja-1,jc:jc+n-1) * MI = M - K + J + JB - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+j+jb-ja-1) * NI = N - K + J + JB - JA END IF * * Apply H or H' * CALL PDLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, $ JB, A, IA, J, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN JB = J2 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PDORM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMQL * END scalapack-1.8.0/SRC/pdormbr.f0000640000175000017500000005435610363532303015574 0ustar muammarmuammar SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * If VECT = 'Q', PDORMBR overwrites the general real distributed M-by-N * matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * If VECT = 'P', PDORMBR overwrites sub( C ) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': P * sub( C ) sub( C ) * P * TRANS = 'T': P**T * sub( C ) sub( C ) * P**T * * Here Q and P**T are the orthogonal distributed matrices determined by * PDGEBRD when reducing a real distributed matrix A(IA:*,JA:*) to * bidiagonal form: A(IA:*,JA:*) = Q * B * P**T. Q and P**T are defined * as products of elementary reflectors H(i) and G(i) respectively. * * Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the * order of the orthogonal matrix Q or P**T that is applied. * * If VECT = 'Q', A(IA:*,JA:*) is assumed to have been an NQ-by-K * matrix: * if nq >= k, Q = H(1) H(2) . . . H(k); * if nq < k, Q = H(1) H(2) . . . H(nq-1). * * If VECT = 'P', A(IA:*,JA:*) is assumed to have been a K-by-NQ * matrix: * if k < nq, P = G(1) G(2) . . . G(k); * if k >= nq, P = G(1) G(2) . . . G(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * VECT (global input) CHARACTER * = 'Q': apply Q or Q**T; * = 'P': apply P or P**T. * * SIDE (global input) CHARACTER * = 'L': apply Q, Q**T, P or P**T from the Left; * = 'R': apply Q, Q**T, P or P**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q or P; * = 'T': Transpose, apply Q**T or P**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * If VECT = 'Q', the number of columns in the original * distributed matrix reduced by PDGEBRD. * If VECT = 'P', the number of rows in the original * distributed matrix reduced by PDGEBRD. * K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+MIN(NQ,K)-1)) if * VECT='Q', and (LLD_A,LOCc(JA+NQ-1)) if VECT = 'P'. NQ = M * if SIDE = 'L', and NQ = N otherwise. The vectors which * define the elementary reflectors H(i) and G(i), whose * products determine the matrices Q and P, as returned by * PDGEBRD. * If VECT = 'Q', LLD_A >= max(1,LOCr(IA+NQ-1)); * if VECT = 'P', LLD_A >= max(1,LOCr(IA+MIN(NQ,K)-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(NQ,K)-1) if VECT = 'Q', LOCr(IA+MIN(NQ,K)-1) if * VECT = 'P', TAU(i) must contain the scalar factor of the * elementary reflector H(i) or G(i), which determines Q or P, * as returned by PDGEBRD in its array argument TAUQ or TAUP. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, if VECT='Q', sub( C ) is overwritten by Q*sub( C ) * or Q'*sub( C ) or sub( C )*Q' or sub( C )*Q; if VECT='P, * sub( C ) is overwritten by P*sub( C ) or P'*sub( C ) or * sub( C )*P or sub( C )*P'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * NQ = M; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA+1; JAA=JA; MI=M-1; NI=N; ICC=IC+1; JCC=JC; * end if * else if SIDE = 'R', * NQ = N; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA; JAA=JA+1; MI=M; NI=N-1; ICC=IC; JCC=JC+1; * end if * end if * * If VECT = 'Q', * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * else if VECT <> 'Q', * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( MI+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * end if * * where LCMP = LCM / NPROW, LCMQ = LCM / NPCOL, with * LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JAA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( MI+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If VECT = 'Q', * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * else * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN CHARACTER TRANST INTEGER IAA, IACOL, IAROW, ICC, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IINFO, IROFFA, IROFFC, JAA, JCC, $ LCM, LCMP, LCMQ, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDORMLQ, $ PDORMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE APPLYQ = LSAME( VECT, 'Q' ) LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q or P * IF( LEFT ) THEN NQ = M IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA + 1 JAA = JA MI = M - 1 NI = N ICC = IC + 1 JCC = JC END IF * IF( APPLYQ ) THEN CALL CHK1MAT( M, 4, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, M, 4, IA, JA, DESCA, 10, INFO ) END IF ELSE NQ = N IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA JAA = JA + 1 MI = M NI = N - 1 ICC = IC JCC = JC + 1 END IF * IF( APPLYQ ) THEN CALL CHK1MAT( N, 5, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, N, 5, IA, JA, DESCA, 10, INFO ) END IF END IF CALL CHK1MAT( M, 4, N, 5, IC, JC, DESCC, 15, INFO ) * IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) ICOFFA = MOD( JAA-1, DESCA( NB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JAA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( APPLYQ ) THEN IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF ELSE * IF( LEFT ) THEN MQA0 = NUMROC( MI+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ MI+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN INFO = -1 ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -2 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -3 ELSE IF( K.LT.0 ) THEN INFO = -6 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( APPLYQ .AND. LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( APPLYQ .AND. LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( APPLYQ ) THEN IDUM1( 1 ) = ICHAR( 'Q' ) ELSE IDUM1( 1 ) = ICHAR( 'P' ) END IF IDUM2( 1 ) = 1 IF( LEFT ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'R' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'T' ) END IF IDUM2( 3 ) = 3 IDUM1( 4 ) = K IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( APPLYQ ) THEN IF( LEFT ) THEN CALL PCHK2MAT( M, 4, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( N, 5, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF ELSE IF( LEFT ) THEN CALL PCHK2MAT( K, 6, M, 4, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( K, 6, N, 5, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( APPLYQ ) THEN * * Apply Q * IF( NQ.GE.K ) THEN * * Q was determined by a call to PDGEBRD with nq >= k * CALL PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * Q was determined by a call to PDGEBRD with nq < k * CALL PDORMQR( SIDE, TRANS, MI, NI, NQ-1, A, IA+1, JA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) END IF ELSE * * Apply P * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF IF( NQ.GT.K ) THEN * * P was determined by a call to PDGEBRD with nq > k * CALL PDORMLQ( SIDE, TRANST, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * P was determined by a call to PDGEBRD with nq <= k * CALL PDORMLQ( SIDE, TRANST, MI, NI, NQ-1, A, IA, JA+1, $ DESCA, TAU, C, ICC, JCC, DESCC, WORK, LWORK, $ IINFO ) END IF END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMBR * END scalapack-1.8.0/SRC/pspttrf.f0000640000175000017500000010373110363532303015621 0ustar muammarmuammar SUBROUTINE PSPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL AF( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPTTRF computes a Cholesky factorization * of an N-by-N real tridiagonal * symmetric positive definite distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PSPTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' D U or * * P A(1:N, JA:JA+N-1) P^T = L D L', * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPTTRF and this is stored in AF. If a linear system * is to be solved using PSPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, INT_TEMP, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, PXERBLA, $ RESHAPE, SGERV2D, SGESD2D, SPTTRF, SPTTRSV, $ STRRV2D, STRSD2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 5*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -9 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 5*100+6 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 5*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PSPTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PSPTTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( 12*NPCOL+3*NB ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -7 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PSPTTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -9 CALL PXERBLA( ICTXT, 'PSPTTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA( 5 ) PARAM_CHECK( 6, 1 ) = DESCA( 4 ) PARAM_CHECK( 5, 1 ) = DESCA( 3 ) PARAM_CHECK( 4, 1 ) = DESCA( 1 ) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 505 PARAM_CHECK( 6, 2 ) = 504 PARAM_CHECK( 5, 2 ) = 503 PARAM_CHECK( 4, 2 ) = 501 PARAM_CHECK( 3, 2 ) = 4 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 9 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 90 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL STRSD2D( ICTXT, 'U', 'N', 1, 1, $ E( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {L_i}^T in each processor * Or A_i = {U_i}^T {U_i} if E is the upper superdiagonal * CALL SPTTRF( ODD_SIZE, D( PART_OFFSET+1 ), E( PART_OFFSET+1 ), $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 20 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * * Perform the triangular system solve {L_i}{{B'}_i}^T = {B_i}^T * by dividing B_i by diagonal element * E( PART_OFFSET+ODD_SIZE ) = E( PART_OFFSET+ODD_SIZE ) / $ D( PART_OFFSET+ODD_SIZE ) * * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^T * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 ) - $ D( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE ) ) ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 20 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL STRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * CALL SPTTRSV( 'N', ODD_SIZE, INT_ONE, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * Divide by D * DO 30 I = 1, ODD_SIZE AF( I ) = AF( I ) / D( PART_OFFSET+I ) 30 CONTINUE * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * * * Since there is no element-by-element vector multiplication in * the BLAS, this loop must be hardwired in without a BLAS call * INT_TEMP = ODD_SIZE*INT_ONE + 2 + 1 AF( INT_TEMP ) = 0 * DO 40 I = 1, ODD_SIZE AF( INT_TEMP ) = AF( INT_TEMP ) - $ D( PART_OFFSET+I )*( AF( I )* $ ( AF( I ) ) ) 40 CONTINUE * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * AF( ODD_SIZE+1 ) = -D( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE )* $ AF( ODD_SIZE ) ) * * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 80 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 70 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = REAL( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + AF( ODD_SIZE+3 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 50 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 60 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 50 60 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ).EQ.ZERO ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) / AF( ODD_SIZE+2 ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*AF( ODD_SIZE+1 )*AF( ODD_SIZE+2 )* $ ( AF( ODD_SIZE+1 ) ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = ( AF( ODD_SIZE+3 ) ) / AF( ODD_SIZE+2 ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 )* $ ( AF( ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 )* $ AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 70 CONTINUE * * 80 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 90 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PSPTTRF * END scalapack-1.8.0/SRC/pcptsv.f0000640000175000017500000004604110363532303015436 0ustar muammarmuammar SUBROUTINE PCPTSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX B( * ), E( * ), WORK( * ) REAL D( * ) * .. * * * Purpose * ======= * * PCPTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal symmetric positive definite distributed * matrix. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PCPTTRF and PCPTTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL + 3*NB) * +max((10+2*min(100,NRHS))*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PCPTTRF, PCPTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PCPTTRF and PCPTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 5*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PCPTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL + 3*NB) * * Factor the matrix * CALL PCPTTRF( N, D, E, JA, DESCA, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR ), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PCPTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PCPTTRS( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPTSV', -INFO ) RETURN END IF * RETURN * * End of PCPTSV * END scalapack-1.8.0/SRC/pcungqr.f0000640000175000017500000003075710363532303015605 0ustar muammarmuammar SUBROUTINE PCUNGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGQR generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PCGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PCGEQRF in the K columns of its distributed * matrix argument A(IA:*,JA:JA+K-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+K-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JL, $ JN, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCLARFB, $ PCLARFT, PCLASET, PCUNG2R, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL PCLASET( 'All', JL-JA, JA+N-JL, ZERO, ZERO, A, IA, JL, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PCUNG2R( M-JL+JA, JA+N-JL, JA+K-JL, A, IA+JL-JA, JL, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of columns to loop over ? * IF( JL.GT.JN+1 ) THEN * * Use blocked code * DO 10 J = JL-DESCA( NB_ ), JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PCLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, $ J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PCLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-I+IA, N-J-JB+JA, JB, A, I, $ J, DESCA, WORK, A, I, J+JB, DESCA, $ WORK( IPW ) ) END IF * * Apply H to rows i:ia+m-1 of current block * CALL PCUNG2R( M-I+IA, JB, JB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia:i-1 of current block to zero * CALL PCLASET( 'All', I-IA, JB, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * END IF * * Handle first block separately * IF( JL.GT.JA ) THEN * JB = JN - JA + 1 * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PCLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PCLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, $ JA+JB, DESCA, WORK( IPW ) ) * * Apply H to rows ia:ia+m-1 of current block * CALL PCUNG2R( M, JB, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGQR * END scalapack-1.8.0/SRC/pcgerq2.f0000640000175000017500000002516010363532303015461 0ustar muammarmuammar SUBROUTINE PCGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGERQ2 computes a RQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ COMPLEX AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PCELSET, PCLACGV, PCLARF, PCLARFG, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * K = MIN( M, N ) DO 10 I = IA+K-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i+m-k,ja:j+n-k-1) * CALL PCLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) ) CALL PCLARFG( N-K+J-JA+1, AII, I+M-K, J+N-K, A, I+M-K, JA, $ DESCA, DESCA( M_ ), TAU ) * * Apply H(i) to A(ia:i+m-k-1,ja:j+n-k) from the right * CALL PCELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PCLARF( 'Right', M-K+I-IA, N-K+J-JA+1, A, M-K+I, JA, $ DESCA, DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) CALL PCELSET( A, I+M-K, J+N-K, DESCA, AII ) CALL PCLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGERQ2 * END scalapack-1.8.0/SRC/pcgesvx.f0000640000175000017500000010465510363532303015604 0ustar muammarmuammar SUBROUTINE PCGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, IPIV, EQUED, R, C, B, IB, $ JB, DESCB, X, IX, JX, DESCX, RCOND, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, TRANS INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LRWORK, $ LWORK, N, NRHS REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ) REAL BERR( * ), C( * ), FERR( * ), R( * ), $ RWORK( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PCGESVX uses the LU factorization to compute the solution to a * complex system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * In the following description, A denotes A(IA:IA+N-1,JA:JA+N-1), * B denotes B(IB:IB+N-1,JB:JB+NRHS-1) and X denotes * X(IX:IX+N-1,JX:JX+NRHS-1). * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') * or diag(C)*B (if TRANS = 'T' or 'C'). * * 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if FACT = 'E') as * A = P * L * U, * where P is a permutation matrix, L is a unit lower triangular * matrix, and U is upper triangular. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If FACT = 'E' and equilibration was used, the matrix X is * premultiplied by diag(C) (if TRANS = 'N') or diag(R) (if * TRANS = 'T' or 'C') so that it solves the original system * before equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix * A(IA:IA+N-1,JA:JA+N-1) is supplied on entry, and if not, * whether the matrix A(IA:IA+N-1,JA:JA+N-1) should be * equilibrated before it is factored. * = 'F': On entry, AF(IAF:IAF+N-1,JAF:JAF+N-1) and IPIV con- * tain the factored form of A(IA:IA+N-1,JA:JA+N-1). * If EQUED is not 'N', the matrix * A(IA:IA+N-1,JA:JA+N-1) has been equilibrated with * scaling factors given by R and C. * A(IA:IA+N-1,JA:JA+N-1), AF(IAF:IAF+N-1,JAF:JAF+N-1), * and IPIV are not modified. * = 'N': The matrix A(IA:IA+N-1,JA:JA+N-1) will be copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * = 'E': The matrix A(IA:IA+N-1,JA:JA+N-1) will be equili- * brated if necessary, then copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': A(IA:IA+N-1,JA:JA+N-1) * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (No transpose) * = 'T': A(IA:IA+N-1,JA:JA+N-1)**T * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * = 'C': A(IA:IA+N-1,JA:JA+N-1)**H * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Conjugate transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right-hand sides, i.e., the number of columns * of the distributed submatrices B(IB:IB+N-1,JB:JB+NRHS-1) and * X(IX:IX+N-1,JX:JX+NRHS-1). NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * the local memory to an array of local dimension * (LLD_A,LOCc(JA+N-1)). On entry, the N-by-N matrix * A(IA:IA+N-1,JA:JA+N-1). If FACT = 'F' and EQUED is not 'N', * then A(IA:IA+N-1,JA:JA+N-1) must have been equilibrated by * the scaling factors in R and/or C. A(IA:IA+N-1,JA:JA+N-1) is * not modified if FACT = 'F' or 'N', or if FACT = 'E' and * EQUED = 'N' on exit. * * On exit, if EQUED .ne. 'N', A(IA:IA+N-1,JA:JA+N-1) is scaled * as follows: * EQUED = 'R': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * EQUED = 'C': A(IA:IA+N-1,JA:JA+N-1) := * A(IA:IA+N-1,JA:JA+N-1) * diag(C) * EQUED = 'B': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) COMPLEX pointer * into the local memory to an array of local dimension * (LLD_AF,LOCc(JA+N-1)). If FACT = 'F', then * AF(IAF:IAF+N-1,JAF:JAF+N-1) is an input argument and on * entry contains the factors L and U from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by PCGETRF. * If EQUED .ne. 'N', then AF is the factored form of the * equilibrated matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'N', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original * matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equili- * brated matrix A(IA:IA+N-1,JA:JA+N-1) (see the description of * A(IA:IA+N-1,JA:JA+N-1) for the form of the equilibrated * matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input or local output) INTEGER array, dimension * LOCr(M_A)+MB_A. If FACT = 'F', then IPIV is an input argu- * ment and on entry contains the pivot indices from the fac- * torization A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by * PCGETRF; IPIV(i) -> The global row local row i was * swapped with. This array must be aligned with * A( IA:IA+N-1, * ). * * If FACT = 'N', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original matrix * A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equilibrated matrix * A(IA:IA+N-1,JA:JA+N-1). * * EQUED (global input or global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'R': Row equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) has * been premultiplied by diag(R). * = 'C': Column equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) * has been postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., * A(IA:IA+N-1,JA:JA+N-1) has been replaced by * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * R (local input or local output) REAL array, * dimension LOCr(M_A). * The row scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'R' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the left by diag(R); if EQUED='N' or 'C', R is not acces- * sed. R is an input variable if FACT = 'F'; otherwise, R is * an output variable. * If FACT = 'F' and EQUED = 'R' or 'B', each element of R must * be positive. * R is replicated in every process column, and is aligned * with the distributed matrix A. * * C (local input or local output) REAL array, * dimension LOCc(N_A). * The column scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'C' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the right by diag(C); if EQUED = 'N' or 'R', C is not * accessed. C is an input variable if FACT = 'F'; otherwise, * C is an output variable. If FACT = 'F' and EQUED = 'C' or * 'B', each element of C must be positive. * C is replicated in every process row, and is aligned with * the distributed matrix A. * * B (local input/local output) COMPLEX pointer * into the local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1) ). On entry, the N-by-NRHS right-hand * side matrix B(IB:IB+N-1,JB:JB+NRHS-1). On exit, if * EQUED = 'N', B(IB:IB+N-1,JB:JB+NRHS-1) is not modified; if * TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by * diag(R)*B(IB:IB+N-1,JB:JB+NRHS-1); if TRANS = 'T' or 'C' * and EQUED = 'C' or 'B', B(IB:IB+N-1,JB:JB+NRHS-1) is over- * written by diag(C)*B(IB:IB+N-1,JB:JB+NRHS-1). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) COMPLEX pointer * into the local memory to an array of local dimension * (LLD_X, LOCc(JX+NRHS-1)). If INFO = 0, the N-by-NRHS * solution matrix X(IX:IX+N-1,JX:JX+NRHS-1) to the original * system of equations. Note that A(IA:IA+N-1,JA:JA+N-1) and * B(IB:IB+N-1,JB:JB+NRHS-1) are modified on exit if * EQUED .ne. 'N', and the solution to the equilibrated system * is inv(diag(C))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'N' * and EQUED = 'C' or 'B', or * inv(diag(R))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'T' or 'C' * and EQUED = 'R' or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) REAL * The estimate of the reciprocal condition number of the matrix * A(IA:IA+N-1,JA:JA+N-1) after equilibration (if done). If * RCOND is less than the machine precision (in particular, if * RCOND = 0), the matrix is singular to working precision. * This condition is indicated by a return code of INFO > 0. * * FERR (local output) REAL array, dimension LOCc(N_B) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix * X(IX:IX+N-1,JX:JX+NRHS-1). If XTRUE is the true solution, * FERR(j) bounds the magnitude of the largest entry in * (X(j) - XTRUE) divided by the magnitude of the largest entry * in X(j). The estimate is as reliable as the estimate for * RCOND, and is almost always a slight overestimate of the * true error. FERR is replicated in every process row, and is * aligned with the matrices B and X. * * BERR (local output) REAL array, dimension LOCc(N_B). * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A(IA:IA+N-1,JA:JA+N-1) or * B(IB:IB+N-1,JB:JB+NRHS-1) that makes X(j) an exact solution). * BERR is replicated in every process row, and is aligned * with the matrices B and X. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PCGECON( LWORK ), PCGERFS( LWORK ) ) * + LOCr( N_A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK = 2*LOCc(N_A). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: U(IA+I-1,IA+I-1) is exactly zero. The * factorization has been completed, but the * factor U is exactly singular, so the solution * and error bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the * matrix is singular to working precision, and * the solution and error bounds have not been * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLEQU, EQUIL, LQUERY, NOFACT, NOTRAN, ROWEQU CHARACTER NORM INTEGER CONWRK, I, IACOL, IAROW, IAFROW, IBROW, IBCOL, $ ICOFFA, ICOFFB, ICOFFX, ICTXT, IDUMM, $ IIA, IIB, IIX, $ INFEQU, IROFFA, IROFFAF, IROFFB, $ IROFFX, IXCOL, IXROW, J, JJA, JJB, JJX, $ LCM, LCMQ, $ LRWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, $ NQ, NQB, NRHSQ, RFSWRK REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, $ ROWCND, SMLNUM * .. * .. Local Arrays .. INTEGER CDESC( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ INFOG2L, PCGECON, PCGEEQU, PCGERFS, $ PCGETRF, PCGETRS, PCLACPY, $ PCLAQGE, PSCOPY, PXERBLA, SGEBR2D, $ SGEBS2D, SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC REAL PSLAMCH, PCLANGE EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC, PCLANGE, $ PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IX, JX, DESCX, 24, INFO ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) NOTRAN = LSAME( TRANS, 'N' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' ROWEQU = .FALSE. COLEQU = .FALSE. ELSE ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA NQB = ICEIL( N+IROFFA, DESCA( NB_ )*NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL CONWRK = 2*NP + 2*NQ + MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQ + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) RFSWRK = 3*NP IF( LSAME( TRANS, 'N' ) ) THEN RFSWRK = RFSWRK + NP + NQ + $ ICEIL( NQB, LCMQ )*DESCA( NB_ ) ELSE IF( LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) ) THEN RFSWRK = RFSWRK + NP + NQ END IF LWMIN = MAX( CONWRK, RFSWRK ) LRWMIN = MAX( 2*NQ, NP ) RWORK( 1 ) = REAL( LRWMIN ) IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT. LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( ROWEQU ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 10 J = IIA, IIA + NP - 1 RCMIN = MIN( RCMIN, R( J ) ) RCMAX = MAX( RCMAX, R( J ) ) 10 CONTINUE CALL SGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN ROWCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE ROWCND = ONE END IF END IF IF( COLEQU .AND. INFO.EQ.0 ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 20 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 20 CONTINUE CALL SGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -15 ELSE IF( N.GT.0 ) THEN COLCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE COLCND = ONE END IF END IF END IF END IF * WORK( 1 ) = REAL( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( DESCX( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2400+NB_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -29 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -31 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( TRANS ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 14 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 29 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 5, IDUM1, $ IDUM2, INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 29 IF( LRWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 4, IDUM1, $ IDUM2, INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGESVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PCGEEQU( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFEQU ) IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PCLAQGE( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) NP = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFB IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFB * IF( NOTRAN ) THEN IF( ROWEQU ) THEN DO 40 J = JJB, JJB+NRHSQ-1 DO 30 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = R( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 30 CONTINUE 40 CONTINUE END IF ELSE IF( COLEQU ) THEN * * Transpose the Column scale factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PSCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), RWORK, IB, JB, $ DESCB, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, RWORK( IIB ), $ DESCB( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, RWORK( IIB ), $ DESCB( LLD_ ), MYROW, IBCOL ) END IF DO 60 J = JJB, JJB+NRHSQ-1 DO 50 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = RWORK( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 50 CONTINUE 60 CONTINUE END IF * IF( NOFACT.OR.EQUIL ) THEN * * Compute the LU factorization of A. * CALL PCLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PCGETRF( N, N, AF, IAF, JAF, DESCAF, IPIV, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * IF( NOTRAN ) THEN NORM = '1' ELSE NORM = 'I' END IF ANORM = PCLANGE( NORM, N, N, A, IA, JA, DESCA, RWORK ) * * Compute the reciprocal of the condition number of A. * CALL PCGECON( NORM, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PSLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PCLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PCGETRS( TRANS, N, NRHS, AF, IAF, JAF, DESCAF, IPIV, X, IX, $ JX, DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PCGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFX IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFX * IF( NOTRAN ) THEN IF( COLEQU ) THEN * * Transpose the column scaling factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PSCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), RWORK, IX, $ JX, DESCX, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, $ RWORK( IIX ), DESCX( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, $ RWORK( IIX ), DESCX( LLD_ ), MYROW, $ IBCOL ) END IF * DO 80 J = JJX, JJX+NRHSQ-1 DO 70 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = RWORK( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 70 CONTINUE 80 CONTINUE DO 90 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / COLCND 90 CONTINUE END IF ELSE IF( ROWEQU ) THEN DO 110 J = JJX, JJX+NRHSQ-1 DO 100 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = R( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 100 CONTINUE 110 CONTINUE DO 120 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / ROWCND 120 CONTINUE END IF * WORK( 1 ) = REAL( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) * RETURN * * End of PCGESVX * END scalapack-1.8.0/SRC/pdlaevswp.f0000640000175000017500000002570610363532303016131 0ustar muammarmuammar* * SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LWORK, N * .. * .. Array Arguments .. INTEGER DESCZ( * ), KEY( * ), NVS( * ) DOUBLE PRECISION WORK( * ), Z( * ), ZIN( LDZI, * ) * .. * * Purpose * ======= * * PDLAEVSWP moves the eigenvectors (potentially unsorted) from * where they are computed, to a ScaLAPACK standard block cyclic * array, sorted so that the corresponding eigenvalues are sorted. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * ZIN (local input) DOUBLE PRECISION array, * dimension ( LDZI, NVS(iam) ) * The eigenvectors on input. Each eigenvector resides entirely * in one process. Each process holds a contiguous set of * NVS(iam) eigenvectors. The first eigenvector which the * process holds is: sum for i=[0,iam-1) of NVS(i) * * LDZI (locl input) INTEGER * leading dimension of the ZIN array * * Z (local output) DOUBLE PRECISION array * global dimension (N, N), local dimension (DESCZ(DLEN_), NQ) * The eigenvectors on output. The eigenvectors are distributed * in a block cyclic manner in both dimensions, with a * block size of NB. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * NVS (global input) INTEGER array, dimension( nprocs+1 ) * nvs(i) = number of processes * number of eigenvectors held by processes [0,i-1) * nvs(1) = number of eigen vectors held by [0,1-1) == 0 * nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) == * total number of eigenvectors * * KEY (global input) INTEGER array, dimension( N ) * Indicates the actual index (after sorting) for each of the * eigenvectors. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J, $ MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB, $ NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL, $ RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P EXTERNAL INDXG2L, INDXG2P * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGERV2D, DGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * NB = DESCZ( MB_ ) * NPROCS = NPROW*NPCOL * * If PxSTEIN operates on a sub-matrix of a global matrix, the * key [] that contains the indicies of the eigenvectors is refe- * renced to the dimensions of the sub-matrix and not the global * distrubited matrix. Because of this, PxLAEVSWP will incorrectly * map the eigenvectors to the global eigenvector matrix, Z, unless * the key[] elements are shifted as below. * DO 10 J = DESCZ( N_ ), 1, -1 KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 ) 10 CONTINUE * DO 110 DIST = 0, NPROCS - 1 * SENDTO = MOD( IAM+DIST, NPROCS ) RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS ) * SENDROW = MOD( SENDTO, NPROW ) SENDCOL = SENDTO / NPROW RECVROW = MOD( RECVFROM, NPROW ) RECVCOL = RECVFROM / NPROW * SENDROW = SENDTO / NPCOL SENDCOL = MOD( SENDTO, NPCOL ) RECVROW = RECVFROM / NPCOL RECVCOL = MOD( RECVFROM, NPCOL ) * * Figure out what I have that process "sendto" wants * NBUFSIZE = 0 * * We are looping through the eigenvectors that I presently own. * DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( SENDCOL.EQ.PCOL ) THEN MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )* $ DESCZ( MB_ ) + 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 30 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 ) DO 20 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 WORK( NBUFSIZE ) = ZIN( I+1-IZ, $ J-NVS( 1+IAM )+1-JZ ) 20 CONTINUE 30 CONTINUE END IF 40 CONTINUE * * IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL ) $ CALL DGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, WORK, NBUFSIZE, $ SENDROW, SENDCOL ) * * * Figure out what process "recvfrom" has that I want * NBUFSIZE = 0 DO 70 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 60 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 50 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 50 CONTINUE 60 CONTINUE END IF 70 CONTINUE * * * IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL ) $ CALL DGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, WORK, 1, RECVROW, $ RECVCOL ) * NBUFSIZE = 0 DO 100 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1, $ NPCOL ) CYCLIC_I = 1 MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 90 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1, $ NPROW ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 80 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) ) $ = WORK( NBUFSIZE ) CYCLIC_I = CYCLIC_I + 1 80 CONTINUE 90 CONTINUE END IF 100 CONTINUE * 110 CONTINUE RETURN * * End of PDLAEVSWP * END scalapack-1.8.0/SRC/pslaed2.f0000640000175000017500000003477110363532303015460 0ustar muammarmuammar SUBROUTINE PSLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, $ NN1, NN2, IB1, IB2 ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N, $ N1, NB, NN, NN1, NN2, NPCOL REAL RHO * .. * .. Array Arguments .. INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ), $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ), $ PSM( 0: NPCOL-1, 4 ) REAL D( * ), DLAMDA( * ), Q( LDQ, * ), $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * ) * .. * * Purpose * ======= * * PSLAED2 sorts the two sets of eigenvalues together into a single * sorted set. Then it tries to deflate the size of the problem. * There are two ways in which deflation can occur: when two or more * eigenvalues are close together or if there is a tiny entry in the * Z vector. For each such occurrence the order of the related secular * equation problem is reduced by one. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * K (output) INTEGER * The number of non-deflated eigenvalues, and the order of the * related secular equation. 0 <= K <=N. * * N (input) INTEGER * The dimension of the symmetric tridiagonal matrix. N >= 0. * * N1 (input) INTEGER * The location of the last eigenvalue in the leading sub-matrix. * min(1,N) < N1 < N. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * D (input/output) REAL array, dimension (N) * On entry, D contains the eigenvalues of the two submatrices to * be combined. * On exit, D contains the trailing (N-K) updated eigenvalues * (those which were deflated) sorted into increasing order. * * DROW (global input) INTEGER * The process row over which the first row of the matrix D is * distributed. 0 <= DROW < NPROW. * * DCOL (global input) INTEGER * The process column over which the first column of the * matrix D is distributed. 0 <= DCOL < NPCOL. * * Q (input/output) REAL array, dimension (LDQ, N) * On entry, Q contains the eigenvectors of two submatrices in * the two square blocks with corners at (1,1), (N1,N1) * and (N1+1, N1+1), (N,N). * On exit, Q contains the trailing (N-K) updated eigenvectors * (those which were deflated) in its last N-K columns. * * LDQ (input) INTEGER * The leading dimension of the array Q. LDQ >= max(1,NQ). * * RHO (global input/output) REAL * On entry, the off-diagonal element associated with the rank-1 * cut which originally split the two submatrices which are now * being recombined. * On exit, RHO has been modified to the value required by * PSLAED3. * * Z (global input) REAL array, dimension (N) * On entry, Z contains the updating vector (the last * row of the first sub-eigenvector matrix and the first row of * the second sub-eigenvector matrix). * On exit, the contents of Z have been destroyed by the updating * process. * * DLAMDA (global output) REAL array, dimension (N) * A copy of the first K eigenvalues which will be used by * SLAED3 to form the secular equation. * * W (global output) REAL array, dimension (N) * The first k values of the final deflation-altered z-vector * which will be passed to SLAED3. * * Q2 (output) REAL array, dimension (LDQ2, NQ) * A copy of the first K eigenvectors which will be used by * * LDQ2 (input) INTEGER * The leading dimension of the array Q2. * * QBUF (workspace) REAL array, dimension 3*N * * CTOT (workspace) INTEGER array, dimension( NPCOL, 4) * * PSM (workspace) INTEGER array, dimension( NPCOL, 4) * * NPCOL (global input) INTEGER * The total number of columns over which the distributed * submatrix is distributed. * * INDX (workspace) INTEGER array, dimension (N) * The permutation used to sort the contents of DLAMDA into * ascending order. * * INDXC (output) INTEGER array, dimension (N) * The permutation used to arrange the columns of the deflated * Q matrix into three groups: the first group contains non-zero * elements only at and above N1, the second contains * non-zero elements only below N1, and the third is dense. * * INDXP (workspace) INTEGER array, dimension (N) * The permutation used to place deflated values of D at the end * of the array. INDXP(1:K) points to the nondeflated D-values * and INDXP(K+1:N) points to the deflated eigenvalues. * * INDCOL (workspace) INTEGER array, dimension (N) * * COLTYP (workspace/output) INTEGER array, dimension (N) * During execution, a label which will indicate which of the * following types a column in the Q2 matrix is: * 1 : non-zero in the upper half only; * 2 : dense; * 3 : non-zero in the lower half only; * 4 : deflated. * * NN (global output) INTEGER, the order of matrix U, (PSLAED1). * NN1 (global output) INTEGER, the order of matrix Q1, (PSLAED1). * NN2 (global output) INTEGER, the order of matrix Q2, (PSLAED1). * IB1 (global output) INTEGER, pointeur on Q1, (PSLAED1). * IB2 (global output) INTEGER, pointeur on Q2, (PSLAED1). * * ===================================================================== * * .. Parameters .. REAL MONE, ZERO, ONE, TWO, EIGHT PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, $ TWO = 2.0E0, EIGHT = 8.0E0 ) * .. * .. Local Scalars .. INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2, $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ, $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ REAL C, EPS, S, T, TAU, TOL * .. * .. External Functions .. INTEGER INDXG2L, INDXL2G, ISAMAX, NUMROC REAL PSLAMCH, SLAPY2 EXTERNAL INDXG2L, INDXL2G, ISAMAX, NUMROC, PSLAMCH, $ SLAPY2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, INFOG1L, SCOPY, $ SGERV2D, SGESD2D, SLAPST, SROT, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. External Functions .. * .. * .. Local Arrays .. INTEGER PTT( 4 ) * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_PINFO( IAM, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NUMROC( N, NB, MYROW, DROW, NPROW ) * N2 = N - N1 N1P1 = N1 + 1 * IF( RHO.LT.ZERO ) THEN CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) END IF * * Normalize z so that norm(z) = 1. Since z is the concatenation of * two normalized vectors, norm2(z) = sqrt(2). * T = ONE / SQRT( TWO ) CALL SSCAL( N, T, Z, 1 ) * * RHO = ABS( norm(z)**2 * RHO ) * RHO = ABS( TWO*RHO ) * * Calculate the allowable deflation tolerance * IMAX = ISAMAX( N, Z, 1 ) JMAX = ISAMAX( N, D, 1 ) EPS = PSLAMCH( ICTXT, 'Epsilon' ) TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) * * If the rank-1 modifier is small enough, no more needs to be done * except to reorganize Q so that its columns correspond with the * elements in D. * IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN K = 0 GO TO 220 END IF * * If there are multiple eigenvalues then the problem deflates. Here * the number of equal eigenvalues are found. As each equal * eigenvalue is found, an elementary reflector is computed to rotate * the corresponding eigensubspace so that the corresponding * components of Z are zero in this new basis. * * CALL SLAPST( 'I', N, D, INDX, INFO ) * DO 10 I = 1, N1 COLTYP( I ) = 1 10 CONTINUE DO 20 I = N1P1, N COLTYP( I ) = 3 20 CONTINUE COL = DCOL DO 40 I = 1, N, NB DO 30 J = 0, NB - 1 IF( I+J.LE.N ) $ INDCOL( I+J ) = COL 30 CONTINUE COL = MOD( COL+1, NPCOL ) 40 CONTINUE * K = 0 K2 = N + 1 DO 50 J = 1, N NJ = INDX( J ) IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN * * Deflate due to small z component. * K2 = K2 - 1 COLTYP( NJ ) = 4 INDXP( K2 ) = NJ IF( J.EQ.N ) $ GO TO 80 ELSE PJ = NJ GO TO 60 END IF 50 CONTINUE 60 CONTINUE J = J + 1 NJ = INDX( J ) IF( J.GT.N ) $ GO TO 80 IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN * * Deflate due to small z component. * K2 = K2 - 1 COLTYP( NJ ) = 4 INDXP( K2 ) = NJ ELSE * * Check if eigenvalues are close enough to allow deflation. * S = Z( PJ ) C = Z( NJ ) * * Find sqrt(a**2+b**2) without overflow or * destructive underflow. * TAU = SLAPY2( C, S ) T = D( NJ ) - D( PJ ) C = C / TAU S = -S / TAU IF( ABS( T*C*S ).LE.TOL ) THEN * * Deflation is possible. * Z( NJ ) = TAU Z( PJ ) = ZERO IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) $ COLTYP( NJ ) = 2 COLTYP( PJ ) = 4 CALL INFOG1L( NJ, NB, NPCOL, MYCOL, DCOL, NJJ, NJCOL ) CALL INFOG1L( PJ, NB, NPCOL, MYCOL, DCOL, PJJ, PJCOL ) IF( INDCOL( PJ ).EQ.INDCOL( NJ ) .AND. MYCOL.EQ.NJCOL ) THEN CALL SROT( NP, Q( 1, PJJ ), 1, Q( 1, NJJ ), 1, C, S ) ELSE IF( MYCOL.EQ.PJCOL ) THEN CALL SGESD2D( ICTXT, NP, 1, Q( 1, PJJ ), NP, MYROW, $ NJCOL ) CALL SGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, NJCOL ) CALL SROT( NP, Q( 1, PJJ ), 1, QBUF, 1, C, S ) ELSE IF( MYCOL.EQ.NJCOL ) THEN CALL SGESD2D( ICTXT, NP, 1, Q( 1, NJJ ), NP, MYROW, $ PJCOL ) CALL SGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, PJCOL ) CALL SROT( NP, QBUF, 1, Q( 1, NJJ ), 1, C, S ) END IF T = D( PJ )*C**2 + D( NJ )*S**2 D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 D( PJ ) = T K2 = K2 - 1 I = 1 70 CONTINUE IF( K2+I.LE.N ) THEN IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN INDXP( K2+I-1 ) = INDXP( K2+I ) INDXP( K2+I ) = PJ I = I + 1 GO TO 70 ELSE INDXP( K2+I-1 ) = PJ END IF ELSE INDXP( K2+I-1 ) = PJ END IF PJ = NJ ELSE K = K + 1 DLAMDA( K ) = D( PJ ) W( K ) = Z( PJ ) INDXP( K ) = PJ PJ = NJ END IF END IF GO TO 60 80 CONTINUE * * Record the last eigenvalue. * K = K + 1 DLAMDA( K ) = D( PJ ) W( K ) = Z( PJ ) INDXP( K ) = PJ * * Count up the total number of the various types of columns, then * form a permutation which positions the four column types into * four uniform groups (although one or more of these groups may be * empty). * DO 100 J = 1, 4 DO 90 I = 0, NPCOL - 1 CTOT( I, J ) = 0 90 CONTINUE PTT( J ) = 0 100 CONTINUE DO 110 J = 1, N CT = COLTYP( J ) COL = INDCOL( J ) CTOT( COL, CT ) = CTOT( COL, CT ) + 1 110 CONTINUE * * PSM(*) = Position in SubMatrix (of types 1 through 4) * DO 120 COL = 0, NPCOL - 1 PSM( COL, 1 ) = 1 PSM( COL, 2 ) = 1 + CTOT( COL, 1 ) PSM( COL, 3 ) = PSM( COL, 2 ) + CTOT( COL, 2 ) PSM( COL, 4 ) = PSM( COL, 3 ) + CTOT( COL, 3 ) 120 CONTINUE PTT( 1 ) = 1 DO 140 I = 2, 4 CT = 0 DO 130 J = 0, NPCOL - 1 CT = CT + CTOT( J, I-1 ) 130 CONTINUE PTT( I ) = PTT( I-1 ) + CT 140 CONTINUE * * Fill out the INDXC array so that the permutation which it induces * will place all type-1 columns first, all type-2 columns next, * then all type-3's, and finally all type-4's. * DO 150 J = 1, N JS = INDXP( J ) COL = INDCOL( JS ) CT = COLTYP( JS ) I = INDXL2G( PSM( COL, CT ), NB, COL, DCOL, NPCOL ) INDX( J ) = I INDXC( PTT( CT ) ) = I PSM( COL, CT ) = PSM( COL, CT ) + 1 PTT( CT ) = PTT( CT ) + 1 150 CONTINUE DO 160 J = 1, N JS = INDXP( J ) JJS = INDXG2L( JS, NB, J, J, NPCOL ) COL = INDCOL( JS ) IF( COL.EQ.MYCOL ) THEN I = INDX( J ) JJQ2 = INDXG2L( I, NB, J, J, NPCOL ) CALL SCOPY( NP, Q( 1, JJS ), 1, Q2( 1, JJQ2 ), 1 ) END IF 160 CONTINUE * * * The deflated eigenvalues and their corresponding vectors go back * into the last N - K slots of D and Q respectively. * CALL SCOPY( N, D, 1, Z, 1 ) DO 170 J = K + 1, N JS = INDXP( J ) I = INDX( J ) D( I ) = Z( JS ) 170 CONTINUE * PTT( 1 ) = 1 DO 190 I = 2, 4 CT = 0 DO 180 J = 0, NPCOL - 1 CT = CT + CTOT( J, I-1 ) 180 CONTINUE PTT( I ) = PTT( I-1 ) + CT 190 CONTINUE * * IB1 = INDXC( 1 ) IE1 = IB1 IB2 = INDXC( PTT( 2 ) ) IE2 = IB2 DO 200 I = 2, PTT( 3 ) - 1 IB1 = MIN( IB1, INDXC( I ) ) IE1 = MAX( IE1, INDXC( I ) ) 200 CONTINUE DO 210 I = PTT( 2 ), PTT( 4 ) - 1 IB2 = MIN( IB2, INDXC( I ) ) IE2 = MAX( IE2, INDXC( I ) ) 210 CONTINUE NN1 = IE1 - IB1 + 1 NN2 = IE2 - IB2 + 1 NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1 220 CONTINUE RETURN * * End of PSLAED2 * END scalapack-1.8.0/SRC/pzdttrsv.f0000640000175000017500000014220610363532303016017 0ustar muammarmuammar SUBROUTINE PZDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PZDTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Gaussian elimination code PZ@(dom_pre)TTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PZDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDTTRF and this is stored in AF. If a linear system * is to be solved using PZDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZGEMM, $ ZGERV2D, ZGESD2D, ZLACPY, ZMATADD, ZTBTRS, $ ZTRMM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX*16 ZDOTC EXTERNAL LSAME, NUMROC, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 12*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 12*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 12*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 12*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 12*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 12*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -4 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -8 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 9*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PZDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PZDTTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1205 PARAM_CHECK( 15, 2 ) = 1204 PARAM_CHECK( 14, 2 ) = 1203 PARAM_CHECK( 13, 2 ) = 1202 PARAM_CHECK( 12, 2 ) = 1201 PARAM_CHECK( 11, 2 ) = 11 PARAM_CHECK( 10, 2 ) = 905 PARAM_CHECK( 9, 2 ) = 904 PARAM_CHECK( 8, 2 ) = 903 PARAM_CHECK( 7, 2 ) = 901 PARAM_CHECK( 6, 2 ) = 8 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -DL( PART_OFFSET+ODD_SIZE+1 ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE, $ AF( 1 ), ODD_SIZE, B( PART_OFFSET+1 ), LLDB, $ CZERO, WORK( 1+INT_ONE-INT_ONE ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTBTRS( 'L', 'N', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTBTRS( 'L', 'C', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-INT_ONE ), INT_ONE, $ CONE, B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -DCONJG( DL( PART_OFFSET+ODD_SIZE+1 ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -DCONJG( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, CZERO, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTBTRS( 'U', 'C', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTBTRS( 'U', 'N', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ), $ NRHS, AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, $ WORK( 1+INT_ONE-INT_ONE ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DU( PART_OFFSET+2 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), $ B( PART_OFFSET+1 ), LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZDTTRSV * END scalapack-1.8.0/SRC/pcpttrf.f0000640000175000017500000010513510363532303015601 0ustar muammarmuammar SUBROUTINE PCPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX AF( * ), E( * ), WORK( * ) REAL D( * ) * .. * * * Purpose * ======= * * PCPTTRF computes a Cholesky factorization * of an N-by-N complex tridiagonal * symmetric positive definite distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PCPTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' D U or * * P A(1:N, JA:JA+N-1) P^T = L D L', * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPTTRF and this is stored in AF. If a linear system * is to be solved using PCPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, INT_TEMP, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CAXPY, CGEMM, CGERV2D, CGESD2D, CLACPY, $ CLATCPY, CPBTRF, CPOTRF, CSYRK, CTBTRS, CTRMM, $ CTRRV2D, CTRSD2D, CTRSM, CTRTRS, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 5*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -9 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 5*100 + 6 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 5*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PCPTTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPTTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (12*NPCOL + 3*NB) * IF( LAF .LT. LAF_MIN ) THEN INFO = -7 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PCPTTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -9 CALL PXERBLA( ICTXT, $ 'PCPTTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA(5) PARAM_CHECK( 6, 1 ) = DESCA(4) PARAM_CHECK( 5, 1 ) = DESCA(3) PARAM_CHECK( 4, 1 ) = DESCA(1) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 505 PARAM_CHECK( 6, 2 ) = 504 PARAM_CHECK( 5, 2 ) = 503 PARAM_CHECK( 4, 2 ) = 501 PARAM_CHECK( 3, 2 ) = 4 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 9 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL CTRSD2D( ICTXT, 'U', 'N', 1, 1, $ E( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {L_i}^C in each processor * Or A_i = {U_i}^C {U_i} if E is the upper superdiagonal * CALL CPTTRF( ODD_SIZE, D( PART_OFFSET+1 ), E( PART_OFFSET+1 ), $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * * Perform the triangular system solve {L_i}{{B'}_i}^C = {B_i}^C * by dividing B_i by diagonal element * E( PART_OFFSET+ODD_SIZE ) = E( PART_OFFSET+ODD_SIZE )/ $ D( PART_OFFSET+ODD_SIZE ) * * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^C * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 )- $ D( PART_OFFSET+ODD_SIZE )*REAL( E( PART_OFFSET+ODD_SIZE )* $ CONJG( E( PART_OFFSET+ODD_SIZE ) ) ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL CTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * CALL CPTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * Divide by D * DO 20 I=1, ODD_SIZE AF( I ) = AF( I )/D( PART_OFFSET+I ) 20 CONTINUE * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * * * Since there is no element-by-element vector multiplication in * the BLAS, this loop must be hardwired in without a BLAS call * INT_TEMP = ODD_SIZE*INT_ONE+2+1 AF( INT_TEMP ) = 0 * DO 30 I=1, ODD_SIZE AF( INT_TEMP ) = AF( INT_TEMP )-D( PART_OFFSET+I )* $ ( AF( I )*CONJG( AF( I ) ) ) 30 CONTINUE * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * AF( ODD_SIZE+1 ) = $ - D( PART_OFFSET+ODD_SIZE ) $ * CONJG( E( PART_OFFSET+ODD_SIZE ) $ * AF( ODD_SIZE ) ) * * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = $ CMPLX( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+AF( ODD_SIZE+3 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ) .EQ. CZERO ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 )/AF( ODD_SIZE+2 ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*AF( ODD_SIZE+1 )*AF( ODD_SIZE+2 ) $ *CONJG( AF( ODD_SIZE+1 ) ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = ( AF( ODD_SIZE+3 ) ) $ /AF( ODD_SIZE+2 ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 ) $ *CONJG( AF( ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ * AF( ODD_SIZE+2 ) $ * AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PCPTTRF * END scalapack-1.8.0/SRC/pclantr.f0000640000175000017500000011062710363532303015564 0ustar muammarmuammar REAL FUNCTION PCLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLANTR returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * trapezoidal or triangular distributed matrix sub( A ) denoting * A(IA:IA+M-1, JA:JA+N-1). * * PCLANTR returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with ia <= i <= ia+m-1, * ( and ja <= j <= ja+n-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PCLANTR as described * above. * * UPLO (global input) CHARACTER * Specifies whether the matrix sub( A ) is upper or lower * trapezoidal. * = 'U': Upper trapezoidal * = 'L': Lower trapezoidal * Note that sub( A ) is triangular instead of trapezoidal * if M = N. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) has * unit diagonal. * = 'N': Non-unit diagonal * = 'U': Unit diagonal * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PCLANTR is * set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PCLANTR is set to zero. N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL UDIAG INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, IOFFA, $ IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, MP, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASSQ, INFOG2L, PSTREECOMB, $ SCOMBSSQ, SGEBR2D, SGEBS2D, $ SGAMX2D, SGSUM2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL LSAME, ICEIL, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * IF( UDIAG ) THEN VALUE = ONE ELSE VALUE = ZERO END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 20 LL = JJ, JJ + JB -1 DO 10 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE DO 40 LL = JJ, JJ + JB -1 DO 30 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE END IF ELSE DO 60 LL = JJ, JJ + JB -1 DO 50 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 80 LL = JJ, JJ + JB -1 DO 70 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 70 CONTINUE IOFFA = IOFFA + LDA 80 CONTINUE ELSE DO 100 LL = JJ, JJ + JB -1 DO 90 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 90 CONTINUE IOFFA = IOFFA + LDA 100 CONTINUE END IF ELSE DO 120 LL = JJ, JJ + JB -1 DO 110 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 110 CONTINUE IOFFA = IOFFA + LDA 120 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 130 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 150 LL = JJ, JJ + JB -1 DO 140 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE ELSE DO 170 LL = JJ, JJ + JB -1 DO 160 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 160 CONTINUE IOFFA = IOFFA + LDA 170 CONTINUE END IF ELSE DO 190 LL = JJ, JJ + JB -1 DO 180 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 180 CONTINUE IOFFA = IOFFA + LDA 190 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 210 LL = JJ, JJ + JB -1 DO 200 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 200 CONTINUE IOFFA = IOFFA + LDA 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 DO 220 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE END IF ELSE DO 250 LL = JJ, JJ + JB -1 DO 240 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 260 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 280 LL = JJ, JJ + JB -1 SUM = ONE DO 270 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 270 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 SUM = ZERO DO 290 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 290 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 300 CONTINUE END IF ELSE DO 320 LL = JJ, JJ + JB -1 SUM = ZERO DO 310 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 310 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 320 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 390 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 340 LL = JJ, JJ + JB -1 SUM = ONE DO 330 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 330 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 340 CONTINUE ELSE DO 360 LL = JJ, JJ + JB -1 SUM = ZERO DO 350 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 360 CONTINUE END IF ELSE DO 380 LL = JJ, JJ + JB -1 SUM = ZERO DO 370 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 380 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 410 LL = JJ, JJ + JB -1 SUM = ONE DO 400 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 400 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 410 CONTINUE ELSE DO 430 LL = JJ, JJ + JB -1 SUM = ZERO DO 420 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 420 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 430 CONTINUE END IF ELSE DO 450 LL = JJ, JJ + JB -1 SUM = ZERO DO 440 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 440 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 450 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 520 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 470 LL = JJ, JJ + JB -1 SUM = ONE DO 460 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 460 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 470 CONTINUE ELSE DO 490 LL = JJ, JJ + JB -1 SUM = ZERO DO 480 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 480 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 490 CONTINUE END IF ELSE DO 510 LL = JJ, JJ + JB -1 SUM = ZERO DO 500 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 500 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 510 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 520 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * IF( LSAME( UPLO, 'U' ) ) THEN IF( UDIAG ) THEN DO 530 KK = IIA, IIA+MP-1 WORK( KK ) = ONE 530 CONTINUE ELSE DO 540 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 540 CONTINUE END IF ELSE IF( UDIAG ) THEN NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF DO 550 KK = IIA, IIA+NP-1 WORK( KK ) = ONE 550 CONTINUE DO 560 KK = IIA+NP, IIA+MP-1 WORK( KK ) = ZERO 560 CONTINUE ELSE DO 570 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 570 CONTINUE END IF END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 590 LL = JJ, JJ + JB -1 DO 580 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 580 CONTINUE IOFFA = IOFFA + LDA 590 CONTINUE ELSE DO 610 LL = JJ, JJ + JB -1 DO 600 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 600 CONTINUE IOFFA = IOFFA + LDA 610 CONTINUE END IF ELSE DO 630 LL = JJ, JJ + JB -1 DO 620 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 620 CONTINUE IOFFA = IOFFA + LDA 630 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 700 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 650 LL = JJ, JJ + JB -1 DO 640 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 640 CONTINUE IOFFA = IOFFA + LDA 650 CONTINUE ELSE DO 670 LL = JJ, JJ + JB -1 DO 660 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 660 CONTINUE IOFFA = IOFFA + LDA 670 CONTINUE END IF ELSE DO 690 LL = JJ, JJ + JB -1 DO 680 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 680 CONTINUE IOFFA = IOFFA + LDA 690 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 700 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 720 LL = JJ, JJ + JB -1 DO 710 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 710 CONTINUE IOFFA = IOFFA + LDA 720 CONTINUE ELSE DO 740 LL = JJ, JJ + JB -1 DO 730 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 730 CONTINUE IOFFA = IOFFA + LDA 740 CONTINUE END IF ELSE DO 760 LL = JJ, JJ + JB -1 DO 750 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 750 CONTINUE IOFFA = IOFFA + LDA 760 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 830 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 780 LL = JJ, JJ + JB -1 DO 770 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 770 CONTINUE IOFFA = IOFFA + LDA 780 CONTINUE ELSE DO 800 LL = JJ, JJ + JB -1 DO 790 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 790 CONTINUE IOFFA = IOFFA + LDA 800 CONTINUE END IF ELSE DO 820 LL = JJ, JJ + JB -1 DO 810 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 810 CONTINUE IOFFA = IOFFA + LDA 820 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 830 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( ISAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * IF( UDIAG ) THEN SCALE = ONE SUM = REAL( MIN( M, N ) ) / REAL( NPROW*NPCOL ) ELSE SCALE = ZERO SUM = ONE END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 840 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 840 CONTINUE ELSE DO 850 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 850 CONTINUE END IF ELSE DO 860 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 860 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 900 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 870 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 870 CONTINUE ELSE DO 880 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 880 CONTINUE END IF ELSE DO 890 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 890 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 900 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 910 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 910 CONTINUE ELSE DO 920 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 920 CONTINUE END IF ELSE DO 930 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 930 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 970 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 940 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 940 CONTINUE ELSE DO 950 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 950 CONTINUE END IF ELSE DO 960 LL = JJ, JJ + JB -1 CALL CLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 960 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 970 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to every process in the grid. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PCLANTR = VALUE * RETURN * * End of PCLANTR * END scalapack-1.8.0/SRC/sdbtf2.f0000640000175000017500000001214510363532303015301 0ustar muammarmuammar SUBROUTINE SDBTF2( M, N, KL, KU, AB, LDAB, INFO ) * * Modified by Andrew J. Cleary in November, 96 from: * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. REAL AB( LDAB, * ) * .. * * Purpose * ======= * * Sdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting with row interchanges. * * This is the unblocked version of the algorithm, calling Level 2 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) REAL array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine; elements marked * + need not be set on entry, but are required by the routine to store * elements of U, because of fill-in resulting from the row * interchanges. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER J, JP, JU, KM, KV * .. * .. External Functions .. INTEGER ISAMAX EXTERNAL ISAMAX * .. * .. External Subroutines .. EXTERNAL SGER, SSCAL, SSWAP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U, allowing for * fill-in. * KV = KU * * Test the input parameters. * INFO = 0 *ECA IF( M.LT.0 ) THEN *ECA INFO = -1 *ECA ELSE IF( N.LT.0 ) THEN *ECA INFO = -2 *ECA ELSE IF( KL.LT.0 ) THEN *ECA INFO = -3 *ECA ELSE IF( KU.LT.0 ) THEN *ECA INFO = -4 *ECA ELSE IF( LDAB.LT.KL+KV+1 ) THEN *ECA INFO = -6 *ECA END IF *ECA IF( INFO.NE.0 ) THEN *ECA CALL XERBLA( 'SDBTF2', -INFO ) *ECA RETURN *ECA END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Gaussian elimination without partial pivoting * * JU is the index of the last column affected by the current stage * of the factorization. * JU = 1 * DO 40 J = 1, MIN( M, N ) * * Test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-J ) JP = 1 IF( AB( KV+1, J ).NE.ZERO ) THEN JU = MAX( JU, MIN( J+KU, N ) ) * IF( KM.GT.0 ) THEN * * Compute multipliers. * CALL SSCAL( KM, ONE / AB( KU+1, J ), AB( KU+2, J ), 1 ) * * Update trailing submatrix within the band. * IF( JU.GT.J ) THEN CALL SGER( KM, JU-J, -ONE, AB( KU+2, J ), 1, $ AB( KU, J+1 ), LDAB-1, AB( KU+1, J+1 ), $ LDAB-1 ) END IF END IF ELSE * IF( INFO.EQ.0 ) $ INFO = J END IF 40 CONTINUE RETURN * * End of SDBTF2 * END scalapack-1.8.0/SRC/pslamr1d.f0000640000175000017500000001067110363532303015642 0ustar muammarmuammar SUBROUTINE PSLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Bugs * ==== * * I am not sure that this works correctly when IB and JB are not equal * to 1. Indeed, I suspect that IB should always be set to 1 or ignored * with 1 used in its place. * * PSLAMR1D has not been tested except withint the contect of * PSSYPTRD, the prototype reduction to tridiagonal form code. * * Purpose * * ======= * * PSLAMR1D redistributes a one-dimensional row vector from one data * decomposition to another. * * This is an auxiliary routine called by PSSYTRD to redistribute D, E * and TAU. * * Notes * ===== * * Although all processes call PSGEMR2D, only the processes that own * the first column of A send data and only processes that own the * first column of B receive data. The calls to SGEBS2D/SGEBR2D * spread the data down. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * A (local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JA+N-1)). * On output, A is replicated across all processes in * this processor column. * * IA (global input) INTEGER * A's global row index, which points to the beginning of * the submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JB+N-1)). * * IB (global input) INTEGER * B's global row index, NOT USED * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCBB( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSGEMR2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return if possible * IF( N.LE.0 ) $ RETURN * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) DESCBB( I ) = DESCB( I ) 10 CONTINUE * DESCAA( M_ ) = 1 DESCBB( M_ ) = 1 DESCAA( LLD_ ) = 1 DESCBB( LLD_ ) = 1 * ICTXT = DESCB( CTXT_ ) CALL PSGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'C', ' ', NQ, 1, B, NQ ) ELSE CALL SGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * RETURN * * End of PSLAMR1D * END scalapack-1.8.0/SRC/pdorgl2.f0000640000175000017500000002605610363532303015474 0ustar muammarmuammar SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGL2 generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k) . . . H(2) H(1) * * as returned by PDGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PDGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, J, KP, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 DOUBLE PRECISION TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLASET, PDSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia+k:ia+m-1 to rows of the unit matrix * CALL PDLASET( 'All', M-K, K, ZERO, ZERO, A, IA+K, JA, DESCA ) CALL PDLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, $ DESCA ) * END IF * TAUI = ZERO KP = NUMROC( IA+K-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+K-1, IA, -1 * * Apply H(i) to A(i:ia+m-1,ja+i-ia:ja+n-1) from the right * J = JA + I - IA II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, KP ) ) IF( J.LT.JA+N-1 ) THEN IF( I.LT.IA+M-1 ) THEN CALL PDELSET( A, I, J, DESCA, ONE ) CALL PDLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PDSCAL( N-J+JA-1, -TAUI, A, I, J+1, DESCA, $ DESCA( M_ ) ) END IF CALL PDELSET( A, I, J, DESCA, ONE-TAUI ) * * Set A(i,ja:j-1) to zero * CALL PDLASET( 'All', 1, J-JA, ZERO, ZERO, A, I, JA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGL2 * END scalapack-1.8.0/SRC/pzhetd2.f0000640000175000017500000004313710363532303015502 0ustar muammarmuammar SUBROUTINE PZHETD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZHETD2 reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*N. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J, $ JJ, JK, JN, LDA, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW COMPLEX*16 ALPHA, TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, ZAXPY, ZGEBR2D, ZGEBS2D, $ ZHEMV, ZHER2, ZLARFG * .. * .. External Functions .. LOGICAL LSAME COMPLEX*16 ZDOTC EXTERNAL LSAME, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) LWMIN = 3 * N * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHETD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( UPPER ) THEN * * Process(IAROW, IACOL) owns block to be reduced * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the upper triangle of sub( A ) * IK = II+N-1+(JJ+N-2)*LDA A( IK ) = DBLE( A( IK ) ) DO 10 J = N-1, 1, -1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA:IA+J-1,JA:JA+J-1) * ALPHA = A( IK+JK*LDA ) CALL ZLARFG( J, ALPHA, A( II+JK*LDA ), 1, TAUI ) E( JK+1 ) = DBLE( ALPHA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA:IA+J-1,JA:JA+J-1) * A( IK+JK*LDA ) = ONE * * Compute x := tau * A * v storing x in TAU(1:i) * CALL ZHEMV( UPLO, J, TAUI, A( II+(JJ-1)*LDA ), $ LDA, A( II+JK*LDA ), 1, ZERO, $ TAU( JJ ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*ZDOTC( J, TAU( JJ ), 1, $ A( II+JK*LDA ), 1 ) CALL ZAXPY( J, ALPHA, A( II+JK*LDA ), 1, $ TAU( JJ ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL ZHER2( UPLO, J, -ONE, A( II+JK*LDA ), 1, $ TAU( JJ ), 1, A( II+(JJ-1)*LDA ), $ LDA ) END IF * * Copy D, E, TAU to broadcast them columnwise. * A( IK+JK*LDA ) = DCMPLX( E( JK+1 ) ) D( JK+1 ) = DBLE( A( IK+1+JK*LDA ) ) WORK( J+1 ) = DCMPLX( D( JK+1 ) ) WORK( N+J+1 ) = DCMPLX( E( JK+1 ) ) TAU( JK+1 ) = TAUI WORK( 2*N+J+1 ) = TAU( JK+1 ) * 10 CONTINUE D( JJ ) = DBLE( A( II+(JJ-1)*LDA ) ) WORK( 1 ) = DCMPLX( D( JJ ) ) WORK( N+1 ) = ZERO WORK( 2*N+1 ) = ZERO * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1 ) * ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1, $ IAROW, IACOL ) DO 20 J = 2, N JN = JJ + J - 1 D( JN ) = DBLE( WORK( J ) ) E( JN ) = DBLE( WORK( N+J ) ) TAU( JN ) = WORK( 2*N+J ) 20 CONTINUE D( JJ ) = DBLE( WORK( 1 ) ) END IF END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the lower triangle of sub( A ) * A( II+(JJ-1)*LDA ) = DBLE( A( II+(JJ-1)*LDA ) ) DO 30 J = 1, N - 1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1) * ALPHA = A( IK+1+(JK-1)*LDA ) CALL ZLARFG( N-J, ALPHA, A( IK+2+(JK-1)*LDA ), 1, $ TAUI ) E( JK ) = DBLE( ALPHA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1) * A( IK+1+(JK-1)*LDA ) = ONE * * Compute x := tau * A * v storing y in TAU(i:n-1) * CALL ZHEMV( UPLO, N-J, TAUI, A( IK+1+JK*LDA ), $ LDA, A( IK+1+(JK-1)*LDA ), 1, $ ZERO, TAU( JK ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*ZDOTC( N-J, TAU( JK ), 1, $ A( IK+1+(JK-1)*LDA ), 1 ) CALL ZAXPY( N-J, ALPHA, A( IK+1+(JK-1)*LDA ), $ 1, TAU( JK ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL ZHER2( UPLO, N-J, -ONE, $ A( IK+1+(JK-1)*LDA ), 1, $ TAU( JK ), 1, A( IK+1+JK*LDA ), $ LDA ) END IF * * Copy D(JK), E(JK), TAU(JK) to broadcast them * columnwise. * A( IK+1+(JK-1)*LDA ) = DCMPLX( E( JK ) ) D( JK ) = DBLE( A( IK+(JK-1)*LDA ) ) WORK( J ) = DCMPLX( D( JK ) ) WORK( N+J ) = DCMPLX( E( JK ) ) TAU( JK ) = TAUI WORK( 2*N+J ) = TAU( JK ) 30 CONTINUE JN = JJ + N - 1 D( JN ) = DBLE( A( II+N-1+(JN-1)*LDA ) ) WORK( N ) = DCMPLX( D( JN ) ) TAU( JN ) = ZERO WORK( 2*N ) = ZERO * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1 ) * ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1, IAROW, IACOL ) DO 40 J = 1, N - 1 JN = JJ + J - 1 D( JN ) = DBLE( WORK( J ) ) E( JN ) = DBLE( WORK( N+J ) ) TAU( JN ) = WORK( 2*N+J ) 40 CONTINUE JN = JJ + N - 1 D( JN ) = DBLE( WORK( N ) ) TAU( JN ) = ZERO END IF END IF END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZHETD2 * END scalapack-1.8.0/SRC/pdlacp2.f0000640000175000017500000003747710363532303015461 0ustar muammarmuammar SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDLACP2 copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PDLACP2 * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * PDLACP2 requires that only dimension of the matrix operands is * distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW, $ ICOFFA, IIA, IIAA, IIB, IIBB, IIBEGA, IIBEGB, $ IIENDA, IINXTA, IINXTB, ILEFT, IRIGHT, IROFFA, $ ITOP, JJA, JJAA, JJB, JJBB, JJBEGA, JJBEGB, $ JJENDA, JJNXTA, JJNXTB, LDA, LDB, MBA, MP, $ MPAA, MYCOL, MYDIST, MYROW, NBA, NPCOL, NPROW, $ NQ, NQAA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DLACPY, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) LDB = DESCB( LLD_ ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MBA = DESCA( MB_ ) * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MP.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEGA = IIA IIENDA = IIA + MP - 1 IINXTA = MIN( ICEIL( IIBEGA, MBA ) * MBA, IIENDA ) IIBEGB = IIB IINXTB = IIBEGB + IINXTA - IIBEGA * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL DLACPY( UPLO, IINXTA-IIBEGA+1, N-ITOP, $ A( IIBEGA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBEGB+(JJB+ITOP-1)*LDB ), LDB ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEGA = IINXTA + 1 IINXTA = MIN( IINXTA+MBA, IIENDA ) IIBEGB = IINXTB + 1 IINXTB = IIBEGB + IINXTA - IIBEGA GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * MPAA = MP IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJAA.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL DLACPY( 'All', MPAA, ITOP-JJAA+JJA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL DLACPY( UPLO, MPAA, HEIGHT, $ A( IIAA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBB+(JJB+ITOP-1)*LDB ), LDB ) MPAA = MAX( 0, MPAA - HEIGHT ) IIAA = IIAA + HEIGHT JJAA = JJA + IBASE IIBB = IIBB + HEIGHT JJBB = JJB + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * CALL DLACPY( 'All', MP, N, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQ.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEGA = JJA JJENDA = JJA + NQ - 1 JJNXTA = MIN( ICEIL( JJBEGA, NBA ) * NBA, JJENDA ) JJBEGB = JJB JJNXTB = JJBEGB + JJNXTA - JJBEGA * 30 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL DLACPY( UPLO, M-ILEFT, JJNXTA-JJBEGA+1, $ A( IIA+ILEFT+(JJBEGA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBEGB-1)*LDB ), LDB ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEGA = JJNXTA +1 JJNXTA = MIN( JJNXTA+NBA, JJENDA ) JJBEGB = JJNXTB +1 JJNXTB = JJBEGB + JJNXTA - JJBEGA GO TO 30 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * NQAA = NQ IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 40 CONTINUE IF( IIAA.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL DLACPY( 'All', ILEFT-IIAA+IIA, NQAA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL DLACPY( UPLO, WIDE, NQAA, $ A( IIA+ILEFT+(JJAA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBB-1)*LDB ), LDB ) NQAA = MAX( 0, NQAA - WIDE ) IIAA = IIA + IRIGHT JJAA = JJAA + WIDE IIBB = IIB + IRIGHT JJBB = JJBB + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 40 END IF * ELSE * CALL DLACPY( 'All', M, NQ, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * END IF * RETURN * * End of PDLACP2 * END scalapack-1.8.0/SRC/pslatrz.f0000640000175000017500000002156010363532303015615 0ustar muammarmuammar SUBROUTINE PSLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSLATRZ reduces the M-by-N ( M<=N ) real upper trapezoidal matrix * sub( A ) = [ A(IA:IA+M-1,JA:JA+M-1) A(IA:IA+M-1,JA+N-L:JA+N-1) ] to * upper triangular form by means of orthogonal transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N orthogonal matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. L > 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements N-L+1 to N of the first M rows * of sub( A ), with the array TAU, represent the orthogonal * matrix Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), which is used to introduce zeros into * the (m - k + 1)th row of sub( A ), is given in the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, ICTXT, II, J, J1, MP, MYCOL, MYROW, $ NPCOL, NPROW REAL AII * .. * .. External Subroutines .. EXTERNAL INFOG1L, PSELSET, PSLARFG, PSLARZ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, IAROW ) DO 10 I = II, MP TAU( I ) = ZERO 10 CONTINUE * ELSE * J1 = JA + N - L DO 20 I = IA+M-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * [ A(i, j) A(i,j1:ja+n-1) ] * CALL PSLARFG( L+1, AII, I, J, A, I, J1, DESCA, DESCA( M_ ), $ TAU ) * * Apply H(i) to A(ia:i-1,j:ja+n-1) from the right * CALL PSLARZ( 'Right', I-IA, JA+N-J, L, A, I, J1, DESCA, $ DESCA( M_ ), TAU, A, IA, J, DESCA, WORK ) CALL PSELSET( A, I, J, DESCA, AII ) * 20 CONTINUE * END IF * RETURN * * End of PSLATRZ * END scalapack-1.8.0/SRC/pslacpy.f0000640000175000017500000002235410363532303015573 0ustar muammarmuammar SUBROUTINE PSLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSLACPY copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PSLACPY * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) REAL pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBB, IBLK, IN, ITMP, J, JAA, JBB, $ JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PSLACP2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PSLACP2( UPLO, M, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN CALL PSLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB + ITMP JBB = JB + ITMP JAA = JA + ITMP CALL PSLACP2( UPLO, IBLK, N-ITMP, A, I, JAA, DESCA, $ B, IBB, JBB, DESCB ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN CALL PSLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IBB = IB + JTMP JBB = JB + JTMP IAA = IA + JTMP CALL PSLACP2( UPLO, M-JTMP, JBLK, A, IAA, J, DESCA, $ B, IBB, JBB, DESCB ) 20 CONTINUE ELSE IF( M.LE.N ) THEN CALL PSLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB+ITMP CALL PSLACP2( UPLO, IBLK, N, A, I, JA, DESCA, $ B, IBB, JB, DESCB ) 30 CONTINUE ELSE CALL PSLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) JBB = JB+JTMP CALL PSLACP2( UPLO, M, JBLK, A, IA, J, DESCA, $ B, IB, JBB, DESCB ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PSLACPY * END scalapack-1.8.0/SRC/pdgeqlf.f0000640000175000017500000002731410363532303015543 0ustar muammarmuammar SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQLF computes a QL factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, IINFO, ICTXT, IPW, J, JB, JL, JN, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDGEQL2, $ PDLARFB, PDLARFT, PB_TOPGET, PB_TOPSET, $ PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQLF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( (JA+N-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( JL.GE.JN+1 ) THEN * * Use blocked code initially * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Compute the QL factorization of the current block * A(ia:ia+m-n+j+jb-ja-1,j:j+jb-1) * CALL PDGEQL2( M-N+J+JB-JA, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( J.GT.JA ) THEN * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PDLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the * left * CALL PDLARFB( 'Left', 'Transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = M - N + JN - JA + 1 NU = JN - JA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PDGEQL2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEQLF * END scalapack-1.8.0/SRC/pclarzc.f0000640000175000017500000010242110363532303015550 0ustar muammarmuammar SUBROUTINE PCLARZC( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARZC applies a complex elementary reflector Q**H to a * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), * from either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PCTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q**H * sub( C ), * = 'R': form sub( C ) * Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or * sub( C ) * Q**H if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST COMPLEX TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CAXPY, CCOPY, CGEBR2D, $ CGEBS2D, CGEMV, CGERC, CGERV2D, $ CGESD2D, CGSUM2D, CLASET, INFOG2L, $ PB_TOPGET, PBCTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = CONJG( TAU( JJV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, V( IOFFV ), $ 1, ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL CGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = CONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * IPW = MPV+1 CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = CONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = CONJG( TAU( IIV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = CONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * IPW = NQV+1 CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = CONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PCLARZC * END scalapack-1.8.0/SRC/pspbtrs.f0000640000175000017500000006314510363532303015620 0ustar muammarmuammar SUBROUTINE PSPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PSPBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded symmetric positive definite distributed * matrix with bandwidth BW. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PSPBTRF. * * Routine PSPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPBTRF and this is stored in AF. If a linear system * is to be solved using PSPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PSPBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 7*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 10*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 10*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 10*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 10*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 7*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 7*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 7*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 10*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 10*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -6 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 7*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PSPBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PSPBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( BW*NRHS ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PSPBTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB( 5 ) PARAM_CHECK( 15, 1 ) = DESCB( 4 ) PARAM_CHECK( 14, 1 ) = DESCB( 3 ) PARAM_CHECK( 13, 1 ) = DESCB( 2 ) PARAM_CHECK( 12, 1 ) = DESCB( 1 ) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA( 5 ) PARAM_CHECK( 9, 1 ) = DESCA( 4 ) PARAM_CHECK( 8, 1 ) = DESCA( 3 ) PARAM_CHECK( 7, 1 ) = DESCA( 1 ) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PSPBTRSV( 'L', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PSPBTRSV( 'U', 'T', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PSPBTRSV( 'L', 'T', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PSPBTRSV( 'U', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSPBTRS * END scalapack-1.8.0/SRC/psdttrsv.f0000640000175000017500000014170610363532303016014 0ustar muammarmuammar SUBROUTINE PSDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PSDTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Gaussian elimination code PS@(dom_pre)TTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PSDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) REAL pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDTTRF and this is stored in AF. If a linear system * is to be solved using PSDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE, SAXPY, SDTTRSV, $ SGEMM, SGERV2D, SGESD2D, SMATADD, STBTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 9*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 12*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 12*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 12*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 12*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 9*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 12*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 12*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -4 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -8 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 9*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PSDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PSDTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PSDTTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB( 5 ) PARAM_CHECK( 15, 1 ) = DESCB( 4 ) PARAM_CHECK( 14, 1 ) = DESCB( 3 ) PARAM_CHECK( 13, 1 ) = DESCB( 2 ) PARAM_CHECK( 12, 1 ) = DESCB( 1 ) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA( 5 ) PARAM_CHECK( 9, 1 ) = DESCA( 4 ) PARAM_CHECK( 8, 1 ) = DESCA( 3 ) PARAM_CHECK( 7, 1 ) = DESCA( 1 ) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1205 PARAM_CHECK( 15, 2 ) = 1204 PARAM_CHECK( 14, 2 ) = 1203 PARAM_CHECK( 13, 2 ) = 1202 PARAM_CHECK( 12, 2 ) = 1201 PARAM_CHECK( 11, 2 ) = 11 PARAM_CHECK( 10, 2 ) = 905 PARAM_CHECK( 9, 2 ) = 904 PARAM_CHECK( 8, 2 ) = 903 PARAM_CHECK( 7, 2 ) = 901 PARAM_CHECK( 6, 2 ) = 8 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 180 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL SDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -DL( PART_OFFSET+ODD_SIZE+1 ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, ODD_SIZE, -ONE, $ AF( 1 ), ODD_SIZE, B( PART_OFFSET+1 ), LLDB, $ ZERO, WORK( 1+INT_ONE-INT_ONE ), INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STBTRS( 'L', 'N', 'U', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STBTRS( 'L', 'T', 'U', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -ONE, $ AF( 1 ), ODD_SIZE, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE, ONE, B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -( DL( PART_OFFSET+ODD_SIZE+1 ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL SDTTRSV( UPLO, 'T', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL SDTTRSV( UPLO, 'T', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, ODD_SIZE, -ONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, ZERO, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 110 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 90 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 100 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 90 100 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STBTRS( 'U', 'T', 'N', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 110 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 160 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 120 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 130 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 120 130 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STBTRS( 'U', 'N', 'N', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 140 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 140 150 CONTINUE * [End of GOTO Loop] * 160 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+1 ), ODD_SIZE, $ WORK( 1+INT_ONE-INT_ONE ), INT_ONE, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL SDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, $ DU( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 170 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 180 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSDTTRSV * END scalapack-1.8.0/SRC/psgelqf.f0000640000175000017500000002741010363532303015557 0ustar muammarmuammar SUBROUTINE PSGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGELQF computes a LQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1) H(ia+k-2) . . . H(ia), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1)=0 * and v(i) = 1; v(i+1:n) is stored on exit in A(ia+i-1,ja+i:ja+n-1), * and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ IROFF, J, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSGELQ2, $ PSLARFB, PSLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGELQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Handle the first block of rows separately * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IB = IN - IA + 1 * * Compute the LQ factorization of the first block A(ia:in:ja:ja+n-1) * CALL PSGELQ2( IB, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( IA+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ia) H(ia+1) . . . H(in) * CALL PSLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PSLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-IB, N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, $ JA, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of rows * DO 10 I = IN+1, IA+K-1, DESCA( MB_ ) IB = MIN( K-I+IA, DESCA( MB_ ) ) J = JA + I - IA * * Compute the LQ factorization of the current block * A(i:i+ib-1:j:ja+n-1) * CALL PSGELQ2( IB, N-I+IA, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PSLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PSLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-J+JA, IB, A, I, J, DESCA, WORK, $ A, I+IB, J, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGELQF * END scalapack-1.8.0/SRC/psorml2.f0000640000175000017500000003514410363532303015517 0ustar muammarmuammar SUBROUTINE PSORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORML2 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PSGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JCC, LCM, LCMP, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSELSET2, PSLARF, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K -1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(i:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * CALL PSELSET2( AII, A, I, JA+I-IA, DESCA, ONE ) CALL PSLARF( SIDE, MI, NI, A, I, JA+I-IA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) CALL PSELSET( A, I, JA+I-IA, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORML2 * END scalapack-1.8.0/SRC/psgelq2.f0000640000175000017500000002453510363532303015500 0ustar muammarmuammar SUBROUTINE PSGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGELQ2 computes a LQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1) H(ia+k-2) . . . H(ia), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1)=0 * and v(i) = 1; v(i+1:n) is stored on exit in A(ia+i-1,ja+i:ja+n-1), * and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLARFG, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * K = MIN( M, N ) DO 10 I = IA, IA+K-1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i,j+1:ja+n-1) * CALL PSLARFG( N-J+JA, AII, I, J, A, I, MIN( J+1, JA+N-1 ), $ DESCA, DESCA( M_ ), TAU ) * IF( I.LT.IA+M-1 ) THEN * * Apply H(i) to A(i+1:ia+m-1,j:ja+n-1) from the right * CALL PSELSET( A, I, J, DESCA, ONE ) CALL PSLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PSELSET( A, I, J, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGELQ2 * END scalapack-1.8.0/SRC/pzlatrd.f0000640000175000017500000004427510363532303015606 0ustar muammarmuammar SUBROUTINE PZLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCW( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAU( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PZLATRD reduces NB rows and columns of a complex Hermitian * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) to complex * tridiagonal form by an unitary similarity transformation * Q' * sub( A ) * Q, and returns the matrices V and W which are * needed to apply the transformation to the unreduced part of sub( A ). * * If UPLO = 'U', PZLATRD reduces the last NB rows and columns of a * matrix, of which the upper triangle is supplied; * if UPLO = 'L', PZLATRD reduces the first NB rows and columns of a * matrix, of which the lower triangle is supplied. * * This is an auxiliary routine called by PZHETRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of rows and columns to be reduced. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. * On exit, if UPLO = 'U', the last NB columns have been reduced * to tridiagonal form, with the diagonal elements overwriting * the diagonal elements of sub( A ); the elements above the * diagonal with the array TAU, represent the unitary matrix Q * as a product of elementary reflectors. If UPLO = 'L', the * first NB columns have been reduced to tridiagonal form, with * the diagonal elements overwriting the diagonal elements of * sub( A ); the elements below the diagonal with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors; See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * W (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_W,NB_W), This array contains * the local pieces of the N-by-NB_W matrix W required to * update the unreduced part of sub( A ). * * IW (global input) INTEGER * The row index in the global array W indicating the first * row of sub( W ). * * JW (global input) INTEGER * The column index in the global array W indicating the * first column of sub( W ). * * DESCW (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix W. * * WORK (local workspace) COMPLEX*16 array, dimension (NB_A) * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n) H(n-1) . . . H(n-nb+1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the N-by-NB matrix V * which is needed, with W, to apply the transformation to the unreduced * part of the matrix, using a Hermitian rank-2k update of the form: * sub( A ) := sub( A ) - V*W' - W*V'. * * The contents of A on exit are illustrated by the following examples * with n = 5 and nb = 2: * * if UPLO = 'U': if UPLO = 'L': * * ( a a a v4 v5 ) ( d ) * ( a a v4 v5 ) ( 1 d ) * ( a 1 v5 ) ( v1 1 a ) * ( d 1 ) ( v1 v2 a a ) * ( d ) ( v1 v2 a a a ) * * where d denotes a diagonal element of the reduced matrix, a denotes * an element of the original matrix that is unchanged, and vi denotes * an element of the vector defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, JP, JWK, K, $ KW, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX*16 AII, ALPHA, BETA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCWK( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, DGEBR2D, DGEBS2D, $ INFOG2L, PDELSET, PZAXPY, PZDOTC, $ PZELGET, PZELSET, PZGEMV, PZHEMV, $ PZLACGV, PZLARFG, PZSCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) AII = ZERO BETA = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * CALL INFOG2L( N+IA-NB, N+JA-NB, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce last NB columns of upper triangle * DO 10 J = JA+N-1, JA+N-NB, -1 I = IA + J - JA K = J - JA + 1 KW = MOD( K-1, DESCA( MB_ ) ) + 1 * * Update A(IA:I,I) * CALL PZELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( AII ) ) ) CALL PZLACGV( N-K, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ) ) CALL PZGEMV( 'No transpose', K, N-K, -ONE, A, IA, J+1, $ DESCA, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ), $ ONE, A, IA, J, DESCA, 1 ) CALL PZLACGV( N-K, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ) ) CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZGEMV( 'No transpose', K, N-K, -ONE, W, IW, JW+KW, $ DESCW, A, I, J+1, DESCA, DESCA( M_ ), ONE, A, $ IA, J, DESCA, 1 ) CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( AII ) ) ) IF( N-K.GT.0 ) $ CALL PZELSET( A, I, J+1, DESCA, DCMPLX( E( JP ) ) ) * * Generate elementary reflector H(i) to annihilate * A(IA:I-2,I) * JP = MIN( JJ+KW-1, NQ ) CALL PZLARFG( K-1, BETA, I-1, J, A, IA, J, DESCA, 1, $ TAU ) CALL PDELSET( E, 1, J, DESCE, DBLE( BETA ) ) CALL PZELSET( A, I-1, J, DESCA, ONE ) * * Compute W(IW:IW+K-2,JW+KW-1) * CALL PZHEMV( 'Upper', K-1, ONE, A, IA, JA, DESCA, A, IA, J, $ DESCA, 1, ZERO, W, IW, JW+KW-1, DESCW, 1 ) * JWK = MOD( K-1, DESCWK( NB_ ) ) + 2 CALL PZGEMV( 'Conjugate transpose', K-1, N-K, ONE, W, IW, $ JW+KW, DESCW, A, IA, J, DESCA, 1, ZERO, WORK, $ 1, JWK, DESCWK, DESCWK( M_ ) ) CALL PZGEMV( 'No transpose', K-1, N-K, -ONE, A, IA, J+1, $ DESCA, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K, ONE, A, IA, $ J+1, DESCA, A, IA, J, DESCA, 1, ZERO, WORK, 1, $ JWK, DESCWK, DESCWK( M_ ) ) CALL PZGEMV( 'No transpose', K-1, N-K, -ONE, W, IW, JW+KW, $ DESCW, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PZSCAL( K-1, TAU( JP ), W, IW, JW+KW-1, DESCW, 1 ) * CALL PZDOTC( K-1, ALPHA, W, IW, JW+KW-1, DESCW, 1, A, IA, J, $ DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PZAXPY( K-1, ALPHA, A, IA, J, DESCA, 1, W, IW, JW+KW-1, $ DESCW, 1 ) CALL PZELGET( 'E', ' ', BETA, A, I, J, DESCA ) CALL PDELSET( D, 1, J, DESCD, DBLE( BETA ) ) * 10 CONTINUE * ELSE * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce first NB columns of lower triangle * DO 20 J = JA, JA+NB-1 I = IA + J - JA K = J - JA + 1 * * Update A(J:JA+N-1,J) * CALL PZELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( AII ) ) ) CALL PZLACGV( K-1, W, IW+K-1, JW, DESCW, DESCW( M_ ) ) CALL PZGEMV( 'No transpose', N-K+1, K-1, -ONE, A, I, JA, $ DESCA, W, IW+K-1, JW, DESCW, DESCW( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PZLACGV( K-1, W, IW+K-1, JW, DESCW, DESCW( M_ ) ) CALL PZLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZGEMV( 'No transpose', N-K+1, K-1, -ONE, W, IW+K-1, $ JW, DESCW, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PZLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( AII ) ) ) IF( K.GT.1 ) $ CALL PZELSET( A, I, J-1, DESCA, DCMPLX( E( JP ) ) ) * * * Generate elementary reflector H(i) to annihilate * A(I+2:IA+N-1,I) * JP = MIN( JJ+K-1, NQ ) CALL PZLARFG( N-K, BETA, I+1, J, A, I+2, J, DESCA, 1, $ TAU ) CALL PDELSET( E, 1, J, DESCE, DBLE( BETA ) ) CALL PZELSET( A, I+1, J, DESCA, ONE ) * * Compute W(IW+K:IW+N-1,JW+K-1) * CALL PZHEMV( 'Lower', N-K, ONE, A, I+1, J+1, DESCA, A, I+1, $ J, DESCA, 1, ZERO, W, IW+K, JW+K-1, DESCW, 1 ) * CALL PZGEMV( 'Conjugate Transpose', N-K, K-1, ONE, W, IW+K, $ JW, DESCW, A, I+1, J, DESCA, 1, ZERO, WORK, 1, $ 1, DESCWK, DESCWK( M_ ) ) CALL PZGEMV( 'No transpose', N-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', N-K, K-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, $ 1, DESCWK, DESCWK( M_ ) ) CALL PZGEMV( 'No transpose', N-K, K-1, -ONE, W, IW+K, JW, $ DESCW, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PZSCAL( N-K, TAU( JP ), W, IW+K, JW+K-1, DESCW, 1 ) CALL PZDOTC( N-K, ALPHA, W, IW+K, JW+K-1, DESCW, 1, A, I+1, $ J, DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PZAXPY( N-K, ALPHA, A, I+1, J, DESCA, 1, W, IW+K, $ JW+K-1, DESCW, 1 ) CALL PZELGET( 'E', ' ', BETA, A, I, J, DESCA ) CALL PDELSET( D, 1, J, DESCD, DBLE( BETA ) ) * 20 CONTINUE * END IF * * Broadcast columnwise the diagonal elements into D. * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1, $ IAROW, MYCOL ) END IF END IF * RETURN * * End of PZLATRD * END scalapack-1.8.0/SRC/pctrtri.f0000640000175000017500000003070110363532303015602 0ustar muammarmuammar SUBROUTINE PCTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCTRTRI computes the inverse of a upper or lower triangular * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the distributed matrix sub( A ) is upper * or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) * is unit triangular: * = 'N': Non-unit triangular, * = 'U': Unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * triangular matrix sub( A ). If UPLO = 'U', the leading * N-by-N upper triangular part of the matrix sub( A ) contains * the upper triangular matrix to be inverted, and the strictly * lower triangular part of sub( A ) is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * the matrix sub( A ) contains the lower triangular matrix, * and the strictly upper triangular part of sub( A ) is not * referenced. * On exit, the (triangular) inverse of the original matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, A(IA+K-1,JA+K-1) is exactly zero. The * triangular matrix sub( A ) is singular and its * inverse can not be computed. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER I, ICOFF, ICTXT, IROFF, ICURCOL, ICURROW, $ IDUMMY, II, IOFFA, J, JB, JJ, JN, LDA, MYCOL, $ MYROW, NN, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK1MAT, PCTRTI2, PCTRMM, PCTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) * CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOUNIT ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 * CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTRTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) * * Handle first block separately * JB = JN-JA+1 LDA = DESCA( LLD_ ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 10 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 IOFFA = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 20 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 IOFFA = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Use blocked code * IF( UPPER ) THEN * * Compute inverse of upper triangular matrix * JB = JN-JA+1 * * Handle first block of column separately * CALL PCTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * * Loop over remaining block of columns * DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Compute rows 1:j-1 of current block column * CALL PCTRMM( 'Left', UPLO, 'No transpose', DIAG, J-JA, JB, $ ONE, A, IA, JA, DESCA, A, IA, J, DESCA ) CALL PCTRSM( 'Right', UPLO, 'No transpose', DIAG, J-JA, $ JB, -ONE, A, I, J, DESCA, A, IA, J, DESCA ) * * Compute inverse of current diagonal block * CALL PCTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 40 CONTINUE * ELSE * * Compute inverse of lower triangular matrix * NN = ( ( JA+N-2 ) / DESCA( NB_ ) )*DESCA( NB_ ) + 1 DO 50 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA IF( J+JB.LE.JA+N-1 ) THEN * * Compute rows j+jb:ja+n-1 of current block column * CALL PCTRMM( 'Left', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, ONE, A, I+JB, J+JB, DESCA, $ A, I+JB, J, DESCA ) CALL PCTRSM( 'Right', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, -ONE, A, I, J, DESCA, $ A, I+JB, J, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PCTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 50 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 IF( JA+JB.LE.JA+N-1 ) THEN * * Compute rows ja+jb:ja+n-1 of current block column * CALL PCTRMM( 'Left', UPLO, 'No transpose', DIAG, N-JB, JB, $ ONE, A, IA+JB, JA+JB, DESCA, A, IA+JB, JA, $ DESCA ) CALL PCTRSM( 'Right', UPLO, 'No transpose', DIAG, N-JB, JB, $ -ONE, A, IA, JA, DESCA, A, IA+JB, JA, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PCTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * END IF * RETURN * * End PCTRTRI * END scalapack-1.8.0/SRC/pspbsv.f0000640000175000017500000004503410363532303015435 0ustar muammarmuammar SUBROUTINE PSPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded symmetric positive definite distributed * matrix with bandwidth BW. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PSPBTRF and PSPBTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+2*bw)*bw * +max((bw*NRHS), bw*bw) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PSPBTRF, PSPBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PSPBTRF and PSPBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PSPBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+2*BW)*BW * * Factor the matrix * CALL PSPBTRF( UPLO, N, BW, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PSPBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PSPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPBSV', -INFO ) RETURN END IF * RETURN * * End of PSPBSV * END scalapack-1.8.0/SRC/pzporfs.f0000640000175000017500000010141410363532303015616 0ustar muammarmuammar SUBROUTINE PZPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, $ LRWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ), X( * ) DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) * .. * * Purpose * ======= * * PZPORFS improves the computed solution to a system of linear * equations when the coefficient matrix is Hermitian positive definite * and provides error bounds and backward error estimates for the * solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ). * This array contains the local pieces of the N-by-N Hermitian * distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * On entry, this array contains the factors L or U from the * Cholesky factorization sub( A ) = L*L**H or U**H*U, as * computed by PZPOTRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). On exit, it contains the * improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ZERO, RONE, TWO, THREE PARAMETER ( ZERO = 0.0D+0, RONE = 1.0D+0, TWO = 2.0D+0, $ THREE = 3.0D+0 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX*16 ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ INFOG2L, PCHK2MAT, PXERBLA, PZAHEMV, PZAXPY, $ PZCOPY, PZHEMV, PZPOTRS, PZLACON, $ ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, ICHAR, MAX, MIN, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 19, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2 * NPMOD LRWMIN = NPMOD WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -13 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1500 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1500 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1900 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -17 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1900 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -18 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1900 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -25 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 23 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 25 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 0, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, N, 2, NRHS, 3, $ IX, JX, DESCX, 19, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPORFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X) * CALL PZCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to * the i-th components of the numerator and denominator * before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PZAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J) is larger than machine epsilon, and * 2) BERR(J) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PZAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub( B ) - sub( A )*sub( X ). * CALL PZCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PZAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, $ 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PZAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + NZ*EPS*( * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X)) * +abs(sub(B))) is incremented by SAFE1 if the i-th component * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PZPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) * RETURN * * End of PZPORFS * END scalapack-1.8.0/SRC/strmvt.f0000640000175000017500000001112310363532303015447 0ustar muammarmuammar SUBROUTINE STRMVT( UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, $ INCZ ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER INCW, INCX, INCY, INCZ, LDT, N * .. * .. Array Arguments .. REAL T( LDT, * ), W( * ), X( * ), Y( * ), Z( * ) * .. * * Purpose * ======= * * STRMVT performs the matrix-vector operations * * x := T' *y, and w := T *z, * * where x is an n element vector and T is an n by n * upper or lower triangular matrix. * * Arguments * ========= * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix A. * N must be at least zero. * Unchanged on exit. * * T - REAL array of DIMENSION ( LDT, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array T must contain the upper * triangular matrix and the strictly lower triangular part of * T is not referenced. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array T must contain the lower * triangular matrix and the strictly upper triangular part of * T is not referenced. * * LDT - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least * max( 1, n ). * Unchanged on exit. * * X - REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). * On exit, X = T' * y * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * Unchanged on exit. * * Y - REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). * Before entry, the incremented array Y must contain the n * element vector y. Unchanged on exit. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * W - REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCW ) ). * On exit, W = T * z * * INCW - INTEGER. * On entry, INCW specifies the increment for the elements of * W. INCW must not be zero. * Unchanged on exit. * * Z - REAL array of dimension at least * ( 1 + ( n - 1 )*abs( INCZ ) ). * Before entry, the incremented array Z must contain the n * element vector z. Unchanged on exit. * * INCY - INTEGER. * On entrz, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * * Level 2 Blas routine. * * * .. Local Scalars .. INTEGER INFO * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL SCOPY, STRMV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( LDT.LT.MAX( 1, N ) ) THEN INFO = 4 ELSE IF( INCW.EQ.0 ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 10 ELSE IF( INCZ.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'STRMVT', INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * * IF( INCX.NE.1 .OR. INCY.NE.1 .OR. INCW.NE.1 .OR. INCZ.NE.1 .OR. $ .TRUE. ) THEN CALL SCOPY( N, Y, INCY, X, INCX ) CALL STRMV( UPLO, 'C', 'N', N, T, LDT, X, INCX ) CALL SCOPY( N, Z, INCZ, W, INCW ) CALL STRMV( UPLO, 'N', 'N', N, T, LDT, W, INCW ) RETURN END IF * RETURN * * End of STRMVT. * END scalapack-1.8.0/SRC/pdlarzt.f0000640000175000017500000002571710363532303015606 0ustar muammarmuammar SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) DOUBLE PRECISION TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PDLARZT forms the triangular factor T of a real block reflector * H of order > n, which is defined as a product of k elementary * reflectors as returned by PDTZRZF. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the array V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the array V, and * * H = I - V' * T * V * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise (not supported yet) * = 'R': rowwise * * N (global input) INTEGER * The number of meaningful entries of the block reflector H. * N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)). * The distributed matrix V contains the Householder vectors. * See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) DOUBLE PRECISION array, dimension (MB_V,MB_V) * It contains the k-by-k triangular factor of the block * reflector associated with V. T is lower triangular. * * WORK (local workspace) DOUBLE PRECISION array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * ______V_____ * ( v1 v2 v3 ) / \ * ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) * V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) * ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) * ( v1 v2 v3 ) * . . . * . . . * 1 . . * 1 . * 1 * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * ______V_____ * 1 / \ * . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) * . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) * . . . ( . . 1 . . v3 v3 v3 v3 v3 ) * . . . * ( v1 v2 v3 ) * ( v1 v2 v3 ) * V = ( v1 v2 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJV, LDV, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, DCOPY, DGEMV, $ DGSUM2D, DLASET, DTRMV, INFOG2L, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( MYROW.EQ.IVROW ) THEN IW = 1 ITMP0 = 0 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF * DO 10 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ.GT.0 ) THEN CALL DGEMV( 'No transpose', ITMP0, NQ, -TAU( II ), $ V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, WORK( IW ), $ 1 ) ELSE CALL DLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF IW = IW + ITMP0 * 10 CONTINUE * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 20 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL DCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', ITMP0, $ T( ITMP1+DESCV( MB_ ) ), DESCV( MB_ ), $ T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 20 CONTINUE * END IF * END IF * RETURN * * End of PDLARZT * END scalapack-1.8.0/SRC/pssyevx.f0000640000175000017500000011474610377154001015650 0ustar muammarmuammar SUBROUTINE PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, $ N, NZ REAL ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ), $ IFAIL( * ), IWORK( * ) REAL A( * ), GAP( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSYEVX computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. Eigenvalues/vectors can be selected by * specifying a range of values or a range of indices for the desired * eigenvalues. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * PSSYEVX assumes IEEE 754 standard compliant arithmetic. To port * to a system which does not have IEEE 754 arithmetic, modify * the appropriate SLmake.inc file to include the compiler switch * -DNO_IEEE. This switch only affects the compilation of pslaiect.c. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic REAL array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PSSYEVX cannot guarantee * correct error reporting. * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) REAL * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PSLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PSLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PSSYEVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PSSYEVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) REAL * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) REAL array, * dimension max(3,LWORK) * On return, WORK(1) contains the optimal amount of * workspace required for efficient execution. * if JOBZ='N' WORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' WORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LWORK (local input) INTEGER * Size of WORK * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LWORK is too small: * If LWORK is too small to guarantee orthogonality, * PSSYEVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-23 * is returned. Note that when RANGE='V', PSSYEVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LWORK is large enough to allow PSSYEVX to * compute the eigenvalues, PSSYEVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * Greater performance can be achieved if adequate workspace * is provided. On the other hand, in some situations, * performance can decrease as the workspace provided * increases above the workspace amount shown below: * * For optimal performance, greater workspace may be * needed, i.e. * LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT ) * Where: * LWORK, as defined previously, depends upon the number * of eigenvectors requested, and * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 3 ) * NPS * * ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L', * 0, 0, 0, 0) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * For large N, no extra workspace is needed, however the * biggest boost in performance comes for small N, so it * is wise to provide the extra workspace (typically less * than a Megabyte per process). * * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PSSTEIN will perform no better than SSTEIN on 1 * processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then * IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) REAL array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PSSYEVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute * eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Differences between PSSYEVX and SSYEVX * ====================================== * * A, LDA -> A, IA, JA, DESCA * Z, LDZ -> Z, IZ, JZ, DESCZ * WORKSPACE needs are larger for PSSYEVX. * LIWORK parameter added * * ORFAC, ICLUSTER() and GAP() parameters added * meaning of INFO is changed * * Functional differences: * PSSYEVX does not promise orthogonality for eigenvectors associated * with tighly clustered eigenvalues. * PSSYEVX does not reorthogonalize eigenvectors * that are on different processes. The extent of reorthogonalization * is controlled by the input parameter LWORK. * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, TEN, FIVE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0, $ FIVE = 5.0E+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN, $ VALEIG, WANTZ CHARACTER ORDER INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO, $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP, $ INDTAU, INDWORK, IROFFA, IROFFZ, ISCALE, $ ISIZESTEBZ, ISIZESTEIN, IZROW, LALLWORK, $ LIWMIN, LLWORK, LWMIN, LWOPT, MAXEIGS, MB_A, $ MQ0, MYCOL, MYROW, NB, NB_A, NEIG, NN, NNP, $ NP0, NPCOL, NPROCS, NPROW, NPS, NSPLIT, $ NSYTRD_LWOPT, NZZ, OFFSET, RSRC_A, RSRC_Z, $ SIZEORMTR, SIZESTEIN, SIZESYEVX, SQNPC REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, $ SIGMA, SMLNUM, VLL, VUU * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV REAL PSLAMCH, PSLANSY EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, $ PSLAMCH, PSLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT, $ PCHK2MAT, PSELGET, PSLARED1D, PSLASCL, PSORMTR, $ PSSTEBZ, PSSTEIN, PSSYNTRD, PXERBLA, SGEBR2D, $ SGEBS2D, SLASRT, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, REAL, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 800+CTXT_ ) ELSE IF( WANTZ ) THEN IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) EPS = PSLAMCH( ICTXT, 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL LOWER = LSAME( UPLO, 'L' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDWORK = INDE2 + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the IWORK array * ISIZESTEIN = 3*N + NPROCS + 1 ISIZESTEBZ = MAX( 4*N, 14, NPROCS ) INDIBL = ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 INDISP = INDIBL + N * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) $ LQUERY = .TRUE. * NNP = MAX( N, NPROCS+1, 4 ) LIWMIN = 6*NNP * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A NN = MAX( N, NB, 2 ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) NP0 = NUMROC( N+IROFFA, NB, 0, 0, NPROW ) MQ0 = NUMROC( N+ICOFFA, NB, 0, 0, NPCOL ) IF( WANTZ ) THEN RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) ELSE LWOPT = LWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD code * ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS LWOPT = MAX( LWOPT, 5*N+NSYTRD_LWOPT ) * END IF IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( VALEIG ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL SGEBS2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL SGEBR2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3, 0, 0 ) END IF IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -2 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -3 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -10 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -11 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -12 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -23 ELSE IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN INFO = -25 ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -9 ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -10 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) $ THEN INFO = -13 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -19 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -19 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2100+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2100+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2100+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2100+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2100+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2100+CSRC_ ) ELSE IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( ALLEIG ) THEN IDUM1( 3 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 3 ) = ICHAR( 'I' ) ELSE IDUM1( 3 ) = ICHAR( 'V' ) END IF IDUM2( 3 ) = 3 IF( LQUERY ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 4 IF( WANTZ ) THEN CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, $ JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 4, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = REAL( LWOPT ) IWORK( 1 ) = LIWMIN END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYEVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 ICLUSTR( 1 ) = 0 END IF M = 0 WORK( 1 ) = REAL( LWOPT ) IWORK( 1 ) = LIWMIN RETURN END IF * * Scale matrix to allowable range, if necessary. * ABSTLL = ABSTOL ISCALE = 0 IF( VALEIG ) THEN VLL = VL VUU = VU ELSE VLL = ZERO VUU = ZERO END IF * ANRM = PSLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ANRM = ANRM*SIGMA ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM ANRM = ANRM*SIGMA END IF * IF( ISCALE.EQ.1 ) THEN CALL PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) IF( ABSTOL.GT.0 ) $ ABSTLL = ABSTOL*SIGMA IF( VALEIG ) THEN VLL = VL*SIGMA VUU = VU*SIGMA IF( VUU.EQ.VLL ) THEN VUU = VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) END IF END IF END IF * * Call PSSYNTRD to reduce symmetric matrix to tridiagonal form. * LALLWORK = LLWORK * CALL PSSYNTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet work with arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), $ WORK( INDWORK ), LLWORK ) * CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDE ), WORK( INDE2 ), $ WORK( INDWORK ), LLWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PSELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, $ I+JA-1, DESCA ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, $ I+JA, DESCA ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, $ I+JA-1, DESCA ) 30 CONTINUE END IF END IF * * Call PSSTEBZ and, if eigenvectors are desired, PSSTEIN. * IF( WANTZ ) THEN ORDER = 'B' ELSE ORDER = 'E' END IF * CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, $ WORK( INDD2 ), WORK( INDE2+OFFSET ), M, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWORK ), $ LLWORK, IWORK( 1 ), ISIZESTEBZ, IINFO ) * * * IF PSSTEBZ fails, the error propogates to INFO, but * we do not propogate the eigenvalue(s) which failed because: * 1) This should never happen if the user specifies * ABSTOL = 2 * PSLAMCH( 'U' ) * 2) PSSTEIN will confirm/deny whether the eigenvalues are * close enough. * IF( IINFO.NE.0 ) THEN INFO = INFO + IERREBZ DO 40 I = 1, M IWORK( INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) 40 CONTINUE END IF IF( WANTZ ) THEN * IF( VALEIG ) THEN * * Compute the maximum number of eigenvalues that we can * compute in the * workspace that we have, and that we can store in Z. * * Loop through the possibilities looking for the largest * NZ that we can feed to PSSTEIN and PSORMTR * * Since all processes must end up with the same value * of NZ, we first compute the minimum of LALLWORK * CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LALLWORK, 1, 1, 1, -1, $ -1, -1 ) * MAXEIGS = DESCZ( N_ ) * DO 50 NZ = MIN( MAXEIGS, M ), 0, -1 MQ0 = NUMROC( NZ, NB, 0, 0, NPCOL ) SIZESTEIN = ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) SIZEORMTR = MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + $ NB*NB * SIZESYEVX = MAX( SIZESTEIN, SIZEORMTR ) IF( SIZESYEVX.LE.LALLWORK ) $ GO TO 60 50 CONTINUE 60 CONTINUE ELSE NZ = M END IF NZ = MAX( NZ, 0 ) IF( NZ.NE.M ) THEN INFO = INFO + IERRSPC * DO 70 I = 1, M IFAIL( I ) = 0 70 CONTINUE * * The following code handles a rare special case * - NZ .NE. M means that we don't have enough room to store * all the vectors. * - NSPLIT .GT. 1 means that the matrix split * In this case, we cannot simply take the first NZ eigenvalues * because PSSTEBZ sorts the eigenvalues by block when * a split occurs. So, we have to make another call to * PSSTEBZ with a new upper limit - VUU. * IF( NSPLIT.GT.1 ) THEN CALL SLASRT( 'I', M, W, IINFO ) NZZ = 0 IF( NZ.GT.0 ) THEN * VUU = W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) IF( VLL.GE.VUU ) THEN NZZ = 0 ELSE CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, $ IU, ABSTLL, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), NZZ, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), $ WORK( INDWORK ), LLWORK, IWORK( 1 ), $ ISIZESTEBZ, IINFO ) END IF * IF( MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN IF( NZZ.GT.NZ .OR. IINFO.NE.0 ) THEN INFO = INFO + IERREBZ END IF END IF END IF NZ = MIN( NZ, NZZ ) * END IF END IF CALL PSSTEIN( N, WORK( INDD2 ), WORK( INDE2+OFFSET ), NZ, W, $ IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, $ JZ, DESCZ, WORK( INDWORK ), LALLWORK, IWORK( 1 ), $ ISIZESTEIN, IFAIL, ICLUSTR, GAP, IINFO ) * IF( IINFO.GE.NZ+1 ) $ INFO = INFO + IERRCLS IF( MOD( IINFO, NZ+1 ).NE.0 ) $ INFO = INFO + IERREIN * * Z = Q * Z * * IF( NZ.GT.0 ) THEN CALL PSORMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) END IF * END IF * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL SSCAL( M, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = REAL( LWOPT ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSSYEVX * END scalapack-1.8.0/SRC/psgetri.f0000640000175000017500000003516510430435051015576 0ustar muammarmuammar SUBROUTINE PSGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.4) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * v1.7.4: May 10, 2006 * v1.7: May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ), IWORK( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSGETRI computes the inverse of a distributed matrix using the LU * factorization computed by PSGETRF. This method inverts U and then * computes the inverse of sub( A ) = A(IA:IA+N-1,JA:JA+N-1) denoted * InvA by solving the system InvA*L = inv(U) for InvA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the L and U obtained by the * factorization sub( A ) = P*L*U computed by PSGETRF. On * exit, if INFO = 0, sub( A ) contains the inverse of the * original distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A * keeps track of the pivoting information. IPIV(i) is the * global row index the local row i was swapped with. This * array is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = LOCr(N+MOD(IA-1,MB_A))*NB_A. WORK is used to keep a * copy of at most an entire column block of sub( A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK used as workspace for * physically transposing the pivots. * LIWORK is local input and must be at least * if NPROW == NPCOL then * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + NB_A, * else * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + * MAX( CEIL(CEIL(LOCr(M_A)/MB_A)/(LCM/NPROW)), * NB_A ) * where LCM is the least common multiple of process * rows and columns (NPROW and NPCOL). * end if * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,IA+K-1) is exactly zero; the * matrix is singular and its inverse could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IROFF, IW, J, $ JB, JN, LCM, LIWMIN, LWMIN, MP, MYCOL, MYROW, $ NN, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PSGEMM, PSLACPY, PSLASET, PSLAPIV, $ PSTRSM, PSTRTRI, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) LWMIN = NP * DESCA( NB_ ) * MP = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) NQ = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) IF( NPROW.EQ.NPCOL ) THEN LIWMIN = NQ + DESCA( NB_ ) ELSE * * Use the formula for the workspace given in PxLAPIV * to compute the minimum size LIWORK for IWORK * * The formula in PxLAPIV is * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * * where * M_P is the global length of the pivot vector * MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW * I_P is IA * I_P = IA * MB_P is the block size use for the block cyclic distribution of the * pivot vector * MB_P = DESCA (MB_ ) * LOCc ( . ) * NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) * LOCr ( . ) * NUMROC ( . , DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) * CEIL ( X / Y ) * ICEIL( X, Y ) * LCM * LCM = ILCM( NPROW, NPCOL ) * LCM = ILCM( NPROW, NPCOL ) LIWMIN = NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW $ + MOD ( IA - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -8 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 8 IF( LIWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGETRI', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Form inv(U). If INFO > 0 from PSTRTRI, then U is singular, * and the inverse is not computed. * CALL PSTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) IF( INFO.GT.0 ) $ RETURN * * Define array descriptor for working array WORK * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) NN = ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1 IACOL = INDXG2P( NN, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, NP ) ) IW = IROFF + 1 * * Solve the equation inv(A)*L=inv(U) for inv(A) using blocked code. * DO 10 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Copy current block column of L to WORK and replace with zeros. * CALL PSLACPY( 'Lower', JA+N-1-J, JB, A, I+1, J, DESCA, $ WORK, IW+J-JA+1, 1, DESCW ) CALL PSLASET( 'Lower', JA+N-1-J, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute current block column of inv(A). * IF( J+JB.LE.JA+N-1 ) $ CALL PSGEMM( 'No transpose', 'No transpose', N, JB, $ JA+N-J-JB, -ONE, A, IA, J+JB, DESCA, WORK, $ IW+J+JB-JA, 1, DESCW, ONE, A, IA, J, DESCA ) CALL PSTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW+J-JA, 1, DESCW, A, IA, J, DESCA ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 * * Copy current block column of L to WORK and replace with zeros. * CALL PSLACPY( 'Lower', N-1, JB, A, IA+1, JA, DESCA, WORK, IW+1, $ 1, DESCW ) CALL PSLASET( 'Lower', N-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Compute current block column of inv(A). * IF( JA+JB.LE.JA+N-1 ) $ CALL PSGEMM( 'No transpose', 'No transpose', N, JB, $ N-JB, -ONE, A, IA, JA+JB, DESCA, WORK, IW+JB, 1, $ DESCW, ONE, A, IA, JA, DESCA ) CALL PSTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW, 1, DESCW, A, IA, JA, DESCA ) * * Use the row pivots and apply them to the columns of the global * matrix. * CALL DESCSET( DESCW, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ MP+DESCA( MB_ ) ) CALL PSLAPIV( 'Backward', 'Columns', 'Column', N, N, A, IA, $ JA, DESCA, IPIV, IA, 1, DESCW, IWORK ) * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSGETRI * END scalapack-1.8.0/SRC/pcunmr2.f0000640000175000017500000003532210363532303015505 0ustar muammarmuammar SUBROUTINE PCUNMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMR2 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IROFFC, LCM, LCMP, LWMIN, MI, $ MPC0, MYCOL, MYROW, NI, NPCOL, NPROW, NQ, NQC0 COMPLEX AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCELSET2, PCLACGV, PCLARF, PCLARFC, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic:ic+m-k+i-ia,jc:jc+n-1) * MI = M - K + I - IA + 1 ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc:jc+n-k+i-ia+1) * NI = N - K + I - IA + 1 END IF * * Apply H(i) or H(i)' * CALL PCLACGV( NQ-K+I-IA, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCELSET2( AII, A, I, JA+NQ-K+I-IA, DESCA, ONE ) IF( NOTRAN ) THEN CALL PCLARFC( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) ELSE CALL PCLARF( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) END IF CALL PCELSET( A, I, JA+NQ-K+I-IA, DESCA, AII ) CALL PCLACGV( NQ-K+I-IA, A, I, JA, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMR2 * END scalapack-1.8.0/SRC/slasorte.f0000640000175000017500000001000710602576752015760 0ustar muammarmuammar SUBROUTINE SLASORTE( S, LDS, J, OUT, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER INFO, J, LDS * .. * .. Array Arguments .. REAL OUT( J, * ), S( LDS, * ) * .. * * Purpose * ======= * * SLASORTE sorts eigenpairs so that real eigenpairs are together and * complex are together. This way one can employ 2x2 shifts easily * since every 2nd subdiagonal is guaranteed to be zero. * This routine does no parallel work. * * Arguments * ========= * * S (local input/output) REAL array, dimension LDS * On entry, a matrix already in Schur form. * On exit, the diagonal blocks of S have been rewritten to pair * the eigenvalues. The resulting matrix is no longer * similar to the input. * * LDS (local input) INTEGER * On entry, the leading dimension of the local array S. * Unchanged on exit. * * J (local input) INTEGER * On entry, the order of the matrix S. * Unchanged on exit. * * OUT (local input/output) REAL array, dimension Jx2 * This is the work buffer required by this routine. * * INFO (local input) INTEGER * This is set if the input matrix had an odd number of real * eigenvalues and things couldn't be paired or if the input * matrix S was not originally in Schur form. * 0 indicates successful completion. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER BOT, I, LAST, TOP * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * LAST = J TOP = 1 BOT = J INFO = 0 DO 10 I = J - 1, 1, -1 IF( S( I+1, I ).EQ.ZERO ) THEN IF( LAST-I.EQ.2 ) THEN OUT( BOT-1, 1 ) = S( I+1, I+1 ) OUT( BOT, 2 ) = S( I+2, I+2 ) OUT( BOT-1, 2 ) = S( I+1, I+2 ) OUT( BOT, 1 ) = S( I+2, I+1 ) BOT = BOT - 2 END IF IF( LAST-I.EQ.1 ) THEN IF( MOD( TOP, 2 ).EQ.1 ) THEN * * FIRST OF A PAIR * IF( ( I.EQ.J-1 ) .OR. ( I.EQ.1 ) ) THEN OUT( TOP, 1 ) = S( I+1, I+1 ) ELSE OUT( TOP, 1 ) = S( I+1, I+1 ) END IF OUT( TOP, 2 ) = ZERO ELSE * * SECOND OF A PAIR * IF( ( I.EQ.J-1 ) .OR. ( I.EQ.1 ) ) THEN OUT( TOP, 2 ) = S( I+1, I+1 ) ELSE OUT( TOP, 2 ) = S( I+1, I+1 ) END IF OUT( TOP, 1 ) = ZERO END IF TOP = TOP + 1 END IF IF( LAST-I.GT.2 ) THEN INFO = I RETURN END IF LAST = I END IF 10 CONTINUE IF( LAST.EQ.2 ) THEN * * GRAB LAST DOUBLE PAIR * OUT( BOT-1, 1 ) = S( 1, 1 ) OUT( BOT, 2 ) = S( 2, 2 ) OUT( BOT-1, 2 ) = S( 1, 2 ) OUT( BOT, 1 ) = S( 2, 1 ) BOT = BOT - 2 END IF IF( LAST.EQ.1 .and. mod(top, 2) .eq. 0 ) THEN * * GRAB SECOND PART OF LAST PAIR * OUT(TOP, 2) = s(1,1) OUT(TOP, 1) = zero TOP = TOP + 1 END IF IF( TOP-1.NE.BOT ) THEN INFO = -BOT RETURN END IF * * Overwrite the S diagonals * DO 20 I = 1, J, 2 S( I, I ) = OUT( I, 1 ) S( I+1, I ) = OUT( I+1, 1 ) S( I, I+1 ) = OUT( I, 2 ) S( I+1, I+1 ) = OUT( I+1, 2 ) 20 CONTINUE * RETURN * * End of SLASORTE * END scalapack-1.8.0/SRC/pclatrs.f0000640000175000017500000000531310363532303015564 0ustar muammarmuammar SUBROUTINE PCLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL CNORM( * ) COMPLEX A( * ), X( * ), WORK( * ) * .. * * Purpose * ======= * * PCLATRS solves a triangular system. This routine in unfinished * at this time, but will be part of the next release. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * * .. Local Scalars .. INTEGER ICTXT, IIX, IROFF, JJX, MYCOL, MYROW, NP, $ NPCOL, NPROW, LDX, IXCOL, IXROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, INFOG2L, $ PCTRSV * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ***** NO SCALING ***** Call PCTRSV for all cases ***** * SCALE = ONE CALL PCTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX(MB_) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IXCOL ) THEN CALL CGEBS2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX ) ELSE CALL CGEBR2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX, MYROW, IXCOL ) END IF * RETURN * * End of PCLATRS * END scalapack-1.8.0/SRC/pslaqsy.f0000640000175000017500000003200310363532303015604 0ustar muammarmuammar SUBROUTINE PSLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N REAL AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), SC( * ), SR( * ) * .. * * Purpose * ======= * * PSLAQSY equilibrates a symmetric distributed matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the * vectors SR and SC. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is to be referenced: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (input/output) REAL pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the distributed symmetric * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * part of the matrix, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular part of the matrix, and the strictly upper trian- * gular part of sub( A ) is not referenced. * On exit, if EQUED = 'Y', the equilibrated matrix: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local input) REAL array, dimension LOCr(M_A) * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned * with the distributed matrix A, and replicated across every * process column. SR is tied to the distributed matrix A. * * SC (local input) REAL array, dimension LOCc(N_A) * The scale factors for sub( A ). SC is aligned with the dis- * tributed matrix A, and replicated down every process row. * SC is tied to the distributed matrix A. * * SCOND (global input) REAL * Ratio of the smallest SR(i) (respectively SC(j)) to the * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1 * and JA <= j <= JA+N-1. * * AMAX (global input) REAL * Absolute value of the largest distributed submatrix entry. * * EQUED (output) CHARACTER*1 * Specifies whether or not equilibration was done. * = 'N': No equilibration. * = 'Y': Equilibration was done, i.e., sub( A ) has been re- * placed by: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if scaling should be done * based on the ratio of the scaling factors. If SCOND < THRESH, * scaling is done. * * LARGE and SMALL are threshold values used to decide if scaling should * be done based on the absolute size of the largest matrix element. * If AMAX > LARGE or AMAX < SMALL, scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, THRESH PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J, $ JB, JJ, JJA, JN, KK, LDA, LL, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PSLAMCH( ICTXT, 'Safe minimum' ) / $ PSLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN * * No equilibration * EQUED = 'N' * ELSE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Replace A by diag(S) * A * diag(S). * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 20 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 10 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE IOFFA = IOFFA + JB*LDA END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 70 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 40 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 30 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE ELSE DO 60 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 50 KK = IIA, II-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 70 CONTINUE * ELSE * * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFF * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 90 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 80 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 100 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 160 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 130 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 120 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE ELSE DO 150 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 140 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 160 CONTINUE * END IF * EQUED = 'Y' * END IF * RETURN * * End of PSLAQSY * END scalapack-1.8.0/SRC/pzlatrs.f0000640000175000017500000000531310363532303015613 0ustar muammarmuammar SUBROUTINE PZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION CNORM( * ) COMPLEX*16 A( * ), X( * ), WORK( * ) * .. * * Purpose * ======= * * PZLATRS solves a triangular system. This routine in unfinished * at this time, but will be part of the next release. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * * .. Local Scalars .. INTEGER ICTXT, IIX, IROFF, JJX, MYCOL, MYROW, NP, $ NPCOL, NPROW, LDX, IXCOL, IXROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, ZGEBR2D, ZGEBS2D, INFOG2L, $ PZTRSV * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ***** NO SCALING ***** Call PZTRSV for all cases ***** * SCALE = ONE CALL PZTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX(MB_) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IXCOL ) THEN CALL ZGEBS2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX ) ELSE CALL ZGEBR2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX, MYROW, IXCOL ) END IF * RETURN * * End of PZLATRS * END scalapack-1.8.0/SRC/pdgetrf.f0000640000175000017500000002645710363532303015563 0ustar muammarmuammar SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDGETRF computes an LU factorization of a general M-by-N distributed * matrix sub( A ) = (IA:IA+M-1,JA:JA+N-1) using partial pivoting with * row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal ele- * ments (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). L and U are stored in sub( A ). * * This is the right-looking Parallel Level 3 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be factored. On exit, this * array contains the local pieces of the factors L and U from * the factorization sub( A ) = P*L*U; the unit diagonal ele- * ments of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, COLCTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PDGEMM, PDGETF2, $ PDLASWP, PDTRSM, PXERBLA * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUM1, $ IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRF', -INFO ) RETURN END IF * * Quick return if possible * IF( DESCA( M_ ).EQ.1 ) THEN IPIV( 1 ) = 1 RETURN ELSE IF( M.EQ.0 .OR. N.EQ.0 ) THEN RETURN END IF * * Split-ring topology for the communication along process rows * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', ' ' ) * * Handle the first block of columns separately * MN = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PDGETF2( M, JB, A, IA, JA, DESCA, IPIV, INFO ) * IF( JB+1.LE.N ) THEN * * Apply interchanges to columns JN+1:JA+N-1. * CALL PDLASWP( 'Forward', 'Rows', N-JB, A, IA, JN+1, DESCA, $ IA, IN, IPIV ) * * Compute block row of U. * CALL PDTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-JB, ONE, A, IA, JA, DESCA, A, IA, JN+1, DESCA ) * IF( JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PDGEMM( 'No transpose', 'No transpose', M-JB, N-JB, JB, $ -ONE, A, IN+1, JA, DESCA, A, IA, JN+1, DESCA, $ ONE, A, IN+1, JN+1, DESCA ) * END IF END IF * * Loop over the remaining blocks of columns. * DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PDGETF2( M-J+JA, JB, A, I, J, DESCA, IPIV, IINFO ) * IF( INFO.EQ.0 .AND. IINFO.GT.0 ) $ INFO = IINFO + J - JA * * Apply interchanges to columns JA:J-JA. * CALL PDLASWP( 'Forward', 'Rowwise', J-JA, A, IA, JA, DESCA, $ I, I+JB-1, IPIV ) * IF( J-JA+JB+1.LE.N ) THEN * * Apply interchanges to columns J+JB:JA+N-1. * CALL PDLASWP( 'Forward', 'Rowwise', N-J-JB+JA, A, IA, J+JB, $ DESCA, I, I+JB-1, IPIV ) * * Compute block row of U. * CALL PDTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-J-JB+JA, ONE, A, I, J, DESCA, A, I, J+JB, $ DESCA ) * IF( J-JA+JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PDGEMM( 'No transpose', 'No transpose', M-J-JB+JA, $ N-J-JB+JA, JB, -ONE, A, I+JB, J, DESCA, A, $ I, J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * END IF END IF * 10 CONTINUE * IF( INFO.EQ.0 ) $ INFO = MN + 1 CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, INFO, 1, IDUM1, IDUM2, $ -1, -1, MYCOL ) IF( INFO.EQ.MN+1 ) $ INFO = 0 * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * RETURN * * End of PDGETRF * END scalapack-1.8.0/SRC/pdsyttrd.f0000640000175000017500000012270310363532303015774 0ustar muammarmuammar SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * * ======= * * PDSYTTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding * process and memory location. * * Let A be a generic term for any 2D block cyclicly distributed * array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- ----------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, * indicating the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to * distribute the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to * distribute the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the * first row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes * of its process row. * The values of LOCp() and LOCq() may be determined via a call to * the ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1) * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCq(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal workspace * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS * Where: * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PDSYTTRD', 'L', 0, 0, * 0, 0 ) * * NUMROC is a ScaLAPACK tool function; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of * elementary reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of * elementary reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and * vi denotes an element of the vector defining H(i). * * Data storage requirements * ========================= * * PDSYTTRD is not intended to be called directly. All users are * encourage to call PDSYTRD which will then call PDHETTRD if * appropriate. A must be in cyclic format (i.e. MB = NB = 1), * the process grid must be square ( i.e. NPROW = NPCOL ) and * only lower triangular storage is supported. * * Local variables * =============== * * PDSYTTRD uses five local arrays: * WORK ( InV ) dimension ( NP, ANB+1): array V * WORK ( InH ) dimension ( NP, ANB+1): array H * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT * * Arrays V and H are replicated across all processor columns. * Arrays V^T and H^T are replicated across all processor rows. * * WORK ( InVT ), or V^T, is stored as a tall skinny * array ( NQ x ANB-1 ) for efficiency. Since only the lower * triangular portion of A is updated, Av is computed as: * tril(A) * v + v^T * tril(A,-1). This is performed as * two local triangular matrix-vector multiplications (both in * MVR2) followed by a transpose and a sum across the columns. * In the local computation, WORK( InVT ) is used to compute * tril(A) * v and WORK( InV ) is used to compute * v^T * tril(A,-1) * * The following variables are global indices into A: * INDEX: The current global row and column number. * MAXINDEX: The global row and column for the first row and * column in the trailing block of A. * LIIB, LIJB: The first row, column in * * The following variables point into the arrays A, V, H, V^T, H^T: * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T. * LII: local index I: The local row number for row INDEX * LIJ: local index J: The local column number for column INDEX * LIIP1: local index I+1: The local row number for row INDEX+1 * LIJP1: local index J+1: The local col number for col INDEX+1 * LTLI: lower triangular local index I: The local row for the * upper left entry in tril( A(INDEX, INDEX) ) * LTLIP1: lower triangular local index I+1: The local row for the * upper left entry in tril( A(INDEX+1, INDEX+1) ) * * Details: The distinction between LII and LTLI (and between * LIIP1 and LTLIP1) is subtle. Within the current processor * column (i.e. MYCOL .eq. CURCOL) they are the same. However, * on some processors, A( LII, LIJ ) points to an element * above the diagonal, on these processors, LTLI = LII+1. * * The following variables give the number of rows and/or columns * in various matrices: * NP: The number of local rows in A( 1:N, 1:N ) * NQ: The number of local columns in A( 1:N, 1:N ) * NPM0: The number of local rows in A( INDEX:N, INDEX:N ) * NQM0: The number of local columns in A( INDEX:N, INDEX:N ) * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N ) * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N ) * LTNM0: The number of local rows & columns in * tril( A( INDEX:N, INDEX:N ) ) * LTNM1: The number of local rows & columns in * tril( A( INDEX+1:N, INDEX+1:N ) ) * NOTE: LTNM0 == LTNM1 on all processors except the diagonal * processors, i.e. those where MYCOL == MYROW. * * Invariants: * NP = NPM0 + LII - 1 * NQ = NQM0 + LIJ - 1 * NP = NPM1 + LIIP1 - 1 * NQ = NQM1 + LIJP1 - 1 * NP = LTLI + LTNM0 - 1 * NP = LTLIP1 + LTNM1 - 1 * * Temporary variables. The following variables are used within * a few lines after they are set and do hold state from one loop * iteration to the next: * * The matrix A: * The matrix A does not hold the same values that it would * in an unblocked code nor the values that it would hold in * in a blocked code. * * The value of A is confusing. It is easiest to state the * difference between trueA and A at the point that MVR2 is called, * so we will start there. * * Let trueA be the value that A would * have at a given point in an unblocked code and A * be the value that A has in this code at the same point. * * At the time of the call to MVR2, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ). * * At the bottom of the inner loop, * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) and * v = V( liip1:N, BINDEX+1 ) and * h = H( liip1:N, BINDEX+1 ) * * At the top of the loop, BINDEX gets incremented, hence: * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and * V = V( MAXINDEX:N, 1:BINDEX-1 ) and * v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) * * * A gets updated at the bottom of the outer loop * After this update, trueA = A + v' * h + h' * v * where v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) and BINDEX = 0 * Indeed, the previous loop invariant as stated above for the * top of the loop still holds, but with BINDEX = 0, H and V * are null matrices. * * After the current column of A is updated, * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N ) * the rest of A is untouched. * * After the current block column of A is updated, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) * * This brings us back to the point at which mvr2 is called. * * * Details of the parallelization: * * We delay spreading v across to all processor columns (which * would naturally happen at the bottom of the loop) in order to * combine the spread of v( : , i-1 ) with the spread of h( : , i ) * * In order to compute h( :, i ), we must update A( :, i ) * which means that the processor column owning A( :, i ) must * have: c, tau, v( i, i ) and h( i, i ). * * The traditional * way of computing v (and the one used in pzlatrd.f and * zlatrd.f) is: * v = tau * v * c = v' * h * alpha = - tau * c / 2 * v = v + alpha * h * However, the traditional way of computing v requires that tau * be broadcast to all processors in the current column (to compute * v = tau * v) and then a sum-to-all is required (to * compute v' * h ). We use the following formula instead: * c = v' * h * v = tau * ( v - c * tau' * h / 2 ) * The above formula allows tau to be spread down in the * same call to DGSUM2D which performs the sum-to-all of c. * * The computation of v, which could be performed in any processor * column (or other procesor subsets), is performed in the * processor column that owns A( :, i+1 ) so that A( :, i+1 ) * can be updated prior to spreading v across. * * We keep the block column of A up-to-date to minimize the * work required in updating the current column of A. Updating * the block column of A is reasonably load balanced whereas * updating the current column of A is not (only the current * processor column is involved). * * In the following overview of the steps performed, M in the * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p)) * or more flops per processor. * * Inner loop: * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) ) *M h = house( A(index:n, index) ) *M Spread v, h across *M vt = v^T; ht = h^T * A( index+1:n, index+1:maxindex ) -= * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) ) *C v = tril(A) * h; vt = ht * tril(A,-1) *MorC v = v - H*V*h - V*H*h *M v = v + vt^T *M c = v' * h * v = tau * ( v - c * tau' * h / 2 ) *C A = A - H*V - V*H * * * * ================================================================= * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) DOUBLE PRECISION Z_ONE, Z_NEGONE, Z_ZERO PARAMETER ( Z_ONE = 1.0D0, Z_NEGONE = -1.0D0, $ Z_ZERO = 0.0D0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * * * .. Local Scalars .. * * LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX, $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT, $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA, $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1, $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX, $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP, $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ, $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX, $ PBMIN, PBSIZE, PNB, ROWSPERPROC DOUBLE PRECISION ALPHA, BETA, C, CONJTOPH, CONJTOPV, NORM, $ ONEOVERBETA, SAFMAX, SAFMIN, TOPH, TOPNV, $ TOPTAU, TOPV * .. * .. Local Arrays .. * * * * INTEGER IDUM1( 1 ), IDUM2( 1 ) DOUBLE PRECISION CC( 3 ), DTMP( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DCOMBNRM2, DGEBR2D, $ DGEBS2D, DGEMM, DGEMV, DGERV2D, DGESD2D, $ DGSUM2D, DLACPY, DSCAL, DTRMVT, PCHK1MAT, $ PDTREECOMB, PXERBLA * .. * .. External Functions .. * LOGICAL LSAME INTEGER ICEIL, NUMROC, PJLAENV DOUBLE PRECISION DNRM2, PDLAMCH EXTERNAL LSAME, ICEIL, NUMROC, PJLAENV, DNRM2, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD, SIGN, SQRT * .. * * * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * * Further details * =============== * * At the top of the loop, v and nh have been computed but not * spread across. Hence, A is out-of-date even after the * rank 2k update. Furthermore, we compute the next v before * nh is spread across. * * I claim that if we used a sum-to-all on NV, by summing CC within * each column, that we could compute NV locally and could avoid * spreading V across. Bruce claims that sum-to-all can be made * to cost no more than sum-to-one on the Paragon. If that is * true, this would be a win. But, * the BLACS sum-to-all is just a sum-to-one followed by a broadcast, * and hence the present scheme is better for now. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * SAFMAX = SQRT( PDLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PDLAMCH( ICTXT, 'S' ) ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE * * Here we set execution options for PDSYTTRD * PNB = PJLAENV( ICTXT, 2, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) ANB = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) * INTERLEAVE = ( PJLAENV( ICTXT, 4, 'PDSYTTRD', 'L', 1, 0, 0, $ 0 ).EQ.1 ) TWOGEMMS = ( PJLAENV( ICTXT, 4, 'PDSYTTRD', 'L', 2, 0, 0, $ 0 ).EQ.1 ) BALANCED = ( PJLAENV( ICTXT, 4, 'PDSYTTRD', 'L', 3, 0, 0, $ 0 ).EQ.1 ) * CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) * * UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 .AND. DESCA( NB_ ).NE.1 ) $ INFO = 600 + NB_ IF( INFO.EQ.0 ) THEN * * * Here is the arithmetic: * Let maxnpq = max( np, nq, 2 * ANB ) * LDV = 4 * max( np, nq ) + 2 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB ) * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS * * This overestimates memory requirements when ANB > NP/2 * Memory requirements are lower when interleave = .false. * Hence, we could have two sets of memory requirements, * one for interleave and one for * * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORK( 1 ) = DBLE( LWMIN ) IF( .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( NPROW.NE.NPCOL ) THEN INFO = -( 600+CTXT_ ) ELSE IF( DESCA( DTYPE_ ).NE.1 ) THEN INFO = -( 600+DTYPE_ ) ELSE IF( DESCA( MB_ ).NE.1 ) THEN INFO = -( 600+MB_ ) ELSE IF( DESCA( NB_ ).NE.1 ) THEN INFO = -( 600+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.0 ) THEN INFO = -( 600+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.0 ) THEN INFO = -( 600+CSRC_ ) ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 * CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * * Reduce the lower triangle of sub( A ) NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * NXTROW = 0 NXTCOL = 0 * LIIP1 = 1 LIJP1 = 1 NPM1 = NP NQM1 = NQ * LDA = DESCA( LLD_ ) ICTXT = DESCA( CTXT_ ) * * * * Miscellaneous details: * Put tau, D and E in the right places * Check signs * Place all the arrays in WORK, control their placement * in memory. * * * * Loop invariants * A(LIIP1, LIJ) points to the first element of A(I+1,J) * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N ) * A(LII:N,LIJ:N) is one step out of date. * proc( CURROW, CURCOL ) owns A(LII,LIJ) * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ) * INH = 1 * IF( INTERLEAVE ) THEN * * H and V are interleaved to minimize memory movement * LDV has to be twice as large to accomodate interleaving. * In addition, LDV is doubled again to allow v, h and * toptau to be spreaad across and transposed in a * single communication operation with minimum memory * movement. * * We could reduce LDV back to 2*MAX(NPM1,NQM1) * by increasing the memory movement required in * the spread and transpose of v, h and toptau. * However, since the non-interleaved path already * provides a mear minimum memory requirement option, * we did not provide this additional path. * LDV = 4*( MAX( NPM1, NQM1 ) ) + 2 * INH = 1 * INV = INH + LDV / 2 INVT = INH + ( ANB+1 )*LDV * INHT = INVT + LDV / 2 INTMP = INVT + LDV*( ANB+1 ) * ELSE LDV = MAX( NPM1, NQM1 ) * INHT = INH + LDV*( ANB+1 ) INV = INHT + LDV*( ANB+1 ) * * The code works without this +1, but only because of a * coincidence. Without the +1, WORK(INVT) gets trashed, but * WORK(INVT) is only used once and when it is used, it is * multiplied by WORK( INH ) which is zero. Hence, the fact * that WORK(INVT) is trashed has no effect. * INVT = INV + LDV*( ANB+1 ) + 1 INTMP = INVT + LDV*( 2*ANB ) * END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) WORK( 1 ) = DBLE( LWMIN ) RETURN END IF * * * The satisfies the loop invariant: trueA = A - V * HT - H * VT, * (where V, H, VT and HT all have BINDEX+1 rows/columns) * the first ANB times through the loop. * * * * Setting either ( InH and InHT ) or InV to Z_ZERO * is adequate except in the face of NaNs. * * DO 10 I = 1, NP WORK( INH+I-1 ) = Z_ZERO WORK( INV+I-1 ) = Z_ZERO 10 CONTINUE DO 20 I = 1, NQ WORK( INHT+I-1 ) = Z_ZERO 20 CONTINUE * * * TOPNV = Z_ZERO * LTLIP1 = LIJP1 LTNM1 = NPM1 IF( MYCOL.GT.MYROW ) THEN LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * DO 210 MININDEX = 1, N - 1, ANB * * MAXINDEX = MIN( MININDEX+ANB-1, N ) LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1 * NQB = NQ - LIJB + 1 NPB = NP - LIIB + 1 INHTB = INHT + LIJB - 1 INVTB = INVT + LIJB - 1 INHB = INH + LIIB - 1 INVB = INV + LIIB - 1 * * * * DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 ) * BINDEX = INDEX - MININDEX * CURROW = NXTROW CURCOL = NXTCOL * NXTROW = MOD( CURROW+1, NPROW ) NXTCOL = MOD( CURCOL+1, NPCOL ) * LII = LIIP1 LIJ = LIJP1 NPM0 = NPM1 * IF( MYROW.EQ.CURROW ) THEN NPM1 = NPM1 - 1 LIIP1 = LIIP1 + 1 END IF IF( MYCOL.EQ.CURCOL ) THEN NQM1 = NQM1 - 1 LIJP1 = LIJP1 + 1 LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * * * * V = NV, VT = NVT, H = NH, HT = NHT * * * Update the current column of A * * IF( MYCOL.EQ.CURCOL ) THEN * INDEXA = LII + ( LIJ-1 )*LDA INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV CONJTOPH = WORK( INHT+LIJ-1+BINDEX*LDV ) CONJTOPV = TOPNV * IF( INDEX.GT.1 ) THEN DO 30 I = 0, NPM0 - 1 * A( INDEXA+I ) = A( INDEXA+I ) A( INDEXA+I ) = A( INDEXA+I ) - $ WORK( INDEXINV+LDV+I )*CONJTOPH - $ WORK( INDEXINH+LDV+I )*CONJTOPV 30 CONTINUE END IF * * END IF * * IF( MYCOL.EQ.CURCOL ) THEN * * Compute the householder vector * IF( MYROW.EQ.CURROW ) THEN DTMP( 2 ) = A( LII+( LIJ-1 )*LDA ) ELSE DTMP( 2 ) = ZERO END IF IF( MYROW.EQ.NXTROW ) THEN DTMP( 3 ) = A( LIIP1+( LIJ-1 )*LDA ) DTMP( 4 ) = ZERO ELSE DTMP( 3 ) = ZERO DTMP( 4 ) = ZERO END IF * NORM = DNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 ) DTMP( 1 ) = NORM * * IF DTMP(5) = 1.0, NORM is too large and might cause * overflow, hence PDTREECOMB must be called. IF DTMP(5) * is zero on output, DTMP(1) can be trusted. * DTMP( 5 ) = ZERO IF( DTMP( 1 ).GE.SAFMAX .OR. DTMP( 1 ).LT.SAFMIN ) THEN DTMP( 5 ) = ONE DTMP( 1 ) = ZERO END IF * DTMP( 1 ) = DTMP( 1 )*DTMP( 1 ) CALL DGSUM2D( ICTXT, 'C', ' ', 5, 1, DTMP, 5, -1, $ CURCOL ) IF( DTMP( 5 ).EQ.ZERO ) THEN DTMP( 1 ) = SQRT( DTMP( 1 ) ) ELSE DTMP( 1 ) = NORM CALL PDTREECOMB( ICTXT, 'C', 1, DTMP, -1, MYCOL, $ DCOMBNRM2 ) END IF * NORM = DTMP( 1 ) * D( LIJ ) = DTMP( 2 ) IF( MYROW.EQ.CURROW .AND. MYCOL.EQ.CURCOL ) THEN A( LII+( LIJ-1 )*LDA ) = D( LIJ ) END IF * * ALPHA = DTMP( 3 ) * NORM = SIGN( NORM, ALPHA ) * IF( NORM.EQ.ZERO ) THEN TOPTAU = ZERO ELSE BETA = NORM + ALPHA TOPTAU = BETA / NORM ONEOVERBETA = 1.0D0 / BETA * CALL DSCAL( NPM1, ONEOVERBETA, $ A( LIIP1+( LIJ-1 )*LDA ), 1 ) END IF * IF( MYROW.EQ.NXTROW ) THEN A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE END IF * TAU( LIJ ) = TOPTAU E( LIJ ) = -NORM * END IF * * * Spread v, nh, toptau across * DO 40 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+ $ ( LIJ-1 )*LDA ) 40 CONTINUE * IF( MYCOL.EQ.CURCOL ) THEN WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU CALL DGEBS2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1 ) ELSE CALL DGEBR2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1, MYROW, CURCOL ) TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) END IF DO 50 I = 0, NPM1 - 1 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+BINDEX*LDV+NPM1+I ) 50 CONTINUE * IF( INDEX.LT.N ) THEN IF( MYROW.EQ.NXTROW .AND. MYCOL.EQ.CURCOL ) $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ ) END IF * * Transpose v, nh * * IF( MYROW.EQ.MYCOL ) THEN DO 60 I = 0, NPM1 + NPM1 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+ $ BINDEX*LDV+I ) 60 CONTINUE ELSE CALL DGESD2D( ICTXT, NPM1+NPM1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1, $ MYCOL, MYROW ) CALL DGERV2D( ICTXT, NQM1+NQM1, 1, $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1, $ MYCOL, MYROW ) END IF * DO 70 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+ $ LIJP1-1+BINDEX*LDV+NQM1+I ) 70 CONTINUE * * * Update the current block column of A * IF( INDEX.GT.1 ) THEN DO 90 J = LIJP1, LIJB - 1 DO 80 I = 0, NPM1 - 1 * A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA ) $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )* $ WORK( INHT+J-1+BINDEX*LDV ) - $ WORK( INH+LIIP1-1+BINDEX*LDV+I )* $ WORK( INVT+J-1+BINDEX*LDV ) 80 CONTINUE 90 CONTINUE END IF * * * * Compute NV = A * NHT; NVT = A * NH * * These two lines are necessary because these elements * are not always involved in the calls to DTRMVT * for two reasons: * 1) On diagonal processors, the call to TRMVT * involves only LTNM1-1 elements * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1 * and when the results are combined across all processes, * uninitialized values may be included. WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO * * IF( MYROW.EQ.MYCOL ) THEN IF( LTNM1.GT.1 ) THEN CALL DTRMVT( 'L', LTNM1-1, $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1, $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ), $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+ $ 1 )*LDV ), 1 ) END IF DO 100 I = 1, LTNM1 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) + $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )* $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV ) 100 CONTINUE ELSE IF( LTNM1.GT.0 ) $ CALL DTRMVT( 'L', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ), $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+ $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+ $ ( BINDEX+1 )*LDV ), 1, $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ), $ 1 ) * END IF * * * We take advantage of the fact that: * A * sum( B ) = sum ( A * B ) for matrices A,B * * trueA = A + V * HT + H * VT * hence: (trueA)v = Av' + V * HT * v + H * VT * v * VT * v = sum_p_in_NPROW ( VTp * v ) * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v ) * * v = v + V * HT * h + H * VT * h * * * * tmp = HT * nh1 DO 110 I = 1, 2*( BINDEX+1 ) WORK( INTMP-1+I ) = 0 110 CONTINUE * IF( BALANCED ) THEN NPSET = NPROW MYSETNUM = MYROW ROWSPERPROC = ICEIL( NQB, NPSET ) MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 ) * * * tmp = HT * v * CALL DGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INHTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL DGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INVTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * * CALL DGSUM2D( ICTXT, 'C', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) ELSE * tmp = HT * v * CALL DGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INHTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL DGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INVTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * END IF * * * IF( BALANCED ) THEN MYSETNUM = MYCOL * ROWSPERPROC = ICEIL( NPB, NPSET ) MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 ) * CALL DGSUM2D( ICTXT, 'R', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) * * * v = v + V * tmp IF( INDEX.GT.1. ) THEN CALL DGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INVB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) * * v = v + H * tmp2 CALL DGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INHB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) END IF * ELSE * v = v + V * tmp CALL DGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ), $ LDV, WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * * * v = v + H * tmp2 CALL DGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ), $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * END IF * * * Transpose NV and add it back into NVT * IF( MYROW.EQ.MYCOL ) THEN DO 120 I = 0, NQM1 - 1 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+ $ I ) 120 CONTINUE ELSE CALL DGESD2D( ICTXT, NQM1, 1, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), $ NQM1, MYCOL, MYROW ) CALL DGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL, $ MYROW ) * END IF DO 130 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I ) 130 CONTINUE * * Sum-to-one NV rowwise (within a row) * CALL DGSUM2D( ICTXT, 'R', ' ', NPM1, 1, $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1, $ MYROW, NXTCOL ) * * * Dot product c = NV * NH * Sum-to-all c within next processor column * * IF( MYCOL.EQ.NXTCOL ) THEN CC( 1 ) = Z_ZERO DO 140 I = 0, NPM1 - 1 CC( 1 ) = CC( 1 ) + WORK( INV+LIIP1-1+( BINDEX+1 )* $ LDV+I )*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+ $ I ) 140 CONTINUE IF( MYROW.EQ.NXTROW ) THEN CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV ) ELSE CC( 2 ) = Z_ZERO CC( 3 ) = Z_ZERO END IF CALL DGSUM2D( ICTXT, 'C', ' ', 3, 1, CC, 3, -1, NXTCOL ) * TOPV = CC( 2 ) C = CC( 1 ) TOPH = CC( 3 ) * TOPNV = TOPTAU*( TOPV-C*TOPTAU / 2*TOPH ) * * * Compute V = Tau * (V - C * Tau' / 2 * H ) * * DO 150 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU* $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C*TOPTAU / $ 2*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) ) 150 CONTINUE * END IF * * 160 CONTINUE * * * Perform the rank2k update * IF( MAXINDEX.LT.N ) THEN * DO 170 I = 0, NPM1 - 1 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I ) 170 CONTINUE * * * IF( .NOT.TWOGEMMS ) THEN IF( INTERLEAVE ) THEN LDZG = LDV / 2 ELSE CALL DLACPY( 'A', LTNM1, ANB, WORK( INHT+LIJP1-1 ), $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV ) * CALL DLACPY( 'A', LTNM1, ANB, WORK( INV+LTLIP1-1 ), $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV ) LDZG = LDV END IF NBZG = ANB*2 ELSE LDZG = LDV NBZG = ANB END IF * * DO 180 PBMIN = 1, LTNM1, PNB * PBSIZE = MIN( PNB, LTNM1-PBMIN+1 ) PBMAX = MIN( LTNM1, PBMIN+PNB-1 ) CALL DGEMM( 'N', 'C', PBSIZE, PBMAX, NBZG, Z_NEGONE, $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) IF( TWOGEMMS ) THEN CALL DGEMM( 'N', 'C', PBSIZE, PBMAX, ANB, Z_NEGONE, $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) END IF 180 CONTINUE * * * DO 190 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I ) WORK( INH+LIIP1-1+I ) = WORK( INTMP+I ) 190 CONTINUE DO 200 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I ) 200 CONTINUE * * END IF * * End of the update A code * 210 CONTINUE * IF( MYCOL.EQ.NXTCOL ) THEN IF( MYROW.EQ.NXTROW ) THEN * D( NQ ) = A( NP+( NQ-1 )*LDA ) * CALL DGEBS2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1 ) ELSE CALL DGEBR2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1, NXTROW, $ NXTCOL ) END IF END IF * * * * WORK( 1 ) = DBLE( LWMIN ) RETURN * * End of PDSYTTRD * * END scalapack-1.8.0/SRC/pzungrq.f0000640000175000017500000002706210363532303015627 0ustar muammarmuammar SUBROUTINE PZUNGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGRQ generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. * N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PZGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZLASET, PZUNGR2 * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Set A(ia:in,ja+n-m+in-ia+1:ja-n+1) to zero. * CALL PZLASET( 'All', IN-IA+1, M-IN+IA-1, ZERO, ZERO, A, IA, $ JA+N-M+IN-IA+1, DESCA ) * * Use unblocked code for the first or only block. * CALL PZUNGR2( IN-IA+1, N-M+IN-IA+1, IN-IA-M+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PZLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the right * CALL PZLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-m+i+ib-ia-1 of current block * CALL PZUNGR2( IB, N-M+I+IB-IA, IB, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows i:i+ib-1,ja+n-m+i+ib-ia:ja+n-1 of current block to * zero * CALL PZLASET( 'All', IB, M-I-IB+IA, ZERO, ZERO, A, I, $ JA+N-M+I+IB-IA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGRQ * END scalapack-1.8.0/SRC/pzpotri.f0000640000175000017500000001715610363532303015633 0ustar muammarmuammar SUBROUTINE PZPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZPOTRI computes the inverse of a complex Hermitian positive definite * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the * Cholesky factorization sub( A ) = U**H*U or L*L**H computed by * PZPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor U or L * from the Cholesky factorization of the distributed matrix * sub( A ) = U**H*U or L*L**H, as computed by PZPOTRF. * On exit, the local pieces of the upper or lower triangle of * the (Hermitian) inverse of sub( A ), overwriting the input * factor U or L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER ICOFF, ICTXT, IROFF, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PXERBLA, $ PZLAUUM, PZTRTRI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.NE.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Invert the triangular Cholesky factor U or L. * CALL PZTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * IF( INFO.GT.0 ) $ RETURN * * Form inv(U)*inv(U)' or inv(L)'*inv(L). * CALL PZLAUUM( UPLO, N, A, IA, JA, DESCA ) * RETURN * * End of PZPOTRI * END scalapack-1.8.0/SRC/tools.h0000640000175000017500000000220010363532303015246 0ustar muammarmuammar#include "./pblas.h" #ifdef __STDC__ typedef void (*CPYPTR)(int, int, float *, int, float *, int); #define SLVOID void #else typedef void (*CPYPTR)(); #define SLVOID char #endif #define ErrPrnt fprintf(stderr, "line %d of file %s\n",__LINE__, __FILE__); #define Mdescset(desc, m, n, mb, nb, rsrc, csrc, ictxt, lld) \ { \ (desc)[DT_] = BLOCK_CYCLIC_2D; \ (desc)[CTXT_] = (ictxt); \ (desc)[M_] = (m); \ (desc)[N_] = (n); \ (desc)[MB_] = (mb); \ (desc)[NB_] = (nb); \ (desc)[RSRC_] = (rsrc); \ (desc)[CSRC_] = (csrc); \ (desc)[LLD_] = (lld); \ } #define MCindxg2p(IG, nb, srcproc, nprocs) \ ( ((srcproc) + (IG)/(nb)) % nprocs ) typedef struct {double r, i;} DCOMPLEX; typedef struct {float r, i;} SCOMPLEX; #define Mmalloc(M_ptr, M_type, M_elt, M_i, M_ctxt) \ { \ void pberror_(); \ (M_ptr) = ( M_type * ) malloc((M_elt)*(sizeof(M_type))); \ if (!(M_ptr)) \ { \ if ((M_elt) > 0) \ { \ (M_i) = 1; \ fprintf(stderr, "Not enough memory on line %d of file %s!!\n", \ __LINE__, __FILE__); \ pberror_(&(M_ctxt), __FILE__, &(M_i)); \ } \ } \ } scalapack-1.8.0/SRC/pzlabrd.f0000640000175000017500000005542310363532303015561 0ustar muammarmuammar SUBROUTINE PZLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAUP( * ), TAUQ( * ), X( * ), Y( * ), $ WORK( * ) * .. * * Purpose * ======= * * PZLABRD reduces the first NB rows and columns of a complex general * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper * or lower bidiagonal form by an unitary transformation Q' * A * P, and * returns the matrices X and Y which are needed to apply the transfor- * mation to the unreduced part of sub( A ). * * If M >= N, sub( A ) is reduced to upper bidiagonal form; if M < N, to * lower bidiagonal form. * * This is an auxiliary routine called by PZGEBRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of leading rows and columns of sub( A ) to be * reduced. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ) to be reduced. On exit, * the first NB rows and columns of the matrix are overwritten; * the rest of the distributed matrix sub( A ) is unchanged. * If m >= n, elements on and below the diagonal in the first NB * columns, with the array TAUQ, represent the unitary * matrix Q as a product of elementary reflectors; and * elements above the diagonal in the first NB rows, with the * array TAUP, represent the unitary matrix P as a product * of elementary reflectors. * If m < n, elements below the diagonal in the first NB * columns, with the array TAUQ, represent the unitary * matrix Q as a product of elementary reflectors, and * elements on and above the diagonal in the first NB rows, * with the array TAUP, represent the unitary matrix P as * a product of elementary reflectors. * See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(ia+i-1,ja+i-1). D is tied to the distributed * matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(ia+i-1,ja+i) for i = 1,2,...,n-1; * if m < n, E(i) = A(ia+i,ja+i-1) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX*16 array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX*16 array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * X (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_X,NB). On exit, the local * pieces of the distributed M-by-NB matrix * X(IX:IX+M-1,JX:JX+NB-1) required to update the unreduced * part of sub( A ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * Y (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_Y,NB). On exit, the local * pieces of the distributed N-by-NB matrix * Y(IY:IY+N-1,JY:JY+NB-1) required to update the unreduced * part of sub( A ). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= NB_A + NQ, with * * NQ = NUMROC( N+MOD( IA-1, NB_Y ), NB_Y, MYCOL, IACOL, NPCOL ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors. * * If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); u(1:i-1) = 0, u(i) = 1, and u(i:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The elements of the vectors v and u together form the m-by-nb matrix * V and the nb-by-n matrix U' which are needed, with X and Y, to apply * the transformation to the unreduced part of the matrix, using a block * update of the form: sub( A ) := sub( A ) - V*Y' - X*U'. * * The contents of sub( A ) on exit are illustrated by the following * examples with nb = 2: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) * ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) * ( v1 v2 a a a ) ( v1 1 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix which is unchanged, * vi denotes an element of the vector defining H(i), and ui an element * of the vector defining G(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, IPY, IW, J, JJ, $ JWY, K, MYCOL, MYROW, NPCOL, NPROW COMPLEX*16 ALPHA, TAU INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), $ DESCTP( DLEN_ ), DESCTQ( DLEN_ ), $ DESCW( DLEN_ ), DESCWY( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDELSET, $ PZCOPY, PZELGET, PZELSET, PZGEMV, $ PZLACGV, PZLARFG, PZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 IW = MOD( IA-1, DESCA( NB_ ) ) + 1 ALPHA = ZERO * CALL DESCSET( DESCWY, 1, N+MOD( IA-1, DESCY( NB_ ) ), 1, $ DESCA( NB_ ), IAROW, IACOL, ICTXT, 1 ) CALL DESCSET( DESCW, DESCA( MB_ ), 1, DESCA( MB_ ), 1, IAROW, $ IACOL, ICTXT, DESCA( MB_ ) ) CALL DESCSET( DESCTQ, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), IAROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCTP, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), IACOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i:ia+m-1,j) * IF( K.GT.1 ) THEN CALL PZGEMV( 'No transpose', M-K+1, K-1, -ONE, A, I, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I, $ J, DESCA, 1 ) CALL PZGEMV( 'No transpose', M-K+1, K-1, -ONE, X, IX+K-1, $ JX, DESCX, A, IA, J, DESCA, 1, ONE, A, I, J, $ DESCA, 1 ) CALL PZELSET( A, I-1, J, DESCA, ALPHA ) END IF * * Generate reflection Q(i) to annihilate A(i+1:ia+m-1,j) * CALL PZLARFG( M-K+1, ALPHA, I, J, A, I+1, J, DESCA, 1, $ TAUQ ) CALL PDELSET( D, 1, J, DESCD, DBLE( ALPHA ) ) CALL PZELSET( A, I, J, DESCA, ONE ) * * Compute Y(IA+I:IA+N-1,J) * CALL PZGEMV( 'Conjugate transpose', M-K+1, N-K, ONE, A, I, $ J+1, DESCA, A, I, J, DESCA, 1, ZERO, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PZGEMV( 'Conjugate transpose', M-K+1, K-1, ONE, A, I, $ JA, DESCA, A, I, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K, -ONE, Y, IY, $ JY+K, DESCY, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PZGEMV( 'Conjugate transpose', M-K+1, K-1, ONE, X, $ IX+K-1, JX, DESCX, A, I, J, DESCA, 1, ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K, -ONE, A, IA, $ J+1, DESCA, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) * CALL PZELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PZSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PZLACGV( N-K, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PZCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) * * Update A(i,j+1:ja+n-1) * CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZLACGV( K, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZGEMV( 'Conjugate transpose', K, N-K, -ONE, Y, IY, $ JY+K, DESCY, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZLACGV( K, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K, -ONE, A, IA, $ J+1, DESCA, X, IX+K-1, JX, DESCX, DESCX( M_ ), $ ONE, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PZLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PZELSET( A, I, J, DESCA, DCMPLX( DBLE( ALPHA ) ) ) * * Generate reflection P(i) to annihilate A(i,j+2:ja+n-1) * CALL PZLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, N+JA-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PDELSET( E, I, 1, DESCE, DBLE( ALPHA ) ) CALL PZELSET( A, I, J+1, DESCA, ONE ) * * Compute X(I+1:IA+M-1,J) * CALL PZGEMV( 'No transpose', M-K, N-K, ONE, A, I+1, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PZGEMV( 'No transpose', K, N-K, ONE, Y, IY, JY+K, $ DESCY, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PZGEMV( 'No transpose', M-K, K, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PZGEMV( 'No transpose', K-1, N-K, ONE, A, IA, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PZGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PZELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PZSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) CALL PZLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i,j:ja+n-1) * CALL PZLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) IF( K.GT.1 ) THEN CALL PZLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K+1, -ONE, Y, $ IY, JY+K-1, DESCY, A, I, JA, DESCA, $ DESCA( M_ ), ONE, A, I, J, DESCA, $ DESCA( M_ ) ) CALL PZLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K+1, -ONE, A, $ IA, J, DESCA, X, IX+K-1, JX, DESCX, $ DESCX( M_ ), ONE, A, I, J, DESCA, $ DESCA( M_ ) ) CALL PZLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PZELSET( A, I, J-1, DESCA, DCMPLX( DBLE( ALPHA ) ) ) END IF * * Generate reflection P(i) to annihilate A(i,j+1:ja+n-1) * CALL PZLARFG( N-K+1, ALPHA, I, J, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP ) CALL PDELSET( D, I, 1, DESCD, DBLE( ALPHA ) ) CALL PZELSET( A, I, J, DESCA, ONE ) * * Compute X(i+1:ia+m-1,j) * CALL PZGEMV( 'No transpose', M-K, N-K+1, ONE, A, I+1, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PZGEMV( 'No transpose', K-1, N-K+1, ONE, Y, IY, JY+K-1, $ DESCY, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PZGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PZGEMV( 'No transpose', K-1, N-K+1, ONE, A, IA, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PZGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PZELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PZSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) CALL PZLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) * * Update A(i+1:ia+m-1,j) * CALL PZGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PZGEMV( 'No transpose', M-K, K, -ONE, X, IX+K, JX, $ DESCX, A, IA, J, DESCA, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PZELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection Q(i) to annihilate A(i+2:ia+m-1,j) * CALL PZLARFG( M-K, ALPHA, I+1, J, A, MIN( I+2, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PDELSET( E, 1, J, DESCE, DBLE( ALPHA ) ) CALL PZELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(ia+i:ia+n-1,j) * CALL PZGEMV( 'Conjugate transpose', M-K, N-K, ONE, A, I+1, $ J+1, DESCA, A, I+1, J, DESCA, 1, ZERO, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PZGEMV( 'Conjugate transpose', M-K, K-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', K-1, N-K, -ONE, Y, IY, $ JY+K, DESCY, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PZGEMV( 'Conjugate transpose', M-K, K, ONE, X, IX+K, $ JX, DESCX, A, I+1, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PZGEMV( 'Conjugate transpose', K, N-K, -ONE, A, IA, $ J+1, DESCA, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) * CALL PZELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PZSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PZLACGV( N-K, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PZCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) 20 CONTINUE END IF * RETURN * * End of PZLABRD * END scalapack-1.8.0/SRC/pdlauum.f0000640000175000017500000002065010363532303015564 0ustar muammarmuammar SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLAUUM computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the blocked form of the algorithm, calling Level 3 PBLAS. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the * distributed matrix sub( A ) is upper or lower triangular: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the triangular factor U or L. N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J, JB, JN * .. * .. External Subroutines .. EXTERNAL PDGEMM, PDLAUU2, PDTRMM, PDSYRK * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * * Handle first block separately * JB = JN-JA+1 CALL PDLAUU2( 'Upper', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PDSYRK( 'Upper', 'No transpose', JB, N-JB, ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PDTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', $ J-JA, JB, ONE, A, I, J, DESCA, A, IA, J, $ DESCA ) CALL PDLAUU2( 'Upper', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PDGEMM( 'No transpose', 'Transpose', J-JA, JB, $ N-J-JB+JA, ONE, A, IA, J+JB, DESCA, A, I, $ J+JB, DESCA, ONE, A, IA, J, DESCA ) CALL PDSYRK( 'Upper', 'No transpose', JB, N-J-JB+JA, ONE, $ A, I, J+JB, DESCA, ONE, A, I, J, DESCA ) END IF 10 CONTINUE ELSE * * Compute the product L' * L. * * Handle first block separately * JB = JN-JA+1 CALL PDLAUU2( 'Lower', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PDSYRK( 'Lower', 'Transpose', JB, N-JB, ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PDTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', JB, $ J-JA, ONE, A, I, J, DESCA, A, I, JA, DESCA ) CALL PDLAUU2( 'Lower', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PDGEMM( 'Transpose', 'No transpose', JB, J-JA, $ N-J-JB+JA, ONE, A, I+JB, J, DESCA, A, I+JB, $ JA, DESCA, ONE, A, I, JA, DESCA ) CALL PDSYRK( 'Lower', 'Transpose', JB, N-J-JB+JA, ONE, $ A, I+JB, J, DESCA, ONE, A, I, J, DESCA ) END IF 20 CONTINUE END IF * RETURN * * End of PDLAUUM * END scalapack-1.8.0/SRC/slapst.f0000640000175000017500000001500410363532303015420 0ustar muammarmuammar SUBROUTINE SLAPST( ID, N, D, INDX, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, N * .. * .. Array Arguments .. INTEGER INDX( * ) REAL D( * ) * .. * * Purpose * ======= * SLAPST is a modified version of the LAPACK routine SLASRT. * * Define a permutation INDX that sorts the numbers in D * in increasing order (if ID = 'I') or * in decreasing order (if ID = 'D' ). * * Use Quick Sort, reverting to Insertion sort on arrays of * size <= 20. Dimension of STACK limits N to about 2**32. * * Arguments * ========= * * ID (input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. * * N (input) INTEGER * The length of the array D. * * D (input) REAL array, dimension (N) * The array to be sorted. * * INDX (ouput) INTEGER array, dimension (N). * The permutation which sorts the array D. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. INTEGER SELECT PARAMETER ( SELECT = 20 ) * .. * .. Local Scalars .. INTEGER DIR, ENDD, I, ITMP, J, START, STKPNT REAL D1, D2, D3, DMNMX * .. * .. Local Arrays .. INTEGER STACK( 2, 32 ) * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Executable Statements .. * * Test the input paramters. * INFO = 0 DIR = -1 IF( LSAME( ID, 'D' ) ) THEN DIR = 0 ELSE IF( LSAME( ID, 'I' ) ) THEN DIR = 1 END IF IF( DIR.EQ.-1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SLAPST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.LE.1 ) $ RETURN * DO 10 I = 1, N INDX( I ) = I 10 CONTINUE * STKPNT = 1 STACK( 1, 1 ) = 1 STACK( 2, 1 ) = N 20 CONTINUE START = STACK( 1, STKPNT ) ENDD = STACK( 2, STKPNT ) STKPNT = STKPNT - 1 IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN * * Do Insertion sort on D( START:ENDD ) * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * DO 40 I = START + 1, ENDD DO 30 J = I, START + 1, -1 IF( D( INDX( J ) ).GT.D( INDX( J-1 ) ) ) THEN ITMP = INDX( J ) INDX( J ) = INDX( J-1 ) INDX( J-1 ) = ITMP ELSE GO TO 40 END IF 30 CONTINUE 40 CONTINUE * ELSE * * Sort into increasing order * DO 60 I = START + 1, ENDD DO 50 J = I, START + 1, -1 IF( D( INDX( J ) ).LT.D( INDX( J-1 ) ) ) THEN ITMP = INDX( J ) INDX( J ) = INDX( J-1 ) INDX( J-1 ) = ITMP ELSE GO TO 60 END IF 50 CONTINUE 60 CONTINUE * END IF * ELSE IF( ENDD-START.GT.SELECT ) THEN * * Partition D( START:ENDD ) and stack parts, largest one first * * Choose partition entry as median of 3 * D1 = D( INDX( START ) ) D2 = D( INDX( ENDD ) ) I = ( START+ENDD ) / 2 D3 = D( INDX( I ) ) IF( D1.LT.D2 ) THEN IF( D3.LT.D1 ) THEN DMNMX = D1 ELSE IF( D3.LT.D2 ) THEN DMNMX = D3 ELSE DMNMX = D2 END IF ELSE IF( D3.LT.D2 ) THEN DMNMX = D2 ELSE IF( D3.LT.D1 ) THEN DMNMX = D3 ELSE DMNMX = D1 END IF END IF * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * I = START - 1 J = ENDD + 1 70 CONTINUE 80 CONTINUE J = J - 1 IF( D( INDX( J ) ).LT.DMNMX ) $ GO TO 80 90 CONTINUE I = I + 1 IF( D( INDX( I ) ).GT.DMNMX ) $ GO TO 90 IF( I.LT.J ) THEN ITMP = INDX( I ) INDX( I ) = INDX( J ) INDX( J ) = ITMP GO TO 70 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF ELSE * * Sort into increasing order * I = START - 1 J = ENDD + 1 100 CONTINUE 110 CONTINUE J = J - 1 IF( D( INDX( J ) ).GT.DMNMX ) $ GO TO 110 120 CONTINUE I = I + 1 IF( D( INDX( I ) ).LT.DMNMX ) $ GO TO 120 IF( I.LT.J ) THEN ITMP = INDX( I ) INDX( I ) = INDX( J ) INDX( J ) = ITMP GO TO 100 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF END IF END IF IF( STKPNT.GT.0 ) $ GO TO 20 RETURN * * End of SLAPST * END scalapack-1.8.0/SRC/pzunm2r.f0000640000175000017500000004163210363532303015535 0ustar muammarmuammar SUBROUTINE PZUNM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNM2R overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PZGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PZGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 COMPLEX*16 AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZELSET2, PZLARF, PZLARFC, ZGEBR2D, ZGEBS2D, $ ZGERV2D, ZGESD2D, ZSCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - DCONJG( TAU( JJ ) ) END IF CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL ZSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL ZSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - DCONJG( TAU( JJ ) ) END IF END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL ZGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL ZGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL ZSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * DO 10 J = J1, J2, J3 IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H(j) or H(j)' * CALL PZELSET2( AJJ, A, IA+J-JA, J, DESCA, ONE ) IF( NOTRAN ) THEN CALL PZLARF( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, $ C, ICC, JCC, DESCC, WORK ) ELSE CALL PZLARFC( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, $ C, ICC, JCC, DESCC, WORK ) END IF CALL PZELSET( A, IA+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNM2R * END scalapack-1.8.0/SRC/slaref.f0000640000175000017500000002443310363532303015374 0ustar muammarmuammar SUBROUTINE SLAREF( TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, $ ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, $ LIHIZ, VECS, V2, V3, T1, T2, T3 ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. LOGICAL BLOCK, WANTZ CHARACTER TYPE INTEGER ICOL1, IROW1, ISTART, ISTOP, ITMP1, ITMP2, LDA, $ LDZ, LIHIZ, LILOZ REAL T1, T2, T3, V2, V3 * .. * .. Array Arguments .. REAL A( LDA, * ), VECS( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * SLAREF applies one or several Householder reflectors of size 3 * to one or two matrices (if column is specified) on either their * rows or columns. * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * If 'R': Apply reflectors to the rows of the matrix * (apply from left) * Otherwise: Apply reflectors to the columns of the matrix * Unchanged on exit. * * A (global input/output) REAL array, (LDA,*) * On entry, the matrix to receive the reflections. * The updated matrix on exit. * * LDA (local input) INTEGER * On entry, the leading dimension of A. Unchanged on exit. * * WANTZ (global input) LOGICAL * If .TRUE., then apply any column reflections to Z as well. * If .FALSE., then do no additional work on Z. * * Z (global input/output) REAL array, (LDZ,*) * On entry, the second matrix to receive column reflections. * This is changed only if WANTZ is set. * * LDZ (local input) INTEGER * On entry, the leading dimension of Z. Unchanged on exit. * * BLOCK (global input) LOGICAL * If .TRUE., then apply several reflectors at once and read * their data from the VECS array. * If .FALSE., apply the single reflector given by V2, V3, * T1, T2, and T3. * * IROW1 (local input/output) INTEGER * On entry, the local row element of A. * Undefined on output. * * * ICOL1 (local input/output) INTEGER * On entry, the local column element of A. * Undefined on output. * * ISTART (global input) INTEGER * Specifies the "number" of the first reflector. This is * used as an index into VECS if BLOCK is set. * ISTART is ignored if BLOCK is .FALSE.. * * ISTOP (global input) INTEGER * Specifies the "number" of the last reflector. This is * used as an index into VECS if BLOCK is set. * ISTOP is ignored if BLOCK is .FALSE.. * * ITMP1 (local input) INTEGER * Starting range into A. For rows, this is the local * first column. For columns, this is the local first row. * * ITMP2 (local input) INTEGER * Ending range into A. For rows, this is the local last * column. For columns, this is the local last row. * * LILOZ * LIHIZ (local input) INTEGER * These serve the same purpose as ITMP1,ITMP2 but for Z * when WANTZ is set. * * VECS (global input) REAL array of size 3*N (matrix * size) * This holds the size 3 reflectors one after another and this * is only accessed when BLOCK is .TRUE. * * V2 * V3 * T1 * T2 * T3 (global input/output) REAL * This holds information on a single size 3 Householder * reflector and is read when BLOCK is .FALSE., and * overwritten when BLOCK is .TRUE. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Local Scalars .. INTEGER J, K REAL H11, H22, SUM, T12, T13, T22, T23, T32, T33, $ V22, V23, V32, V33 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * IF( LSAME( TYPE, 'R' ) ) THEN IF( BLOCK ) THEN DO 20 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 10 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 H11 = A( IROW1+1, J ) - SUM*T2 H22 = A( IROW1+2, J ) - SUM*T3 SUM = H11 + V22*H22 + V32*A( IROW1+3, J ) A( IROW1+1, J ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = A( IROW1+3, J ) - SUM*T32 SUM = H11 + V23*H22 + V33*A( IROW1+4, J ) A( IROW1+2, J ) = H11 - SUM*T13 A( IROW1+3, J ) = H22 - SUM*T23 A( IROW1+4, J ) = A( IROW1+4, J ) - SUM*T33 10 CONTINUE IROW1 = IROW1 + 3 20 CONTINUE DO 40 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 30 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*T2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*T3 30 CONTINUE IROW1 = IROW1 + 1 40 CONTINUE ELSE DO 50 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*T2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*T3 50 CONTINUE END IF ELSE * * Do column transforms * IF( BLOCK ) THEN DO 80 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 60 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 H11 = A( J, ICOL1+1 ) - SUM*T2 H22 = A( J, ICOL1+2 ) - SUM*T3 SUM = H11 + V22*H22 + V32*A( J, ICOL1+3 ) A( J, ICOL1+1 ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = A( J, ICOL1+3 ) - SUM*T32 SUM = H11 + V23*H22 + V33*A( J, ICOL1+4 ) A( J, ICOL1+2 ) = H11 - SUM*T13 A( J, ICOL1+3 ) = H22 - SUM*T23 A( J, ICOL1+4 ) = A( J, ICOL1+4 ) - SUM*T33 60 CONTINUE IF( WANTZ ) THEN DO 70 J = LILOZ, LIHIZ SUM = Z( J, ICOL1 ) + V2*Z( J, ICOL1+1 ) + $ V3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM*T1 H11 = Z( J, ICOL1+1 ) - SUM*T2 H22 = Z( J, ICOL1+2 ) - SUM*T3 SUM = H11 + V22*H22 + V32*Z( J, ICOL1+3 ) Z( J, ICOL1+1 ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = Z( J, ICOL1+3 ) - SUM*T32 SUM = H11 + V23*H22 + V33*Z( J, ICOL1+4 ) Z( J, ICOL1+2 ) = H11 - SUM*T13 Z( J, ICOL1+3 ) = H22 - SUM*T23 Z( J, ICOL1+4 ) = Z( J, ICOL1+4 ) - SUM*T33 70 CONTINUE END IF ICOL1 = ICOL1 + 3 80 CONTINUE DO 110 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 90 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*T2 A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*T3 90 CONTINUE IF( WANTZ ) THEN DO 100 J = LILOZ, LIHIZ SUM = Z( J, ICOL1 ) + V2*Z( J, ICOL1+1 ) + $ V3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM*T1 Z( J, ICOL1+1 ) = Z( J, ICOL1+1 ) - SUM*T2 Z( J, ICOL1+2 ) = Z( J, ICOL1+2 ) - SUM*T3 100 CONTINUE END IF ICOL1 = ICOL1 + 1 110 CONTINUE ELSE DO 120 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*T2 A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*T3 120 CONTINUE END IF END IF RETURN * * End of SLAREF * END scalapack-1.8.0/SRC/pclanhs.f0000640000175000017500000006250610363532303015553 0ustar muammarmuammar REAL FUNCTION PCLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLANHS returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * Hessenberg distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PCLANHS returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PCLANHS as described * above. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PCLANHS is set to zero. N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, INXTROW, $ IOFFA, IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASSQ, INFOG2L, PSTREECOMB, $ SCOMBSSQ, SGEBR2D, SGEBS2D, $ SGAMX2D, SGSUM2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL LSAME, ICEIL, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * VALUE = ZERO * * Find max(abs(A(i,j))). * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 20 LL = JJ, JJ+JB-1 DO 10 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 40 LL = JJ, JJ+JB-1 DO 30 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 50 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 70 LL = JJ, JJ + JB -1 DO 60 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE ELSE DO 90 LL = JJ, JJ+JB-1 DO 80 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 140 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 110 LL = JJ, JJ + JB -1 DO 100 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE ELSE DO 130 LL = JJ, JJ + JB -1 DO 120 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, $ ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 140 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 160 LL = JJ, JJ+JB-1 SUM = ZERO DO 150 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 150 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 160 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 190 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 180 LL = JJ, JJ+JB-1 SUM = ZERO DO 170 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 170 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 180 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 190 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 210 LL = JJ, JJ + JB -1 SUM = ZERO DO 200 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 200 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 SUM = ZERO DO 220 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 220 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 230 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 280 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 250 LL = JJ, JJ + JB -1 SUM = ZERO DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 240 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 250 CONTINUE ELSE DO 270 LL = JJ, JJ + JB -1 SUM = ZERO DO 260 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 260 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 270 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 280 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * DO 290 KK = IIA, IIA+NP-1 WORK( KK ) = ZERO 290 CONTINUE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 310 LL = JJ, JJ+JB-1 DO 300 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 300 CONTINUE IOFFA = IOFFA + LDA 310 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 340 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 330 LL = JJ, JJ+JB-1 DO 320 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 320 CONTINUE IOFFA = IOFFA + LDA 330 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 340 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 360 LL = JJ, JJ + JB -1 DO 350 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA 360 CONTINUE ELSE DO 380 LL = JJ, JJ + JB -1 DO 370 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA 380 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 430 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 400 LL = JJ, JJ + JB -1 DO 390 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 390 CONTINUE IOFFA = IOFFA + LDA 400 CONTINUE ELSE DO 420 LL = JJ, JJ + JB -1 DO 410 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS(A(IOFFA+KK)) 410 CONTINUE IOFFA = IOFFA + LDA 420 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 430 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK, MAX( 1, NP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( NP.GT.0 ) THEN VALUE = WORK( ISAMAX( NP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * SCALE = ZERO SUM = ONE II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 440 LL = JJ, JJ+JB-1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 440 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 460 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 450 LL = JJ, JJ+JB-1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 450 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 460 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 470 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 470 CONTINUE ELSE DO 480 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 480 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL CLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 510 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 490 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 490 CONTINUE ELSE DO 500 LL = JJ, JJ + JB -1 CALL CLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 500 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL CLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 510 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PCLANHS = VALUE * RETURN * * End of PCLANHS * END scalapack-1.8.0/SRC/psormr3.f0000640000175000017500000003504710363532303015530 0ustar muammarmuammar SUBROUTINE PSORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMR3 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JAA, JCC, LCM, $ LCMP, LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSLARZ, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( L.LT.0 .OR. L.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic+i-ia:icc+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * CALL PSLARZ( SIDE, MI, NI, L, A, I, JAA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMR3 * END scalapack-1.8.0/SRC/pdsyntrd.f0000640000175000017500000005053010363532303015764 0ustar muammarmuammar SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * Bugs * ==== * * * Support for UPLO='U' is limited to calling the old, slow, PDSYTRD * code. * * * Purpose * * ======= * * PDSYNTRD is a prototype version of PDSYTRD which uses tailored * codes (either the serial, DSYTRD, or the parallel code, PDSYTTRD) * when the workspace provided by the user is adequate. * * * PDSYNTRD reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Features * ======== * * PDSYNTRD is faster than PDSYTRD on almost all matrices, * particularly small ones (i.e. N < 500 * sqrt(P) ), provided that * enough workspace is available to use the tailored codes. * * The tailored codes provide performance that is essentially * independent of the input data layout. * * The tailored codes place no restrictions on IA, JA, MB or NB. * At present, IA, JA, MB and NB are restricted to those values allowed * by PDSYTRD to keep the interface simple. These restrictions are * documented below. (Search for "restrictions".) * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * For optimal performance, greater workspace is needed, i.e. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS + 4 ) * NPS * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER ANB, CTXTB, I, IACOL, IAROW, ICOFFA, ICTXT, $ IINFO, INDB, INDD, INDE, INDTAU, INDW, IPW, $ IROFFA, J, JB, JX, K, KK, LLWORK, LWMIN, MINSZ, $ MYCOL, MYCOLB, MYROW, MYROWB, NB, NP, NPCOL, $ NPCOLB, NPROW, NPROWB, NPS, NQ, ONEPMIN, SQNPC, $ TTLWMIN * .. * .. Local Arrays .. INTEGER DESCB( DLEN_ ), DESCW( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, CHK1MAT, DESCSET, DSYTRD, $ IGAMN2D, PCHK1MAT, PDELSET, PDLAMR1D, PDLATRD, $ PDSYR2K, PDSYTD2, PDSYTTRD, PDTRMR2D, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC, PJLAENV EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( ( NP+1 )*NB, 3*NB ) ANB = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) MINSZ = PJLAENV( ICTXT, 5, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) TTLWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS * WORK( 1 ) = DBLE( TTLWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 * * The following two restrictions are not necessary provided * that either of the tailored codes are used. * ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 600+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ONEPMIN = N*N + 3*N + 1 LLWORK = LWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLWORK, 1, 1, -1, -1, -1, $ -1 ) * * * * Use the serial, LAPACK, code: DTRD on small matrices if we * we have enough space. * NPROWB = 0 IF( ( N.LT.MINSZ .OR. SQNPC.EQ.1 ) .AND. LLWORK.GE.ONEPMIN .AND. $ .NOT.UPPER ) THEN NPROWB = 1 NPS = N ELSE IF( LLWORK.GE.TTLWMIN .AND. .NOT.UPPER ) THEN NPROWB = SQNPC END IF END IF * IF( NPROWB.GE.1 ) THEN NPCOLB = NPROWB SQNPC = NPROWB INDB = 1 INDD = INDB + NPS*NPS INDE = INDD + NPS INDTAU = INDE + NPS INDW = INDTAU + NPS LLWORK = LLWORK - INDW + 1 * CALL BLACS_GET( ICTXT, 10, CTXTB ) CALL BLACS_GRIDINIT( CTXTB, 'Row major', SQNPC, SQNPC ) CALL BLACS_GRIDINFO( CTXTB, NPROWB, NPCOLB, MYROWB, MYCOLB ) CALL DESCSET( DESCB, N, N, 1, 1, 0, 0, CTXTB, NPS ) * CALL PDTRMR2D( UPLO, 'N', N, N, A, IA, JA, DESCA, WORK( INDB ), $ 1, 1, DESCB, ICTXT ) * * * Only those processors in context CTXTB are needed for a while * IF( NPROWB.GT.0 ) THEN * IF( NPROWB.EQ.1 ) THEN CALL DSYTRD( UPLO, N, WORK( INDB ), NPS, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDW ), $ LLWORK, INFO ) ELSE * CALL PDSYTTRD( 'L', N, WORK( INDB ), 1, 1, DESCB, $ WORK( INDD ), WORK( INDE ), $ WORK( INDTAU ), WORK( INDW ), LLWORK, $ INFO ) * END IF END IF * * All processors participate in moving the data back to the * way that PDSYNTRD expects it. * CALL PDLAMR1D( N-1, WORK( INDE ), 1, 1, DESCB, E, 1, JA, $ DESCA ) * CALL PDLAMR1D( N, WORK( INDD ), 1, 1, DESCB, D, 1, JA, DESCA ) * CALL PDLAMR1D( N, WORK( INDTAU ), 1, 1, DESCB, TAU, 1, JA, $ DESCA ) * CALL PDTRMR2D( UPLO, 'N', N, N, WORK( INDB ), 1, 1, DESCB, A, $ IA, JA, DESCA, ICTXT ) * IF( MYROWB.GE.0 ) $ CALL BLACS_GRIDEXIT( CTXTB ) * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP*NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, $ INDXG2P( JA+N-KK, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, NP ) ) * DO 10 K = N - KK + 1, NB + 1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PDLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, $ TAU, WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PDSYR2K( UPLO, 'No transpose', K-1, JB, -ONE, A, IA, $ J, DESCA, WORK, 1, 1, DESCW, ONE, A, IA, $ JA, DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PDELSET( A, I-1, J, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+NPCOL-1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PDSYTD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N - NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PDLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PDSYR2K( UPLO, 'No transpose', N-K-NB+1, NB, -ONE, $ A, I+NB, J, DESCA, WORK, K+NB, 1, DESCW, $ ONE, A, I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PDELSET( A, I+NB, J+NB-1, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PDSYTD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, TAU, $ WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * END IF * WORK( 1 ) = DBLE( TTLWMIN ) * RETURN * * End of PDSYNTRD * END scalapack-1.8.0/SRC/pstrrfs.f0000640000175000017500000007352010363532303015624 0ustar muammarmuammar SUBROUTINE PSTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LIWORK, LWORK, $ N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ), IWORK( * ) REAL A( * ), B( * ), BERR( * ), FERR( * ), $ WORK( * ), X( * ) * .. * * Purpose * ======= * * PSTRRFS provides error bounds and backward error estimates for the * solution to a system of linear equations with a triangular * coefficient matrix. * * The solution matrix X must be computed by PSTRTRS or some other * means before entering this routine. PSTRRFS does not do iterative * refinement because doing so cannot improve the backward error. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**T * sub( X ) = sub( B ) * (Conjugate transpose = Transpose) * * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) REAL pointer into the local memory * to an array of local dimension (LLD_A,LOCc(JA+N-1) ). This * array contains the local pieces of the original triangular * distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * If DIAG = 'U', the diagonal elements of sub( A ) are also * not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) REAL pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The estimated forward error bounds for * each solution vector of sub( X ). If XTRUE is the true * solution, FERR bounds the magnitude of the largest entry * in (sub( X ) - XTRUE) divided by the magnitude of the * largest entry in sub( X ). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD( IA-1, MB_A ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices sub( X ) and sub( B ) should be * distributed the same way on the same processes. These conditions * ensure that sub( X ) and sub( B ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER CHARACTER TRANST INTEGER IAROW, IXBCOL, IXBROW, IXCOL, IXROW, ICOFFA, $ ICOFFB, ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, $ IIW, IOFFXB, IPB, IPR, IPV, IROFFA, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK1MAT, PCHK2MAT, PSATRMV, PSAXPY, $ PSCOPY, PSLACON, PSTRSV, PSTRMV, $ PXERBLA, SGAMX2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IX, JX, DESCX, 17, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3*NPMOD WORK( 1 ) = REAL( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( NRHS.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -15 ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -16 ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1700+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1700+NB_ ) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1700+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -21 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 21 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 23 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, 0, IDUM1, IDUM2, $ INFO ) CALL PCHK2MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, N, 4, NRHS, 5, $ IX, JX, DESCX, 17, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTRRFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = maximum number of nonzero entries in each row of A, plus 1 * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) )*DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 90 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PSCOPY( N, X, IX, JX+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PSAXPY( N, -ONE, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, $ JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the matrix * or vector Z. If the i-th component of the denominator is less * than SAFE2, then SAFE1 is added to the i-th components of the * numerator and denominator before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 20 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 20 CONTINUE END IF END IF * CALL PSATRMV( UPLO, TRANS, DIAG, N, ONE, A, IA, JA, DESCA, X, $ IX, JX+K, DESCX, 1, ONE, WORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 30 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix or * vector Z * NZ is the maximum number of nonzeros in any row of A, plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 40 CONTINUE END IF END IF * KASE = 0 50 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PSTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 60 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 60 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF CALL PSTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 50 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 80 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 90 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 180 J = JN + 1, JB + NRHS - 1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 170 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PSCOPY( N, X, IX, J+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PSAXPY( N, -ONE, B, IB, J+K, DESCB, 1, WORK( IPR ), $ IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 100 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 100 CONTINUE END IF END IF * CALL PSATRMV( UPLO, TRANS, DIAG, N, ONE, A, IA, JA, DESCA, $ X, IX, J+K, DESCX, 1, ONE, WORK( IPB ), IW, $ JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 110 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 110 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))/norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of A, * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 120 CONTINUE END IF END IF * KASE = 0 130 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PSTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 140 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 150 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 150 CONTINUE END IF END IF CALL PSTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 130 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 160 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 170 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSTRRFS * END scalapack-1.8.0/SRC/pdgeqr2.f0000640000175000017500000002750210363532303015464 0ustar muammarmuammar SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQR2 computes a QR factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i:ia+m-1,ja+i-1), * and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, II, IACOL, IAROW, ICTXT, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DGEBR2D, $ DGEBS2D, DLARFG, DSCAL, INFOG2L, $ PDELSET, PDLARF, PDLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) I = II+(JJ-1)*DESCA( LLD_ ) IF( MYCOL.EQ.IACOL ) THEN AJJ = A( I ) CALL DLARFG( 1, AJJ, A( I ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL DSCAL( NQ-JJ, ALPHA, A( I+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL DSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), 1, $ IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA, JA+K-1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(i+1:ia+m-1,j) * CALL PDLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(j)' to A(i:ia+m-1,j+1:ja+n-1) from the left * CALL PDELSET( A, I, J, DESCA, ONE ) * CALL PDLARF( 'Left', M-J+JA, N-J+JA-1, A, I, J, DESCA, 1, $ TAU, A, I, J+1, DESCA, WORK ) END IF CALL PDELSET( A, I, J, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEQR2 * END scalapack-1.8.0/SRC/pzhentrd.f0000640000175000017500000005323010363532303015753 0ustar muammarmuammar SUBROUTINE PZHENTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ), RWORK( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * Bugs * ==== * * * Support for UPLO='U' is limited to calling the old, slow, PZHETRD * code. * * * Purpose * * ======= * * PZHENTRD is a prototype version of PZHETRD which uses tailored * codes (either the serial, ZHETRD, or the parallel code, PZHETTRD) * when the workspace provided by the user is adequate. * * * PZHENTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Features * ======== * * PZHENTRD is faster than PZHETRD on almost all matrices, * particularly small ones (i.e. N < 500 * sqrt(P) ), provided that * enough workspace is available to use the tailored codes. * * The tailored codes provide performance that is essentially * independent of the input data layout. * * The tailored codes place no restrictions on IA, JA, MB or NB. * At present, IA, JA, MB and NB are restricted to those values allowed * by PZHETRD to keep the interface simple. These restrictions are * documented below. (Search for "restrictions".) * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * For optimal performance, greater workspace is needed, i.e. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS + 4 ) * NPS * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * RWORK (local workspace/local output) COMPLEX*16 array, * dimension (LRWORK) * On exit, RWORK( 1 ) returns the optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 1 * * For optimal performance, greater workspace is needed, i.e. * LRWORK >= MAX( 2 * N ) * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER ANB, CTXTB, I, IACOL, IAROW, ICOFFA, ICTXT, $ IINFO, INDB, INDRD, INDRE, INDTAU, INDW, IPW, $ IROFFA, J, JB, JX, K, KK, LLRWORK, LLWORK, $ LRWMIN, LWMIN, MINSZ, MYCOL, MYCOLB, MYROW, $ MYROWB, NB, NP, NPCOL, NPCOLB, NPROW, NPROWB, $ NPS, NQ, ONEPMIN, ONEPRMIN, SQNPC, TTLRWMIN, $ TTLWMIN * .. * .. Local Arrays .. INTEGER DESCB( DLEN_ ), DESCW( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, CHK1MAT, DESCSET, IGAMN2D, $ PCHK1MAT, PDLAMR1D, PB_TOPGET, PB_TOPSET, $ PXERBLA, PZELSET, PZHER2K, PZHETD2, PZHETTRD, $ PZLAMR1D, PZLATRD, PZTRMR2D, ZHETRD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC, PJLAENV EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, INT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( ( NP+1 )*NB, 3*NB ) ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) MINSZ = PJLAENV( ICTXT, 5, 'PZHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) TTLWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS LRWMIN = 1 TTLRWMIN = 2*NPS * WORK( 1 ) = DCMPLX( DBLE( TTLWMIN ) ) RWORK( 1 ) = DBLE( TTLRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 * * The following two restrictions are not necessary provided * that either of the tailored codes are used. * ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 600+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 13 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHENTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ONEPMIN = N*N + 3*N + 1 LLWORK = LWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLWORK, 1, 1, -1, -1, -1, $ -1 ) * ONEPRMIN = 2*N LLRWORK = LRWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLRWORK, 1, 1, -1, -1, -1, $ -1 ) * * * Use the serial, LAPACK, code: ZTRD on small matrices if we * we have enough space. * NPROWB = 0 IF( ( N.LT.MINSZ .OR. SQNPC.EQ.1 ) .AND. LLWORK.GE.ONEPMIN .AND. $ LLRWORK.GE.ONEPRMIN .AND. .NOT.UPPER ) THEN NPROWB = 1 NPS = N ELSE IF( LLWORK.GE.TTLWMIN .AND. LLRWORK.GE.TTLRWMIN .AND. .NOT. $ UPPER ) THEN NPROWB = SQNPC END IF END IF * IF( NPROWB.GE.1 ) THEN NPCOLB = NPROWB SQNPC = NPROWB INDB = 1 INDRD = 1 INDRE = INDRD + NPS INDTAU = INDB + NPS*NPS INDW = INDTAU + NPS LLWORK = LLWORK - INDW + 1 * CALL BLACS_GET( ICTXT, 10, CTXTB ) CALL BLACS_GRIDINIT( CTXTB, 'Row major', SQNPC, SQNPC ) CALL BLACS_GRIDINFO( CTXTB, NPROWB, NPCOLB, MYROWB, MYCOLB ) CALL DESCSET( DESCB, N, N, 1, 1, 0, 0, CTXTB, NPS ) * CALL PZTRMR2D( UPLO, 'N', N, N, A, IA, JA, DESCA, WORK( INDB ), $ 1, 1, DESCB, ICTXT ) * * * Only those processors in context CTXTB are needed for a while * IF( NPROWB.GT.0 ) THEN * IF( NPROWB.EQ.1 ) THEN CALL ZHETRD( UPLO, N, WORK( INDB ), NPS, RWORK( INDRD ), $ RWORK( INDRE ), WORK( INDTAU ), $ WORK( INDW ), LLWORK, INFO ) ELSE * CALL PZHETTRD( 'L', N, WORK( INDB ), 1, 1, DESCB, $ RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDTAU ), WORK( INDW ), LLWORK, $ INFO ) * END IF END IF * * All processors participate in moving the data back to the * way that PZHENTRD expects it. * CALL PDLAMR1D( N-1, RWORK( INDRE ), 1, 1, DESCB, E, 1, JA, $ DESCA ) * CALL PDLAMR1D( N, RWORK( INDRD ), 1, 1, DESCB, D, 1, JA, $ DESCA ) * CALL PZLAMR1D( N, WORK( INDTAU ), 1, 1, DESCB, TAU, 1, JA, $ DESCA ) * CALL PZTRMR2D( UPLO, 'N', N, N, WORK( INDB ), 1, 1, DESCB, A, $ IA, JA, DESCA, ICTXT ) * IF( MYROWB.GE.0 ) $ CALL BLACS_GRIDEXIT( CTXTB ) * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP*NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, $ INDXG2P( JA+N-KK, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, NP ) ) * DO 10 K = N - KK + 1, NB + 1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PZLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, $ TAU, WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PZHER2K( UPLO, 'No transpose', K-1, JB, -CONE, A, $ IA, J, DESCA, WORK, 1, 1, DESCW, ONE, A, $ IA, JA, DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PZELSET( A, I-1, J, DESCA, DCMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+NPCOL-1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PZHETD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N - NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PZLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PZHER2K( UPLO, 'No transpose', N-K-NB+1, NB, -CONE, $ A, I+NB, J, DESCA, WORK, K+NB, 1, DESCW, $ ONE, A, I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PZELSET( A, I+NB, J+NB-1, DESCA, DCMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PZHETD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, TAU, $ WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * END IF * WORK( 1 ) = DCMPLX( DBLE( TTLWMIN ) ) RWORK( 1 ) = DBLE( TTLRWMIN ) * RETURN * * End of PZHENTRD * END scalapack-1.8.0/SRC/pdpoequ.f0000640000175000017500000003126510363532303015576 0ustar muammarmuammar SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, N DOUBLE PRECISION AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), SC( * ), SR( * ) * .. * * Purpose * ======= * * PDPOEQU computes row and column scalings intended to * equilibrate a distributed symmetric positive definite matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) and reduce its condition number * (with respect to the two-norm). SR and SC contain the scale * factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled distri- * buted matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on * the diagonal. This choice of SR and SC puts the condition number * of B within a factor N of the smallest possible condition number * over all possible diagonal scalings. * * The scaling factor are stored along process rows in SR and along * process columns in SC. The duplication of information simplifies * greatly the application of the factors. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension ( LLD_A, LOCc(JA+N-1) ), the * N-by-N symmetric positive definite distributed matrix * sub( A ) whose scaling factors are to be computed. Only the * diagonal elements of sub( A ) are referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local output) DOUBLE PRECISION array, dimension LOCr(M_A) * If INFO = 0, SR(IA:IA+N-1) contains the row scale factors * for sub( A ). SR is aligned with the distributed matrix A, * and replicated across every process column. SR is tied to the * distributed matrix A. * * SC (local output) DOUBLE PRECISION array, dimension LOCc(N_A) * If INFO = 0, SC(JA:JA+N-1) contains the column scale factors * for A(IA:IA+M-1,JA:JA+N-1). SC is aligned with the distribu- * ted matrix A, and replicated down every process row. SC is * tied to the distributed matrix A. * * SCOND (global output) DOUBLE PRECISION * If INFO = 0, SCOND contains the ratio of the smallest SR(i) * (or SC(j)) to the largest SR(i) (or SC(j)), with * IA <= i <= IA+N-1 and JA <= j <= JA+N-1. If SCOND >= 0.1 * and AMAX is neither too large nor too small, it is not worth * scaling by SR (or SC). * * AMAX (global output) DOUBLE PRECISION * Absolute value of largest matrix element. If AMAX is very * close to overflow or very close to underflow, the matrix * should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the K-th diagonal entry of sub( A ) is * nonpositive. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER ALLCTOP, COLCTOP, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ IDUMM, II, IIA, IOFFA, IOFFD, IROFF, J, JB, JJ, $ JJA, JN, LDA, LL, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ DOUBLE PRECISION AII, SMIN * .. * .. Local Arrays .. INTEGER DESCSC( DLEN_ ), DESCSR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMN2D, $ DGAMX2D, DGSUM2D, IGAMN2D, INFOG2L, $ PCHK1MAT, PB_TOPGET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', ALLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Compute some local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) * * Assign descriptors for SR and SC arrays * CALL DESCSET( DESCSR, N, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCSC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Initialize the scaling factors to zero. * DO 10 II = IIA, IIA+NP-1 SR( II ) = ZERO 10 CONTINUE * DO 20 JJ = JJA, JJA+NQ-1 SC( JJ ) = ZERO 20 CONTINUE * * Find the minimum and maximum diagonal elements. * Handle first block separately. * II = IIA JJ = JJA JB = JN-JA+1 SMIN = ONE / PDLAMCH( ICTXT, 'S' ) AMAX = ZERO * IOFFA = II+(JJ-1)*LDA IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFD = IOFFA DO 30 LL = 0, JB-1 AII = A( IOFFD ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = LL + 1 IOFFD = IOFFD + LDA + 1 30 CONTINUE END IF * IF( MYROW.EQ.IAROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFD = IOFFA DO 40 LL = 0, JB-1 AII = A( IOFFD ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = J + LL - JA + 1 IOFFD = IOFFD + LDA + 1 40 CONTINUE END IF * IF( MYROW.EQ.ICURROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * * Compute scaling factors * CALL DGSUM2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, SC( JJA ), $ 1, -1, MYCOL ) CALL DGSUM2D( ICTXT, 'Rowwise', ROWCTOP, NP, 1, SR( IIA ), $ MAX( 1, NP ), -1, MYCOL ) * CALL DGAMX2D( ICTXT, 'All', ALLCTOP, 1, 1, AMAX, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, SMIN, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) * IF( SMIN.LE.ZERO ) THEN * * Find the first non-positive diagonal element and return. * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) RETURN * ELSE * * Set the scale factors to the reciprocals * of the diagonal elements. * DO 60 II = IIA, IIA+NP-1 SR( II ) = ONE / SQRT( SR( II ) ) 60 CONTINUE * DO 70 JJ = JJA, JJA+NQ-1 SC( JJ ) = ONE / SQRT( SC( JJ ) ) 70 CONTINUE * * Compute SCOND = min(S(I)) / max(S(I)) * SCOND = SQRT( SMIN ) / SQRT( AMAX ) * END IF * RETURN * * End of PDPOEQU * END scalapack-1.8.0/SRC/pslaed0.f0000640000175000017500000001742610363532303015454 0ustar muammarmuammar SUBROUTINE PSLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER INFO, IQ, JQ, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) REAL D( * ), E( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAED0 computes all eigenvalues and corresponding eigenvectors of a * symmetric tridiagonal matrix using the divide and conquer method. * * * Arguments * ========= * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input/output) REAL array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in descending order. * * E (global input/output) REAL array, dimension (N-1) * On entry, the subdiagonal elements of the tridiagonal matrix. * On exit, E has been destroyed. * * Q (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * On output, Q is distributed across the P processes in block * cyclic format. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * * WORK (local workspace ) REAL array, dimension (LWORK) * LWORK = 6*N + 2*NP*NQ, with * NP = NUMROC( N, MB_Q, MYROW, IQROW, NPROW ) * NQ = NUMROC( N, NB_Q, MYCOL, IQCOL, NPCOL ) * IQROW = INDXG2P( IQ, NB_Q, MYROW, RSRC_Q, NPROW ) * IQCOL = INDXG2P( JQ, MB_Q, MYCOL, CSRC_Q, NPCOL ) * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * LIWORK = 2 + 7*N + 8*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ID, IDCOL, IDROW, IID, IINFO, IIQ, IM1, IM2, $ IPQ, IQCOL, IQROW, J, JJD, JJQ, LDQ, MATSIZ, $ MYCOL, MYROW, N1, NB, NBL, NBL1, NPCOL, NPROW, $ SUBPBS, TSUBPBS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSLAED1, PXERBLA, $ SGEBR2D, SGEBS2D, SGERV2D, SGESD2D, SSTEQR * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 IF( DESCQ( NB_ ).GT.N .OR. N.LT.2 ) $ INFO = -1 IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PSLAED0', -INFO ) RETURN END IF * NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) CALL INFOG2L( IQ, JQ, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) * * Determine the size and placement of the submatrices, and save in * the leading elements of IWORK. * TSUBPBS = ( N-1 ) / NB + 1 IWORK( 1 ) = TSUBPBS SUBPBS = 1 10 CONTINUE IF( IWORK( SUBPBS ).GT.1 ) THEN DO 20 J = SUBPBS, 1, -1 IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 IWORK( 2*J-1 ) = IWORK( J ) / 2 20 CONTINUE SUBPBS = 2*SUBPBS GO TO 10 END IF DO 30 J = 2, SUBPBS IWORK( J ) = IWORK( J ) + IWORK( J-1 ) 30 CONTINUE * * Divide the matrix into TSUBPBS submatrices of size at most NB * using rank-1 modifications (cuts). * DO 40 I = NB + 1, N, NB IM1 = I - 1 D( IM1 ) = D( IM1 ) - ABS( E( IM1 ) ) D( I ) = D( I ) - ABS( E( IM1 ) ) 40 CONTINUE * * Solve each submatrix eigenproblem at the bottom of the divide and * conquer tree. D is the same on each process. * DO 50 ID = 1, N, NB CALL INFOG2L( IQ-1+ID, JQ-1+ID, DESCQ, NPROW, NPCOL, MYROW, $ MYCOL, IID, JJD, IDROW, IDCOL ) MATSIZ = MIN( NB, N-ID+1 ) IF( MYROW.EQ.IDROW .AND. MYCOL.EQ.IDCOL ) THEN IPQ = IID + ( JJD-1 )*LDQ CALL SSTEQR( 'I', MATSIZ, D( ID ), E( ID ), Q( IPQ ), LDQ, $ WORK, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'SSTEQR', -INFO ) RETURN END IF IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) THEN CALL SGESD2D( DESCQ( CTXT_ ), MATSIZ, 1, D( ID ), MATSIZ, $ IQROW, IQCOL ) END IF ELSE IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL SGERV2D( DESCQ( CTXT_ ), MATSIZ, 1, D( ID ), MATSIZ, $ IDROW, IDCOL ) END IF 50 CONTINUE * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL SGEBS2D( DESCQ( CTXT_ ), 'A', ' ', N, 1, D, N ) ELSE CALL SGEBR2D( DESCQ( CTXT_ ), 'A', ' ', N, 1, D, N, IQROW, $ IQCOL ) END IF * * Successively merge eigensystems of adjacent submatrices * into eigensystem for the corresponding larger matrix. * * while ( SUBPBS > 1 ) * 60 CONTINUE IF( SUBPBS.GT.1 ) THEN IM2 = SUBPBS - 2 DO 80 I = 0, IM2, 2 IF( I.EQ.0 ) THEN NBL = IWORK( 2 ) NBL1 = IWORK( 1 ) IF( NBL1.EQ.0 ) $ GO TO 70 ID = 1 MATSIZ = MIN( N, NBL*NB ) N1 = NBL1*NB ELSE NBL = IWORK( I+2 ) - IWORK( I ) NBL1 = NBL / 2 IF( NBL1.EQ.0 ) $ GO TO 70 ID = IWORK( I )*NB + 1 MATSIZ = MIN( NB*NBL, N-ID+1 ) N1 = NBL1*NB END IF * * Merge lower order eigensystems (of size N1 and MATSIZ - N1) * into an eigensystem of size MATSIZ. * CALL PSLAED1( MATSIZ, N1, D( ID ), ID, Q, IQ, JQ, DESCQ, $ E( ID+N1-1 ), WORK, IWORK( SUBPBS+1 ), IINFO ) IF( IINFO.NE.0 ) THEN INFO = IINFO*( N+1 ) + ID END IF 70 CONTINUE IWORK( I / 2+1 ) = IWORK( I+2 ) 80 CONTINUE SUBPBS = SUBPBS / 2 GO TO 60 END IF * * end while * 90 CONTINUE RETURN * * End of PSLAED0 * END scalapack-1.8.0/SRC/pdlamr1d.f0000640000175000017500000001067110363532303015623 0ustar muammarmuammar SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Bugs * ==== * * I am not sure that this works correctly when IB and JB are not equal * to 1. Indeed, I suspect that IB should always be set to 1 or ignored * with 1 used in its place. * * PDLAMR1D has not been tested except withint the contect of * PDSYPTRD, the prototype reduction to tridiagonal form code. * * Purpose * * ======= * * PDLAMR1D redistributes a one-dimensional row vector from one data * decomposition to another. * * This is an auxiliary routine called by PDSYTRD to redistribute D, E * and TAU. * * Notes * ===== * * Although all processes call PDGEMR2D, only the processes that own * the first column of A send data and only processes that own the * first column of B receive data. The calls to DGEBS2D/DGEBR2D * spread the data down. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * A (local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JA+N-1)). * On output, A is replicated across all processes in * this processor column. * * IA (global input) INTEGER * A's global row index, which points to the beginning of * the submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JB+N-1)). * * IB (global input) INTEGER * B's global row index, NOT USED * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCBB( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, PDGEMR2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return if possible * IF( N.LE.0 ) $ RETURN * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) DESCBB( I ) = DESCB( I ) 10 CONTINUE * DESCAA( M_ ) = 1 DESCBB( M_ ) = 1 DESCAA( LLD_ ) = 1 DESCBB( LLD_ ) = 1 * ICTXT = DESCB( CTXT_ ) CALL PDGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'C', ' ', NQ, 1, B, NQ ) ELSE CALL DGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * RETURN * * End of PDLAMR1D * END scalapack-1.8.0/SRC/pcgetf2.f0000640000175000017500000002264110363532303015451 0ustar muammarmuammar SUBROUTINE PCGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCGETF2 computes an LU factorization of a general M-by-N * distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using * partial pivoting with row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal * elements (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). * * This is the right-looking Parallel Level 2 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). * NB_A-MOD(JA-1, NB_A) >= N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ). On exit, this array contains * the local pieces of the factors L and U from the factoriza- * tion sub( A ) = P*L*U; the unit diagonal elements of L are * not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IROFF, J, $ JJA, MN, MYCOL, MYROW, NPCOL, NPROW COMPLEX GMAX * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PCAMAX, PCGERU, $ PCSCAL, PCSWAP, PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * MN = MIN( M, N ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) * IF( MYCOL.EQ.IACOL ) THEN DO 10 J = JA, JA+MN-1 I = IA + J - JA * * Find pivot and test for singularity. * CALL PCAMAX( M-J+JA, GMAX, IPIV( IIA+J-JA ), A, I, J, $ DESCA, 1 ) IF( GMAX.NE.ZERO ) THEN * * Apply the row interchanges to columns JA:JA+N-1 * CALL PCSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, $ IPIV( IIA+J-JA ), JA, DESCA, DESCA( M_ ) ) * * Compute elements I+1:IA+M-1 of J-th column. * IF( J-JA+1.LT.M ) $ CALL PCSCAL( M-J+JA-1, ONE / GMAX, A, I+1, J, $ DESCA, 1 ) ELSE IF( INFO.EQ.0 ) THEN INFO = J - JA + 1 END IF * * Update trailing submatrix * IF( J-JA+1.LT.MN ) THEN CALL PCGERU( M-J+JA-1, N-J+JA-1, -ONE, A, I+1, J, DESCA, $ 1, A, I, J+1, DESCA, DESCA( M_ ), A, I+1, $ J+1, DESCA ) END IF 10 CONTINUE * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN, MYROW, IACOL ) * END IF * RETURN * * End of PCGETF2 * END scalapack-1.8.0/SRC/psormrq.f0000640000175000017500000004232110363532303015617 0ustar muammarmuammar SUBROUTINE PSORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMRQ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, LCM, $ LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, NI, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSLARFB, $ PSLARFT, PSORMR2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PSORMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PSLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-IA, IB, $ A, I, JA, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+i+ib-ia-1,jc:jc+n-1) * MI = M - K + I + IB - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+i+ib-ia-1) * NI = N - K + I + IB - IA END IF * * Apply H or H' * CALL PSLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, $ IB, A, I, JA, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PSORMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMRQ * END scalapack-1.8.0/SRC/pdtrcon.f0000640000175000017500000004036310363532303015571 0ustar muammarmuammar SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LIWORK, LWORK, N DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDTRCON estimates the reciprocal of the condition number of a * triangular distributed matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm. * * The norm of A(IA:IA+N-1,JA:JA+N-1) is computed and an estimate is * obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), then the reciprocal * of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm; * = 'I': Infinity-norm. * * UPLO (global input) CHARACTER * = 'U': A(IA:IA+N-1,JA:JA+N-1) is upper triangular; * = 'L': A(IA:IA+N-1,JA:JA+N-1) is lower triangular. * * DIAG (global input) CHARACTER * = 'N': A(IA:IA+N-1,JA:JA+N-1) is non-unit triangular; * = 'U': A(IA:IA+N-1,JA:JA+N-1) is unit triangular. * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the triangular distributed * matrix A(IA:IA+N-1,JA:JA+N-1). If UPLO = 'U', the leading * N-by-N upper triangular part of this distributed matrix con- * tains the upper triangular matrix, and its strictly lower * triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of this ditributed * matrix contains the lower triangular matrix, and the strictly * upper triangular part is not referenced. If DIAG = 'U', the * diagonal elements of A(IA:IA+N-1,JA:JA+N-1) are also not * referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + LOCc(N+MOD(JA-1,NB_A)) * + MAX( 2, MAX( NB_A*MAX( 1, CEIL(NPROW-1,NPCOL) ), * LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX( 1, CEIL(NPCOL-1,NPROW) ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOUNIT, ONENRM, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPN, IPV, IPW, $ IPX, IROFF, IV, IX, IXX, JJA, JV, JX, KASE, $ KASE1, LIWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, $ NPMOD, NPROW, NQ, NQMOD DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM DOUBLE PRECISION WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 5 ), $ IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGEBR2D, $ DGEBS2D, INFOG2L, PCHK1MAT, PDAMAX, $ PDLATRS, PDLACON, PDRSCL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANTR EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH, $ PDLANTR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) NOUNIT = LSAME( DIAG, 'N' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 11 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 13 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 5, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * RCOND = ZERO SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' )*DBLE( MAX( 1, N ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPN = IPV + NP IPW = IPN + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Compute the norm of the triangular matrix A. * ANORM = PDLANTR( NORM, UPLO, DIAG, N, N, A, IA, JA, DESCA, WORK ) * * Continue only if ANORM > 0. * IF( ANORM.GT.ZERO ) THEN * * Estimate the norm of the inverse of A. * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 10 CONTINUE CALL PDLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), $ IX, JX, DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(A). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( UPLO, 'No transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, WORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(A'). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( UPLO, 'Transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, WORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF NORMIN = 'Y' * * Multiply by 1/SCALE if doing so will not cause overflow. * IF( SCALE.NE.ONE ) THEN CALL PDAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, $ DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL DGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1 ) ELSE CALL DGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1, IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / ANORM ) / AINVNM END IF * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PDTRCON * END scalapack-1.8.0/SRC/psptsv.f0000640000175000017500000004522410363532303015460 0ustar muammarmuammar SUBROUTINE PSPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal symmetric positive definite distributed * matrix. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PSPTTRF and PSPTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL + 3*NB) * +max((10+2*min(100,NRHS))*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PSPTTRF, PSPTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PSPTTRF and PSPTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 5*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PSPTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL + 3*NB) * * Factor the matrix * CALL PSPTTRF( N, D, E, JA, DESCA, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR ), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PSPTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PSPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPTSV', -INFO ) RETURN END IF * RETURN * * End of PSPTSV * END scalapack-1.8.0/SRC/pdlacpy.f0000640000175000017500000002240410363532303015550 0ustar muammarmuammar SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDLACPY copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PDLACPY * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBB, IBLK, IN, ITMP, J, JAA, JBB, $ JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PDLACP2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PDLACP2( UPLO, M, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN CALL PDLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB + ITMP JBB = JB + ITMP JAA = JA + ITMP CALL PDLACP2( UPLO, IBLK, N-ITMP, A, I, JAA, DESCA, $ B, IBB, JBB, DESCB ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN CALL PDLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IBB = IB + JTMP JBB = JB + JTMP IAA = IA + JTMP CALL PDLACP2( UPLO, M-JTMP, JBLK, A, IAA, J, DESCA, $ B, IBB, JBB, DESCB ) 20 CONTINUE ELSE IF( M.LE.N ) THEN CALL PDLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB+ITMP CALL PDLACP2( UPLO, IBLK, N, A, I, JA, DESCA, $ B, IBB, JB, DESCB ) 30 CONTINUE ELSE CALL PDLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) JBB = JB+JTMP CALL PDLACP2( UPLO, M, JBLK, A, IA, J, DESCA, $ B, IB, JBB, DESCB ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PDLACPY * END scalapack-1.8.0/SRC/pzdbsv.f0000640000175000017500000004536410363532303015436 0ustar muammarmuammar SUBROUTINE PZDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PZDBTRF and PZDBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * +max((max(bwl,bwu)*NRHS), max(bwl,bwu)*max(bwl,bwu)) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PXERBLA, PZDBTRF, PZDBTRS * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PZDBTRF and PZDBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PZDBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Factor the matrix * CALL PZDBTRF( N, BWL, BWU, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PZDBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PZDBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBSV', -INFO ) RETURN END IF * RETURN * * End of PZDBSV * END scalapack-1.8.0/SRC/pcunmql.f0000640000175000017500000004225410363532303015600 0ustar muammarmuammar SUBROUTINE PCUNMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMQL overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PCGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PCGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, IPW, $ IROFFA, IROFFC, J, J1, J2, J3, JB, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCLARFB, $ PCLARFT, PCUNM2L, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN JB = J1 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PCUNM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PCLARFT( 'Backward', 'Columnwise', NQ-K+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+j+jb-ja-1,jc:jc+n-1) * MI = M - K + J + JB - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+j+jb-ja-1) * NI = N - K + J + JB - JA END IF * * Apply H or H' * CALL PCLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, $ JB, A, IA, J, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN JB = J2 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PCUNM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMQL * END scalapack-1.8.0/SRC/pdgelqf.f0000640000175000017500000002745310363532303015547 0ustar muammarmuammar SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGELQF computes a LQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1) H(ia+k-2) . . . H(ia), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1)=0 * and v(i) = 1; v(i+1:n) is stored on exit in A(ia+i-1,ja+i:ja+n-1), * and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ IROFF, J, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDGELQ2, $ PDLARFB, PDLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Handle the first block of rows separately * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IB = IN - IA + 1 * * Compute the LQ factorization of the first block A(ia:in:ja:ja+n-1) * CALL PDGELQ2( IB, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( IA+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ia) H(ia+1) . . . H(in) * CALL PDLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PDLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-IB, N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, $ JA, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of rows * DO 10 I = IN+1, IA+K-1, DESCA( MB_ ) IB = MIN( K-I+IA, DESCA( MB_ ) ) J = JA + I - IA * * Compute the LQ factorization of the current block * A(i:i+ib-1:j:ja+n-1) * CALL PDGELQ2( IB, N-I+IA, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PDLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PDLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-J+JA, IB, A, I, J, DESCA, WORK, $ A, I+IB, J, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGELQF * END scalapack-1.8.0/SRC/pcungql.f0000640000175000017500000002702610363532303015572 0ustar muammarmuammar SUBROUTINE PCUNGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGQL generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PCGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PCGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JN, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCLARFB, $ PCLARFT, PCLASET, PCUNG2L, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Set A(ia+m-n+jn-ja+1:ia-m+1,ja:jn) to zero. * CALL PCLASET( 'All', N-JN+JA-1, JN-JA+1, ZERO, ZERO, A, $ IA+M-N+JN-JA+1, JA, DESCA ) * * Use unblocked code for the first or only block. * CALL PCUNG2L( M-N+JN-JA+1, JN-JA+1, JN-JA-N+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PCLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the left * CALL PCLARFB( 'Left', 'No transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H to rows ia:m-k+i+ib-1 of current block * CALL PCUNG2L( M-N+J+JB-JA, JB, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia+m-n+j+jb-ja:ia+m-1,j:j+jb-1 of current block to * zero * CALL PCLASET( 'All', N-J-JB+JA, JB, ZERO, ZERO, A, $ IA+M-N+J+JB-JA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGQL * END scalapack-1.8.0/SRC/pzgbtrs.f0000640000175000017500000011301510363532303015606 0ustar muammarmuammar SUBROUTINE PZGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWU, BWL, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV(*) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZGBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PZGBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded distributed * matrix with bandwidth BWL, BWU. * * Routine PZGBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZGBTRF and this is stored in AF. If a linear system * is to be solved using PZGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NRHS*(NB+2*bwl+4*bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Marbwus Hegland, Australian Natonal University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC, $ FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, J, JA_NEW, L, LBWL, LBWU, LDBB, $ LDW, LLDA, LLDB, LM, LMJ, LN, LPTR, MYCOL, $ MYROW, NB, NEICOL, NP, NPACT, NPCOL, NPROW, $ NPSTR, NP_SAVE, ODD_SIZE, PART_OFFSET, $ RECOVERY_VAL, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WPTR * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (2*BWL+2*BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * BW = BWU+BWL * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZGBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.(BWL+BWU+1) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZGBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check worksize * WORK_SIZE_MIN = NRHS*(NB+2*BWL+4*BWU) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PZGBTRS: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 16 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZGBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * * Move data into workspace - communicate/copy (overlap) * IF (MYCOL .LT. NPCOL-1) THEN CALL ZGESD2D( ICTXT, BWU, NRHS, B(NB-BWU+1), LLDB, $ 0, MYCOL + 1) ENDIF * IF (MYCOL .LT. NPCOL-1) THEN LM = NB-BWU ELSE LM = NB ENDIF * IF (MYCOL .GT. 0) THEN WPTR = BWU+1 ELSE WPTR = 1 ENDIF * LDW = NB+BWU + 2*BW+BWU * CALL ZLACPY( 'G', LM, NRHS, B(1), LLDB, WORK( WPTR ), LDW ) * * Zero out rest of work * DO 1501 J=1, NRHS DO 1502 L=WPTR+LM, LDW WORK( (J-1)*LDW+L ) = CZERO 1502 CONTINUE 1501 CONTINUE * IF (MYCOL .GT. 0) THEN CALL ZGERV2D( ICTXT, BWU, NRHS, WORK(1), LDW, $ 0, MYCOL-1) ENDIF * ******************************************************************** * PHASE 1: Local computation phase -- Solve L*X = B ******************************************************************** * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF (MYCOL .NE. 0) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1+BWU ENDIF * IF (MYCOL .NE. NPCOL-1) THEN LM = NB - LBWU LN = NB - BW ELSE IF (MYCOL .NE. 0) THEN LM = ODD_SIZE + BWU LN = MAX(ODD_SIZE-BW,0) ELSE LM = N LN = MAX( N-BW, 0 ) ENDIF * DO 21 J = 1, LN * LMJ = MIN(LBWL,LM-J) L = IPIV( J ) * IF( L.NE.J ) THEN CALL ZSWAP(NRHS, WORK(L), LDW, WORK(J), LDW) ENDIF * LPTR = BW+1 + (J-1)*LLDA + APTR * CALL ZGERU(LMJ,NRHS,-CONE, A(LPTR),1, WORK(J),LDW, $ WORK(J+1),LDW) * 21 CONTINUE * ******************************************************************** * PHASE 2: Global computation phase -- Solve L*X = B ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU BN = BW ELSE BM = MIN(BW,ODD_SIZE) + BWU BN = MIN(BW,ODD_SIZE) ENDIF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = (NB+BWU)*BW + 1 LDBB = 2*BW + BWU * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. CALL ZGETRS( 'N', N-LN, NRHS, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), WORK( LN+1 ), LDW, INFO) * ENDIF * * Loop over levels ... * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) is used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels 200 IF (NPACT .LE. 1) GOTO 300 * * Test if processor is active IF (MOD(MYCOL,NPSTR) .EQ. 0) THEN * * Send/Receive blocks * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * NEICOL = MYCOL + NPSTR * IF (NEICOL/NPSTR .LE. NPACT-1) THEN * IF (NEICOL/NPSTR .LT. NPACT-1) THEN BMN = BW ELSE BMN = MIN(BW,NUMROC(N, NB, NEICOL, 0, NPCOL))+BWU ENDIF * CALL ZGESD2D( ICTXT, BM, NRHS, $ WORK(LN+1), LDW, 0, NEICOL ) * IF( NPACT .NE. 2 )THEN * * Receive answers back from partner processor * CALL ZGERV2D(ICTXT, BM+BMN-BW, NRHS, $ WORK( LN+1 ), LDW, 0, NEICOL ) * BM = BM+BMN-BW * ENDIF * ENDIF * ELSE * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * CALL ZLACPY( 'G', BM, NRHS, WORK(LN+1), LDW, $ WORK(NB+BWU+BMN+1), LDW ) * CALL ZGERV2D( ICTXT, BMN, NRHS, WORK( NB+BWU+1 ), $ LDW, 0, NEICOL ) * * and do the permutations and eliminations * IF (NPACT .NE. 2) THEN * * Solve locally for BW variables * CALL ZLASWP( NRHS, WORK(NB+BWU+1), LDW, 1, BW, $ IPIV(LN+1), 1) * CALL ZTRSM('L','L','N','U', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) * * Use soln just calculated to update RHS * CALL ZGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ WORK(NB+BWU+1), LDW, $ CONE, WORK(NB+BWU+1+BW), LDW ) * * Give answers back to partner processor * CALL ZGESD2D( ICTXT, BM+BMN-BW, NRHS, $ WORK(NB+BWU+1+BW), LDW, 0, NEICOL ) * ELSE * * Finish up calculations for final level * CALL ZLASWP( NRHS, WORK(NB+BWU+1), LDW, 1, BM+BMN, $ IPIV(LN+1), 1) * CALL ZTRSM('L','L','N','U', BM+BMN, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) ENDIF * ENDIF * NPACT = (NPACT + 1)/2 NPSTR = NPSTR * 2 GOTO 200 * ENDIF * 300 CONTINUE * * ************************************** * BACKSOLVE ******************************************************************** * PHASE 2: Global computation phase -- Solve U*Y = X ******************************************************************** * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. * In fact, the backsolve portion was done in the call to * ZGETRS in the frontsolve. * ENDIF * * Compute variable needed to reverse loop structure in * reduced system. * RECOVERY_VAL = NPACT*NPSTR - NPCOL * * Loop over levels * Terminal values of NPACT and NPSTR from frontsolve are used * 2200 IF( NPACT .GE. NPCOL ) GOTO 2300 * NPSTR = NPSTR/2 * NPACT = NPACT*2 * * Have to adjust npact for non-power-of-2 * NPACT = NPACT-MOD( (RECOVERY_VAL/NPSTR), 2 ) * * Find size of submatrix in this proc at this level * IF( MYCOL/NPSTR .LT. NPACT-1 ) THEN BN = BW ELSE BN = MIN(BW, NUMROC(N, NB, NPCOL-1, 0, NPCOL) ) ENDIF * * If this processor is even in this level... * IF( MOD( MYCOL, 2*NPSTR ) .EQ. 0 ) THEN * NEICOL = MYCOL+NPSTR * IF( NEICOL/NPSTR .LE. NPACT-1 ) THEN * IF( NEICOL/NPSTR .LT. NPACT-1 ) THEN BMN = BW BNN = BW ELSE BMN = MIN(BW,NUMROC(N, NB, NEICOL, 0, NPCOL))+BWU BNN = MIN(BW, NUMROC(N, NB, NEICOL, 0, NPCOL) ) ENDIF * IF( NPACT .GT. 2 ) THEN * CALL ZGESD2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * CALL ZGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * ELSE * CALL ZGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * ENDIF * ENDIF * ELSE * This processor is odd on this level * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * IF( NEICOL .LT. NPCOL-1 ) THEN BNN = BW ELSE BNN = MIN(BW, NUMROC(N, NB, NEICOL, 0, NPCOL) ) ENDIF * IF( NPACT .GT. 2 ) THEN * * Move RHS to make room for received solutions * CALL ZLACPY( 'G', BW, NRHS, WORK(NB+BWU+1), $ LDW, WORK(NB+BWU+BW+1), LDW ) * CALL ZGERV2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * CALL ZGEMM( 'N', 'N', BW, NRHS, BN, $ -CONE, AF(BBPTR), LDBB, $ WORK(LN+1), LDW, $ CONE, WORK(NB+BWU+BW+1), LDW ) * * IF( MYCOL .GT. NPSTR ) THEN * CALL ZGEMM( 'N', 'N', BW, NRHS, BW, $ -CONE, AF(BBPTR+2*BW*LDBB), LDBB, $ WORK(LN+BW+1), LDW, $ CONE, WORK(NB+BWU+BW+1), LDW ) * ENDIF * CALL ZTRSM('L','U','N','N', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+BW+1), LDW) * * Send new solution to neighbor * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK( NB+BWU+BW+1 ), LDW, 0, NEICOL ) * * Copy new solution into expected place * CALL ZLACPY( 'G', BW, NRHS, WORK(NB+BWU+1+BW), $ LDW, WORK(LN+BW+1), LDW ) * ELSE * * Solve with local diagonal block * CALL ZTRSM( 'L','U','N','N', BN+BNN, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) * * Send new solution to neighbor * CALL ZGESD2D( ICTXT, BW, NRHS, $ WORK(NB+BWU+1), LDW, 0, NEICOL ) * * Shift solutions into expected positions * CALL ZLACPY( 'G', BNN+BN-BW, NRHS, WORK(NB+BWU+1+BW), $ LDW, WORK(LN+1), LDW ) * * IF( (NB+BWU+1) .NE. (LN+1+BW) ) THEN * * Copy one row at a time since spaces may overlap * DO 1064 J=1, BW CALL ZCOPY( NRHS, WORK(NB+BWU+J), LDW, $ WORK(LN+BW+J), LDW ) 1064 CONTINUE * ENDIF * ENDIF * ENDIF * GOTO 2200 * 2300 CONTINUE * End of loop over levels * ******************************************************************** * PHASE 1: (Almost) Local computation phase -- Solve U*Y = X ******************************************************************** * * Reset BM to value it had before reduced system frontsolve... * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU ELSE BM = MIN(BW,ODD_SIZE) + BWU ENDIF * * First metastep is to account for the fillin blocks AF * IF( MYCOL .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, BW, NRHS, WORK( NB-BW+1 ), $ LDW, 0, MYCOL+1 ) * ENDIF * IF( MYCOL .GT. 0 ) THEN * CALL ZGERV2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), $ LDW, 0, MYCOL-1 ) * * Modify local right hand sides with received rhs's * CALL ZGEMM( 'N', 'N', LM-BM, NRHS, BW, -CONE, $ AF( 1 ), LM, WORK( NB+BWU+1 ), LDW, CONE, $ WORK( 1 ), LDW ) * ENDIF * DO 2021 J = LN, 1, -1 * LMJ = MIN( BW, ODD_SIZE-1 ) * LPTR = BW-1+J*LLDA+APTR * * In the following, the TRANS=T option is used to reverse * the order of multiplication, not as a true transpose * CALL ZGEMV( 'T', LMJ, NRHS, -CONE, WORK( J+1), LDW, $ A( LPTR ), LLDA-1, CONE, WORK( J ), LDW ) * * Divide by diagonal element * CALL ZSCAL( NRHS, CONE/A( LPTR-LLDA+1 ), $ WORK( J ), LDW ) 2021 CONTINUE * * * CALL ZLACPY( 'G', ODD_SIZE, NRHS, WORK( 1 ), LDW, $ B( 1 ), LLDB ) * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * RETURN * * End of PZGBTRS * END scalapack-1.8.0/SRC/pslatrd.f0000640000175000017500000004151110363532303015565 0ustar muammarmuammar SUBROUTINE PSLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCW( * ) REAL A( * ), D( * ), E( * ), TAU( * ), W( * ), $ WORK( * ) * .. * * Purpose * ======= * * PSLATRD reduces NB rows and columns of a real symmetric distributed * matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) to symmetric tridiagonal * form by an orthogonal similarity transformation Q' * sub( A ) * Q, * and returns the matrices V and W which are needed to apply the * transformation to the unreduced part of sub( A ). * * If UPLO = 'U', PSLATRD reduces the last NB rows and columns of a * matrix, of which the upper triangle is supplied; * if UPLO = 'L', PSLATRD reduces the first NB rows and columns of a * matrix, of which the lower triangle is supplied. * * This is an auxiliary routine called by PSSYTRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of rows and columns to be reduced. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. * On exit, if UPLO = 'U', the last NB columns have been reduced * to tridiagonal form, with the diagonal elements overwriting * the diagonal elements of sub( A ); the elements above the * diagonal with the array TAU, represent the orthogonal matrix * Q as a product of elementary reflectors. If UPLO = 'L', the * first NB columns have been reduced to tridiagonal form, with * the diagonal elements overwriting the diagonal elements of * sub( A ); the elements below the diagonal with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors; See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * W (local output) REAL pointer into the local memory * to an array of dimension (LLD_W,NB_W), This array contains * the local pieces of the N-by-NB_W matrix W required to * update the unreduced part of sub( A ). * * IW (global input) INTEGER * The row index in the global array W indicating the first * row of sub( W ). * * JW (global input) INTEGER * The column index in the global array W indicating the * first column of sub( W ). * * DESCW (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix W. * * WORK (local workspace) REAL array, dimension (NB_A) * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n) H(n-1) . . . H(n-nb+1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the N-by-NB matrix V * which is needed, with W, to apply the transformation to the unreduced * part of the matrix, using a symmetric rank-2k update of the form: * sub( A ) := sub( A ) - V*W' - W*V'. * * The contents of A on exit are illustrated by the following examples * with n = 5 and nb = 2: * * if UPLO = 'U': if UPLO = 'L': * * ( a a a v4 v5 ) ( d ) * ( a a v4 v5 ) ( 1 d ) * ( a 1 v5 ) ( v1 1 a ) * ( d 1 ) ( v1 v2 a a ) * ( d ) ( v1 v2 a a a ) * * where d denotes a diagonal element of the reduced matrix, a denotes * an element of the original matrix that is unchanged, and vi denotes * an element of the vector defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL HALF, ONE, ZERO PARAMETER ( HALF = 0.5E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, JP, JWK, K, $ KW, MYCOL, MYROW, NPCOL, NPROW, NQ REAL ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCWK( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSAXPY, $ PSDOT, PSELGET, PSELSET, PSGEMV, $ PSLARFG, PSSCAL, PSSYMV, SGEBR2D, $ SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * IF( LSAME( UPLO, 'U' ) ) THEN * CALL INFOG2L( N+IA-NB, N+JA-NB, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce last NB columns of upper triangle * DO 10 J = JA+N-1, JA+N-NB, -1 I = IA + J - JA K = J - JA + 1 KW = MOD( K-1, DESCA( MB_ ) ) + 1 * * Update A(IA:I,I) * CALL PSGEMV( 'No transpose', K, N-K, -ONE, A, IA, J+1, $ DESCA, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ), $ ONE, A, IA, J, DESCA, 1 ) CALL PSGEMV( 'No transpose', K, N-K, -ONE, W, IW, JW+KW, $ DESCW, A, I, J+1, DESCA, DESCA( M_ ), ONE, A, $ IA, J, DESCA, 1 ) IF( N-K.GT.0 ) $ CALL PSELSET( A, I, J+1, DESCA, E( JP ) ) * * Generate elementary reflector H(i) to annihilate * A(IA:I-2,I) * JP = MIN( JJ+KW-1, NQ ) CALL PSLARFG( K-1, E( JP ), I-1, J, A, IA, J, DESCA, 1, $ TAU ) CALL PSELSET( A, I-1, J, DESCA, ONE ) * * Compute W(IW:IW+K-2,JW+KW-1) * CALL PSSYMV( 'Upper', K-1, ONE, A, IA, JA, DESCA, A, IA, J, $ DESCA, 1, ZERO, W, IW, JW+KW-1, DESCW, 1 ) * JWK = MOD( K-1, DESCWK( NB_ ) ) + 2 CALL PSGEMV( 'Transpose', K-1, N-K, ONE, W, IW, JW+KW, $ DESCW, A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK, $ DESCWK, DESCWK( M_ ) ) CALL PSGEMV( 'No transpose', K-1, N-K, -ONE, A, IA, J+1, $ DESCA, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PSGEMV( 'Transpose', K-1, N-K, ONE, A, IA, J+1, DESCA, $ A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK, DESCWK, $ DESCWK( M_ ) ) CALL PSGEMV( 'No transpose', K-1, N-K, -ONE, W, IW, JW+KW, $ DESCW, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PSSCAL( K-1, TAU( JP ), W, IW, JW+KW-1, DESCW, 1 ) * CALL PSDOT( K-1, ALPHA, W, IW, JW+KW-1, DESCW, 1, A, IA, J, $ DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PSAXPY( K-1, ALPHA, A, IA, J, DESCA, 1, W, IW, JW+KW-1, $ DESCW, 1 ) IF( MYCOL.EQ.IACOL ) THEN CALL PSELGET( 'E', ' ', D( JP ), A, I, J, DESCA ) END IF * 10 CONTINUE * ELSE * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce first NB columns of lower triangle * DO 20 J = JA, JA+NB-1 I = IA + J - JA K = J - JA + 1 * * Update A(J:JA+N-1,J) * CALL PSGEMV( 'No transpose', N-K+1, K-1, -ONE, A, I, JA, $ DESCA, W, IW+K-1, JW, DESCW, DESCW( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PSGEMV( 'No transpose', N-K+1, K-1, -ONE, W, IW+K-1, $ JW, DESCW, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J, DESCA, 1 ) IF( K.GT.1 ) $ CALL PSELSET( A, I, J-1, DESCA, E( JP ) ) * * * Generate elementary reflector H(i) to annihilate * A(I+2:IA+N-1,I) * JP = MIN( JJ+K-1, NQ ) CALL PSLARFG( N-K, E( JP ), I+1, J, A, I+2, J, DESCA, 1, $ TAU ) CALL PSELSET( A, I+1, J, DESCA, ONE ) * * Compute W(IW+K:IW+N-1,JW+K-1) * CALL PSSYMV( 'Lower', N-K, ONE, A, I+1, J+1, DESCA, A, I+1, $ J, DESCA, 1, ZERO, W, IW+K, JW+K-1, DESCW, 1 ) * CALL PSGEMV( 'Transpose', N-K, K-1, ONE, W, IW+K, JW, DESCW, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK, $ DESCWK( M_ ) ) CALL PSGEMV( 'No transpose', N-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PSGEMV( 'Transpose', N-K, K-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK, $ DESCWK( M_ ) ) CALL PSGEMV( 'No transpose', N-K, K-1, -ONE, W, IW+K, JW, $ DESCW, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PSSCAL( N-K, TAU( JP ), W, IW+K, JW+K-1, DESCW, 1 ) CALL PSDOT( N-K, ALPHA, W, IW+K, JW+K-1, DESCW, 1, A, I+1, $ J, DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PSAXPY( N-K, ALPHA, A, I+1, J, DESCA, 1, W, IW+K, $ JW+K-1, DESCW, 1 ) IF( MYCOL.EQ.IACOL ) THEN CALL PSELGET( 'E', ' ', D( JP ), A, I, J, DESCA ) END IF * 20 CONTINUE * END IF * * Broadcast columnwise the diagonal elements into D. * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1, $ IAROW, MYCOL ) END IF END IF * RETURN * * End of PSLATRD * END scalapack-1.8.0/SRC/psormhr.f0000640000175000017500000003642310363532303015614 0ustar muammarmuammar SUBROUTINE PSORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMHR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of IHI-ILO elementary reflectors, as returned by PSGEHRD: * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * ILO and IHI must have the same values as in the previous call * of PSGEHRD. Q is equal to the unit matrix except in the * distributed submatrix Q(ia+ilo:ia+ihi-1,ia+ilo:ja+ihi-1). * If SIDE = 'L', 1 <= ILO <= IHI <= max(1,M); * if SIDE = 'R', 1 <= ILO <= IHI <= max(1,N); * ILO and IHI are relative indexes. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PSGEHRD. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+M-2) * if SIDE = 'L', and LOCc(JA+N-2) if SIDE = 'R'. This array * contains the scalar factors TAU(j) of the elementary * reflectors H(j) as returned by PSGEHRD. TAU is tied to * the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * IAA = IA + ILO; JAA = JA+ILO-1; * If SIDE = 'L', * MI = IHI-ILO; NI = N; ICC = IC + ILO; JCC = JC; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; NI = IHI-ILO; ICC = IC; JCC = JC + ILO; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NH, NI, NPA0, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSORMQR, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 NH = IHI - ILO IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) IAA = IA + ILO JAA = JA + ILO - 1 * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = NH NI = N ICC = IC + ILO JCC = JC CALL CHK1MAT( M, 3, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N MI = M NI = NH ICC = IC JCC = JC + ILO CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN INFO = -5 ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN INFO = -6 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = ILO IDUM2( 3 ) = 5 IDUM1( 4 ) = IHI IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMHR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) $ RETURN * CALL PSORMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMHR * END scalapack-1.8.0/SRC/zlamsh.f0000640000175000017500000002275410363532303015422 0ustar muammarmuammar SUBROUTINE ZLAMSH( S, LDS, NBULGE, JBLK, H, LDH, N, ULP ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. INTEGER JBLK, LDH, LDS, N, NBULGE DOUBLE PRECISION ULP * .. * .. Array Arguments .. COMPLEX*16 H( LDH, * ), S( LDS, * ) * .. * * Purpose * ======= * * ZLAMSH sends multiple shifts through a small (single node) matrix to * see how consecutive small subdiagonal elements are modified by * subsequent shifts in an effort to maximize the number of bulges * that can be sent through. * ZLAMSH should only be called when there are multiple shifts/bulges * (NBULGE > 1) and the first shift is starting in the middle of an * unreduced Hessenberg matrix because of two or more consecutive * small subdiagonal elements. * * Arguments * ========= * * S (local input/output) COMPLEX*16 array, ( LDS,* ) * On entry, the matrix of shifts. Only the 2x2 diagonal of S * is referenced. It is assumed that S has JBLK double shifts * (size 2). * On exit, the data is rearranged in the best order for * applying. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * 1 < NBULGE <= JBLK <= LDS/2 * * NBULGE (local input/output) INTEGER * On entry, the number of bulges to send through H ( >1 ). * NBULGE should be less than the maximum determined (JBLK). * 1 < NBULGE <= JBLK <= LDS/2 * On exit, the maximum number of bulges that can be sent * through. * * JBLK (local input) INTEGER * On entry, the number of shifts determined for S. * Unchanged on exit. * * H (local input/output) COMPLEX*16 array ( LDH,N ) * On entry, the local matrix to apply the shifts on. * H should be aligned so that the starting row is 2. * On exit, the data is destroyed. * * LDH (local input) INTEGER * On entry, the leading dimension of H. Unchanged on exit. * * N (local input) INTEGER * On entry, the size of H. If all the bulges are expected to * go through, N should be at least 4*NBULGE+2. * Otherwise, NBULGE may be reduced by this routine. * * ULP (local input) DOUBLE PRECISION * On entry, machine precision * Unchanged on exit. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RONE, TEN PARAMETER ( RONE = 1.0D+0, TEN = 10.0D+0 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IBULGE, IVAL, J, K, M, NR DOUBLE PRECISION DVAL, S1, TST1 COMPLEX*16 CDUM, H00, H10, H11, H12, H21, H22, H33, H33S, $ H43H34, H44, H44S, SUM, T1, T2, T3, V1, V2, V3 * .. * .. Local Arrays .. COMPLEX*16 V( 3 ) * .. * .. External Subroutines .. EXTERNAL ZCOPY, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * M = 2 DO 50 IBULGE = 1, NBULGE H44 = S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) H33 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) H43H34 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 )* $ S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).GT.ULP*TST1 ) THEN * Find minimum DVAL = ( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ) ) / $ ( ULP*TST1 ) IVAL = IBULGE DO 10 I = IBULGE + 1, NBULGE H44 = S( 2*JBLK-2*I+2, 2*JBLK-2*I+2 ) H33 = S( 2*JBLK-2*I+1, 2*JBLK-2*I+1 ) H43H34 = S( 2*JBLK-2*I+1, 2*JBLK-2*I+2 )* $ S( 2*JBLK-2*I+2, 2*JBLK-2*I+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( ( DVAL.GT.( CABS1( H10 )*( CABS1( V2 )+ $ CABS1( V3 ) ) ) / ( ULP*TST1 ) ) .AND. $ ( DVAL.GT.RONE ) ) THEN DVAL = ( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ) ) / $ ( ULP*TST1 ) IVAL = I END IF 10 CONTINUE IF( ( DVAL.LT.TEN ) .AND. ( IVAL.NE.IBULGE ) ) THEN H44 = S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2 ) H33 = S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+1 ) H43H34 = S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+2 ) H10 = S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1 ) S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2 ) = S( 2*JBLK-2* $ IBULGE+2, 2*JBLK-2*IBULGE+2 ) S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+1 ) = S( 2*JBLK-2* $ IBULGE+1, 2*JBLK-2*IBULGE+1 ) S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+2 ) = S( 2*JBLK-2* $ IBULGE+1, 2*JBLK-2*IBULGE+2 ) S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1 ) = S( 2*JBLK-2* $ IBULGE+2, 2*JBLK-2*IBULGE+1 ) S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) = H44 S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) = H33 S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 ) = H43H34 S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) = H10 END IF H44 = S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) H33 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) H43H34 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 )* $ S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) END IF IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).GT.TEN*ULP*TST1 ) $ THEN * IBULGE better not be 1 here or we have a bug! NBULGE = MAX( IBULGE-1, 1 ) RETURN END IF DO 40 K = M, N - 1 NR = MIN( 3, N-K+1 ) IF( K.GT.M ) $ CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.N-1 ) $ H( K+2, K-1 ) = ZERO ELSE * H(m,m-1) must be updated, * H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 DO 20 J = K, N SUM = DCONJG( T1 )*H( K, J ) + $ DCONJG( T2 )*H( K+1, J ) + $ DCONJG( T3 )*H( K+2, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 H( K+2, J ) = H( K+2, J ) - SUM*V3 20 CONTINUE DO 30 J = 1, MIN( K+3, N ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 ) H( J, K+2 ) = H( J, K+2 ) - SUM*DCONJG( V3 ) 30 CONTINUE END IF 40 CONTINUE 50 CONTINUE * RETURN * * End of ZLAMSH * END scalapack-1.8.0/SRC/pslaed1.f0000640000175000017500000002245210363532303015450 0ustar muammarmuammar SUBROUTINE PSLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, $ IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER ID, INFO, IQ, JQ, N, N1 REAL RHO * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) REAL D( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PSLAED1 computes the updated eigensystem of a diagonal * matrix after modification by a rank-one symmetric matrix, * in parallel. * * T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) * * where Z = Q'u, u is a vector of length N with ones in the * N1 and N1 + 1 th elements and zeros elsewhere. * * The eigenvectors of the original matrix are stored in Q, and the * eigenvalues are in D. The algorithm consists of three stages: * * The first stage consists of deflating the size of the problem * when there are multiple eigenvalues or if there is a zero in * the Z vector. For each such occurence the dimension of the * secular equation problem is reduced by one. This stage is * performed by the routine PSLAED2. * * The second stage consists of calculating the updated * eigenvalues. This is done by finding the roots of the secular * equation via the routine SLAED4 (as called by PSLAED3). * This routine also calculates the eigenvectors of the current * problem. * * The final stage consists of computing the updated eigenvectors * directly using the updated eigenvalues. The eigenvectors for * the current problem are multiplied with the eigenvectors from * the overall problem. * * Arguments * ========= * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * * N1 (input) INTEGER * The location of the last eigenvalue in the leading * sub-matrix. * min(1,N) <= N1 <= N. * * D (global input/output) REAL array, dimension (N) * On entry,the eigenvalues of the rank-1-perturbed matrix. * On exit, the eigenvalues of the repaired matrix. * * ID (global input) INTEGER * Q's global row/col index, which points to the beginning * of the submatrix which is to be operated on. * * Q (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * RHO (input) REAL * The subdiagonal entry used to create the rank-1 modification. * * WORK (local workspace/output) REAL array, * dimension 6*N + 2*NP*NQ * * IWORK (local workspace/output) INTEGER array, * dimension 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the ith eigenvalue. * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER COL, COLTYP, IBUF, ICTOT, ICTXT, IDLMDA, IIQ, $ INDCOL, INDROW, INDX, INDXC, INDXP, INDXR, INQ, $ IPQ, IPQ2, IPSM, IPU, IPWORK, IQ1, IQ2, IQCOL, $ IQQ, IQROW, IW, IZ, J, JC, JJ2C, JJC, JJQ, JNQ, $ K, LDQ, LDQ2, LDU, MYCOL, MYROW, NB, NN, NN1, $ NN2, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCQ2( DLEN_ ), DESCU( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, INFOG1L, INFOG2L, $ PSGEMM, PSLAED2, PSLAED3, PSLAEDZ, PSLASET, $ PXERBLA, SCOPY * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE IF( N.LT.0 ) THEN INFO = -1 ELSE IF( ID.GT.DESCQ( N_ ) ) THEN INFO = -4 ELSE IF( N1.GE.N ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PSLAED1', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * The following values are integer pointers which indicate * the portion of the workspace used by a particular array * in PSLAED2 and PSLAED3. * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) * CALL INFOG2L( IQ-1+ID, JQ-1+ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, $ IIQ, JJQ, IQROW, IQCOL ) * NP = NUMROC( N, DESCQ( MB_ ), MYROW, IQROW, NPROW ) NQ = NUMROC( N, DESCQ( NB_ ), MYCOL, IQCOL, NPCOL ) * LDQ2 = MAX( NP, 1 ) LDU = LDQ2 * IZ = 1 IDLMDA = IZ + N IW = IDLMDA + N IPQ2 = IW + N IPU = IPQ2 + LDQ2*NQ IBUF = IPU + LDU*NQ * (IBUF est de taille 3*N au maximum) * ICTOT = 1 IPSM = ICTOT + NPCOL*4 INDX = IPSM + NPCOL*4 INDXC = INDX + N INDXP = INDXC + N INDCOL = INDXP + N COLTYP = INDCOL + N INDROW = COLTYP + N INDXR = INDROW + N * CALL DESCINIT( DESCQ2, N, N, NB, NB, IQROW, IQCOL, ICTXT, LDQ2, $ INFO ) CALL DESCINIT( DESCU, N, N, NB, NB, IQROW, IQCOL, ICTXT, LDU, $ INFO ) * * Form the z-vector which consists of the last row of Q_1 and the * first row of Q_2. * IPWORK = IDLMDA CALL PSLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, WORK( IZ ), $ WORK( IPWORK ) ) * * Deflate eigenvalues. * IPQ = IIQ + ( JJQ-1 )*LDQ CALL PSLAED2( ICTXT, K, N, N1, NB, D, IQROW, IQCOL, Q( IPQ ), LDQ, $ RHO, WORK( IZ ), WORK( IW ), WORK( IDLMDA ), $ WORK( IPQ2 ), LDQ2, WORK( IBUF ), IWORK( ICTOT ), $ IWORK( IPSM ), NPCOL, IWORK( INDX ), IWORK( INDXC ), $ IWORK( INDXP ), IWORK( INDCOL ), IWORK( COLTYP ), $ NN, NN1, NN2, IQ1, IQ2 ) * * Solve Secular Equation. * IF( K.NE.0 ) THEN CALL PSLASET( 'A', N, N, ZERO, ONE, WORK( IPU ), 1, 1, DESCU ) CALL PSLAED3( ICTXT, K, N, NB, D, IQROW, IQCOL, RHO, $ WORK( IDLMDA ), WORK( IW ), WORK( IZ ), $ WORK( IPU ), LDQ2, WORK( IBUF ), IWORK( INDX ), $ IWORK( INDCOL ), IWORK( INDROW ), IWORK( INDXR ), $ IWORK( INDXC ), IWORK( ICTOT ), NPCOL, INFO ) * * Compute the updated eigenvectors. * IQQ = MIN( IQ1, IQ2 ) IF( NN1.GT.0 ) THEN INQ = IQ - 1 + ID JNQ = JQ - 1 + ID + IQQ - 1 CALL PSGEMM( 'N', 'N', N1, NN, NN1, ONE, WORK( IPQ2 ), 1, $ IQ1, DESCQ2, WORK( IPU ), IQ1, IQQ, DESCU, $ ZERO, Q, INQ, JNQ, DESCQ ) END IF IF( NN2.GT.0 ) THEN INQ = IQ - 1 + ID + N1 JNQ = JQ - 1 + ID + IQQ - 1 CALL PSGEMM( 'N', 'N', N-N1, NN, NN2, ONE, WORK( IPQ2 ), $ N1+1, IQ2, DESCQ2, WORK( IPU ), IQ2, IQQ, $ DESCU, ZERO, Q, INQ, JNQ, DESCQ ) END IF * DO 10 J = K + 1, N JC = IWORK( INDX+J-1 ) CALL INFOG1L( JQ-1+JC, NB, NPCOL, MYCOL, IQCOL, JJC, COL ) CALL INFOG1L( JC, NB, NPCOL, MYCOL, IQCOL, JJ2C, COL ) IF( MYCOL.EQ.COL ) THEN IQ2 = IPQ2 + ( JJ2C-1 )*LDQ2 INQ = IPQ + ( JJC-1 )*LDQ CALL SCOPY( NP, WORK( IQ2 ), 1, Q( INQ ), 1 ) END IF 10 CONTINUE END IF * 20 CONTINUE RETURN * * End of PSLAED1 * END scalapack-1.8.0/SRC/pclaqge.f0000640000175000017500000002331110363532303015526 0ustar muammarmuammar SUBROUTINE PCLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED INTEGER IA, JA, M, N REAL AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL C( * ), R( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAQGE equilibrates a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using the row and scaling * factors in the vectors R and C. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)) * containing on entry the M-by-N matrix sub( A ). On exit, * the equilibrated distributed matrix. See EQUED for the * form of the equilibrated distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local input) REAL array, dimension LOCr(M_A) * The row scale factors for sub( A ). R is aligned with the * distributed matrix A, and replicated across every process * column. R is tied to the distributed matrix A. * * C (local input) REAL array, dimension LOCc(N_A) * The column scale factors of sub( A ). C is aligned with the * distributed matrix A, and replicated down every process * row. C is tied to the distributed matrix A. * * ROWCND (global input) REAL * The global ratio of the smallest R(i) to the largest R(i), * IA <= i <= IA+M-1. * * COLCND (global input) REAL * The global ratio of the smallest C(i) to the largest C(i), * JA <= j <= JA+N-1. * * AMAX (global input) REAL * Absolute value of largest distributed submatrix entry. * * EQUED (global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration * = 'R': Row equilibration, i.e., sub( A ) has been pre- * multiplied by diag(R(IA:IA+M-1)), * = 'C': Column equilibration, i.e., sub( A ) has been post- * multiplied by diag(C(JA:JA+N-1)), * = 'B': Both row and column equilibration, i.e., sub( A ) * has been replaced by * diag(R(IA:IA+M-1)) * sub( A ) * diag(C(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if row or column scaling * should be done based on the ratio of the row or column scaling * factors. If ROWCND < THRESH, row scaling is done, and if * COLCND < THRESH, column scaling is done. * * LARGE and SMALL are threshold values used to decide if row scaling * should be done based on the absolute size of the largest matrix * element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, THRESH PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, $ IROFF, J, JJA, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC REAL PSLAMCH EXTERNAL NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PSLAMCH( ICTXT, 'Safe minimum' ) / $ PSLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) $ THEN * * No row scaling * IF( COLCND.GE.THRESH ) THEN * * No column scaling * EQUED = 'N' * ELSE * * Column scaling * IOFFA = (JJA-1)*LDA DO 20 J = JJA, JJA+NQ-1 CJ = C( J ) DO 10 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*A( IOFFA + I ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE EQUED = 'C' END IF * ELSE IF( COLCND.GE.THRESH ) THEN * * Row scaling, no column scaling * IOFFA = (JJA-1)*LDA DO 40 J = JJA, JJA+NQ-1 DO 30 I = IIA, IIA+MP-1 A( IOFFA + I ) = R( I )*A( IOFFA + I ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE EQUED = 'R' * ELSE * * Row and column scaling * IOFFA = (JJA-1)*LDA DO 60 J = JJA, JJA+NQ-1 CJ = C( J ) DO 50 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*R( I )*A( IOFFA + I ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE EQUED = 'B' * END IF * RETURN * * End of PCLAQGE * END scalapack-1.8.0/SRC/pdlarfb.f0000640000175000017500000010371410363532303015532 0ustar muammarmuammar SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PDLARFB applies a real block reflector Q or its transpose Q**T to a * real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) * from the left or the right. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * DIRECT (global input) CHARACTER * Indicates how Q is formed from a product of elementary * reflectors * = 'F': Q = H(1) H(2) . . . H(k) (Forward) * = 'B': Q = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * V (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and * SIDE = 'R'. It contains the local pieces of the distributed * vectors V representing the Householder transformation. * See further details. * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1)); * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1)); * if STOREV = 'R', LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian- * gular matrix T in the representation of the block reflector. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL FORWARD CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW, $ ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV, $ ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV, $ IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV, $ ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND, $ JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC, $ MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV, $ NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D,DGEMM, $ DGSUM2D, DLACPY, DLASET, DTRBR2D, $ DTRBS2D, DTRMM, INFOG1L, INFOG2L, PB_TOPGET, $ PBDTRAN * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF FORWARD = LSAME( DIRECT, 'F' ) IF( FORWARD ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) IROFFV = MOD( IV-1, MBV ) ICOFFV = MOD( JV-1, NBV ) MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYCOL.EQ.ICCOL ) $ NQC = NQC - ICOFFC IF( MYROW.EQ.ICROW ) $ MPC = MPC - IROFFC JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFC = IIC + ( JJC-1 ) * LDC IOFFV = IIV + ( JJV-1 ) * LDV * IF( LSAME( STOREV, 'C' ) ) THEN * * V is stored columnwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV ) * IPV = 1 IPW = IPV + MPC * K LV = MAX( 1, MPC ) LW = MAX( 1, NQC ) * * Broadcast V to the other process columns. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ V( IOFFV ), LDV ) IF( MYROW.EQ.IVROW ) $ CALL DTRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV ) CALL DLACPY( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ WORK( IPV ), LV, MYROW, IVCOL ) IF( MYROW.EQ.IVROW ) $ CALL DTRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV, MYROW, IVCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPV) = ( V1 ) where V1 is unit lower triangular, * ( V2 ) zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + MPC - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 10 CONTINUE IF( K-ITOP .GT.0 ) THEN CALL DLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 10 END IF * ELSE * * WORK(IPV) = ( V1 ) where V2 is unit upper triangular, * ( V2 ) zeroes lower triangular part of V2 * JJ = JJV IOFF = MOD( IV+M-K-1, MBV ) CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ), $ II, ILASTROW ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 20 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL DLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL DLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPV+II-IIV+ITOP*LV ), LV ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 20 END IF * END IF * * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K * IF( MPC.GT.0 ) THEN CALL DGEMM( 'Transpose', 'No transpose', NQC, K, MPC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * CALL DTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, NBV, WORK( IPW ), LW ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL DGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form sub( C )*Q or sub( C )*Q' * * ICOFFC = IROFFV is required by the current transposition * routine PBDTRAN * NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NPV = NPV0 - IROFFV ELSE NPV = NPV0 END IF IF( MYCOL.EQ.ICCOL ) THEN NQC0 = NQC + ICOFFC ELSE NQC0 = NQC END IF * * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]' * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]' * WORK( IPT ) is the workspace for PBDTRAN * IPV = 1 IPW = IPV + K * NQC0 IPT = IPW + NPV0 * K LV = MAX( 1, K ) LW = MAX( 1, NPV0 ) * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN CALL DLASET( 'All', IROFFV, K, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + IROFFV CALL DLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL DLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower * triangular, zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + NPV - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 30 CONTINUE IF( ( K-ITOP ).GT.0 ) THEN CALL DLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ), $ LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 30 END IF * ELSE * * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper * triangular, zeroes lower triangular part of V2. * JJ = JJV CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW, $ DESCV( RSRC_ ), II, ILASTROW ) IOFF = MOD( IV+N-K-1, MBV ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 40 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL DLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL DLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPW1+II-IIV+ITOP*LW ), LW ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 40 END IF END IF END IF * CALL PBDTRAN( ICTXT, 'Columnwise', 'Transpose', N+IROFFV, K, $ MBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV, $ IVROW, IVCOL, -1, ICCOL, WORK( IPT ) ) * * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC * IF( MYCOL.EQ.ICCOL ) $ IPV = IPV + ICOFFC * LV * * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K * LW = MAX( 1, MPC ) * IF( NQC.GT.0 ) THEN CALL DGEMM( 'No transpose', 'Transpose', MPC, K, NQC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN * * Broadcast the block reflector to the other rows. * CALL DTRBS2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV ) ELSE CALL DTRBR2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV, IVROW, MYCOL ) END IF CALL DTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, NBV, WORK( IPW ), LW ) * CALL DGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V' * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL DGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) END IF * ELSE * * V is stored rowwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC = ICOFFV is required by the current transposition * routine PBDTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW ) THEN MPC0 = MPC + IROFFC ELSE MPC0 = MPC END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBDTRAN * IPV = 1 IPW = IPV + MPC0 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC0 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL DLASET( 'All', K, ICOFFV, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + ICOFFV * LW CALL DLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL DLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + MQV - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 50 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL DLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ), $ LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 50 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL, $ DESCV( CSRC_ ), JJ, ILASTCOL ) IOFF = MOD( JV+M-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 60 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL DLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL DLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 60 END IF END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K * CALL PBDTRAN( ICTXT, 'Rowwise', 'Transpose', K, M+ICOFFV, $ NBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV, $ IVROW, IVCOL, ICROW, -1, WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K * IF( MYROW.EQ.ICROW ) $ IPV = IPV + IROFFC * * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V' * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K * LW = MAX( 1, NQC ) * IF( MPC.GT.0 ) THEN CALL DGEMM( 'Transpose', 'No transpose', NQC, K, MPC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL DTRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV ) ELSE CALL DTRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL DTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V' * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL DGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC LV = MAX( 1, K ) LW = MAX( 1, MPC ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL DTRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV ) CALL DLACPY( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL DTRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + NQC - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 70 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL DLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ), $ LV ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 70 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ), $ JJ, ILASTCOL ) IOFF = MOD( JV+N-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 80 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL DLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL DLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 80 END IF * END IF * * WORK( IPV ) is K x NQC = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K * IF( NQC.GT.0 ) THEN CALL DGEMM( 'No Transpose', 'Transpose', MPC, K, NQC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL DTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL DGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) * END IF * END IF * RETURN * * End of PDLARFB * END scalapack-1.8.0/SRC/pdlatrd.f0000640000175000017500000004162010363532303015547 0ustar muammarmuammar SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCW( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), W( * ), $ WORK( * ) * .. * * Purpose * ======= * * PDLATRD reduces NB rows and columns of a real symmetric distributed * matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) to symmetric tridiagonal * form by an orthogonal similarity transformation Q' * sub( A ) * Q, * and returns the matrices V and W which are needed to apply the * transformation to the unreduced part of sub( A ). * * If UPLO = 'U', PDLATRD reduces the last NB rows and columns of a * matrix, of which the upper triangle is supplied; * if UPLO = 'L', PDLATRD reduces the first NB rows and columns of a * matrix, of which the lower triangle is supplied. * * This is an auxiliary routine called by PDSYTRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of rows and columns to be reduced. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. * On exit, if UPLO = 'U', the last NB columns have been reduced * to tridiagonal form, with the diagonal elements overwriting * the diagonal elements of sub( A ); the elements above the * diagonal with the array TAU, represent the orthogonal matrix * Q as a product of elementary reflectors. If UPLO = 'L', the * first NB columns have been reduced to tridiagonal form, with * the diagonal elements overwriting the diagonal elements of * sub( A ); the elements below the diagonal with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors; See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * W (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_W,NB_W), This array contains * the local pieces of the N-by-NB_W matrix W required to * update the unreduced part of sub( A ). * * IW (global input) INTEGER * The row index in the global array W indicating the first * row of sub( W ). * * JW (global input) INTEGER * The column index in the global array W indicating the * first column of sub( W ). * * DESCW (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix W. * * WORK (local workspace) DOUBLE PRECISION array, dimension (NB_A) * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n) H(n-1) . . . H(n-nb+1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the N-by-NB matrix V * which is needed, with W, to apply the transformation to the unreduced * part of the matrix, using a symmetric rank-2k update of the form: * sub( A ) := sub( A ) - V*W' - W*V'. * * The contents of A on exit are illustrated by the following examples * with n = 5 and nb = 2: * * if UPLO = 'U': if UPLO = 'L': * * ( a a a v4 v5 ) ( d ) * ( a a v4 v5 ) ( 1 d ) * ( a 1 v5 ) ( v1 1 a ) * ( d 1 ) ( v1 v2 a a ) * ( d ) ( v1 v2 a a a ) * * where d denotes a diagonal element of the reduced matrix, a denotes * an element of the original matrix that is unchanged, and vi denotes * an element of the vector defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION HALF, ONE, ZERO PARAMETER ( HALF = 0.5D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, JP, JWK, K, $ KW, MYCOL, MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCWK( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, DGEBR2D, DGEBS2D, $ INFOG2L, PDAXPY, PDDOT, PDELGET, $ PDELSET, PDGEMV, PDLARFG, PDSCAL, $ PDSYMV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * IF( LSAME( UPLO, 'U' ) ) THEN * CALL INFOG2L( N+IA-NB, N+JA-NB, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce last NB columns of upper triangle * DO 10 J = JA+N-1, JA+N-NB, -1 I = IA + J - JA K = J - JA + 1 KW = MOD( K-1, DESCA( MB_ ) ) + 1 * * Update A(IA:I,I) * CALL PDGEMV( 'No transpose', K, N-K, -ONE, A, IA, J+1, $ DESCA, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ), $ ONE, A, IA, J, DESCA, 1 ) CALL PDGEMV( 'No transpose', K, N-K, -ONE, W, IW, JW+KW, $ DESCW, A, I, J+1, DESCA, DESCA( M_ ), ONE, A, $ IA, J, DESCA, 1 ) IF( N-K.GT.0 ) $ CALL PDELSET( A, I, J+1, DESCA, E( JP ) ) * * Generate elementary reflector H(i) to annihilate * A(IA:I-2,I) * JP = MIN( JJ+KW-1, NQ ) CALL PDLARFG( K-1, E( JP ), I-1, J, A, IA, J, DESCA, 1, $ TAU ) CALL PDELSET( A, I-1, J, DESCA, ONE ) * * Compute W(IW:IW+K-2,JW+KW-1) * CALL PDSYMV( 'Upper', K-1, ONE, A, IA, JA, DESCA, A, IA, J, $ DESCA, 1, ZERO, W, IW, JW+KW-1, DESCW, 1 ) * JWK = MOD( K-1, DESCWK( NB_ ) ) + 2 CALL PDGEMV( 'Transpose', K-1, N-K, ONE, W, IW, JW+KW, $ DESCW, A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK, $ DESCWK, DESCWK( M_ ) ) CALL PDGEMV( 'No transpose', K-1, N-K, -ONE, A, IA, J+1, $ DESCA, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PDGEMV( 'Transpose', K-1, N-K, ONE, A, IA, J+1, DESCA, $ A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK, DESCWK, $ DESCWK( M_ ) ) CALL PDGEMV( 'No transpose', K-1, N-K, -ONE, W, IW, JW+KW, $ DESCW, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PDSCAL( K-1, TAU( JP ), W, IW, JW+KW-1, DESCW, 1 ) * CALL PDDOT( K-1, ALPHA, W, IW, JW+KW-1, DESCW, 1, A, IA, J, $ DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PDAXPY( K-1, ALPHA, A, IA, J, DESCA, 1, W, IW, JW+KW-1, $ DESCW, 1 ) IF( MYCOL.EQ.IACOL ) THEN CALL PDELGET( 'E', ' ', D( JP ), A, I, J, DESCA ) END IF * 10 CONTINUE * ELSE * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce first NB columns of lower triangle * DO 20 J = JA, JA+NB-1 I = IA + J - JA K = J - JA + 1 * * Update A(J:JA+N-1,J) * CALL PDGEMV( 'No transpose', N-K+1, K-1, -ONE, A, I, JA, $ DESCA, W, IW+K-1, JW, DESCW, DESCW( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PDGEMV( 'No transpose', N-K+1, K-1, -ONE, W, IW+K-1, $ JW, DESCW, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J, DESCA, 1 ) IF( K.GT.1 ) $ CALL PDELSET( A, I, J-1, DESCA, E( JP ) ) * * * Generate elementary reflector H(i) to annihilate * A(I+2:IA+N-1,I) * JP = MIN( JJ+K-1, NQ ) CALL PDLARFG( N-K, E( JP ), I+1, J, A, I+2, J, DESCA, 1, $ TAU ) CALL PDELSET( A, I+1, J, DESCA, ONE ) * * Compute W(IW+K:IW+N-1,JW+K-1) * CALL PDSYMV( 'Lower', N-K, ONE, A, I+1, J+1, DESCA, A, I+1, $ J, DESCA, 1, ZERO, W, IW+K, JW+K-1, DESCW, 1 ) * CALL PDGEMV( 'Transpose', N-K, K-1, ONE, W, IW+K, JW, DESCW, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK, $ DESCWK( M_ ) ) CALL PDGEMV( 'No transpose', N-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PDGEMV( 'Transpose', N-K, K-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK, $ DESCWK( M_ ) ) CALL PDGEMV( 'No transpose', N-K, K-1, -ONE, W, IW+K, JW, $ DESCW, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PDSCAL( N-K, TAU( JP ), W, IW+K, JW+K-1, DESCW, 1 ) CALL PDDOT( N-K, ALPHA, W, IW+K, JW+K-1, DESCW, 1, A, I+1, $ J, DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PDAXPY( N-K, ALPHA, A, I+1, J, DESCA, 1, W, IW+K, $ JW+K-1, DESCW, 1 ) IF( MYCOL.EQ.IACOL ) THEN CALL PDELGET( 'E', ' ', D( JP ), A, I, J, DESCA ) END IF * 20 CONTINUE * END IF * * Broadcast columnwise the diagonal elements into D. * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1, $ IAROW, MYCOL ) END IF END IF * RETURN * * End of PDLATRD * END scalapack-1.8.0/SRC/pzunmqr.f0000640000175000017500000004214210363532303015631 0ustar muammarmuammar SUBROUTINE PZUNMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMQR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PZGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PZGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, $ IPW, IROFFA, IROFFC, J, J1, J2, J3, JB, JCC, $ LCM, LCMQ, LWMIN, MI, MPC0, MYCOL, MYROW, NI, $ NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZUNM2R * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PZUNM2R( SIDE, TRANS, M, N, J1-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PZLARFT( 'Forward', 'Columnwise', NQ-J+JA, JB, A, $ IA+J-JA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H or H' * CALL PZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, $ JB, A, IA+J-JA, J, DESCA, WORK, C, ICC, JCC, $ DESCC, WORK( IPW ) ) 10 CONTINUE * * Use unblocked code for the last block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PZUNM2R( SIDE, TRANS, M, N, J2-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMQR * END scalapack-1.8.0/SRC/pcsrscl.f0000640000175000017500000001626510363532303015575 0ustar muammarmuammar SUBROUTINE PCSRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N REAL SA * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX SX( * ) * .. * * Purpose * ======= * * PCSRSCL multiplies an N-element complex distributed vector * sub( X ) by the real scalar 1/a. This is done without overflow or * underflow as long as the final sub( X )/a does not overflow or * underflow. * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * SA (global input) REAL * The scalar a which is used to divide each component of * sub( X ). SA must be >= 0, or the subroutine will divide by * zero. * * SX (local input/local output) COMPLEX array * containing the local pieces of a distributed matrix of * dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER ICTXT, MYCOL, MYROW, NPCOL, NPROW REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PCSSCAL, PSLABAD * .. * .. External Functions .. REAL PSLAMCH EXTERNAL PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) * * Initialize the denominator to SA and the numerator to 1. * CDEN = SA CNUM = ONE * 10 CONTINUE CDEN1 = CDEN*SMLNUM CNUM1 = CNUM / BIGNUM IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN * * Pre-multiply sub( X ) by SMLNUM if CDEN is large compared to * CNUM. * MUL = SMLNUM DONE = .FALSE. CDEN = CDEN1 ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN * * Pre-multiply sub( X ) by BIGNUM if CDEN is small compared to * CNUM. * MUL = BIGNUM DONE = .FALSE. CNUM = CNUM1 ELSE * * Multiply sub( X ) by CNUM / CDEN and return. * MUL = CNUM / CDEN DONE = .TRUE. END IF * * Scale the vector sub( X ) by MUL * CALL PCSSCAL( N, MUL, SX, IX, JX, DESCX, INCX ) * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PCSRSCL * END scalapack-1.8.0/SRC/pdlacp3.f0000640000175000017500000003017210363532303015443 0ustar muammarmuammar SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER I, II, JJ, LDB, M, REV * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), B( LDB, * ) * .. * * Purpose * ======= * * PDLACP3 is an auxiliary routine that copies from a global parallel * array into a local replicated array or vise versa. Notice that * the entire submatrix that is copied gets placed on one node or * more. The receiving node can be specified precisely, or all nodes * can receive, or just one row or column of nodes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * M is the order of the square submatrix that is copied. * M >= 0. * Unchanged on exit * * I (global input) INTEGER * A(I,I) is the global location that the copying starts from. * Unchanged on exit. * * A (global input/output) DOUBLE PRECISION array, dimension * (DESCA(LLD_),*) * On entry, the parallel matrix to be copied into or from. * On exit, if REV=1, the copied data. * Unchanged on exit if REV=0. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/output) DOUBLE PRECISION array of size (LDB,M) * If REV=0, this is the global portion of the array * A(I:I+M-1,I:I+M-1). * If REV=1, this is the unchanged on exit. * * LDB (local input) INTEGER * The leading dimension of B. * * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) * receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data * If II>=0,JJ=-1, then all cols in row II receive the data * If II=-1,JJ=-1, then all nodes receive the data * If REV<>0, then II is the source row index for the node(s) * sending the replicated B. * * JJ (global input) INTEGER * Similar description as II above * * REV (global input) INTEGER * Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in * A) into the global A. * * Implemented by: G. Henry, May 1, 1997 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN, $ III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP, $ JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D, $ DGESD2D, INFOG1L * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.LE.0 ) $ RETURN * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M DO 10 IDJ = 1, M B( IDI, IDJ ) = ZERO 10 CONTINUE 20 CONTINUE END IF * IFIN = I + M - 1 * IF( MOD( I+HBL, HBL ).NE.0 ) THEN ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN ) ELSE ISTOP = I END IF IDJ = I ISTOPJ = ISTOP IF( IDJ.LE.IFIN ) THEN 30 CONTINUE IDI = I ISTOPI = ISTOP IF( IDI.LE.IFIN ) THEN 40 CONTINUE ROW = MOD( ( IDI-1 ) / HBL, NPROW ) COL = MOD( ( IDJ-1 ) / HBL, NPCOL ) CALL INFOG1L( IDI, HBL, NPROW, ROW, 0, IROW1, ITMP ) IROW2 = NUMROC( ISTOPI, HBL, ROW, 0, NPROW ) CALL INFOG1L( IDJ, HBL, NPCOL, COL, 0, ICOL1, ITMP ) ICOL2 = NUMROC( ISTOPJ, HBL, COL, 0, NPCOL ) IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to everyone * IF( REV.EQ.0 ) THEN CALL DGEBS2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.NE.-1 ) ) THEN * * Send the message to Column MYCOL which better be JJ * IF( REV.EQ.0 ) THEN CALL DGEBS2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to Row MYROW which better be II * IF( REV.EQ.0 ) THEN CALL DGEBS2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND. $ ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN * * Recv/Send the message to (II,JJ) * IF( REV.EQ.0 ) THEN CALL DGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ A( ( ICOL1-1 )*LDA+IROW1 ), LDA, II, $ JJ ) ELSE CALL DGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, II, JJ ) END IF END IF IF( REV.EQ.0 ) THEN DO 60 JJJ = ICOL1, ICOL2 DO 50 III = IROW1, IROW2 B( IDI+III-IROW1+1-I, IDJ+JJJ-ICOL1+1-I ) $ = A( ( JJJ-1 )*LDA+III ) 50 CONTINUE 60 CONTINUE ELSE DO 80 JJJ = ICOL1, ICOL2 DO 70 III = IROW1, IROW2 A( ( JJJ-1 )*LDA+III ) = B( IDI+III-IROW1+1-I, $ IDJ+JJJ-ICOL1+1-I ) 70 CONTINUE 80 CONTINUE END IF ELSE IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL DGEBR2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL DGEBR2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL DGEBR2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL DGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) ELSE CALL DGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) * CALL DGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, * $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL) END IF END IF END IF IDI = ISTOPI + 1 ISTOPI = MIN( ISTOPI+HBL, IFIN ) IF( IDI.LE.IFIN ) $ GO TO 40 END IF IDJ = ISTOPJ + 1 ISTOPJ = MIN( ISTOPJ+HBL, IFIN ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF RETURN * * End of PDLACP3 * END scalapack-1.8.0/SRC/pdormrz.f0000640000175000017500000004333010363532303015612 0ustar muammarmuammar SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMRZ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JAA, $ JCC, LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDLARZB, $ PDLARZT, PDORMR3, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IDUM1( 4 ) = L IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRZ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PDORMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PDLARZT( 'Backward', 'Rowwise', L, IB, A, I, JAA, DESCA, $ TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PDLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, IB, $ L, A, I, JAA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PDORMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMRZ * END scalapack-1.8.0/SRC/pzgetf2.f0000640000175000017500000002264410363532303015503 0ustar muammarmuammar SUBROUTINE PZGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZGETF2 computes an LU factorization of a general M-by-N * distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using * partial pivoting with row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal * elements (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). * * This is the right-looking Parallel Level 2 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). * NB_A-MOD(JA-1, NB_A) >= N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ). On exit, this array contains * the local pieces of the factors L and U from the factoriza- * tion sub( A ) = P*L*U; the unit diagonal elements of L are * not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IROFF, J, $ JJA, MN, MYCOL, MYROW, NPCOL, NPROW COMPLEX*16 GMAX * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PB_TOPGET, PXERBLA, PZAMAX, $ PZGERU, PZSCAL, PZSWAP * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * MN = MIN( M, N ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) * IF( MYCOL.EQ.IACOL ) THEN DO 10 J = JA, JA+MN-1 I = IA + J - JA * * Find pivot and test for singularity. * CALL PZAMAX( M-J+JA, GMAX, IPIV( IIA+J-JA ), A, I, J, $ DESCA, 1 ) IF( GMAX.NE.ZERO ) THEN * * Apply the row interchanges to columns JA:JA+N-1 * CALL PZSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, $ IPIV( IIA+J-JA ), JA, DESCA, DESCA( M_ ) ) * * Compute elements I+1:IA+M-1 of J-th column. * IF( J-JA+1.LT.M ) $ CALL PZSCAL( M-J+JA-1, ONE / GMAX, A, I+1, J, $ DESCA, 1 ) ELSE IF( INFO.EQ.0 ) THEN INFO = J - JA + 1 END IF * * Update trailing submatrix * IF( J-JA+1.LT.MN ) THEN CALL PZGERU( M-J+JA-1, N-J+JA-1, -ONE, A, I+1, J, DESCA, $ 1, A, I, J+1, DESCA, DESCA( M_ ), A, I+1, $ J+1, DESCA ) END IF 10 CONTINUE * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN, MYROW, IACOL ) * END IF * RETURN * * End of PZGETF2 * END scalapack-1.8.0/SRC/csteqr2.f0000640000175000017500000004453010363532303015503 0ustar muammarmuammar SUBROUTINE CSTEQR2( COMPZ, N, D, E, Z, LDZ, NR, WORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * November 15, 1997 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, LDZ, N, NR * .. * .. Array Arguments .. REAL D( * ), E( * ), WORK( * ) COMPLEX Z( LDZ, * ) * .. * * Purpose * ======= * * CSTEQR2 is a modified version of LAPACK routine CSTEQR. * CSTEQR2 computes all eigenvalues and, optionally, eigenvectors of a * symmetric tridiagonal matrix using the implicit QL or QR method. * CSTEQR2 is modified from CSTEQR to allow each ScaLAPACK process * running CSTEQR2 to perform updates on a distributed matrix Q. * Proper usage of CSTEQR2 can be gleaned from * examination of ScaLAPACK's * PCHEEV. * CSTEQR2 incorporates changes attributed to Greg Henry. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. * = 'I': Compute eigenvalues and eigenvectors of the * tridiagonal matrix. Z must be initialized to the * identity matrix by PCLASET or CLASET prior * to entering this subroutine. * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input/output) REAL array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in ascending order. * * E (input/output) REAL array, dimension (N-1) * On entry, the (n-1) subdiagonal elements of the tridiagonal * matrix. * On exit, E has been destroyed. * * Z (local input/local output) COMPLEX array, global * dimension (N, N), local dimension (LDZ, NR). * On entry, if COMPZ = 'V', then Z contains the orthogonal * matrix used in the reduction to tridiagonal form. * On exit, if INFO = 0, then if COMPZ = 'V', Z contains the * orthonormal eigenvectors of the original symmetric matrix, * and if COMPZ = 'I', Z contains the orthonormal eigenvectors * of the symmetric tridiagonal matrix. * If COMPZ = 'N', then Z is not referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= 1, and if * eigenvectors are desired, then LDZ >= max(1,N). * * NR (input) INTEGER * NR = MAX(1, NUMROC( N, NB, MYPROW, 0, NPROCS ) ). * If COMPZ = 'N', then NR is not referenced. * * WORK (workspace) REAL array, dimension (max(1,2*N-2)) * If COMPZ = 'N', then WORK is not referenced. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: the algorithm has failed to find all the eigenvalues in * a total of 30*N iterations; if INFO = i, then i * elements of E have not converged to zero; on exit, D * and E contain the elements of a symmetric tridiagonal * matrix which is orthogonally similar to the original * matrix. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, TWO, THREE, HALF PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, $ THREE = 3.0E0, HALF = 0.5E0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E0, 1.0E0 ) ) INTEGER MAXIT, NMAXLOOK PARAMETER ( MAXIT = 30, NMAXLOOK = 15 ) * .. * .. Local Scalars .. INTEGER I, ICOMPZ, II, ILAST, ISCALE, J, JTOT, K, L, $ L1, LEND, LENDM1, LENDP1, LENDSV, LM1, LSV, M, $ MM, MM1, NLOOK, NM1, NMAXIT REAL ANORM, B, C, EPS, EPS2, F, G, GP, OLDEL, OLDGP, $ OLDRP, P, R, RP, RT1, RT2, S, SAFMAX, SAFMIN, $ SSFMAX, SSFMIN, TST, TST1 * .. * .. External Functions .. LOGICAL LSAME REAL SLAMCH, SLANST, SLAPY2 EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 * .. * .. External Subroutines .. EXTERNAL CLASR, CSWAP, SLAEV2, SLARTG, SLASCL, SSTERF, $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * ILAST = 0 INFO = 0 * IF( LSAME( COMPZ, 'N' ) ) THEN ICOMPZ = 0 ELSEIF( LSAME( COMPZ, 'I' ) ) THEN ICOMPZ = 1 ELSE ICOMPZ = -1 ENDIF IF( ICOMPZ.LT.0 ) THEN INFO = -1 ELSEIF( N.LT.0 ) THEN INFO = -2 ELSEIF( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, NR ) ) THEN INFO = -6 ENDIF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CSTEQR2', -INFO ) RETURN ENDIF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * If eigenvectors aren't not desired, this is faster * IF( ICOMPZ.EQ.0 ) THEN CALL SSTERF( N, D, E, INFO ) RETURN ENDIF * IF( N.EQ.1 ) THEN Z( 1, 1 ) = CONE RETURN ENDIF * * Determine the unit roundoff and over/underflow thresholds. * EPS = SLAMCH( 'E' ) EPS2 = EPS**2 SAFMIN = SLAMCH( 'S' ) SAFMAX = ONE / SAFMIN SSFMAX = SQRT( SAFMAX ) / THREE SSFMIN = SQRT( SAFMIN ) / EPS2 * * Compute the eigenvalues and eigenvectors of the tridiagonal * matrix. * NMAXIT = N*MAXIT JTOT = 0 * * Determine where the matrix splits and choose QL or QR iteration * for each block, according to whether top or bottom diagonal * element is smaller. * L1 = 1 NM1 = N - 1 * 10 CONTINUE IF( L1.GT.N ) $ GOTO 220 IF( L1.GT.1 ) $ E( L1-1 ) = ZERO IF( L1.LE.NM1 ) THEN DO 20 M = L1, NM1 TST = ABS( E( M ) ) IF( TST.EQ.ZERO ) $ GOTO 30 IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ $ 1 ) ) ) )*EPS ) THEN E( M ) = ZERO GOTO 30 ENDIF 20 CONTINUE ENDIF M = N * 30 CONTINUE L = L1 LSV = L LEND = M LENDSV = LEND L1 = M + 1 IF( LEND.EQ.L ) $ GOTO 10 * * Scale submatrix in rows and columns L to LEND * ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) ISCALE = 0 IF( ANORM.EQ.ZERO ) $ GOTO 10 IF( ANORM.GT.SSFMAX ) THEN ISCALE = 1 CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, $ INFO ) CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, $ INFO ) ELSEIF( ANORM.LT.SSFMIN ) THEN ISCALE = 2 CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, $ INFO ) CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, $ INFO ) ENDIF * * Choose between QL and QR iteration * IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN LEND = LSV L = LENDSV ENDIF * IF( LEND.GT.L ) THEN * * QL Iteration * * Look for small subdiagonal element. * 40 CONTINUE IF( L.NE.LEND ) THEN LENDM1 = LEND - 1 DO 50 M = L, LENDM1 TST = ABS( E( M ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ $ SAFMIN )GOTO 60 50 CONTINUE ENDIF * M = LEND * 60 CONTINUE IF( M.LT.LEND ) $ E( M ) = ZERO P = D( L ) IF( M.EQ.L ) $ GOTO 110 * * If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L+1 ) THEN CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) WORK( L ) = C WORK( N-1+L ) = S CALL CLASR( 'R', 'V', 'B', NR, 2, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) D( L ) = RT1 D( L+1 ) = RT2 E( L ) = ZERO L = L + 2 IF( L.LE.LEND ) $ GOTO 40 GOTO 200 ENDIF * IF( JTOT.EQ.NMAXIT ) $ GOTO 200 JTOT = JTOT + 1 * * Form shift. * G = ( D( L+1 )-P ) / ( TWO*E( L ) ) R = SLAPY2( G, ONE ) G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) * IF( ICOMPZ.EQ.0 ) THEN * Do not do a lookahead! GOTO 90 ENDIF * OLDEL = ABS( E( L ) ) GP = G RP = R TST = ABS( E( L ) )**2 TST = TST / ( ( EPS2*ABS( D( L ) ) )*ABS( D( L+1 ) )+SAFMIN ) * NLOOK = 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) THEN 70 CONTINUE * * This is the lookahead loop, going until we have * convergence or too many steps have been taken. * S = ONE C = ONE P = ZERO MM1 = M - 1 DO 80 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL SLARTG( GP, F, C, S, RP ) GP = D( I+1 ) - P RP = ( D( I )-GP )*S + TWO*C*B P = S*RP IF( I.NE.L ) $ GP = C*RP - B 80 CONTINUE OLDGP = GP OLDRP = RP * Find GP & RP for the next iteration IF( ABS( C*OLDRP-B ).GT.SAFMIN ) THEN GP = ( ( OLDGP+P )-( D( L )-P ) ) / ( TWO*( C*OLDRP-B ) ) ELSE * * Goto put in by G. Henry to fix ALPHA problem * GOTO 90 * GP = ( ( OLDGP+P )-( D( L )-P ) ) / * $ ( TWO*( C*OLDRP-B )+SAFMIN ) ENDIF RP = SLAPY2( GP, ONE ) GP = D( M ) - ( D( L )-P ) + $ ( ( C*OLDRP-B ) / ( GP+SIGN( RP, GP ) ) ) TST1 = TST TST = ABS( C*OLDRP-B )**2 TST = TST / ( ( EPS2*ABS( D( L )-P ) )*ABS( OLDGP+P )+ $ SAFMIN ) * Make sure that we are making progress IF( ABS( C*OLDRP-B ).GT.0.9E0*OLDEL ) THEN IF( ABS( C*OLDRP-B ).GT.OLDEL ) THEN GP = G RP = R ENDIF TST = HALF ELSE OLDEL = ABS( C*OLDRP-B ) ENDIF NLOOK = NLOOK + 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) $ GOTO 70 ENDIF * IF( ( TST.LE.ONE ) .AND. ( TST.NE.HALF ) .AND. $ ( ABS( P ).LT.EPS*ABS( D( L ) ) ) .AND. $ ( ILAST.EQ.L ) .AND. ( ABS( E( L ) )**2.LE.10000.0E0* $ ( ( EPS2*ABS( D( L ) ) )*ABS( D( L+1 ) )+SAFMIN ) ) ) THEN * * Skip the current step: the subdiagonal info is just noise. * M = L E( M ) = ZERO P = D( L ) JTOT = JTOT - 1 GOTO 110 ENDIF G = GP R = RP * * Lookahead over * 90 CONTINUE * S = ONE C = ONE P = ZERO * * Inner loop * MM1 = M - 1 DO 100 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL SLARTG( G, F, C, S, R ) IF( I.NE.M-1 ) $ E( I+1 ) = R G = D( I+1 ) - P R = ( D( I )-G )*S + TWO*C*B P = S*R D( I+1 ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * WORK( I ) = C WORK( N-1+I ) = -S * 100 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * MM = M - L + 1 CALL CLASR( 'R', 'V', 'B', NR, MM, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) * D( L ) = D( L ) - P E( L ) = G ILAST = L GOTO 40 * * Eigenvalue found. * 110 CONTINUE D( L ) = P * L = L + 1 IF( L.LE.LEND ) $ GOTO 40 GOTO 200 * ELSE * * QR Iteration * * Look for small superdiagonal element. * 120 CONTINUE IF( L.NE.LEND ) THEN LENDP1 = LEND + 1 DO 130 M = L, LENDP1, -1 TST = ABS( E( M-1 ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ $ SAFMIN )GOTO 140 130 CONTINUE ENDIF * M = LEND * 140 CONTINUE IF( M.GT.LEND ) $ E( M-1 ) = ZERO P = D( L ) IF( M.EQ.L ) $ GOTO 190 * * If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L-1 ) THEN CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) WORK( M ) = C WORK( N-1+M ) = S CALL CLASR( 'R', 'V', 'F', NR, 2, WORK( M ), WORK( N-1+M ), $ Z( 1, L-1 ), LDZ ) D( L-1 ) = RT1 D( L ) = RT2 E( L-1 ) = ZERO L = L - 2 IF( L.GE.LEND ) $ GOTO 120 GOTO 200 ENDIF * IF( JTOT.EQ.NMAXIT ) $ GOTO 200 JTOT = JTOT + 1 * * Form shift. * G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) R = SLAPY2( G, ONE ) G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) * IF( ICOMPZ.EQ.0 ) THEN * Do not do a lookahead! GOTO 170 ENDIF * OLDEL = ABS( E( L-1 ) ) GP = G RP = R TST = ABS( E( L-1 ) )**2 TST = TST / ( ( EPS2*ABS( D( L ) ) )*ABS( D( L-1 ) )+SAFMIN ) NLOOK = 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) THEN 150 CONTINUE * * This is the lookahead loop, going until we have * convergence or too many steps have been taken. * S = ONE C = ONE P = ZERO * * Inner loop * LM1 = L - 1 DO 160 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL SLARTG( GP, F, C, S, RP ) GP = D( I ) - P RP = ( D( I+1 )-GP )*S + TWO*C*B P = S*RP IF( I.LT.LM1 ) $ GP = C*RP - B 160 CONTINUE OLDGP = GP OLDRP = RP * Find GP & RP for the next iteration IF( ABS( C*OLDRP-B ).GT.SAFMIN ) THEN GP = ( ( OLDGP+P )-( D( L )-P ) ) / ( TWO*( C*OLDRP-B ) ) ELSE * * Goto put in by G. Henry to fix ALPHA problem * GOTO 170 * GP = ( ( OLDGP+P )-( D( L )-P ) ) / * $ ( TWO*( C*OLDRP-B )+SAFMIN ) ENDIF RP = SLAPY2( GP, ONE ) GP = D( M ) - ( D( L )-P ) + $ ( ( C*OLDRP-B ) / ( GP+SIGN( RP, GP ) ) ) TST1 = TST TST = ABS( ( C*OLDRP-B ) )**2 TST = TST / ( ( EPS2*ABS( D( L )-P ) )*ABS( OLDGP+P )+ $ SAFMIN ) * Make sure that we are making progress IF( ABS( C*OLDRP-B ).GT.0.9E0*OLDEL ) THEN IF( ABS( C*OLDRP-B ).GT.OLDEL ) THEN GP = G RP = R ENDIF TST = HALF ELSE OLDEL = ABS( C*OLDRP-B ) ENDIF NLOOK = NLOOK + 1 IF( ( TST.GT.ONE ) .AND. ( NLOOK.LE.NMAXLOOK ) ) $ GOTO 150 ENDIF IF( ( TST.LE.ONE ) .AND. ( TST.NE.HALF ) .AND. $ ( ABS( P ).LT.EPS*ABS( D( L ) ) ) .AND. $ ( ILAST.EQ.L ) .AND. ( ABS( E( L-1 ) )**2.LE.10000.0E0* $ ( ( EPS2*ABS( D( L-1 ) ) )*ABS( D( L ) )+SAFMIN ) ) ) THEN * * Skip the current step: the subdiagonal info is just noise. * M = L E( M-1 ) = ZERO P = D( L ) JTOT = JTOT - 1 GOTO 190 ENDIF * G = GP R = RP * * Lookahead over * 170 CONTINUE * S = ONE C = ONE P = ZERO DO 180 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL SLARTG( G, F, C, S, R ) IF( I.NE.M ) $ E( I-1 ) = R G = D( I ) - P R = ( D( I+1 )-G )*S + TWO*C*B P = S*R D( I ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * WORK( I ) = C WORK( N-1+I ) = S * 180 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * MM = L - M + 1 CALL CLASR( 'R', 'V', 'F', NR, MM, WORK( M ), WORK( N-1+M ), $ Z( 1, M ), LDZ ) * D( L ) = D( L ) - P E( LM1 ) = G ILAST = L GOTO 120 * * Eigenvalue found. * 190 CONTINUE D( L ) = P * L = L - 1 IF( L.GE.LEND ) $ GOTO 120 GOTO 200 * ENDIF * * Undo scaling if necessary * 200 CONTINUE IF( ISCALE.EQ.1 ) THEN CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ELSEIF( ISCALE.EQ.2 ) THEN CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ENDIF * * Check for no convergence to an eigenvalue after a total * of N*MAXIT iterations. * IF( JTOT.LT.NMAXIT ) $ GOTO 10 DO 210 I = 1, N - 1 IF( E( I ).NE.ZERO ) $ INFO = INFO + 1 210 CONTINUE GOTO 250 * * Order eigenvalues and eigenvectors. * 220 CONTINUE * * Use Selection Sort to minimize swaps of eigenvectors * DO 240 II = 2, N I = II - 1 K = I P = D( I ) DO 230 J = II, N IF( D( J ).LT.P ) THEN K = J P = D( J ) ENDIF 230 CONTINUE IF( K.NE.I ) THEN D( K ) = D( I ) D( I ) = P CALL CSWAP( NR, Z( 1, I ), 1, Z( 1, K ), 1 ) ENDIF 240 CONTINUE * 250 CONTINUE * WRITE( *, FMT = * )'JTOT', JTOT RETURN * * End of SSTEQR2 * END scalapack-1.8.0/SRC/pslacon.f0000640000175000017500000003226510363532303015561 0ustar muammarmuammar SUBROUTINE PSLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, $ EST, KASE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N REAL EST * .. * .. Array Arguments .. INTEGER DESCV( * ), DESCX( * ), ISGN( * ) REAL V( * ), X( * ) * .. * * Purpose * ======= * * PSLACON estimates the 1-norm of a square, real distributed matrix A. * Reverse communication is used for evaluating matrix-vector products. * X and V are aligned with the distributed matrix A, this information * is implicitly contained within IV, IX, DESCV, and DESCX. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vectors V and X. N >= 0. * * V (local workspace) REAL pointer into the local * memory to an array of dimension LOCr(N+MOD(IV-1,MB_V)). On * the final return, V = A*W, where EST = norm(V)/norm(W) * (W is not returned). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * X (local input/local output) REAL pointer into the * local memory to an array of dimension * LOCr(N+MOD(IX-1,MB_X)). On an intermediate return, X * should be overwritten by * A * X, if KASE=1, * A' * X, if KASE=2, * PSLACON must be re-called with all the other parameters * unchanged. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * ISGN (local workspace) INTEGER array, dimension * LOCr(N+MOD(IX-1,MB_X)). ISGN is aligned with X and V. * * * EST (global output) REAL * An estimate (a lower bound) for norm(A). * * KASE (local input/local output) INTEGER * On the initial call to PSLACON, KASE should be 0. * On an intermediate return, KASE will be 1 or 2, indicating * whether X should be overwritten by A * X or A' * X. * On the final return from PSLACON, KASE will again be 0. * * Further Details * =============== * * The serial version SLACON has been contributed by Nick Higham, * University of Manchester. It was originally named SONEST, dated * March 16, 1988. * * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of * a real or complex matrix, with applications to condition estimation", * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ZERO, ONE, TWO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IFLAG, IIVX, IMAXROW, IOFFVX, IROFF, $ ITER, IVXCOL, IVXROW, J, JLAST, JJVX, JUMP, $ K, MYCOL, MYROW, NP, NPCOL, NPROW REAL ALTSGN, ESTOLD, JLMAX, TEMP, XMAX * .. * .. Local Arrays .. REAL WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGSUM2D, INFOG2L, PSAMAX, $ PSASUM, PSELGET, SGEBR2D, $ SGEBS2D, SCOPY * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, INDXL2G, NUMROC EXTERNAL INDXG2L, INDXG2P, INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD, NINT, REAL, SIGN * .. * .. Save statement .. SAVE * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) IF( MYCOL.NE.IVXCOL ) $ RETURN IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) $ NP = NP - IROFF IOFFVX = IIVX + (JJVX-1)*DESCX( LLD_ ) * IF( KASE.EQ.0 ) THEN DO 10 I = IOFFVX, IOFFVX+NP-1 X( I ) = ONE / REAL( N ) 10 CONTINUE KASE = 1 JUMP = 1 RETURN END IF * GO TO ( 20, 40, 70, 110, 140 )JUMP * * ................ ENTRY (JUMP = 1) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X * 20 CONTINUE IF( N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN V( IOFFVX ) = X( IOFFVX ) EST = ABS( V( IOFFVX ) ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF * ... QUIT GO TO 150 END IF CALL PSASUM( N, EST, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * DO 30 I = IOFFVX, IOFFVX+NP-1 X( I ) = SIGN( ONE, X( I ) ) ISGN( I ) = NINT( X( I ) ) 30 CONTINUE KASE = 2 JUMP = 2 RETURN * * ................ ENTRY (JUMP = 2) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X * 40 CONTINUE CALL PSAMAX( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = REAL( J ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( WORK( 2 ) ) END IF END IF ITER = 2 * * MAIN LOOP - ITERATIONS 2, 3,...,ITMAX * 50 CONTINUE DO 60 I = IOFFVX, IOFFVX+NP-1 X( I ) = ZERO 60 CONTINUE IMAXROW = INDXG2P( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) IF( MYROW.EQ.IMAXROW ) THEN I = INDXG2L( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) X( I ) = ONE END IF KASE = 1 JUMP = 3 RETURN * * ................ ENTRY (JUMP = 3) * X HAS BEEN OVERWRITTEN BY A*X * 70 CONTINUE CALL SCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) ESTOLD = EST CALL PSASUM( N, EST, V, IV, JV, DESCV, 1 ) IF( DESCV( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF IFLAG = 0 DO 80 I = IOFFVX, IOFFVX+NP-1 IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) THEN IFLAG = 1 GO TO 90 END IF 80 CONTINUE * 90 CONTINUE CALL IGSUM2D( ICTXT, 'C', ' ', 1, 1, IFLAG, 1, -1, MYCOL ) * * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. * ALONG WITH IT, TEST FOR CYCLING. * IF( IFLAG.EQ.0 .OR. EST.LE.ESTOLD ) $ GO TO 120 * DO 100 I = IOFFVX, IOFFVX+NP-1 X( I ) = SIGN( ONE, X( I ) ) ISGN( I ) = NINT( X( I ) ) 100 CONTINUE KASE = 2 JUMP = 4 RETURN * * ................ ENTRY (JUMP = 4) * X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X * 110 CONTINUE JLAST = J CALL PSAMAX( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = REAL( J ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( WORK( 2 ) ) END IF END IF CALL PSELGET( 'Columnwise', ' ', JLMAX, X, JLAST, JX, DESCX ) IF( ( JLMAX.NE.ABS( XMAX ) ).AND.( ITER.LT.ITMAX ) ) THEN ITER = ITER + 1 GO TO 50 END IF * * ITERATION COMPLETE. FINAL STAGE. * 120 CONTINUE DO 130 I = IOFFVX, IOFFVX+NP-1 K = INDXL2G( I-IOFFVX+IIVX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW )-IX+1 IF( MOD( K, 2 ).EQ.0 ) THEN ALTSGN = -ONE ELSE ALTSGN = ONE END IF X( I ) = ALTSGN*( ONE+REAL( K-1 ) / REAL( N-1 ) ) 130 CONTINUE KASE = 1 JUMP = 5 RETURN * * ................ ENTRY (JUMP = 5) * X HAS BEEN OVERWRITTEN BY A*X * 140 CONTINUE CALL PSASUM( N, TEMP, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1, $ IVXROW, MYCOL ) END IF END IF TEMP = TWO*( TEMP / REAL( 3*N ) ) IF( TEMP.GT.EST ) THEN CALL SCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) EST = TEMP END IF * 150 CONTINUE KASE = 0 * RETURN * * End of PSLACON * END scalapack-1.8.0/SRC/psgehd2.f0000640000175000017500000002670210363532303015455 0ustar muammarmuammar SUBROUTINE PSGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEHD2 reduces a real general distributed matrix sub( A ) * to upper Hessenberg form H by an orthogonal similarity transforma- * tion: Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+JLO-2 * and JA+JHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB + MAX( NpA0, NB ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on * exit in A(ia+ilo+i:ia+ihi-1,ja+ilo+i-2), and tau in TAU(ja+ilo+i-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follo- * wing example, with n = 7, ilo = 2 and ihi = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), h denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(ja+ilo+i-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IAROW, ICOFFA, ICTXT, IROFFA, J, K, LWMIN, $ MYCOL, MYROW, NPA0, NPCOL, NPROW REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLARFG, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NPA0 = NUMROC( IHI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = DESCA( NB_ ) + MAX( NPA0, DESCA( NB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * DO 10 K = ILO, IHI-1 I = IA + K - 1 J = JA + K - 1 * * Compute elementary reflector H(j) to annihilate * A(i+2:ihi+ia-1,j) * CALL PSLARFG( IHI-K, AII, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PSELSET( A, I+1, J, DESCA, ONE ) * * Apply H(k) to A(ia:ihi+ia-1,j+1:ihi+ja-1) from the right * CALL PSLARF( 'Right', IHI, IHI-K, A, I+1, J, DESCA, 1, TAU, A, $ IA, J+1, DESCA, WORK ) * * Apply H(j) to A(i+1:ia+ihi-1,j+1:ja+n-1) from the left * CALL PSLARF( 'Left', IHI-K, N-K, A, I+1, J, DESCA, 1, TAU, A, $ I+1, J+1, DESCA, WORK ) * CALL PSELSET( A, I+1, J, DESCA, AII ) 10 CONTINUE * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEHD2 * END scalapack-1.8.0/SRC/claref.f0000640000175000017500000003150310363532303015350 0ustar muammarmuammar SUBROUTINE CLAREF( TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, $ ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, $ LIHIZ, VECS, V2, V3, T1, T2, T3 ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. LOGICAL BLOCK, WANTZ CHARACTER TYPE INTEGER ICOL1, IROW1, ISTART, ISTOP, ITMP1, ITMP2, LDA, $ LDZ, LIHIZ, LILOZ COMPLEX T1, T2, T3, V2, V3 * .. * .. Array Arguments .. COMPLEX A( LDA, * ), VECS( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * CLAREF applies one or several Householder reflectors of size 3 * to one or two matrices (if column is specified) on either their * rows or columns. * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * If 'R': Apply reflectors to the rows of the matrix * (apply from left) * Otherwise: Apply reflectors to the columns of the matrix * Unchanged on exit. * * A (global input/output) COMPLEX array, (LDA,*) * On entry, the matrix to receive the reflections. * The updated matrix on exit. * * LDA (local input) INTEGER * On entry, the leading dimension of A. Unchanged on exit. * * WANTZ (global input) LOGICAL * If .TRUE., then apply any column reflections to Z as well. * If .FALSE., then do no additional work on Z. * * Z (global input/output) COMPLEX array, (LDZ,*) * On entry, the second matrix to receive column reflections. * This is changed only if WANTZ is set. * * LDZ (local input) INTEGER * On entry, the leading dimension of Z. Unchanged on exit. * * BLOCK (global input) LOGICAL * If .TRUE., then apply several reflectors at once and read * their data from the VECS array. * If .FALSE., apply the single reflector given by V2, V3, * T1, T2, and T3. * * IROW1 (local input/output) INTEGER * On entry, the local row element of A. * Undefined on output. * * * ICOL1 (local input/output) INTEGER * On entry, the local column element of A. * Undefined on output. * * ISTART (global input) INTEGER * Specifies the "number" of the first reflector. This is * used as an index into VECS if BLOCK is set. * ISTART is ignored if BLOCK is .FALSE.. * * ISTOP (global input) INTEGER * Specifies the "number" of the last reflector. This is * used as an index into VECS if BLOCK is set. * ISTOP is ignored if BLOCK is .FALSE.. * * ITMP1 (local input) INTEGER * Starting range into A. For rows, this is the local * first column. For columns, this is the local first row. * * ITMP2 (local input) INTEGER * Ending range into A. For rows, this is the local last * column. For columns, this is the local last row. * * LILOZ * LIHIZ (local input) INTEGER * These serve the same purpose as ITMP1,ITMP2 but for Z * when WANTZ is set. * * VECS (global input) COMPLEX array of size 3*N (matrix size) * This holds the size 3 reflectors one after another and this * is only accessed when BLOCK is .TRUE. * * V2 * V3 * T1 * T2 * T3 (global input/output) COMPLEX * This holds information on a single size 3 Householder * reflector and is read when BLOCK is .FALSE., and * overwritten when BLOCK is .TRUE. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Local Scalars .. INTEGER J, K COMPLEX A1, A11, A2, A22, A3, A4, A5, B1, B2, B3, B4, $ B5, H11, H22, SUM, SUM1, SUM2, SUM3, T12, T13, $ T22, T23, T32, T33, TMP1, TMP2, TMP3, V22, V23, $ V32, V33 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MOD * .. * .. Executable Statements .. * IF( LSAME( TYPE, 'R' ) ) THEN IF( BLOCK ) THEN DO 30 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 10 J = ITMP1, ITMP2 - MOD( ITMP2-ITMP1+1, 2 ), 2 A1 = A( IROW1, J ) A2 = A( IROW1+1, J ) A3 = A( IROW1+2, J ) A4 = A( IROW1+3, J ) A5 = A( IROW1+4, J ) B1 = A( IROW1, J+1 ) B2 = A( IROW1+1, J+1 ) B3 = A( IROW1+2, J+1 ) B4 = A( IROW1+3, J+1 ) B5 = A( IROW1+4, J+1 ) SUM1 = CONJG( T1 )*A1 + CONJG( T2 )*A2 + $ CONJG( T3 )*A3 A( IROW1, J ) = A1 - SUM1 H11 = A2 - SUM1*V2 H22 = A3 - SUM1*V3 TMP1 = CONJG( T1 )*B1 + CONJG( T2 )*B2 + $ CONJG( T3 )*B3 A( IROW1, J+1 ) = B1 - TMP1 A11 = B2 - TMP1*V2 A22 = B3 - TMP1*V3 SUM2 = CONJG( T12 )*H11 + CONJG( T22 )*H22 + $ CONJG( T32 )*A4 A( IROW1+1, J ) = H11 - SUM2 H11 = H22 - SUM2*V22 H22 = A4 - SUM2*V32 TMP2 = CONJG( T12 )*A11 + CONJG( T22 )*A22 + $ CONJG( T32 )*B4 A( IROW1+1, J+1 ) = A11 - TMP2 A11 = A22 - TMP2*V22 A22 = B4 - TMP2*V32 SUM3 = CONJG( T13 )*H11 + CONJG( T23 )*H22 + $ CONJG( T33 )*A5 A( IROW1+2, J ) = H11 - SUM3 A( IROW1+3, J ) = H22 - SUM3*V23 A( IROW1+4, J ) = A5 - SUM3*V33 TMP3 = CONJG( T13 )*A11 + CONJG( T23 )*A22 + $ CONJG( T33 )*B5 A( IROW1+2, J+1 ) = A11 - TMP3 A( IROW1+3, J+1 ) = A22 - TMP3*V23 A( IROW1+4, J+1 ) = B5 - TMP3*V33 10 CONTINUE DO 20 J = ITMP2 - MOD( ITMP2-ITMP1+1, 2 ) + 1, ITMP2 SUM = CONJG( T1 )*A( IROW1, J ) + $ CONJG( T2 )*A( IROW1+1, J ) + $ CONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM H11 = A( IROW1+1, J ) - SUM*V2 H22 = A( IROW1+2, J ) - SUM*V3 SUM = CONJG( T12 )*H11 + CONJG( T22 )*H22 + $ CONJG( T32 )*A( IROW1+3, J ) A( IROW1+1, J ) = H11 - SUM H11 = H22 - SUM*V22 H22 = A( IROW1+3, J ) - SUM*V32 SUM = CONJG( T13 )*H11 + CONJG( T23 )*H22 + $ CONJG( T33 )*A( IROW1+4, J ) A( IROW1+2, J ) = H11 - SUM A( IROW1+3, J ) = H22 - SUM*V23 A( IROW1+4, J ) = A( IROW1+4, J ) - SUM*V33 20 CONTINUE IROW1 = IROW1 + 3 30 CONTINUE DO 50 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 40 J = ITMP1, ITMP2 SUM = CONJG( T1 )*A( IROW1, J ) + $ CONJG( T2 )*A( IROW1+1, J ) + $ CONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3 40 CONTINUE IROW1 = IROW1 + 1 50 CONTINUE ELSE DO 60 J = ITMP1, ITMP2 SUM = CONJG( T1 )*A( IROW1, J ) + $ CONJG( T2 )*A( IROW1+1, J ) + $ CONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3 60 CONTINUE END IF ELSE * * Do column transforms * IF( BLOCK ) THEN DO 90 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 70 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM H11 = A( J, ICOL1+1 ) - SUM*CONJG( V2 ) H22 = A( J, ICOL1+2 ) - SUM*CONJG( V3 ) SUM = T12*H11 + T22*H22 + T32*A( J, ICOL1+3 ) A( J, ICOL1+1 ) = H11 - SUM H11 = H22 - SUM*CONJG( V22 ) H22 = A( J, ICOL1+3 ) - SUM*CONJG( V32 ) SUM = T13*H11 + T23*H22 + T33*A( J, ICOL1+4 ) A( J, ICOL1+2 ) = H11 - SUM A( J, ICOL1+3 ) = H22 - SUM*CONJG( V23 ) A( J, ICOL1+4 ) = A( J, ICOL1+4 ) - SUM*CONJG( V33 ) 70 CONTINUE IF( WANTZ ) THEN DO 80 J = LILOZ, LIHIZ SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) + $ T3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM H11 = Z( J, ICOL1+1 ) - SUM*CONJG( V2 ) H22 = Z( J, ICOL1+2 ) - SUM*CONJG( V3 ) SUM = T12*H11 + T22*H22 + T32*Z( J, ICOL1+3 ) Z( J, ICOL1+1 ) = H11 - SUM H11 = H22 - SUM*CONJG( V22 ) H22 = Z( J, ICOL1+3 ) - SUM*CONJG( V32 ) SUM = T13*H11 + T23*H22 + T33*Z( J, ICOL1+4 ) Z( J, ICOL1+2 ) = H11 - SUM Z( J, ICOL1+3 ) = H22 - SUM*CONJG( V23 ) Z( J, ICOL1+4 ) = Z( J, ICOL1+4 ) - $ SUM*CONJG( V33 ) 80 CONTINUE END IF ICOL1 = ICOL1 + 3 90 CONTINUE DO 120 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 100 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*CONJG( V2 ) A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*CONJG( V3 ) 100 CONTINUE IF( WANTZ ) THEN DO 110 J = LILOZ, LIHIZ SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) + $ T3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM Z( J, ICOL1+1 ) = Z( J, ICOL1+1 ) - $ SUM*CONJG( V2 ) Z( J, ICOL1+2 ) = Z( J, ICOL1+2 ) - $ SUM*CONJG( V3 ) 110 CONTINUE END IF ICOL1 = ICOL1 + 1 120 CONTINUE ELSE DO 130 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*CONJG( V2 ) A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*CONJG( V3 ) 130 CONTINUE END IF END IF RETURN * * End of CLAREF * END scalapack-1.8.0/SRC/pdpbsv.f0000640000175000017500000004510010363532303015410 0ustar muammarmuammar SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded symmetric positive definite distributed * matrix with bandwidth BW. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PDPBTRF and PDPBTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+2*bw)*bw * +max((bw*NRHS), bw*bw) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PDPBTRF, PDPBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPBTRF and PDPBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PDPBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+2*BW)*BW * * Factor the matrix * CALL PDPBTRF( UPLO, N, BW, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) RETURN END IF * RETURN * * End of PDPBSV * END scalapack-1.8.0/SRC/pdsyevd.f0000640000175000017500000003130510363532303015572 0ustar muammarmuammar SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) DOUBLE PRECISION A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSYEVD computes all the eigenvalues and eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PDSYEVD assumes a homogeneous system and makes * no checks for consistency of the eigenvalues or eigenvectors across * the different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only; (NOT IMPLEMENTED YET) * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/workspace) block cyclic DOUBLE PRECISION array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * W (global output) DOUBLE PRECISION array, dimension (N) * If INFO=0, the eigenvalues in ascending order. * * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * Z contains the orthonormal eigenvectors * of the symmetric matrix A. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension (LWORK) * On output, WORK(1) returns the workspace required. * * LWORK (local input) INTEGER * LWORK >= MAX( 1+6*N+2*NP*NQ, TRILWMIN ) + 2*N * TRILWMIN = 3*N + MAX( NB*( NP+1 ), 3*NB ) * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * Alignment requirements * ====================== * * The distributed submatrices sub( A ), sub( Z ) must verify * some alignment properties, namely the following expression * should be true: * ( MB_A.EQ.NB_A.EQ.MB_Z.EQ.NB_Z .AND. IROFFA.EQ.ICOFFA .AND. * IROFFA.EQ.0 .AND.IROFFA.EQ.IROFFZ. AND. IAROW.EQ.IZROW) * with IROFFA = MOD( IA-1, MB_A ) * and ICOFFA = MOD( JA-1, NB_A ). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICOFFZ, ICTXT, IINFO, $ INDD, INDE, INDE2, INDTAU, INDWORK, INDWORK2, $ IROFFA, IROFFZ, ISCALE, LIWMIN, LLWORK, $ LLWORK2, LWMIN, MYCOL, MYROW, NB, NP, NPCOL, $ NPROW, NQ, OFFSET, TRILWMIN DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. * .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, INDXG2P, NUMROC, PDLAMCH, PDLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DSCAL, PCHK1MAT, $ PDLARED1D, PDLASCL, PDLASET, PDORMTR, PDSTEDC, $ PDSYTRD, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFZ = MOD( IZ-1, DESCZ( MB_ ) ) ICOFFZ = MOD( JZ-1, DESCZ( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * LQUERY = ( LWORK.EQ.-1 ) TRILWMIN = 3*N + MAX( NB*( NP+1 ), 3*NB ) LWMIN = MAX( 1+6*N+2*NP*NQ, TRILWMIN ) + 2*N LIWMIN = 7*N + 8*NPCOL + 2 WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN IF( .NOT.LSAME( JOBZ, 'V' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( IROFFA.NE.IROFFZ .OR. ICOFFA.NE.ICOFFZ ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 1200+CSRC_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDE2 = INDD + N INDWORK = INDE2 + N LLWORK = LWORK - INDWORK + 1 INDWORK2 = INDD LLWORK2 = LWORK - INDWORK2 + 1 * * Scale matrix to allowable range, if necessary. * ISCALE = 0 SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) ANRM = PDLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PDLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * * CALL PDSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE2 ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDD ), W, $ WORK( INDWORK ), LLWORK ) * CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDE2 ), WORK( INDE ), $ WORK( INDWORK ), LLWORK ) * CALL PDLASET( 'Full', N, N, ZERO, ONE, Z, 1, 1, DESCZ ) * IF( UPPER ) THEN OFFSET = 1 ELSE OFFSET = 0 END IF CALL PDSTEDC( 'I', N, W, WORK( INDE+OFFSET ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK2 ), LLWORK2, IWORK, LIWORK, INFO ) * CALL PDORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, WORK( INDWORK2 ), $ LLWORK2, IINFO ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL DSCAL( N, ONE / SIGMA, W, 1 ) END IF * RETURN * * End of PDSYEVD * END scalapack-1.8.0/SRC/pclarzb.f0000640000175000017500000005663410363532303015565 0ustar muammarmuammar SUBROUTINE PCLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARZB applies a complex block reflector Q or its conjugate * transpose Q**H to a complex M-by-N distributed matrix sub( C ) * denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right. * * Q is a product of k elementary reflectors as returned by PCTZRZF. * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * DIRECT (global input) CHARACTER * Indicates how H is formed from a product of elementary * reflectors * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise (not supported yet) * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L', * (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local * pieces of the distributed vectors V representing the * Householder transformation as returned by PCTZRZF. * LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) COMPLEX array, dimension MB_V by MB_V * The lower triangular matrix T in the representation of the * block reflector. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT CHARACTER COLBTOP, TRANST INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2, $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV, $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL, $ IVROW, J, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV, $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20, $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV, $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CGEBR2D, $ CGEBS2D, CGEMM, CGSUM2D, CLACGV, $ CLACPY, CLASET, CTRBR2D, CTRBS2D, $ CTRMM, INFOG2L, PBCMATADD, PBCTRAN, $ PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -4 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * LEFT = LSAME( SIDE, 'L' ) IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) ICOFFV = MOD( JV-1, NBV ) NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFV = IIV + ( JJV-1 ) * LDV MBC = DESCC( MB_ ) NBC = DESCC( NB_ ) NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1, $ JJC1, ICROW1, ICCOL1 ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NQCALL ) ) * IF( LEFT ) THEN IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, MBC ) MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = ICOFFC1 NQC2 = NQC1 ELSE IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = IROFFC1 MPC2 = MPC1 ICOFFC2 = MOD( JC+N-L-1, NBC ) NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NQCALL ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC2 = ICOFFV is required by the current transposition * routine PBCTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW2 ) THEN MPC20 = MPC2 + IROFFC2 ELSE MPC20 = MPC2 END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBCTRAN * IPV = 1 IPW = IPV + MPC20 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC20 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL CLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW+ICOFFV*LW ), LW ) ELSE CALL CLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW ), LW ) END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K * CALL PBCTRAN( ICTXT, 'Rowwise', 'Conjugate transpose', K, $ M+ICOFFV, DESCV( NB_ ), WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K * IF( MYROW.EQ.ICROW2 ) $ IPV = IPV + IROFFC2 * * WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V' * WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K * LW = MAX( 1, NQC2 ) * IF( MPC2.GT.0 ) THEN CALL CGEMM( 'Transpose', 'No transpose', NQC2, K, MPC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL CLASET( 'All', NQC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 10 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBCMATADD( ICTXT, 'Transpose', NQC2, IINXT-IIBEG+1, $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE, $ WORK( IPW+ITOP ), LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 10 END IF END IF * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL CTRBS2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV ) ELSE CALL CTRBR2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL CTRMM( 'Right', 'Lower', TRANST, 'Non unit', NQC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 20 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBCMATADD( ICTXT, 'Transpose', IINXT-IIBEG+1, NQC2, $ -ONE, WORK( IPW+ITOP ), LW, ONE, $ C( IIBEG+(JJC1-1)*LDC ), LDC ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 20 END IF END IF * * C2 C2 - V' * W' * C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )' * MPC2 x NQC2 MPC2 x K K x NQC2 * DO 30 J = 1, K CALL CLACGV( MPC2, WORK( IPV+(J-1)*LV ), 1 ) 30 CONTINUE CALL CGEMM( 'No transpose', 'Transpose', MPC2, NQC2, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC2 ), LDC ) * ELSE * * Form sub( C ) * Q or sub( C ) * Q' * * Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2 * WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC2 LV = MAX( 1, K ) LW = MAX( 1, MPC2 ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL CTRBS2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV ) CALL CLACPY( 'All', K, NQC2, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL CTRBR2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * * WORK( IPV ) is K x NQC2 = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K * IF( NQC2.GT.0 ) THEN CALL CGEMM( 'No Transpose', 'Transpose', MPC2, K, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL CLASET( 'All', MPC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 40 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBCMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC, ONE, $ WORK( IPW+ILEFT*LW ), LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 40 END IF END IF * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN DO 50 J = 1, K CALL CLACGV( K-J+1, T( J+(J-1)*MBV ), 1 ) 50 CONTINUE CALL CTRMM( 'Right', 'Lower', TRANS, 'Non unit', MPC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW ) DO 60 J = 1, K CALL CLACGV( K-J+1, T( J+(J-1)*MBV ), 1 ) 60 CONTINUE ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 70 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBCMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, -ONE, $ WORK( IPW+ILEFT*LW ), LW, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 70 END IF END IF * * C2 C2 - W * conjg( V ) * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * conjg( WORK( IPV ) ) * MPC2 x NQC2 MPC2 x K K x NQC2 * DO 80 J = 1, NQC2 CALL CLACGV( K, WORK( IPV+(J-1)*LV ), 1 ) 80 CONTINUE IF( IOFFC2.GT.0 ) $ CALL CGEMM( 'No transpose', 'No transpose', MPC2, NQC2, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC2 ), LDC ) * END IF * RETURN * * End of PCLARZB * END scalapack-1.8.0/SRC/pdtrti2.f0000640000175000017500000002366410363532303015515 0ustar muammarmuammar SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDTRTI2 computes the inverse of a real upper or lower triangular * block matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). This matrix should be * contained in one and only one process memory space (local operation). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular * = 'U': sub( A ) is unit triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)), * this array contains the local pieces of the triangular matrix * sub( A ). If UPLO = 'U', the leading N-by-N upper triangular * part of the matrix sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of the matrix sub( A ) contains the lower * triangular matrix, and the strictly upper triangular part * of sub( A ) is not referenced. If DIAG = 'U', the diagonal * elements of sub( A ) are also not referenced and are assumed * to be 1. On exit, the (triangular) inverse of the original * matrix, in the same storage format. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER IACOL, IAROW, ICTXT, ICURR, IDIAG, IIA, IOFFA, $ JJA, LDA, MYCOL, MYROW, NA, NPCOL, NPROW DOUBLE PRECISION AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DSCAL, $ DTRMV, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Compute local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) * IF( UPPER ) THEN * IOFFA = IIA + ( JJA - 1 ) * LDA ICURR = IOFFA + LDA * IF( NOUNIT ) THEN * * Compute inverse of upper non-unit triangular matrix. * A( IOFFA ) = ONE / A( IOFFA ) IDIAG = ICURR + 1 DO 10 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements 1:j-1 of j-th column. * CALL DTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL DSCAL( NA, AJJ, A( ICURR ), 1 ) IDIAG = IDIAG + LDA + 1 ICURR = ICURR + LDA 10 CONTINUE * ELSE * * Compute inverse of upper unit triangular matrix. * DO 20 NA = 1, N-1 * * Compute elements 1:j-1 of j-th column. * CALL DTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL DSCAL( NA, -ONE, A( ICURR ), 1 ) ICURR = ICURR + LDA 20 CONTINUE * END IF * ELSE * ICURR = IIA + N - 1 + ( JJA + N - 2 ) * LDA IOFFA = ICURR - LDA * IF( NOUNIT ) THEN * * Compute inverse of lower non-unit triangular matrix. * A( ICURR ) = ONE / A( ICURR ) IDIAG = IOFFA - 1 DO 30 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements j+1:n of j-th column. * CALL DTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL DSCAL( NA, AJJ, A( IOFFA ), 1 ) ICURR = IDIAG IDIAG = IDIAG - LDA - 1 IOFFA = IDIAG + 1 30 CONTINUE * ELSE * * Compute inverse of lower unit triangular matrix. * DO 40 NA = 1, N-1 * * Compute elements j+1:n of j-th column. * CALL DTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL DSCAL( NA, -ONE, A( IOFFA ), 1 ) ICURR = ICURR - LDA - 1 IOFFA = ICURR - LDA 40 CONTINUE * END IF * END IF * END IF * * End of PDTRTI2 * END scalapack-1.8.0/SRC/pdgebd2.f0000640000175000017500000004257610363532303015437 0ustar muammarmuammar SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PDGEBD2 reduces a real general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an orthogonal transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * orthogonal matrix Q as a product of elementary reflectors, * and the elements above the first superdiagonal, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. If M < N, the diagonal and the * first subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the orthogonal matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) DOUBLE PRECISION array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MpA0, NqA0 ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ) * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J, $ JJ, K, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 DOUBLE PRECISION ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DESCSET, $ DGEBR2D, DGEBS2D, DLARFG, INFOG2L, $ PDLARF, PDLARFG, PDELSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = MAX( MPA0, NQA0 ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( M.EQ.1 .AND. N.EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN I = II+(JJ-1)*DESCA( LLD_ ) CALL DLARFG( 1, A( I ), A( I ), 1, TAUQ( JJ ) ) D( JJ ) = A( I ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1 ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1, IAROW, IACOL ) CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1, IAROW, IACOL ) END IF END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO RETURN END IF * ALPHA = ZERO * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, N I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector H(j) to annihilate * A(ia+i:ia+m-1,j) * CALL PDLARFG( M-K+1, ALPHA, I, J, A, MIN( I+1, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PDELSET( D, 1, J, DESCD, ALPHA ) CALL PDELSET( A, I, J, DESCA, ONE ) * * Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left * CALL PDLARF( 'Left', M-K+1, N-K, A, I, J, DESCA, 1, TAUQ, A, $ I, J+1, DESCA, WORK ) CALL PDELSET( A, I, J, DESCA, ALPHA ) * IF( K.LT.N ) THEN * * Generate elementary reflector G(i) to annihilate * A(i,ja+j+1:ja+n-1) * CALL PDLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, JA+N-1 ), DESCA, DESCA( M_ ), $ TAUP ) CALL PDELSET( E, I, 1, DESCE, ALPHA ) CALL PDELSET( A, I, J+1, DESCA, ONE ) * * Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right * CALL PDLARF( 'Right', M-K, N-K, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP, A, I+1, J+1, DESCA, $ WORK ) CALL PDELSET( A, I, J+1, DESCA, ALPHA ) ELSE CALL PDELSET( TAUP, I, 1, DESCE, ZERO ) END IF 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, M I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector G(i) to annihilate * A(i,ja+j:ja+n-1) * CALL PDLARFG( N-K+1, ALPHA, I, J, A, I, $ MIN( J+1, JA+N-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PDELSET( D, I, 1, DESCD, ALPHA ) CALL PDELSET( A, I, J, DESCA, ONE ) * * Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right * CALL PDLARF( 'Right', M-K, N-K+1, A, I, J, DESCA, $ DESCA( M_ ), TAUP, A, MIN( I+1, IA+M-1 ), J, $ DESCA, WORK ) CALL PDELSET( A, I, J, DESCA, ALPHA ) * IF( K.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:ia+m-1,j) * CALL PDLARFG( M-K, ALPHA, I+1, J, A, $ MIN( I+2, IA+M-1 ), J, DESCA, 1, TAUQ ) CALL PDELSET( E, 1, J, DESCE, ALPHA ) CALL PDELSET( A, I+1, J, DESCA, ONE ) * * Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left * CALL PDLARF( 'Left', M-K, N-K, A, I+1, J, DESCA, 1, TAUQ, $ A, I+1, J+1, DESCA, WORK ) CALL PDELSET( A, I+1, J, DESCA, ALPHA ) ELSE CALL PDELSET( TAUQ, 1, J, DESCE, ZERO ) END IF 20 CONTINUE END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEBD2 * END scalapack-1.8.0/SRC/pcheev.f0000640000175000017500000005551310363532303015375 0ustar muammarmuammar SUBROUTINE PCHEEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) REAL RWORK( * ), W( * ) COMPLEX A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCHEEV computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PCHEEV assumes a homogeneous system and makes * only spot checks of the consistency of the eigenvalues across the * different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Notes * ===== * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distributed * matrix A. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of A. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of A. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of the * distributed matrix A. * LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PCHEEV cannot guarantee * correct error reporting. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * Z (local output) COMPLEX array, * global dimension (N, N), * local dimension (LLD_Z, LOCc(JZ+N-1)) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed to guarantee * completion. If the input parameters are incorrect, WORK(1) * may also be incorrect. * * If JOBZ='N' WORK(1) = minimal workspace for eigenvalues only. * If JOBZ='V' WORK(1) = minimal workspace required to * generate all the eigenvectors. * * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= MAX( NB*( NP0+1 ), 3 ) +3*N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required: * LWORK >= (NP0 + NQ0 + NB)*NB + 3*N + N^2 * * Variable definitions: * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * NQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * RWORK (local workspace/output) COMPLEX array, * dimension (LRWORK) * On output RWORK(1) returns the * REAL workspace needed to * guarantee completion. If the input parameters are incorrect, * RWORK(1) may also be incorrect. * * LRWORK (local input) INTEGER * Size of RWORK array. * If eigenvectors are desired (JOBZ = 'V') then * LRWORK >= 2*N + 2*N-2 * If eigenvectors are not desired (JOBZ = 'N') then * LRWORK >= 2*N * * If LRWORK = -1, the LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the RWORK array. The required workspace is returned * as the first element of RWORK and no error message is issued * by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in CSTEQR2 after a total of 30*N iterations. * If INFO = N+1, then PCHEEV has detected heterogeneity * by finding that eigenvalues were not identical across * the process grid. In this case, the accuracy of * the results from PCHEEV cannot be guaranteed. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER ITHVAL PARAMETER ( ITHVAL = 10 ) * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, $ IINFO, INDD, INDE, INDRD, INDRE, INDRWORK, $ INDTAU, INDWORK, INDWORK2, IROFFA, IROFFZ, $ ISCALE, IZROW, J, K, LDC, LLRWORK, LLWORK, $ LRMIN, LRWMIN, LWMIN, MB_A, MB_Z, MYCOL, $ MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, NP0, $ NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ0, NRC, $ RSIZECSTEQR2, RSRC_A, RSRC_Z, SIZECSTEQR2, $ SIZEPCHETRD, SIZEPCUNMTR REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCQR( 10 ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC, SL_GRIDRESHAPE REAL PCLANHE, PSLAMCH EXTERNAL LSAME, INDXG2P, NUMROC, SL_GRIDRESHAPE, $ PCLANHE, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, CHK1MAT, $ CSTEQR2, DESCINIT, PCELGET, PCGEMR2D, PCHETRD, $ PCHK1MAT, PCHK2MAT, PCLASCL, PCLASET, PCUNMTR, $ PXERBLA, SCOPY, SGAMN2D, SGAMX2D, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, ICHAR, INT, MAX, MIN, MOD, REAL, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * * Initialize pointer to some safe value * INDTAU = 1 INDD = 1 INDE = 1 INDWORK = 1 INDWORK2 = 1 * INDRE = 1 INDRD = 1 INDRWORK = 1 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE IF( WANTZ ) THEN IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A LOWER = LSAME( UPLO, 'L' ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) NP0 = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) IF( WANTZ ) THEN NB_Z = DESCZ( NB_ ) MB_Z = DESCZ( MB_ ) RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * * COMPLEX work space for PCHETRD * CALL PCHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE ), WORK( INDTAU ), $ WORK( INDWORK ), -1, IINFO ) SIZEPCHETRD = INT( ABS( WORK( 1 ) ) ) * * COMPLEX work space for PCUNMTR * IF( WANTZ ) THEN CALL PCUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), -1, IINFO ) SIZEPCUNMTR = INT( ABS( WORK( 1 ) ) ) ELSE SIZEPCUNMTR = 0 END IF * * REAL work space for CSTEQR2 * IF( WANTZ ) THEN RSIZECSTEQR2 = MIN( 1, 2*N-2 ) ELSE RSIZECSTEQR2 = 0 END IF * * Initialize the context of the single column distributed * matrix required by CSTEQR2. This specific distribution * allows each process to do 1/pth of the work updating matrix * Q during CSTEQR2 and achieve some parallelization to an * otherwise serial subroutine. * LDC = 0 IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB_A, MYPROWC, 0, NPROCS ) LDC = MAX( 1, NRC ) CALL DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, LDC, $ INFO ) END IF * * COMPLEX work space for CSTEQR2 * IF( WANTZ ) THEN SIZECSTEQR2 = N*LDC ELSE SIZECSTEQR2 = 0 END IF * * Set up pointers into the WORK array * INDTAU = 1 INDD = INDTAU + N INDE = INDD + N INDWORK = INDE + N INDWORK2 = INDWORK + N*LDC LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDRE = 1 INDRD = INDRE + N INDRWORK = INDRD + N LLRWORK = LRWORK - INDRWORK + 1 * * Compute the total amount of space needed * LRWMIN = 2*N + RSIZECSTEQR2 LWMIN = 3*N + MAX( SIZEPCHETRD, SIZEPCUNMTR, SIZECSTEQR2 ) * END IF IF( INFO.EQ.0 ) THEN IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -16 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 3 IF( WANTZ ) THEN CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, $ JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 3, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = CMPLX( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PCHEEV', -INFO ) IF( WANTZ ) $ CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) THEN IF( WANTZ ) $ CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN END IF * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PCLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PCLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * CALL PCHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDRD ), $ RWORK( INDRE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * DO 10 I = 1, N CALL PCELGET( 'A', ' ', WORK( INDD+I-1 ), A, I+IA-1, I+JA-1, $ DESCA ) RWORK( INDRD+I-1 ) = REAL( WORK( INDD+I-1 ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDE+I-1 ), A, I+IA-1, I+JA, $ DESCA ) RWORK( INDRE+I-1 ) = REAL( WORK( INDE+I-1 ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PCELGET( 'A', ' ', WORK( INDE+I-1 ), A, I+IA, I+JA-1, $ DESCA ) RWORK( INDRE+I-1 ) = REAL( WORK( INDE+I-1 ) ) 30 CONTINUE END IF * IF( WANTZ ) THEN * CALL PCLASET( 'Full', N, N, CZERO, CONE, WORK( INDWORK ), 1, 1, $ DESCQR ) * * CSTEQR2 is a modified version of LAPACK's CSTEQR. The * modifications allow each process to perform partial updates * to matrix Q. * CALL CSTEQR2( 'I', N, RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDWORK ), LDC, NRC, RWORK( INDRWORK ), $ INFO ) * CALL PCGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, $ DESCZ, CONTEXTC ) * CALL PCUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) * ELSE * CALL CSTEQR2( 'N', N, RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDWORK ), 1, 1, RWORK( INDRWORK ), INFO ) END IF * * Copy eigenvalues from workspace to output array * CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL SSCAL( N, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = REAL( LWMIN ) * * Free up resources * IF( WANTZ ) THEN CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF( N.LE.ITHVAL ) THEN J = N K = 1 ELSE J = N / ITHVAL K = ITHVAL END IF * LRMIN = INT( RWORK( 1 ) ) INDTAU = 0 INDE = INDTAU + J DO 40 I = 1, J RWORK( I+INDTAU ) = W( ( I-1 )*K+1 ) RWORK( I+INDE ) = W( ( I-1 )*K+1 ) 40 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'All', ' ', J, 1, RWORK( 1+INDTAU ), $ J, 1, 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'All', ' ', J, 1, RWORK( 1+INDE ), $ J, 1, 1, -1, -1, 0 ) * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( RWORK( I+INDTAU )-RWORK( I+INDE ).NE. $ ZERO ) ) THEN INFO = N + 1 END IF 50 CONTINUE RWORK( 1 ) = LRMIN * RETURN * * End of PCHEEV * END scalapack-1.8.0/SRC/pdormtr.f0000640000175000017500000004044310363532303015606 0ustar muammarmuammar SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMTR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of nq-1 elementary reflectors, as returned by PDSYTRD: * * if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); * * if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(IA:*,JA:*) contains elementary * reflectors from PDSYTRD; * = 'L': Lower triangle of A(IA:*,JA:*) contains elementary * reflectors from PDSYTRD. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * or (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PDSYTRD. * If SIDE = 'L', LLD_A >= max(1,LOCr(IA+M-1)); * if SIDE = 'R', LLD_A >= max(1,LOCr(IA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LTAU, where * if SIDE = 'L' and UPLO = 'U', LTAU = LOCc(M_A), * if SIDE = 'L' and UPLO = 'L', LTAU = LOCc(JA+M-2), * if SIDE = 'R' and UPLO = 'U', LTAU = LOCc(N_A), * if SIDE = 'R' and UPLO = 'L', LTAU = LOCc(JA+N-2). * TAU(i) must contain the scalar factor of the elementary * reflector H(i), as returned by PDSYTRD. TAU is tied to the * distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * If UPLO = 'U', * IAA = IA, JAA = JA+1, ICC = IC, JCC = JC; * else UPLO = 'L', * IAA = IA+1, JAA = JA; * if SIDE = 'L', * ICC = IC+1; JCC = JC; * else * ICC = IC; JCC = JC+1; * end if * end if * * If SIDE = 'L', * MI = M-1; NI = N; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; MI = N-1; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN, UPPER INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDORMQL, $ PDORMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) * IF( UPPER ) THEN IAA = IA JAA = JA+1 ICC = IC JCC = JC ELSE IAA = IA+1 JAA = JA IF( LEFT ) THEN ICC = IC + 1 JCC = JC ELSE ICC = IC JCC = JC + 1 END IF END IF * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = M - 1 NI = N CALL CHK1MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, INFO ) ELSE NQ = N MI = M NI = N - 1 CALL CHK1MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, INFO ) END IF CALL CHK1MAT( MI, 4, NI, 5, ICC, JCC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -3 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'T' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMTR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) $ RETURN * IF( UPPER ) THEN * * Q was determined by a call to PDSYTRD with UPLO = 'U' * CALL PDORMQL( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * ELSE * * Q was determined by a call to PDSYTRD with UPLO = 'L' * CALL PDORMQR( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMTR * END scalapack-1.8.0/SRC/psorgqr.f0000640000175000017500000003067610363532303015623 0ustar muammarmuammar SUBROUTINE PSORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGQR generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PSGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PSGEQRF in the K columns of its distributed * matrix argument A(IA:*,JA:JA+K-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+K-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JL, $ JN, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLARFB, $ PSLARFT, PSLASET, PSORG2R, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL PSLASET( 'All', JL-JA, JA+N-JL, ZERO, ZERO, A, IA, JL, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PSORG2R( M-JL+JA, JA+N-JL, JA+K-JL, A, IA+JL-JA, JL, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of columns to loop over ? * IF( JL.GT.JN+1 ) THEN * * Use blocked code * DO 10 J = JL-DESCA( NB_ ), JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PSLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, $ J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PSLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-I+IA, N-J-JB+JA, JB, A, I, $ J, DESCA, WORK, A, I, J+JB, DESCA, $ WORK( IPW ) ) END IF * * Apply H to rows i:ia+m-1 of current block * CALL PSORG2R( M-I+IA, JB, JB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia:i-1 of current block to zero * CALL PSLASET( 'All', I-IA, JB, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * END IF * * Handle first block separately * IF( JL.GT.JA ) THEN * JB = JN - JA + 1 * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PSLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PSLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, $ JA+JB, DESCA, WORK( IPW ) ) * * Apply H to rows ia:ia+m-1 of current block * CALL PSORG2R( M, JB, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGQR * END scalapack-1.8.0/SRC/pdtzrzf.f0000640000175000017500000003104210363532303015615 0ustar muammarmuammar SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper triangular form by means * of orthogonal transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N orthogonal matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements M+1 to N of the first M rows of * sub( A ), with the array TAU, represent the orthogonal matrix * Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), which is used to introduce zeros into * the (m - k + 1)th row of sub( A ), is given in the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IL, IN, IPW, $ IROFFA, J, JM1, L, LWMIN, MP0, MYCOL, MYROW, $ NPCOL, NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG1L, PCHK1MAT, $ PDLATRZ, PDLARZB, PDLARZT, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -9 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTZRZF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ IIA, IAROW ) IF( MYROW.EQ.IAROW ) $ MP0 = MP0 - IROFFA DO 10 I = IIA, IIA+MP0-1 TAU( I ) = ZERO 10 CONTINUE * ELSE * L = N-M JM1 = JA + MIN( M+1, N ) - 1 IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * * Use blocked code initially * DO 20 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) J = JA + I - IA * * Compute the complete orthogonal factorization of the current * block A(i:i+ib-1,j:ja+n-1) * CALL PDLATRZ( IB, JA+N-J, L, A, I, J, DESCA, TAU, WORK ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PDLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,j:ja+n-1) from the right * CALL PDLARZB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, JA+N-J, IB, L, A, I, JM1, $ DESCA, WORK, A, IA, J, DESCA, WORK( IPW ) ) END IF * 20 CONTINUE * * Use unblocked code to factor the last or only block * CALL PDLATRZ( IN-IA+1, N, N-M, A, IA, JA, DESCA, TAU, WORK ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDTZRZF * END scalapack-1.8.0/SRC/pdgeql2.f0000640000175000017500000003007010363532303015450 0ustar muammarmuammar SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQL2 computes a QL factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DGEBR2D, $ DGEBS2D, DLARFG, DSCAL, INFOG2L, $ PDELSET, PDLARF, PDLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IACOL = INDXG2P( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN I = II+(JJ+NQ-2)*DESCA( LLD_ ) AJJ = A( I ) CALL DLARFG( 1, AJJ, A( I ), 1, TAU( JJ+NQ-1 ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ+NQ-1 ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL DSCAL( NQ-1, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL DSCAL( NQ, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1, IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA+K-1, JA, -1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(ia:i+m-k-1,j+n-k) * CALL PDLARFG( M-K+I-IA+1, AJJ, M-K+I, N-K+J, A, IA, $ N-K+J, DESCA, 1, TAU ) * * Apply H(j) to A(ia:i+m-k,ja:j+n-k-1) from the left * CALL PDELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PDLARF( 'Left', M-K+I-IA+1, N-K+J-JA, A, IA, N-K+J, $ DESCA, 1, TAU, A, IA, JA, DESCA, WORK ) CALL PDELSET( A, I+M-K, J+N-K, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEQL2 * END scalapack-1.8.0/SRC/psdbsv.f0000640000175000017500000004511310363532303015417 0ustar muammarmuammar SUBROUTINE PSDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSDBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PSDBTRF and PSDBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * +max((max(bwl,bwu)*NRHS), max(bwl,bwu)*max(bwl,bwu)) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PSDBTRF, PSDBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PSDBTRF and PSDBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PSDBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Factor the matrix * CALL PSDBTRF( N, BWL, BWU, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PSDBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PSDBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBSV', -INFO ) RETURN END IF * RETURN * * End of PSDBSV * END scalapack-1.8.0/SRC/pzlarf.f0000640000175000017500000007015010363532303015413 0ustar muammarmuammar SUBROUTINE PZLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARF applies a complex elementary reflector Q to a complex M-by-N * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the * left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST COMPLEX*16 TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBZTRNV, $ ZCOPY, ZGEBR2D, ZGEBS2D, ZGEMV, $ ZGERC, ZGERV2D, ZGESD2D, ZGSUM2D, $ ZLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL ZCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL ZCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MP+1 CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFV.GT.0 .AND. IOFFC.GT.0 ) $ CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL ZCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 IF( IOFFV.GT.0 ) $ CALL ZCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQ+1 CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFC.GT.0 ) $ CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PZLARF * END scalapack-1.8.0/SRC/pslarzb.f0000640000175000017500000005557710363532303015612 0ustar muammarmuammar SUBROUTINE PSLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) REAL C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARZB applies a real block reflector Q or its transpose Q**T to * a real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) * from the left or the right. * * Q is a product of k elementary reflectors as returned by PSTZRZF. * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * DIRECT (global input) CHARACTER * Indicates how H is formed from a product of elementary * reflectors * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise (not supported yet) * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) REAL pointer into the local memory * to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L', * (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local * pieces of the distributed vectors V representing the * Householder transformation as returned by PSTZRZF. * LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) REAL array, dimension MB_V by MB_V * The lower triangular matrix T in the representation of the * block reflector. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) REAL array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LEFT CHARACTER COLBTOP, TRANST INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2, $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV, $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL, $ IVROW, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV, $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20, $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV, $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, INFOG2L, $ PBSMATADD, PBSTRAN, PB_TOPGET, PXERBLA, $ SGEBR2D, SGEBS2D, SGEMM, $ SGSUM2D, SLACPY, SLASET, STRBR2D, $ STRBS2D, STRMM * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -4 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * LEFT = LSAME( SIDE, 'L' ) IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) ICOFFV = MOD( JV-1, NBV ) NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFV = IIV + ( JJV-1 ) * LDV MBC = DESCC( MB_ ) NBC = DESCC( NB_ ) NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1, $ JJC1, ICROW1, ICCOL1 ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NQCALL ) ) * IF( LEFT ) THEN IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, MBC ) MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = ICOFFC1 NQC2 = NQC1 ELSE IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = IROFFC1 MPC2 = MPC1 ICOFFC2 = MOD( JC+N-L-1, NBC ) NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NQCALL ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC2 = ICOFFV is required by the current transposition * routine PBSTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW2 ) THEN MPC20 = MPC2 + IROFFC2 ELSE MPC20 = MPC2 END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBSTRAN * IPV = 1 IPW = IPV + MPC20 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC20 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL SLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW+ICOFFV*LW ), LW ) ELSE CALL SLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW ), LW ) END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K * CALL PBSTRAN( ICTXT, 'Rowwise', 'Transpose', K, M+ICOFFV, $ DESCV( NB_ ), WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K * IF( MYROW.EQ.ICROW2 ) $ IPV = IPV + IROFFC2 * * WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V' * WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K * LW = MAX( 1, NQC2 ) * IF( MPC2.GT.0 ) THEN CALL SGEMM( 'Transpose', 'No transpose', NQC2, K, MPC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', NQC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 10 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBSMATADD( ICTXT, 'Transpose', NQC2, IINXT-IIBEG+1, $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE, $ WORK( IPW+ITOP ), LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 10 END IF END IF * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL STRBS2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV ) ELSE CALL STRBR2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL STRMM( 'Right', 'Lower', TRANST, 'Non unit', NQC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 20 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBSMATADD( ICTXT, 'Transpose', IINXT-IIBEG+1, NQC2, $ -ONE, WORK( IPW+ITOP ), LW, ONE, $ C( IIBEG+(JJC1-1)*LDC ), LDC ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 20 END IF END IF * * C2 C2 - V' * W' * C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )' * MPC2 x NQC2 MPC2 x K K x NQC2 * CALL SGEMM( 'No transpose', 'Transpose', MPC2, NQC2, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC2 ), LDC ) * ELSE * * Form sub( C ) * Q or sub( C ) * Q' * * Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2 * WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC2 LV = MAX( 1, K ) LW = MAX( 1, MPC2 ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL STRBS2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV ) CALL SLACPY( 'All', K, NQC2, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL STRBR2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * * WORK( IPV ) is K x NQC2 = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K * IF( NQC2.GT.0 ) THEN CALL SGEMM( 'No Transpose', 'Transpose', MPC2, K, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', MPC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 30 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBSMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC, ONE, $ WORK( IPW+ILEFT*LW ), LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 30 END IF END IF * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL STRMM( 'Right', 'Lower', TRANS, 'Non unit', MPC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 40 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBSMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, -ONE, $ WORK( IPW+ILEFT*LW ), LW, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 40 END IF END IF * * C2 C2 - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC2 x NQC2 MPC2 x K K x NQC2 * IF( IOFFC2.GT.0 ) $ CALL SGEMM( 'No transpose', 'No transpose', MPC2, NQC2, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC2 ), LDC ) * END IF * RETURN * * End of PSLARZB * END scalapack-1.8.0/SRC/pchengst.f0000640000175000017500000004170010363532303015727 0ustar muammarmuammar SUBROUTINE PCHENGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ) * .. * * Purpose * * ======= * * PCHENGST reduces a complex Hermitian-definite generalized * eigenproblem to standard form. * * PCHENGST performs the same function as PCHEGST, but is based on * rank 2K updates, which are faster and more scalable than * triangular solves (the basis of PCHENGST). * * PCHENGST calls PCHEGST when UPLO='U', hence PCHENGST provides * improved performance only when UPLO='L', IBTYPE=1. * * PCHENGST also calls PCHEGST when insufficient workspace is * provided, hence PCHENGST provides improved * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PCPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PCPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) REAL * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB ) * * When IBTYPE = 1 and UPLO = 'L', PCHENGST provides improved * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * where NB = MB_A = NB_A, * NP0 = NUMROC( N, NB, 0, 0, NPROW ), * NQ0 = NUMROC( N, NB, 0, 0, NPROW ), * * NUMROC ia a ScaLAPACK tool functions * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * * .. Parameters .. COMPLEX ONEHALF, ONE, MONE REAL RONE PARAMETER ( ONEHALF = ( 0.5E0, 0.0E0 ), $ ONE = ( 1.0E0, 0.0E0 ), $ MONE = ( -1.0E0, 0.0E0 ), RONE = 1.0E0 ) INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( DLEN_ = 9, CTXT_ = 2, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, INDAA, INDG, INDR, INDRT, IROFFA, $ IROFFB, J, K, KB, LWMIN, LWOPT, MYCOL, MYROW, $ NB, NP0, NPCOL, NPK, NPROW, NQ0, POSTK * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ), $ DESCR( DLEN_ ), DESCRT( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL LSAME, INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCGEMM, $ PCHEGST, PCHEMM, PCHER2K, PCHK2MAT, PCLACPY, $ PCTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) SCALE = 1.0E0 * NB = DESCA( MB_ ) * * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) LWMIN = MAX( NB*( NP0+1 ), 3*NB ) IF( IBTYPE.EQ.1 .AND. .NOT.UPPER ) THEN LWOPT = 2*NP0*NB + NQ0*NB + NB*NB ELSE LWOPT = LWMIN END IF WORK( 1 ) = CMPLX( REAL( LWOPT ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHENGST', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * IF( IBTYPE.NE.1 .OR. UPPER .OR. LWORK.LT.LWOPT ) THEN CALL PCHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) RETURN END IF * CALL DESCSET( DESCG, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCR, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCRT, NB, N, NB, NB, IAROW, IACOL, ICTXT, NB ) CALL DESCSET( DESCAA, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * INDG = 1 INDR = INDG + DESCG( LLD_ )*NB INDAA = INDR + DESCR( LLD_ )*NB INDRT = INDAA + DESCAA( LLD_ )*NB * DO 30 K = 1, N, NB * KB = MIN( N-K+1, NB ) POSTK = K + KB NPK = N - POSTK + 1 * * CALL PCLACPY( 'A', N-POSTK+1, KB, B, POSTK+IB-1, K+JB-1, DESCB, $ WORK( INDG ), POSTK, 1, DESCG ) CALL PCLACPY( 'A', N-POSTK+1, KB, A, POSTK+IA-1, K+JA-1, DESCA, $ WORK( INDR ), POSTK, 1, DESCR ) CALL PCLACPY( 'A', KB, K-1, A, K+IA-1, JA, DESCA, $ WORK( INDRT ), 1, 1, DESCRT ) * CALL PCLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDR ), K, 1, DESCR ) CALL PCTRSM( 'Right', 'L', 'N', 'N', NPK, KB, MONE, B, K+IB-1, $ K+JB-1, DESCB, WORK( INDG ), POSTK, 1, DESCG ) * CALL PCHEMM( 'Right', 'L', NPK, KB, ONEHALF, A, K+IA-1, K+JA-1, $ DESCA, WORK( INDG ), POSTK, 1, DESCG, ONE, $ WORK( INDR ), POSTK, 1, DESCR ) * CALL PCHER2K( 'Lower', 'No T', NPK, KB, ONE, WORK( INDG ), $ POSTK, 1, DESCG, WORK( INDR ), POSTK, 1, DESCR, $ RONE, A, POSTK+IA-1, POSTK+JA-1, DESCA ) * CALL PCGEMM( 'No T', 'No Conj', NPK, K-1, KB, ONE, $ WORK( INDG ), POSTK, 1, DESCG, WORK( INDRT ), 1, $ 1, DESCRT, ONE, A, POSTK+IA-1, JA, DESCA ) * CALL PCHEMM( 'Right', 'L', NPK, KB, ONE, WORK( INDR ), K, 1, $ DESCR, WORK( INDG ), POSTK, 1, DESCG, ONE, A, $ POSTK+IA-1, K+JA-1, DESCA ) * CALL PCTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, K-1, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, K+IA-1, JA, $ DESCA ) * CALL PCLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDAA ), 1, 1, DESCAA ) * IF( MYROW.EQ.DESCAA( RSRC_ ) .AND. MYCOL.EQ.DESCAA( CSRC_ ) ) $ THEN DO 20 I = 1, KB DO 10 J = 1, I WORK( INDAA+J-1+( I-1 )*DESCAA( LLD_ ) ) $ = CONJG( WORK( INDAA+I-1+( J-1 )*DESCAA( LLD_ ) ) ) 10 CONTINUE 20 CONTINUE END IF * CALL PCTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, $ 1, DESCAA ) * CALL PCTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', KB, KB, ONE, $ B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, 1, $ DESCAA ) * CALL PCLACPY( 'L', KB, KB, WORK( INDAA ), 1, 1, DESCAA, A, $ K+IA-1, K+JA-1, DESCA ) * CALL PCTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', NPK, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, POSTK+IA-1, $ K+JA-1, DESCA ) * DESCR( CSRC_ ) = MOD( DESCR( CSRC_ )+1, NPCOL ) DESCG( CSRC_ ) = MOD( DESCG( CSRC_ )+1, NPCOL ) DESCRT( RSRC_ ) = MOD( DESCRT( RSRC_ )+1, NPROW ) DESCAA( RSRC_ ) = MOD( DESCAA( RSRC_ )+1, NPROW ) DESCAA( CSRC_ ) = MOD( DESCAA( CSRC_ )+1, NPCOL ) 30 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWOPT ) ) * RETURN END scalapack-1.8.0/SRC/clanv2.f0000640000175000017500000000667210363532303015312 0ustar muammarmuammar SUBROUTINE CLANV2( A, B, C, D, RT1, RT2, CS, SN ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. REAL CS COMPLEX A, B, C, D, RT1, RT2, SN * .. * * Purpose * ======= * * CLANV2 computes the Schur factorization of a complex 2-by-2 * nonhermitian matrix in standard form: * * [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] * [ C D ] [ SN CS ] [ 0 DD ] [-SN CS ] * * Arguments * ========= * * A (input/output) COMPLEX * B (input/output) COMPLEX * C (input/output) COMPLEX * D (input/output) COMPLEX * On entry, the elements of the input matrix. * On exit, they are overwritten by the elements of the * standardised Schur form. * * RT1 (output) COMPLEX * RT2 (output) COMPLEX * The two eigenvalues. * * CS (output) REAL * SN (output) COMPLEX * Parameters of the rotation matrix. * * Further Details * =============== * * Implemented by Mark R. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. REAL RZERO, HALF, RONE PARAMETER ( RZERO = 0.0E+0, HALF = 0.5E+0, $ RONE = 1.0E+0 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. COMPLEX AA, BB, DD, T, TEMP, TEMP2, U, X, Y * .. * .. External Functions .. COMPLEX CLADIV EXTERNAL CLADIV * .. * .. External Subroutines .. EXTERNAL CLARTG * .. * .. Intrinsic Functions .. INTRINSIC REAL, CMPLX, CONJG, AIMAG, SQRT * .. * .. Executable Statements .. * * Initialize CS and SN * CS = RONE SN = ZERO * IF( C.EQ.ZERO ) THEN GO TO 10 * ELSE IF( B.EQ.ZERO ) THEN * * Swap rows and columns * CS = RZERO SN = ONE TEMP = D D = A A = TEMP B = -C C = ZERO GO TO 10 ELSE IF( ( A-D ).EQ.ZERO ) THEN TEMP = SQRT( B*C ) A = A + TEMP D = D - TEMP IF( ( B+C ).EQ.ZERO ) THEN CS = SQRT( HALF ) SN = CMPLX( RZERO, RONE )*CS ELSE TEMP = SQRT( B+C ) TEMP2 = CLADIV( SQRT( B ), TEMP ) CS = REAL( TEMP2 ) SN = CLADIV( SQRT( C ), TEMP ) END IF B = B - C C = ZERO GO TO 10 ELSE * * Compute eigenvalue closest to D * T = D U = B*C X = HALF*( A-T ) Y = SQRT( X*X+U ) IF( REAL( X )*REAL( Y )+AIMAG( X )*AIMAG( Y ).LT.RZERO ) $ Y = -Y T = T - CLADIV( U, ( X+Y ) ) * * Do one QR step with exact shift T - resulting 2 x 2 in * triangular form. * CALL CLARTG( A-T, C, CS, SN, AA ) * D = D - T BB = CS*B + SN*D DD = -CONJG( SN )*B + CS*D * A = AA*CS + BB*CONJG( SN ) + T B = -AA*SN + BB*CS C = ZERO D = T * END IF * 10 CONTINUE * * Store eigenvalues in RT1 and RT2. * RT1 = A RT2 = D RETURN * * End of CLANV2 * END scalapack-1.8.0/SRC/pspotf2.f0000640000175000017500000003053610363532303015516 0ustar muammarmuammar SUBROUTINE PSPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSPOTF2 computes the Cholesky factorization of a real symmetric * positive definite distributed matrix sub( A )=A(IA:IA+N-1,JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURR, IDIAG, IIA, $ IOFFA, IROFF, J, JJA, LDA, MYCOL, MYROW, $ NPCOL, NPROW REAL AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PB_TOPGET, PXERBLA, SGEMV, $ SSCAL * .. * .. Intrinsic Functions .. INTRINSIC MOD, SQRT * .. * .. External Functions .. LOGICAL LSAME REAL SDOT EXTERNAL LSAME, SDOT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Compute local information * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF ( UPPER ) THEN * * Process (IAROW, IACOL) owns block to be factorized * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN * * Compute the Cholesky factorization A = U'*U. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 10 J = JA, JA+N-1 * * Compute U(J,J) and test for non-positive-definiteness. * AJJ = A( IDIAG ) - $ SDOT( J-JA, A( IOFFA ), 1, A( IOFFA ), 1 ) IF( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 20 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of row J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + LDA CALL SGEMV( 'Transpose', J-JA, JA+N-J-1, -ONE, $ A( IOFFA+LDA ), LDA, A( IOFFA ), 1, $ ONE, A( ICURR ), LDA ) CALL SSCAL( N-J+JA-1, ONE / AJJ, A( ICURR ), LDA ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE * 20 CONTINUE * * Broadcast INFO to all processes in my IAROW. * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) END IF * * IAROW bcasts along columns so that everyone has INFO * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1, $ IAROW, MYCOL ) * END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Compute the Cholesky factorization A = L*L'. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 30 J = JA, JA+N-1 * * Compute L(J,J) and test for non-positive-definiteness. * AJJ = A( IDIAG ) - $ SDOT( J-JA, A( IOFFA ), LDA, A( IOFFA ), LDA ) IF ( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 40 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of column J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + 1 CALL SGEMV( 'No transpose', JA+N-J-1, J-JA, -ONE, $ A( IOFFA+1 ), LDA, A( IOFFA ), LDA, $ ONE, A( ICURR ), 1 ) CALL SSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), 1 ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 30 CONTINUE * 40 CONTINUE * * Broadcast INFO to everyone in IACOL * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1, IAROW, MYCOL ) * END IF * * IACOL bcasts INFO along rows so that everyone has it * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) * END IF * END IF * RETURN * * End of PSPOTF2 * END scalapack-1.8.0/SRC/pclawil.f0000640000175000017500000002417210363532303015553 0ustar muammarmuammar SUBROUTINE PCLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER II, JJ, M COMPLEX H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), V( * ) * .. * * Purpose * ======= * * PCLAWIL gets the transform given by H44,H33, & H43H34 into V * starting at row M. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * II (global input) INTEGER * Row owner of H(M+2,M+2) * * JJ (global input) INTEGER * Column owner of H(M+2,M+2) * * M (global input) INTEGER * On entry, this is where the transform starts (row M.) * Unchanged on exit. * * A (global input) COMPLEX array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * Unchanged on exit. * * H44 * H33 * H43H34 (global input) COMPLEX * These three values are for the double shift QR iteration. * Unchanged on exit. * * V (global output) COMPLEX array of size 3. * Contains the transform on ouput. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, ICOL, IROW, JSRC, LDA, LEFT, $ MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, $ RSRC, UP REAL S COMPLEX CDUM, H11, H12, H21, H22, H33S, H44S, V1, V2, $ V3 * .. * .. Local Arrays .. COMPLEX BUF( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, CGERV2D, CGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, AIMAG, MOD * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * On node (II,JJ) collect all DIA,SUP,SUB info from M, M+1 * MODKM1 = MOD( M+1, HBL ) IF( MODKM1.EQ.0 ) THEN IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) CALL CGESD2D( CONTXT, 1, 1, BUF, 1, II, JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) BUF( 2 ) = A( ( ICOL-1 )*LDA+IROW+1 ) BUF( 3 ) = A( ICOL*LDA+IROW ) BUF( 4 ) = A( ICOL*LDA+IROW+1 ) CALL CGESD2D( CONTXT, 4, 1, BUF, 4, II, JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NPCOL.GT.1 ) THEN CALL CGERV2D( CONTXT, 1, 1, V3, 1, MYROW, LEFT ) ELSE V3 = A( ( ICOL-2 )*LDA+IROW ) END IF IF( NUM.GT.1 ) THEN CALL CGERV2D( CONTXT, 4, 1, BUF, 4, UP, LEFT ) H11 = BUF( 1 ) H21 = BUF( 2 ) H12 = BUF( 3 ) H22 = BUF( 4 ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) END IF END IF END IF IF( MODKM1.EQ.1 ) THEN IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) CALL CGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. ( NPROW.GT.1 ) ) $ THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL CGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL CGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NUM.GT.1 ) THEN CALL CGERV2D( CONTXT, 1, 1, H11, 1, UP, LEFT ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) END IF IF( NPROW.GT.1 ) THEN CALL CGERV2D( CONTXT, 1, 1, H12, 1, UP, MYCOL ) ELSE H12 = A( ( ICOL-2 )*LDA+IROW-2 ) END IF IF( NPCOL.GT.1 ) THEN CALL CGERV2D( CONTXT, 1, 1, H21, 1, MYROW, LEFT ) ELSE H21 = A( ( ICOL-3 )*LDA+IROW-1 ) END IF H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF END IF IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) $ RETURN * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF * H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 * RETURN * * End of PCLAWIL * END scalapack-1.8.0/SRC/psgeqrf.f0000640000175000017500000002742310363532303015571 0ustar muammarmuammar SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQRF computes a QR factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i:ia+m-1,ja+i-1), * and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IINFO, IPW, J, $ JB, JN, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSGEQR2, $ PSLARFB, PSLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JB = JN - JA + 1 * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) * CALL PSGEQR2( M, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( JA+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ja) H(ja+1) . . . H(jn) * CALL PSLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PSLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', M, $ N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, JA+JB, $ DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of columns * DO 10 J = JN+1, JA+K-1, DESCA( NB_ ) JB = MIN( K-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) * CALL PSGEQR2( M-J+JA, JB, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PSLARFT( 'Forward', 'Columnwise', M-J+JA, JB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PSLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', $ M-J+JA, N-J-JB+JA, JB, A, I, J, DESCA, WORK, $ A, I, J+JB, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEQRF * END scalapack-1.8.0/SRC/pztrtri.f0000640000175000017500000003070410363532303015634 0ustar muammarmuammar SUBROUTINE PZTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZTRTRI computes the inverse of a upper or lower triangular * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the distributed matrix sub( A ) is upper * or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) * is unit triangular: * = 'N': Non-unit triangular, * = 'U': Unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * triangular matrix sub( A ). If UPLO = 'U', the leading * N-by-N upper triangular part of the matrix sub( A ) contains * the upper triangular matrix to be inverted, and the strictly * lower triangular part of sub( A ) is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * the matrix sub( A ) contains the lower triangular matrix, * and the strictly upper triangular part of sub( A ) is not * referenced. * On exit, the (triangular) inverse of the original matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, A(IA+K-1,JA+K-1) is exactly zero. The * triangular matrix sub( A ) is singular and its * inverse can not be computed. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER I, ICOFF, ICTXT, IROFF, ICURCOL, ICURROW, $ IDUMMY, II, IOFFA, J, JB, JJ, JN, LDA, MYCOL, $ MYROW, NN, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK1MAT, PXERBLA, PZTRTI2, PZTRMM, $ PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) * CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOUNIT ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 * CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTRTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) * * Handle first block separately * JB = JN-JA+1 LDA = DESCA( LLD_ ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 10 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 IOFFA = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 20 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 IOFFA = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Use blocked code * IF( UPPER ) THEN * * Compute inverse of upper triangular matrix * JB = JN-JA+1 * * Handle first block of column separately * CALL PZTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * * Loop over remaining block of columns * DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Compute rows 1:j-1 of current block column * CALL PZTRMM( 'Left', UPLO, 'No transpose', DIAG, J-JA, JB, $ ONE, A, IA, JA, DESCA, A, IA, J, DESCA ) CALL PZTRSM( 'Right', UPLO, 'No transpose', DIAG, J-JA, $ JB, -ONE, A, I, J, DESCA, A, IA, J, DESCA ) * * Compute inverse of current diagonal block * CALL PZTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 40 CONTINUE * ELSE * * Compute inverse of lower triangular matrix * NN = ( ( JA+N-2 ) / DESCA( NB_ ) )*DESCA( NB_ ) + 1 DO 50 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA IF( J+JB.LE.JA+N-1 ) THEN * * Compute rows j+jb:ja+n-1 of current block column * CALL PZTRMM( 'Left', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, ONE, A, I+JB, J+JB, DESCA, $ A, I+JB, J, DESCA ) CALL PZTRSM( 'Right', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, -ONE, A, I, J, DESCA, $ A, I+JB, J, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PZTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 50 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 IF( JA+JB.LE.JA+N-1 ) THEN * * Compute rows ja+jb:ja+n-1 of current block column * CALL PZTRMM( 'Left', UPLO, 'No transpose', DIAG, N-JB, JB, $ ONE, A, IA+JB, JA+JB, DESCA, A, IA+JB, JA, $ DESCA ) CALL PZTRSM( 'Right', UPLO, 'No transpose', DIAG, N-JB, JB, $ -ONE, A, IA, JA, DESCA, A, IA+JB, JA, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PZTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * END IF * RETURN * * End PZTRTRI * END scalapack-1.8.0/SRC/ddttrsv.f0000640000175000017500000001135710363532303015613 0ustar muammarmuammar SUBROUTINE DDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, $ B, LDB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from DGTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * DDTTRSV solves one of the systems of equations * L * X = B, L**T * X = B, or L**H * X = B, * U * X = B, U**T * X = B, or U**H * X = B, * with factors of the tridiagonal matrix A from the LU factorization * computed by DDTTRF. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether to solve with L or U. * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * DL (input) COMPLEX array, dimension (N-1) * The (n-1) multipliers that define the matrix L from the * LU factorization of A. * * D (input) COMPLEX array, dimension (N) * The n diagonal elements of the upper triangular matrix U from * the LU factorization of A. * * DU (input) COMPLEX array, dimension (N-1) * The (n-1) elements of the first superdiagonal of U. * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, B is overwritten by the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL LOWER, NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) LOWER = LSAME( UPLO, 'L' ) IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN INFO = -9 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( NOTRAN ) THEN * IF( LOWER ) THEN * Solve L*X = B, overwriting B with X. * DO 35 J = 1, NRHS * * Solve L*x = b. * DO 10 I = 1, N - 1 B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) 10 CONTINUE 35 CONTINUE * ELSE * Solve U*x = b. * DO 30 J = 1, NRHS B( N, J ) = B( N, J ) / D( N ) IF( N.GT.1 ) $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / $ D( N-1 ) DO 20 I = N - 2, 1, -1 B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J ) ) / D( I ) 20 CONTINUE 30 CONTINUE * ENDIF * ELSE * IF( .NOT. LOWER ) THEN * Solve U**T * X = B, overwriting B with X. * DO 65 J = 1, NRHS * * Solve U**T * x = b. * B( 1, J ) = B( 1, J ) / D( 1 ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) DO 40 I = 3, N B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J ) ) / D( I ) 40 CONTINUE 65 CONTINUE * ELSE * * Solve L**T * X = B, overwriting B with X. DO 60 J = 1, NRHS * * Solve L**T * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) 50 CONTINUE 60 CONTINUE ENDIF END IF * * End of DDTTRSV * END scalapack-1.8.0/SRC/pdlatrs.f0000640000175000017500000000531410363532303015566 0ustar muammarmuammar SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION A( * ), CNORM( * ), $ X( * ), WORK( * ) * .. * * Purpose * ======= * * PDLATRS solves a triangular system. This routine in unfinished * at this time, but will be part of the next release. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * * .. Local Scalars .. INTEGER ICTXT, IIX, IROFF, JJX, MYCOL, MYROW, NP, $ NPCOL, NPROW, LDX, IXCOL, IXROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, INFOG2L, $ PDTRSV * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ***** NO SCALING ***** Call PDTRSV for all cases ***** * SCALE = ONE CALL PDTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX(MB_) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IXCOL ) THEN CALL DGEBS2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX ) ELSE CALL DGEBR2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX, MYROW, IXCOL ) END IF * RETURN * * End of PDLATRS * END scalapack-1.8.0/SRC/pdgeqrf.f0000640000175000017500000002746610363532303015561 0ustar muammarmuammar SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQRF computes a QR factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i:ia+m-1,ja+i-1), * and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IINFO, IPW, J, $ JB, JN, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDGEQR2, $ PDLARFB, PDLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JB = JN - JA + 1 * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) * CALL PDGEQR2( M, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( JA+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ja) H(ja+1) . . . H(jn) * CALL PDLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PDLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', M, $ N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, JA+JB, $ DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of columns * DO 10 J = JN+1, JA+K-1, DESCA( NB_ ) JB = MIN( K-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) * CALL PDGEQR2( M-J+JA, JB, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PDLARFT( 'Forward', 'Columnwise', M-J+JA, JB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PDLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', $ M-J+JA, N-J-JB+JA, JB, A, I, J, DESCA, WORK, $ A, I, J+JB, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEQRF * END scalapack-1.8.0/SRC/pdlaed3.f0000640000175000017500000002662310363532303015437 0ustar muammarmuammar SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, $ INDXR, INDXC, CTOT, NPCOL, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL DOUBLE PRECISION RHO * .. * .. Array Arguments .. INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ), $ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * ) DOUBLE PRECISION BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ), $ W( * ), Z( * ) * .. * * Purpose * ======= * * PDLAED3 finds the roots of the secular equation, as defined by the * values in D, W, and RHO, between 1 and K. It makes the * appropriate calls to SLAED4 * * This code makes very mild assumptions about floating point * arithmetic. It will work on machines with a guard digit in * add/subtract, or on those binary machines without guard digits * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. * It could conceivably fail on hexadecimal or decimal machines * without guard digits, but we know of none. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * K (output) INTEGER * The number of non-deflated eigenvalues, and the order of the * related secular equation. 0 <= K <=N. * * N (input) INTEGER * The dimension of the symmetric tridiagonal matrix. N >= 0. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * D (input/output) DOUBLE PRECISION array, dimension (N) * On entry, D contains the eigenvalues of the two submatrices to * be combined. * On exit, D contains the trailing (N-K) updated eigenvalues * (those which were deflated) sorted into increasing order. * * DROW (global input) INTEGER * The process row over which the first row of the matrix D is * distributed. 0 <= DROW < NPROW. * * DCOL (global input) INTEGER * The process column over which the first column of the * matrix D is distributed. 0 <= DCOL < NPCOL. * * Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) * On entry, Q contains the eigenvectors of two submatrices in * the two square blocks with corners at (1,1), (N1,N1) * and (N1+1, N1+1), (N,N). * On exit, Q contains the trailing (N-K) updated eigenvectors * (those which were deflated) in its last N-K columns. * * LDQ (input) INTEGER * The leading dimension of the array Q. LDQ >= max(1,NQ). * * RHO (global input/output) DOUBLE PRECISION * On entry, the off-diagonal element associated with the rank-1 * cut which originally split the two submatrices which are now * being recombined. * On exit, RHO has been modified to the value required by * PDLAED3. * * DLAMDA (global output) DOUBLE PRECISION array, dimension (N) * A copy of the first K eigenvalues which will be used by * SLAED3 to form the secular equation. * * W (global output) DOUBLE PRECISION array, dimension (N) * The first k values of the final deflation-altered z-vector * which will be passed to SLAED3. * * Z (global input) DOUBLE PRECISION array, dimension (N) * On entry, Z contains the updating vector (the last * row of the first sub-eigenvector matrix and the first row of * the second sub-eigenvector matrix). * On exit, the contents of Z have been destroyed by the updating * process. * * U (global output) DOUBLE PRECISION array * global dimension (N, N), local dimension (LDU, NQ). * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * * LDU (input) INTEGER * The leading dimension of the array U. * * QBUF (workspace) DOUBLE PRECISION array, dimension 3*N * * * INDX (workspace) INTEGER array, dimension (N) * The permutation used to sort the contents of DLAMDA into * ascending order. * * INDCOL (workspace) INTEGER array, dimension (N) * * * INDROW (workspace) INTEGER array, dimension (N) * * * INDXR (workspace) INTEGER array, dimension (N) * * * INDXC (workspace) INTEGER array, dimension (N) * * CTOT (workspace) INTEGER array, dimension( NPCOL, 4) * * NPCOL (global input) INTEGER * The total number of columns over which the distributed * submatrix is distributed. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value. * > 0: The algorithm failed to compute the ith eigenvalue. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU, $ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW, $ NPROW, PDC, PDR, ROW DOUBLE PRECISION AUX, TEMP * .. * .. External Functions .. INTEGER INDXG2L DOUBLE PRECISION DLAMC3, DNRM2 EXTERNAL INDXG2L, DLAMC3, DNRM2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, DLAED4 * .. * .. Intrinsic Functions .. INTRINSIC MOD, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 * * Quick return if possible * IF( K.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ROW = DROW COL = DCOL DO 20 I = 1, N, NB DO 10 J = 0, NB - 1 IF( I+J.LE.N ) THEN INDROW( I+J ) = ROW INDCOL( I+J ) = COL END IF 10 CONTINUE ROW = MOD( ROW+1, NPROW ) COL = MOD( COL+1, NPCOL ) 20 CONTINUE * MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 ) KLR = MYKL / NPROW IF( MYROW.EQ.DROW ) THEN MYKLR = KLR + MOD( MYKL, NPROW ) ELSE MYKLR = KLR END IF PDC = 1 COL = DCOL 30 CONTINUE IF( MYCOL.NE.COL ) THEN PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) COL = MOD( COL+1, NPCOL ) GO TO 30 END IF PDR = PDC KL = KLR + MOD( MYKL, NPROW ) ROW = DROW 40 CONTINUE IF( MYROW.NE.ROW ) THEN PDR = PDR + KL KL = KLR ROW = MOD( ROW+1, NPROW ) GO TO 40 END IF * DO 50 I = 1, K DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) Z( I ) = ONE 50 CONTINUE IF( MYKLR.GT.0 ) THEN KK = PDR DO 80 I = 1, MYKLR CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO ) IF( IINFO.NE.0 ) THEN INFO = KK END IF * * ..Compute part of z * DO 60 J = 1, KK - 1 Z( J ) = Z( J )*( BUF( J ) / $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 60 CONTINUE Z( KK ) = Z( KK )*BUF( KK ) DO 70 J = KK + 1, K Z( J ) = Z( J )*( BUF( J ) / $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 70 CONTINUE KK = KK + 1 80 CONTINUE * IF( MYROW.NE.DROW ) THEN CALL DCOPY( K, Z, 1, BUF, 1 ) CALL DGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL ) ELSE IPD = 2*K + 1 CALL DCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) IF( KLR.GT.0 ) THEN IPD = MYKLR + IPD ROW = MOD( DROW+1, NPROW ) DO 100 I = 1, NPROW - 1 CALL DGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW, $ MYCOL ) CALL DCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) DO 90 J = 1, K Z( J ) = Z( J )*BUF( J ) 90 CONTINUE IPD = IPD + KLR ROW = MOD( ROW+1, NPROW ) 100 CONTINUE END IF END IF END IF * IF( MYROW.EQ.DROW ) THEN IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN CALL DCOPY( K, Z, 1, BUF, 1 ) CALL DCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) CALL DGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL ) ELSE IF( MYCOL.EQ.DCOL ) THEN IPD = 2*K + 1 COL = DCOL KL = MYKL DO 120 I = 1, NPCOL - 1 IPD = IPD + KL COL = MOD( COL+1, NPCOL ) KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) IF( KL.NE.0 ) THEN CALL DGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL ) CALL DCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 ) DO 110 J = 1, K Z( J ) = Z( J )*BUF( J ) 110 CONTINUE END IF 120 CONTINUE DO 130 I = 1, K Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) ) 130 CONTINUE * END IF END IF * * Diffusion * IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN CALL DCOPY( K, Z, 1, BUF, 1 ) CALL DCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) CALL DGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL ) CALL DCOPY( K, BUF, 1, Z, 1 ) END IF * * Copy of D at the good place * KLC = 0 KLR = 0 DO 140 I = 1, K GI = INDX( I ) D( GI ) = BUF( K+I ) COL = INDCOL( GI ) ROW = INDROW( GI ) IF( COL.EQ.MYCOL ) THEN KLC = KLC + 1 INDXC( KLC ) = I END IF IF( ROW.EQ.MYROW ) THEN KLR = KLR + 1 INDXR( KLR ) = I END IF 140 CONTINUE * * Compute eigenvectors of the modified rank-1 modification. * IF( MYKL.NE.0 ) THEN DO 180 J = 1, MYKL KK = INDXC( J ) JU = INDX( KK ) JJU = INDXG2L( JU, NB, J, J, NPCOL ) CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO ) IF( IINFO.NE.0 ) THEN INFO = KK END IF IF( K.EQ.1 .OR. K.EQ.2 ) THEN DO 150 I = 1, KLR KK = INDXR( I ) IU = INDX( KK ) IIU = INDXG2L( IU, NB, J, J, NPROW ) U( IIU, JJU ) = BUF( KK ) 150 CONTINUE GO TO 180 END IF * DO 160 I = 1, K BUF( I ) = Z( I ) / BUF( I ) 160 CONTINUE TEMP = DNRM2( K, BUF, 1 ) DO 170 I = 1, KLR KK = INDXR( I ) IU = INDX( KK ) IIU = INDXG2L( IU, NB, J, J, NPROW ) U( IIU, JJU ) = BUF( KK ) / TEMP 170 CONTINUE * 180 CONTINUE END IF * 190 CONTINUE * RETURN * * End of PDLAED3 * END scalapack-1.8.0/SRC/pclascl.f0000640000175000017500000004162510363532303015543 0ustar muammarmuammar SUBROUTINE PCLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N REAL CFROM, CTO * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLASCL multiplies the M-by-N complex distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1) by the real scalar CTO/CFROM. This * is done without over/underflow as long as the final result * CTO * A(I,J) / CFROM does not over/underflow. TYPE specifies that * sub( A ) may be full, upper triangular, lower triangular or upper * Hessenberg. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TYPE (global input) CHARACTER * TYPE indices the storage type of the input distributed * matrix. * = 'G': sub( A ) is a full matrix, * = 'L': sub( A ) is a lower triangular matrix, * = 'U': sub( A ) is an upper triangular matrix, * = 'H': sub( A ) is an upper Hessenberg matrix. * * CFROM (global input) REAL * CTO (global input) REAL * The distributed matrix sub( A ) is multiplied by CTO/CFROM. * A(I,J) is computed without over/underflow if the final * result CTO * A(I,J) / CFROM can be represented without * over/underflow. CFROM must be nonzero. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). On exit, this array contains the local * pieces of the distributed matrix multiplied by CTO/CFROM. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER IACOL, IAROW, ICOFFA, ICTXT, ICURCOL, ICURROW, $ IIA, II, INXTROW, IOFFA, IROFFA, ITYPE, J, JB, $ JJA, JJ, JN, KK, LDA, LL, MYCOL, MYROW, MP, $ NPCOL, NPROW, NQ REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE INFO = 0 CALL CHK1MAT( M, 4, N, 6, IA, JA, DESCA, 9, INFO ) IF( INFO.EQ.0 ) THEN IF( LSAME( TYPE, 'G' ) ) THEN ITYPE = 0 ELSE IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 ELSE ITYPE = -1 END IF IF( ITYPE.EQ.-1 ) THEN INFO = -1 ELSE IF( CFROM.EQ.ZERO ) THEN INFO = -4 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCLASCL', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM * CFROMC = CFROM CTOC = CTO * * Compute local indexes * LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * 10 CONTINUE CFROM1 = CFROMC*SMLNUM CTO1 = CTOC / BIGNUM IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN MUL = SMLNUM DONE = .FALSE. CFROMC = CFROM1 ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN MUL = BIGNUM DONE = .FALSE. CTOC = CTO1 ELSE MUL = CTOC / CFROMC DONE = .TRUE. END IF * IOFFA = ( JJA - 1 ) * LDA ICURROW = IAROW ICURCOL = IACOL * IF( ITYPE.EQ.0 ) THEN * * Full matrix * DO 30 JJ = JJA, JJA+NQ-1 DO 20 II = IIA, IIA+MP-1 A( IOFFA+II ) = A( IOFFA+II ) * MUL 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE * ELSE IF( ITYPE.EQ.1 ) THEN * * Lower triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 50 LL = JJ, JJ + JB -1 DO 40 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 40 CONTINUE IOFFA = IOFFA + LDA 50 CONTINUE ELSE DO 70 LL = JJ, JJ + JB -1 DO 60 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 120 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 90 LL = JJ, JJ + JB -1 DO 80 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 DO 100 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 120 CONTINUE * ELSE IF( ITYPE.EQ.2 ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 140 LL = JJ, JJ + JB -1 DO 130 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 130 CONTINUE IOFFA = IOFFA + LDA 140 CONTINUE ELSE DO 160 LL = JJ, JJ + JB -1 DO 150 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 150 CONTINUE IOFFA = IOFFA + LDA 160 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 210 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 180 LL = JJ, JJ + JB -1 DO 170 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 170 CONTINUE IOFFA = IOFFA + LDA 180 CONTINUE ELSE DO 200 LL = JJ, JJ + JB -1 DO 190 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 190 CONTINUE IOFFA = IOFFA + LDA 200 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 210 CONTINUE * ELSE IF( ITYPE.EQ.3 ) THEN * * Upper Hessenberg matrix * II = IIA JJ = JJA JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.ICURCOL ) THEN DO 230 LL = JJ, JJ+JB-1 DO 220 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 250 LL = JJ, JJ+JB-1 DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE JJ = JJ + JB END IF * II = II + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 260 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( ICURROW+1, NPROW ) IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 280 LL = JJ, JJ + JB -1 DO 270 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 270 CONTINUE IOFFA = IOFFA + LDA 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 DO 290 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 290 CONTINUE IOFFA = IOFFA + LDA 300 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 350 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 320 LL = JJ, JJ + JB -1 DO 310 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 310 CONTINUE IOFFA = IOFFA + LDA 320 CONTINUE ELSE DO 340 LL = JJ, JJ + JB -1 DO 330 KK = IIA, MIN( II-1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 330 CONTINUE IOFFA = IOFFA + LDA 340 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * $ MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 350 CONTINUE * END IF * END IF * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PCLASCL * END scalapack-1.8.0/SRC/pslacp2.f0000640000175000017500000003744710363532303015475 0ustar muammarmuammar SUBROUTINE PSLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSLACP2 copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PSLACP2 * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * PSLACP2 requires that only dimension of the matrix operands is * distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) REAL pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW, $ ICOFFA, IIA, IIAA, IIB, IIBB, IIBEGA, IIBEGB, $ IIENDA, IINXTA, IINXTB, ILEFT, IRIGHT, IROFFA, $ ITOP, JJA, JJAA, JJB, JJBB, JJBEGA, JJBEGB, $ JJENDA, JJNXTA, JJNXTB, LDA, LDB, MBA, MP, $ MPAA, MYCOL, MYDIST, MYROW, NBA, NPCOL, NPROW, $ NQ, NQAA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SLACPY * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) LDB = DESCB( LLD_ ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MBA = DESCA( MB_ ) * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MP.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEGA = IIA IIENDA = IIA + MP - 1 IINXTA = MIN( ICEIL( IIBEGA, MBA ) * MBA, IIENDA ) IIBEGB = IIB IINXTB = IIBEGB + IINXTA - IIBEGA * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL SLACPY( UPLO, IINXTA-IIBEGA+1, N-ITOP, $ A( IIBEGA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBEGB+(JJB+ITOP-1)*LDB ), LDB ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEGA = IINXTA + 1 IINXTA = MIN( IINXTA+MBA, IIENDA ) IIBEGB = IINXTB + 1 IINXTB = IIBEGB + IINXTA - IIBEGA GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * MPAA = MP IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJAA.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL SLACPY( 'All', MPAA, ITOP-JJAA+JJA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL SLACPY( UPLO, MPAA, HEIGHT, $ A( IIAA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBB+(JJB+ITOP-1)*LDB ), LDB ) MPAA = MAX( 0, MPAA - HEIGHT ) IIAA = IIAA + HEIGHT JJAA = JJA + IBASE IIBB = IIBB + HEIGHT JJBB = JJB + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * CALL SLACPY( 'All', MP, N, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQ.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEGA = JJA JJENDA = JJA + NQ - 1 JJNXTA = MIN( ICEIL( JJBEGA, NBA ) * NBA, JJENDA ) JJBEGB = JJB JJNXTB = JJBEGB + JJNXTA - JJBEGA * 30 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL SLACPY( UPLO, M-ILEFT, JJNXTA-JJBEGA+1, $ A( IIA+ILEFT+(JJBEGA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBEGB-1)*LDB ), LDB ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEGA = JJNXTA +1 JJNXTA = MIN( JJNXTA+NBA, JJENDA ) JJBEGB = JJNXTB +1 JJNXTB = JJBEGB + JJNXTA - JJBEGA GO TO 30 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * NQAA = NQ IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 40 CONTINUE IF( IIAA.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL SLACPY( 'All', ILEFT-IIAA+IIA, NQAA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL SLACPY( UPLO, WIDE, NQAA, $ A( IIA+ILEFT+(JJAA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBB-1)*LDB ), LDB ) NQAA = MAX( 0, NQAA - WIDE ) IIAA = IIA + IRIGHT JJAA = JJAA + WIDE IIBB = IIB + IRIGHT JJBB = JJBB + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 40 END IF * ELSE * CALL SLACPY( 'All', M, NQ, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * END IF * RETURN * * End of PSLACP2 * END scalapack-1.8.0/SRC/pzlanhe.f0000640000175000017500000010251110363532303015553 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZLANHE( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLANHE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * complex hermitian distributed matrix sub(A) = A(IA:IA+N-1,JA:JA+N-1). * * PZLANHE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PZLANHE as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * hermitian matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PZLANHE is set to zero. N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the hermitian distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION ABSA, SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DAXPY, DCOMBSSQ, $ DGAMX2D, DGSUM2D, DGEBR2D, $ DGEBS2D, PDCOL2ROW, PDTREECOMB, $ ZLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL ICEIL, IDAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is Hermitian, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( MYCOL.EQ.IACOL ) THEN IF( JJ.LE.JJA+NQ-1 ) THEN VALUE = MAX( VALUE, $ ABS( DBLE( A( K+(JJ-1)*LDA ) ) ) ) DO 30 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF ELSE IF( JJ.LE.JJA+NQ-1 ) THEN DO 35 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 35 CONTINUE END IF END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( MYCOL.EQ.ICURCOL ) THEN IF( JJ.LE.JJA+NQ-1 ) THEN VALUE = MAX( VALUE, $ ABS( DBLE( A( K+(JJ-1)*LDA ) ) ) ) DO 70 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF ELSE IF( JJ.LE.JJA+NQ-1 ) THEN DO 75 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 75 CONTINUE END IF END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.IAROW ) THEN IF( II.LE.IIA+NP-1 ) THEN VALUE = MAX( VALUE, ABS( DBLE( A( II+K ) ) ) ) DO 100 LL = II+1, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF ELSE IF( II.LE.IIA+NP-1 ) THEN DO 105 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 105 CONTINUE END IF END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.ICURROW ) THEN IF( II.LE.IIA+NP-1 ) THEN VALUE = MAX( VALUE, $ ABS( DBLE( A( II+K ) ) ) ) DO 140 LL = II+1, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF ELSE IF( II.LE.IIA+NP-1 ) THEN DO 145 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 145 CONTINUE END IF END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * hermitian). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( MYCOL.EQ.IACOL ) THEN IF( JJA+NQ.GT.JJ ) THEN SUM = ABS( DBLE( A( K+(JJ-1)*LDA ) ) ) DO 210 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF ELSE IF( JJA+NQ.GT.JJ ) THEN DO 215 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 215 CONTINUE END IF END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( JJA+NQ.GT.JJ ) THEN SUM = ABS( DBLE( A( K+(JJ-1)*LDA ) ) ) DO 250 LL = JJ*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF ELSE IF( JJA+NQ.GT.JJ ) THEN DO 255 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 255 CONTINUE END IF END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( MYROW.EQ.IAROW ) THEN IF( IIA+NP.GT.II ) THEN SUM = ABS( DBLE( A( IOFFA+II ) ) ) DO 280 LL = II+1, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF ELSE DO 285 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 285 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( MYROW.EQ.ICURROW ) THEN IF( IIA+NP.GT.II ) THEN SUM = ABS( DBLE( A( II+IOFFA ) ) ) DO 320 LL = II+1, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE ELSE IF( II.EQ.IIA+NP-1 ) THEN SUM = ABS( DBLE( A( II+IOFFA ) ) ) END IF ELSE DO 325 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 325 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to DGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PDCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL DAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( IDAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) THEN IF( DBLE( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( DBLE( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) THEN IF( DBLE( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( DBLE( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.IAROW ) THEN IF( DBLE( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( DBLE( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( MYROW.EQ.ICURROW ) THEN IF( DBLE( A( II+K ) ).NE.ZERO ) THEN ABSA = ABS( DBLE( A( II+K ) ) ) IF( SCALE.LT.ABSA ) THEN SUM = ONE + SUM * ( SCALE / ABSA )**2 SCALE = ABSA ELSE SUM = SUM + ( ABSA / SCALE )**2 END IF END IF II = II + 1 END IF CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PZLANHE = VALUE * RETURN * * End of PZLANHE * END scalapack-1.8.0/SRC/pslauu2.f0000640000175000017500000001754510363532303015521 0ustar muammarmuammar SUBROUTINE PSLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLAUU2 computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the unblocked form of the algorithm, calling Level 2 BLAS. * No communication is performed by this routine, the matrix to operate * on should be strictly local to one process. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the matrix * sub( A ) is upper or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the order of the triangular factor U or L. N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICURR, IDIAG, IIA, IOFFA, JJA, $ LDA, MYCOL, MYROW, NA, NPCOL, NPROW REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SGEMV, SSCAL * .. * .. External Functions .. LOGICAL LSAME REAL SDOT EXTERNAL LSAME, SDOT * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * DO 10 NA = N-1, 1, -1 AII = A( IDIAG ) ICURR = IDIAG + LDA A( IDIAG ) = AII*AII + SDOT( NA, A( ICURR ), LDA, $ A( ICURR ), LDA ) CALL SGEMV( 'No transpose', N-NA-1, NA, ONE, $ A( IOFFA+LDA ), LDA, A( ICURR ), LDA, AII, $ A( IOFFA ), 1 ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE AII = A( IDIAG ) CALL SSCAL( N, AII, A( IOFFA ), 1 ) * ELSE * * Compute the product L' * L. * DO 20 NA = 1, N-1 AII = A( IDIAG ) ICURR = IDIAG + 1 A(IDIAG) = AII*AII + SDOT( N-NA, A( ICURR ), 1, $ A( ICURR ), 1 ) CALL SGEMV( 'Transpose', N-NA, NA-1, ONE, A( IOFFA+1 ), $ LDA, A( ICURR ), 1, AII, A( IOFFA ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 20 CONTINUE AII = A( IDIAG ) CALL SSCAL( N, AII, A( IOFFA ), LDA ) * END IF * END IF * RETURN * * End of PSLAUU2 * END scalapack-1.8.0/SRC/pzlahrd.f0000640000175000017500000002443410602576752015601 0ustar muammarmuammar SUBROUTINE PZLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCY( * ) COMPLEX*16 A( * ), T( * ), TAU( * ), WORK( * ), Y( * ) * .. * * Purpose * ======= * * PZLAHRD reduces the first NB columns of a complex general * N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that * elements below the k-th subdiagonal are zero. The reduction is * performed by an unitary similarity transformation Q' * A * Q. The * routine returns the matrices V and T which determine Q as a block * reflector I - V*T*V', and also the matrix Y = A * V * T. * * This is an auxiliary routine called by PZGEHRD. In the following * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). * N >= 0. * * K (global input) INTEGER * The offset for the reduction. Elements below the k-th * subdiagonal in the first NB columns are reduced to zero. * * NB (global input) INTEGER * The number of columns to be reduced. * * A (local input/local output) COMPLEX*16 pointer into * the local memory to an array of dimension (LLD_A, * LOCc(JA+N-K)). On entry, this array contains the the local * pieces of the N-by-(N-K+1) general distributed matrix * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above * the k-th subdiagonal in the first NB columns are overwritten * with the corresponding elements of the reduced distributed * matrix; the elements below the k-th subdiagonal, with the * array TAU, represent the matrix Q as a product of elementary * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are * unchanged. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16 array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). TAU is tied to the distributed matrix A. * * T (local output) COMPLEX*16 array, dimension (NB_A,NB_A) * The upper triangular matrix T. * * Y (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_Y,NB_A). On exit, this array * contains the local pieces of the N-by-NB distributed * matrix Y. LLD_Y >= LOCr(IA+N-1). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) COMPLEX*16 array, dimension (NB) * * Further Details * =============== * * The matrix Q is represented as a product of nb elementary reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the (n-k+1)-by-nb matrix * V which is needed, with T and Y, to apply the transformation to the * unreduced part of the matrix, using an update of the form: * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V'). * * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the * following example with n = 7, k = 3 and nb = 2: * * ( a h a a a ) * ( a h a a a ) * ( a h a a a ) * ( h h a a a ) * ( v1 h a a a ) * ( v1 v2 a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper * Hessenberg matrix H, and vi denotes an element of the vector * defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL IPROC INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL, $ JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ COMPLEX*16 EI * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PZELSET, $ PZGEMV, PZLACGV, PZLARFG, PZSCAL, $ ZAXPY, ZCOPY, ZSCAL, ZTRMV * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.1 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) * IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - IOFF * EI = ZERO JW = IOFF + 1 CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW, $ IACOL, ICTXT, 1 ) * DO 10 L = 1, NB I = IA + K + L - 2 J = JA + L - 1 * IF( L.GT.1 ) THEN * * Update A(ia:ia+n-1,j) * * Compute i-th column of A - Y * V' * CALL PZLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J, $ DESCA, 1 ) CALL PZLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) ) * * Apply I - V * T' * V' to this column (call it b) from the * left, using the last column of T as workspace * * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) * ( V2 ) ( b2 ) * * where V1 is unit lower triangular * * w := V1' * b1 * IF( IPROC ) THEN CALL ZCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1, $ WORK( JW ), 1 ) CALL ZTRMV( 'Lower', 'Conjugate transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) END IF * * w := w + V2'*b2 * CALL PZGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A, $ I+1, JA, DESCA, A, I+1, J, DESCA, 1, ONE, WORK, $ 1, JW, DESCW, DESCW( M_ ) ) * * w := T'*w * IF( IPROC ) $ CALL ZTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', $ L-1, T, DESCA( NB_ ), WORK( JW ), 1 ) * * b2 := b2 - V2*w * CALL PZGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE, $ A, I+1, J, DESCA, 1 ) * * b1 := b1 - V1*w * IF( IPROC ) THEN CALL ZTRMV( 'Lower', 'No transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) CALL ZAXPY( L-1, -ONE, WORK( JW ), 1, $ A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 ) END IF CALL PZELSET( A, I, J-1, DESCA, EI ) END IF * * Generate the elementary reflector H(i) to annihilate * A(ia+k+i:ia+n-1,j) * CALL PZLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PZELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(iy:y+n-1,jy+l-1) * CALL PZGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1, $ DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1, $ DESCY, 1 ) CALL PZGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW, $ DESCW, DESCW( M_ ) ) CALL PZGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY, $ JY+L-1, DESCY, 1 ) JL = MIN( JJ+L-1, JA+NQ-1 ) CALL PZSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 ) * * Compute T(1:i,i) * IF( IPROC ) THEN JT = ( L-1 ) * DESCA( NB_ ) CALL ZSCAL( L-1, -TAU( JL ), WORK( JW ), 1 ) CALL ZCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 ) CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), T( JT+1 ), 1 ) T( JT+L ) = TAU( JL ) END IF 10 CONTINUE * CALL PZELSET( A, K+NB+IA-1, J, DESCA, EI ) * RETURN * * End of PZLAHRD * END scalapack-1.8.0/SRC/psdttrs.f0000640000175000017500000006651110363532303015626 0ustar muammarmuammar SUBROUTINE PSDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PSDTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PSDTTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal diagonally dominant-like distributed * matrix. * * Routine PSDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) REAL pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDTTRF and this is stored in AF. If a linear system * is to be solved using PSDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_M_B, STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PSDTTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -15 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PSDTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PSDTTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = 10*NPCOL + 4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, 'PSDTTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB( 5 ) PARAM_CHECK( 14, 1 ) = DESCB( 4 ) PARAM_CHECK( 13, 1 ) = DESCB( 3 ) PARAM_CHECK( 12, 1 ) = DESCB( 2 ) PARAM_CHECK( 11, 1 ) = DESCB( 1 ) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 15, 2 ) = 1105 PARAM_CHECK( 14, 2 ) = 1104 PARAM_CHECK( 13, 2 ) = 1103 PARAM_CHECK( 12, 2 ) = 1102 PARAM_CHECK( 11, 2 ) = 1101 PARAM_CHECK( 10, 2 ) = 10 PARAM_CHECK( 9, 2 ) = 805 PARAM_CHECK( 8, 2 ) = 804 PARAM_CHECK( 7, 2 ) = 803 PARAM_CHECK( 6, 2 ) = 801 PARAM_CHECK( 5, 2 ) = 7 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PSDTTRSV( 'L', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PSDTTRSV( 'U', 'T', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * END IF * * Call backsolve routine * IF( ( LSAME( TRANS, 'C' ) ) .OR. ( LSAME( TRANS, 'T' ) ) ) THEN * CALL PSDTTRSV( 'L', 'T', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PSDTTRSV( 'U', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSDTTRS * END scalapack-1.8.0/SRC/pslascl.f0000640000175000017500000004161710363532303015564 0ustar muammarmuammar SUBROUTINE PSLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N REAL CFROM, CTO * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASCL multiplies the M-by-N real distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1) by the real scalar CTO/CFROM. This * is done without over/underflow as long as the final result * CTO * A(I,J) / CFROM does not over/underflow. TYPE specifies that * sub( A ) may be full, upper triangular, lower triangular or upper * Hessenberg. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TYPE (global input) CHARACTER * TYPE indices the storage type of the input distributed * matrix. * = 'G': sub( A ) is a full matrix, * = 'L': sub( A ) is a lower triangular matrix, * = 'U': sub( A ) is an upper triangular matrix, * = 'H': sub( A ) is an upper Hessenberg matrix. * * CFROM (global input) REAL * CTO (global input) REAL * The distributed matrix sub( A ) is multiplied by CTO/CFROM. * A(I,J) is computed without over/underflow if the final * result CTO * A(I,J) / CFROM can be represented without * over/underflow. CFROM must be nonzero. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). On exit, this array contains the local * pieces of the distributed matrix multiplied by CTO/CFROM. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER IACOL, IAROW, ICOFFA, ICTXT, ICURCOL, ICURROW, $ IIA, II, INXTROW, IOFFA, IROFFA, ITYPE, J, JB, $ JJA, JJ, JN, KK, LDA, LL, MYCOL, MYROW, MP, $ NPCOL, NPROW, NQ REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE INFO = 0 CALL CHK1MAT( M, 4, N, 6, IA, JA, DESCA, 9, INFO ) IF( INFO.EQ.0 ) THEN IF( LSAME( TYPE, 'G' ) ) THEN ITYPE = 0 ELSE IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 ELSE ITYPE = -1 END IF IF( ITYPE.EQ.-1 ) THEN INFO = -1 ELSE IF( CFROM.EQ.ZERO ) THEN INFO = -4 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLASCL', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM * CFROMC = CFROM CTOC = CTO * * Compute local indexes * LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * 10 CONTINUE CFROM1 = CFROMC*SMLNUM CTO1 = CTOC / BIGNUM IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN MUL = SMLNUM DONE = .FALSE. CFROMC = CFROM1 ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN MUL = BIGNUM DONE = .FALSE. CTOC = CTO1 ELSE MUL = CTOC / CFROMC DONE = .TRUE. END IF * IOFFA = ( JJA - 1 ) * LDA ICURROW = IAROW ICURCOL = IACOL * IF( ITYPE.EQ.0 ) THEN * * Full matrix * DO 30 JJ = JJA, JJA+NQ-1 DO 20 II = IIA, IIA+MP-1 A( IOFFA+II ) = A( IOFFA+II ) * MUL 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE * ELSE IF( ITYPE.EQ.1 ) THEN * * Lower triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 50 LL = JJ, JJ + JB -1 DO 40 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 40 CONTINUE IOFFA = IOFFA + LDA 50 CONTINUE ELSE DO 70 LL = JJ, JJ + JB -1 DO 60 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 120 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 90 LL = JJ, JJ + JB -1 DO 80 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 DO 100 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 120 CONTINUE * ELSE IF( ITYPE.EQ.2 ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 140 LL = JJ, JJ + JB -1 DO 130 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 130 CONTINUE IOFFA = IOFFA + LDA 140 CONTINUE ELSE DO 160 LL = JJ, JJ + JB -1 DO 150 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 150 CONTINUE IOFFA = IOFFA + LDA 160 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 210 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 180 LL = JJ, JJ + JB -1 DO 170 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 170 CONTINUE IOFFA = IOFFA + LDA 180 CONTINUE ELSE DO 200 LL = JJ, JJ + JB -1 DO 190 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 190 CONTINUE IOFFA = IOFFA + LDA 200 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 210 CONTINUE * ELSE IF( ITYPE.EQ.3 ) THEN * * Upper Hessenberg matrix * II = IIA JJ = JJA JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.ICURCOL ) THEN DO 230 LL = JJ, JJ+JB-1 DO 220 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 250 LL = JJ, JJ+JB-1 DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE JJ = JJ + JB END IF * II = II + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 260 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( ICURROW+1, NPROW ) IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 280 LL = JJ, JJ + JB -1 DO 270 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 270 CONTINUE IOFFA = IOFFA + LDA 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 DO 290 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 290 CONTINUE IOFFA = IOFFA + LDA 300 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 350 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 320 LL = JJ, JJ + JB -1 DO 310 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 310 CONTINUE IOFFA = IOFFA + LDA 320 CONTINUE ELSE DO 340 LL = JJ, JJ + JB -1 DO 330 KK = IIA, MIN( II-1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 330 CONTINUE IOFFA = IOFFA + LDA 340 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * $ MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 350 CONTINUE * END IF * END IF * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PSLASCL * END scalapack-1.8.0/SRC/pzunmql.f0000640000175000017500000004232410363532303015625 0ustar muammarmuammar SUBROUTINE PZUNMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMQL overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PZGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PZGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, IPW, $ IROFFA, IROFFC, J, J1, J2, J3, JB, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZUNM2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN JB = J1 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PZUNM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PZLARFT( 'Backward', 'Columnwise', NQ-K+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+j+jb-ja-1,jc:jc+n-1) * MI = M - K + J + JB - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+j+jb-ja-1) * NI = N - K + J + JB - JA END IF * * Apply H or H' * CALL PZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, $ JB, A, IA, J, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN JB = J2 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PZUNM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMQL * END scalapack-1.8.0/SRC/cdbtf2.f0000640000175000017500000001237410363532303015265 0ustar muammarmuammar SUBROUTINE CDBTF2( M, N, KL, KU, AB, LDAB, INFO ) * * Modified by Andrew J. Cleary in November, 96 from: * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. COMPLEX AB( LDAB, * ) * .. * * Purpose * ======= * * Cdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting with row interchanges. * * This is the unblocked version of the algorithm, calling Level 2 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) COMPLEX array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine; elements marked * + need not be set on entry, but are required by the routine to store * elements of U, because of fill-in resulting from the row * interchanges. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER J, JP, JU, KM, KV * .. * .. External Functions .. INTEGER ISAMAX EXTERNAL ISAMAX * .. * .. External Subroutines .. EXTERNAL CGERU, CSCAL, CSWAP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U, allowing for * fill-in. * KV = KU * * Test the input parameters. * INFO = 0 *ECA IF( M.LT.0 ) THEN *ECA INFO = -1 *ECA ELSE IF( N.LT.0 ) THEN *ECA INFO = -2 *ECA ELSE IF( KL.LT.0 ) THEN *ECA INFO = -3 *ECA ELSE IF( KU.LT.0 ) THEN *ECA INFO = -4 *ECA ELSE IF( LDAB.LT.KL+KV+1 ) THEN *ECA INFO = -6 *ECA END IF *ECA IF( INFO.NE.0 ) THEN *ECA CALL XERBLA( 'CDBTF2', -INFO ) *ECA RETURN *ECA END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Gaussian elimination without partial pivoting * * JU is the index of the last column affected by the current stage * of the factorization. * JU = 1 * DO 40 J = 1, MIN( M, N ) * * Test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-J ) JP = 1 IF( AB( KV+1, J ).NE.ZERO ) THEN JU = MAX( JU, MIN( J+KU, N ) ) * IF( KM.GT.0 ) THEN * * Compute multipliers. * CALL CSCAL( KM, ONE / AB( KU+1, J ), AB( KU+2, J ), 1 ) * * Update trailing submatrix within the band. * IF( JU.GT.J ) THEN CALL CGERU( KM, JU-J, -CONE, AB( KU+2, J ), 1, $ AB( KU, J+1 ), LDAB-1, AB( KU+1, J+1 ), $ LDAB-1 ) END IF END IF ELSE * IF( INFO.EQ.0 ) $ INFO = J END IF 40 CONTINUE RETURN * * End of CDBTF2 * END scalapack-1.8.0/SRC/pdgetrs.f0000640000175000017500000002612010363532303015563 0ustar muammarmuammar SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDGETRS solves a system of distributed linear equations * * op( sub( A ) ) * X = sub( B ) * * with a general N-by-N distributed matrix sub( A ) using the LU * factorization computed by PDGETRF. * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), op( A ) = A or A**T and * sub( B ) denotes B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block data decomposition ( MB_A=NB_A ). * * Arguments * ========= * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': sub( A ) * X = sub( B ) (No transpose) * = 'T': sub( A )**T * X = sub( B ) (Transpose) * = 'C': sub( A )**T * X = sub( B ) (Transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local * memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the factors * L and U from the factorization sub( A ) = P*L*U; the unit * diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand sides * sub( B ). On exit, sub( B ) is overwritten by the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PDLAPIV, PDTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE NOTRAN = LSAME( TRANS, 'N' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1200+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1200+CTXT_) END IF END IF IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, 3, $ IB, JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ DESCA( MB_ ) + NUMROC( DESCA( M_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) ) * IF( NOTRAN ) THEN * * Solve sub( A ) * X = sub( B ). * * Apply row interchanges to the right hand sides. * CALL PDLAPIV( 'Forward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A )' * X = sub( B ). * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Apply row interchanges to the solution vectors. * CALL PDLAPIV( 'Backward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * END IF * RETURN * * End of PDGETRS * END scalapack-1.8.0/SRC/clahqr2.f0000640000175000017500000003447510363532303015463 0ustar muammarmuammar SUBROUTINE CLAHQR2( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, $ IHIZ, Z, LDZ, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 22, 2000 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N * .. * .. Array Arguments .. COMPLEX H( LDH, * ), W( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * CLAHQR2 is an auxiliary routine called by CHSEQR to update the * eigenvalues and Schur decomposition already computed by CHSEQR, by * dealing with the Hessenberg submatrix in rows and columns ILO to IHI. * This version of CLAHQR (not the standard LAPACK version) uses a * double-shift algorithm (like LAPACK's SLAHQR). * Unlike the standard LAPACK convention, this does not assume the * subdiagonal is real, nor does it work to preserve this quality if * given. * * Arguments * ========= * * WANTT (input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (input) INTEGER * The order of the matrix H. N >= 0. * * ILO (input) INTEGER * IHI (input) INTEGER * It is assumed that H is already upper triangular in rows and * columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). * CLAHQR works primarily with the Hessenberg submatrix in rows * and columns ILO to IHI, but applies transformations to all of * H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * H (input/output) COMPLEX array, dimension (LDH,N) * On entry, the upper Hessenberg matrix H. * On exit, if WANTT is .TRUE., H is upper triangular in rows * and columns ILO:IHI. If WANTT is .FALSE., the contents of H * are unspecified on exit. * * LDH (input) INTEGER * The leading dimension of the array H. LDH >= max(1,N). * * W (output) COMPLEX array, dimension (N) * The computed eigenvalues ILO to IHI are stored in the * corresponding elements of W. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in H, with W(i) = H(i,i). * * ILOZ (input) INTEGER * IHIZ (input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (input/output) COMPLEX array, dimension (LDZ,N) * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations, and on exit Z has been updated; * transformations are applied only to the submatrix * Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not * referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * > 0: if INFO = i, CLAHQR failed to compute all the * eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) * iterations; elements i+1:ihi of W contain those * eigenvalues which have been successfully computed. * * Further Details * =============== * * Modified by Mark R. Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0 ) REAL DAT1, DAT2 PARAMETER ( DAT1 = 0.75E+0, DAT2 = -0.4375E+0 ) * .. * .. Local Scalars .. INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ REAL CS, OVFL, S, SMLNUM, TST1, ULP, UNFL COMPLEX CDUM, H00, H10, H11, H12, H21, H22, H33, H33S, $ H43H34, H44, H44S, SN, SUM, T1, T2, T3, V1, V2, $ V3 * .. * .. Local Arrays .. REAL RWORK( 1 ) COMPLEX V( 3 ) * .. * .. External Functions .. REAL SLAMCH, CLANHS EXTERNAL SLAMCH, CLANHS * .. * .. External Subroutines .. EXTERNAL SLABAD, CCOPY, CLANV2, CLARFG, CROT * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CONJG, AIMAG, MAX, MIN * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * INFO = 0 * * Quick return if possible * IF( N.EQ.0 ) $ RETURN IF( ILO.EQ.IHI ) THEN W( ILO ) = H( ILO, ILO ) RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * * Set machine-dependent constants for the stopping criterion. * If norm(H) <= sqrt(OVFL), overflow should not occur. * UNFL = SLAMCH( 'Safe minimum' ) OVFL = RONE / UNFL CALL SLABAD( UNFL, OVFL ) ULP = SLAMCH( 'Precision' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = 30*NH * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of 1 or 2. Each iteration of the loop works * with the active submatrix in rows and columns L to I. * Eigenvalues I+1 to IHI have already converged. Either L = ILO, or * H(L,L-1) is negligible so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 150 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 130 ITS = 0, ITN * * Look for a single small subdiagonal element. * DO 20 K = I, L + 1, -1 TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) IF( TST1.EQ.RZERO ) $ TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 30 20 CONTINUE 30 CONTINUE L = K IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * H( L, L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * IF( L.GE.I-1 ) $ GO TO 140 * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN * * Exceptional shift. * * S = ABS( REAL( H( I,I-1 ) ) ) + ABS( REAL( H( I-1,I-2 ) ) ) S = CABS1( H( I, I-1 ) ) + CABS1( H( I-1, I-2 ) ) H44 = DAT1*S H33 = H44 H43H34 = DAT2*S*S ELSE * * Prepare to use Wilkinson's shift. * H44 = H( I, I ) H33 = H( I-1, I-1 ) H43H34 = H( I, I-1 )*H( I-1, I ) END IF * * Look for two consecutive small subdiagonal elements. * DO 40 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S = CABS1( V1 ) + CABS1( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 IF( M.EQ.L ) $ GO TO 50 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 ) $ GO TO 50 40 CONTINUE 50 CONTINUE * * Double-shift QR step * DO 120 K = M, I - 1 * * The first iteration of this loop determines a reflection G * from the vector V and applies it from left and right to H, * thus creating a nonzero bulge below the subdiagonal. * * Each subsequent iteration determines a reflection G to * restore the Hessenberg form in the (K-1)th column, and thus * chases the bulge one step toward the bottom of the active * submatrix. NR is the order of G * NR = MIN( 3, I-K+1 ) IF( K.GT.M ) $ CALL CCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL CLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.I-1 ) $ H( K+2, K-1 ) = ZERO ELSE IF( M.GT.L ) THEN * The real double-shift code uses H( K, K-1 ) = -H( K, K-1 ) * instead of the following. H( K, K-1 ) = H( K, K-1 ) - CONJG( T1 )*H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 * * Apply G from the left to transform the rows of the matrix * in columns K to I2. * DO 60 J = K, I2 SUM = CONJG( T1 )*H( K, J ) + $ CONJG( T2 )*H( K+1, J ) + $ CONJG( T3 )*H( K+2, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 H( K+2, J ) = H( K+2, J ) - SUM*V3 60 CONTINUE * * Apply G from the right to transform the columns of the * matrix in rows I1 to min(K+3,I). * DO 70 J = I1, MIN( K+3, I ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) H( J, K+2 ) = H( J, K+2 ) - SUM*CONJG( V3 ) 70 CONTINUE * IF( WANTZ ) THEN * * Accumulate transformations in the matrix Z * DO 80 J = ILOZ, IHIZ SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + $ T3*Z( J, K+2 ) Z( J, K ) = Z( J, K ) - SUM Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 ) Z( J, K+2 ) = Z( J, K+2 ) - SUM*CONJG( V3 ) 80 CONTINUE END IF ELSE IF( NR.EQ.2 ) THEN * * Apply G from the left to transform the rows of the matrix * in columns K to I2. * DO 90 J = K, I2 SUM = CONJG( T1 )*H( K, J ) + $ CONJG( T2 )*H( K+1, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 90 CONTINUE * * Apply G from the right to transform the columns of the * matrix in rows I1 to min(K+2,I). * DO 100 J = I1, MIN( K+2, I ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) 100 CONTINUE * IF( WANTZ ) THEN * * Accumulate transformations in the matrix Z * DO 110 J = ILOZ, IHIZ SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) Z( J, K ) = Z( J, K ) - SUM Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 ) 110 CONTINUE END IF END IF * * Since at the start of the QR step we have for M > L * H( K, K-1 ) = H( K, K-1 ) - CONJG( T1 )*H( K, K-1 ) * then we don't need to do the following * IF( K.EQ.M .AND. M.GT.L ) THEN * If the QR step was started at row M > L because two * consecutive small subdiagonals were found, then H(M,M-1) * must also be updated by a factor of (1-T1). * TEMP = ONE - T1 * H( m, m-1 ) = H( m, m-1 )*CONJG( TEMP ) * END IF 120 CONTINUE * * Ensure that H(I,I-1) is real. * 130 CONTINUE * * Failure to converge in remaining number of iterations * INFO = I RETURN * 140 CONTINUE * IF( L.EQ.I ) THEN * * H(I,I-1) is negligible: one eigenvalue has converged. * W( I ) = H( I, I ) * ELSE IF( L.EQ.I-1 ) THEN * * H(I-1,I-2) is negligible: a pair of eigenvalues have converged. * * Transform the 2-by-2 submatrix to standard Schur form, * and compute and store the eigenvalues. * CALL CLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), $ H( I, I ), W( I-1 ), W( I ), CS, SN ) * IF( WANTT ) THEN * * Apply the transformation to the rest of H. * IF( I2.GT.I ) $ CALL CROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, $ CS, SN ) CALL CROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, $ CONJG( SN ) ) END IF IF( WANTZ ) THEN * * Apply the transformation to Z. * CALL CROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, $ CONJG( SN ) ) END IF * END IF * * Decrement number of remaining iterations, and return to start of * the main loop with new value of I. * ITN = ITN - ITS I = L - 1 GO TO 10 * 150 CONTINUE RETURN * * End of CLAHQR2 * END scalapack-1.8.0/SRC/pzgels.f0000640000175000017500000005474110363532303015431 0ustar muammarmuammar SUBROUTINE PZGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PZGELS solves overdetermined or underdetermined complex linear * systems involving an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1), * or its conjugate-transpose, using a QR or LQ factorization of * sub( A ). It is assumed that sub( A ) has full rank. * * The following options are provided: * * 1. If TRANS = 'N' and m >= n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )*X ||. * * 2. If TRANS = 'N' and m < n: find the minimum norm solution of * an underdetermined system sub( A ) * X = sub( B ). * * 3. If TRANS = 'C' and m >= n: find the minimum norm solution of * an undetermined system sub( A )**H * X = sub( B ). * * 4. If TRANS = 'C' and m < n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )**H * X ||. * * where sub( B ) denotes B( IB:IB+M-1, JB:JB+NRHS-1 ) when TRANS = 'N' * and B( IB:IB+N-1, JB:JB+NRHS-1 ) otherwise. Several right hand side * vectors b and solution vectors x can be handled in a single call; * When TRANS = 'N', the solution vectors are stored as the columns of * the N-by-NRHS right hand side matrix sub( B ) and the M-by-NRHS * right hand side matrix sub( B ) otherwise. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER * = 'N': the linear system involves sub( A ); * = 'C': the linear system involves sub( A )**H. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e. the number of columns * of the distributed submatrices sub( B ) and X. NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). On entry, the M-by-N matrix A. * if M >= N, sub( A ) is overwritten by details of its QR * factorization as returned by PZGEQRF; * if M < N, sub( A ) is overwritten by details of its LQ * factorization as returned by PZGELQF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of local dimension * (LLD_B, LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributed matrix B of right hand side * vectors, stored columnwise; * sub( B ) is M-by-NRHS if TRANS='N', and N-by-NRHS otherwise. * On exit, sub( B ) is overwritten by the solution vectors, * stored columnwise: if TRANS = 'N' and M >= N, rows 1 to N * of sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements N+1 to M in that * column; if TRANS = 'N' and M < N, rows 1 to N of sub( B ) * contain the minimum norm solution vectors; if TRANS = 'C' * and M >= N, rows 1 to M of sub( B ) contain the minimum norm * solution vectors; if TRANS = 'C' and M < N, rows 1 to M of * sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements M+1 to N in that * column. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= LTAU + MAX( LWF, LWS ) where * If M >= N, then * LTAU = NUMROC( JA+MIN(M,N)-1, NB_A, MYCOL, CSRC_A, NPCOL ), * LWF = NB_A * ( MpA0 + NqA0 + NB_A ) * LWS = MAX( (NB_A*(NB_A-1))/2, (NRHSqB0 + MpB0)*NB_A ) + * NB_A * NB_A * Else * LTAU = NUMROC( IA+MIN(M,N)-1, MB_A, MYROW, RSRC_A, NPROW ), * LWF = MB_A * ( MpA0 + NqA0 + MB_A ) * LWS = MAX( (MB_A*(MB_A-1))/2, ( NpB0 + MAX( NqA0 + * NUMROC( NUMROC( N+IROFFB, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NRHSqB0 ) )*MB_A ) + * MB_A * MB_A * End if * * where LCMP = LCM / NPROW with LCM = ILCM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * MpB0 = NUMROC( M+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NRHSqB0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, TPSD INTEGER BROW, IACOL, IAROW, IASCL, IBCOL, IBROW, IBSCL, $ ICOFFA, ICOFFB, ICTXT, IPW, IROFFA, IROFFB, $ LCM, LCMP, LTAU, LWF, LWMIN, LWS, MPA0, MPB0, $ MYCOL, MYROW, NPB0, NPCOL, NPROW, NQA0, $ NRHSQB0, SCLLEN DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) DOUBLE PRECISION RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, PDLAMCH, $ PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PZGELQF, $ PZGEQRF, PDLABAD, PZLASCL, PZLASET, $ PZTRSM, PZUNMLQ, PZUNMQR, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( M, 2, N, 3, IA, JA, DESCA, 8, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( IA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( IB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NRHSQB0 = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, $ NPCOL ) IF( M.GE.N ) THEN MPB0 = NUMROC( M+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( JA+MIN(M,N)-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) LWF = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) LWS = MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPB0 + NRHSQB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ )*DESCA( NB_ ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( IA+MIN(M,N)-1, DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) LWF = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) LWS = MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( NPB0 + MAX( NQA0 + NUMROC( NUMROC( N+IROFFB, $ DESCA( MB_ ), 0, 0, NPROW ), DESCA( MB_ ), 0, 0, $ LCMP ), NRHSQB0 ) )*DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF LWMIN = LTAU + MAX( LWF, LWS ) WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) * TPSD = .TRUE. IF( LSAME( TRANS, 'N' ) ) $ TPSD = .FALSE. * IF( .NOT.( LSAME( TRANS, 'N' ) .OR. $ LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( M.GE.N .AND. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. IAROW.NE.IBROW ) THEN INFO = -10 ELSE IF( M.LT.N .AND. ICOFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( M.LT.N .AND. DESCA( NB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1200 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF END IF * IF( .NOT.TPSD ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( M, 2, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, 4, $ IB, JB, DESCB, 12, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGELS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, $ IB, JB, DESCB ) RETURN END IF * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'P' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) * * Scale A, B if max entry outside range [SMLNUM,BIGNUM] * ANRM = PZLANGE( 'M', M, N, A, IA, JA, DESCA, RWORK ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PZLASCL( 'G', ANRM, SMLNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 1 ELSE IF( ANRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PZLASCL( 'G', ANRM, BIGNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 2 ELSE IF( ANRM.EQ.ZERO ) THEN * * Matrix all zero. Return zero solution. * CALL PZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, IB, $ JB, DESCB ) GO TO 10 END IF * BROW = M IF( TPSD ) $ BROW = N * BNRM = PZLANGE( 'M', BROW, NRHS, B, IB, JB, DESCB, RWORK ) * IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PZLASCL( 'G', BNRM, SMLNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 1 ELSE IF( BNRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PZLASCL( 'G', BNRM, BIGNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 2 END IF * IPW = LTAU + 1 * IF( M.GE.N ) THEN * * compute QR factorization of A * CALL PZGEQRF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least N, optimally N*NB * IF( .NOT.TPSD ) THEN * * Least-Squares Problem min || A * X - B || * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q' * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, $ IA, JA, DESCA, WORK, B, IB, JB, DESCB, $ WORK( IPW ), LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+N-1,JB:JB+NRHS-1) := inv(R) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, CONE, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * SCLLEN = N * ELSE * * Overdetermined system of equations sub( A )' * X = sub( B ) * * sub( B ) := inv(R') * sub( B ) * CALL PZTRSM( 'Left', 'Upper', 'Conjugate transpose', $ 'Non-unit', N, NRHS, CONE, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) * * B(IB+N:IB+M-1,JB:JB+NRHS-1) = ZERO * CALL PZLASET( 'All', M-N, NRHS, CZERO, CZERO, B, IB+N, JB, $ DESCB ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q(1:N,:) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PZUNMQR( 'Left', 'No transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = M * END IF * ELSE * * Compute LQ factorization of sub( A ) * CALL PZGELQF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least M, optimally M*NB. * IF( .NOT.TPSD ) THEN * * underdetermined system of equations sub( A ) * X = sub( B ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L) * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, $ NRHS, CONE, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * B(IB+M:IB+N-1,JB:JB+NRHS-1) = 0 * CALL PZLASET( 'All', N-M, NRHS, CZERO, CZERO, B, IB+M, JB, $ DESCB ) * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q(1:N,:)' * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PZUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, $ IA, JA, DESCA, WORK, B, IB, JB, DESCB, $ WORK( IPW ), LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = N * ELSE * * overdetermined system min || A' * X - B || * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PZUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L') * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PZTRSM( 'Left', 'Lower', 'Conjugate transpose', $ 'Non-unit', M, NRHS, CONE, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) * SCLLEN = M * END IF * END IF * * Undo scaling * IF( IASCL.EQ.1 ) THEN CALL PZLASCL( 'G', ANRM, SMLNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IASCL.EQ.2 ) THEN CALL PZLASCL( 'G', ANRM, BIGNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF IF( IBSCL.EQ.1 ) THEN CALL PZLASCL( 'G', SMLNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IBSCL.EQ.2 ) THEN CALL PZLASCL( 'G', BIGNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF * 10 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGELS * END scalapack-1.8.0/SRC/pzhengst.f0000640000175000017500000004176010363532303015764 0ustar muammarmuammar SUBROUTINE PZHENGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ) * .. * * Purpose * * ======= * * PZHENGST reduces a complex Hermitian-definite generalized * eigenproblem to standard form. * * PZHENGST performs the same function as PZHEGST, but is based on * rank 2K updates, which are faster and more scalable than * triangular solves (the basis of PZHENGST). * * PZHENGST calls PZHEGST when UPLO='U', hence PZHENGST provides * improved performance only when UPLO='L', IBTYPE=1. * * PZHENGST also calls PZHEGST when insufficient workspace is * provided, hence PZHENGST provides improved * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PZPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PZPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) DOUBLE PRECISION * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB ) * * When IBTYPE = 1 and UPLO = 'L', PZHENGST provides improved * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * where NB = MB_A = NB_A, * NP0 = NUMROC( N, NB, 0, 0, NPROW ), * NQ0 = NUMROC( N, NB, 0, 0, NPROW ), * * NUMROC ia a ScaLAPACK tool functions * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * * .. Parameters .. COMPLEX*16 ONEHALF, ONE, MONE DOUBLE PRECISION RONE PARAMETER ( ONEHALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ), $ MONE = ( -1.0D0, 0.0D0 ), RONE = 1.0D0 ) INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( DLEN_ = 9, CTXT_ = 2, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, INDAA, INDG, INDR, INDRT, IROFFA, $ IROFFB, J, K, KB, LWMIN, LWOPT, MYCOL, MYROW, $ NB, NP0, NPCOL, NPK, NPROW, NQ0, POSTK * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ), $ DESCR( DLEN_ ), DESCRT( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL LSAME, INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PXERBLA, PZGEMM, PZHEGST, PZHEMM, PZHER2K, $ PZLACPY, PZTRSM * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) SCALE = 1.0D0 * NB = DESCA( MB_ ) * * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) LWMIN = MAX( NB*( NP0+1 ), 3*NB ) IF( IBTYPE.EQ.1 .AND. .NOT.UPPER ) THEN LWOPT = 2*NP0*NB + NQ0*NB + NB*NB ELSE LWOPT = LWMIN END IF WORK( 1 ) = DCMPLX( DBLE( LWOPT ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHENGST', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * IF( IBTYPE.NE.1 .OR. UPPER .OR. LWORK.LT.LWOPT ) THEN CALL PZHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) RETURN END IF * CALL DESCSET( DESCG, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCR, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCRT, NB, N, NB, NB, IAROW, IACOL, ICTXT, NB ) CALL DESCSET( DESCAA, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * INDG = 1 INDR = INDG + DESCG( LLD_ )*NB INDAA = INDR + DESCR( LLD_ )*NB INDRT = INDAA + DESCAA( LLD_ )*NB * DO 30 K = 1, N, NB * KB = MIN( N-K+1, NB ) POSTK = K + KB NPK = N - POSTK + 1 * * CALL PZLACPY( 'A', N-POSTK+1, KB, B, POSTK+IB-1, K+JB-1, DESCB, $ WORK( INDG ), POSTK, 1, DESCG ) CALL PZLACPY( 'A', N-POSTK+1, KB, A, POSTK+IA-1, K+JA-1, DESCA, $ WORK( INDR ), POSTK, 1, DESCR ) CALL PZLACPY( 'A', KB, K-1, A, K+IA-1, JA, DESCA, $ WORK( INDRT ), 1, 1, DESCRT ) * CALL PZLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDR ), K, 1, DESCR ) CALL PZTRSM( 'Right', 'L', 'N', 'N', NPK, KB, MONE, B, K+IB-1, $ K+JB-1, DESCB, WORK( INDG ), POSTK, 1, DESCG ) * CALL PZHEMM( 'Right', 'L', NPK, KB, ONEHALF, A, K+IA-1, K+JA-1, $ DESCA, WORK( INDG ), POSTK, 1, DESCG, ONE, $ WORK( INDR ), POSTK, 1, DESCR ) * CALL PZHER2K( 'Lower', 'No T', NPK, KB, ONE, WORK( INDG ), $ POSTK, 1, DESCG, WORK( INDR ), POSTK, 1, DESCR, $ RONE, A, POSTK+IA-1, POSTK+JA-1, DESCA ) * CALL PZGEMM( 'No T', 'No Conj', NPK, K-1, KB, ONE, $ WORK( INDG ), POSTK, 1, DESCG, WORK( INDRT ), 1, $ 1, DESCRT, ONE, A, POSTK+IA-1, JA, DESCA ) * CALL PZHEMM( 'Right', 'L', NPK, KB, ONE, WORK( INDR ), K, 1, $ DESCR, WORK( INDG ), POSTK, 1, DESCG, ONE, A, $ POSTK+IA-1, K+JA-1, DESCA ) * CALL PZTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, K-1, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, K+IA-1, JA, $ DESCA ) * CALL PZLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDAA ), 1, 1, DESCAA ) * IF( MYROW.EQ.DESCAA( RSRC_ ) .AND. MYCOL.EQ.DESCAA( CSRC_ ) ) $ THEN DO 20 I = 1, KB DO 10 J = 1, I WORK( INDAA+J-1+( I-1 )*DESCAA( LLD_ ) ) $ = DCONJG( WORK( INDAA+I-1+( J-1 )* $ DESCAA( LLD_ ) ) ) 10 CONTINUE 20 CONTINUE END IF * CALL PZTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, $ 1, DESCAA ) * CALL PZTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', KB, KB, ONE, $ B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, 1, $ DESCAA ) * CALL PZLACPY( 'L', KB, KB, WORK( INDAA ), 1, 1, DESCAA, A, $ K+IA-1, K+JA-1, DESCA ) * CALL PZTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', NPK, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, POSTK+IA-1, $ K+JA-1, DESCA ) * DESCR( CSRC_ ) = MOD( DESCR( CSRC_ )+1, NPCOL ) DESCG( CSRC_ ) = MOD( DESCG( CSRC_ )+1, NPCOL ) DESCRT( RSRC_ ) = MOD( DESCRT( RSRC_ )+1, NPROW ) DESCAA( RSRC_ ) = MOD( DESCAA( RSRC_ )+1, NPROW ) DESCAA( CSRC_ ) = MOD( DESCAA( CSRC_ )+1, NPCOL ) 30 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWOPT ) ) * RETURN END scalapack-1.8.0/SRC/pzlarzc.f0000640000175000017500000010245510363532303015606 0ustar muammarmuammar SUBROUTINE PZLARZC( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARZC applies a complex elementary reflector Q**H to a * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), * from either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PZTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q**H * sub( C ), * = 'R': form sub( C ) * Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or * sub( C ) * Q**H if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST COMPLEX*16 TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBZTRNV, $ ZAXPY, ZCOPY, ZGEBR2D, ZGEBS2D, $ ZGEMV, ZGERC, ZGERV2D, ZGESD2D, $ ZGSUM2D, ZLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = DCONJG( TAU( JJV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, V( IOFFV ), $ 1, ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = DCONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * IPW = MPV+1 CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = DCONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = DCONJG( TAU( IIV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = DCONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * IPW = NQV+1 CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = DCONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PZLARZC * END scalapack-1.8.0/SRC/cdttrf.f0000640000175000017500000000630610363532303015405 0ustar muammarmuammar SUBROUTINE CDTTRF( N, DL, D, DU, INFO ) * * Written by Andrew J. Cleary, November 1996. * Modified from CGTTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. INTEGER INFO, N * .. * .. Array Arguments .. COMPLEX D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * CDTTRF computes an LU factorization of a complex tridiagonal matrix A * using elimination without partial pivoting. * * The factorization has the form * A = L * U * where L is a product of unit lower bidiagonal * matrices and U is upper triangular with nonzeros in only the main * diagonal and first superdiagonal. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * DL (input/output) COMPLEX array, dimension (N-1) * On entry, DL must contain the (n-1) subdiagonal elements of * A. * On exit, DL is overwritten by the (n-1) multipliers that * define the matrix L from the LU factorization of A. * * D (input/output) COMPLEX array, dimension (N) * On entry, D must contain the diagonal elements of A. * On exit, D is overwritten by the n diagonal elements of the * upper triangular matrix U from the LU factorization of A. * * DU (input/output) COMPLEX array, dimension (N-1) * On entry, DU must contain the (n-1) superdiagonal elements * of A. * On exit, DU is overwritten by the (n-1) elements of the first * superdiagonal of U. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Local Scalars .. INTEGER I COMPLEX FACT * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Parameters .. COMPLEX CZERO PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Executable Statements .. * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 CALL XERBLA( 'CDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * DO 20 I = 1, N - 1 IF( DL( I ).EQ.CZERO ) THEN * * Subdiagonal is zero, no elimination is required. * IF( D( I ).EQ.CZERO .AND. INFO.EQ.0 ) $ INFO = I ELSE * FACT = DL( I ) / D( I ) DL( I ) = FACT D( I+1 ) = D( I+1 ) - FACT*DU( I ) END IF 20 CONTINUE IF( D( N ).EQ.CZERO .AND. INFO.EQ.0 ) THEN INFO = N RETURN END IF * RETURN * * End of CDTTRF * END scalapack-1.8.0/SRC/pcpttrs.f0000640000175000017500000006722310363532303015623 0ustar muammarmuammar SUBROUTINE PCPTTRS( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX AF( * ), B( * ), E( * ), WORK( * ) REAL D( * ) * .. * * * Purpose * ======= * * PCPTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PCPTTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal symmetric positive definite distributed * matrix. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'D *U or L*D L' as computed by PCPTTRF. * * Routine PCPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * IMPORTANT NOTE: The current version of this code supports * only IB=JA * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPTTRF and this is stored in AF. If a linear system * is to be solved using PCPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, LLDA, LLDB, $ MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, $ PCPTTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 9*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 9*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 9*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -13 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 9*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -5 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCPTTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPTTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -13 CALL PXERBLA( ICTXT, $ 'PCPTTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB(5) PARAM_CHECK( 14, 1 ) = DESCB(4) PARAM_CHECK( 13, 1 ) = DESCB(3) PARAM_CHECK( 12, 1 ) = DESCB(2) PARAM_CHECK( 11, 1 ) = DESCB(1) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 15, 2 ) = 905 PARAM_CHECK( 14, 2 ) = 904 PARAM_CHECK( 13, 2 ) = 903 PARAM_CHECK( 12, 2 ) = 902 PARAM_CHECK( 11, 2 ) = 901 PARAM_CHECK( 10, 2 ) = 8 PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 13 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PCPTTRSV( 'L', 'N', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ELSE * CALL PCPTTRSV( 'U', 'C', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ENDIF * * Divide by the main diagonal: B <- D^{-1} B * * The main partition is first * DO 10 I=PART_OFFSET+1, PART_OFFSET+ODD_SIZE CALL CSCAL( NRHS, CMPLX( CONE/D( I ) ), B( I ), LLDB ) 10 CONTINUE * * Reduced system is next * IF( MYCOL .LT. NPCOL-1 ) THEN I=PART_OFFSET+ODD_SIZE+1 CALL CSCAL( NRHS, CONE/AF( ODD_SIZE+2 ), B( I ), LLDB ) ENDIF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PCPTTRSV( 'L', 'C', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ELSE * CALL PCPTTRSV( 'U', 'N', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCPTTRS * END scalapack-1.8.0/SRC/dstein2.f0000640000175000017500000002650510363532303015472 0ustar muammarmuammar* * SUBROUTINE DSTEIN2( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, LDZ, $ WORK, IWORK, IFAIL, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, LDZ, M, N DOUBLE PRECISION ORFAC * .. * .. Array Arguments .. INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), $ IWORK( * ) DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * DSTEIN2 computes the eigenvectors of a real symmetric tridiagonal * matrix T corresponding to specified eigenvalues, using inverse * iteration. * * The maximum number of iterations allowed for each eigenvector is * specified by an internal parameter MAXITS (currently set to 5). * * Arguments * ========= * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input) DOUBLE PRECISION array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (input) DOUBLE PRECISION array, dimension (N) * The (n-1) subdiagonal elements of the tridiagonal matrix * T, in elements 1 to N-1. E(N) need not be set. * * M (input) INTEGER * The number of eigenvectors to be found. 0 <= M <= N. * * W (input) DOUBLE PRECISION array, dimension (N) * The first M elements of W contain the eigenvalues for * which eigenvectors are to be computed. The eigenvalues * should be grouped by split-off block and ordered from * smallest to largest within the block. ( The output array * W from DSTEBZ with ORDER = 'B' is expected here. ) * * IBLOCK (input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to * the first submatrix from the top, =2 if W(i) belongs to * the second submatrix, etc. ( The output array IBLOCK * from DSTEBZ is expected here. ) * * ISPLIT (input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to * ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 * through ISPLIT( 2 ), etc. * ( The output array ISPLIT from DSTEBZ is expected here. ) * * ORFAC (input) DOUBLE PRECISION * ORFAC specifies which eigenvectors should be * orthogonalized. Eigenvectors that correspond to eigenvalues * which are within ORFAC*||T|| of each other are to be * orthogonalized. * * Z (output) DOUBLE PRECISION array, dimension (LDZ, M) * The computed eigenvectors. The eigenvector associated * with the eigenvalue W(i) is stored in the i-th column of * Z. Any vector which fails to converge is set to its current * iterate after MAXITS iterations. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= max(1,N). * * WORK (workspace) DOUBLE PRECISION array, dimension (5*N) * * IWORK (workspace) INTEGER array, dimension (N) * * IFAIL (output) INTEGER array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after * MAXITS iterations, then their indices are stored in * array IFAIL. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, then i eigenvectors failed to converge * in MAXITS iterations. Their indices are stored in * array IFAIL. * * Internal Parameters * =================== * * MAXITS INTEGER, default = 5 * The maximum number of iterations performed. * * EXTRA INTEGER, default = 2 * The number of iterations performed after norm growth * criterion is satisfied, should be at least 1. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, TEN, ODM1 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1, $ ODM1 = 1.0D-1 ) INTEGER MAXITS, EXTRA PARAMETER ( MAXITS = 5, EXTRA = 2 ) * .. * .. Local Scalars .. INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, $ JBLK, JMAX, NBLK, NRMCHK DOUBLE PRECISION EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, SCL, $ SEP, STPCRT, TOL, XJ, XJM, ZTR * .. * .. Local Arrays .. INTEGER ISEED( 4 ) * .. * .. External Functions .. INTEGER IDAMAX DOUBLE PRECISION DASUM, DDOT, DLAMCH, DNRM2 EXTERNAL IDAMAX, DASUM, DDOT, DLAMCH, DNRM2 * .. * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 DO 10 I = 1, M IFAIL( I ) = 0 10 CONTINUE * IF( N.LT.0 ) THEN INFO = -1 ELSE IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( ORFAC.LT.ZERO ) THEN INFO = -8 ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN INFO = -10 ELSE DO 20 J = 2, M IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN INFO = -6 GO TO 30 END IF IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) $ THEN INFO = -5 GO TO 30 END IF 20 CONTINUE 30 CONTINUE END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'DSTEIN2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) THEN RETURN ELSE IF( N.EQ.1 ) THEN Z( 1, 1 ) = ONE RETURN END IF * * Get machine constants. * EPS = DLAMCH( 'Precision' ) * * Initialize seed for random number generator DLARNV. * DO 40 I = 1, 4 ISEED( I ) = 1 40 CONTINUE * * Initialize pointers. * INDRV1 = 0 INDRV2 = INDRV1 + N INDRV3 = INDRV2 + N INDRV4 = INDRV3 + N INDRV5 = INDRV4 + N * * Compute eigenvectors of matrix blocks. * J1 = 1 DO 160 NBLK = 1, IBLOCK( M ) * * Find starting and ending indices of block nblk. * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) BLKSIZ = BN - B1 + 1 IF( BLKSIZ.EQ.1 ) $ GO TO 60 GPIND = J1 * * Compute reorthogonalization criterion and stopping criterion. * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 50 I = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ $ ABS( E( I ) ) ) 50 CONTINUE ORTOL = ORFAC*ONENRM * STPCRT = SQRT( ODM1 / BLKSIZ ) * * Loop through eigenvalues of block nblk. * 60 CONTINUE JBLK = 0 DO 150 J = J1, M IF( IBLOCK( J ).NE.NBLK ) THEN J1 = J GO TO 160 END IF JBLK = JBLK + 1 XJ = W( J ) * * Skip all the work if the block size is one. * IF( BLKSIZ.EQ.1 ) THEN WORK( INDRV1+1 ) = ONE GO TO 120 END IF * * If eigenvalues j and j-1 are too close, add a relatively * small perturbation. * IF( JBLK.GT.1 ) THEN EPS1 = ABS( EPS*XJ ) PERTOL = TEN*EPS1 SEP = XJ - XJM IF( SEP.LT.PERTOL ) $ XJ = XJM + PERTOL END IF * ITS = 0 NRMCHK = 0 * * Get random starting vector. * CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) * * Copy the matrix T so it won't be destroyed in factorization. * CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) * * Compute LU factors with partial pivoting ( PT = LU ) * TOL = ZERO CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, $ IINFO ) * * Update iteration count. * 70 CONTINUE ITS = ITS + 1 IF( ITS.GT.MAXITS ) $ GO TO 100 * * Normalize and scale the righthand side vector Pb. * SCL = BLKSIZ*ONENRM*MAX( EPS, $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / $ DASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) * * Solve the system LU = Pb. * CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, $ WORK( INDRV1+1 ), TOL, IINFO ) * * Reorthogonalize by modified Gram-Schmidt if eigenvalues are * close enough. * IF( JBLK.EQ.1 ) $ GO TO 90 IF( ABS( XJ-XJM ).GT.ORTOL ) $ GPIND = J * IF( GPIND.NE.J ) THEN DO 80 I = GPIND, J - 1 ZTR = -DDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), $ 1 ) CALL DAXPY( BLKSIZ, ZTR, Z( B1, I ), 1, $ WORK( INDRV1+1 ), 1 ) 80 CONTINUE END IF * * Check the infinity norm of the iterate. * 90 CONTINUE JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) NRM = ABS( WORK( INDRV1+JMAX ) ) * * Continue for additional iterations after norm reaches * stopping criterion. * IF( NRM.LT.STPCRT ) $ GO TO 70 NRMCHK = NRMCHK + 1 IF( NRMCHK.LT.EXTRA+1 ) $ GO TO 70 * GO TO 110 * * If stopping criterion was not satisfied, update info and * store eigenvector number in array ifail. * 100 CONTINUE INFO = INFO + 1 IFAIL( INFO ) = J * * Accept iterate as jth eigenvector. * 110 CONTINUE SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) IF( WORK( INDRV1+JMAX ).LT.ZERO ) $ SCL = -SCL CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) 120 CONTINUE DO 130 I = 1, N Z( I, J ) = ZERO 130 CONTINUE DO 140 I = 1, BLKSIZ Z( B1+I-1, J ) = WORK( INDRV1+I ) 140 CONTINUE * * Save the shift to check eigenvalue spacing at next * iteration. * XJM = XJ * 150 CONTINUE 160 CONTINUE * RETURN * * End of DSTEIN2 * END scalapack-1.8.0/SRC/pdlabrd.f0000640000175000017500000005300110363532303015521 0ustar muammarmuammar SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAUP( * ), $ TAUQ( * ), X( * ), Y( * ), WORK( * ) * .. * * Purpose * ======= * * PDLABRD reduces the first NB rows and columns of a real general * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper * or lower bidiagonal form by an orthogonal transformation Q' * A * P, * and returns the matrices X and Y which are needed to apply the * transformation to the unreduced part of sub( A ). * * If M >= N, sub( A ) is reduced to upper bidiagonal form; if M < N, to * lower bidiagonal form. * * This is an auxiliary routine called by PDGEBRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of leading rows and columns of sub( A ) to be * reduced. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ) to be reduced. On exit, * the first NB rows and columns of the matrix are overwritten; * the rest of the distributed matrix sub( A ) is unchanged. * If m >= n, elements on and below the diagonal in the first NB * columns, with the array TAUQ, represent the orthogonal * matrix Q as a product of elementary reflectors; and * elements above the diagonal in the first NB rows, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. * If m < n, elements below the diagonal in the first NB * columns, with the array TAUQ, represent the orthogonal * matrix Q as a product of elementary reflectors, and * elements on and above the diagonal in the first NB rows, * with the array TAUP, represent the orthogonal matrix P as * a product of elementary reflectors. * See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(ia+i-1,ja+i-1). D is tied to the distributed * matrix A. * * E (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(ia+i-1,ja+i) for i = 1,2,...,n-1; * if m < n, E(i) = A(ia+i,ja+i-1) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) DOUBLE PRECISION array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) DOUBLE PRECISION array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * X (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_X,NB). On exit, the local * pieces of the distributed M-by-NB matrix * X(IX:IX+M-1,JX:JX+NB-1) required to update the unreduced * part of sub( A ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * Y (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_Y,NB). On exit, the local * pieces of the distributed N-by-NB matrix * Y(IY:IY+N-1,JY:JY+NB-1) required to update the unreduced * part of sub( A ). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= NB_A + NQ, with * * NQ = NUMROC( N+MOD( IA-1, NB_Y ), NB_Y, MYCOL, IACOL, NPCOL ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors. * * If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); u(1:i-1) = 0, u(i) = 1, and u(i:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The elements of the vectors v and u together form the m-by-nb matrix * V and the nb-by-n matrix U' which are needed, with X and Y, to apply * the transformation to the unreduced part of the matrix, using a block * update of the form: sub( A ) := sub( A ) - V*Y' - X*U'. * * The contents of sub( A ) on exit are illustrated by the following * examples with nb = 2: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) * ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) * ( v1 v2 a a a ) ( v1 1 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix which is unchanged, * vi denotes an element of the vector defining H(i), and ui an element * of the vector defining G(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, IPY, IW, J, JJ, $ JWY, K, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION ALPHA, TAU INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), $ DESCTP( DLEN_ ), DESCTQ( DLEN_ ), $ DESCW( DLEN_ ), DESCWY( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PDCOPY, $ PDELGET, PDELSET, PDGEMV, PDLARFG, $ PDSCAL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 IW = MOD( IA-1, DESCA( NB_ ) ) + 1 ALPHA = ZERO * CALL DESCSET( DESCWY, 1, N+MOD( IA-1, DESCY( NB_ ) ), 1, $ DESCA( NB_ ), IAROW, IACOL, ICTXT, 1 ) CALL DESCSET( DESCW, DESCA( MB_ ), 1, DESCA( MB_ ), 1, IAROW, $ IACOL, ICTXT, DESCA( MB_ ) ) CALL DESCSET( DESCTQ, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), IAROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCTP, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), IACOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i:ia+m-1,j) * IF( K.GT.1 ) THEN CALL PDGEMV( 'No transpose', M-K+1, K-1, -ONE, A, I, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I, $ J, DESCA, 1 ) CALL PDGEMV( 'No transpose', M-K+1, K-1, -ONE, X, IX+K-1, $ JX, DESCX, A, IA, J, DESCA, 1, ONE, A, I, J, $ DESCA, 1 ) CALL PDELSET( A, I-1, J, DESCA, ALPHA ) END IF * * Generate reflection Q(i) to annihilate A(i+1:ia+m-1,j) * CALL PDLARFG( M-K+1, ALPHA, I, J, A, I+1, J, DESCA, 1, $ TAUQ ) CALL PDELSET( D, 1, J, DESCD, ALPHA ) CALL PDELSET( A, I, J, DESCA, ONE ) * * Compute Y(IA+I:IA+N-1,J) * CALL PDGEMV( 'Transpose', M-K+1, N-K, ONE, A, I, J+1, DESCA, $ A, I, J, DESCA, 1, ZERO, WORK( IPY ), 1, JWY, $ DESCWY, DESCWY( M_ ) ) CALL PDGEMV( 'Transpose', M-K+1, K-1, ONE, A, I, JA, DESCA, $ A, I, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PDGEMV( 'Transpose', K-1, N-K, -ONE, Y, IY, JY+K, $ DESCY, WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), $ 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PDGEMV( 'Transpose', M-K+1, K-1, ONE, X, IX+K-1, JX, $ DESCX, A, I, J, DESCA, 1, ZERO, WORK, IW, 1, $ DESCW, 1 ) CALL PDGEMV( 'Transpose', K-1, N-K, -ONE, A, IA, J+1, DESCA, $ WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) * CALL PDELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PDSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PDCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) * * Update A(i,j+1:ja+n-1) * CALL PDGEMV( 'Transpose', K, N-K, -ONE, Y, IY, JY+K, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, I, J+1, $ DESCA, DESCA( M_ ) ) CALL PDGEMV( 'Transpose', K-1, N-K, -ONE, A, IA, J+1, DESCA, $ X, IX+K-1, JX, DESCX, DESCX( M_ ), ONE, A, I, $ J+1, DESCA, DESCA( M_ ) ) CALL PDELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection P(i) to annihilate A(i,j+2:ja+n-1) * CALL PDLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, N+JA-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PDELSET( E, I, 1, DESCE, ALPHA ) CALL PDELSET( A, I, J+1, DESCA, ONE ) * * Compute X(I+1:IA+M-1,J) * CALL PDGEMV( 'No transpose', M-K, N-K, ONE, A, I+1, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PDGEMV( 'No transpose', K, N-K, ONE, Y, IY, JY+K, $ DESCY, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PDGEMV( 'No transpose', M-K, K, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PDGEMV( 'No transpose', K-1, N-K, ONE, A, IA, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PDGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PDELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PDSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i,j:ja+n-1) * IF( K.GT.1 ) THEN CALL PDGEMV( 'Transpose', K-1, N-K+1, -ONE, Y, IY, $ JY+K-1, DESCY, A, I, JA, DESCA, DESCA( M_ ), $ ONE, A, I, J, DESCA, DESCA( M_ ) ) CALL PDGEMV( 'Transpose', K-1, N-K+1, -ONE, A, IA, J, $ DESCA, X, IX+K-1, JX, DESCX, DESCX( M_ ), $ ONE, A, I, J, DESCA, DESCA( M_ ) ) CALL PDELSET( A, I, J-1, DESCA, ALPHA ) END IF * * Generate reflection P(i) to annihilate A(i,j+1:ja+n-1) * CALL PDLARFG( N-K+1, ALPHA, I, J, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP ) CALL PDELSET( D, I, 1, DESCD, ALPHA ) CALL PDELSET( A, I, J, DESCA, ONE ) * * Compute X(i+1:ia+m-1,j) * CALL PDGEMV( 'No transpose', M-K, N-K+1, ONE, A, I+1, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PDGEMV( 'No transpose', K-1, N-K+1, ONE, Y, IY, JY+K-1, $ DESCY, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PDGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PDGEMV( 'No transpose', K-1, N-K+1, ONE, A, IA, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PDGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PDELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PDSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) * * Update A(i+1:ia+m-1,j) * CALL PDGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PDGEMV( 'No transpose', M-K, K, -ONE, X, IX+K, JX, $ DESCX, A, IA, J, DESCA, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PDELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection Q(i) to annihilate A(i+2:ia+m-1,j) * CALL PDLARFG( M-K, ALPHA, I+1, J, A, MIN( I+2, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PDELSET( E, 1, J, DESCE, ALPHA ) CALL PDELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(ia+i:ia+n-1,j) * CALL PDGEMV( 'Transpose', M-K, N-K, ONE, A, I+1, J+1, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) CALL PDGEMV( 'Transpose', M-K, K-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PDGEMV( 'Transpose', K-1, N-K, -ONE, Y, IY, JY+K, $ DESCY, WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), $ 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PDGEMV( 'Transpose', M-K, K, ONE, X, IX+K, JX, DESCX, $ A, I+1, J, DESCA, 1, ZERO, WORK, IW, 1, DESCW, $ 1 ) CALL PDGEMV( 'Transpose', K, N-K, -ONE, A, IA, J+1, DESCA, $ WORK, IW, 1, DESCW, 1, ONE, WORK( IPY ), 1, $ JWY, DESCWY, DESCWY( M_ ) ) * CALL PDELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PDSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PDCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) 20 CONTINUE END IF * RETURN * * End of PDLABRD * END scalapack-1.8.0/SRC/pssyevd.f0000640000175000017500000003123610363532303015614 0ustar muammarmuammar SUBROUTINE PSSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) REAL A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSYEVD computes all the eigenvalues and eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PSSYEVD assumes a homogeneous system and makes * no checks for consistency of the eigenvalues or eigenvectors across * the different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only; (NOT IMPLEMENTED YET) * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/workspace) block cyclic REAL array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * W (global output) REAL array, dimension (N) * If INFO=0, the eigenvalues in ascending order. * * Z (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * Z contains the orthonormal eigenvectors * of the symmetric matrix A. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) REAL array, * dimension (LWORK) * On output, WORK(1) returns the workspace required. * * LWORK (local input) INTEGER * LWORK >= MAX( 1+6*N+2*NP*NQ, TRILWMIN ) + 2*N * TRILWMIN = 3*N + MAX( NB*( NP+1 ), 3*NB ) * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * Alignment requirements * ====================== * * The distributed submatrices sub( A ), sub( Z ) must verify * some alignment properties, namely the following expression * should be true: * ( MB_A.EQ.NB_A.EQ.MB_Z.EQ.NB_Z .AND. IROFFA.EQ.ICOFFA .AND. * IROFFA.EQ.0 .AND.IROFFA.EQ.IROFFZ. AND. IAROW.EQ.IZROW) * with IROFFA = MOD( IA-1, MB_A ) * and ICOFFA = MOD( JA-1, NB_A ). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICOFFZ, ICTXT, IINFO, $ INDD, INDE, INDE2, INDTAU, INDWORK, INDWORK2, $ IROFFA, IROFFZ, ISCALE, LIWMIN, LLWORK, $ LLWORK2, LWMIN, MYCOL, MYROW, NB, NP, NPCOL, $ NPROW, NQ, OFFSET, TRILWMIN REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PSLAMCH, PSLANSY EXTERNAL LSAME, INDXG2P, NUMROC, PSLAMCH, PSLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLARED1D, $ PSLASCL, PSLASET, PSORMTR, PSSTEDC, PSSYTRD, $ PXERBLA, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFZ = MOD( IZ-1, DESCZ( MB_ ) ) ICOFFZ = MOD( JZ-1, DESCZ( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) TRILWMIN = 3*N + MAX( NB*( NP+1 ), 3*NB ) LWMIN = MAX( 1+6*N+2*NP*NQ, TRILWMIN ) + 2*N LIWMIN = 7*N + 8*NPCOL + 2 WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN IF( .NOT.LSAME( JOBZ, 'V' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( IROFFA.NE.IROFFZ .OR. ICOFFA.NE.ICOFFZ ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 1200+CSRC_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYEVD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDE2 = INDD + N INDWORK = INDE2 + N LLWORK = LWORK - INDWORK + 1 INDWORK2 = INDD LLWORK2 = LWORK - INDWORK2 + 1 * * Scale matrix to allowable range, if necessary. * ISCALE = 0 SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) ANRM = PSLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * * CALL PSSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE2 ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDD ), W, $ WORK( INDWORK ), LLWORK ) * CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDE2 ), WORK( INDE ), $ WORK( INDWORK ), LLWORK ) * CALL PSLASET( 'Full', N, N, ZERO, ONE, Z, 1, 1, DESCZ ) * IF( UPPER ) THEN OFFSET = 1 ELSE OFFSET = 0 END IF CALL PSSTEDC( 'I', N, W, WORK( INDE+OFFSET ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK2 ), LLWORK2, IWORK, LIWORK, INFO ) * CALL PSORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, WORK( INDWORK2 ), $ LLWORK2, IINFO ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL SSCAL( N, ONE / SIGMA, W, 1 ) END IF * RETURN * * End of PSSYEVD * END scalapack-1.8.0/SRC/pctrti2.f0000640000175000017500000002365610363532303015515 0ustar muammarmuammar SUBROUTINE PCTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCTRTI2 computes the inverse of a complex upper or lower triangular * block matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). This matrix should be * contained in one and only one process memory space (local operation). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular * = 'U': sub( A ) is unit triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)), * this array contains the local pieces of the triangular matrix * sub( A ). If UPLO = 'U', the leading N-by-N upper triangular * part of the matrix sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of the matrix sub( A ) contains the lower * triangular matrix, and the strictly upper triangular part * of sub( A ) is not referenced. If DIAG = 'U', the diagonal * elements of sub( A ) are also not referenced and are assumed * to be 1. On exit, the (triangular) inverse of the original * matrix, in the same storage format. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER IACOL, IAROW, ICTXT, ICURR, IDIAG, IIA, IOFFA, $ JJA, LDA, MYCOL, MYROW, NA, NPCOL, NPROW COMPLEX AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, CSCAL, $ CTRMV, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Compute local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) * IF( UPPER ) THEN * IOFFA = IIA + ( JJA - 1 ) * LDA ICURR = IOFFA + LDA * IF( NOUNIT ) THEN * * Compute inverse of upper non-unit triangular matrix. * A( IOFFA ) = ONE / A( IOFFA ) IDIAG = ICURR + 1 DO 10 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements 1:j-1 of j-th column. * CALL CTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL CSCAL( NA, AJJ, A( ICURR ), 1 ) IDIAG = IDIAG + LDA + 1 ICURR = ICURR + LDA 10 CONTINUE * ELSE * * Compute inverse of upper unit triangular matrix. * DO 20 NA = 1, N-1 * * Compute elements 1:j-1 of j-th column. * CALL CTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL CSCAL( NA, -ONE, A( ICURR ), 1 ) ICURR = ICURR + LDA 20 CONTINUE * END IF * ELSE * ICURR = IIA + N - 1 + ( JJA + N - 2 ) * LDA IOFFA = ICURR - LDA * IF( NOUNIT ) THEN * * Compute inverse of lower non-unit triangular matrix. * A( ICURR ) = ONE / A( ICURR ) IDIAG = IOFFA - 1 DO 30 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements j+1:n of j-th column. * CALL CTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL CSCAL( NA, AJJ, A( IOFFA ), 1 ) ICURR = IDIAG IDIAG = IDIAG - LDA - 1 IOFFA = IDIAG + 1 30 CONTINUE * ELSE * * Compute inverse of lower unit triangular matrix. * DO 40 NA = 1, N-1 * * Compute elements j+1:n of j-th column. * CALL CTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL CSCAL( NA, -ONE, A( IOFFA ), 1 ) ICURR = ICURR - LDA - 1 IOFFA = ICURR - LDA 40 CONTINUE * END IF * END IF * END IF * * End of PCTRTI2 * END scalapack-1.8.0/SRC/pctrevc.f0000640000175000017500000005261210602576752015602 0ustar muammarmuammar SUBROUTINE PCTREVC( SIDE, HOWMNY, SELECT, N, T, DESCT, VL, DESCVL, $ VR, DESCVR, MM, M, WORK, RWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. CHARACTER HOWMNY, SIDE INTEGER INFO, M, MM, N * .. * .. Array Arguments .. LOGICAL SELECT( * ) INTEGER DESCT( * ), DESCVL( * ), DESCVR( * ) REAL RWORK( * ) COMPLEX T( * ), VL( * ), VR( * ), WORK( * ) * .. * * Purpose * ======= * * PCTREVC computes some or all of the right and/or left eigenvectors of * a complex upper triangular matrix T in parallel. * * The right eigenvector x and the left eigenvector y of T corresponding * to an eigenvalue w are defined by: * * T*x = w*x, y'*T = w*y' * * where y' denotes the conjugate transpose of the vector y. * * If all eigenvectors are requested, the routine may either return the * matrices X and/or Y of right or left eigenvectors of T, or the * products Q*X and/or Q*Y, where Q is an input unitary * matrix. If T was obtained from the Schur factorization of an * original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of * right or left eigenvectors of A. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER*1 * = 'R': compute right eigenvectors only; * = 'L': compute left eigenvectors only; * = 'B': compute both right and left eigenvectors. * * HOWMNY (global input) CHARACTER*1 * = 'A': compute all right and/or left eigenvectors; * = 'B': compute all right and/or left eigenvectors, * and backtransform them using the input matrices * supplied in VR and/or VL; * = 'S': compute selected right and/or left eigenvectors, * specified by the logical array SELECT. * * SELECT (global input) LOGICAL array, dimension (N) * If HOWMNY = 'S', SELECT specifies the eigenvectors to be * computed. * If HOWMNY = 'A' or 'B', SELECT is not referenced. * To select the eigenvector corresponding to the j-th * eigenvalue, SELECT(j) must be set to .TRUE.. * * N (global input) INTEGER * The order of the matrix T. N >= 0. * * T (global input/output) COMPLEX array, dimension * (DESCT(LLD_),*) * The upper triangular matrix T. T is modified, but restored * on exit. * * DESCT (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix T. * * VL (global input/output) COMPLEX array, dimension * (DESCVL(LLD_),MM) * On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must * contain an N-by-N matrix Q (usually the unitary matrix Q of * Schur vectors returned by CHSEQR). * On exit, if SIDE = 'L' or 'B', VL contains: * if HOWMNY = 'A', the matrix Y of left eigenvectors of T; * if HOWMNY = 'B', the matrix Q*Y; * if HOWMNY = 'S', the left eigenvectors of T specified by * SELECT, stored consecutively in the columns * of VL, in the same order as their * eigenvalues. * If SIDE = 'R', VL is not referenced. * * DESCVL (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix VL. * * VR (global input/output) COMPLEX array, dimension * (DESCVR(LLD_),MM) * On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must * contain an N-by-N matrix Q (usually the unitary matrix Q of * Schur vectors returned by CHSEQR). * On exit, if SIDE = 'R' or 'B', VR contains: * if HOWMNY = 'A', the matrix X of right eigenvectors of T; * if HOWMNY = 'B', the matrix Q*X; * if HOWMNY = 'S', the right eigenvectors of T specified by * SELECT, stored consecutively in the columns * of VR, in the same order as their * eigenvalues. * If SIDE = 'L', VR is not referenced. * * DESCVR (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix VR. * * MM (global input) INTEGER * The number of columns in the arrays VL and/or VR. MM >= M. * * M (global output) INTEGER * The number of columns in the arrays VL and/or VR actually * used to store the eigenvectors. If HOWMNY = 'A' or 'B', M * is set to N. Each selected eigenvector occupies one * column. * * WORK (local workspace) COMPLEX array, * dimension ( 2*DESCT(LLD_) ) * Additional workspace may be required if PCLATTRS is updated * to use WORK. * * RWORK (local workspace) REAL array, * dimension ( DESCT(LLD_) ) * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Further Details * =============== * * The algorithm used in this program is basically backward (forward) * substitution. It is the hope that scaling would be used to make the * the code robust against possible overflow. But scaling has not yet * been implemented in PCLATTRS which is called by this routine to solve * the triangular systems. PCLATTRS just calls PCTRSV. * * Each eigenvector is normalized so that the element of largest * magnitude has magnitude 1; here the magnitude of a complex number * (x,y) is taken to be |x| + |y|. * * Further Details * =============== * * Implemented by Mark R. Fahey, June, 2000 * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV INTEGER CONTXT, CSRC, I, ICOL, II, IROW, IS, ITMP1, $ ITMP2, J, K, KI, LDT, LDVL, LDVR, LDW, MB, $ MYCOL, MYROW, NB, NPCOL, NPROW, RSRC REAL SELF REAL OVFL, REMAXD, SCALE, SMIN, SMLNUM, ULP, UNFL COMPLEX CDUM, REMAXC, SHIFT * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. LOGICAL LSAME REAL PSLAMCH EXTERNAL LSAME, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCINIT, SGSUM2D, IGAMN2D, $ INFOG2L, PSLABAD, PSCASUM, PXERBLA, PCAMAX, $ PCCOPY, PCSSCAL, PCGEMV, PCLASET, PCLATTRS, $ CGSUM2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CMPLX, CONJG, AIMAG, MAX * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CONTXT = DESCT( CTXT_ ) RSRC = DESCT( RSRC_ ) CSRC = DESCT( CSRC_ ) MB = DESCT( MB_ ) NB = DESCT( NB_ ) LDT = DESCT( LLD_ ) LDW = LDT LDVR = DESCVR( LLD_ ) LDVL = DESCVL( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Decode and test the input parameters * BOTHV = LSAME( SIDE, 'B' ) RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV * ALLV = LSAME( HOWMNY, 'A' ) OVER = LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'O' ) SOMEV = LSAME( HOWMNY, 'S' ) * * Set M to the number of columns required to store the selected * eigenvectors. * IF( SOMEV ) THEN M = 0 DO 10 J = 1, N IF( SELECT( J ) ) $ M = M + 1 10 CONTINUE ELSE M = N END IF * INFO = 0 IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN INFO = -1 ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( MM.LT.M ) THEN INFO = -11 END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PCTREVC', -INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * Set the constants to control overflow. * UNFL = PSLAMCH( CONTXT, 'Safe minimum' ) OVFL = ONE / UNFL CALL PSLABAD( CONTXT, UNFL, OVFL ) ULP = PSLAMCH( CONTXT, 'Precision' ) SMLNUM = UNFL*( N / ULP ) * * Store the diagonal elements of T in working array WORK( LDW+1 ). * DO 20 I = 1, N CALL INFOG2L( I, I, DESCT, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( LDW+IROW ) = T( ( ICOL-1 )*LDT+IROW ) END IF 20 CONTINUE * * Compute 1-norm of each column of strictly upper triangular * part of T to control overflow in triangular solver. Computed, * but not used. For use in PCLATTRS. * RWORK( 1 ) = ZERO DO 30 J = 2, N CALL PSCASUM( J-1, RWORK( J ), T, 1, J, DESCT, 1 ) 30 CONTINUE * I replicate the norms in RWORK. Should they be distributed * over the process rows? CALL SGSUM2D( CONTXT, 'Row', ' ', N, 1, RWORK, N, -1, -1 ) * IF( RIGHTV ) THEN * * Compute right eigenvectors. * * Need to set the distribution pattern of WORK * CALL DESCINIT( DESCW, N, 1, NB, 1, RSRC, CSRC, CONTXT, LDW, $ INFO ) * IS = M DO 70 KI = N, 1, -1 * IF( SOMEV ) THEN IF( .NOT.SELECT( KI ) ) $ GO TO 70 END IF * SMIN = ZERO SHIFT = CZERO CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN SHIFT = T( ( ICOL-1 )*LDT+IROW ) SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM ) END IF CALL SGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 ) CALL CGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 ) * CALL INFOG2L( 1, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( 1 ) = CONE END IF * * Form right-hand side. Distribute rhs onto first column * of processor grid. * IF( KI.GT.1 ) THEN CALL PCCOPY( KI-1, T, 1, KI, DESCT, 1, WORK, 1, 1, DESCW, $ 1 ) END IF DO 40 K = 1, KI - 1 CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = -WORK( IROW ) END IF 40 CONTINUE * * Solve the triangular system: * (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. * DO 50 K = 1, KI - 1 CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = T( ( ICOL-1 )*LDT+IROW ) - $ SHIFT IF( CABS1( T( ( ICOL-1 )*LDT+IROW ) ).LT.SMIN ) THEN T( ( ICOL-1 )*LDT+IROW ) = CMPLX( SMIN ) END IF END IF 50 CONTINUE * IF( KI.GT.1 ) THEN CALL PCLATTRS( 'Upper', 'No transpose', 'Non-unit', 'Y', $ KI-1, T, 1, 1, DESCT, WORK, 1, 1, DESCW, $ SCALE, RWORK, INFO ) CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = CMPLX( SCALE ) END IF END IF * * Copy the vector x or Q*x to VR and normalize. * IF( .NOT.OVER ) THEN CALL PCCOPY( KI, WORK, 1, 1, DESCW, 1, VR, 1, IS, DESCVR, $ 1 ) * CALL PCAMAX( KI, REMAXC, II, VR, 1, IS, DESCVR, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PCSSCAL( KI, REMAXD, VR, 1, IS, DESCVR, 1 ) * CALL PCLASET( ' ', N-KI, 1, CZERO, CZERO, VR, KI+1, IS, $ DESCVR ) ELSE IF( KI.GT.1 ) $ CALL PCGEMV( 'N', N, KI-1, CONE, VR, 1, 1, DESCVR, $ WORK, 1, 1, DESCW, 1, CMPLX( SCALE ), $ VR, 1, KI, DESCVR, 1 ) * CALL PCAMAX( N, REMAXC, II, VR, 1, KI, DESCVR, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PCSSCAL( N, REMAXD, VR, 1, KI, DESCVR, 1 ) END IF * * Set back the original diagonal elements of T. * DO 60 K = 1, KI - 1 CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = WORK( LDW+IROW ) END IF 60 CONTINUE * IS = IS - 1 70 CONTINUE END IF * IF( LEFTV ) THEN * * Compute left eigenvectors. * * Need to set the distribution pattern of WORK * CALL DESCINIT( DESCW, N, 1, MB, 1, RSRC, CSRC, CONTXT, LDW, $ INFO ) * IS = 1 DO 110 KI = 1, N * IF( SOMEV ) THEN IF( .NOT.SELECT( KI ) ) $ GO TO 110 END IF * SMIN = ZERO SHIFT = CZERO CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN SHIFT = T( ( ICOL-1 )*LDT+IROW ) SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM ) END IF CALL SGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 ) CALL CGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 ) * CALL INFOG2L( N, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN WORK( IROW ) = CONE END IF * * Form right-hand side. * IF( KI.LT.N ) THEN CALL PCCOPY( N-KI, T, KI, KI+1, DESCT, N, WORK, KI+1, 1, $ DESCW, 1 ) END IF DO 80 K = KI + 1, N CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = -CONJG( WORK( IROW ) ) END IF 80 CONTINUE * * Solve the triangular system: * (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. * DO 90 K = KI + 1, N CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = T( ( ICOL-1 )*LDT+IROW ) - $ SHIFT IF( CABS1( T( ( ICOL-1 )*LDT+IROW ) ).LT.SMIN ) $ T( ( ICOL-1 )*LDT+IROW ) = CMPLX( SMIN ) END IF 90 CONTINUE * IF( KI.LT.N ) THEN CALL PCLATTRS( 'Upper', 'Conjugate transpose', 'Nonunit', $ 'Y', N-KI, T, KI+1, KI+1, DESCT, WORK, $ KI+1, 1, DESCW, SCALE, RWORK, INFO ) CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN WORK( IROW ) = CMPLX( SCALE ) END IF END IF * * Copy the vector x or Q*x to VL and normalize. * IF( .NOT.OVER ) THEN CALL PCCOPY( N-KI+1, WORK, KI, 1, DESCW, 1, VL, KI, IS, $ DESCVL, 1 ) * CALL PCAMAX( N-KI+1, REMAXC, II, VL, KI, IS, DESCVL, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PCSSCAL( N-KI+1, REMAXD, VL, KI, IS, DESCVL, 1 ) * CALL PCLASET( ' ', KI-1, 1, CZERO, CZERO, VL, 1, IS, $ DESCVL ) ELSE IF( KI.LT.N ) $ CALL PCGEMV( 'N', N, N-KI, CONE, VL, 1, KI+1, DESCVL, $ WORK, KI+1, 1, DESCW, 1, CMPLX( SCALE ), $ VL, 1, KI, DESCVL, 1 ) * CALL PCAMAX( N, REMAXC, II, VL, 1, KI, DESCVL, 1 ) REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL ) CALL PCSSCAL( N, REMAXD, VL, 1, KI, DESCVL, 1 ) END IF * * Set back the original diagonal elements of T. * DO 100 K = KI + 1, N CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN T( ( ICOL-1 )*LDT+IROW ) = WORK( LDW+IROW ) END IF 100 CONTINUE * IS = IS + 1 110 CONTINUE END IF * RETURN * * End of PCTREVC * END scalapack-1.8.0/SRC/pdgehrd.f0000640000175000017500000003543210363532303015536 0ustar muammarmuammar SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEHRD reduces a real general distributed matrix sub( A ) * to upper Hessenberg form H by an orthogonal similarity transforma- * tion: Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+ILO-2 * and JA+IHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*NB + NB*MAX( IHIP+1, IHLP+INLQ ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * ICOFFA = MOD( JA-1, NB ), IOFF = MOD( IA+ILO-2, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:I) = 0, v(I+1) = 1 and v(IHI+1:N) = 0; v(I+2:IHI) is stored on * exit in A(IA+ILO+I:IA+IHI-1,JA+ILO+I-2), and tau in TAU(JA+ILO+I-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follow- * ing example, with N = 7, ILO = 2 and IHI = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), H denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(JA+ILO+I-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, IB, ICOFFA, ICTXT, IHIP, $ IHLP, IIA, IINFO, ILCOL, ILROW, IMCOL, INLQ, $ IOFF, IPT, IPW, IPY, IROFFA, J, JJ, JJA, JY, $ K, L, LWMIN, MYCOL, MYROW, NB, NPCOL, NPROW, $ NQ DOUBLE PRECISION EI * .. * .. Local Arrays .. INTEGER DESCY( DLEN_ ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG1L, $ INFOG2L, PCHK1MAT, PDGEMM, PDGEHD2, $ PDLAHRD, PDLARFB, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, NB ) ICOFFA = MOD( JA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ) IOFF = MOD( IA+ILO-2, NB ) ILROW = INDXG2P( IA+ILO-1, NB, MYROW, DESCA( RSRC_ ), $ NPROW ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ) ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWMIN = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = ILO IDUM2( 1 ) = 2 IDUM1( 2 ) = IHI IDUM2( 2 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEHRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set elements JA:JA+ILO-2 and JA+JHI-1:JA+N-2 of TAU to zero. * NQ = NUMROC( JA+N-2, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL INFOG1L( JA+ILO-2, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 10 J = JJA, MIN( JJ, NQ ) TAU( J ) = ZERO 10 CONTINUE * CALL INFOG1L( JA+IHI-1, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 20 J = JJ, NQ TAU( J ) = ZERO 20 CONTINUE * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPT = 1 IPY = IPT + NB * NB IPW = IPY + IHIP * NB CALL DESCSET( DESCY, IHI+IROFFA, NB, NB, NB, IAROW, ILCOL, ICTXT, $ MAX( 1, IHIP ) ) * K = ILO IB = NB - IOFF JY = IOFF + 1 * * Loop over remaining block of columns * DO 30 L = 1, IHI-ILO+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns j:j+ib-1 to Hessenberg form, returning the * matrices V and T of the block reflector H = I - V*T*V' * which performs the reduction, and also the matrix Y = A*V*T * CALL PDLAHRD( IHI, K, IB, A, IA, J, DESCA, TAU, WORK( IPT ), $ WORK( IPY ), 1, JY, DESCY, WORK( IPW ) ) * * Apply the block reflector H to A(ia:ia+ihi-1,j+ib:ja+ihi-1) * from the right, computing A := A - Y * V'. * V(i+ib,ib-1) must be set to 1. * CALL PDELSET2( EI, A, I+IB, J+IB-1, DESCA, ONE ) CALL PDGEMM( 'No transpose', 'Transpose', IHI, IHI-K-IB+1, IB, $ -ONE, WORK( IPY ), 1, JY, DESCY, A, I+IB, J, $ DESCA, ONE, A, IA, J+IB, DESCA ) CALL PDELSET( A, I+IB, J+IB-1, DESCA, EI ) * * Apply the block reflector H to A(i+1:ia+ihi-1,j+ib:ja+n-1) from * the left * CALL PDLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', $ IHI-K, N-K-IB+1, IB, A, I+1, J, DESCA, $ WORK( IPT ), A, I+1, J+IB, DESCA, WORK( IPY ) ) * K = K + IB IB = NB JY = 1 DESCY( CSRC_ ) = MOD( DESCY( CSRC_ ) + 1, NPCOL ) * 30 CONTINUE * * Use unblocked code to reduce the rest of the matrix * CALL PDGEHD2( N, K, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEHRD * END scalapack-1.8.0/SRC/pslansy.f0000640000175000017500000007022410363532303015610 0ustar muammarmuammar REAL FUNCTION PSLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLANSY returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * real symmetric distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PSLANSY returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PSLANSY as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PSLANSY is set to zero. N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the symmetric distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PSCOL2ROW, PSTREECOMB, $ SAXPY, SCOMBSSQ, SGAMX2D, SGSUM2D, $ SGEBR2D, SGEBS2D, SLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL ICEIL, ISAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is symmetric, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 30 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 70 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 100 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 140 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * symmetric). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 210 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 250 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 280 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 320 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to SGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PSCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL SAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( ISAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL SLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL SLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL SLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL SLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL SLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL SLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL SLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL SLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PSLANSY = VALUE * RETURN * * End of PSLANSY * END scalapack-1.8.0/SRC/pzhegvx.f0000640000175000017500000010611110377154001015605 0ustar muammarmuammar SUBROUTINE PZHEGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, $ IFAIL, ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, $ LIWORK, LRWORK, LWORK, M, N, NZ DOUBLE PRECISION ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCZ( * ), $ ICLUSTR( * ), IFAIL( * ), IWORK( * ) DOUBLE PRECISION GAP( * ), RWORK( * ), W( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), Z( * ) * .. * * Purpose * * ======= * * PZHEGVX computes all the eigenvalues, and optionally, * the eigenvectors * of a complex generalized Hermitian-definite eigenproblem, of the form * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or * sub( B )*sub( A )*x=(lambda)*x. * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be * Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed * to be Hermitian positive definite. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * JOBZ (global input) CHARACTER*1 * = 'N': Compute eigenvalues only; * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored; * = 'L': Lower triangles of sub( A ) and sub( B ) are stored. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix. * * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains * the distributed matrix Z of eigenvectors. The eigenvectors * are normalized as follows: * if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I; * if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I. * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') * or the lower triangle (if UPLO='L') of sub( A ), including * the diagonal, is destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PZHEGVX cannot guarantee * correct error reporting. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( B ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( B ) contains * the lower triangular part of the matrix. * * On exit, if INFO <= N, the part of sub( B ) containing the * matrix is overwritten by the triangular factor U or L from * the Cholesky factorization sub( B ) = U**H*U or * sub( B ) = L*L**H. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * DESCB( CTXT_ ) must equal DESCA( CTXT_ ) * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) DOUBLE PRECISION * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PDLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PDLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PZHEGVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PZHEGVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) DOUBLE PRECISION * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX*16 array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * The row index in the global array Z indicating the first * row of sub( Z ). * * JZ (global input) INTEGER * The column index in the global array Z indicating the * first column of sub( Z ). * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX*16 array, * dimension (LWORK) * WORK(1) returns the optimal workspace. * * LWORK (local input) INTEGER * Size of WORK array. If only eigenvalues are requested: * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) * If eigenvectors are requested: * LWORK >= N + ( NP0 + MQ0 + NB ) * NB * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). * * For optimal performance, greater workspace is needed, i.e. * LWORK >= MAX( LWORK, N + NHETRD_LWOPT, * NHEGST_LWOPT ) * Where LWORK is as defined above, and * NHETRD_LWORK = 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 1 ) * NPS * NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB * * NB = DESCA( MB_ ) * NP0 = NUMROC( N, NB, 0, 0, NPROW ) * NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the optimal * size for all work arrays. Each of these values is returned * in the first entry of the correspondingwork array, and no * error message is issued by PXERBLA. * * RWORK (local workspace/output) DOUBLE PRECISION array, * dimension max(3,LRWORK) * On return, RWORK(1) contains the amount of workspace * required for optimal efficiency * if JOBZ='N' RWORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' RWORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required when computing optimal workspace. * * LRWORK (local input) INTEGER * Size of RWORK * See below for definitions of variables used to define LRWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LRWORK >= 5 * NN + 4 * N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LRWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = * DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LRWORK is too small: * If LRWORK is too small to guarantee orthogonality, * PZHEGVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LRWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-25 * is returned. Note that when RANGE='V', PZHEGVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LRWORK is large enough to allow PZHEGVX to * compute the eigenvalues, PZHEGVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PZSTEIN will perform no better than ZSTEIN on 1 processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (output) INTEGER array, dimension (N) * IFAIL provides additional information when INFO .NE. 0 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * * If neither of the above error conditions hold and JOBZ = 'V', * then the first M elements of IFAIL are set to zero. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PZHEGVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to * compute eigenvalues. * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/16,2).NE.0), then B was not positive * definite. IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1), * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties, * namely the following expressions should be true: * * DESCA(MB_) = DESCA(NB_) * IA = IB = IZ * JA = IB = JZ * DESCA(M_) = DESCB(M_) =DESCZ(M_) * DESCA(N_) = DESCB(N_)= DESCZ(N_) * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_) * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_) * MOD( IA-1, DESCA( MB_ ) ) = 0 * MOD( JA-1, DESCA( NB_ ) ) = 0 * MOD( IB-1, DESCB( MB_ ) ) = 0 * MOD( JB-1, DESCB( NB_ ) ) = 0 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION FIVE, ZERO PARAMETER ( FIVE = 5.0D+0, ZERO = 0.0D+0 ) INTEGER IERRNPD PARAMETER ( IERRNPD = 16 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ CHARACTER TRANS INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA, $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LRWMIN, $ LRWOPT, LWMIN, LWOPT, MQ0, MYCOL, MYROW, NB, $ NEIG, NHEGST_LWOPT, NHETRD_LWOPT, NN, NP0, $ NPCOL, NPROW, NPS, NQ0, SQNPC DOUBLE PRECISION EPS, SCALE * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV DOUBLE PRECISION PDLAMCH EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DSCAL, PCHK1MAT, PCHK2MAT, PXERBLA, PZHEEVX, $ PZHENGST, PZPOTRF, PZTRMM, PZTRSM * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, ICHAR, INT, MAX, MIN, MOD, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2600+CTXT_ ) ELSE * * Get machine constants. * EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) * WANTZ = LSAME( JOBZ, 'V' ) UPPER = LSAME( UPLO, 'U' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN RWORK( 1 ) = ABSTOL IF( VALEIG ) THEN RWORK( 2 ) = VL RWORK( 3 ) = VU ELSE RWORK( 2 ) = ZERO RWORK( 3 ) = ZERO END IF CALL DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, $ 3 ) ELSE CALL DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, 3, $ 0, 0 ) END IF IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) $ LQUERY = .TRUE. * LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) * NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = N + MAX( NB*( NP0+1 ), 3 ) LWOPT = LWMIN LRWMIN = 5*NN + 4*N IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LRWOPT = 4*N + MAX( 5*NN, NP0*MQ0 ) ELSE LRWOPT = LRWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+MQ0+NB )*NB LWOPT = LWMIN LRWMIN = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LRWOPT = LRWMIN * END IF * * Compute how much workspace is needed to use the * new TRD and GST algorithms * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS NB = DESCA( MB_ ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB LWOPT = MAX( LWOPT, N+NHETRD_LWOPT, NHEGST_LWOPT ) * * Version 1.0 Limitations * IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -2 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -3 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN INFO = -( 1300+M_ ) ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN INFO = -( 1300+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -( 1300+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN INFO = -( 1300+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN INFO = -( 1300+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2200+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2200+CTXT_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -11 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -12 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -15 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -16 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -17 ELSE IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -14 ELSE IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -15 ELSE IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* $ ABS( ABSTOL ) ) THEN INFO = -18 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -32 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( WANTZ ) THEN IDUM1( 2 ) = ICHAR( 'V' ) ELSE IDUM1( 2 ) = ICHAR( 'N' ) END IF IDUM2( 2 ) = 2 IF( UPPER ) THEN IDUM1( 3 ) = ICHAR( 'U' ) ELSE IDUM1( 3 ) = ICHAR( 'L' ) END IF IDUM2( 3 ) = 3 IF( ALLEIG ) THEN IDUM1( 4 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 4 ) = ICHAR( 'I' ) ELSE IDUM1( 4 ) = ICHAR( 'V' ) END IF IDUM2( 4 ) = 4 IF( LQUERY ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 5 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, $ JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, $ INFO ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = DCMPLX( DBLE( LWOPT ) ) RWORK( 1 ) = DBLE( LRWOPT ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHEGVX ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Form a Cholesky factorization of sub( B ). * CALL PZPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) IF( INFO.NE.0 ) THEN IWORK( 1 ) = LIWMIN WORK( 1 ) = DCMPLX( DBLE( LWOPT ) ) RWORK( 1 ) = DBLE( LRWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD RETURN END IF * * Transform problem to standard eigenvalue problem and solve. * CALL PZHENGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) CALL PZHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, $ IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR, $ GAP, INFO ) * IF( WANTZ ) THEN * * Backtransform eigenvectors to the original problem. * NEIG = M IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN * * For sub( A )*x=(lambda)*sub( B )*x and * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors: * x = inv(L)'*y or inv(U)*y * IF( UPPER ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF * CALL PZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * For sub( B )*sub( A )*x=(lambda)*x; * backtransform eigenvectors: x = L*y or U'*y * IF( UPPER ) THEN TRANS = 'C' ELSE TRANS = 'N' END IF * CALL PZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) END IF END IF * IF( SCALE.NE.ONE ) THEN CALL DSCAL( N, SCALE, W, 1 ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = DCMPLX( DBLE( LWOPT ) ) RWORK( 1 ) = DBLE( LRWOPT ) RETURN * * End of PZHEGVX * END scalapack-1.8.0/SRC/zdttrf.f0000640000175000017500000000630610363532303015434 0ustar muammarmuammar SUBROUTINE ZDTTRF( N, DL, D, DU, INFO ) * * Written by Andrew J. Cleary, November 1996. * Modified from ZGTTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. INTEGER INFO, N * .. * .. Array Arguments .. COMPLEX*16 D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * ZDTTRF computes an LU factorization of a complex tridiagonal matrix A * using elimination without partial pivoting. * * The factorization has the form * A = L * U * where L is a product of unit lower bidiagonal * matrices and U is upper triangular with nonzeros in only the main * diagonal and first superdiagonal. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * DL (input/output) COMPLEX array, dimension (N-1) * On entry, DL must contain the (n-1) subdiagonal elements of * A. * On exit, DL is overwritten by the (n-1) multipliers that * define the matrix L from the LU factorization of A. * * D (input/output) COMPLEX array, dimension (N) * On entry, D must contain the diagonal elements of A. * On exit, D is overwritten by the n diagonal elements of the * upper triangular matrix U from the LU factorization of A. * * DU (input/output) COMPLEX array, dimension (N-1) * On entry, DU must contain the (n-1) superdiagonal elements * of A. * On exit, DU is overwritten by the (n-1) elements of the first * superdiagonal of U. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Local Scalars .. INTEGER I COMPLEX*16 FACT * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Parameters .. COMPLEX*16 CZERO PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Executable Statements .. * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 CALL XERBLA( 'ZDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * DO 20 I = 1, N - 1 IF( DL( I ).EQ.CZERO ) THEN * * Subdiagonal is zero, no elimination is required. * IF( D( I ).EQ.CZERO .AND. INFO.EQ.0 ) $ INFO = I ELSE * FACT = DL( I ) / D( I ) DL( I ) = FACT D( I+1 ) = D( I+1 ) - FACT*DU( I ) END IF 20 CONTINUE IF( D( N ).EQ.CZERO .AND. INFO.EQ.0 ) THEN INFO = N RETURN END IF * RETURN * * End of ZDTTRF * END scalapack-1.8.0/SRC/pscsum1.f0000640000175000017500000002106510363532303015511 0ustar muammarmuammar SUBROUTINE PSCSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N REAL ASUM * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX X( * ) * .. * * Purpose * ======= * * PSCSUM1 returns the sum of absolute values of a complex * distributed vector sub( X ) in ASUM, * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Based on PSCASUM from the Level 1 PBLAS. The change is * to use the 'genuine' absolute value. * * The serial version of this routine was originally contributed by * Nick Higham for use with CLACON. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * When the result of a vector-oriented PBLAS call is a scalar, it will * be made available only within the scope which owns the vector(s) * being operated on. Let X be a generic term for the input vector(s). * Then, the processes which receive the answer will be (note that if * an operation involves more than one vector, the processes which re- * ceive the result will be the union of the following calculation for * each vector): * * If N = 1, M_X = 1 and INCX = 1, then one can't determine if a process * row or process column owns the vector operand, therefore only the * process of coordinate {RSRC_X, CSRC_X} receives the result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process part of this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process part of this column receives the result; * * Parameters * ========== * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * ASUM (local output) pointer to REAL * The sum of absolute values of the distributed vector sub( X ) * only in its scope. * * X (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER CCTOP, RCTOP INTEGER ICOFF, ICTXT, IIX, IROFF, IXCOL, IXROW, JJX, $ LDX, MYCOL, MYROW, NP, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SGSUM2D, PB_TOPGET * .. * .. External Functions .. INTEGER NUMROC REAL SCSUM1 EXTERNAL NUMROC, SCSUM1 * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * ASUM = ZERO IF( N.LE.0 ) $ RETURN * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.IXCOL ) THEN ASUM = ABS( X( IIX+(JJX-1)*LDX ) ) END IF RETURN END IF * IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is distributed over a process row * IF( MYROW.EQ.IXROW ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ-ICOFF ASUM = SCSUM1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) CALL SGSUM2D( ICTXT, 'Rowwise', RCTOP, 1, 1, ASUM, 1, $ -1, MYCOL ) END IF * ELSE * * X is distributed over a process column * IF( MYCOL.EQ.IXCOL ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP ) IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP-IROFF ASUM = SCSUM1( NP, X( IIX+(JJX-1)*LDX ), 1 ) CALL SGSUM2D( ICTXT, 'Columnwise', CCTOP, 1, 1, ASUM, 1, $ -1, MYCOL ) END IF * END IF * RETURN * * End of PSCSUM1 * END scalapack-1.8.0/SRC/pdormqr.f0000640000175000017500000004206710363532303015607 0ustar muammarmuammar SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORMQR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PDGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, $ IPW, IROFFA, IROFFC, J, J1, J2, J3, JB, JCC, $ LCM, LCMQ, LWMIN, MI, MPC0, MYCOL, MYROW, NI, $ NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDLARFB, $ PDLARFT, PDORM2R, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PDORM2R( SIDE, TRANS, M, N, J1-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PDLARFT( 'Forward', 'Columnwise', NQ-J+JA, JB, A, $ IA+J-JA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H or H' * CALL PDLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, $ JB, A, IA+J-JA, J, DESCA, WORK, C, ICC, JCC, $ DESCC, WORK( IPW ) ) 10 CONTINUE * * Use unblocked code for the last block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PDORM2R( SIDE, TRANS, M, N, J2-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORMQR * END scalapack-1.8.0/SRC/pspotrf.f0000640000175000017500000003141210363532303015610 0ustar muammarmuammar SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSPOTRF computes the Cholesky factorization of an N-by-N real * symmetric positive definite distributed matrix sub( A ) denoting * A(IA:IA+N-1, JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PSPOTF2, PSSYRK, PSTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( UPPER ) THEN * * Split-ring topology for the communication along process * columns, 1-tree topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * * A is upper triangular, compute Cholesky factorization A = U'*U. * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA(NB_), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PSPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PSTRSM( 'Left', UPLO, 'Transpose', 'Non-Unit', $ JB, N-JB, ONE, A, IA, JA, DESCA, A, IA, JA+JB, $ DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PSSYRK( UPLO, 'Transpose', N-JB, JB, -ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PSPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PSTRSM( 'Left', UPLO, 'Transpose', 'Non-Unit', $ JB, N-J-JB+JA, ONE, A, I, J, DESCA, A, $ I, J+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PSSYRK( UPLO, 'Transpose', N-J-JB+JA, JB, $ -ONE, A, I, J+JB, DESCA, ONE, A, I+JB, $ J+JB, DESCA ) END IF 10 CONTINUE * ELSE * * 1-tree topology for the communication along process columns, * Split-ring topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * A is lower triangular, compute Cholesky factorization A = L*L' * (right-looking) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PSPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PSTRSM( 'Right', UPLO, 'Transpose', 'Non-Unit', $ N-JB, JB, ONE, A, IA, JA, DESCA, A, IA+JB, JA, $ DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PSSYRK( UPLO, 'No Transpose', N-JB, JB, -ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * END IF * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PSPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PSTRSM( 'Right', UPLO, 'Transpose', 'Non-Unit', $ N-J-JB+JA, JB, ONE, A, I, J, DESCA, A, I+JB, $ J, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PSSYRK( UPLO, 'No Transpose', N-J-JB+JA, JB, -ONE, $ A, I+JB, J, DESCA, ONE, A, I+JB, J+JB, $ DESCA ) * END IF 20 CONTINUE * END IF * 30 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PSPOTRF * END scalapack-1.8.0/SRC/pzpbtrs.f0000640000175000017500000006407510363532303015632 0ustar muammarmuammar SUBROUTINE PZPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZPBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PZPBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded symmetric positive definite distributed * matrix with bandwidth BW. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PZPBTRF. * * Routine PZPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPBTRF and this is stored in AF. If a linear system * is to be solved using PZPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ PZPBTRSV, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 7*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 10*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 10*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 10*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 10*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 10*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 10*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -6 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZPBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (BW*NRHS) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, $ 'PZPBTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PZPBTRSV( 'L', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PZPBTRSV( 'U', 'C', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PZPBTRSV( 'L', 'C', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PZPBTRSV( 'U', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZPBTRS * END scalapack-1.8.0/SRC/spttrsv.f0000640000175000017500000000670510363532303015647 0ustar muammarmuammar SUBROUTINE SPTTRSV( TRANS, N, NRHS, D, E, B, LDB, $ INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * November, 1996. * Modified from SPTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. REAL D( * ) REAL B( LDB, * ), E( * ) * .. * * Purpose * ======= * * SPTTRSV solves one of the triangular systems * L**T* X = B, or L * X = B, * where L is the Cholesky factor of a Hermitian positive * definite tridiagonal matrix A such that * A = L*D*L**H (computed by SPTTRF). * * Arguments * ========= * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': L * X = B (No transpose) * = 'T': L**T * X = B (Transpose) * * N (input) INTEGER * The order of the tridiagonal matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * D (input) REAL array, dimension (N) * The n diagonal elements of the diagonal matrix D from the * factorization computed by SPTTRF. * * E (input) COMPLEX array, dimension (N-1) * The (n-1) off-diagonal elements of the unit bidiagonal * factor U or L from the factorization computed by SPTTRF * (see UPLO). * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input arguments. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN IF( NOTRAN ) THEN * DO 60 J = 1, NRHS * * Solve L * x = b. * DO 40 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) 40 CONTINUE 60 CONTINUE * ELSE * DO 65 J = 1, NRHS * * Solve L**H * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - $ B( I+1, J )*( E( I ) ) 50 CONTINUE 65 CONTINUE ENDIF * RETURN * * End of SPTTRS * END scalapack-1.8.0/SRC/pclabrd.f0000640000175000017500000005534610363532303015536 0ustar muammarmuammar SUBROUTINE PCLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ), DESCY( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAUP( * ), TAUQ( * ), X( * ), Y( * ), $ WORK( * ) * .. * * Purpose * ======= * * PCLABRD reduces the first NB rows and columns of a complex general * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper * or lower bidiagonal form by an unitary transformation Q' * A * P, and * returns the matrices X and Y which are needed to apply the transfor- * mation to the unreduced part of sub( A ). * * If M >= N, sub( A ) is reduced to upper bidiagonal form; if M < N, to * lower bidiagonal form. * * This is an auxiliary routine called by PCGEBRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of leading rows and columns of sub( A ) to be * reduced. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ) to be reduced. On exit, * the first NB rows and columns of the matrix are overwritten; * the rest of the distributed matrix sub( A ) is unchanged. * If m >= n, elements on and below the diagonal in the first NB * columns, with the array TAUQ, represent the unitary * matrix Q as a product of elementary reflectors; and * elements above the diagonal in the first NB rows, with the * array TAUP, represent the unitary matrix P as a product * of elementary reflectors. * If m < n, elements below the diagonal in the first NB * columns, with the array TAUQ, represent the unitary * matrix Q as a product of elementary reflectors, and * elements on and above the diagonal in the first NB rows, * with the array TAUP, represent the unitary matrix P as * a product of elementary reflectors. * See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(ia+i-1,ja+i-1). D is tied to the distributed * matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(ia+i-1,ja+i) for i = 1,2,...,n-1; * if m < n, E(i) = A(ia+i,ja+i-1) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * X (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_X,NB). On exit, the local * pieces of the distributed M-by-NB matrix * X(IX:IX+M-1,JX:JX+NB-1) required to update the unreduced * part of sub( A ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * Y (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_Y,NB). On exit, the local * pieces of the distributed N-by-NB matrix * Y(IY:IY+N-1,JY:JY+NB-1) required to update the unreduced * part of sub( A ). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= NB_A + NQ, with * * NQ = NUMROC( N+MOD( IA-1, NB_Y ), NB_Y, MYCOL, IACOL, NPCOL ) * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ) * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors. * * If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); u(1:i-1) = 0, u(i) = 1, and u(i:n) is * stored on exit in A(ia+i-1,ja+i:ja+n-1); tauq is stored in * TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The elements of the vectors v and u together form the m-by-nb matrix * V and the nb-by-n matrix U' which are needed, with X and Y, to apply * the transformation to the unreduced part of the matrix, using a block * update of the form: sub( A ) := sub( A ) - V*Y' - X*U'. * * The contents of sub( A ) on exit are illustrated by the following * examples with nb = 2: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) * ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) * ( v1 v2 a a a ) ( v1 1 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) ( v1 v2 a a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix which is unchanged, * vi denotes an element of the vector defining H(i), and ui an element * of the vector defining G(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, IPY, IW, J, JJ, $ JWY, K, MYCOL, MYROW, NPCOL, NPROW COMPLEX ALPHA, TAU INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), $ DESCTP( DLEN_ ), DESCTQ( DLEN_ ), $ DESCW( DLEN_ ), DESCWY( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCCOPY, $ PCELGET, PCELSET, PCGEMV, PCLACGV, $ PCLARFG, PCSCAL, PSELSET * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 IW = MOD( IA-1, DESCA( NB_ ) ) + 1 ALPHA = ZERO * CALL DESCSET( DESCWY, 1, N+MOD( IA-1, DESCY( NB_ ) ), 1, $ DESCA( NB_ ), IAROW, IACOL, ICTXT, 1 ) CALL DESCSET( DESCW, DESCA( MB_ ), 1, DESCA( MB_ ), 1, IAROW, $ IACOL, ICTXT, DESCA( MB_ ) ) CALL DESCSET( DESCTQ, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), IAROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCTP, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), IACOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i:ia+m-1,j) * IF( K.GT.1 ) THEN CALL PCGEMV( 'No transpose', M-K+1, K-1, -ONE, A, I, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I, $ J, DESCA, 1 ) CALL PCGEMV( 'No transpose', M-K+1, K-1, -ONE, X, IX+K-1, $ JX, DESCX, A, IA, J, DESCA, 1, ONE, A, I, J, $ DESCA, 1 ) CALL PCELSET( A, I-1, J, DESCA, ALPHA ) END IF * * Generate reflection Q(i) to annihilate A(i+1:ia+m-1,j) * CALL PCLARFG( M-K+1, ALPHA, I, J, A, I+1, J, DESCA, 1, $ TAUQ ) CALL PSELSET( D, 1, J, DESCD, REAL( ALPHA ) ) CALL PCELSET( A, I, J, DESCA, ONE ) * * Compute Y(IA+I:IA+N-1,J) * CALL PCGEMV( 'Conjugate transpose', M-K+1, N-K, ONE, A, I, $ J+1, DESCA, A, I, J, DESCA, 1, ZERO, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PCGEMV( 'Conjugate transpose', M-K+1, K-1, ONE, A, I, $ JA, DESCA, A, I, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K, -ONE, Y, IY, $ JY+K, DESCY, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PCGEMV( 'Conjugate transpose', M-K+1, K-1, ONE, X, $ IX+K-1, JX, DESCX, A, I, J, DESCA, 1, ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K, -ONE, A, IA, $ J+1, DESCA, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) * CALL PCELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PCSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PCLACGV( N-K, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PCCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) * * Update A(i,j+1:ja+n-1) * CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCLACGV( K, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCGEMV( 'Conjugate transpose', K, N-K, -ONE, Y, IY, $ JY+K, DESCY, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCLACGV( K, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K, -ONE, A, IA, $ J+1, DESCA, X, IX+K-1, JX, DESCX, DESCX( M_ ), $ ONE, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( ALPHA ) ) ) * * Generate reflection P(i) to annihilate A(i,j+2:ja+n-1) * CALL PCLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, N+JA-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PSELSET( E, I, 1, DESCE, REAL( ALPHA ) ) CALL PCELSET( A, I, J+1, DESCA, ONE ) * * Compute X(I+1:IA+M-1,J) * CALL PCGEMV( 'No transpose', M-K, N-K, ONE, A, I+1, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PCGEMV( 'No transpose', K, N-K, ONE, Y, IY, JY+K, $ DESCY, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PCGEMV( 'No transpose', M-K, K, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PCGEMV( 'No transpose', K-1, N-K, ONE, A, IA, J+1, $ DESCA, A, I, J+1, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PCGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PCELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PCSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, NB I = IA + K - 1 J = JA + K - 1 JWY = IW + K * * Update A(i,j:ja+n-1) * CALL PCLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) IF( K.GT.1 ) THEN CALL PCLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K+1, -ONE, Y, $ IY, JY+K-1, DESCY, A, I, JA, DESCA, $ DESCA( M_ ), ONE, A, I, J, DESCA, $ DESCA( M_ ) ) CALL PCLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K+1, -ONE, A, $ IA, J, DESCA, X, IX+K-1, JX, DESCX, $ DESCX( M_ ), ONE, A, I, J, DESCA, $ DESCA( M_ ) ) CALL PCLACGV( K-1, X, IX+K-1, JX, DESCX, DESCX( M_ ) ) CALL PCELSET( A, I, J-1, DESCA, CMPLX( REAL( ALPHA ) ) ) END IF * * Generate reflection P(i) to annihilate A(i,j+1:ja+n-1) * CALL PCLARFG( N-K+1, ALPHA, I, J, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP ) CALL PSELSET( D, I, 1, DESCD, REAL( ALPHA ) ) CALL PCELSET( A, I, J, DESCA, ONE ) * * Compute X(i+1:ia+m-1,j) * CALL PCGEMV( 'No transpose', M-K, N-K+1, ONE, A, I+1, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, X, $ IX+K, JX+K-1, DESCX, 1 ) CALL PCGEMV( 'No transpose', K-1, N-K+1, ONE, Y, IY, JY+K-1, $ DESCY, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PCGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) CALL PCGEMV( 'No transpose', K-1, N-K+1, ONE, A, IA, J, $ DESCA, A, I, J, DESCA, DESCA( M_ ), ZERO, $ WORK, IW, 1, DESCW, 1 ) CALL PCGEMV( 'No transpose', M-K, K-1, -ONE, X, IX+K, JX, $ DESCX, WORK, IW, 1, DESCW, 1, ONE, X, IX+K, $ JX+K-1, DESCX, 1 ) * CALL PCELGET( 'Columnwise', ' ', TAU, TAUP, I, 1, DESCTP ) CALL PCSCAL( M-K, TAU, X, IX+K, JX+K-1, DESCX, 1 ) CALL PCLACGV( N-K+1, A, I, J, DESCA, DESCA( M_ ) ) * * Update A(i+1:ia+m-1,j) * CALL PCGEMV( 'No transpose', M-K, K-1, -ONE, A, I+1, JA, $ DESCA, Y, IY, JY+K-1, DESCY, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PCGEMV( 'No transpose', M-K, K, -ONE, X, IX+K, JX, $ DESCX, A, IA, J, DESCA, 1, ONE, A, I+1, J, $ DESCA, 1 ) CALL PCELSET( A, I, J, DESCA, ALPHA ) * * Generate reflection Q(i) to annihilate A(i+2:ia+m-1,j) * CALL PCLARFG( M-K, ALPHA, I+1, J, A, MIN( I+2, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PSELSET( E, 1, J, DESCE, REAL( ALPHA ) ) CALL PCELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(ia+i:ia+n-1,j) * CALL PCGEMV( 'Conjugate transpose', M-K, N-K, ONE, A, I+1, $ J+1, DESCA, A, I+1, J, DESCA, 1, ZERO, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PCGEMV( 'Conjugate transpose', M-K, K-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K, -ONE, Y, IY, $ JY+K, DESCY, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) CALL PCGEMV( 'Conjugate transpose', M-K, K, ONE, X, IX+K, $ JX, DESCX, A, I+1, J, DESCA, 1, ZERO, WORK, IW, $ 1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', K, N-K, -ONE, A, IA, $ J+1, DESCA, WORK, IW, 1, DESCW, 1, ONE, $ WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ) ) * CALL PCELGET( 'Rowwise', ' ', TAU, TAUQ, 1, J, DESCTQ ) CALL PCSCAL( N-K, TAU, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PCLACGV( N-K, WORK( IPY ), 1, JWY, DESCWY, $ DESCWY( M_ ) ) CALL PCCOPY( N-K, WORK( IPY ), 1, JWY, DESCWY, DESCWY( M_ ), $ Y, IY+K-1, JY+K, DESCY, DESCY( M_ ) ) 20 CONTINUE END IF * RETURN * * End of PCLABRD * END scalapack-1.8.0/SRC/pspotri.f0000640000175000017500000001714510363532303015622 0ustar muammarmuammar SUBROUTINE PSPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSPOTRI computes the inverse of a real symmetric positive definite * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the * Cholesky factorization sub( A ) = U**T*U or L*L**T computed by * PSPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor U or L * from the Cholesky factorization of the distributed matrix * sub( A ) = U**T*U or L*L**T, as computed by PSPOTRF. * On exit, the local pieces of the upper or lower triangle of * the (symmetric) inverse of sub( A ), overwriting the input * factor U or L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER ICOFF, ICTXT, IROFF, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLAUUM, $ PSTRTRI, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.NE.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Invert the triangular Cholesky factor U or L. * CALL PSTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * IF( INFO.GT.0 ) $ RETURN * * Form inv(U)*inv(U)' or inv(L)'*inv(L). * CALL PSLAUUM( UPLO, N, A, IA, JA, DESCA ) * RETURN * * End of PSPOTRI * END scalapack-1.8.0/SRC/pclapv2.f0000640000175000017500000003673010363532303015472 0ustar muammarmuammar SUBROUTINE PCLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAPV2 applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1), resulting in row or column pivoting. The * pivot vector should be aligned with the distributed matrix A. For * pivoting the rows of sub( A ), IPIV should be distributed along a * process column and replicated over all process rows. Similarly, * IPIV should be distributed along a process row and replicated over * all process columns for column pivoting. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P * sub( A ); * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P ) * sub( A ). * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this local array contains the local pieces of the * distributed matrix sub( A ) to which the row or columns * interchanges will be applied. On exit, this array contains * the local pieces of the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (input) INTEGER array, dimension >= LOCr(M_A)+MB_A if * ROWCOL = 'R', LOCc(N_A)+NB_A otherwise. It contains * the pivoting information. IPIV(i) is the global row (column), * local row (column) i was swapped with. The last piece of the * array of size MB_A (resp. NB_A) is used as workspace. IPIV is * tied to the distributed matrix A. * * IP (global input) INTEGER * IPIV's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JP (global input) INTEGER * IPIV's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCIP (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix IPIV. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL FORWRD, ROWPVT INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIP, IP1, ITMP, $ IPVWRK, J, JB, JJP, JP1, K, MA, MBA, MYCOL, $ MYROW, NBA, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBS2D, IGEBR2D, INFOG2L, $ PCSWAP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN END IF FORWRD = LSAME( DIREC, 'F' ) * * * Get grid and matrix parameters * MA = DESCA( M_ ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * IF( FORWRD ) THEN CALL INFOG2L( IP, JP, DESCIP, NPROW, NPCOL, MYROW, MYCOL, $ IIP, JJP, ICURROW, ICURCOL ) * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * Loop over rows of sub( A ) * I = IA IB = MIN( M, ICEIL( IA, MBA ) * MBA - IA + 1 ) 10 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP ), IB ) ITMP = IIP IIP = IIP + IB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( ITMP ), IB, ICURROW, MYCOL ) END IF * * Pivot the block of rows * DO 20 K = I, I+IB-1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PCSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP + 1 20 CONTINUE * * Go on to next row of processes, increment row counter, * and figure number of rows to pivot next * ICURROW = MOD( ICURROW+1, NPROW ) I = I + IB IB = MIN( MBA, M-I+IA ) IF( IB .GT. 0 ) GOTO 10 * * If I am pivoting the columns of sub( A ) * ELSE IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * Loop over columns of sub( A ) * J = JA JB = MIN( N, ICEIL( JA, NBA ) * NBA - JA + 1 ) 30 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP ), JB ) ITMP = JJP JJP = JJP + JB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( ITMP ), JB, MYROW, ICURCOL ) END IF * * Pivot the block of columns * DO 40 K = J, J+JB-1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PCSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP + 1 40 CONTINUE * * Go on to next column of processes, increment column * counter, and figure number of columns to pivot next * ICURCOL = MOD( ICURCOL+1, NPCOL ) J = J + JB JB = MIN( NBA, N-J+JA ) IF( JB .GT. 0 ) GOTO 30 END IF * * If I want to apply pivots in reverse order, i.e. reversing * pivoting done earlier. Thus this section computes * inv( P ) * sub( A ). * ELSE * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN CALL INFOG2L( IP+M-1, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) * IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * If I'm not in the current process row, my IIP points out * past end of pivot vector (since I don't own a piece of the * last row). Adjust IIP so it points at last pivot entry. * IF( MYROW.NE.ICURROW ) IIP = IIP - 1 * * Loop over rows in reverse order, starting at last row * I = IA + M - 1 IB = MOD( I, MBA ) IF( IB .EQ. 0 ) IB = MBA IB = MIN( IB, M ) 50 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN ITMP = IIP IIP = IIP - IB CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP+1 ), IB ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IPVWRK ), IB, ICURROW, MYCOL ) ITMP = IPVWRK + IB - 1 END IF * * Pivot the block of rows * DO 60 K = I, I-IB+1, -1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PCSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP - 1 60 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURROW = MOD( NPROW+ICURROW-1, NPROW ) I = I - IB IB = MIN( MBA, I-IA+1 ) IF( IB .GT. 0 ) GOTO 50 * * Otherwise, I'm pivoting the columns of sub( A ) * ELSE CALL INFOG2L( IP, JP+N-1, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * If I'm not in the current process column, my JJP points out * past end of pivot vector (since I don't own a piece of the * last column). Adjust JJP so it points at last pivot entry. * IF( MYCOL.NE.ICURCOL ) JJP = JJP - 1 * * Loop over columns in reverse order starting at last column * J = JA + N - 1 JB = MOD( J, NBA ) IF( JB .EQ. 0 ) JB = NBA JB = MIN( JB, N ) 70 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN ITMP = JJP JJP = JJP - JB CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP+1 ), JB ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( IPVWRK ), JB, MYROW, ICURCOL ) ITMP = IPVWRK + JB - 1 END IF * * Pivot a block of columns * DO 80 K = J, J-JB+1, -1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PCSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP - 1 80 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL ) J = J - JB JB = MIN( NBA, J-JA+1 ) IF( JB .GT. 0 ) GOTO 70 END IF * END IF * RETURN * * End PCLAPV2 * END scalapack-1.8.0/SRC/pssytd2.f0000640000175000017500000004213610363532303015530 0ustar muammarmuammar SUBROUTINE PSSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSSYTD2 reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*N. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL HALF, ONE, ZERO PARAMETER ( HALF = 0.5E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J, $ JJ, JK, JN, LDA, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW REAL ALPHA, TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, SAXPY, SGEBR2D, SGEBS2D, $ SLARFG, SSYMV, SSYR2 * .. * .. External Functions .. LOGICAL LSAME REAL SDOT EXTERNAL LSAME, SDOT * .. * .. Intrinsic Functions .. INTRINSIC REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) LWMIN = 3 * N * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYTD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( UPPER ) THEN * * Process(IAROW, IACOL) owns block to be reduced * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the upper triangle of sub( A ) * DO 10 J = N-1, 1, -1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA:IA+J-1,JA:JA+J-1) * CALL SLARFG( J, A( IK+JK*LDA ), A( II+JK*LDA ), 1, $ TAUI ) E( JK+1 ) = A( IK+JK*LDA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA:IA+J-1,JA:JA+J-1) * A( IK+JK*LDA ) = ONE * * Compute x := tau * A * v storing x in TAU(1:i) * CALL SSYMV( UPLO, J, TAUI, A( II+(JJ-1)*LDA ), $ LDA, A( II+JK*LDA ), 1, ZERO, $ TAU( JJ ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*SDOT( J, TAU( JJ ), 1, $ A( II+JK*LDA ), 1 ) CALL SAXPY( J, ALPHA, A( II+JK*LDA ), 1, $ TAU( JJ ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL SSYR2( UPLO, J, -ONE, A( II+JK*LDA ), 1, $ TAU( JJ ), 1, A( II+(JJ-1)*LDA ), $ LDA ) A( IK+JK*LDA ) = E( JK+1 ) END IF * * Copy D, E, TAU to broadcast them columnwise. * D( JK+1 ) = A( IK+1+JK*LDA ) WORK( J+1 ) = D( JK+1 ) WORK( N+J+1 ) = E( JK+1 ) TAU( JK+1 ) = TAUI WORK( 2*N+J+1 ) = TAU( JK+1 ) * 10 CONTINUE D( JJ ) = A( II+(JJ-1)*LDA ) WORK( 1 ) = D( JJ ) WORK( N+1 ) = ZERO WORK( 2*N+1 ) = ZERO * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1 ) * ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1, $ IAROW, IACOL ) DO 20 J = 2, N JN = JJ + J - 1 D( JN ) = WORK( J ) E( JN ) = WORK( N+J ) TAU( JN ) = WORK( 2*N+J ) 20 CONTINUE D( JJ ) = WORK( 1 ) END IF END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the lower triangle of sub( A ) * DO 30 J = 1, N - 1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1) * CALL SLARFG( N-J, A( IK+1+(JK-1)*LDA ), $ A( IK+2+(JK-1)*LDA ), 1, TAUI ) E( JK ) = A( IK+1+(JK-1)*LDA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1) * A( IK+1+(JK-1)*LDA ) = ONE * * Compute x := tau * A * v storing y in TAU(i:n-1) * CALL SSYMV( UPLO, N-J, TAUI, A( IK+1+JK*LDA ), $ LDA, A( IK+1+(JK-1)*LDA ), 1, $ ZERO, TAU( JK ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*SDOT( N-J, TAU( JK ), 1, $ A( IK+1+(JK-1)*LDA ), 1 ) CALL SAXPY( N-J, ALPHA, A( IK+1+(JK-1)*LDA ), $ 1, TAU( JK ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL SSYR2( UPLO, N-J, -ONE, $ A( IK+1+(JK-1)*LDA ), 1, $ TAU( JK ), 1, A( IK+1+JK*LDA ), $ LDA ) A( IK+1+(JK-1)*LDA ) = E( JK ) END IF * * Copy D(JK), E(JK), TAU(JK) to broadcast them * columnwise. * D( JK ) = A( IK+(JK-1)*LDA ) WORK( J ) = D( JK ) WORK( N+J ) = E( JK ) TAU( JK ) = TAUI WORK( 2*N+J ) = TAU( JK ) 30 CONTINUE JN = JJ + N - 1 D( JN ) = A( II+N-1+(JN-1)*LDA ) WORK( N ) = D( JN ) TAU( JN ) = ZERO WORK( 2*N ) = ZERO * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1 ) * ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1, IAROW, IACOL ) DO 40 J = 1, N - 1 JN = JJ + J - 1 D( JN ) = WORK( J ) E( JN ) = WORK( N+J ) TAU( JN ) = WORK( 2*N+J ) 40 CONTINUE JN = JJ + N - 1 D( JN ) = WORK( N ) TAU( JN ) = ZERO END IF END IF END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSSYTD2 * END scalapack-1.8.0/SRC/pclapiv.f0000640000175000017500000003364410363532303015562 0ustar muammarmuammar SUBROUTINE PCLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ), IWORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAPIV applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), resulting in row or column * pivoting. The pivot vector may be distributed across a process row * or a column. The pivot vector should be aligned with the distributed * matrix A. This routine will transpose the pivot vector if necessary. * For example if the row pivots should be applied to the columns of * sub( A ), pass ROWCOL='C' and PIVROC='C'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Restrictions * ============ * * IPIV must always be a distributed vector (not a matrix). Thus: * IF( ROWPIV .EQ. 'C' ) THEN * JP must be 1 * ELSE * IP must be 1 * END IF * * The following restrictions apply when IPIV must be transposed: * IF( ROWPIV.EQ.'C' .AND. PIVROC.EQ.'C') THEN * DESCIP(MB_) must equal DESCA(NB_) * ELSE IF( ROWPIV.EQ.'R" .AND. PIVROC.EQ.'R') THEN * DESCIP(NB_) must equal DESCA(MB_) * END IF * * Arguments * ========= * * DIREC (global input) CHARACTER*1 * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P*sub( A ). * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P )*sub( A ). * * ROWCOL (global input) CHARACTER*1 * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * PIVROC (global input) CHARACTER*1 * Specifies whether IPIV is distributed over a process row * or column: * = 'R' IPIV distributed over a process row * = 'C' IPIV distributed over a process column * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * distributed submatrix sub( A ) to which the row or column * interchanges will be applied. On exit, the local pieces * of the permuted distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension (LIPIV) where LIPIV is * when ROWCOL='R' or 'r': * >= LOCr( IA+M-1 ) + MB_A if PIVROC='C' or 'c', * >= LOCc( M + MOD(JP-1,NB_P) ) if PIVROC='R' or 'r', and, * when ROWCOL='C' or 'c': * >= LOCr( N + MOD(IP-1,MB_P) ) if PIVROC='C' or 'c', * >= LOCc( JA+N-1 ) + NB_A if PIVROC='R' or 'r'. * This array contains the pivoting information. IPIV(i) is the * global row (column), local row (column) i was swapped with. * When ROWCOL='R' or 'r' and PIVROC='C' or 'c', or ROWCOL='C' * or 'c' and PIVROC='R' or 'r', the last piece of this array of * size MB_A (resp. NB_A) is used as workspace. In those cases, * this array is tied to the distributed matrix A. * * IP (global input) INTEGER * The row index in the global array P indicating the first * row of sub( P ). * * JP (global input) INTEGER * The column index in the global array P indicating the * first column of sub( P ). * * DESCIP (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed vector IPIV. * * IWORK (local workspace) INTEGER array, dimension (LDW) * where LDW is equal to the workspace necessary for * transposition, and the storage of the tranposed IPIV: * * Let LCM be the least common multiple of NPROW and NPCOL. * IF( ROWCOL.EQ.'R' .AND. PIVROC.EQ.'R' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + NB_P * ELSE * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + * NB_P * CEIL( CEIL(LOCc(N_P)/NB_P) / (LCM/NPCOL) ) * END IF * ELSE IF( ROWCOL.EQ.'C' .AND. PIVROC.EQ.'C' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P * ELSE * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * END IF * ELSE * IWORK is not referenced. * END IF * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ROWPVT INTEGER I, ICTXT, ICURCOL, ICURROW, IIP, ITMP, IPT, $ JJP, JPT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCPT( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, $ INFOG2L, PCLAPV2, PICOL2ROW, PIROW2COL * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC, INDXG2P EXTERNAL LSAME, NUMROC, INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'C' ) ) THEN CALL PCLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PCLAPV2 * ELSE * * Take IPIV distributed over row 0, and store it in * iwork, distributed over column 0 * IPT = MOD( JP-1, DESCA(MB_) ) DESCPT(M_) = M + IPT + NPROW*DESCA(MB_) DESCPT(N_) = 1 DESCPT(MB_) = DESCA(MB_) DESCPT(NB_) = 1 DESCPT(RSRC_) = INDXG2P( IA, DESCA(MB_), IA, DESCA(RSRC_), $ NPROW ) DESCPT(CSRC_) = MYCOL DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = NUMROC( DESCPT(M_), DESCPT(MB_), MYROW, $ DESCPT(RSRC_), NPROW ) ITMP = NUMROC( DESCIP(N_), DESCIP(NB_), MYCOL, $ DESCIP(CSRC_), NPCOL ) CALL INFOG2L( IP, JP-IPT, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) CALL PIROW2COL( ICTXT, M+IPT, 1, DESCIP(NB_), IPIV(JJP), $ ITMP, IWORK, DESCPT(LLD_), 0, ICURCOL, $ DESCPT(RSRC_), $ MYCOL, IWORK(DESCPT(LLD_)-DESCPT(MB_)+1) ) * * Send column-distributed pivots to all columns * ITMP = DESCPT(LLD_) - DESCPT(MB_) IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP, $ MYROW, 0 ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * IPT = IPT + 1 DO 10 I = 1, ITMP IWORK(I) = IWORK(I) - JP + IPT 10 CONTINUE CALL PCLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ IPT, 1, DESCPT ) END IF * * Otherwise, we're pivoting the columns of sub( A ) * ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'R' ) ) THEN CALL PCLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PCLAPV2 * ELSE * * Take IPIV distributed over column 0, and store it in * iwork, distributed over row 0 * JPT = MOD( IP-1, DESCA(NB_) ) DESCPT(M_) = 1 DESCPT(N_) = N + JPT + NPCOL*DESCA(NB_) DESCPT(MB_) = 1 DESCPT(NB_) = DESCA(NB_) DESCPT(RSRC_) = MYROW DESCPT(CSRC_) = INDXG2P( JA, DESCA(NB_), JA, DESCA(CSRC_), $ NPCOL ) DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = 1 CALL INFOG2L( IP-JPT, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) ITMP = NUMROC( N+JPT, DESCPT(NB_), MYCOL, DESCPT(CSRC_), $ NPCOL ) CALL PICOL2ROW( ICTXT, N+JPT, 1, DESCIP(MB_), IPIV(IIP), $ DESCIP(LLD_), IWORK, MAX(1, ITMP), ICURROW, $ 0, 0, DESCPT(CSRC_), IWORK(ITMP+1) ) * * Send row-distributed pivots to all rows * IF( MYROW.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP, 0, MYCOL ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * JPT = JPT + 1 DO 20 I = 1, ITMP IWORK(I) = IWORK(I) - IP + JPT 20 CONTINUE CALL PCLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ 1, JPT, DESCPT ) END IF END IF * RETURN * * End of PCLAPIV * END scalapack-1.8.0/SRC/pdzsum1.f0000640000175000017500000002110410363532303015513 0ustar muammarmuammar SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION ASUM * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PDZSUM1 returns the sum of absolute values of a complex * distributed vector sub( X ) in ASUM, * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Based on PDZASUM from the Level 1 PBLAS. The change is * to use the 'genuine' absolute value. * * The serial version of this routine was originally contributed by * Nick Higham for use with ZLACON. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * When the result of a vector-oriented PBLAS call is a scalar, it will * be made available only within the scope which owns the vector(s) * being operated on. Let X be a generic term for the input vector(s). * Then, the processes which receive the answer will be (note that if * an operation involves more than one vector, the processes which re- * ceive the result will be the union of the following calculation for * each vector): * * If N = 1, M_X = 1 and INCX = 1, then one can't determine if a process * row or process column owns the vector operand, therefore only the * process of coordinate {RSRC_X, CSRC_X} receives the result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process part of this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process part of this column receives the result; * * Parameters * ========== * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * ASUM (local output) pointer to DOUBLE PRECISION * The sum of absolute values of the distributed vector sub( X ) * only in its scope. * * X (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. CHARACTER CCTOP, RCTOP INTEGER ICOFF, ICTXT, IIX, IROFF, IXCOL, IXROW, JJX, $ LDX, MYCOL, MYROW, NP, NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGSUM2D, INFOG2L, PB_TOPGET * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION DZSUM1 EXTERNAL DZSUM1, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * ASUM = ZERO IF( N.LE.0 ) $ RETURN * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.IXCOL ) THEN ASUM = ABS( X( IIX+(JJX-1)*LDX ) ) END IF RETURN END IF * IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is distributed over a process row * IF( MYROW.EQ.IXROW ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ-ICOFF ASUM = DZSUM1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) CALL DGSUM2D( ICTXT, 'Rowwise', RCTOP, 1, 1, ASUM, 1, $ -1, MYCOL ) END IF * ELSE * * X is distributed over a process column * IF( MYCOL.EQ.IXCOL ) THEN CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP ) IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP-IROFF ASUM = DZSUM1( NP, X( IIX+(JJX-1)*LDX ), 1 ) CALL DGSUM2D( ICTXT, 'Columnwise', CCTOP, 1, 1, ASUM, 1, $ -1, MYCOL ) END IF * END IF * RETURN * * End of PDZSUM1 * END scalapack-1.8.0/SRC/pclarzt.f0000640000175000017500000002607110363532303015577 0ustar muammarmuammar SUBROUTINE PCLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) COMPLEX TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARZT forms the triangular factor T of a complex block reflector * H of order > n, which is defined as a product of k elementary * reflectors as returned by PCTZRZF. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the array V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the array V, and * * H = I - V' * T * V * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise (not supported yet) * = 'R': rowwise * * N (global input) INTEGER * The number of meaningful entries of the block reflector H. * N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) COMPLEX pointer into the local memory * to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)). * The distributed matrix V contains the Householder vectors. * See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) COMPLEX, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) COMPLEX array, dimension (MB_V,MB_V) * It contains the k-by-k triangular factor of the block * reflector associated with V. T is lower triangular. * * WORK (local workspace) COMPLEX array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * ______V_____ * ( v1 v2 v3 ) / \ * ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) * V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) * ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) * ( v1 v2 v3 ) * . . . * . . . * 1 . . * 1 . * 1 * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * ______V_____ * 1 / \ * . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) * . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) * . . . ( . . 1 . . v3 v3 v3 v3 v3 ) * . . . * ( v1 v2 v3 ) * ( v1 v2 v3 ) * V = ( v1 v2 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJV, LDV, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CCOPY, CGEMV, $ CGSUM2D, CLACGV, CLASET, CTRMV, $ INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( MYROW.EQ.IVROW ) THEN IW = 1 ITMP0 = 0 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF * DO 10 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ.GT.0 ) THEN CALL CLACGV( NQ, V( II+(JJV-1)*LDV ), LDV ) CALL CGEMV( 'No transpose', ITMP0, NQ, -TAU( II ), $ V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, WORK( IW ), $ 1 ) CALL CLACGV( NQ, V( II+(JJV-1)*LDV ), LDV ) ELSE CALL CLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF IW = IW + ITMP0 * 10 CONTINUE * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 20 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL CCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', ITMP0, $ T( ITMP1+DESCV( MB_ ) ), DESCV( MB_ ), $ T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 20 CONTINUE * END IF * END IF * RETURN * * End of PCLARZT * END scalapack-1.8.0/SRC/pspttrs.f0000640000175000017500000006423710363532303015645 0ustar muammarmuammar SUBROUTINE PSPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, $ LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL AF( * ), B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PSPTTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal symmetric positive definite distributed * matrix. * * Routine PSPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * IMPORTANT NOTE: The current version of this code supports * only IB=JA * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPTTRF and this is stored in AF. If a linear system * is to be solved using PSPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, $ MYROW, MY_NUM_COLS, NB, NP, NPCOL, NPROW, $ NP_SAVE, ODD_SIZE, PART_OFFSET, PART_SIZE, $ RETURN_CODE, STORE_M_B, STORE_N_A, TEMP, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 14, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PSPTTRSV, PXERBLA, RESHAPE, SSCAL * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 5*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 8*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 8*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 8*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -12 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 5*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 8*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 8*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -2 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -4 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 5*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PSPTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PSPTTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( 10+2*MIN( 100, NRHS ) )*NPCOL + 4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -12 CALL PXERBLA( ICTXT, 'PSPTTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 14, 1 ) = DESCB( 5 ) PARAM_CHECK( 13, 1 ) = DESCB( 4 ) PARAM_CHECK( 12, 1 ) = DESCB( 3 ) PARAM_CHECK( 11, 1 ) = DESCB( 2 ) PARAM_CHECK( 10, 1 ) = DESCB( 1 ) PARAM_CHECK( 9, 1 ) = IB PARAM_CHECK( 8, 1 ) = DESCA( 5 ) PARAM_CHECK( 7, 1 ) = DESCA( 4 ) PARAM_CHECK( 6, 1 ) = DESCA( 3 ) PARAM_CHECK( 5, 1 ) = DESCA( 1 ) PARAM_CHECK( 4, 1 ) = JA PARAM_CHECK( 3, 1 ) = NRHS PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 14, 2 ) = 905 PARAM_CHECK( 13, 2 ) = 904 PARAM_CHECK( 12, 2 ) = 903 PARAM_CHECK( 11, 2 ) = 902 PARAM_CHECK( 10, 2 ) = 901 PARAM_CHECK( 9, 2 ) = 8 PARAM_CHECK( 8, 2 ) = 505 PARAM_CHECK( 7, 2 ) = 504 PARAM_CHECK( 6, 2 ) = 503 PARAM_CHECK( 5, 2 ) = 501 PARAM_CHECK( 4, 2 ) = 4 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 12 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 14, PARAM_CHECK, 14, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 30 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * * CALL PSPTTRSV( 'L', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * * Divide by the main diagonal: B <- D^{-1} B * * The main partition is first * DO 10 I = PART_OFFSET + 1, PART_OFFSET + ODD_SIZE CALL SSCAL( NRHS, REAL( ONE / D( I ) ), B( I ), LLDB ) 10 CONTINUE * * Reduced system is next * IF( MYCOL.LT.NPCOL-1 ) THEN I = PART_OFFSET + ODD_SIZE + 1 CALL SSCAL( NRHS, ONE / AF( ODD_SIZE+2 ), B( I ), LLDB ) END IF * * Call backsolve routine * * CALL PSPTTRSV( 'U', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) 20 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 30 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSPTTRS * END scalapack-1.8.0/SRC/pctrtrs.f0000640000175000017500000003143310363532303015617 0ustar muammarmuammar SUBROUTINE PCTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCTRTRS solves a triangular system of the form * * sub( A ) * X = sub( B ) or sub( A )**T * X = sub( B ) or * * sub( A )**H * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a triangular * distributed matrix of order N, and B(IB:IB+N-1,JB:JB+NRHS-1) is an * N-by-NRHS distributed matrix denoted by sub( B ). A check is made * to verify that sub( A ) is nonsingular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': Solve sub( A ) * X = sub( B ) (No transpose) * = 'T': Solve sub( A )**T * X = sub( B ) (Transpose) * = 'C': Solve sub( A )**H * X = sub( B ) (Conjugate transpose) * * DIAG (global input) CHARACTER * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed matrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1) ). This array * contains the local pieces of the distributed triangular * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of sub( A ) contains the lower triangular * matrix, and the strictly upper triangular part of sub( A ) * is not referenced. If DIAG = 'U', the diagonal elements of * sub( A ) are also not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the right hand side distributed matrix * sub( B ). On exit, if INFO = 0, sub( B ) is overwritten by * the solution matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the i-th diagonal element of sub( A ) is * zero, indicating that the submatrix is singular and the * solutions X have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER I, IAROW, IBROW, ICOFFA, ICTXT, ICURCOL, $ ICURROW, IROFFA, IROFFB, IDUM, II, IOFFA, J, $ JBLK, JJ, JN, LDA, LL, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK2MAT, PCTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) NOTRAN = LSAME( TRANS, 'N' ) * CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -8 ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -904 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -1304 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'D' ) END IF IDUM2( 3 ) = 3 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, NRHS, 5, $ IB, JB, DESCB, 13, 3, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTRTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first block separately * JBLK = JN-JA+1 IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 10 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 LL = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JBLK = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 20 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 LL = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Solve A * x = b, A**T * x = b, or A**H * x = b. * CALL PCTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * RETURN * * End of PCTRTRS * END scalapack-1.8.0/SRC/pcung2l.f0000640000175000017500000002571610363532303015477 0ustar muammarmuammar SUBROUTINE PCUNG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNG2L generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PCGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PCGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0 COMPLEX TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLARF, PCLASET, PCSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja:ja+n-k-1 to columns of the unit matrix * CALL PCLASET( 'All', M-N, N-K, ZERO, ZERO, A, IA, JA, DESCA ) CALL PCLASET( 'All', N, N-K, ZERO, ONE, A, IA+M-N, JA, DESCA ) * TAUJ = ZERO NQA0 = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) ) DO 10 J = JA+N-K, JA+N-1 * * Apply H(j) to A(ia:ia+m-n+j-ja,ja:j) from the left * CALL PCELSET( A, IA+M-N+J-JA, J, DESCA, ONE ) CALL PCLARF( 'Left', M-N+J-JA+1, J-JA, A, IA, J, DESCA, 1, TAU, $ A, IA, JA, DESCA, WORK ) * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, NQA0 ) ) CALL PCSCAL( M-N+J-JA, -TAUJ, A, IA, J, DESCA, 1 ) CALL PCELSET( A, IA+M-N+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia+m-n+j-ja+1:ia+m-1,j) to zero * CALL PCLASET( 'All', JA+N-1-J, 1, ZERO, ZERO, A, IA+M-N+J-JA+1, $ J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNG2L * END scalapack-1.8.0/SRC/pzlawil.f0000640000175000017500000002420310363532303015575 0ustar muammarmuammar SUBROUTINE PZLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER II, JJ, M COMPLEX*16 H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), V( * ) * .. * * Purpose * ======= * * PZLAWIL gets the transform given by H44,H33, & H43H34 into V * starting at row M. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * II (global input) INTEGER * Row owner of H(M+2,M+2) * * JJ (global input) INTEGER * Column owner of H(M+2,M+2) * * M (global input) INTEGER * On entry, this is where the transform starts (row M.) * Unchanged on exit. * * A (global input) COMPLEX*16 array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * Unchanged on exit. * * H44 * H33 * H43H34 (global input) COMPLEX*16 * These three values are for the double shift QR iteration. * Unchanged on exit. * * V (global output) COMPLEX*16 array of size 3. * Contains the transform on ouput. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, ICOL, IROW, JSRC, LDA, LEFT, $ MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, $ RSRC, UP DOUBLE PRECISION S COMPLEX*16 CDUM, H11, H12, H21, H22, H33S, H44S, V1, V2, $ V3 * .. * .. Local Arrays .. COMPLEX*16 BUF( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * On node (II,JJ) collect all DIA,SUP,SUB info from M, M+1 * MODKM1 = MOD( M+1, HBL ) IF( MODKM1.EQ.0 ) THEN IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) CALL ZGESD2D( CONTXT, 1, 1, BUF, 1, II, JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW ) BUF( 2 ) = A( ( ICOL-1 )*LDA+IROW+1 ) BUF( 3 ) = A( ICOL*LDA+IROW ) BUF( 4 ) = A( ICOL*LDA+IROW+1 ) CALL ZGESD2D( CONTXT, 4, 1, BUF, 4, II, JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NPCOL.GT.1 ) THEN CALL ZGERV2D( CONTXT, 1, 1, V3, 1, MYROW, LEFT ) ELSE V3 = A( ( ICOL-2 )*LDA+IROW ) END IF IF( NUM.GT.1 ) THEN CALL ZGERV2D( CONTXT, 4, 1, BUF, 4, UP, LEFT ) H11 = BUF( 1 ) H21 = BUF( 2 ) H12 = BUF( 3 ) H22 = BUF( 4 ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) END IF END IF END IF IF( MODKM1.EQ.1 ) THEN IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) ) $ THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW, $ ICOL, RSRC, JSRC ) CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( DOWN.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. ( NPROW.GT.1 ) ) $ THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. $ ( NPCOL.GT.1 ) ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II, $ JJ ) END IF IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) IF( NUM.GT.1 ) THEN CALL ZGERV2D( CONTXT, 1, 1, H11, 1, UP, LEFT ) ELSE H11 = A( ( ICOL-3 )*LDA+IROW-2 ) END IF IF( NPROW.GT.1 ) THEN CALL ZGERV2D( CONTXT, 1, 1, H12, 1, UP, MYCOL ) ELSE H12 = A( ( ICOL-2 )*LDA+IROW-2 ) END IF IF( NPCOL.GT.1 ) THEN CALL ZGERV2D( CONTXT, 1, 1, H21, 1, MYROW, LEFT ) ELSE H21 = A( ( ICOL-3 )*LDA+IROW-1 ) END IF H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF END IF IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) $ RETURN * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, RSRC, JSRC ) H11 = A( ( ICOL-3 )*LDA+IROW-2 ) H21 = A( ( ICOL-3 )*LDA+IROW-1 ) H12 = A( ( ICOL-2 )*LDA+IROW-2 ) H22 = A( ( ICOL-2 )*LDA+IROW-1 ) V3 = A( ( ICOL-2 )*LDA+IROW ) END IF * H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 * RETURN * * End of PZLAWIL * END scalapack-1.8.0/SRC/pslarz.f0000640000175000017500000010171310363532303015430 0ustar muammarmuammar SUBROUTINE PSLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) REAL C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARZ applies a real elementary reflector Q (or Q**T) to a real * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from * either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a real scalar and v is a real vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PSTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q, Q = Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) REAL pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) REAL, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) REAL array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST REAL TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBSTRNV, $ SAXPY, SCOPY, SGEBR2D, SGEBS2D, $ SGEMV, SGER, SGERV2D, SGESD2D, $ SGSUM2D, SLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL SGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL SGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL SLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL SGER( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL SCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL SGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL SGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL SCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL SGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MPV+1 CALL SGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL SGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL SAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL SGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL SLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * IF( MPC2.GT.0 .AND. NQV.GT.0 ) $ CALL SGER( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL SCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL SGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL SGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL SCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL SGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQV+1 CALL SGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL SGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL SGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PSLARZ * END scalapack-1.8.0/SRC/pzlarft.f0000640000175000017500000004507210363532303015604 0ustar muammarmuammar SUBROUTINE PZLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) COMPLEX*16 TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARFT forms the triangular factor T of a complex block reflector H * of order n, which is defined as a product of k elementary reflectors. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the distributed matrix V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the distributed matrix V, and * * H = I - V' * T * V * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER*1 * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER*1 * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise * = 'R': rowwise * * N (global input) INTEGER * The order of the block reflector H. N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) COMPLEX*16 pointer into the local memory * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1)) * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if * STOREV = 'R'. The distributed matrix V contains the * Householder vectors. See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) COMPLEX*16 array, dimension (NB_V,NB_V) * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains * the k-by-k triangular factor of the block reflector asso- * ciated with V. If DIRECT = 'F', T is upper triangular; * if DIRECT = 'B', T is lower triangular. * * WORK (local workspace) COMPLEX*16 array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 ) * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 ) * ( v1 v2 1 ) ( 1 v3 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 ) * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 ) * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) * ( 1 v3 ) * ( 1 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL FORWARD INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJ, JJV, LDV, MICOL, MIROW, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ COMPLEX*16 VII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZCOPY, ZGEMV, $ ZGSUM2D, ZLACGV, ZLASET, ZTRMV * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 .OR. K.LE.0 ) $ RETURN * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( LSAME( STOREV, 'C' ) .AND. MYCOL.EQ.IVCOL ) THEN * IW = 1 LDV = DESCV( LLD_ ) IROFF = MOD( IV-1, DESCV( MB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NP = NP - IROFF II = IIV + 1 ELSE II = IIV END IF IF( IROFF+1.EQ.DESCV( MB_ ) ) THEN MIROW = MOD( IVROW+1, NPROW ) ELSE MIROW = IVROW END IF ITMP0 = 0 * DO 10 JJ = JJV+1, JJV+K-1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( NP-II+IIV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', NP-II+IIV, ITMP0, $ -TAU( JJ ), V( II+(JJV-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL ZLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II + 1 END IF * IF( MOD( IV+ITMP0, DESCV( MB_ ) ).EQ.0 ) $ MIROW = MOD( MIROW+1, NPROW ) * 10 CONTINUE * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( JJV ) * DO 20 JJ = JJV+1, JJV+K-1 * * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( NB_ ) CALL ZCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( NB_ ), T( ITMP1 ), 1 ) T(ITMP1+ITMP0) = TAU( JJ ) * 20 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF-1, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ NP = NP - IROFF MIROW = INDXG2P( IV+N-2, DESCV( MB_ ), MYROW, $ DESCV( RSRC_ ), NPROW ) II = IIV + NP - 1 ITMP0 = 0 * DO 30 JJ = JJV+K-2, JJV, -1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( II-IIV+1.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', II-IIV+1, ITMP0, $ -TAU( JJ ), V( IIV+JJ*LDV ), LDV, $ V( IIV+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL ZLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II - 1 END IF * IF( MOD( IV+N-ITMP0-2, DESCV(MB_) ).EQ.0 ) $ MIROW = MOD( MIROW+NPROW-1, NPROW ) * 30 CONTINUE * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( NB_ ) * T( ITMP1-1 ) = TAU( JJV+K-1 ) * DO 40 JJ = JJV+K-2, JJV, -1 * * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( NB_ ) - 1 CALL ZCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( NB_ ) ), $ DESCV( NB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( JJ ) * 40 CONTINUE * END IF * END IF * ELSE IF( LSAME( STOREV, 'R' ) .AND. MYROW.EQ.IVROW ) THEN * IW = 1 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN NQ = NQ - ICOFF JJ = JJV + 1 ELSE JJ = JJV END IF IF( ICOFF+1.EQ.DESCV( NB_ ) ) THEN MICOL = MOD( IVCOL+1, NPCOL ) ELSE MICOL = IVCOL END IF ITMP0 = 0 * DO 50 II = IIV+1, IIV+K-1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( iv+i-1 ) * * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ-JJ+JJV.GT.0 ) THEN CALL ZLACGV( NQ-JJ+JJV, V( II+(JJ-1)*LDV ), LDV ) CALL ZGEMV( 'No transpose', ITMP0, NQ-JJ+JJV, $ -TAU(II), V( IIV+(JJ-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) CALL ZLACGV( NQ-JJ+JJV, V( II+(JJ-1)*LDV ), LDV ) ELSE CALL ZLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ + 1 END IF * IF( MOD( JV+ITMP0, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+1, NPCOL ) * 50 CONTINUE * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( IIV ) * DO 60 II = IIV+1, IIV+K-1 * * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( MB_ ) CALL ZCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1+ITMP0 ) = TAU( II ) * 60 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF-1, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF MICOL = INDXG2P( JV+N-2, DESCV( NB_ ), MYCOL, $ DESCV( CSRC_ ), NPCOL ) JJ = JJV + NQ - 1 ITMP0 = 0 * DO 70 II = IIV+K-2, IIV, -1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)' * ITMP0 = ITMP0 + 1 IF( JJ-JJV+1.GT.0 ) THEN CALL ZLACGV( JJ-JJV+1, V( II+(JJV-1)*LDV ), LDV ) CALL ZGEMV( 'No transpose', ITMP0, JJ-JJV+1, $ -TAU( II ), V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) CALL ZLACGV( JJ-JJV+1, V( II+(JJV-1)*LDV ), LDV ) ELSE CALL ZLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ - 1 END IF * IF( MOD( JV+N-ITMP0-2, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+NPCOL-1, NPCOL ) * 70 CONTINUE * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 80 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL ZCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( MB_ ) ), $ DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 80 CONTINUE * END IF * END IF * END IF * RETURN * * End of PZLARFT * END scalapack-1.8.0/SRC/pzgeqlf.f0000640000175000017500000002734710363532303015577 0ustar muammarmuammar SUBROUTINE PZGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQLF computes a QL factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, IINFO, ICTXT, IPW, J, JB, JL, JN, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZGEQL2, PZLARFB, $ PZLARFT * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEQLF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( (JA+N-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( JL.GE.JN+1 ) THEN * * Use blocked code initially * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Compute the QL factorization of the current block * A(ia:ia+m-n+j+jb-ja-1,j:j+jb-1) * CALL PZGEQL2( M-N+J+JB-JA, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( J.GT.JA ) THEN * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PZLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the * left * CALL PZLARFB( 'Left', 'Conjugate transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = M - N + JN - JA + 1 NU = JN - JA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PZGEQL2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEQLF * END scalapack-1.8.0/SRC/pcgeqpf.f0000640000175000017500000005234110363532303015544 0ustar muammarmuammar SUBROUTINE PCGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. INTEGER IA, JA, INFO, LRWORK, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL RWORK( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQPF computes a QR factorization with column pivoting of a * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1): * * sub( A ) * P = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX(3,Mp0 + Nq0). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCc(JA+N-1)+Nq0. * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * LOCc(JA+N-1) = NUMROC( JA+N-1, NB_A, MYCOL, CSRC_A, NPCOL ) * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(1) H(2) . . . H(n) * * Each H(i) has the form * * H = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i-1:ia+m-1,ja+i-1). * * The matrix P is represented in jpvt as follows: If * jpvt(j) = i * then the jth column of P is the ith canonical unit vector. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, ICURROW, $ ICURCOL, II, IIA, IOFFA, IPCOL, IROFF, ITEMP, $ J, JB, JJ, JJA, JJPVT, JN, KB, K, KK, KSTART, $ KSTEP, LDA, LL, LRWMIN, LWMIN, MN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ, NQ0, PVT REAL TEMP, TEMP2 COMPLEX AJJ, ALPHA * .. * .. Local Arrays .. INTEGER DESCN( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CGEBR2D, CGEBS2D, $ CGERV2D, CGESD2D, CHK1MAT, CLARFG, $ CSWAP, DESCSET, IGERV2D, IGESD2D, INFOG1L, $ INFOG2L, PCELSET, PCHK1MAT, PCLARFC, $ PCLARFG, PSAMAX, PSCNRM2, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, CONJG, IFIX, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NQ0 = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) LWMIN = MAX( 3, MP + NQ ) LRWMIN = NQ0 + NQ * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEQPF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF MN = MIN( M, N ) * * Initialize the array of pivots * LDA = DESCA( LLD_ ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) KSTEP = NPCOL * DESCA( NB_ ) * IF( MYCOL.EQ.IACOL ) THEN * * Handle first block separately * JB = JN - JA + 1 DO 10 LL = JJA, JJA+JB-1 IPIV( LL ) = JA + LL - JJA 10 CONTINUE KSTART = JN + KSTEP - DESCA( NB_ ) * * Loop over remaining block of columns * DO 30 KK = JJA+JB, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 20 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 20 CONTINUE KSTART = KSTART + KSTEP 30 CONTINUE ELSE KSTART = JN + ( MOD( MYCOL-IACOL+NPCOL, NPCOL )-1 )* $ DESCA( NB_ ) DO 50 KK = JJA, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 40 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 40 CONTINUE KSTART = KSTART + KSTEP 50 CONTINUE END IF * * Initialize partial column norms, handle first block separately * CALL DESCSET( DESCN, 1, DESCA( N_ ), 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), ICTXT, 1 ) * JJ = JJA IF( MYCOL.EQ.IACOL ) THEN DO 60 KK = 0, JB-1 CALL PSCNRM2( M, RWORK( JJ+KK ), A, IA, JA+KK, DESCA, 1 ) RWORK( NQ+JJ+KK ) = RWORK( JJ+KK ) 60 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining blocks of columns * DO 80 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 70 KK = 0, JB-1 CALL PSCNRM2( M, RWORK( JJ+KK ), A, IA, J+KK, DESCA, 1 ) RWORK( NQ+JJ+KK ) = RWORK( JJ+KK ) 70 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) 80 CONTINUE * * Compute factorization * DO 120 J = JA, JA+MN-1 I = IA + J - JA * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, ICURCOL ) K = JA + N - J IF( K.GT.1 ) THEN CALL PSAMAX( K, TEMP, PVT, RWORK, 1, J, DESCN, $ DESCN( M_ ) ) ELSE PVT = J END IF IF( J.NE.PVT ) THEN CALL INFOG1L( PVT, DESCA( NB_ ), NPCOL, MYCOL, $ DESCA( CSRC_ ), JJPVT, IPCOL ) IF( ICURCOL.EQ.IPCOL ) THEN IF( MYCOL.EQ.ICURCOL ) THEN CALL CSWAP( MP, A( IIA+(JJ-1)*LDA ), 1, $ A( IIA+(JJPVT-1)*LDA ), 1 ) ITEMP = IPIV( JJPVT ) IPIV( JJPVT ) = IPIV( JJ ) IPIV( JJ ) = ITEMP RWORK( JJPVT ) = RWORK( JJ ) RWORK( NQ+JJPVT ) = RWORK( NQ+JJ ) END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN * CALL CGESD2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) WORK( 1 ) = CMPLX( REAL( IPIV( JJ ) ) ) WORK( 2 ) = CMPLX( RWORK( JJ ) ) WORK( 3 ) = CMPLX( RWORK( JJ + NQ ) ) CALL CGESD2D( ICTXT, 3, 1, WORK, 3, MYROW, IPCOL ) * CALL CGERV2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) CALL IGERV2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, $ IPCOL ) * ELSE IF( MYCOL.EQ.IPCOL ) THEN * CALL CGESD2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL IGESD2D( ICTXT, 1, 1, IPIV( JJPVT ), 1, MYROW, $ ICURCOL ) * CALL CGERV2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL CGERV2D( ICTXT, 3, 1, WORK, 3, MYROW, ICURCOL ) IPIV( JJPVT ) = IFIX( REAL( WORK( 1 ) ) ) RWORK( JJPVT ) = REAL( WORK( 2 ) ) RWORK( JJPVT+NQ ) = REAL( WORK( 3 ) ) * END IF * END IF * END IF * * Generate elementary reflector H(i) * CALL INFOG1L( I, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, ICURROW ) IF( DESCA( M_ ).EQ.1 ) THEN IF( MYROW.EQ.ICURROW ) THEN IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*DESCA( LLD_ ) AJJ = A( IOFFA ) CALL CLARFG( 1, AJJ, A( IOFFA ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = CMPLX( ONE ) - CONJG( TAU( JJ ) ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1 ) CALL CSCAL( NQ-JJ, ALPHA, A( IOFFA+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ ), 1 ) A( IOFFA ) = AJJ ELSE IF( N.GT.1 ) THEN CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, ICURROW, ICURCOL ) CALL CSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1, ICURROW, ICURCOL ) END IF * ELSE * CALL PCLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(i) to A(ia+j-ja:ia+m-1,j+1:ja+n-1) from the left * CALL PCELSET( A, I, J, DESCA, CMPLX( ONE ) ) CALL PCLARFC( 'Left', M-J+JA, JA+N-1-J, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK ) END IF CALL PCELSET( A, I, J, DESCA, AJJ ) * END IF * * Update partial columns norms * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 IF( MOD( J, DESCA( NB_ ) ).EQ.0 ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) IF( (JJA+NQ-JJ).GT.0 ) THEN IF( MYROW.EQ.ICURROW ) THEN CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, JJA+NQ-JJ, $ A( II+( MIN( JJA+NQ-1, JJ )-1 )*LDA ), $ LDA ) CALL CCOPY( JJA+NQ-JJ, A( II+( MIN( JJA+NQ-1, JJ ) $ -1)*LDA ), LDA, WORK( MIN( JJA+NQ-1, JJ ) ), $ 1 ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', JJA+NQ-JJ, 1, $ WORK( MIN( JJA+NQ-1, JJ ) ), MAX( 1, NQ ), $ ICURROW, MYCOL ) END IF END IF * JN = MIN( ICEIL( J+1, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA + N - 1 ) IF( MYCOL.EQ.ICURCOL ) THEN DO 90 LL = JJ, JJ + JN - J - 1 IF( RWORK( LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( LL ) ) / RWORK( LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05E+0*TEMP* $ ( RWORK( LL ) / RWORK( NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PSCNRM2( IA+M-I-1, RWORK( LL ), A, $ I+1, J+LL-JJ, DESCA, 1 ) RWORK( NQ+LL ) = RWORK( LL ) ELSE RWORK( LL ) = ZERO RWORK( NQ+LL ) = ZERO END IF ELSE RWORK( LL ) = RWORK( LL ) * SQRT( TEMP ) END IF END IF 90 CONTINUE JJ = JJ + JN - J END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 110 K = JN+1, JA+N-1, DESCA( NB_ ) KB = MIN( JA+N-K, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 100 LL = JJ, JJ+KB-1 IF( RWORK(LL).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( LL ) ) / RWORK( LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05E+0*TEMP* $ ( RWORK( LL ) / RWORK( NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PSCNRM2( IA+M-I-1, RWORK( LL ), A, $ I+1, K+LL-JJ, DESCA, 1 ) RWORK( NQ+LL ) = RWORK( LL ) ELSE RWORK( LL ) = ZERO RWORK( NQ+LL ) = ZERO END IF ELSE RWORK( LL ) = RWORK( LL ) * SQRT( TEMP ) END IF END IF 100 CONTINUE JJ = JJ + KB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * 110 CONTINUE * 120 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) * RETURN * * End of PCGEQPF * END scalapack-1.8.0/SRC/pcunmbr.f0000640000175000017500000005435510363532303015574 0ustar muammarmuammar SUBROUTINE PCUNMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * If VECT = 'Q', PCUNMBR overwrites the general complex distributed * M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * If VECT = 'P', PCUNMBR overwrites sub( C ) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': P * sub( C ) sub( C ) * P * TRANS = 'C': P**H * sub( C ) sub( C ) * P**H * * Here Q and P**H are the unitary distributed matrices determined by * PCGEBRD when reducing a complex distributed matrix A(IA:*,JA:*) to * bidiagonal form: A(IA:*,JA:*) = Q * B * P**H. Q and P**H are defined * as products of elementary reflectors H(i) and G(i) respectively. * * Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the * order of the unitary matrix Q or P**H that is applied. * * If VECT = 'Q', A(IA:*,JA:*) is assumed to have been an NQ-by-K * matrix: * if nq >= k, Q = H(1) H(2) . . . H(k); * if nq < k, Q = H(1) H(2) . . . H(nq-1). * * If VECT = 'P', A(IA:*,JA:*) is assumed to have been a K-by-NQ * matrix: * if k < nq, P = G(1) G(2) . . . G(k); * if k >= nq, P = G(1) G(2) . . . G(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * VECT (global input) CHARACTER * = 'Q': apply Q or Q**H; * = 'P': apply P or P**H. * * SIDE (global input) CHARACTER * = 'L': apply Q, Q**H, P or P**H from the Left; * = 'R': apply Q, Q**H, P or P**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q or P; * = 'C': Conjugate transpose, apply Q**H or P**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * If VECT = 'Q', the number of columns in the original * distributed matrix reduced by PCGEBRD. * If VECT = 'P', the number of rows in the original * distributed matrix reduced by PCGEBRD. * K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+MIN(NQ,K)-1)) if * VECT='Q', and (LLD_A,LOCc(JA+NQ-1)) if VECT = 'P'. NQ = M * if SIDE = 'L', and NQ = N otherwise. The vectors which * define the elementary reflectors H(i) and G(i), whose * products determine the matrices Q and P, as returned by * PCGEBRD. * If VECT = 'Q', LLD_A >= max(1,LOCr(IA+NQ-1)); * if VECT = 'P', LLD_A >= max(1,LOCr(IA+MIN(NQ,K)-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX array, dimension * LOCc(JA+MIN(NQ,K)-1) if VECT = 'Q', LOCr(IA+MIN(NQ,K)-1) if * VECT = 'P', TAU(i) must contain the scalar factor of the * elementary reflector H(i) or G(i), which determines Q or P, * as returned by PDGEBRD in its array argument TAUQ or TAUP. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, if VECT='Q', sub( C ) is overwritten by Q*sub( C ) * or Q'*sub( C ) or sub( C )*Q' or sub( C )*Q; if VECT='P, * sub( C ) is overwritten by P*sub( C ) or P'*sub( C ) or * sub( C )*P or sub( C )*P'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * NQ = M; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA+1; JAA=JA; MI=M-1; NI=N; ICC=IC+1; JCC=JC; * end if * else if SIDE = 'R', * NQ = N; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA; JAA=JA+1; MI=M; NI=N-1; ICC=IC; JCC=JC+1; * end if * end if * * If VECT = 'Q', * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * else if VECT <> 'Q', * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( MI+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * end if * * where LCMP = LCM / NPROW, LCMQ = LCM / NPCOL, with * LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JAA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( MI+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If VECT = 'Q', * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * else * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN CHARACTER TRANST INTEGER IAA, IACOL, IAROW, ICC, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IINFO, IROFFA, IROFFC, JAA, JCC, $ LCM, LCMP, LCMQ, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCUNMLQ, $ PCUNMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE APPLYQ = LSAME( VECT, 'Q' ) LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q or P * IF( LEFT ) THEN NQ = M IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA + 1 JAA = JA MI = M - 1 NI = N ICC = IC + 1 JCC = JC END IF * IF( APPLYQ ) THEN CALL CHK1MAT( M, 4, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, M, 4, IA, JA, DESCA, 10, INFO ) END IF ELSE NQ = N IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA JAA = JA + 1 MI = M NI = N - 1 ICC = IC JCC = JC + 1 END IF * IF( APPLYQ ) THEN CALL CHK1MAT( N, 5, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, N, 5, IA, JA, DESCA, 10, INFO ) END IF END IF CALL CHK1MAT( M, 4, N, 5, IC, JC, DESCC, 15, INFO ) * IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) ICOFFA = MOD( JAA-1, DESCA( NB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JAA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( APPLYQ ) THEN IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF ELSE * IF( LEFT ) THEN MQA0 = NUMROC( MI+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ MI+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN INFO = -1 ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -2 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -3 ELSE IF( K.LT.0 ) THEN INFO = -6 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( APPLYQ .AND. LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( APPLYQ .AND. LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( APPLYQ ) THEN IDUM1( 1 ) = ICHAR( 'Q' ) ELSE IDUM1( 1 ) = ICHAR( 'P' ) END IF IDUM2( 1 ) = 1 IF( LEFT ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'R' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'C' ) END IF IDUM2( 3 ) = 3 IDUM1( 4 ) = K IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( APPLYQ ) THEN IF( LEFT ) THEN CALL PCHK2MAT( M, 4, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( N, 5, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF ELSE IF( LEFT ) THEN CALL PCHK2MAT( K, 6, M, 4, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( K, 6, N, 5, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( APPLYQ ) THEN * * Apply Q * IF( NQ.GE.K ) THEN * * Q was determined by a call to PCGEBRD with nq >= k * CALL PCUNMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * Q was determined by a call to PCGEBRD with nq < k * CALL PCUNMQR( SIDE, TRANS, MI, NI, NQ-1, A, IA+1, JA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) END IF ELSE * * Apply P * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF IF( NQ.GT.K ) THEN * * P was determined by a call to PCGEBRD with nq > k * CALL PCUNMLQ( SIDE, TRANST, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * P was determined by a call to PCGEBRD with nq <= k * CALL PCUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A, IA, JA+1, $ DESCA, TAU, C, ICC, JCC, DESCC, WORK, LWORK, $ IINFO ) END IF END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMBR * END scalapack-1.8.0/SRC/pzgesvd.f0000640000175000017500000005606310377355407015623 0ustar muammarmuammar SUBROUTINE PZGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, + VT,IVT,JVT,DESCVT,WORK,LWORK,RWORK,INFO) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Oak Ridge National Laboratory * and Univ. of California Berkeley. * Jan 2006 * * .. Scalar Arguments .. CHARACTER JOBU,JOBVT INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N * .. * .. Array Arguments .. INTEGER DESCA(*),DESCU(*),DESCVT(*) COMPLEX*16 A(*),U(*),VT(*),WORK(*) DOUBLE PRECISION S(*) DOUBLE PRECISION RWORK(*) * .. * * Purpose * ======= * * PZGESVD computes the singular value decomposition (SVD) of an * M-by-N matrix A, optionally computing the left and/or right * singular vectors. The SVD is written as * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A and the columns of U and V are the * corresponding right and left singular vectors, respectively. The * singular values are returned in array S in decreasing order and * only the first min(M,N) columns of U and rows of VT = V**T are * computed. * * Notes * ===== * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, and * assume that its process grid has dimension r x c. LOCr( K ) denotes * the number of elements of K that a process would receive if K were * distributed over the r processes of its process column. Similarly, * LOCc( K ) denotes the number of elements of K that a process would * receive if K were distributed over the c processes of its process * row. The values of LOCr() and LOCc() may be determined via a call * to the ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZE = min( M, N ) * SIZEQ = number of local columns in U * SIZEP = number of local rows in VT * * JOBU (global input) CHARACTER*1 * Specifies options for computing U: * = 'V': the first SIZE columns of U (the left singular * vectors) are returned in the array U; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (global input) CHARACTER*1 * Specifies options for computing V**T: * = 'V': the first SIZE rows of V**T (the right singular * vectors) are returned in the array VT; * = 'N': no rows of V**T (no right singular vectors) are * computed. * * M (global input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (global input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX*16 * array, * global dimension (M, N), local dimension (MP, NQ) * On exit, the contents of A are destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * S (global output) DOUBLE PRECISION array, dimension SIZE * The singular values of A, sorted so that S(i) >= S(i+1). * * U (local output) COMPLEX*16 array, local dimension * (MP, SIZEQ), global dimension (M, SIZE) * if JOBU = 'V', U contains the first min(m,n) columns of U * if JOBU = 'N', U is not referenced. * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local output) COMPLEX*16 array, local dimension * (SIZEP, NQ), global dimension (SIZE, N). * If JOBVT = 'V', VT contains the first SIZE rows of * V**T. If JOBVT = 'N', VT is not referenced. * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * WORK (local workspace/output) COMPLEX*16 array, dimension * (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * LWORK >= 1 + 2*SIZEB + MAX(WATOBD, WBDTOSVD), * * where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer, * respectively, to the workspace required to bidiagonalize * the matrix A and to go from the bidiagonal matrix to the * singular value decomposition U*S*VT. * * For WATOBD, the following holds: * * WATOBD = MAX(MAX(WPZLANGE,WPZGEBRD), * MAX(WPZLARED2D,WP(pre)LARED1D)), * * where WPZLANGE, WPZLARED1D, WPZLARED2D, WPZGEBRD are the * workspaces required respectively for the subprograms * PZLANGE, PDLARED1D, PDLARED2D, PZGEBRD. Using the * standard notation * * MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW), * NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL), * * the workspaces required for the above subprograms are * * WPZLANGE = MP, * WPDLARED1D = NQ0, * WPDLARED2D = MP0, * WPZGEBRD = NB*(MP + NQ + 1) + NQ, * * where NQ0 and MP0 refer, respectively, to the values obtained * at MYCOL = 0 and MYROW = 0. In general, the upper limit for * the workspace is given by a workspace required on * processor (0,0): * * WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0. * * In case of a homogeneous process grid this upper limit can * be used as an estimate of the minimum workspace for every * processor. * * For WBDTOSVD, the following holds: * * WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) + * MAX(WZBDSQR, * MAX(WANTU*WPZORMBRQLN, WANTVT*WPZORMBRPRT)), * * where * * 1, if left(right) singular vectors are wanted * WANTU(WANTVT) = * 0, otherwise * * and WZBDSQR, WPZORMBRQLN and WPZORMBRPRT refer respectively * to the workspace required for the subprograms ZBDSQR, * PZUNMBR(QLN), and PZUNMBR(PRT), where QLN and PRT are the * values of the arguments VECT, SIDE, and TRANS in the call * to PZUNMBR. NRU is equal to the local number of rows of * the matrix U when distributed 1-dimensional "column" of * processes. Analogously, NCVT is equal to the local number * of columns of the matrix VT when distributed across * 1-dimensional "row" of processes. Calling the LAPACK * procedure ZBDSQR requires * * WZBDSQR = MAX(1, 4*SIZE ) * * on every processor. Finally, * * WPZORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB, * WPZORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB, * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the work array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * RWORK (workspace) REAL array, dimension (1+4*SIZEB) * On exit, if INFO = 0, RWORK(1) returns the necessary size * for RWORK. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if ZBDSQR did not converge * If INFO = MIN(M,N) + 1, then PZGESVD has detected * heterogeneity by finding that eigenvalues were not * identical across the process grid. In this case, the * accuracy of the results from PZGESVD cannot be * guaranteed. * * ===================================================================== * * The results of PZGEBRD, and therefore PZGESVD, may vary slightly * from run to run with the same input data. If repeatability is an * issue, call BLACS_SET with the appropriate option after defining * the process grid. * * Alignment requirements * ====================== * * The routine PZGESVD inherits the same alignement requirement as * the routine PZGEBRD, namely: * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * * ===================================================================== * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_, + CSRC_,LLD_,ITHVAL PARAMETER (BLOCK_CYCLIC_2D=1,DLEN_=9,DTYPE_=1,CTXT_=2,M_=3,N_=4, + MB_=5,NB_=6,RSRC_=7,CSRC_=8,LLD_=9,ITHVAL=10) COMPLEX*16 ZERO,ONE PARAMETER (ZERO= ((0.0D+0,0.0D+0)),ONE= ((1.0D+0,0.0D+0))) DOUBLE PRECISION DZERO,DONE PARAMETER (DZERO=0.0D+0,DONE=1.0D+0) * .. * .. Local Scalars .. CHARACTER UPLO INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ, + INDU,INDV,INDWORK,IOFFD,IOFFE,ISCALE,J,K,LDU,LDVT,LLWORK, + LWMIN,MAXIM,MB,MP,MYPCOL,MYPCOLC,MYPCOLR,MYPROW,MYPROWC, + MYPROWR,NB,NCVT,NPCOL,NPCOLC,NPCOLR,NPROCS,NPROW,NPROWC, + NPROWR,NQ,NRU,SIZE,SIZEB,SIZEP,SIZEPOS,SIZEQ,WANTU,WANTVT, + WATOBD,WBDTOSVD,WZBDSQR,WPZGEBRD,WPZLANGE,WPZORMBRPRT, + WPZORMBRQLN DOUBLE PRECISION ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM * .. * .. Local Arrays .. INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3) DOUBLE PRECISION C(1,1) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC DOUBLE PRECISION PDLAMCH,PZLANGE EXTERNAL LSAME,NUMROC,PDLAMCH,PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GET,BLACS_GRIDEXIT,BLACS_GRIDINFO,BLACS_GRIDINIT, + CHK1MAT,ZBDSQR,DESCINIT,DGAMN2D,DGAMX2D,DSCAL,IGAMX2D, + IGEBR2D,IGEBS2D,PCHK1MAT,PZGEBRD,PZGEMR2D,PDLARED1D, + PDLARED2D,PZLASCL,PZLASET,PZUNMBR,PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,DBLE INTRINSIC DCMPLX * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) ISCALE = 0 INFO = 0 * IF (NPROW.EQ.-1) THEN INFO = - (800+CTXT_) ELSE * SIZE = MIN(M,N) SIZEB = MAX(M,N) NPROCS = NPROW*NPCOL IF (M.GE.N) THEN IOFFD = JA - 1 IOFFE = IA - 1 SIZEPOS = 1 ELSE IOFFD = IA - 1 IOFFE = JA - 1 SIZEPOS = 3 END IF * IF (LSAME(JOBU,'V')) THEN WANTU = 1 ELSE WANTU = 0 END IF IF (LSAME(JOBVT,'V')) THEN WANTVT = 1 ELSE WANTVT = 0 END IF * CALL CHK1MAT(M,3,N,4,IA,JA,DESCA,8,INFO) IF (WANTU.EQ.1) THEN CALL CHK1MAT(M,3,SIZE,SIZEPOS,IU,JU,DESCU,13,INFO) END IF IF (WANTVT.EQ.1) THEN CALL CHK1MAT(SIZE,SIZEPOS,N,4,IVT,JVT,DESCVT,17,INFO) END IF CALL IGAMX2D(DESCA(CTXT_),'A',' ',1,1,INFO,1,1,1,-1,-1,0) * IF (INFO.EQ.0) THEN * * Set up pointers into the WORK array. * INDD = 2 INDE = INDD + SIZEB + IOFFD INDD2 = INDE + SIZEB + IOFFE INDE2 = INDD2 + SIZEB + IOFFD * INDTAUQ = 2 INDTAUP = INDTAUQ + SIZEB + JA - 1 INDWORK = INDTAUP + SIZEB + IA - 1 LLWORK = LWORK - INDWORK + 1 * * Initialize contexts for "column" and "row" process matrices. * CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTC) CALL BLACS_GRIDINIT(CONTEXTC,'R',NPROCS,1) CALL BLACS_GRIDINFO(CONTEXTC,NPROWC,NPCOLC,MYPROWC, + MYPCOLC) CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTR) CALL BLACS_GRIDINIT(CONTEXTR,'R',1,NPROCS) CALL BLACS_GRIDINFO(CONTEXTR,NPROWR,NPCOLR,MYPROWR, + MYPCOLR) * * Set local dimensions of matrices (this is for MB=NB=1). * NRU = NUMROC(M,1,MYPROWC,0,NPROCS) NCVT = NUMROC(N,1,MYPCOLR,0,NPROCS) NB = DESCA(NB_) MB = DESCA(MB_) MP = NUMROC(M,MB,MYPROW,DESCA(RSRC_),NPROW) NQ = NUMROC(N,NB,MYPCOL,DESCA(CSRC_),NPCOL) IF (WANTVT.EQ.1) THEN SIZEP = NUMROC(SIZE,DESCVT(MB_),MYPROW,DESCVT(RSRC_), + NPROW) ELSE SIZEP = 0 END IF IF (WANTU.EQ.1) THEN SIZEQ = NUMROC(SIZE,DESCU(NB_),MYPCOL,DESCU(CSRC_), + NPCOL) ELSE SIZEQ = 0 END IF * * Transmit MAX(NQ0, MP0). * IF (MYPROW.EQ.0 .AND. MYPCOL.EQ.0) THEN MAXIM = MAX(NQ,MP) CALL IGEBS2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1) ELSE CALL IGEBR2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1,0,0) END IF * WPZLANGE = MP WPZGEBRD = NB* (MP+NQ+1) + NQ WATOBD = MAX(MAX(WPZLANGE,WPZGEBRD),MAXIM) * WZBDSQR = MAX(1,4*SIZE) WPZORMBRQLN = MAX((NB* (NB-1))/2, (SIZEQ+MP)*NB) + NB*NB WPZORMBRPRT = MAX((MB* (MB-1))/2, (SIZEP+NQ)*MB) + MB*MB WBDTOSVD = SIZE* (WANTU*NRU+WANTVT*NCVT) + + MAX(WZBDSQR,MAX(WANTU*WPZORMBRQLN, + WANTVT*WPZORMBRPRT)) * * Finally, calculate required workspace. * LWMIN = 1 + 2*SIZEB + MAX(WATOBD,WBDTOSVD) WORK(1) = DCMPLX(LWMIN,0D+00) RWORK(1) = DBLE(1+4*SIZEB) * IF (WANTU.NE.1 .AND. .NOT. (LSAME(JOBU,'N'))) THEN INFO = -1 ELSE IF (WANTVT.NE.1 .AND. .NOT. (LSAME(JOBVT,'N'))) THEN INFO = -2 ELSE IF (LWORK.LT.LWMIN .AND. LWORK.NE.-1) THEN INFO = -19 END IF * END IF * IDUM1(1) = WANTU IDUM1(2) = WANTVT IF (LWORK.EQ.-1) THEN IDUM1(3) = -1 ELSE IDUM1(3) = 1 END IF IDUM2(1) = 1 IDUM2(2) = 2 IDUM2(3) = 19 CALL PCHK1MAT(M,3,N,4,IA,JA,DESCA,8,3,IDUM1,IDUM2,INFO) IF (INFO.EQ.0) THEN IF (WANTU.EQ.1) THEN CALL PCHK1MAT(M,3,SIZE,4,IU,JU,DESCU,13,0,IDUM1,IDUM2, + INFO) END IF IF (WANTVT.EQ.1) THEN CALL PCHK1MAT(SIZE,3,N,4,IVT,JVT,DESCVT,17,0,IDUM1, + IDUM2,INFO) END IF END IF * END IF * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PZGESVD',-INFO) RETURN ELSE IF (LWORK.EQ.-1) THEN GO TO 40 END IF * * Quick return if possible. * IF (M.LE.0 .OR. N.LE.0) GO TO 40 * * Get machine constants. * SAFMIN = PDLAMCH(DESCA(CTXT_),'Safe minimum') EPS = PDLAMCH(DESCA(CTXT_),'Precision') SMLNUM = SAFMIN/EPS BIGNUM = DONE/SMLNUM RMIN = SQRT(SMLNUM) RMAX = MIN(SQRT(BIGNUM),DONE/SQRT(SQRT(SAFMIN))) * * Scale matrix to allowable range, if necessary. * ANRM = PZLANGE('1',M,N,A,IA,JA,DESCA,WORK(INDWORK)) IF (ANRM.GT.DZERO .AND. ANRM.LT.RMIN) THEN ISCALE = 1 SIGMA = RMIN/ANRM ELSE IF (ANRM.GT.RMAX) THEN ISCALE = 1 SIGMA = RMAX/ANRM END IF * IF (ISCALE.EQ.1) THEN CALL PZLASCL('G',DONE,SIGMA,M,N,A,IA,JA,DESCA,INFO) END IF * CALL PZGEBRD(M,N,A,IA,JA,DESCA,RWORK(INDD),RWORK(INDE), + WORK(INDTAUQ),WORK(INDTAUP),WORK(INDWORK),LLWORK, + INFO) * * Copy D and E to all processes. * Array D is in local array of dimension: * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * Array E is in local array of dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * IF (M.GE.N) THEN * Distribute D CALL PDLARED1D(N+IOFFD,IA,JA,DESCA,RWORK(INDD),RWORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PDLARED2D(M+IOFFE,IA,JA,DESCA,RWORK(INDE),RWORK(INDE2), + WORK(INDWORK),LLWORK) ELSE * Distribute D CALL PDLARED2D(M+IOFFD,IA,JA,DESCA,RWORK(INDD),RWORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PDLARED1D(N+IOFFE,IA,JA,DESCA,RWORK(INDE),RWORK(INDE2), + WORK(INDWORK),LLWORK) END IF * * Prepare for calling PZBDSQR. * IF (M.GE.N) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * INDU = INDWORK INDV = INDU + SIZE*NRU*WANTU INDWORK = INDV + SIZE*NCVT*WANTVT * LDU = MAX(1,NRU) LDVT = MAX(1,SIZE) * CALL DESCINIT(DESCTU,M,SIZE,1,1,0,0,CONTEXTC,LDU,INFO) CALL DESCINIT(DESCTVT,SIZE,N,1,1,0,0,CONTEXTR,LDVT,INFO) * IF (WANTU.EQ.1) THEN CALL PZLASET('Full',M,SIZE,ZERO,ONE,WORK(INDU),1,1,DESCTU) ELSE NRU = 0 END IF * IF (WANTVT.EQ.1) THEN CALL PZLASET('Full',SIZE,N,ZERO,ONE,WORK(INDV),1,1,DESCTVT) ELSE NCVT = 0 END IF * CALL ZBDSQR(UPLO,SIZE,NCVT,NRU,0,RWORK(INDD2+IOFFD), + RWORK(INDE2+IOFFE),WORK(INDV),SIZE,WORK(INDU),LDU,C,1, + WORK(INDWORK),INFO) * * Redistribute elements of U and VT in the block-cyclic fashion. * IF (WANTU.EQ.1) CALL PZGEMR2D(M,SIZE,WORK(INDU),1,1,DESCTU,U,IU, + JU,DESCU,DESCU(CTXT_)) * IF (WANTVT.EQ.1) CALL PZGEMR2D(SIZE,N,WORK(INDV),1,1,DESCTVT,VT, + IVT,JVT,DESCVT,DESCVT(CTXT_)) * * Set to ZERO "non-square" elements of the larger matrices U, VT. * IF (M.GT.N .AND. WANTU.EQ.1) THEN CALL PZLASET('Full',M-SIZE,SIZE,ZERO,ZERO,U,IA+SIZE,JU,DESCU) ELSE IF (N.GT.M .AND. WANTVT.EQ.1) THEN CALL PZLASET('Full',SIZE,N-SIZE,ZERO,ZERO,VT,IVT,JVT+SIZE, + DESCVT) END IF * * Multiply Householder rotations from bidiagonalized matrix. * IF (WANTU.EQ.1) CALL PZUNMBR('Q','L','N',M,SIZE,N,A,IA,JA,DESCA, + WORK(INDTAUQ),U,IU,JU,DESCU, + WORK(INDWORK),LLWORK,INFO) * IF (WANTVT.EQ.1) CALL PZUNMBR('P','R','C',SIZE,N,M,A,IA,JA,DESCA, + WORK(INDTAUP),VT,IVT,JVT,DESCVT, + WORK(INDWORK),LLWORK,INFO) * * Copy singular values into output array S. * DO 10 I = 1,SIZE S(I) = RWORK(INDD2+IOFFD+I-1) 10 CONTINUE * * If matrix was scaled, then rescale singular values appropriately. * IF (ISCALE.EQ.1) THEN CALL DSCAL(SIZE,ONE/SIGMA,S,1) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF (SIZE.LE.ITHVAL) THEN J = SIZE K = 1 ELSE J = SIZE/ITHVAL K = ITHVAL END IF * DO 20 I = 1,J RWORK(I+INDE) = S((I-1)*K+1) RWORK(I+INDD2) = S((I-1)*K+1) 20 CONTINUE * CALL DGAMN2D(DESCA(CTXT_),'a',' ',J,1,RWORK(1+INDE),J,1,1,-1,-1,0) CALL DGAMX2D(DESCA(CTXT_),'a',' ',J,1,RWORK(1+INDD2),J,1,1,-1,-1, + 0) * DO 30 I = 1,J IF ((RWORK(I+INDE)-RWORK(I+INDD2)).NE.DZERO) THEN INFO = SIZE + 1 END IF 30 CONTINUE * 40 CONTINUE * CALL BLACS_GRIDEXIT(CONTEXTC) CALL BLACS_GRIDEXIT(CONTEXTR) * * End of PZGESVD * RETURN END scalapack-1.8.0/SRC/pslamch.f0000640000175000017500000000507110363532303015544 0ustar muammarmuammar REAL FUNCTION PSLAMCH( ICTXT, CMACH ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER CMACH INTEGER ICTXT * .. * * Purpose * ======= * * PSLAMCH determines single precision machine parameters. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle in which the computation takes * place. * * CMACH (global input) CHARACTER*1 * Specifies the value to be returned by PSLAMCH: * = 'E' or 'e', PSLAMCH := eps * = 'S' or 's , PSLAMCH := sfmin * = 'B' or 'b', PSLAMCH := base * = 'P' or 'p', PSLAMCH := eps*base * = 'N' or 'n', PSLAMCH := t * = 'R' or 'r', PSLAMCH := rnd * = 'M' or 'm', PSLAMCH := emin * = 'U' or 'u', PSLAMCH := rmin * = 'L' or 'l', PSLAMCH := emax * = 'O' or 'o', PSLAMCH := rmax * * where * * eps = relative machine precision * sfmin = safe minimum, such that 1/sfmin does not overflow * base = base of the machine * prec = eps*base * t = number of (base) digits in the mantissa * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise * emin = minimum exponent before (gradual) underflow * rmin = underflow threshold - base**(emin-1) * emax = largest exponent before overflow * rmax = overflow threshold - (base**emax)*(1-eps) * * ===================================================================== * * .. Local Scalars .. INTEGER IDUMM REAL TEMP * .. * .. External Subroutines .. EXTERNAL SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME REAL SLAMCH EXTERNAL LSAME, SLAMCH * .. * .. Executable Statements .. * TEMP = SLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL SGAMN2D( ICTXT, 'All', ' ', 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PSLAMCH = TEMP * * End of PSLAMCH * END scalapack-1.8.0/SRC/pslatrs.f0000640000175000017500000000531410363532303015605 0ustar muammarmuammar SUBROUTINE PSLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL A( * ), CNORM( * ), $ X( * ), WORK( * ) * .. * * Purpose * ======= * * PSLATRS solves a triangular system. This routine in unfinished * at this time, but will be part of the next release. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * * .. Local Scalars .. INTEGER ICTXT, IIX, IROFF, JJX, MYCOL, MYROW, NP, $ NPCOL, NPROW, LDX, IXCOL, IXROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGEBR2D, SGEBS2D, INFOG2L, $ PSTRSV * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ***** NO SCALING ***** Call PSTRSV for all cases ***** * SCALE = ONE CALL PSTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX(MB_) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IXCOL ) THEN CALL SGEBS2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX ) ELSE CALL SGEBR2D( ICTXT, 'R', ' ', NP, 1, X( IIX+(JJX-1)*LDX ), $ LDX, MYROW, IXCOL ) END IF * RETURN * * End of PSLATRS * END scalapack-1.8.0/SRC/pddbsv.f0000640000175000017500000004515710363532303015410 0ustar muammarmuammar SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDDBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PDDBTRF and PDDBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * +max((max(bwl,bwu)*NRHS), max(bwl,bwu)*max(bwl,bwu)) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PDDBTRF, PDDBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDDBTRF and PDDBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PDDBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Factor the matrix * CALL PDDBTRF( N, BWL, BWU, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PDDBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) RETURN END IF * RETURN * * End of PDDBSV * END scalapack-1.8.0/SRC/pcgetrs.f0000640000175000017500000002610210363532303015562 0ustar muammarmuammar SUBROUTINE PCGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCGETRS solves a system of distributed linear equations * * op( sub( A ) ) * X = sub( B ) * * with a general N-by-N distributed matrix sub( A ) using the LU * factorization computed by PCGETRF. * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), op( A ) = A, A**T or A**H * and sub( B ) denotes B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block data decomposition ( MB_A=NB_A ). * * Arguments * ========= * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': sub( A ) * X = sub( B ) (No transpose) * = 'T': sub( A )**T * X = sub( B ) (Transpose) * = 'C': sub( A )**H * X = sub( B ) (Conjugate transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local * memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the factors * L and U from the factorization sub( A ) = P*L*U; the unit * diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand sides * sub( B ). On exit, sub( B ) is overwritten by the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PCLAPIV, PCTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE NOTRAN = LSAME( TRANS, 'N' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1200+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1200+CTXT_) END IF END IF IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, 3, $ IB, JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGETRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ DESCA( MB_ ) + NUMROC( DESCA( M_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) ) * IF( NOTRAN ) THEN * * Solve sub( A ) * X = sub( B ). * * Apply row interchanges to the right hand sides. * CALL PCLAPIV( 'Forward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A )' * X = sub( B ). * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, $ A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Apply row interchanges to the solution vectors. * CALL PCLAPIV( 'Backward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * END IF * RETURN * * End of PCGETRS * END scalapack-1.8.0/SRC/pclaqsy.f0000640000175000017500000003203610363532303015572 0ustar muammarmuammar SUBROUTINE PCLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N REAL AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL SC( * ), SR( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAQSY equilibrates a symmetric distributed matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the * vectors SR and SC. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is to be referenced: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (input/output) COMPLEX pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the distributed symmetric * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * part of the matrix, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular part of the matrix, and the strictly upper trian- * gular part of sub( A ) is not referenced. * On exit, if EQUED = 'Y', the equilibrated matrix: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local input) REAL array, dimension LOCr(M_A) * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned * with the distributed matrix A, and replicated across every * process column. SR is tied to the distributed matrix A. * * SC (local input) REAL array, dimension LOCc(N_A) * The scale factors for sub( A ). SC is aligned with the dis- * tributed matrix A, and replicated down every process row. * SC is tied to the distributed matrix A. * * SCOND (global input) REAL * Ratio of the smallest SR(i) (respectively SC(j)) to the * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1 * and JA <= j <= JA+N-1. * * AMAX (global input) REAL * Absolute value of the largest distributed submatrix entry. * * EQUED (output) CHARACTER*1 * Specifies whether or not equilibration was done. * = 'N': No equilibration. * = 'Y': Equilibration was done, i.e., sub( A ) has been re- * placed by: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if scaling should be done * based on the ratio of the scaling factors. If SCOND < THRESH, * scaling is done. * * LARGE and SMALL are threshold values used to decide if scaling should * be done based on the absolute size of the largest matrix element. * If AMAX > LARGE or AMAX < SMALL, scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, THRESH PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J, $ JB, JJ, JJA, JN, KK, LDA, LL, MYCOL, MYROW, NP, $ NPCOL, NPROW REAL CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PSLAMCH( ICTXT, 'Safe minimum' ) / $ PSLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN * * No equilibration * EQUED = 'N' * ELSE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Replace A by diag(S) * A * diag(S). * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 20 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 10 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE IOFFA = IOFFA + JB*LDA END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 70 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 40 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 30 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE ELSE DO 60 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 50 KK = IIA, II-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 70 CONTINUE * ELSE * * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFF * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 90 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 80 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 100 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 160 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 130 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 120 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE ELSE DO 150 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 140 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 160 CONTINUE * END IF * EQUED = 'Y' * END IF * RETURN * * End of PCLAQSY * END scalapack-1.8.0/SRC/psgehrd.f0000640000175000017500000003536610363532303015563 0ustar muammarmuammar SUBROUTINE PSGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEHRD reduces a real general distributed matrix sub( A ) * to upper Hessenberg form H by an orthogonal similarity transforma- * tion: Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+ILO-2 * and JA+IHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*NB + NB*MAX( IHIP+1, IHLP+INLQ ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * ICOFFA = MOD( JA-1, NB ), IOFF = MOD( IA+ILO-2, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:I) = 0, v(I+1) = 1 and v(IHI+1:N) = 0; v(I+2:IHI) is stored on * exit in A(IA+ILO+I:IA+IHI-1,JA+ILO+I-2), and tau in TAU(JA+ILO+I-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follow- * ing example, with N = 7, ILO = 2 and IHI = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), H denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(JA+ILO+I-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, IB, ICOFFA, ICTXT, IHIP, $ IHLP, IIA, IINFO, ILCOL, ILROW, IMCOL, INLQ, $ IOFF, IPT, IPW, IPY, IROFFA, J, JJ, JJA, JY, $ K, L, LWMIN, MYCOL, MYROW, NB, NPCOL, NPROW, $ NQ REAL EI * .. * .. Local Arrays .. INTEGER DESCY( DLEN_ ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG1L, $ INFOG2L, PCHK1MAT, PSGEMM, PSGEHD2, $ PSLAHRD, PSLARFB, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, NB ) ICOFFA = MOD( JA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ) IOFF = MOD( IA+ILO-2, NB ) ILROW = INDXG2P( IA+ILO-1, NB, MYROW, DESCA( RSRC_ ), $ NPROW ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ) ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWMIN = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = ILO IDUM2( 1 ) = 2 IDUM1( 2 ) = IHI IDUM2( 2 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEHRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set elements JA:JA+ILO-2 and JA+JHI-1:JA+N-2 of TAU to zero. * NQ = NUMROC( JA+N-2, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL INFOG1L( JA+ILO-2, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 10 J = JJA, MIN( JJ, NQ ) TAU( J ) = ZERO 10 CONTINUE * CALL INFOG1L( JA+IHI-1, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 20 J = JJ, NQ TAU( J ) = ZERO 20 CONTINUE * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPT = 1 IPY = IPT + NB * NB IPW = IPY + IHIP * NB CALL DESCSET( DESCY, IHI+IROFFA, NB, NB, NB, IAROW, ILCOL, ICTXT, $ MAX( 1, IHIP ) ) * K = ILO IB = NB - IOFF JY = IOFF + 1 * * Loop over remaining block of columns * DO 30 L = 1, IHI-ILO+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns j:j+ib-1 to Hessenberg form, returning the * matrices V and T of the block reflector H = I - V*T*V' * which performs the reduction, and also the matrix Y = A*V*T * CALL PSLAHRD( IHI, K, IB, A, IA, J, DESCA, TAU, WORK( IPT ), $ WORK( IPY ), 1, JY, DESCY, WORK( IPW ) ) * * Apply the block reflector H to A(ia:ia+ihi-1,j+ib:ja+ihi-1) * from the right, computing A := A - Y * V'. * V(i+ib,ib-1) must be set to 1. * CALL PSELSET2( EI, A, I+IB, J+IB-1, DESCA, ONE ) CALL PSGEMM( 'No transpose', 'Transpose', IHI, IHI-K-IB+1, IB, $ -ONE, WORK( IPY ), 1, JY, DESCY, A, I+IB, J, $ DESCA, ONE, A, IA, J+IB, DESCA ) CALL PSELSET( A, I+IB, J+IB-1, DESCA, EI ) * * Apply the block reflector H to A(i+1:ia+ihi-1,j+ib:ja+n-1) from * the left * CALL PSLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', $ IHI-K, N-K-IB+1, IB, A, I+1, J, DESCA, $ WORK( IPT ), A, I+1, J+IB, DESCA, WORK( IPY ) ) * K = K + IB IB = NB JY = 1 DESCY( CSRC_ ) = MOD( DESCY( CSRC_ ) + 1, NPCOL ) * 30 CONTINUE * * Use unblocked code to reduce the rest of the matrix * CALL PSGEHD2( N, K, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEHRD * END scalapack-1.8.0/SRC/pssyttrd.f0000640000175000017500000012255010363532303016013 0ustar muammarmuammar SUBROUTINE PSSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * * ======= * * PSSYTTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding * process and memory location. * * Let A be a generic term for any 2D block cyclicly distributed * array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- ----------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, * indicating the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to * distribute the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to * distribute the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the * first row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes * of its process row. * The values of LOCp() and LOCq() may be determined via a call to * the ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dim LOCq(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dim LOCq(JA+N-1) * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCq(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) REAL array, dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal workspace * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS * Where: * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PSSYTTRD', 'L', 0, 0, * 0, 0 ) * * NUMROC is a ScaLAPACK tool function; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of * elementary reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of * elementary reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and * vi denotes an element of the vector defining H(i). * * Data storage requirements * ========================= * * PSSYTTRD is not intended to be called directly. All users are * encourage to call PSSYTRD which will then call PSHETTRD if * appropriate. A must be in cyclic format (i.e. MB = NB = 1), * the process grid must be square ( i.e. NPROW = NPCOL ) and * only lower triangular storage is supported. * * Local variables * =============== * * PSSYTTRD uses five local arrays: * WORK ( InV ) dimension ( NP, ANB+1): array V * WORK ( InH ) dimension ( NP, ANB+1): array H * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT * * Arrays V and H are replicated across all processor columns. * Arrays V^T and H^T are replicated across all processor rows. * * WORK ( InVT ), or V^T, is stored as a tall skinny * array ( NQ x ANB-1 ) for efficiency. Since only the lower * triangular portion of A is updated, Av is computed as: * tril(A) * v + v^T * tril(A,-1). This is performed as * two local triangular matrix-vector multiplications (both in * MVR2) followed by a transpose and a sum across the columns. * In the local computation, WORK( InVT ) is used to compute * tril(A) * v and WORK( InV ) is used to compute * v^T * tril(A,-1) * * The following variables are global indices into A: * INDEX: The current global row and column number. * MAXINDEX: The global row and column for the first row and * column in the trailing block of A. * LIIB, LIJB: The first row, column in * * The following variables point into the arrays A, V, H, V^T, H^T: * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T. * LII: local index I: The local row number for row INDEX * LIJ: local index J: The local column number for column INDEX * LIIP1: local index I+1: The local row number for row INDEX+1 * LIJP1: local index J+1: The local col number for col INDEX+1 * LTLI: lower triangular local index I: The local row for the * upper left entry in tril( A(INDEX, INDEX) ) * LTLIP1: lower triangular local index I+1: The local row for the * upper left entry in tril( A(INDEX+1, INDEX+1) ) * * Details: The distinction between LII and LTLI (and between * LIIP1 and LTLIP1) is subtle. Within the current processor * column (i.e. MYCOL .eq. CURCOL) they are the same. However, * on some processors, A( LII, LIJ ) points to an element * above the diagonal, on these processors, LTLI = LII+1. * * The following variables give the number of rows and/or columns * in various matrices: * NP: The number of local rows in A( 1:N, 1:N ) * NQ: The number of local columns in A( 1:N, 1:N ) * NPM0: The number of local rows in A( INDEX:N, INDEX:N ) * NQM0: The number of local columns in A( INDEX:N, INDEX:N ) * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N ) * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N ) * LTNM0: The number of local rows & columns in * tril( A( INDEX:N, INDEX:N ) ) * LTNM1: The number of local rows & columns in * tril( A( INDEX+1:N, INDEX+1:N ) ) * NOTE: LTNM0 == LTNM1 on all processors except the diagonal * processors, i.e. those where MYCOL == MYROW. * * Invariants: * NP = NPM0 + LII - 1 * NQ = NQM0 + LIJ - 1 * NP = NPM1 + LIIP1 - 1 * NQ = NQM1 + LIJP1 - 1 * NP = LTLI + LTNM0 - 1 * NP = LTLIP1 + LTNM1 - 1 * * Temporary variables. The following variables are used within * a few lines after they are set and do hold state from one loop * iteration to the next: * * The matrix A: * The matrix A does not hold the same values that it would * in an unblocked code nor the values that it would hold in * in a blocked code. * * The value of A is confusing. It is easiest to state the * difference between trueA and A at the point that MVR2 is called, * so we will start there. * * Let trueA be the value that A would * have at a given point in an unblocked code and A * be the value that A has in this code at the same point. * * At the time of the call to MVR2, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ). * * At the bottom of the inner loop, * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) and * v = V( liip1:N, BINDEX+1 ) and * h = H( liip1:N, BINDEX+1 ) * * At the top of the loop, BINDEX gets incremented, hence: * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and * V = V( MAXINDEX:N, 1:BINDEX-1 ) and * v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) * * * A gets updated at the bottom of the outer loop * After this update, trueA = A + v' * h + h' * v * where v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) and BINDEX = 0 * Indeed, the previous loop invariant as stated above for the * top of the loop still holds, but with BINDEX = 0, H and V * are null matrices. * * After the current column of A is updated, * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N ) * the rest of A is untouched. * * After the current block column of A is updated, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) * * This brings us back to the point at which mvr2 is called. * * * Details of the parallelization: * * We delay spreading v across to all processor columns (which * would naturally happen at the bottom of the loop) in order to * combine the spread of v( : , i-1 ) with the spread of h( : , i ) * * In order to compute h( :, i ), we must update A( :, i ) * which means that the processor column owning A( :, i ) must * have: c, tau, v( i, i ) and h( i, i ). * * The traditional * way of computing v (and the one used in pzlatrd.f and * zlatrd.f) is: * v = tau * v * c = v' * h * alpha = - tau * c / 2 * v = v + alpha * h * However, the traditional way of computing v requires that tau * be broadcast to all processors in the current column (to compute * v = tau * v) and then a sum-to-all is required (to * compute v' * h ). We use the following formula instead: * c = v' * h * v = tau * ( v - c * tau' * h / 2 ) * The above formula allows tau to be spread down in the * same call to SGSUM2D which performs the sum-to-all of c. * * The computation of v, which could be performed in any processor * column (or other procesor subsets), is performed in the * processor column that owns A( :, i+1 ) so that A( :, i+1 ) * can be updated prior to spreading v across. * * We keep the block column of A up-to-date to minimize the * work required in updating the current column of A. Updating * the block column of A is reasonably load balanced whereas * updating the current column of A is not (only the current * processor column is involved). * * In the following overview of the steps performed, M in the * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p)) * or more flops per processor. * * Inner loop: * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) ) *M h = house( A(index:n, index) ) *M Spread v, h across *M vt = v^T; ht = h^T * A( index+1:n, index+1:maxindex ) -= * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) ) *C v = tril(A) * h; vt = ht * tril(A,-1) *MorC v = v - H*V*h - V*H*h *M v = v + vt^T *M c = v' * h * v = tau * ( v - c * tau' * h / 2 ) *C A = A - H*V - V*H * * * * ================================================================= * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E0 ) REAL Z_ONE, Z_NEGONE, Z_ZERO PARAMETER ( Z_ONE = 1.0E0, Z_NEGONE = -1.0E0, $ Z_ZERO = 0.0E0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * * * .. Local Scalars .. * * LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX, $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT, $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA, $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1, $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX, $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP, $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ, $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX, $ PBMIN, PBSIZE, PNB, ROWSPERPROC REAL ALPHA, BETA, C, NORM, ONEOVERBETA, SAFMAX, $ SAFMIN, TOPH, TOPNV, TOPTAU, TOPV, TTOPH, TTOPV * .. * .. Local Arrays .. * * * * INTEGER IDUM1( 1 ), IDUM2( 1 ) REAL CC( 3 ), DTMP( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSTREECOMB, $ PXERBLA, SCOMBNRM2, SGEBR2D, SGEBS2D, SGEMM, $ SGEMV, SGERV2D, SGESD2D, SGSUM2D, SLACPY, $ SSCAL, STRMVT * .. * .. External Functions .. * LOGICAL LSAME INTEGER ICEIL, NUMROC, PJLAENV REAL PSLAMCH, SNRM2 EXTERNAL LSAME, ICEIL, NUMROC, PJLAENV, PSLAMCH, SNRM2 * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL, SIGN, SQRT * .. * * * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * * Further details * =============== * * At the top of the loop, v and nh have been computed but not * spread across. Hence, A is out-of-date even after the * rank 2k update. Furthermore, we compute the next v before * nh is spread across. * * I claim that if we used a sum-to-all on NV, by summing CC within * each column, that we could compute NV locally and could avoid * spreading V across. Bruce claims that sum-to-all can be made * to cost no more than sum-to-one on the Paragon. If that is * true, this would be a win. But, * the BLACS sum-to-all is just a sum-to-one followed by a broadcast, * and hence the present scheme is better for now. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * SAFMAX = SQRT( PSLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PSLAMCH( ICTXT, 'S' ) ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE * * Here we set execution options for PSSYTTRD * PNB = PJLAENV( ICTXT, 2, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) * INTERLEAVE = ( PJLAENV( ICTXT, 4, 'PSSYTTRD', 'L', 1, 0, 0, $ 0 ).EQ.1 ) TWOGEMMS = ( PJLAENV( ICTXT, 4, 'PSSYTTRD', 'L', 2, 0, 0, $ 0 ).EQ.1 ) BALANCED = ( PJLAENV( ICTXT, 4, 'PSSYTTRD', 'L', 3, 0, 0, $ 0 ).EQ.1 ) * CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) * * UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 .AND. DESCA( NB_ ).NE.1 ) $ INFO = 600 + NB_ IF( INFO.EQ.0 ) THEN * * * Here is the arithmetic: * Let maxnpq = max( np, nq, 2 * ANB ) * LDV = 4 * max( np, nq ) + 2 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB ) * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS * * This overestimates memory requirements when ANB > NP/2 * Memory requirements are lower when interleave = .false. * Hence, we could have two sets of memory requirements, * one for interleave and one for * * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORK( 1 ) = REAL( LWMIN ) IF( .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( NPROW.NE.NPCOL ) THEN INFO = -( 600+CTXT_ ) ELSE IF( DESCA( DTYPE_ ).NE.1 ) THEN INFO = -( 600+DTYPE_ ) ELSE IF( DESCA( MB_ ).NE.1 ) THEN INFO = -( 600+MB_ ) ELSE IF( DESCA( NB_ ).NE.1 ) THEN INFO = -( 600+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.0 ) THEN INFO = -( 600+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.0 ) THEN INFO = -( 600+CSRC_ ) ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 * CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYTTRD', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * * Reduce the lower triangle of sub( A ) NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * NXTROW = 0 NXTCOL = 0 * LIIP1 = 1 LIJP1 = 1 NPM1 = NP NQM1 = NQ * LDA = DESCA( LLD_ ) ICTXT = DESCA( CTXT_ ) * * * * Miscellaneous details: * Put tau, D and E in the right places * Check signs * Place all the arrays in WORK, control their placement * in memory. * * * * Loop invariants * A(LIIP1, LIJ) points to the first element of A(I+1,J) * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N ) * A(LII:N,LIJ:N) is one step out of date. * proc( CURROW, CURCOL ) owns A(LII,LIJ) * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ) * INH = 1 * IF( INTERLEAVE ) THEN * * H and V are interleaved to minimize memory movement * LDV has to be twice as large to accomodate interleaving. * In addition, LDV is doubled again to allow v, h and * toptau to be spreaad across and transposed in a * single communication operation with minimum memory * movement. * * We could reduce LDV back to 2*MAX(NPM1,NQM1) * by increasing the memory movement required in * the spread and transpose of v, h and toptau. * However, since the non-interleaved path already * provides a mear minimum memory requirement option, * we did not provide this additional path. * LDV = 4*( MAX( NPM1, NQM1 ) ) + 2 * INH = 1 * INV = INH + LDV / 2 INVT = INH + ( ANB+1 )*LDV * INHT = INVT + LDV / 2 INTMP = INVT + LDV*( ANB+1 ) * ELSE LDV = MAX( NPM1, NQM1 ) * INHT = INH + LDV*( ANB+1 ) INV = INHT + LDV*( ANB+1 ) * * The code works without this +1, but only because of a * coincidence. Without the +1, WORK(INVT) gets trashed, but * WORK(INVT) is only used once and when it is used, it is * multiplied by WORK( INH ) which is zero. Hence, the fact * that WORK(INVT) is trashed has no effect. * INVT = INV + LDV*( ANB+1 ) + 1 INTMP = INVT + LDV*( 2*ANB ) * END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYTTRD', -INFO ) WORK( 1 ) = REAL( LWMIN ) RETURN END IF * * * The satisfies the loop invariant: trueA = A - V * HT - H * VT, * (where V, H, VT and HT all have BINDEX+1 rows/columns) * the first ANB times through the loop. * * * * Setting either ( InH and InHT ) or InV to Z_ZERO * is adequate except in the face of NaNs. * * DO 10 I = 1, NP WORK( INH+I-1 ) = Z_ZERO WORK( INV+I-1 ) = Z_ZERO 10 CONTINUE DO 20 I = 1, NQ WORK( INHT+I-1 ) = Z_ZERO 20 CONTINUE * * * TOPNV = Z_ZERO * LTLIP1 = LIJP1 LTNM1 = NPM1 IF( MYCOL.GT.MYROW ) THEN LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * DO 210 MININDEX = 1, N - 1, ANB * * MAXINDEX = MIN( MININDEX+ANB-1, N ) LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1 * NQB = NQ - LIJB + 1 NPB = NP - LIIB + 1 INHTB = INHT + LIJB - 1 INVTB = INVT + LIJB - 1 INHB = INH + LIIB - 1 INVB = INV + LIIB - 1 * * * * DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 ) * BINDEX = INDEX - MININDEX * CURROW = NXTROW CURCOL = NXTCOL * NXTROW = MOD( CURROW+1, NPROW ) NXTCOL = MOD( CURCOL+1, NPCOL ) * LII = LIIP1 LIJ = LIJP1 NPM0 = NPM1 * IF( MYROW.EQ.CURROW ) THEN NPM1 = NPM1 - 1 LIIP1 = LIIP1 + 1 END IF IF( MYCOL.EQ.CURCOL ) THEN NQM1 = NQM1 - 1 LIJP1 = LIJP1 + 1 LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * * * * V = NV, VT = NVT, H = NH, HT = NHT * * * Update the current column of A * * IF( MYCOL.EQ.CURCOL ) THEN * INDEXA = LII + ( LIJ-1 )*LDA INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV TTOPH = WORK( INHT+LIJ-1+BINDEX*LDV ) TTOPV = TOPNV * IF( INDEX.GT.1 ) THEN DO 30 I = 0, NPM0 - 1 * A( INDEXA+I ) = A( INDEXA+I ) A( INDEXA+I ) = A( INDEXA+I ) - $ WORK( INDEXINV+LDV+I )*TTOPH - $ WORK( INDEXINH+LDV+I )*TTOPV 30 CONTINUE END IF * * END IF * * IF( MYCOL.EQ.CURCOL ) THEN * * Compute the householder vector * IF( MYROW.EQ.CURROW ) THEN DTMP( 2 ) = A( LII+( LIJ-1 )*LDA ) ELSE DTMP( 2 ) = ZERO END IF IF( MYROW.EQ.NXTROW ) THEN DTMP( 3 ) = A( LIIP1+( LIJ-1 )*LDA ) DTMP( 4 ) = ZERO ELSE DTMP( 3 ) = ZERO DTMP( 4 ) = ZERO END IF * NORM = SNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 ) DTMP( 1 ) = NORM * * IF DTMP(5) = 1.0, NORM is too large and might cause * overflow, hence PSTREECOMB must be called. IF DTMP(5) * is zero on output, DTMP(1) can be trusted. * DTMP( 5 ) = ZERO IF( DTMP( 1 ).GE.SAFMAX .OR. DTMP( 1 ).LT.SAFMIN ) THEN DTMP( 5 ) = ONE DTMP( 1 ) = ZERO END IF * DTMP( 1 ) = DTMP( 1 )*DTMP( 1 ) CALL SGSUM2D( ICTXT, 'C', ' ', 5, 1, DTMP, 5, -1, $ CURCOL ) IF( DTMP( 5 ).EQ.ZERO ) THEN DTMP( 1 ) = SQRT( DTMP( 1 ) ) ELSE DTMP( 1 ) = NORM CALL PSTREECOMB( ICTXT, 'C', 1, DTMP, -1, MYCOL, $ SCOMBNRM2 ) END IF * NORM = DTMP( 1 ) * D( LIJ ) = DTMP( 2 ) IF( MYROW.EQ.CURROW .AND. MYCOL.EQ.CURCOL ) THEN A( LII+( LIJ-1 )*LDA ) = D( LIJ ) END IF * * ALPHA = DTMP( 3 ) * NORM = SIGN( NORM, ALPHA ) * IF( NORM.EQ.ZERO ) THEN TOPTAU = ZERO ELSE BETA = NORM + ALPHA TOPTAU = BETA / NORM ONEOVERBETA = 1.0E0 / BETA * CALL SSCAL( NPM1, ONEOVERBETA, $ A( LIIP1+( LIJ-1 )*LDA ), 1 ) END IF * IF( MYROW.EQ.NXTROW ) THEN A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE END IF * TAU( LIJ ) = TOPTAU E( LIJ ) = -NORM * END IF * * * Spread v, nh, toptau across * DO 40 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+ $ ( LIJ-1 )*LDA ) 40 CONTINUE * IF( MYCOL.EQ.CURCOL ) THEN WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU CALL SGEBS2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1 ) ELSE CALL SGEBR2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1, MYROW, CURCOL ) TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) END IF DO 50 I = 0, NPM1 - 1 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+BINDEX*LDV+NPM1+I ) 50 CONTINUE * IF( INDEX.LT.N ) THEN IF( MYROW.EQ.NXTROW .AND. MYCOL.EQ.CURCOL ) $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ ) END IF * * Transpose v, nh * * IF( MYROW.EQ.MYCOL ) THEN DO 60 I = 0, NPM1 + NPM1 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+ $ BINDEX*LDV+I ) 60 CONTINUE ELSE CALL SGESD2D( ICTXT, NPM1+NPM1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1, $ MYCOL, MYROW ) CALL SGERV2D( ICTXT, NQM1+NQM1, 1, $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1, $ MYCOL, MYROW ) END IF * DO 70 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+ $ LIJP1-1+BINDEX*LDV+NQM1+I ) 70 CONTINUE * * * Update the current block column of A * IF( INDEX.GT.1 ) THEN DO 90 J = LIJP1, LIJB - 1 DO 80 I = 0, NPM1 - 1 * A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA ) $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )* $ WORK( INHT+J-1+BINDEX*LDV ) - $ WORK( INH+LIIP1-1+BINDEX*LDV+I )* $ WORK( INVT+J-1+BINDEX*LDV ) 80 CONTINUE 90 CONTINUE END IF * * * * Compute NV = A * NHT; NVT = A * NH * * These two lines are necessary because these elements * are not always involved in the calls to STRMVT * for two reasons: * 1) On diagonal processors, the call to TRMVT * involves only LTNM1-1 elements * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1 * and when the results are combined across all processes, * uninitialized values may be included. WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO * * IF( MYROW.EQ.MYCOL ) THEN IF( LTNM1.GT.1 ) THEN CALL STRMVT( 'L', LTNM1-1, $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1, $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ), $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+ $ 1 )*LDV ), 1 ) END IF DO 100 I = 1, LTNM1 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) + $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )* $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV ) 100 CONTINUE ELSE IF( LTNM1.GT.0 ) $ CALL STRMVT( 'L', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ), $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+ $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+ $ ( BINDEX+1 )*LDV ), 1, $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ), $ 1 ) * END IF * * * We take advantage of the fact that: * A * sum( B ) = sum ( A * B ) for matrices A,B * * trueA = A + V * HT + H * VT * hence: (trueA)v = Av' + V * HT * v + H * VT * v * VT * v = sum_p_in_NPROW ( VTp * v ) * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v ) * * v = v + V * HT * h + H * VT * h * * * * tmp = HT * nh1 DO 110 I = 1, 2*( BINDEX+1 ) WORK( INTMP-1+I ) = 0 110 CONTINUE * IF( BALANCED ) THEN NPSET = NPROW MYSETNUM = MYROW ROWSPERPROC = ICEIL( NQB, NPSET ) MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 ) * * * tmp = HT * v * CALL SGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INHTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL SGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INVTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * * CALL SGSUM2D( ICTXT, 'C', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) ELSE * tmp = HT * v * CALL SGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INHTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL SGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INVTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * END IF * * * IF( BALANCED ) THEN MYSETNUM = MYCOL * ROWSPERPROC = ICEIL( NPB, NPSET ) MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 ) * CALL SGSUM2D( ICTXT, 'R', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) * * * v = v + V * tmp IF( INDEX.GT.1. ) THEN CALL SGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INVB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) * * v = v + H * tmp2 CALL SGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INHB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) END IF * ELSE * v = v + V * tmp CALL SGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ), $ LDV, WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * * * v = v + H * tmp2 CALL SGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ), $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * END IF * * * Transpose NV and add it back into NVT * IF( MYROW.EQ.MYCOL ) THEN DO 120 I = 0, NQM1 - 1 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+ $ I ) 120 CONTINUE ELSE CALL SGESD2D( ICTXT, NQM1, 1, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), $ NQM1, MYCOL, MYROW ) CALL SGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL, $ MYROW ) * END IF DO 130 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I ) 130 CONTINUE * * Sum-to-one NV rowwise (within a row) * CALL SGSUM2D( ICTXT, 'R', ' ', NPM1, 1, $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1, $ MYROW, NXTCOL ) * * * Dot product c = NV * NH * Sum-to-all c within next processor column * * IF( MYCOL.EQ.NXTCOL ) THEN CC( 1 ) = Z_ZERO DO 140 I = 0, NPM1 - 1 CC( 1 ) = CC( 1 ) + WORK( INV+LIIP1-1+( BINDEX+1 )* $ LDV+I )*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+ $ I ) 140 CONTINUE IF( MYROW.EQ.NXTROW ) THEN CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV ) ELSE CC( 2 ) = Z_ZERO CC( 3 ) = Z_ZERO END IF CALL SGSUM2D( ICTXT, 'C', ' ', 3, 1, CC, 3, -1, NXTCOL ) * TOPV = CC( 2 ) C = CC( 1 ) TOPH = CC( 3 ) * TOPNV = TOPTAU*( TOPV-C*TOPTAU / 2*TOPH ) * * * Compute V = Tau * (V - C * Tau' / 2 * H ) * * DO 150 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU* $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C*TOPTAU / $ 2*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) ) 150 CONTINUE * END IF * * 160 CONTINUE * * * Perform the rank2k update * IF( MAXINDEX.LT.N ) THEN * DO 170 I = 0, NPM1 - 1 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I ) 170 CONTINUE * * * IF( .NOT.TWOGEMMS ) THEN IF( INTERLEAVE ) THEN LDZG = LDV / 2 ELSE CALL SLACPY( 'A', LTNM1, ANB, WORK( INHT+LIJP1-1 ), $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV ) * CALL SLACPY( 'A', LTNM1, ANB, WORK( INV+LTLIP1-1 ), $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV ) LDZG = LDV END IF NBZG = ANB*2 ELSE LDZG = LDV NBZG = ANB END IF * * DO 180 PBMIN = 1, LTNM1, PNB * PBSIZE = MIN( PNB, LTNM1-PBMIN+1 ) PBMAX = MIN( LTNM1, PBMIN+PNB-1 ) CALL SGEMM( 'N', 'C', PBSIZE, PBMAX, NBZG, Z_NEGONE, $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) IF( TWOGEMMS ) THEN CALL SGEMM( 'N', 'C', PBSIZE, PBMAX, ANB, Z_NEGONE, $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) END IF 180 CONTINUE * * * DO 190 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I ) WORK( INH+LIIP1-1+I ) = WORK( INTMP+I ) 190 CONTINUE DO 200 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I ) 200 CONTINUE * * END IF * * End of the update A code * 210 CONTINUE * IF( MYCOL.EQ.NXTCOL ) THEN IF( MYROW.EQ.NXTROW ) THEN * D( NQ ) = A( NP+( NQ-1 )*LDA ) * CALL SGEBS2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1 ) ELSE CALL SGEBR2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1, NXTROW, $ NXTCOL ) END IF END IF * * * * WORK( 1 ) = REAL( LWMIN ) RETURN * * End of PSSYTTRD * * END scalapack-1.8.0/SRC/pdorm2r.f0000640000175000017500000004063010363532303015502 0ustar muammarmuammar SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORM2R overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PDGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PDGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 DOUBLE PRECISION AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DGEBR2D, $ DGEBS2D, DGERV2D, DGESD2D, DSCAL, $ INFOG2L, PDELSET, PDELSET2, PDLARF, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL DSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL DSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL DGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL DGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL DSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * DO 10 J = J1, J2, J3 IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H(j) or H(j)' * CALL PDELSET2( AJJ, A, IA+J-JA, J, DESCA, ONE ) CALL PDLARF( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, C, $ ICC, JCC, DESCC, WORK ) CALL PDELSET( A, IA+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORM2R * END scalapack-1.8.0/SRC/pspbtrsv.f0000640000175000017500000014075710363532303016013 0ustar muammarmuammar SUBROUTINE PSPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, $ IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Cholesky factorization code PSPBTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PSPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPBTRF and this is stored in AF. If a linear system * is to be solved using PSPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MBW2, MYCOL, MYROW, MY_NUM_COLS, NB, NP, $ NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE, SGEMM, SGERV2D, $ SGESD2D, SLACPY, SMATADD, STBTRS, STRMM, STRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PSPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PSPBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = BW*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PSPBTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BW PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 180 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW END IF * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'N', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB, $ WORK( 1 ), BW ) * CALL STRMM( 'L', 'U', 'N', 'N', BW, NRHS, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )*LLDA ) ), $ LLDA-1, WORK( 1 ), BW ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'T', 'N', BW, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+BW-BW ), BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STRTRS( 'L', 'N', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ), $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STRTRS( 'L', 'T', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+BW-BW ), BW ) * CALL STRMM( 'L', 'U', 'T', 'N', BW, NRHS, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )*LLDA ) ), $ LLDA-1, WORK( 1+BW-BW ), BW ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'T', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'T', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB, $ WORK( 1 ), BW ) * CALL STRMM( 'L', 'L', 'T', 'N', BW, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1 ), BW ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'T', 'N', BW, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+BW-BW ), BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 110 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 90 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 100 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 90 100 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STRTRS( 'L', 'N', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 110 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 160 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 120 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 130 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 120 130 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ), $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STRTRS( 'L', 'T', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 140 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 140 150 CONTINUE * [End of GOTO Loop] * 160 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL SLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+BW-BW ), BW ) * CALL STRMM( 'L', 'L', 'N', 'N', BW, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL SMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL STBTRS( UPLO, 'N', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 170 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 180 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSPBTRSV * END scalapack-1.8.0/SRC/pslarzt.f0000640000175000017500000002564010363532303015620 0ustar muammarmuammar SUBROUTINE PSLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) REAL TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARZT forms the triangular factor T of a real block reflector * H of order > n, which is defined as a product of k elementary * reflectors as returned by PSTZRZF. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the array V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the array V, and * * H = I - V' * T * V * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise (not supported yet) * = 'R': rowwise * * N (global input) INTEGER * The number of meaningful entries of the block reflector H. * N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) REAL pointer into the local memory * to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)). * The distributed matrix V contains the Householder vectors. * See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) REAL, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) REAL array, dimension (MB_V,MB_V) * It contains the k-by-k triangular factor of the block * reflector associated with V. T is lower triangular. * * WORK (local workspace) REAL array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * ______V_____ * ( v1 v2 v3 ) / \ * ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) * V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) * ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) * ( v1 v2 v3 ) * . . . * . . . * 1 . . * 1 . * 1 * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * ______V_____ * 1 / \ * . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) * . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) * . . . ( . . 1 . . v3 v3 v3 v3 v3 ) * . . . * ( v1 v2 v3 ) * ( v1 v2 v3 ) * V = ( v1 v2 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJV, LDV, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, INFOG2L, PXERBLA, $ SCOPY, SGEMV, SGSUM2D, SLASET, $ STRMV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( MYROW.EQ.IVROW ) THEN IW = 1 ITMP0 = 0 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF * DO 10 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ.GT.0 ) THEN CALL SGEMV( 'No transpose', ITMP0, NQ, -TAU( II ), $ V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, WORK( IW ), $ 1 ) ELSE CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF IW = IW + ITMP0 * 10 CONTINUE * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 20 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL STRMV( 'Lower', 'No transpose', 'Non-unit', ITMP0, $ T( ITMP1+DESCV( MB_ ) ), DESCV( MB_ ), $ T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 20 CONTINUE * END IF * END IF * RETURN * * End of PSLARZT * END scalapack-1.8.0/SRC/pzlaqsy.f0000640000175000017500000003212110363532303015614 0ustar muammarmuammar SUBROUTINE PZLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N DOUBLE PRECISION AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION SC( * ), SR( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAQSY equilibrates a symmetric distributed matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the * vectors SR and SC. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is to be referenced: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (input/output) COMPLEX*16 pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the distributed symmetric * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * part of the matrix, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular part of the matrix, and the strictly upper trian- * gular part of sub( A ) is not referenced. * On exit, if EQUED = 'Y', the equilibrated matrix: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local input) DOUBLE PRECISION array, dimension LOCr(M_A) * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned * with the distributed matrix A, and replicated across every * process column. SR is tied to the distributed matrix A. * * SC (local input) DOUBLE PRECISION array, dimension LOCc(N_A) * The scale factors for sub( A ). SC is aligned with the dis- * tributed matrix A, and replicated down every process row. * SC is tied to the distributed matrix A. * * SCOND (global input) DOUBLE PRECISION * Ratio of the smallest SR(i) (respectively SC(j)) to the * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1 * and JA <= j <= JA+N-1. * * AMAX (global input) DOUBLE PRECISION * Absolute value of the largest distributed submatrix entry. * * EQUED (output) CHARACTER*1 * Specifies whether or not equilibration was done. * = 'N': No equilibration. * = 'Y': Equilibration was done, i.e., sub( A ) has been re- * placed by: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if scaling should be done * based on the ratio of the scaling factors. If SCOND < THRESH, * scaling is done. * * LARGE and SMALL are threshold values used to decide if scaling should * be done based on the absolute size of the largest matrix element. * If AMAX > LARGE or AMAX < SMALL, scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, THRESH PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J, $ JB, JJ, JJA, JN, KK, LDA, LL, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PDLAMCH( ICTXT, 'Safe minimum' ) / $ PDLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN * * No equilibration * EQUED = 'N' * ELSE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Replace A by diag(S) * A * diag(S). * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 20 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 10 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE IOFFA = IOFFA + JB*LDA END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 70 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 40 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 30 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE ELSE DO 60 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 50 KK = IIA, II-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 70 CONTINUE * ELSE * * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFF * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 90 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 80 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 100 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 160 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 130 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 120 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE ELSE DO 150 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 140 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 160 CONTINUE * END IF * EQUED = 'Y' * END IF * RETURN * * End of PZLAQSY * END scalapack-1.8.0/SRC/pslarfg.f0000640000175000017500000002323710363532303015557 0ustar muammarmuammar SUBROUTINE PSLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N REAL ALPHA * .. * .. Array Arguments .. INTEGER DESCX( * ) REAL TAU( * ), X( * ) * .. * * Purpose * ======= * * PSLARFG generates a real elementary reflector H of order n, such * that * * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I. * ( x ) ( 0 ) * * where alpha is a scalar, and sub( X ) is an (N-1)-element real * distributed vector X(IX:IX+N-2,JX) if INCX = 1 and X(IX,JX:JX+N-2) if * INCX = DESCX(M_). H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v' ) , * ( v ) * * where tau is a real scalar and v is a real (N-1)-element * vector. * * If the elements of sub( X ) are all zero, then tau = 0 and H is * taken to be the unit matrix. * * Otherwise 1 <= tau <= 2. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The global order of the elementary reflector. N >= 0. * * ALPHA (local output) REAL * On exit, alpha is computed in the process scope having the * vector sub( X ). * * IAX (global input) INTEGER * The global row index in X of X(IAX,JAX). * * JAX (global input) INTEGER * The global column index in X of X(IAX,JAX). * * X (local input/local output) REAL, pointer into the * local memory to an array of dimension (LLD_X,*). This array * contains the local pieces of the distributed vector sub( X ). * Before entry, the incremented array sub( X ) must contain * the vector x. On exit, it is overwritten with the vector v. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * TAU (local output) REAL, array, dimension LOCc(JX) * if INCX = 1, and LOCr(IX) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX, $ KNT, MYCOL, MYROW, NPCOL, NPROW REAL BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSNRM2, SGEBR2D, $ SGEBS2D, PSSCAL * .. * .. External Functions .. REAL SLAMCH, SLAPY2 EXTERNAL SLAMCH, SLAPY2 * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is distributed across a process row. * CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYROW.NE.IXROW ) $ RETURN * * Broadcast X(IAX,JAX) across the process row. * IF( MYCOL.EQ.IXCOL ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1, $ MYROW, IXCOL ) END IF * INDXTAU = IIAX * ELSE * * sub( X ) is distributed across a process column. * CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYCOL.NE.IXCOL ) $ RETURN * * Broadcast X(IAX,JAX) across the process column. * IF( MYROW.EQ.IXROW ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, ALPHA, 1, $ IXROW, MYCOL ) END IF * INDXTAU = JJAX * END IF * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO RETURN END IF * CALL PSNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) * IF( XNORM.EQ.ZERO ) THEN * * H = I * TAU( INDXTAU ) = ZERO * ELSE * * General case * BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) SAFMIN = SLAMCH( 'S' ) RSAFMN = ONE / SAFMIN IF( ABS( BETA ).LT.SAFMIN ) THEN * * XNORM, BETA may be inaccurate; scale X and recompute them * KNT = 0 10 CONTINUE KNT = KNT + 1 CALL PSSCAL( N-1, RSAFMN, X, IX, JX, DESCX, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN * CALL PSNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) TAU( INDXTAU ) = ( BETA-ALPHA ) / BETA CALL PSSCAL( N-1, ONE/(ALPHA-BETA), X, IX, JX, DESCX, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * ALPHA = BETA DO 20 J = 1, KNT ALPHA = ALPHA*SAFMIN 20 CONTINUE ELSE TAU( INDXTAU ) = ( BETA-ALPHA ) / BETA CALL PSSCAL( N-1, ONE/(ALPHA-BETA), X, IX, JX, DESCX, INCX ) ALPHA = BETA END IF END IF * RETURN * * End of PSLARFG * END scalapack-1.8.0/SRC/pdposv.f0000640000175000017500000002434110363532303015431 0ustar muammarmuammar SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDPOSV computes the solution to a real system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is an N-by-N * symmetric distributed positive definite matrix and X and sub( B ) * denoting B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS distributed * matrices. * * The Cholesky decomposition is used to factor sub( A ) as * * sub( A ) = U**T * U, if UPLO = 'U', or * * sub( A ) = L * L**T, if UPLO = 'L', * * where U is an upper triangular matrix and L is a lower triangular * matrix. The factored form of sub( A ) is then used to solve the * system of equations. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if INFO = 0, this array contains the * local pieces of the factor U or L from the Cholesky factori- * zation sub( A ) = U**T*U or L*L**T. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_B,LOC(JB+NRHS-1)). * On entry, the local pieces of the right hand sides distribu- * ted matrix sub( B ). On exit, if INFO = 0, sub( B ) is over- * written with the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed, and the * solution has not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDPOTRF, $ PDPOTRS, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1000+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSV', -INFO ) RETURN END IF * * Compute the Cholesky factorization sub( A ) = U'*U or L*L'. * CALL PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ) overwriting sub( B ) * with X. * CALL PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * END IF * RETURN * * End of PDPOSV * END scalapack-1.8.0/SRC/pspocon.f0000640000175000017500000003627410363532303015607 0ustar muammarmuammar SUBROUTINE PSPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LIWORK, LWORK, N REAL ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a real symmetric positive definite distributed matrix * using the Cholesky factorization A = U**T*U or A = L*L**T computed by * PSPOTRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the factor stored in * A(IA:IA+N-1,JA:JA+N-1) is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) REAL pointer into the local memory to * an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, this * array contains the local pieces of the factors L or U from * the Cholesky factorization A(IA:IA+N-1,JA:JA+N-1) = U'*U or * L*L', as computed by PSPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) REAL * The 1-norm (or infinity-norm) of the symmetric distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + 2*LOCc(N+MOD(JA-1,NB_A))+ * MAX( 2, MAX(NB_A*CEIL(NPROW-1,NPCOL),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(NPCOL-1,NPROW)) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, $ JX, KASE, LIWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPROW, NPMOD, NQ, NQMOD REAL AINVNM, SCALE, SL, SU, SMLNUM REAL WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK1MAT, PSAMAX, PSLATRS, PSLACON, $ PSRSCL, PB_TOPGET, PB_TOPSET, PXERBLA, SGEBR2D, $ SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + 2*NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN IWORK( 1 ) = LIWMIN INFO = -12 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LIWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPNL = IPV + NP IPNU = IPNL + NQ IPW = IPNU + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the 1-norm (or I-norm) of inv(A). * AINVNM = ZERO KASE = 0 NORMIN = 'N' * 10 CONTINUE CALL PSLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( UPPER ) THEN * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Multiply by 1/SCALE if doing so will not cause overflow. * SCALE = SL*SU IF( SCALE.NE.ONE ) THEN CALL PSAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL SGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PSPOCON * END scalapack-1.8.0/SRC/pdlasmsub.f0000640000175000017500000003123310602576752016122 0ustar muammarmuammar SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER I, K, L, LWORK DOUBLE PRECISION SMLNUM * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), BUF( * ) * .. * * Purpose * ======= * * PDLASMSUB looks for a small subdiagonal element from the bottom * of the matrix that it can safely set to zero. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) DOUBLE PRECISION array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * K (global output) INTEGER * On exit, this yields the bottom portion of the unreduced * submatrix. This will satisfy: L <= M <= I-1. * * SMLNUM (global input) DOUBLE PRECISION * On entry, a "small number" for the given matrix. * Unchanged on exit. * * BUF (local output) DOUBLE PRECISION array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * This code is basically a parallelization of the following snip * of LAPACK code from DLAHQR: * * Look for a single small subdiagonal element. * * DO 20 K = I, L + 1, -1 * TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) * IF( TST1.EQ.ZERO ) * $ TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) * IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) * $ GO TO 30 * 20 CONTINUE * 30 CONTINUE * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2, $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC, $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA, $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, $ RIGHT, UP DOUBLE PRECISION H10, H11, H22, TST1, ULP * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGERV2D, DGESD2D, IGAMX2D, $ INFOG1L, INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS * ISTR1 = 0 ISTR2 = ( ( I-L ) / HBL ) IF( ISTR2*HBL.LT.( I-L ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.2*ISTR2 ) THEN * * Error! * RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY * UP THE TRIDIAGONAL * IBUF1 = 0 IBUF2 = 0 IRCV1 = 0 IRCV2 = 0 DO 10 K = I, L + 1, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * ADD UP THE RECEIVES * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT. $ 1 ) ) ) THEN * * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN * * WE MUST RECEIVE H(K ,K-1) FROM LEFT * IRCV2 = IRCV2 + 1 END IF END IF * * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND * IF( IBUF1.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW, $ RIGHT ) END IF * * RECEIVE APPROPRIATE DATA IF THERE IS ANY * IF( IRCV1.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW, $ LEFT ) END IF * * START MAIN LOOP * IBUF1 = 0 IBUF2 = 0 CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT. * * Start loop for subdiagonal search * DO 40 K = I, L + 1, -1 IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN * * Grab information from WORK array * IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H11 = BUF( ISTR1+IBUF1 ) ELSE H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF2 = IBUF2 + 1 H10 = BUF( ISTR2+IBUF2 ) ELSE H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF ELSE * * Information is local * H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF H22 = A( ( ICOL1-1 )*LDA+IROW1 ) TST1 = ABS( H11 ) + ABS( H22 ) IF( TST1.EQ.ZERO ) THEN * * FIND SOME NORM OF THE LOCAL H(L:I,L:I) * CALL INFOG1L( L, HBL, NPROW, MYROW, 0, ITMP1, III ) IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW ) CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ITMP2, III ) ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL ) DO 30 III = ITMP1, IROW2 DO 20 JJJ = ITMP2, ICOL2 TST1 = TST1 + ABS( A( ( JJJ-1 )*LDA+III ) ) 20 CONTINUE 30 CONTINUE END IF IF( ABS( H10 ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 50 IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN II = MOD( II+NPROW-1, NPROW ) JJ = MOD( JJ+NPCOL-1, NPCOL ) CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) END IF 40 CONTINUE 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) RETURN * * End of PDLASMSUB * END scalapack-1.8.0/SRC/pzdotc.c0000640000175000017500000005207110363532303015417 0ustar muammarmuammar/* --------------------------------------------------------------------- * * Mark R. Fahey * August 2000 * This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0 * which fixes a bug in the incx=1 and incy=1 case. * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pzdotc_( n, dotc, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy ) /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; complex16 * dotc; /* .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex16 X[], Y[]; { /* * Purpose * ======= * * PZDOTC forms the dot product of two distributed vectors, * * dotc := sub( X )**H * sub( Y ) * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * * Parameters * ========== * * N (global input) pointer to INTEGER * The length of the distributed vectors to be multiplied. * N >= 0. * * DOTC (local output) pointer to COMPLEX*16 * The dot product of sub( X ) and sub( Y ) only in their scope. * * X (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * ===================================================================== * * .. Local Scalars .. */ char * cbtop, * cctop, * rbtop, * rctop; int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, mone=-1, mycol, myrow, nn, np, np0, nprow, npcol, nq, nz, ione=1, tmp1, wksz; complex16 xwork[1], ywork[1], zero; /* .. * .. PBLAS Buffer .. */ complex16 * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void zgebr2d_(); void zgebs2d_(); void zgerv2d_(); void zgesd2d_(); void zgsum2d_(); void pbchkvect(); void pberror_(); char * getpbbuf(); char * ptop(); F_VOID_FCT pbztrnv_(); F_VOID_FCT zzdotc_(); F_INTG_FCT ilcm_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(600+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -10; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -9; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1100+MB_+1); } else { info = -12; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -10; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1100+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -9; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1100+MB_+1); } else { info = -12; } } else { info = -7; } } if( ictxt != desc_Y[CTXT_] ) info = -(1100+CTXT_+1); } } if( info ) { pberror_( &ictxt, "PZDOTC", &info ); return; } /* * Quick return if possible. */ dotc->re = ZERO; dotc->im = ZERO; zero.re = ZERO; zero.im = ZERO; if( *n == 0 ) return; /* * dot <- x^{h} * y */ if( *n == 1 ) { if( ( myrow == ixrow ) && ( mycol == ixcol ) ) { buff = &X[iix-1+(jjx-1)*desc_X[LLD_]]; if( ( myrow != iyrow ) || ( mycol != iycol ) ) { zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol ); zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol ); } else *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]]; zzdotc_( n, dotc, buff, n, ywork, n ); } else if( ( myrow == iyrow ) && ( mycol == iycol ) ) { zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, &ixrow, &ixcol ); zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol ); zzdotc_( n, dotc, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n ); } if( ( *incx == desc_X[M_] ) && ( desc_X[M_] != 1 ) ) { if( myrow == ixrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == ixcol ) { zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione, &myrow, &ixcol ); } } } else if( ( *incx == 1 ) && ( desc_X[M_] != 1 ) ) { if( mycol == ixcol ) { cbtop = ptop( BROADCAST, COLUMN, TOPGET ); if( myrow == ixrow ) { zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotc, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotc, &ione, &ixrow, &mycol ); } } } if( ( *incy == desc_Y[M_] ) && ( desc_Y[M_] != 1 ) ) { if( myrow == iyrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == iycol ) { zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione, &myrow, &iycol ); } } } else if( ( *incy == 1 ) && ( desc_Y[M_] != 1 ) ) { if( mycol == iycol ) { cbtop = ptop( BROADCAST, COLUMN, TOPGET ); if( myrow == iyrow ) { zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotc, &ione ); } else { zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ), &ione, &ione, dotc, &ione, &iyrow, &mycol ); } } } return; } if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zzdotc_( &nq, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } } else { if( myrow == ixrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); buff = (complex16 *)getpbbuf( "PZDOTC", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol ); zzdotc_( &nq, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } else if( myrow == iyrow ) { rctop = ptop( COMBINE, ROW, TOPGET ); zgesd2d_( &ictxt, &ione, &nq, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &mycol ); buff = (complex16 *)getpbbuf( "PZDOTC", nq*sizeof(complex16) ); zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol ); zzdotc_( &nq, dotc, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] ); zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotc_( &np, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } } else { if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); buff = (complex16 *)getpbbuf( "PZDOTC", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &iycol ); zzdotc_( &np, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } else if( mycol == iycol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); buff = (complex16 *)getpbbuf( "PZDOTC", np*sizeof(complex16) ); zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &ixcol ); zgesd2d_( &ictxt, &np, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &myrow, &ixcol ); zzdotc_( &np, dotc, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmp = lcm / nprow; nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow ); tmp1 = np0 / desc_X[MB_]; wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZDOTC", wksz*sizeof(complex16) ); if( mycol == iycol ) jjy -= nz; if( myrow == ixrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol, &ixrow, &ixcol, buff+np ); if( mycol == ixcol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotc_( &np, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } if( myrow == iyrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == ixcol ) zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione ); else zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione, &myrow, &ixcol ); } } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*jx-1) % desc_X[NB_]; nn = *n + nz; tmp1 = nn / desc_X[MB_]; np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex16 *)getpbbuf( "PZDOTC", wksz*sizeof(complex16) ); if( myrow == iyrow ) np -= nz; pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { cctop = ptop( COMBINE, COLUMN, TOPGET ); zzdotc_( &np, dotc, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy ); zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ), &ione, &ione, dotc, &ione, &mone, &mycol ); } if( myrow == ixrow ) { rbtop = ptop( BROADCAST, ROW, TOPGET ); if( mycol == iycol ) zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione ); else zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ), &ione, &ione, dotc, &ione, &myrow, &iycol ); } } } } scalapack-1.8.0/SRC/pdlapv2.f0000640000175000017500000003674110363532303015475 0ustar muammarmuammar SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLAPV2 applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1), resulting in row or column pivoting. The * pivot vector should be aligned with the distributed matrix A. For * pivoting the rows of sub( A ), IPIV should be distributed along a * process column and replicated over all process rows. Similarly, * IPIV should be distributed along a process row and replicated over * all process columns for column pivoting. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P * sub( A ); * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P ) * sub( A ). * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this local array contains the local pieces of the * distributed matrix sub( A ) to which the row or columns * interchanges will be applied. On exit, this array contains * the local pieces of the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (input) INTEGER array, dimension >= LOCr(M_A)+MB_A if * ROWCOL = 'R', LOCc(N_A)+NB_A otherwise. It contains * the pivoting information. IPIV(i) is the global row (column), * local row (column) i was swapped with. The last piece of the * array of size MB_A (resp. NB_A) is used as workspace. IPIV is * tied to the distributed matrix A. * * IP (global input) INTEGER * IPIV's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JP (global input) INTEGER * IPIV's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCIP (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix IPIV. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL FORWRD, ROWPVT INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIP, IP1, ITMP, $ IPVWRK, J, JB, JJP, JP1, K, MA, MBA, MYCOL, $ MYROW, NBA, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBS2D, IGEBR2D, INFOG2L, $ PDSWAP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN END IF FORWRD = LSAME( DIREC, 'F' ) * * * Get grid and matrix parameters * MA = DESCA( M_ ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * IF( FORWRD ) THEN CALL INFOG2L( IP, JP, DESCIP, NPROW, NPCOL, MYROW, MYCOL, $ IIP, JJP, ICURROW, ICURCOL ) * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * Loop over rows of sub( A ) * I = IA IB = MIN( M, ICEIL( IA, MBA ) * MBA - IA + 1 ) 10 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP ), IB ) ITMP = IIP IIP = IIP + IB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( ITMP ), IB, ICURROW, MYCOL ) END IF * * Pivot the block of rows * DO 20 K = I, I+IB-1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PDSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP + 1 20 CONTINUE * * Go on to next row of processes, increment row counter, * and figure number of rows to pivot next * ICURROW = MOD( ICURROW+1, NPROW ) I = I + IB IB = MIN( MBA, M-I+IA ) IF( IB .GT. 0 ) GOTO 10 * * If I am pivoting the columns of sub( A ) * ELSE IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * Loop over columns of sub( A ) * J = JA JB = MIN( N, ICEIL( JA, NBA ) * NBA - JA + 1 ) 30 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP ), JB ) ITMP = JJP JJP = JJP + JB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( ITMP ), JB, MYROW, ICURCOL ) END IF * * Pivot the block of columns * DO 40 K = J, J+JB-1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PDSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP + 1 40 CONTINUE * * Go on to next column of processes, increment column * counter, and figure number of columns to pivot next * ICURCOL = MOD( ICURCOL+1, NPCOL ) J = J + JB JB = MIN( NBA, N-J+JA ) IF( JB .GT. 0 ) GOTO 30 END IF * * If I want to apply pivots in reverse order, i.e. reversing * pivoting done earlier. Thus this section computes * inv( P ) * sub( A ). * ELSE * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN CALL INFOG2L( IP+M-1, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) * IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * If I'm not in the current process row, my IIP points out * past end of pivot vector (since I don't own a piece of the * last row). Adjust IIP so it points at last pivot entry. * IF( MYROW.NE.ICURROW ) IIP = IIP - 1 * * Loop over rows in reverse order, starting at last row * I = IA + M - 1 IB = MOD( I, MBA ) IF( IB .EQ. 0 ) IB = MBA IB = MIN( IB, M ) 50 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN ITMP = IIP IIP = IIP - IB CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP+1 ), IB ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IPVWRK ), IB, ICURROW, MYCOL ) ITMP = IPVWRK + IB - 1 END IF * * Pivot the block of rows * DO 60 K = I, I-IB+1, -1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PDSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP - 1 60 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURROW = MOD( NPROW+ICURROW-1, NPROW ) I = I - IB IB = MIN( MBA, I-IA+1 ) IF( IB .GT. 0 ) GOTO 50 * * Otherwise, I'm pivoting the columns of sub( A ) * ELSE CALL INFOG2L( IP, JP+N-1, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * If I'm not in the current process column, my JJP points out * past end of pivot vector (since I don't own a piece of the * last column). Adjust JJP so it points at last pivot entry. * IF( MYCOL.NE.ICURCOL ) JJP = JJP - 1 * * Loop over columns in reverse order starting at last column * J = JA + N - 1 JB = MOD( J, NBA ) IF( JB .EQ. 0 ) JB = NBA JB = MIN( JB, N ) 70 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN ITMP = JJP JJP = JJP - JB CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP+1 ), JB ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( IPVWRK ), JB, MYROW, ICURCOL ) ITMP = IPVWRK + JB - 1 END IF * * Pivot a block of columns * DO 80 K = J, J-JB+1, -1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PDSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP - 1 80 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL ) J = J - JB JB = MIN( NBA, J-JA+1 ) IF( JB .GT. 0 ) GOTO 70 END IF * END IF * RETURN * * End PDLAPV2 * END scalapack-1.8.0/SRC/pcunmlq.f0000640000175000017500000004202610363532303015575 0ustar muammarmuammar SUBROUTINE PCUNMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMLQ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PCGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JCC, $ LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCLARFB, $ PCLARFT, PCUNML2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PCUNML2( SIDE, TRANS, M, N, I1-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PCLARFT( 'Forward', 'Rowwise', NQ-I+IA, IB, A, I, JA+I-IA, $ DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PCLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, $ A, I, JA+I-IA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PCUNML2( SIDE, TRANS, M, N, I2-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMLQ * END scalapack-1.8.0/SRC/pcporfs.f0000640000175000017500000010132610363532303015571 0ustar muammarmuammar SUBROUTINE PCPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, $ LRWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ), X( * ) REAL BERR( * ), FERR( * ), RWORK( * ) * .. * * Purpose * ======= * * PCPORFS improves the computed solution to a system of linear * equations when the coefficient matrix is Hermitian positive definite * and provides error bounds and backward error estimates for the * solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ). * This array contains the local pieces of the N-by-N Hermitian * distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * On entry, this array contains the factors L or U from the * Cholesky factorization sub( A ) = L*L**H or U**H*U, as * computed by PCPOTRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * B (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). On exit, it contains the * improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ZERO, RONE, TWO, THREE PARAMETER ( ZERO = 0.0E+0, RONE = 1.0E+0, TWO = 2.0E+0, $ THREE = 3.0E+0 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT, $ DESCSET, INFOG2L, PCAHEMV, PCAXPY, PCHK2MAT, $ PCCOPY, PCHEMV, PCPOTRS, PCLACON, $ PXERBLA, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 19, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2 * NPMOD LRWMIN = NPMOD WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -13 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1500 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1500 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1900 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -17 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1900 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -18 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1900 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -25 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 23 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 25 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 0, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, N, 2, NRHS, 3, $ IX, JX, DESCX, 19, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPORFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X) * CALL PCCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to * the i-th components of the numerator and denominator * before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PCAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J) is larger than machine epsilon, and * 2) BERR(J) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub( B ) - sub( A )*sub( X ). * CALL PCCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCHEMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PCAHEMV( UPLO, N, RONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, $ 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + NZ*EPS*( * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X)) * +abs(sub(B))) is incremented by SAFE1 if the i-th component * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PCPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) * RETURN * * End of PCPORFS * END scalapack-1.8.0/SRC/pdlarf.f0000640000175000017500000006775610363532303015407 0ustar muammarmuammar SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PDLARF applies a real elementary reflector Q (or Q**T) to a real * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from * either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a real scalar and v is a real vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q, Q = Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST DOUBLE PRECISION TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, $ DGEMV, DGER, DGERV2D, DGESD2D, $ DGSUM2D, DLASET, INFOG2L, PB_TOPGET, $ PBDTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL DGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL DGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL DLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL DGER( MP, NQ, -TAULOC, V( IOFFV ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL DGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL DGER( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL DGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MP+1 CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL DGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL DGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL DLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFV.GT.0 .AND. IOFFC.GT.0 ) $ CALL DGER( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL DGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL DGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 IF( IOFFV.GT.0 ) $ CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQ+1 CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL DGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFC.GT.0 ) $ CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL DGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PDLARF * END scalapack-1.8.0/SRC/pclacon.f0000640000175000017500000003234010363532303015533 0ustar muammarmuammar SUBROUTINE PCLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, EST, $ KASE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N REAL EST * .. * .. Array Arguments .. INTEGER DESCV( * ), DESCX( * ) COMPLEX V( * ), X( * ) * .. * * Purpose * ======= * * PCLACON estimates the 1-norm of a square, complex distributed matrix * A. Reverse communication is used for evaluating matrix-vector * products. X and V are aligned with the distributed matrix A, this * information is implicitly contained within IV, IX, DESCV, and DESCX. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vectors V and X. N >= 0. * * V (local workspace) COMPLEX pointer into the local * memory to an array of dimension LOCr(N+MOD(IV-1,MB_V)). On * the final return, V = A*W, where EST = norm(V)/norm(W) * (W is not returned). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * X (local input/local output) COMPLEX pointer into the * local memory to an array of dimension * LOCr(N+MOD(IX-1,MB_X)). On an intermediate return, X * should be overwritten by * A * X, if KASE=1, * A' * X, if KASE=2, * where A' is the conjugate transpose of A, and PCLACON must * be re-called with all the other parameters unchanged. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * * EST (global output) REAL * An estimate (a lower bound) for norm(A). * * KASE (local input/local output) INTEGER * On the initial call to PCLACON, KASE should be 0. * On an intermediate return, KASE will be 1 or 2, indicating * whether X should be overwritten by A * X or A' * X. * On the final return from PCLACON, KASE will again be 0. * * Further Details * =============== * * The serial version CLACON has been contributed by Nick Higham, * University of Manchester. It was originally named SONEST, dated * March 16, 1988. * * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of * a real or complex matrix, with applications to condition estimation", * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ONE, TWO PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IIVX, IMAXROW, IOFFVX, IROFF, ITER, $ IVXCOL, IVXROW, J, JLAST, JJVX, JUMP, K, $ MYCOL, MYROW, NP, NPCOL, NPROW REAL ALTSGN, ESTOLD, SAFMIN, TEMP COMPLEX JLMAX, XMAX * .. * .. Local Arrays .. COMPLEX WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CGEBR2D, CGEBS2D, $ INFOG2L, PCELGET, PCMAX1, $ PSCSUM1, SGEBR2D, SGEBS2D * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, INDXL2G, NUMROC REAL PSLAMCH EXTERNAL INDXG2L, INDXG2P, INDXL2G, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, REAL * .. * .. Save statement .. SAVE * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) IF( MYCOL.NE.IVXCOL ) $ RETURN IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) $ NP = NP - IROFF IOFFVX = IIVX + (JJVX-1)*DESCX( LLD_ ) * SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) IF( KASE.EQ.0 ) THEN DO 10 I = IOFFVX, IOFFVX+NP-1 X( I ) = CMPLX( ONE / REAL( N ) ) 10 CONTINUE KASE = 1 JUMP = 1 RETURN END IF * GO TO ( 20, 40, 70, 90, 120 )JUMP * * ................ ENTRY (JUMP = 1) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X * 20 CONTINUE IF( N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN V( IOFFVX ) = X( IOFFVX ) EST = ABS( V( IOFFVX ) ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF * ... QUIT GO TO 130 END IF CALL PSCSUM1( N, EST, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * DO 30 I = IOFFVX, IOFFVX+NP-1 IF( ABS( X( I ) ).GT.SAFMIN ) THEN X( I ) = X( I ) / CMPLX( ABS( X( I ) ) ) ELSE X( I ) = CONE END IF 30 CONTINUE KASE = 2 JUMP = 2 RETURN * * ................ ENTRY (JUMP = 2) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X * 40 CONTINUE CALL PCMAX1( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = CMPLX( REAL( J ) ) CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( REAL( WORK( 2 ) ) ) END IF END IF ITER = 2 * * MAIN LOOP - ITERATIONS 2, 3,...,ITMAX * 50 CONTINUE DO 60 I = IOFFVX, IOFFVX+NP-1 X( I ) = CZERO 60 CONTINUE IMAXROW = INDXG2P( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) IF( MYROW.EQ.IMAXROW ) THEN I = INDXG2L( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) X( I ) = CONE END IF KASE = 1 JUMP = 3 RETURN * * ................ ENTRY (JUMP = 3) * X HAS BEEN OVERWRITTEN BY A*X * 70 CONTINUE CALL CCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) ESTOLD = EST CALL PSCSUM1( N, EST, V, IV, JV, DESCV, 1 ) IF( DESCV( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * * TEST FOR CYCLING IF( EST.LE.ESTOLD ) $ GO TO 100 * DO 80 I = IOFFVX, IOFFVX+NP-1 IF( ABS( X( I ) ).GT.SAFMIN ) THEN X( I ) = X( I ) / CMPLX( ABS( X( I ) ) ) ELSE X( I ) = CONE END IF 80 CONTINUE KASE = 2 JUMP = 4 RETURN * * ................ ENTRY (JUMP = 4) * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X * 90 CONTINUE JLAST = J CALL PCMAX1( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = CMPLX( REAL( J ) ) CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( REAL( WORK( 2 ) ) ) END IF END IF CALL PCELGET( 'Columnwise', ' ', JLMAX, X, JLAST, JX, DESCX ) IF( ( REAL( JLMAX ).NE.ABS( REAL( XMAX ) ) ).AND. $ ( ITER.LT.ITMAX ) ) THEN ITER = ITER + 1 GO TO 50 END IF * * ITERATION COMPLETE. FINAL STAGE. * 100 CONTINUE DO 110 I = IOFFVX, IOFFVX+NP-1 K = INDXL2G( I-IOFFVX+IIVX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW )-IX+1 IF( MOD( K, 2 ).EQ.0 ) THEN ALTSGN = -ONE ELSE ALTSGN = ONE END IF X( I ) = CMPLX( ALTSGN*( ONE+REAL( K-1 ) / REAL( N-1 ) ) ) 110 CONTINUE KASE = 1 JUMP = 5 RETURN * * ................ ENTRY (JUMP = 5) * X HAS BEEN OVERWRITTEN BY A*X * 120 CONTINUE CALL PSCSUM1( N, TEMP, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1, $ IVXROW, MYCOL ) END IF END IF TEMP = TWO*( TEMP / REAL( 3*N ) ) IF( TEMP.GT.EST ) THEN CALL CCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) EST = TEMP END IF * 130 CONTINUE KASE = 0 * RETURN * * End of PCLACON * END scalapack-1.8.0/SRC/pdgeqpf.f0000640000175000017500000005061610363532303015550 0ustar muammarmuammar SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. INTEGER IA, JA, INFO, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEQPF computes a QR factorization with column pivoting of a * M-by-N distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1): * * sub( A ) * P = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension LOCc(JA+N-1). * On exit, if IPIV(I) = K, the local i-th column of sub( A )*P * was the global K-th column of sub( A ). IPIV is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX(3,Mp0 + Nq0) + LOCc(JA+N-1)+Nq0. * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * LOCc(JA+N-1) = NUMROC( JA+N-1, NB_A, MYCOL, CSRC_A, NPCOL ) * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(1) H(2) . . . H(n) * * Each H(i) has the form * * H = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i-1:ia+m-1,ja+i-1). * * The matrix P is represented in jpvt as follows: If * jpvt(j) = i * then the jth column of P is the ith canonical unit vector. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, ICURROW, $ ICURCOL, II, IIA, IOFFA, IPN, IPCOL, IPW, $ IROFF, ITEMP, J, JB, JJ, JJA, JJPVT, JN, KB, $ K, KK, KSTART, KSTEP, LDA, LL, LWMIN, MN, MP, $ MYCOL, MYROW, NPCOL, NPROW, NQ, NQ0, PVT DOUBLE PRECISION AJJ, ALPHA, TEMP, TEMP2 * .. * .. Local Arrays .. INTEGER DESCN( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DCOPY, DESCSET, $ DGEBR2D, DGEBS2D, DGERV2D, $ DGESD2D, DLARFG, DSWAP, IGERV2D, $ IGESD2D, INFOG1L, INFOG2L, PCHK1MAT, PDAMAX, $ PDELSET, PDLARF, PDLARFG, PDNRM2, $ PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, IDINT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) NQ0 = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) LWMIN = MAX( 3, MP + NQ ) + NQ0 + NQ * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -10 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQPF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF MN = MIN( M, N ) * * Initialize the array of pivots * LDA = DESCA( LLD_ ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) KSTEP = NPCOL * DESCA( NB_ ) * IF( MYCOL.EQ.IACOL ) THEN * * Handle first block separately * JB = JN - JA + 1 DO 10 LL = JJA, JJA+JB-1 IPIV( LL ) = JA + LL - JJA 10 CONTINUE KSTART = JN + KSTEP - DESCA( NB_ ) * * Loop over remaining block of columns * DO 30 KK = JJA+JB, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 20 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 20 CONTINUE KSTART = KSTART + KSTEP 30 CONTINUE ELSE KSTART = JN + ( MOD( MYCOL-IACOL+NPCOL, NPCOL )-1 )* $ DESCA( NB_ ) DO 50 KK = JJA, JJA+NQ-1, DESCA( NB_ ) KB = MIN( JJA+NQ-KK, DESCA( NB_ ) ) DO 40 LL = KK, KK+KB-1 IPIV( LL ) = KSTART+LL-KK+1 40 CONTINUE KSTART = KSTART + KSTEP 50 CONTINUE END IF * * Initialize partial column norms, handle first block separately * CALL DESCSET( DESCN, 1, DESCA( N_ ), 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), ICTXT, 1 ) * IPN = 1 IPW = IPN + NQ0 + NQ JJ = IPN + JJA - 1 IF( MYCOL.EQ.IACOL ) THEN DO 60 KK = 0, JB-1 CALL PDNRM2( M, WORK( JJ+KK ), A, IA, JA+KK, DESCA, 1 ) WORK( NQ+JJ+KK ) = WORK( JJ+KK ) 60 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining blocks of columns * DO 80 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 70 KK = 0, JB-1 CALL PDNRM2( M, WORK( JJ+KK ), A, IA, J+KK, DESCA, 1 ) WORK( NQ+JJ+KK ) = WORK( JJ+KK ) 70 CONTINUE JJ = JJ + JB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) 80 CONTINUE * * Compute factorization * DO 120 J = JA, JA+MN-1 I = IA + J - JA * CALL INFOG1L( J, DESCA( NB_ ), NPCOL, MYCOL, DESCA( CSRC_ ), $ JJ, ICURCOL ) K = JA + N - J IF( K.GT.1 ) THEN CALL PDAMAX( K, TEMP, PVT, WORK( IPN ), 1, J, DESCN, $ DESCN( M_ ) ) ELSE PVT = J END IF IF( J.NE.PVT ) THEN CALL INFOG1L( PVT, DESCA( NB_ ), NPCOL, MYCOL, $ DESCA( CSRC_ ), JJPVT, IPCOL ) IF( ICURCOL.EQ.IPCOL ) THEN IF( MYCOL.EQ.ICURCOL ) THEN CALL DSWAP( MP, A( IIA+(JJ-1)*LDA ), 1, $ A( IIA+(JJPVT-1)*LDA ), 1 ) ITEMP = IPIV( JJPVT ) IPIV( JJPVT ) = IPIV( JJ ) IPIV( JJ ) = ITEMP WORK( IPN+JJPVT-1 ) = WORK( IPN+JJ-1 ) WORK( IPN+NQ+JJPVT-1 ) = WORK( IPN+NQ+JJ-1 ) END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN * CALL DGESD2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) WORK( IPW ) = DBLE( IPIV( JJ ) ) WORK( IPW+1 ) = WORK( IPN + JJ - 1 ) WORK( IPW+2 ) = WORK( IPN + NQ + JJ - 1 ) CALL DGESD2D( ICTXT, 3, 1, WORK( IPW ), 3, MYROW, $ IPCOL ) * CALL DGERV2D( ICTXT, MP, 1, A( IIA+(JJ-1)*LDA ), LDA, $ MYROW, IPCOL ) CALL IGERV2D( ICTXT, 1, 1, IPIV( JJ ), 1, MYROW, $ IPCOL ) * ELSE IF( MYCOL.EQ.IPCOL ) THEN * CALL DGESD2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL IGESD2D( ICTXT, 1, 1, IPIV( JJPVT ), 1, MYROW, $ ICURCOL ) * CALL DGERV2D( ICTXT, MP, 1, A( IIA+(JJPVT-1)*LDA ), $ LDA, MYROW, ICURCOL ) CALL DGERV2D( ICTXT, 3, 1, WORK( IPW ), 3, MYROW, $ ICURCOL ) IPIV( JJPVT ) = IDINT( WORK( IPW ) ) WORK( IPN+JJPVT-1 ) = WORK( IPW+1 ) WORK( IPN+NQ+JJPVT-1 ) = WORK( IPW+2 ) * END IF * END IF * END IF * * Generate elementary reflector H(i) * CALL INFOG1L( I, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, ICURROW ) IF( DESCA( M_ ).EQ.1 ) THEN IF( MYROW.EQ.ICURROW ) THEN IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*DESCA( LLD_ ) AJJ = A( IOFFA ) CALL DLARFG( 1, AJJ, A( IOFFA ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1 ) CALL DSCAL( NQ-JJ, ALPHA, A( IOFFA+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ ), 1 ) A( IOFFA ) = AJJ ELSE IF( N.GT.1 ) THEN CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, ICURROW, ICURCOL ) CALL DSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.ICURCOL ) THEN CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1, ICURROW, ICURCOL ) END IF * ELSE * CALL PDLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(i) to A(ia+j-ja:ia+m-1,j+1:ja+n-1) from the left * CALL PDELSET( A, I, J, DESCA, ONE ) CALL PDLARF( 'Left', M-J+JA, JA+N-1-J, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK( IPW ) ) END IF CALL PDELSET( A, I, J, DESCA, AJJ ) * END IF * * Update partial columns norms * IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 IF( MOD( J, DESCA( NB_ ) ).EQ.0 ) $ ICURCOL = MOD( ICURCOL+1, NPCOL ) IF( (JJA+NQ-JJ).GT.0 ) THEN IF( MYROW.EQ.ICURROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, JJA+NQ-JJ, $ A( II+( MIN( JJA+NQ-1, JJ )-1 )*LDA ), $ LDA ) CALL DCOPY( JJA+NQ-JJ, A( II+( MIN( JJA+NQ-1, JJ ) $ -1)*LDA ), LDA, WORK( IPW+MIN( JJA+NQ-1, $ JJ )-1 ), 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', JJA+NQ-JJ, 1, $ WORK( IPW+MIN( JJA+NQ-1, JJ )-1 ), $ MAX( 1, NQ ), ICURROW, MYCOL ) END IF END IF * JN = MIN( ICEIL( J+1, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA + N - 1 ) IF( MYCOL.EQ.ICURCOL ) THEN DO 90 LL = JJ-1, JJ + JN - J - 2 IF( WORK( IPN+LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( IPW+LL ) ) / $ WORK( IPN+LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05D+0*TEMP* $ ( WORK( IPN+LL ) / WORK( IPN+NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PDNRM2( IA+M-I-1, WORK( IPN+LL ), A, I+1, $ J+LL-JJ+2, DESCA, 1 ) WORK( IPN+NQ+LL ) = WORK( IPN+LL ) ELSE WORK( IPN+LL ) = ZERO WORK( IPN+NQ+LL ) = ZERO END IF ELSE WORK( IPN+LL ) = WORK( IPN+LL ) * SQRT( TEMP ) END IF END IF 90 CONTINUE JJ = JJ + JN - J END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * DO 110 K = JN+1, JA+N-1, DESCA( NB_ ) KB = MIN( JA+N-K, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 100 LL = JJ-1, JJ+KB-2 IF( WORK( IPN+LL ).NE.ZERO ) THEN TEMP = ONE-( ABS( WORK( IPW+LL ) ) / $ WORK( IPN+LL ) )**2 TEMP = MAX( TEMP, ZERO ) TEMP2 = ONE + 0.05D+0*TEMP* $ ( WORK( IPN+LL ) / WORK( IPN+NQ+LL ) )**2 IF( TEMP2.EQ.ONE ) THEN IF( IA+M-1.GT.I ) THEN CALL PDNRM2( IA+M-I-1, WORK( IPN+LL ), A, $ I+1, K+LL-JJ+1, DESCA, 1 ) WORK( IPN+NQ+LL ) = WORK( IPN+LL ) ELSE WORK( IPN+LL ) = ZERO WORK( IPN+NQ+LL ) = ZERO END IF ELSE WORK( IPN+LL ) = WORK( IPN+LL ) * SQRT( TEMP ) END IF END IF 100 CONTINUE JJ = JJ + KB END IF ICURCOL = MOD( ICURCOL+1, NPCOL ) * 110 CONTINUE * 120 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEQPF * END scalapack-1.8.0/SRC/dlaref.f0000640000175000017500000002453110363532303015354 0ustar muammarmuammar SUBROUTINE DLAREF( TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, $ ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, $ LIHIZ, VECS, V2, V3, T1, T2, T3 ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. LOGICAL BLOCK, WANTZ CHARACTER TYPE INTEGER ICOL1, IROW1, ISTART, ISTOP, ITMP1, ITMP2, LDA, $ LDZ, LIHIZ, LILOZ DOUBLE PRECISION T1, T2, T3, V2, V3 * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), VECS( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * DLAREF applies one or several Householder reflectors of size 3 * to one or two matrices (if column is specified) on either their * rows or columns. * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * If 'R': Apply reflectors to the rows of the matrix * (apply from left) * Otherwise: Apply reflectors to the columns of the matrix * Unchanged on exit. * * A (global input/output) DOUBLE PRECISION array, (LDA,*) * On entry, the matrix to receive the reflections. * The updated matrix on exit. * * LDA (local input) INTEGER * On entry, the leading dimension of A. Unchanged on exit. * * WANTZ (global input) LOGICAL * If .TRUE., then apply any column reflections to Z as well. * If .FALSE., then do no additional work on Z. * * Z (global input/output) DOUBLE PRECISION array, (LDZ,*) * On entry, the second matrix to receive column reflections. * This is changed only if WANTZ is set. * * LDZ (local input) INTEGER * On entry, the leading dimension of Z. Unchanged on exit. * * BLOCK (global input) LOGICAL * If .TRUE., then apply several reflectors at once and read * their data from the VECS array. * If .FALSE., apply the single reflector given by V2, V3, * T1, T2, and T3. * * IROW1 (local input/output) INTEGER * On entry, the local row element of A. * Undefined on output. * * * ICOL1 (local input/output) INTEGER * On entry, the local column element of A. * Undefined on output. * * ISTART (global input) INTEGER * Specifies the "number" of the first reflector. This is * used as an index into VECS if BLOCK is set. * ISTART is ignored if BLOCK is .FALSE.. * * ISTOP (global input) INTEGER * Specifies the "number" of the last reflector. This is * used as an index into VECS if BLOCK is set. * ISTOP is ignored if BLOCK is .FALSE.. * * ITMP1 (local input) INTEGER * Starting range into A. For rows, this is the local * first column. For columns, this is the local first row. * * ITMP2 (local input) INTEGER * Ending range into A. For rows, this is the local last * column. For columns, this is the local last row. * * LILOZ * LIHIZ (local input) INTEGER * These serve the same purpose as ITMP1,ITMP2 but for Z * when WANTZ is set. * * VECS (global input) DOUBLE PRECISION array of size 3*N (matrix * size) * This holds the size 3 reflectors one after another and this * is only accessed when BLOCK is .TRUE. * * V2 * V3 * T1 * T2 * T3 (global input/output) DOUBLE PRECISION * This holds information on a single size 3 Householder * reflector and is read when BLOCK is .FALSE., and * overwritten when BLOCK is .TRUE. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Local Scalars .. INTEGER J, K DOUBLE PRECISION H11, H22, SUM, T12, T13, T22, T23, T32, T33, $ V22, V23, V32, V33 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * IF( LSAME( TYPE, 'R' ) ) THEN IF( BLOCK ) THEN DO 20 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 10 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 H11 = A( IROW1+1, J ) - SUM*T2 H22 = A( IROW1+2, J ) - SUM*T3 SUM = H11 + V22*H22 + V32*A( IROW1+3, J ) A( IROW1+1, J ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = A( IROW1+3, J ) - SUM*T32 SUM = H11 + V23*H22 + V33*A( IROW1+4, J ) A( IROW1+2, J ) = H11 - SUM*T13 A( IROW1+3, J ) = H22 - SUM*T23 A( IROW1+4, J ) = A( IROW1+4, J ) - SUM*T33 10 CONTINUE IROW1 = IROW1 + 3 20 CONTINUE DO 40 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 30 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*T2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*T3 30 CONTINUE IROW1 = IROW1 + 1 40 CONTINUE ELSE DO 50 J = ITMP1, ITMP2 SUM = A( IROW1, J ) + V2*A( IROW1+1, J ) + $ V3*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM*T1 A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*T2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*T3 50 CONTINUE END IF ELSE * * Do column transforms * IF( BLOCK ) THEN DO 80 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 60 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 H11 = A( J, ICOL1+1 ) - SUM*T2 H22 = A( J, ICOL1+2 ) - SUM*T3 SUM = H11 + V22*H22 + V32*A( J, ICOL1+3 ) A( J, ICOL1+1 ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = A( J, ICOL1+3 ) - SUM*T32 SUM = H11 + V23*H22 + V33*A( J, ICOL1+4 ) A( J, ICOL1+2 ) = H11 - SUM*T13 A( J, ICOL1+3 ) = H22 - SUM*T23 A( J, ICOL1+4 ) = A( J, ICOL1+4 ) - SUM*T33 60 CONTINUE IF( WANTZ ) THEN DO 70 J = LILOZ, LIHIZ SUM = Z( J, ICOL1 ) + V2*Z( J, ICOL1+1 ) + $ V3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM*T1 H11 = Z( J, ICOL1+1 ) - SUM*T2 H22 = Z( J, ICOL1+2 ) - SUM*T3 SUM = H11 + V22*H22 + V32*Z( J, ICOL1+3 ) Z( J, ICOL1+1 ) = H11 - SUM*T12 H11 = H22 - SUM*T22 H22 = Z( J, ICOL1+3 ) - SUM*T32 SUM = H11 + V23*H22 + V33*Z( J, ICOL1+4 ) Z( J, ICOL1+2 ) = H11 - SUM*T13 Z( J, ICOL1+3 ) = H22 - SUM*T23 Z( J, ICOL1+4 ) = Z( J, ICOL1+4 ) - SUM*T33 70 CONTINUE END IF ICOL1 = ICOL1 + 3 80 CONTINUE DO 110 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 90 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*T2 A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*T3 90 CONTINUE IF( WANTZ ) THEN DO 100 J = LILOZ, LIHIZ SUM = Z( J, ICOL1 ) + V2*Z( J, ICOL1+1 ) + $ V3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM*T1 Z( J, ICOL1+1 ) = Z( J, ICOL1+1 ) - SUM*T2 Z( J, ICOL1+2 ) = Z( J, ICOL1+2 ) - SUM*T3 100 CONTINUE END IF ICOL1 = ICOL1 + 1 110 CONTINUE ELSE DO 120 J = ITMP1, ITMP2 SUM = A( J, ICOL1 ) + V2*A( J, ICOL1+1 ) + $ V3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM*T1 A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*T2 A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*T3 120 CONTINUE END IF END IF RETURN * * End of DLAREF * END * scalapack-1.8.0/SRC/pcunml2.f0000640000175000017500000003622710363532303015504 0ustar muammarmuammar SUBROUTINE PCUNML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNML2 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PCGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JCC, LCM, LCMP, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 COMPLEX AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCELSET2, PCLACGV, PCLARF, PCLARFC, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K -1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(i:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * IF( I-IA+1.LT.NQ ) $ CALL PCLACGV( NQ-I+IA-1, A, I, JA+I-IA+1, DESCA, $ DESCA( M_ ) ) CALL PCELSET2( AII, A, I, JA+I-IA, DESCA, ONE ) IF( NOTRAN ) THEN CALL PCLARFC( SIDE, MI, NI, A, I, JA+I-IA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) ELSE CALL PCLARF( SIDE, MI, NI, A, I, JA+I-IA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) END IF CALL PCELSET( A, I, JA+I-IA, DESCA, AII ) IF( I-IA+1.LT.NQ ) $ CALL PCLACGV( NQ-I+IA-1, A, I, JA+I-IA+1, DESCA, $ DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNML2 * END scalapack-1.8.0/SRC/pzpbsv.f0000640000175000017500000004530510363532303015445 0ustar muammarmuammar SUBROUTINE PZPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZPBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded symmetric positive definite distributed * matrix with bandwidth BW. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PZPBTRF and PZPBTRS for details. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+2*bw)*bw * +max((bw*NRHS), bw*bw) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PXERBLA, PZPBTRF, PZPBTRS * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PZPBTRF and PZPBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PZPBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+2*BW)*BW * * Factor the matrix * CALL PZPBTRF( UPLO, N, BW, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PZPBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PZPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPBSV', -INFO ) RETURN END IF * RETURN * * End of PZPBSV * END scalapack-1.8.0/SRC/pdgels.f0000640000175000017500000005424110363532303015376 0ustar muammarmuammar SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PDGELS solves overdetermined or underdetermined real linear * systems involving an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1), * or its transpose, using a QR or LQ factorization of sub( A ). It is * assumed that sub( A ) has full rank. * * The following options are provided: * * 1. If TRANS = 'N' and m >= n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )*X ||. * * 2. If TRANS = 'N' and m < n: find the minimum norm solution of * an underdetermined system sub( A ) * X = sub( B ). * * 3. If TRANS = 'T' and m >= n: find the minimum norm solution of * an undetermined system sub( A )**T * X = sub( B ). * * 4. If TRANS = 'T' and m < n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )**T * X ||. * * where sub( B ) denotes B( IB:IB+M-1, JB:JB+NRHS-1 ) when TRANS = 'N' * and B( IB:IB+N-1, JB:JB+NRHS-1 ) otherwise. Several right hand side * vectors b and solution vectors x can be handled in a single call; * When TRANS = 'N', the solution vectors are stored as the columns of * the N-by-NRHS right hand side matrix sub( B ) and the M-by-NRHS * right hand side matrix sub( B ) otherwise. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER * = 'N': the linear system involves sub( A ); * = 'T': the linear system involves sub( A )**T. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e. the number of columns * of the distributed submatrices sub( B ) and X. NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). On entry, the M-by-N matrix A. * if M >= N, sub( A ) is overwritten by details of its QR * factorization as returned by PDGEQRF; * if M < N, sub( A ) is overwritten by details of its LQ * factorization as returned by PDGELQF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of local dimension * (LLD_B, LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributed matrix B of right hand side * vectors, stored columnwise; * sub( B ) is M-by-NRHS if TRANS='N', and N-by-NRHS otherwise. * On exit, sub( B ) is overwritten by the solution vectors, * stored columnwise: if TRANS = 'N' and M >= N, rows 1 to N * of sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements N+1 to M in that * column; if TRANS = 'N' and M < N, rows 1 to N of sub( B ) * contain the minimum norm solution vectors; if TRANS = 'T' * and M >= N, rows 1 to M of sub( B ) contain the minimum norm * solution vectors; if TRANS = 'T' and M < N, rows 1 to M of * sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements M+1 to N in that * column. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= LTAU + MAX( LWF, LWS ) where * If M >= N, then * LTAU = NUMROC( JA+MIN(M,N)-1, NB_A, MYCOL, CSRC_A, NPCOL ), * LWF = NB_A * ( MpA0 + NqA0 + NB_A ) * LWS = MAX( (NB_A*(NB_A-1))/2, (NRHSqB0 + MpB0)*NB_A ) + * NB_A * NB_A * Else * LTAU = NUMROC( IA+MIN(M,N)-1, MB_A, MYROW, RSRC_A, NPROW ), * LWF = MB_A * ( MpA0 + NqA0 + MB_A ) * LWS = MAX( (MB_A*(MB_A-1))/2, ( NpB0 + MAX( NqA0 + * NUMROC( NUMROC( N+IROFFB, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NRHSqB0 ) )*MB_A ) + * MB_A * MB_A * End if * * where LCMP = LCM / NPROW with LCM = ILCM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * MpB0 = NUMROC( M+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NRHSqB0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, TPSD INTEGER BROW, IACOL, IAROW, IASCL, IBCOL, IBROW, IBSCL, $ ICOFFA, ICOFFB, ICTXT, IPW, IROFFA, IROFFB, $ LCM, LCMP, LTAU, LWF, LWMIN, LWS, MPA0, MPB0, $ MYCOL, MYROW, NPB0, NPCOL, NPROW, NQA0, $ NRHSQB0, SCLLEN DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) DOUBLE PRECISION RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, PDLAMCH, $ PDLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDGELQF, $ PDGEQRF, PDLABAD, PDLASCL, PDLASET, $ PDORMLQ, PDORMQR, PDTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( M, 2, N, 3, IA, JA, DESCA, 8, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( IA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( IB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NRHSQB0 = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, $ NPCOL ) IF( M.GE.N ) THEN MPB0 = NUMROC( M+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( JA+MIN(M,N)-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) LWF = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) LWS = MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPB0 + NRHSQB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ )*DESCA( NB_ ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( IA+MIN(M,N)-1, DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) LWF = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) LWS = MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( NPB0 + MAX( NQA0 + NUMROC( NUMROC( N+IROFFB, $ DESCA( MB_ ), 0, 0, NPROW ), DESCA( MB_ ), 0, 0, $ LCMP ), NRHSQB0 ) )*DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF LWMIN = LTAU + MAX( LWF, LWS ) WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) * TPSD = .TRUE. IF( LSAME( TRANS, 'N' ) ) $ TPSD = .FALSE. * IF( .NOT.( LSAME( TRANS, 'N' ) .OR. $ LSAME( TRANS, 'T' ) ) ) THEN INFO = -1 ELSE IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( M.GE.N .AND. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. IAROW.NE.IBROW ) THEN INFO = -10 ELSE IF( M.LT.N .AND. ICOFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( M.LT.N .AND. DESCA( NB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1200 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF END IF * IF( .NOT.TPSD ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IDUM1( 1 ) = ICHAR( 'T' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( M, 2, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, 4, $ IB, JB, DESCB, 12, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PDLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, $ IB, JB, DESCB ) RETURN END IF * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) SMLNUM = SMLNUM / PDLAMCH( ICTXT, 'P' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) * * Scale A, B if max entry outside range [SMLNUM,BIGNUM] * ANRM = PDLANGE( 'M', M, N, A, IA, JA, DESCA, RWORK ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PDLASCL( 'G', ANRM, SMLNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 1 ELSE IF( ANRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PDLASCL( 'G', ANRM, BIGNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 2 ELSE IF( ANRM.EQ.ZERO ) THEN * * Matrix all zero. Return zero solution. * CALL PDLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, IB, JB, $ DESCB ) GO TO 10 END IF * BROW = M IF( TPSD ) $ BROW = N * BNRM = PDLANGE( 'M', BROW, NRHS, B, IB, JB, DESCB, RWORK ) * IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PDLASCL( 'G', BNRM, SMLNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 1 ELSE IF( BNRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PDLASCL( 'G', BNRM, BIGNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 2 END IF * IPW = LTAU + 1 * IF( M.GE.N ) THEN * * compute QR factorization of A * CALL PDGEQRF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least N, optimally N*NB * IF( .NOT.TPSD ) THEN * * Least-Squares Problem min || A * X - B || * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q' * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PDORMQR( 'Left', 'Transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+N-1,JB:JB+NRHS-1) := inv(R) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PDTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * SCLLEN = N * ELSE * * Overdetermined system of equations sub( A )' * X = sub( B ) * * sub( B ) := inv(R') * sub( B ) * CALL PDTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * B(IB+N:IB+M-1,JB:JB+NRHS-1) = ZERO * CALL PDLASET( 'All', M-N, NRHS, ZERO, ZERO, B, IB+N, JB, $ DESCB ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q(1:N,:) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PDORMQR( 'Left', 'No transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = M * END IF * ELSE * * Compute LQ factorization of sub( A ) * CALL PDGELQF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least M, optimally M*NB. * IF( .NOT.TPSD ) THEN * * underdetermined system of equations sub( A ) * X = sub( B ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L) * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PDTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * B(IB+M:IB+N-1,JB:JB+NRHS-1) = 0 * CALL PDLASET( 'All', N-M, NRHS, ZERO, ZERO, B, IB+M, JB, $ DESCB ) * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q(1:N,:)' * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PDORMLQ( 'Left', 'Transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = N * ELSE * * overdetermined system min || A' * X - B || * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PDORMLQ( 'Left', 'No transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L') * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PDTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * SCLLEN = M * END IF * END IF * * Undo scaling * IF( IASCL.EQ.1 ) THEN CALL PDLASCL( 'G', ANRM, SMLNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IASCL.EQ.2 ) THEN CALL PDLASCL( 'G', ANRM, BIGNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF IF( IBSCL.EQ.1 ) THEN CALL PDLASCL( 'G', SMLNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IBSCL.EQ.2 ) THEN CALL PDLASCL( 'G', BIGNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF * 10 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGELS * END scalapack-1.8.0/SRC/psgerq2.f0000640000175000017500000002465110363532303015505 0ustar muammarmuammar SUBROUTINE PSGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGERQ2 computes a RQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PSELSET, PSLARF, PSLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * K = MIN( M, N ) DO 10 I = IA+K-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i+m-k,ja:j+n-k-1) * CALL PSLARFG( N-K+J-JA+1, AII, I+M-K, J+N-K, A, I+M-K, JA, $ DESCA, DESCA( M_ ), TAU ) * * Apply H(i) to A(ia:i+m-k-1,ja:j+n-k) from the right * CALL PSELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PSLARF( 'Right', M-K+I-IA, N-K+J-JA+1, A, M-K+I, JA, $ DESCA, DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) CALL PSELSET( A, I+M-K, J+N-K, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGERQ2 * END scalapack-1.8.0/SRC/pdgehd2.f0000640000175000017500000002674610363532303015446 0ustar muammarmuammar SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGEHD2 reduces a real general distributed matrix sub( A ) * to upper Hessenberg form H by an orthogonal similarity transforma- * tion: Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+JLO-2 * and JA+JHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB + MAX( NpA0, NB ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on * exit in A(ia+ilo+i:ia+ihi-1,ja+ilo+i-2), and tau in TAU(ja+ilo+i-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follo- * wing example, with n = 7, ilo = 2 and ihi = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), h denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(ja+ilo+i-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IAROW, ICOFFA, ICTXT, IROFFA, J, K, LWMIN, $ MYCOL, MYROW, NPA0, NPCOL, NPROW DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLARFG, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NPA0 = NUMROC( IHI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = DESCA( NB_ ) + MAX( NPA0, DESCA( NB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * DO 10 K = ILO, IHI-1 I = IA + K - 1 J = JA + K - 1 * * Compute elementary reflector H(j) to annihilate * A(i+2:ihi+ia-1,j) * CALL PDLARFG( IHI-K, AII, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PDELSET( A, I+1, J, DESCA, ONE ) * * Apply H(k) to A(ia:ihi+ia-1,j+1:ihi+ja-1) from the right * CALL PDLARF( 'Right', IHI, IHI-K, A, I+1, J, DESCA, 1, TAU, A, $ IA, J+1, DESCA, WORK ) * * Apply H(j) to A(i+1:ia+ihi-1,j+1:ja+n-1) from the left * CALL PDLARF( 'Left', IHI-K, N-K, A, I+1, J, DESCA, 1, TAU, A, $ I+1, J+1, DESCA, WORK ) * CALL PDELSET( A, I+1, J, DESCA, AII ) 10 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGEHD2 * END scalapack-1.8.0/SRC/pcdttrf.f0000640000175000017500000010746410363532303015574 0ustar muammarmuammar SUBROUTINE PCDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX AF( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDTTRF computes a LU factorization * of an N-by-N complex tridiagonal * diagonally dominant-like distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PCDTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * DL (local input/local output) COMPLEX pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDTTRF and this is stored in AF. If a linear system * is to be solved using PCDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CAXPY, CGEMM, CGERV2D, CGESD2D, CLACPY, $ CLATCPY, CPBTRF, CPOTRF, CSYRK, CTBTRS, CTRMM, $ CTRRV2D, CTRSD2D, CTRSM, CTRTRS, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX CDOTC EXTERNAL CDOTC, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PCDTTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDTTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (12*NPCOL+3*NB) * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PCDTTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PCDTTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA(5) PARAM_CHECK( 6, 1 ) = DESCA(4) PARAM_CHECK( 5, 1 ) = DESCA(3) PARAM_CHECK( 4, 1 ) = DESCA(1) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 605 PARAM_CHECK( 6, 2 ) = 604 PARAM_CHECK( 5, 2 ) = 603 PARAM_CHECK( 4, 2 ) = 601 PARAM_CHECK( 3, 2 ) = 5 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL CTRSD2D( ICTXT, 'U', 'N', 1, 1, $ DU( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL CDTTRF( ODD_SIZE, DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * * Apply factorization to lower connection block BL_i * Apply factorization to upper connection block BU_i * * * Perform the triangular solve {U_i}^C{BL'}_i^C = {BL_i}^C * * DL( PART_OFFSET+ODD_SIZE+1 ) = $ ( DL( PART_OFFSET+ODD_SIZE+1 ) ) $ / ( D( PART_OFFSET+ODD_SIZE ) ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 )- $ DL( PART_OFFSET+ODD_SIZE+1 )*DU( PART_OFFSET+ODD_SIZE ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * * Move entry that causes spike to auxiliary storage * AF( WORK_U+1 ) = ( DL( PART_OFFSET+1 ) ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL CDTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( WORK_U+1 ), ODD_SIZE, $ INFO ) * * * Calculate the "spike" fillin, ${U_i}^C {{GL}_i}^C = {DU_i}^C$ * CALL CTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * AF( 1 ) = CONJG( AF( 1 ) ) * CALL CDTTRSV( 'U', 'C', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * AF( ODD_SIZE+3 ) = -CONE * $ CDOTC( ODD_SIZE, AF( 1 ), 1, AF( WORK_U+1 ), 1 ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * AF( ODD_SIZE+1 ) = -CONE $ * CONJG( DL( PART_OFFSET+ODD_SIZE+1 ) $ * AF( WORK_U+ODD_SIZE ) ) * * AF(WORK_U+(ODD_SIZE)+1 ) = -CONE $ * DU( PART_OFFSET+ODD_SIZE ) $ * CONJG( AF( ODD_SIZE ) ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = $ CMPLX( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+AF( ODD_SIZE+3 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ) .EQ. CZERO ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( WORK_U+ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * AF( ODD_SIZE+3 ) = AF( WORK_U+ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify lower off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) $ / CONJG( AF( ODD_SIZE+2 ) ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*CONJG( AF( ODD_SIZE+1 ) )* $ AF( WORK_U+(ODD_SIZE)+1 ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+3 ) $ / ( AF( ODD_SIZE+2 ) ) * * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ *CONJG( AF( WORK_U+ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( WORK_U+ODD_SIZE+3 ) $ * AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ * AF( WORK_U+ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PCDTTRF * END scalapack-1.8.0/SRC/pzlattrs.f0000640000175000017500000013032210363532303015776 0ustar muammarmuammar SUBROUTINE PZLATTRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, JA, $ DESCA, X, IX, JX, DESCX, SCALE, CNORM, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, INFO, IX, JA, JX, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) DOUBLE PRECISION CNORM( * ) COMPLEX*16 A( * ), X( * ) * .. * * Purpose * ======= * * PZLATTRS solves one of the triangular systems * * A * x = s*b, A**T * x = s*b, or A**H * x = s*b, * * with scaling to prevent overflow. Here A is an upper or lower * triangular matrix, A**T denotes the transpose of A, A**H denotes the * conjugate transpose of A, x and b are n-element vectors, and s is a * scaling factor, usually less than or equal to 1, chosen so that the * components of x will be less than the overflow threshold. If the * unscaled problem will not cause overflow, the Level 2 PBLAS routine * PZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j) * then s is set to 0 and a non-trivial solution to A*x = 0 is returned. * * This is very slow relative to PZTRSV. This should only be used * when scaling is necessary to control overflow, or when it is modified * to scale better. * Notes * * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the matrix A is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * TRANS (global input) CHARACTER*1 * Specifies the operation applied to A. * = 'N': Solve A * x = s*b (No transpose) * = 'T': Solve A**T * x = s*b (Transpose) * = 'C': Solve A**H * x = s*b (Conjugate transpose) * * DIAG (global input) CHARACTER*1 * Specifies whether or not the matrix A is unit triangular. * = 'N': Non-unit triangular * = 'U': Unit triangular * * NORMIN (global input) CHARACTER*1 * Specifies whether CNORM has been set or not. * = 'Y': CNORM contains the column norms on entry * = 'N': CNORM is not set on entry. On exit, the norms will * be computed and stored in CNORM. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * A (local input) COMPLEX*16 array, dimension (DESCA(LLD_),*) * The triangular matrix A. If UPLO = 'U', the leading n by n * upper triangular part of the array A contains the upper * triangular matrix, and the strictly lower triangular part of * A is not referenced. If UPLO = 'L', the leading n by n lower * triangular part of the array A contains the lower triangular * matrix, and the strictly upper triangular part of A is not * referenced. If DIAG = 'U', the diagonal elements of A are * also not referenced and are assumed to be 1. * * IA (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input/output) COMPLEX*16 array, * dimension (DESCX(LLD_),*) * On entry, the right hand side b of the triangular system. * On exit, X is overwritten by the solution vector x. * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * SCALE (global output) DOUBLE PRECISION * The scaling factor s for the triangular system * A * x = s*b, A**T * x = s*b, or A**H * x = s*b. * If SCALE = 0, the matrix A is singular or badly scaled, and * the vector x is an exact or approximate solution to A*x = 0. * * CNORM (global input or global output) DOUBLE PRECISION array, * dimension (N) * If NORMIN = 'Y', CNORM is an input argument and CNORM(j) * contains the norm of the off-diagonal part of the j-th column * of A. If TRANS = 'N', CNORM(j) must be greater than or equal * to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) * must be greater than or equal to the 1-norm. * * If NORMIN = 'N', CNORM is an output argument and CNORM(j) * returns the 1-norm of the offdiagonal part of the j-th column * of A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -k, the k-th argument had an illegal value * * Further Details * ======= ======= * * A rough bound on x is computed; if that is less than overflow, PZTRSV * is called, otherwise, specific code is used which checks for possible * overflow or divide-by-zero at every operation. * * A columnwise scheme is used for solving A*x = b. The basic algorithm * if A is lower triangular is * * x[1:n] := b[1:n] * for j = 1, ..., n * x(j) := x(j) / A(j,j) * x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] * end * * Define bounds on the components of x after j iterations of the loop: * M(j) = bound on x[1:j] * G(j) = bound on x[j+1:n] * Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. * * Then for iteration j+1 we have * M(j+1) <= G(j) / | A(j+1,j+1) | * G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | * <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) * * where CNORM(j+1) is greater than or equal to the infinity-norm of * column j+1 of A, not counting the diagonal. Hence * * G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) * 1<=i<=j * and * * |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) * 1<=i< j * * Since |x(j)| <= M(j), we use the Level 2 PBLAS routine PZTRSV if the * reciprocal of the largest M(j), j=1,..,n, is larger than * max(underflow, 1/overflow). * * The bound on x(j) is also used to determine when a step in the * columnwise method can be performed without fear of overflow. If * the computed bound is greater than a large constant, x is scaled to * prevent overflow, but if the bound overflows, x is set to 0, x(j) to * 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. * * Similarly, a row-wise scheme is used to solve A**T *x = b or * A**H *x = b. The basic algorithm for A upper triangular is * * for j = 1, ..., n * x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) * end * * We simultaneously compute two bounds * G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j * M(j) = bound on x(i), 1<=i<=j * * The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we * add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. * Then the bound on x(j) is * * M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | * * <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) * 1<=i<=j * * and we can safely call PZTRSV if 1/M(n) and 1/G(n) are both greater * than max(underflow, 1/overflow). * * Last modified by: Mark R. Fahey, August 2000 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE, TWO PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, $ TWO = 2.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER CONTXT, CSRC, I, ICOL, ICOLX, IMAX, IROW, $ IROWX, ITMP1, ITMP1X, ITMP2, ITMP2X, J, JFIRST, $ JINC, JLAST, LDA, LDX, MB, MYCOL, MYROW, NB, $ NPCOL, NPROW, RSRC DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, $ XBND, XJ, XMAX COMPLEX*16 CSUMJ, TJJS, USCAL, XJTMP, ZDUM * .. * .. External Functions .. LOGICAL LSAME INTEGER IDAMAX DOUBLE PRECISION PDLAMCH COMPLEX*16 ZLADIV EXTERNAL LSAME, IDAMAX, PDLAMCH, ZLADIV * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGSUM2D, DSCAL, INFOG2L, $ PDLABAD, PDZASUM, PXERBLA, PZAMAX, PZAXPY, $ PZDOTC, PZDOTU, PZDSCAL, PZLASET, PZSCAL, $ PZTRSV, ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN * .. * .. Statement Functions .. DOUBLE PRECISION CABS1, CABS2 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + $ ABS( DIMAG( ZDUM ) / 2.D0 ) * .. * .. Executable Statements .. * INFO = 0 UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) * CONTXT = DESCA( CTXT_ ) RSRC = DESCA( RSRC_ ) CSRC = DESCA( CSRC_ ) MB = DESCA( MB_ ) NB = DESCA( NB_ ) LDA = DESCA( LLD_ ) LDX = DESCX( LLD_ ) * * Test the input parameters. * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. $ LSAME( NORMIN, 'N' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 END IF * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( CONTXT, 'PZLATTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Determine machine dependent parameters to control overflow. * SMLNUM = PDLAMCH( CONTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( CONTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PDLAMCH( CONTXT, 'Precision' ) BIGNUM = ONE / SMLNUM SCALE = ONE * * IF( LSAME( NORMIN, 'N' ) ) THEN * * Compute the 1-norm of each column, not including the diagonal. * IF( UPPER ) THEN * * A is upper triangular. * CNORM( 1 ) = ZERO DO 10 J = 2, N CALL PDZASUM( J-1, CNORM( J ), A, IA, JA+J-1, DESCA, 1 ) 10 CONTINUE ELSE * * A is lower triangular. * DO 20 J = 1, N - 1 CALL PDZASUM( N-J, CNORM( J ), A, IA+J, JA+J-1, DESCA, $ 1 ) 20 CONTINUE CNORM( N ) = ZERO END IF CALL DGSUM2D( CONTXT, 'Row', ' ', N, 1, CNORM, 1, -1, -1 ) END IF * * Scale the column norms by TSCAL if the maximum element in CNORM is * greater than BIGNUM/2. * IMAX = IDAMAX( N, CNORM, 1 ) TMAX = CNORM( IMAX ) IF( TMAX.LE.BIGNUM*HALF ) THEN TSCAL = ONE ELSE TSCAL = HALF / ( SMLNUM*TMAX ) CALL DSCAL( N, TSCAL, CNORM, 1 ) END IF * * Compute a bound on the computed solution vector to see if the * Level 2 PBLAS routine PZTRSV can be used. * XMAX = ZERO CALL PZAMAX( N, ZDUM, IMAX, X, IX, JX, DESCX, 1 ) XMAX = CABS2( ZDUM ) CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, -1, -1 ) XBND = XMAX * IF( NOTRAN ) THEN * * Compute the growth in A * x = b. * IF( UPPER ) THEN JFIRST = N JLAST = 1 JINC = -1 ELSE JFIRST = 1 JLAST = N JINC = 1 END IF * IF( TSCAL.NE.ONE ) THEN GROW = ZERO GO TO 50 END IF * IF( NOUNIT ) THEN * * A is non-unit triangular. * * Compute GROW = 1/G(j) and XBND = 1/M(j). * Initially, G(0) = max{x(i), i=1,...,n}. * GROW = HALF / MAX( XBND, SMLNUM ) XBND = GROW DO 30 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 50 * * TJJS = A( J, J ) CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW ) CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF TJJ = CABS1( TJJS ) * IF( TJJ.GE.SMLNUM ) THEN * * M(j) = G(j-1) / abs(A(j,j)) * XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) ELSE * * M(j) could overflow, set XBND to 0. * XBND = ZERO END IF * IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN * * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) * GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) ELSE * * G(j) could overflow, set GROW to 0. * GROW = ZERO END IF 30 CONTINUE GROW = XBND ELSE * * A is unit triangular. * * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. * GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) DO 40 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 50 * * G(j) = G(j-1)*( 1 + CNORM(j) ) * GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) 40 CONTINUE END IF 50 CONTINUE * ELSE * * Compute the growth in A**T * x = b or A**H * x = b. * IF( UPPER ) THEN JFIRST = 1 JLAST = N JINC = 1 ELSE JFIRST = N JLAST = 1 JINC = -1 END IF * IF( TSCAL.NE.ONE ) THEN GROW = ZERO GO TO 80 END IF * IF( NOUNIT ) THEN * * A is non-unit triangular. * * Compute GROW = 1/G(j) and XBND = 1/M(j). * Initially, M(0) = max{x(i), i=1,...,n}. * GROW = HALF / MAX( XBND, SMLNUM ) XBND = GROW DO 60 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 80 * * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) * XJ = ONE + CNORM( J ) GROW = MIN( GROW, XBND / XJ ) * * TJJS = A( J, J ) CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW ) CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF TJJ = CABS1( TJJS ) * IF( TJJ.GE.SMLNUM ) THEN * * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) * IF( XJ.GT.TJJ ) $ XBND = XBND*( TJJ / XJ ) ELSE * * M(j) could overflow, set XBND to 0. * XBND = ZERO END IF 60 CONTINUE GROW = MIN( GROW, XBND ) ELSE * * A is unit triangular. * * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. * GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) DO 70 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 80 * * G(j) = ( 1 + CNORM(j) )*G(j-1) * XJ = ONE + CNORM( J ) GROW = GROW / XJ 70 CONTINUE END IF 80 CONTINUE END IF * IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN * * Use the Level 2 PBLAS solve if the reciprocal of the bound on * elements of X is not too small. * CALL PZTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) ELSE * * Use a Level 1 PBLAS solve, scaling intermediate results. * IF( XMAX.GT.BIGNUM*HALF ) THEN * * Scale X so that its components are less than or equal to * BIGNUM in absolute value. * SCALE = ( BIGNUM*HALF ) / XMAX CALL PZDSCAL( N, SCALE, X, IX, JX, DESCX, 1 ) XMAX = BIGNUM ELSE XMAX = XMAX*TWO END IF * IF( NOTRAN ) THEN * * Solve A * x = b * DO 100 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) / A(j,j), scaling x if necessary. * * XJ = CABS1( X( J ) ) CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 90 END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by 1/b(j). * REC = ONE / XJ CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = ZLADIV( X( J ), TJJS ) * XJ = CABS1( X( J ) ) XJTMP = ZLADIV( XJTMP, TJJS ) XJ = CABS1( XJTMP ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM * to avoid overflow when dividing by A(j,j). * REC = ( TJJ*BIGNUM ) / XJ IF( CNORM( J ).GT.ONE ) THEN * * Scale by 1/CNORM(j) to avoid overflow when * multiplying x(j) times column j. * REC = REC / CNORM( J ) END IF CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = ZLADIV( X( J ), TJJS ) * XJ = CABS1( X( J ) ) XJTMP = ZLADIV( XJTMP, TJJS ) XJ = CABS1( XJTMP ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0, and compute a solution to A*x = 0. * CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = CONE END IF XJTMP = CONE XJ = ONE SCALE = ZERO XMAX = ZERO END IF 90 CONTINUE * * Scale x if necessary to avoid overflow when adding a * multiple of column j of A. * IF( XJ.GT.ONE ) THEN REC = ONE / XJ IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN * * Scale x by 1/(2*abs(x(j))). * REC = REC*HALF CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC END IF ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN * * Scale x by 1/2. * CALL PZDSCAL( N, HALF, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*HALF SCALE = SCALE*HALF END IF * IF( UPPER ) THEN IF( J.GT.1 ) THEN * * Compute the update * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) * ZDUM = -XJTMP*TSCAL CALL PZAXPY( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1, X, $ IX, JX, DESCX, 1 ) CALL PZAMAX( J-1, ZDUM, IMAX, X, IX, JX, DESCX, 1 ) XMAX = CABS1( ZDUM ) CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, $ -1, -1 ) END IF ELSE IF( J.LT.N ) THEN * * Compute the update * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) * ZDUM = -XJTMP*TSCAL CALL PZAXPY( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) CALL PZAMAX( N-J, ZDUM, I, X, IX+J, JX, DESCX, 1 ) XMAX = CABS1( ZDUM ) CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, $ -1, -1 ) END IF END IF 100 CONTINUE * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * * Solve A**T * x = b * DO 120 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) - sum A(k,j)*x(k). * k<>j * * XJ = CABS1( X( J ) ) CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) USCAL = DCMPLX( TSCAL ) REC = ONE / MAX( XMAX, ONE ) IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN * * If x(j) could overflow, scale x by 1/(2*XMAX). * REC = REC*HALF IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.ONE ) THEN * * Divide by A(j,j) when scaling x if A(j,j) > 1. * REC = MIN( ONE, REC*TJJ ) USCAL = ZLADIV( USCAL, TJJS ) END IF IF( REC.LT.ONE ) THEN CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * CSUMJ = CZERO IF( USCAL.EQ.CONE ) THEN * * If the scaling needed for A in the dot product is 1, * call PZDOTU to perform the dot product. * IF( UPPER ) THEN CALL PZDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ELSE IF( J.LT.N ) THEN CALL PZDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF ELSE * * Otherwise, scale column of A by USCAL before dot * product. Below is not the best way to do it. * IF( UPPER ) THEN * DO 130 I = 1, J - 1 * CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) * 130 CONTINUE ZDUM = DCONJG( USCAL ) CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) CALL PZDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ZDUM = ZLADIV( ZDUM, USCAL ) CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) ELSE IF( J.LT.N ) THEN * DO 140 I = J + 1, N * CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) * 140 CONTINUE ZDUM = DCONJG( USCAL ) CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) CALL PZDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) ZDUM = ZLADIV( ZDUM, USCAL ) CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF END IF * IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN * * Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) * was not used to scale the dotproduct. * * X( J ) = X( J ) - CSUMJ * XJ = CABS1( X( J ) ) XJTMP = XJTMP - CSUMJ XJ = CABS1( XJTMP ) * IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) * $ X( IROWX ) = XJTMP IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 110 END IF * * Compute x(j) = x(j) / A(j,j), scaling if necessary. * TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale X by 1/abs(x(j)). * REC = ONE / XJ CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = ZLADIV( X( J ), TJJS ) XJTMP = ZLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. * REC = ( TJJ*BIGNUM ) / XJ CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = ZLADIV( X( J ), TJJS ) XJTMP = ZLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0 and compute a solution to A**T *x = 0. * CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = CONE END IF XJTMP = CONE SCALE = ZERO XMAX = ZERO END IF 110 CONTINUE ELSE * * Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot * product has already been divided by 1/A(j,j). * * X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ XJTMP = ZLADIV( XJTMP, TJJS ) - CSUMJ IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF END IF XMAX = MAX( XMAX, CABS1( XJTMP ) ) 120 CONTINUE * ELSE * * Solve A**H * x = b * DO 140 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) - sum A(k,j)*x(k). * k<>j * CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) USCAL = TSCAL REC = ONE / MAX( XMAX, ONE ) IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN * * If x(j) could overflow, scale x by 1/(2*XMAX). * REC = REC*HALF IF( NOUNIT ) THEN * TJJS = DCONJG( A( J, J ) )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = DCONJG( A( ( ICOL-1 )*LDA+IROW ) )*TSCAL CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.ONE ) THEN * * Divide by A(j,j) when scaling x if A(j,j) > 1. * REC = MIN( ONE, REC*TJJ ) USCAL = ZLADIV( USCAL, TJJS ) END IF IF( REC.LT.ONE ) THEN CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * CSUMJ = CZERO IF( USCAL.EQ.CONE ) THEN * * If the scaling needed for A in the dot product is 1, * call PZDOTC to perform the dot product. * IF( UPPER ) THEN CALL PZDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ELSE IF( J.LT.N ) THEN CALL PZDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF ELSE * * Otherwise, scale column of A by USCAL before dot * product. Below is not the best way to do it. * IF( UPPER ) THEN * DO 180 I = 1, J - 1 * CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* * $ X( I ) * 180 CONTINUE ZDUM = DCONJG( USCAL ) CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) CALL PZDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ZDUM = ZLADIV( CONE, ZDUM ) CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) ELSE IF( J.LT.N ) THEN * DO 190 I = J + 1, N * CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* * $ X( I ) * 190 CONTINUE ZDUM = DCONJG( USCAL ) CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) CALL PZDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) ZDUM = ZLADIV( CONE, ZDUM ) CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF END IF * IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN * * Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) * was not used to scale the dotproduct. * * X( J ) = X( J ) - CSUMJ * XJ = CABS1( X( J ) ) XJTMP = XJTMP - CSUMJ XJ = CABS1( XJTMP ) * IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) * $ X( IROWX ) = XJTMP IF( NOUNIT ) THEN * TJJS = DCONJG( A( J, J ) )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = DCONJG( A( ( ICOL-1 )*LDA+IROW ) )*TSCAL CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 130 END IF * * Compute x(j) = x(j) / A(j,j), scaling if necessary. * TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale X by 1/abs(x(j)). * REC = ONE / XJ CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = ZLADIV( X( J ), TJJS ) XJTMP = ZLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. * REC = ( TJJ*BIGNUM ) / XJ CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = ZLADIV( X( J ), TJJS ) XJTMP = ZLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0 and compute a solution to A**H *x = 0. * CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = CONE XJTMP = CONE SCALE = ZERO XMAX = ZERO END IF 130 CONTINUE ELSE * * Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot * product has already been divided by 1/A(j,j). * * X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ XJTMP = ZLADIV( XJTMP, TJJS ) - CSUMJ IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP END IF XMAX = MAX( XMAX, CABS1( XJTMP ) ) 140 CONTINUE END IF SCALE = SCALE / TSCAL END IF * * Scale the column norms by 1/TSCAL for return. * IF( TSCAL.NE.ONE ) THEN CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) END IF * RETURN * * End of PZLATTRS * END scalapack-1.8.0/SRC/ssteqr2.f0000640000175000017500000003262410363532303015524 0ustar muammarmuammar SUBROUTINE SSTEQR2( COMPZ, N, D, E, Z, LDZ, NR, WORK, INFO ) * * -- LAPACK routine (version 2.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, LDZ, N, NR * .. * .. Array Arguments .. REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * SSTEQR2 is a modified version of LAPACK routine SSTEQR. * SSTEQR2 computes all eigenvalues and, optionally, eigenvectors of a * symmetric tridiagonal matrix using the implicit QL or QR method. * running SSTEQR2 to perform updates on a distributed matrix Q. * Proper usage of SSTEQR2 can be gleaned from examination of ScaLAPACK's * PSSYEV. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. * = 'I': Compute eigenvalues and eigenvectors of the * tridiagonal matrix. Z must be initialized to the * identity matrix by PDLASET or DLASET prior to entering * this subroutine. * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input/output) REAL array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in ascending order. * * E (input/output) REAL array, dimension (N-1) * On entry, the (n-1) subdiagonal elements of the tridiagonal * matrix. * On exit, E has been destroyed. * * Z (local input/local output) REAL array, global * dimension (N, N), local dimension (LDZ, NR). * On entry, if COMPZ = 'V', then Z contains the orthogonal * matrix used in the reduction to tridiagonal form. * On exit, if INFO = 0, then if COMPZ = 'V', Z contains the * orthonormal eigenvectors of the original symmetric matrix, * and if COMPZ = 'I', Z contains the orthonormal eigenvectors * of the symmetric tridiagonal matrix. * If COMPZ = 'N', then Z is not referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= 1, and if * eigenvectors are desired, then LDZ >= max(1,N). * * NR (input) INTEGER * NR = MAX(1, NUMROC( N, NB, MYPROW, 0, NPROCS ) ). * If COMPZ = 'N', then NR is not referenced. * * WORK (workspace) REAL array, dimension (max(1,2*N-2)) * If COMPZ = 'N', then WORK is not referenced. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: the algorithm has failed to find all the eigenvalues in * a total of 30*N iterations; if INFO = i, then i * elements of E have not converged to zero; on exit, D * and E contain the elements of a symmetric tridiagonal * matrix which is orthogonally similar to the original * matrix. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, TWO, THREE PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, $ THREE = 3.0E0 ) INTEGER MAXIT PARAMETER ( MAXIT = 30 ) * .. * .. Local Scalars .. INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, $ NM1, NMAXIT REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST * .. * .. External Functions .. LOGICAL LSAME REAL SLAMCH, SLANST, SLAPY2 EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 * .. * .. External Subroutines .. EXTERNAL SLAE2, SLAEV2, SLARTG, SLASCL, SLASR, $ SLASRT, SSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 * IF( LSAME( COMPZ, 'N' ) ) THEN ICOMPZ = 0 ELSE IF( LSAME( COMPZ, 'I' ) ) THEN ICOMPZ = 1 ELSE ICOMPZ = -1 END IF IF( ICOMPZ.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, NR ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SSTEQR2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( N.EQ.1 ) THEN IF( ICOMPZ.EQ.1 ) $ Z( 1, 1 ) = ONE RETURN END IF * * Determine the unit roundoff and over/underflow thresholds. * EPS = SLAMCH( 'E' ) EPS2 = EPS**2 SAFMIN = SLAMCH( 'S' ) SAFMAX = ONE / SAFMIN SSFMAX = SQRT( SAFMAX ) / THREE SSFMIN = SQRT( SAFMIN ) / EPS2 * * Compute the eigenvalues and eigenvectors of the tridiagonal * matrix. * NMAXIT = N*MAXIT JTOT = 0 * * Determine where the matrix splits and choose QL or QR iteration * for each block, according to whether top or bottom diagonal * element is smaller. * L1 = 1 NM1 = N - 1 * 10 CONTINUE IF( L1.GT.N ) $ GO TO 160 IF( L1.GT.1 ) $ E( L1-1 ) = ZERO IF( L1.LE.NM1 ) THEN DO 20 M = L1, NM1 TST = ABS( E( M ) ) IF( TST.EQ.ZERO ) $ GO TO 30 IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ $ 1 ) ) ) )*EPS ) THEN E( M ) = ZERO GO TO 30 END IF 20 CONTINUE END IF M = N * 30 CONTINUE L = L1 LSV = L LEND = M LENDSV = LEND L1 = M + 1 IF( LEND.EQ.L ) $ GO TO 10 * * Scale submatrix in rows and columns L to LEND * ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) ISCALE = 0 IF( ANORM.EQ.ZERO ) $ GO TO 10 IF( ANORM.GT.SSFMAX ) THEN ISCALE = 1 CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, $ INFO ) CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, $ INFO ) ELSE IF( ANORM.LT.SSFMIN ) THEN ISCALE = 2 CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, $ INFO ) CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, $ INFO ) END IF * * Choose between QL and QR iteration * IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN LEND = LSV L = LENDSV END IF * IF( LEND.GT.L ) THEN * * QL Iteration * * Look for small subdiagonal element. * 40 CONTINUE IF( L.NE.LEND ) THEN LENDM1 = LEND - 1 DO 50 M = L, LENDM1 TST = ABS( E( M ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ $ SAFMIN )GO TO 60 50 CONTINUE END IF * M = LEND * 60 CONTINUE IF( M.LT.LEND ) $ E( M ) = ZERO P = D( L ) IF( M.EQ.L ) $ GO TO 80 * * If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L+1 ) THEN IF( ICOMPZ.GT.0 ) THEN CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) WORK( L ) = C WORK( N-1+L ) = S CALL SLASR( 'R', 'V', 'B', NR, 2, WORK( L ), $ WORK( N-1+L ), Z( 1, L ), LDZ ) ELSE CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) END IF D( L ) = RT1 D( L+1 ) = RT2 E( L ) = ZERO L = L + 2 IF( L.LE.LEND ) $ GO TO 40 GO TO 140 END IF * IF( JTOT.EQ.NMAXIT ) $ GO TO 140 JTOT = JTOT + 1 * * Form shift. * G = ( D( L+1 )-P ) / ( TWO*E( L ) ) R = SLAPY2( G, ONE ) G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) * S = ONE C = ONE P = ZERO * * Inner loop * MM1 = M - 1 DO 70 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL SLARTG( G, F, C, S, R ) IF( I.NE.M-1 ) $ E( I+1 ) = R G = D( I+1 ) - P R = ( D( I )-G )*S + TWO*C*B P = S*R D( I+1 ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * IF( ICOMPZ.GT.0 ) THEN WORK( I ) = C WORK( N-1+I ) = -S END IF * 70 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * IF( ICOMPZ.GT.0 ) THEN MM = M - L + 1 CALL SLASR( 'R', 'V', 'B', NR, MM, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) END IF * D( L ) = D( L ) - P E( L ) = G GO TO 40 * * Eigenvalue found. * 80 CONTINUE D( L ) = P * L = L + 1 IF( L.LE.LEND ) $ GO TO 40 GO TO 140 * ELSE * * QR Iteration * * Look for small superdiagonal element. * 90 CONTINUE IF( L.NE.LEND ) THEN LENDP1 = LEND + 1 DO 100 M = L, LENDP1, -1 TST = ABS( E( M-1 ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ $ SAFMIN )GO TO 110 100 CONTINUE END IF * M = LEND * 110 CONTINUE IF( M.GT.LEND ) $ E( M-1 ) = ZERO P = D( L ) IF( M.EQ.L ) $ GO TO 130 * * If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L-1 ) THEN IF( ICOMPZ.GT.0 ) THEN CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) WORK( M ) = C WORK( N-1+M ) = S CALL SLASR( 'R', 'V', 'F', NR, 2, WORK( M ), $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) ELSE CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) END IF D( L-1 ) = RT1 D( L ) = RT2 E( L-1 ) = ZERO L = L - 2 IF( L.GE.LEND ) $ GO TO 90 GO TO 140 END IF * IF( JTOT.EQ.NMAXIT ) $ GO TO 140 JTOT = JTOT + 1 * * Form shift. * G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) R = SLAPY2( G, ONE ) G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) * S = ONE C = ONE P = ZERO * * Inner loop * LM1 = L - 1 DO 120 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL SLARTG( G, F, C, S, R ) IF( I.NE.M ) $ E( I-1 ) = R G = D( I ) - P R = ( D( I+1 )-G )*S + TWO*C*B P = S*R D( I ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * IF( ICOMPZ.GT.0 ) THEN WORK( I ) = C WORK( N-1+I ) = S END IF * 120 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * IF( ICOMPZ.GT.0 ) THEN MM = L - M + 1 CALL SLASR( 'R', 'V', 'F', NR, MM, WORK( M ), WORK( N-1+M ), $ Z( 1, M ), LDZ ) END IF * D( L ) = D( L ) - P E( LM1 ) = G GO TO 90 * * Eigenvalue found. * 130 CONTINUE D( L ) = P * L = L - 1 IF( L.GE.LEND ) $ GO TO 90 GO TO 140 * END IF * * Undo scaling if necessary * 140 CONTINUE IF( ISCALE.EQ.1 ) THEN CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ELSE IF( ISCALE.EQ.2 ) THEN CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) END IF * * Check for no convergence to an eigenvalue after a total * of N*MAXIT iterations. * IF( JTOT.LT.NMAXIT ) $ GO TO 10 DO 150 I = 1, N - 1 IF( E( I ).NE.ZERO ) $ INFO = INFO + 1 150 CONTINUE GO TO 190 * * Order eigenvalues and eigenvectors. * 160 CONTINUE IF( ICOMPZ.EQ.0 ) THEN * * Use Quick Sort * CALL SLASRT( 'I', N, D, INFO ) * ELSE * * Use Selection Sort to minimize swaps of eigenvectors * DO 180 II = 2, N I = II - 1 K = I P = D( I ) DO 170 J = II, N IF( D( J ).LT.P ) THEN K = J P = D( J ) END IF 170 CONTINUE IF( K.NE.I ) THEN D( K ) = D( I ) D( I ) = P CALL SSWAP( NR, Z( 1, I ), 1, Z( 1, K ), 1 ) END IF 180 CONTINUE END IF * 190 CONTINUE RETURN * * End of SSTEQR2 * END scalapack-1.8.0/SRC/pzlacp3.f0000640000175000017500000003024110363532303015466 0ustar muammarmuammar SUBROUTINE PZLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, II, JJ, LDB, M, REV * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), B( LDB, * ) * .. * * Purpose * ======= * * PZLACP3 is an auxiliary routine that copies from a global parallel * array into a local replicated array or vise versa. Notice that * the entire submatrix that is copied gets placed on one node or * more. The receiving node can be specified precisely, or all nodes * can receive, or just one row or column of nodes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * M is the order of the square submatrix that is copied. * M >= 0. * Unchanged on exit * * I (global input) INTEGER * A(I,I) is the global location that the copying starts from. * Unchanged on exit. * * A (global input/output) COMPLEX*16 array, dimension * (DESCA(LLD_),*) * On entry, the parallel matrix to be copied into or from. * On exit, if REV=1, the copied data. * Unchanged on exit if REV=0. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/output) COMPLEX*16 array of size (LDB,M) * If REV=0, this is the global portion of the array * A(I:I+M-1,I:I+M-1). * If REV=1, this is the unchanged on exit. * * LDB (local input) INTEGER * The leading dimension of B. * * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) * receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data * If II>=0,JJ=-1, then all cols in row II receive the data * If II=-1,JJ=-1, then all nodes receive the data * If REV<>0, then II is the source row index for the node(s) * sending the replicated B. * * JJ (global input) INTEGER * Similar description as II above * * REV (global input) INTEGER * Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in * A) into the global A. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN, $ III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP, $ JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG1L, ZGEBR2D, ZGEBS2D, $ ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.LE.0 ) $ RETURN * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M DO 10 IDJ = 1, M B( IDI, IDJ ) = ZERO 10 CONTINUE 20 CONTINUE END IF * IFIN = I + M - 1 * IF( MOD( I+HBL, HBL ).NE.0 ) THEN ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN ) ELSE ISTOP = I END IF IDJ = I ISTOPJ = ISTOP IF( IDJ.LE.IFIN ) THEN 30 CONTINUE IDI = I ISTOPI = ISTOP IF( IDI.LE.IFIN ) THEN 40 CONTINUE ROW = MOD( ( IDI-1 ) / HBL, NPROW ) COL = MOD( ( IDJ-1 ) / HBL, NPCOL ) CALL INFOG1L( IDI, HBL, NPROW, ROW, 0, IROW1, ITMP ) IROW2 = NUMROC( ISTOPI, HBL, ROW, 0, NPROW ) CALL INFOG1L( IDJ, HBL, NPCOL, COL, 0, ICOL1, ITMP ) ICOL2 = NUMROC( ISTOPJ, HBL, COL, 0, NPCOL ) IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to everyone * IF( REV.EQ.0 ) THEN CALL ZGEBS2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.NE.-1 ) ) THEN * * Send the message to Column MYCOL which better be JJ * IF( REV.EQ.0 ) THEN CALL ZGEBS2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to Row MYROW which better be II * IF( REV.EQ.0 ) THEN CALL ZGEBS2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND. $ ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN * * Recv/Send the message to (II,JJ) * IF( REV.EQ.0 ) THEN CALL ZGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ A( ( ICOL1-1 )*LDA+IROW1 ), LDA, II, $ JJ ) ELSE CALL ZGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, II, JJ ) END IF END IF IF( REV.EQ.0 ) THEN DO 60 JJJ = ICOL1, ICOL2 DO 50 III = IROW1, IROW2 B( IDI+III-IROW1+1-I, IDJ+JJJ-ICOL1+1-I ) $ = A( ( JJJ-1 )*LDA+III ) 50 CONTINUE 60 CONTINUE ELSE DO 80 JJJ = ICOL1, ICOL2 DO 70 III = IROW1, IROW2 A( ( JJJ-1 )*LDA+III ) = B( IDI+III-IROW1+1-I, $ IDJ+JJJ-ICOL1+1-I ) 70 CONTINUE 80 CONTINUE END IF ELSE IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL ZGEBR2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL ZGEBR2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL ZGEBR2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL ZGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) ELSE CALL ZGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) * CALL ZGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, * $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL) END IF END IF END IF IDI = ISTOPI + 1 ISTOPI = MIN( ISTOPI+HBL, IFIN ) IF( IDI.LE.IFIN ) $ GO TO 40 END IF IDJ = ISTOPJ + 1 ISTOPJ = MIN( ISTOPJ+HBL, IFIN ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF RETURN * * End of PZLACP3 * END scalapack-1.8.0/SRC/pzgehd2.f0000640000175000017500000002700710363532303015463 0ustar muammarmuammar SUBROUTINE PZGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEHD2 reduces a complex general distributed matrix sub( A ) * to upper Hessenberg form H by an unitary similarity transformation: * Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+JLO-2 * and JA+JHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16 array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB + MAX( NpA0, NB ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on * exit in A(ia+ilo+i:ia+ihi-1,ja+ilo+i-2), and tau in TAU(ja+ilo+i-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follo- * wing example, with n = 7, ilo = 2 and ihi = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), h denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(ja+ilo+i-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IAROW, ICOFFA, ICTXT, IROFFA, J, K, LWMIN, $ MYCOL, MYROW, NPA0, NPCOL, NPROW COMPLEX*16 AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PXERBLA, $ PZELSET, PZLARF, PZLARFC, PZLARFG * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NPA0 = NUMROC( IHI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = DESCA( NB_ ) + MAX( NPA0, DESCA( NB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * DO 10 K = ILO, IHI-1 I = IA + K - 1 J = JA + K - 1 * * Compute elementary reflector H(j) to annihilate * A(i+2:ihi+ia-1,j) * CALL PZLARFG( IHI-K, AII, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PZELSET( A, I+1, J, DESCA, ONE ) * * Apply H(k) to A(ia:ihi+ia-1,j+1:ihi+ja-1) from the right * CALL PZLARF( 'Right', IHI, IHI-K, A, I+1, J, DESCA, 1, TAU, A, $ IA, J+1, DESCA, WORK ) * * Apply H(j) to A(i+1:ia+ihi-1,j+1:ja+n-1) from the left * CALL PZLARFC( 'Left', IHI-K, N-K, A, I+1, J, DESCA, 1, TAU, A, $ I+1, J+1, DESCA, WORK ) * CALL PZELSET( A, I+1, J, DESCA, AII ) 10 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEHD2 * END scalapack-1.8.0/SRC/psorg2r.f0000640000175000017500000002563410363532303015522 0ustar muammarmuammar SUBROUTINE PSORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORG2R generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PSGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PSGEQRF in the K columns of its array * argument A(IA:*,JA:JA+K-1). On exit, this array contains * the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, KQ, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 REAL TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLASET, PSSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja+k:ja+n-1 to columns of the unit matrix * CALL PSLASET( 'All', K, N-K, ZERO, ZERO, A, IA, JA+K, DESCA ) CALL PSLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, DESCA ) * TAUJ = ZERO KQ = MAX( 1, NUMROC( JA+K-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) DO 10 J = JA+K-1, JA, -1 * * Apply H(j) to A(ia+j-ja:ia+m-1,j:ja+n-1) from the left * IF( J.LT.JA+N-1 ) THEN CALL PSELSET( A, IA+J-JA, J, DESCA, ONE ) CALL PSLARF( 'Left', M-J+JA, JA+N-J-1, A, IA+J-JA, J, DESCA, $ 1, TAU, A, IA+J-JA, J+1, DESCA, WORK ) END IF * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, KQ ) ) IF( J-JA.LT.M-1 ) $ CALL PSSCAL( M-J+JA-1, -TAUJ, A, IA+J-JA+1, J, DESCA, 1 ) CALL PSELSET( A, IA+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia:ia+j-ja-1,j) to zero * CALL PSLASET( 'All', J-JA, 1, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORG2R * END scalapack-1.8.0/SRC/pchentrd.f0000640000175000017500000005315710363532303015734 0ustar muammarmuammar SUBROUTINE PCHENTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ), RWORK( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * Bugs * ==== * * * Support for UPLO='U' is limited to calling the old, slow, PCHETRD * code. * * * Purpose * * ======= * * PCHENTRD is a prototype version of PCHETRD which uses tailored * codes (either the serial, CHETRD, or the parallel code, PCHETTRD) * when the workspace provided by the user is adequate. * * * PCHENTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Features * ======== * * PCHENTRD is faster than PCHETRD on almost all matrices, * particularly small ones (i.e. N < 500 * sqrt(P) ), provided that * enough workspace is available to use the tailored codes. * * The tailored codes provide performance that is essentially * independent of the input data layout. * * The tailored codes place no restrictions on IA, JA, MB or NB. * At present, IA, JA, MB and NB are restricted to those values allowed * by PCHETRD to keep the interface simple. These restrictions are * documented below. (Search for "restrictions".) * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * For optimal performance, greater workspace is needed, i.e. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS + 4 ) * NPS * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = INT( SQRT( REAL( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * RWORK (local workspace/local output) COMPLEX array, * dimension (LRWORK) * On exit, RWORK( 1 ) returns the optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 1 * * For optimal performance, greater workspace is needed, i.e. * LRWORK >= MAX( 2 * N ) * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER ANB, CTXTB, I, IACOL, IAROW, ICOFFA, ICTXT, $ IINFO, INDB, INDRD, INDRE, INDTAU, INDW, IPW, $ IROFFA, J, JB, JX, K, KK, LLRWORK, LLWORK, $ LRWMIN, LWMIN, MINSZ, MYCOL, MYCOLB, MYROW, $ MYROWB, NB, NP, NPCOL, NPCOLB, NPROW, NPROWB, $ NPS, NQ, ONEPMIN, ONEPRMIN, SQNPC, TTLRWMIN, $ TTLWMIN * .. * .. Local Arrays .. INTEGER DESCB( DLEN_ ), DESCW( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, CHETRD, CHK1MAT, DESCSET, $ IGAMN2D, PCELSET, PCHER2K, PCHETD2, PCHETTRD, $ PCHK1MAT, PCLAMR1D, PCLATRD, PCTRMR2D, $ PSLAMR1D, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC, PJLAENV EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, INT, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( ( NP+1 )*NB, 3*NB ) ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) MINSZ = PJLAENV( ICTXT, 5, 'PCHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( REAL( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) TTLWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+2 )*NPS LRWMIN = 1 TTLRWMIN = 2*NPS * WORK( 1 ) = CMPLX( REAL( TTLWMIN ) ) RWORK( 1 ) = REAL( TTLRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 * * The following two restrictions are not necessary provided * that either of the tailored codes are used. * ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 600+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 13 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHENTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ONEPMIN = N*N + 3*N + 1 LLWORK = LWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLWORK, 1, 1, -1, -1, -1, $ -1 ) * ONEPRMIN = 2*N LLRWORK = LRWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLRWORK, 1, 1, -1, -1, -1, $ -1 ) * * * Use the serial, LAPACK, code: CTRD on small matrices if we * we have enough space. * NPROWB = 0 IF( ( N.LT.MINSZ .OR. SQNPC.EQ.1 ) .AND. LLWORK.GE.ONEPMIN .AND. $ LLRWORK.GE.ONEPRMIN .AND. .NOT.UPPER ) THEN NPROWB = 1 NPS = N ELSE IF( LLWORK.GE.TTLWMIN .AND. LLRWORK.GE.TTLRWMIN .AND. .NOT. $ UPPER ) THEN NPROWB = SQNPC END IF END IF * IF( NPROWB.GE.1 ) THEN NPCOLB = NPROWB SQNPC = NPROWB INDB = 1 INDRD = 1 INDRE = INDRD + NPS INDTAU = INDB + NPS*NPS INDW = INDTAU + NPS LLWORK = LLWORK - INDW + 1 * CALL BLACS_GET( ICTXT, 10, CTXTB ) CALL BLACS_GRIDINIT( CTXTB, 'Row major', SQNPC, SQNPC ) CALL BLACS_GRIDINFO( CTXTB, NPROWB, NPCOLB, MYROWB, MYCOLB ) CALL DESCSET( DESCB, N, N, 1, 1, 0, 0, CTXTB, NPS ) * CALL PCTRMR2D( UPLO, 'N', N, N, A, IA, JA, DESCA, WORK( INDB ), $ 1, 1, DESCB, ICTXT ) * * * Only those processors in context CTXTB are needed for a while * IF( NPROWB.GT.0 ) THEN * IF( NPROWB.EQ.1 ) THEN CALL CHETRD( UPLO, N, WORK( INDB ), NPS, RWORK( INDRD ), $ RWORK( INDRE ), WORK( INDTAU ), $ WORK( INDW ), LLWORK, INFO ) ELSE * CALL PCHETTRD( 'L', N, WORK( INDB ), 1, 1, DESCB, $ RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDTAU ), WORK( INDW ), LLWORK, $ INFO ) * END IF END IF * * All processors participate in moving the data back to the * way that PCHENTRD expects it. * CALL PSLAMR1D( N-1, RWORK( INDRE ), 1, 1, DESCB, E, 1, JA, $ DESCA ) * CALL PSLAMR1D( N, RWORK( INDRD ), 1, 1, DESCB, D, 1, JA, $ DESCA ) * CALL PCLAMR1D( N, WORK( INDTAU ), 1, 1, DESCB, TAU, 1, JA, $ DESCA ) * CALL PCTRMR2D( UPLO, 'N', N, N, WORK( INDB ), 1, 1, DESCB, A, $ IA, JA, DESCA, ICTXT ) * IF( MYROWB.GE.0 ) $ CALL BLACS_GRIDEXIT( CTXTB ) * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP*NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, $ INDXG2P( JA+N-KK, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, NP ) ) * DO 10 K = N - KK + 1, NB + 1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PCLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, $ TAU, WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PCHER2K( UPLO, 'No transpose', K-1, JB, -CONE, A, $ IA, J, DESCA, WORK, 1, 1, DESCW, ONE, A, $ IA, JA, DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PCELSET( A, I-1, J, DESCA, CMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+NPCOL-1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PCHETD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N - NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PCLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PCHER2K( UPLO, 'No transpose', N-K-NB+1, NB, -CONE, $ A, I+NB, J, DESCA, WORK, K+NB, 1, DESCW, $ ONE, A, I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PCELSET( A, I+NB, J+NB-1, DESCA, CMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PCHETD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, TAU, $ WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * END IF * WORK( 1 ) = CMPLX( REAL( TTLWMIN ) ) RWORK( 1 ) = REAL( TTLRWMIN ) * RETURN * * End of PCHENTRD * END scalapack-1.8.0/SRC/pdpttrsv.f0000640000175000017500000011026410363532303016004 0ustar muammarmuammar SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION AF( * ), B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Cholesky factorization code PDPTTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PDPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPTTRF and this is stored in AF. If a linear system * is to be solved using PDPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LEVEL_DIST, LLDA, LLDB, $ MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DAXPY, $ DESC_CONVERT, DGEMM, DGERV2D, DGESD2D, DMATADD, $ DPTTRSV, DTRTRS, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 7*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 10*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 10*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 10*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 10*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 7*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 10*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 10*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -6 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 7*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PDPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PDPTTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB( 5 ) PARAM_CHECK( 14, 1 ) = DESCB( 4 ) PARAM_CHECK( 13, 1 ) = DESCB( 3 ) PARAM_CHECK( 12, 1 ) = DESCB( 2 ) PARAM_CHECK( 11, 1 ) = DESCB( 1 ) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 15, 2 ) = 1005 PARAM_CHECK( 14, 2 ) = 1004 PARAM_CHECK( 13, 2 ) = 1003 PARAM_CHECK( 12, 2 ) = 1002 PARAM_CHECK( 11, 2 ) = 1001 PARAM_CHECK( 10, 2 ) = 9 PARAM_CHECK( 9, 2 ) = 705 PARAM_CHECK( 8, 2 ) = 704 PARAM_CHECK( 7, 2 ) = 703 PARAM_CHECK( 6, 2 ) = 701 PARAM_CHECK( 5, 2 ) = 6 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 100 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DPTTRSV( 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'T', 'N', 1, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 90 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*1+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*1+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*1+2+1 ), INT_ONE, WORK( 1 ), $ INT_ONE, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*1+1 ), INT_ONE, WORK( 1 ), $ INT_ONE, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTRTRS( 'L', 'T', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 90 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'N', 'N', ODD_SIZE, NRHS, 1, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-1 ), INT_ONE, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DPTTRSV( 'T', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 90 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 100 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDPTTRSV * END scalapack-1.8.0/SRC/pcpbtrf.f0000640000175000017500000014171210363532303015560 0ustar muammarmuammar SUBROUTINE PCPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PCPBTRF computes a Cholesky factorization * of an N-by-N complex banded * symmetric positive definite distributed matrix * with bandwidth BW: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PCPBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' U , if UPLO = 'U', or * * P A(1:N, JA:JA+N-1) P^T = L L', if UPLO = 'L' * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPBTRF and this is stored in AF. If a linear system * is to be solved using PCPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * bw*bw * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CAXPY, CGEMM, CGERV2D, CGESD2D, CLACPY, $ CLATCPY, CPBTRF, CPOTRF, CSYRK, CTBTRS, CTRMM, $ CTRRV2D, CTRSD2D, CTRSM, CTRTRS, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW * BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 6*100 + 4 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCPBTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPBTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (NB+2*BW)*BW * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PCPBTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = BW*BW * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PCPBTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 10 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Zero out space for work * DO 20 I=1, WORK_SIZE_MIN WORK( I ) = CZERO 20 CONTINUE * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL CTRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+ODD_SIZE*LLDA+(BW+1) ), $ LLDA-1, 0, MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {L_i}^C in each processor * CALL CPBTRF( UPLO, ODD_SIZE, BW, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * conjugate transpose the connection block in preparation. * CALL CLATCPY( 'U', BW, BW, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * Perform the triangular system solve {L_i}{{B'}_i}^C = {B_i}^C * CALL CTRTRS( 'L', 'N', 'N', BW, BW, $ A( OFST+1+(ODD_SIZE-BW)*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * * conjugate transpose resulting block to its location * in main storage. * CALL CLATCPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), $ LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^C * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL CHERK( UPLO, 'C', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+1+ODD_SIZE*LLDA ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL CTRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * CALL CTBTRS( 'L', 'N', 'N', ODD_SIZE, BW, BW, A( OFST + 1 ), $ LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * CALL CHERK( 'L', 'C', BW, ODD_SIZE, $ -ONE, AF( 1 ), ODD_SIZE, ZERO, $ AF( 1 + (ODD_SIZE+2*BW)*BW), BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL CGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine CTRMM. * Since we have G_i^C stored, conjugate transpose * H_i^C to H_i. * CALL CLATCPY( 'N', BW, BW, $ AF( ODD_SIZE-BW+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BW+1), BW ) * CALL CTRMM( 'R', 'U', 'C', 'N', BW, BW, -CONE, $ A( ( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA ) ), LLDA-1, $ AF( (ODD_SIZE)*BW+1 ), BW ) * * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL CGESD2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL CLACPY( 'N', BW, BW, $ A( OFST+ODD_SIZE*LLDA+1 ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), $ BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL CAXPY( MBW2, CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL CGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL CGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL CPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL CLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL CTRSM( 'L', 'L', 'N', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL CHERK( 'L', 'C', BW, BW, -ONE, $ AF( (ODD_SIZE)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL CTRSM( 'R', 'L', 'C', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL CHERK( 'L', 'N', BW, BW, -ONE, $ AF( (ODD_SIZE+2*BW)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL CGEMM( 'N', 'N', BW, BW, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, CZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * * CASE UPLO = 'U' * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL .GT. 0 ) THEN PREV_TRI_SIZE_M= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) PREV_TRI_SIZE_N=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL-1, 0, NPCOL ) ) ENDIF * IF( MYCOL .LT. NPCOL-1 ) THEN NEXT_TRI_SIZE_M=MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL+1, 0, NPCOL ) ) NEXT_TRI_SIZE_N= MIN( BW, $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) ) ENDIF * * * * Factor main partition A_i^C = U_i {U_i}^C in each processor * CALL CPBTRF( UPLO, ODD_SIZE, BW, A( OFST + 1), $ LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1600 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * Move the connection block in preparation. * CALL CLACPY( 'L', BW, BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * * Perform the triangular solve {L_i}{{B'}_i}^C = {B_i}^C * CALL CTRTRS( 'U', 'C', 'N', BW, BW, $ A( OFST+BW+1+(ODD_SIZE-BW)*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * Move the resulting block back to its location in main storage. * CALL CLACPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i^C = {C_i}^C-{{B'}_i}^C{{B'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL CHERK( UPLO, 'C', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+BW+1+ODD_SIZE*LLDA ), LLDA-1 ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1600 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Calculate the "spike" fillin, ${L_i} {{G}_i}^C = {D_i}$ . * * * Copy D block into AF storage for solve. * CALL CLATCPY( 'L', PREV_TRI_SIZE_N, PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * IF ( INFO.EQ.0 ) THEN * CALL CTBTRS( 'U', 'C', 'N', ODD_SIZE, BW, BW, $ A( OFST + 1 ), LLDA, $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^C * CALL CHERK( 'L', 'C', BW, ODD_SIZE, $ -ONE, AF( 1 ), ODD_SIZE, ZERO, $ AF( 1 + (ODD_SIZE+2*BW)*BW), BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL CGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * {F_i}^C = {H_i}{{B'}_i}^C * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine CTRMM. * Since we have G_i^C stored, conjugate transpose * H_i^C to H_i. * CALL CLATCPY( 'N', BW, BW, $ AF( ODD_SIZE-BW+1 ), ODD_SIZE, $ AF( (ODD_SIZE)*BW+1), BW ) * CALL CTRMM( 'R', 'L', 'N', 'N', BW, BW, -CONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( (ODD_SIZE)*BW+1 ), BW ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL CGESD2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, MYCOL-1 ) * ENDIF * * Transpose last diagonal block into AF storage for subsequent * operations. * CALL CLATCPY( 'U', BW, BW, $ A( OFST+ ODD_SIZE*LLDA+1+BW ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), $ BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * CALL CAXPY( MBW2, CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ 1, AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 22 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 21 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL CGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL CGERV2D( ICTXT, BW, BW, WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL CAXPY( MBW2, CONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 22 21 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL CPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL CLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+1 ), $ BW, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL CTRSM( 'L', 'L', 'N', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL CHERK( 'L', 'C', BW, BW, -ONE, $ AF( (ODD_SIZE)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL CGERV2D( ICTXT, BW, BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL CTRSM( 'R', 'L', 'C', 'N', BW, BW, CONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL CHERK( 'L', 'N', BW, BW, -ONE, $ AF( (ODD_SIZE+2*BW)*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL CGEMM( 'N', 'N', BW, BW, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, CZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 24 CONTINUE * ENDIF * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PCPBTRF * END scalapack-1.8.0/SRC/pssyngst.f0000640000175000017500000004141610363532303016012 0ustar muammarmuammar SUBROUTINE PSSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ) * .. * * Purpose * * ======= * * PSSYNGST reduces a complex Hermitian-definite generalized * eigenproblem to standard form. * * PSSYNGST performs the same function as PSHEGST, but is based on * rank 2K updates, which are faster and more scalable than * triangular solves (the basis of PSSYNGST). * * PSSYNGST calls PSHEGST when UPLO='U', hence PSHENGST provides * improved performance only when UPLO='L', IBTYPE=1. * * PSSYNGST also calls PSHEGST when insufficient workspace is * provided, hence PSSYNGST provides improved * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PSPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PSPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) REAL * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB ) * * When IBTYPE = 1 and UPLO = 'L', PSSYNGST provides improved * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB * * where NB = MB_A = NB_A, * NP0 = NUMROC( N, NB, 0, 0, NPROW ), * NQ0 = NUMROC( N, NB, 0, 0, NPROW ), * * NUMROC ia a ScaLAPACK tool functions * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the * optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * * .. Parameters .. REAL ONEHALF, ONE, MONE PARAMETER ( ONEHALF = 0.5E0, ONE = 1.0E0, MONE = -1.0E0 ) INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( DLEN_ = 9, CTXT_ = 2, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, INDAA, INDG, INDR, INDRT, IROFFA, $ IROFFB, J, K, KB, LWMIN, LWOPT, MYCOL, MYROW, $ NB, NP0, NPCOL, NPK, NPROW, NQ0, POSTK * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ), $ DESCR( DLEN_ ), DESCRT( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL LSAME, INDXG2P, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PSGEMM, PSLACPY, PSSYGST, PSSYMM, PSSYR2K, $ PSTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) SCALE = 1.0E0 * NB = DESCA( MB_ ) * * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) LWMIN = MAX( NB*( NP0+1 ), 3*NB ) IF( IBTYPE.EQ.1 .AND. .NOT.UPPER ) THEN LWOPT = 2*NP0*NB + NQ0*NB + NB*NB ELSE LWOPT = LWMIN END IF WORK( 1 ) = REAL( LWOPT ) LQUERY = ( LWORK.EQ.-1 ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYNGST', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * IF( IBTYPE.NE.1 .OR. UPPER .OR. LWORK.LT.LWOPT ) THEN CALL PSSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) RETURN END IF * CALL DESCSET( DESCG, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCR, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 ) CALL DESCSET( DESCRT, NB, N, NB, NB, IAROW, IACOL, ICTXT, NB ) CALL DESCSET( DESCAA, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB ) * INDG = 1 INDR = INDG + DESCG( LLD_ )*NB INDAA = INDR + DESCR( LLD_ )*NB INDRT = INDAA + DESCAA( LLD_ )*NB * DO 30 K = 1, N, NB * KB = MIN( N-K+1, NB ) POSTK = K + KB NPK = N - POSTK + 1 * * CALL PSLACPY( 'A', N-POSTK+1, KB, B, POSTK+IB-1, K+JB-1, DESCB, $ WORK( INDG ), POSTK, 1, DESCG ) CALL PSLACPY( 'A', N-POSTK+1, KB, A, POSTK+IA-1, K+JA-1, DESCA, $ WORK( INDR ), POSTK, 1, DESCR ) CALL PSLACPY( 'A', KB, K-1, A, K+IA-1, JA, DESCA, $ WORK( INDRT ), 1, 1, DESCRT ) * CALL PSLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDR ), K, 1, DESCR ) CALL PSTRSM( 'Right', 'L', 'N', 'N', NPK, KB, MONE, B, K+IB-1, $ K+JB-1, DESCB, WORK( INDG ), POSTK, 1, DESCG ) * CALL PSSYMM( 'Right', 'L', NPK, KB, ONEHALF, A, K+IA-1, K+JA-1, $ DESCA, WORK( INDG ), POSTK, 1, DESCG, ONE, $ WORK( INDR ), POSTK, 1, DESCR ) * CALL PSSYR2K( 'Lower', 'No T', NPK, KB, ONE, WORK( INDG ), $ POSTK, 1, DESCG, WORK( INDR ), POSTK, 1, DESCR, $ ONE, A, POSTK+IA-1, POSTK+JA-1, DESCA ) * CALL PSGEMM( 'No T', 'No Conj', NPK, K-1, KB, ONE, $ WORK( INDG ), POSTK, 1, DESCG, WORK( INDRT ), 1, $ 1, DESCRT, ONE, A, POSTK+IA-1, JA, DESCA ) * CALL PSSYMM( 'Right', 'L', NPK, KB, ONE, WORK( INDR ), K, 1, $ DESCR, WORK( INDG ), POSTK, 1, DESCG, ONE, A, $ POSTK+IA-1, K+JA-1, DESCA ) * CALL PSTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, K-1, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, K+IA-1, JA, $ DESCA ) * CALL PSLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA, $ WORK( INDAA ), 1, 1, DESCAA ) * IF( MYROW.EQ.DESCAA( RSRC_ ) .AND. MYCOL.EQ.DESCAA( CSRC_ ) ) $ THEN DO 20 I = 1, KB DO 10 J = 1, I WORK( INDAA+J-1+( I-1 )*DESCAA( LLD_ ) ) $ = WORK( INDAA+I-1+( J-1 )*DESCAA( LLD_ ) ) 10 CONTINUE 20 CONTINUE END IF * CALL PSTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, $ 1, DESCAA ) * CALL PSTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', KB, KB, ONE, $ B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1, 1, $ DESCAA ) * CALL PSLACPY( 'L', KB, KB, WORK( INDAA ), 1, 1, DESCAA, A, $ K+IA-1, K+JA-1, DESCA ) * CALL PSTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', NPK, KB, $ ONE, B, K+IB-1, K+JB-1, DESCB, A, POSTK+IA-1, $ K+JA-1, DESCA ) * DESCR( CSRC_ ) = MOD( DESCR( CSRC_ )+1, NPCOL ) DESCG( CSRC_ ) = MOD( DESCG( CSRC_ )+1, NPCOL ) DESCRT( RSRC_ ) = MOD( DESCRT( RSRC_ )+1, NPROW ) DESCAA( RSRC_ ) = MOD( DESCAA( RSRC_ )+1, NPROW ) DESCAA( CSRC_ ) = MOD( DESCAA( CSRC_ )+1, NPCOL ) 30 CONTINUE * WORK( 1 ) = REAL( LWOPT ) * RETURN END scalapack-1.8.0/SRC/pctrrfs.f0000640000175000017500000007440110363532303015603 0ustar muammarmuammar SUBROUTINE PCTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LRWORK, LWORK, $ N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) REAL BERR( * ), FERR( * ), RWORK( * ) COMPLEX A( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PCTRRFS provides error bounds and backward error estimates for the * solution to a system of linear equations with a triangular * coefficient matrix. * * The solution matrix X must be computed by PCTRTRS or some other * means before entering this routine. PCTRRFS does not do iterative * refinement because doing so cannot improve the backward error. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**H * sub( X ) = sub( B ) * (Conjugate transpose) * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_A,LOCc(JA+N-1) ). This * array contains the local pieces of the original triangular * distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * If DIAG = 'U', the diagonal elements of sub( A ) are also * not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) COMPLEX pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The estimated forward error bounds for * each solution vector of sub( X ). If XTRUE is the true * solution, FERR bounds the magnitude of the largest entry * in (sub( X ) - XTRUE) divided by the magnitude of the * largest entry in sub( X ). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices sub( X ) and sub( B ) should be * distributed the same way on the same processes. These conditions * ensure that sub( X ) and sub( B ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, RONE PARAMETER ( ZERO = 0.0E+0, RONE = 1.0E+0 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER CHARACTER TRANSN, TRANST INTEGER IAROW, IXBCOL, IXBROW, IXCOL, IXROW, ICOFFA, $ ICOFFB, ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, $ IIW, IOFFXB, IPB, IPR, IPV, IROFFA, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT, $ DESCSET, INFOG2L, PCATRMV, PCAXPY, PCHK1MAT, $ PCHK2MAT, PCCOPY, PCLACON, PCTRMV, $ PCTRSV, PXERBLA, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IX, JX, DESCX, 17, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2*NPMOD WORK( 1 ) = REAL( LWMIN ) LRWMIN = NPMOD RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( NRHS.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -15 ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -16 ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1700+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1700+NB_ ) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1700+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -21 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 21 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 23 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, 0, IDUM1, IDUM2, $ INFO ) CALL PCHK2MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, N, 4, NRHS, 5, $ IX, JX, DESCX, 17, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCTRRFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANSN = 'N' TRANST = 'C' ELSE TRANSN = 'C' TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = maximum number of nonzero entries in each row of A, plus 1 * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) )*DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 90 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PCCOPY( N, X, IX, JX+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PCAXPY( N, -ONE, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, $ JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the matrix * or vector Z. If the i-th component of the denominator is less * than SAFE2, then SAFE1 is added to the i-th components of the * numerator and denominator before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 20 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 20 CONTINUE END IF END IF * CALL PCATRMV( UPLO, TRANS, DIAG, N, RONE, A, IA, JA, DESCA, X, $ IX, JX+K, DESCX, 1, RONE, RWORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 30 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix or * vector Z * NZ is the maximum number of nonzeros in any row of A, plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 40 CONTINUE END IF END IF * KASE = 0 50 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PCTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 60 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 60 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF CALL PCTRSV( UPLO, TRANSN, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 50 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 80 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 90 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 180 J = JN + 1, JB + NRHS - 1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 170 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PCCOPY( N, X, IX, J+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PCAXPY( N, -ONE, B, IB, J+K, DESCB, 1, WORK( IPR ), $ IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 100 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 100 CONTINUE END IF END IF * CALL PCATRMV( UPLO, TRANS, DIAG, N, RONE, A, IA, JA, DESCA, $ X, IX, J+K, DESCX, 1, RONE, RWORK( IPB ), IW, $ JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 110 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 110 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))/norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of A, * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 120 CONTINUE END IF END IF * KASE = 0 130 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PCTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 140 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 150 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 150 CONTINUE END IF END IF CALL PCTRSV( UPLO, TRANSN, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 130 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 160 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 170 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) * RETURN * * End of PCTRRFS * END scalapack-1.8.0/SRC/pzlauum.f0000640000175000017500000002117510363532303015615 0ustar muammarmuammar SUBROUTINE PZLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAUUM computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the blocked form of the algorithm, calling Level 3 PBLAS. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the * distributed matrix sub( A ) is upper or lower triangular: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the triangular factor U or L. N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER I, J, JB, JN * .. * .. External Subroutines .. EXTERNAL PZGEMM, PZHERK, PZLAUU2, PZTRMM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * * Handle first block separately * JB = JN-JA+1 CALL PZLAUU2( 'Upper', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PZHERK( 'Upper', 'No transpose', JB, N-JB, ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PZTRMM( 'Right', 'Upper', 'Conjugate transpose', $ 'Non-unit', J-JA, JB, CONE, A, I, J, DESCA, $ A, IA, J, DESCA ) CALL PZLAUU2( 'Upper', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PZGEMM( 'No transpose', 'Conjugate transpose', $ J-JA, JB, N-J-JB+JA, CONE, A, IA, J+JB, $ DESCA, A, I, J+JB, DESCA, CONE, A, IA, $ J, DESCA ) CALL PZHERK( 'Upper', 'No transpose', JB, N-J-JB+JA, ONE, $ A, I, J+JB, DESCA, ONE, A, I, J, DESCA ) END IF 10 CONTINUE ELSE * * Compute the product L' * L. * * Handle first block separately * JB = JN-JA+1 CALL PZLAUU2( 'Lower', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PZHERK( 'Lower', 'Conjugate transpose', JB, N-JB, ONE, $ A, IA+JB, JA, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PZTRMM( 'Left', 'Lower', 'Conjugate Transpose', $ 'Non-unit', JB, J-JA, CONE, A, I, J, DESCA, A, $ I, JA, DESCA ) CALL PZLAUU2( 'Lower', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PZGEMM( 'Conjugate transpose', 'No transpose', JB, $ J-JA, N-J-JB+JA, CONE, A, I+JB, J, DESCA, $ A, I+JB, JA, DESCA, CONE, A, I, JA, DESCA ) CALL PZHERK( 'Lower', 'Conjugate transpose', JB, $ N-J-JB+JA, ONE, A, I+JB, J, DESCA, ONE, $ A, I, J, DESCA ) END IF 20 CONTINUE END IF * RETURN * * End of PZLAUUM * END scalapack-1.8.0/SRC/pzgehrd.f0000640000175000017500000003560010363532303015561 0ustar muammarmuammar SUBROUTINE PZGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEHRD reduces a complex general distributed matrix sub( A ) * to upper Hessenberg form H by an unitary similarity transformation: * Q' * sub( A ) * Q = H, where * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that sub( A ) is already upper triangular in * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+ILO-2 * and JA+IHI:JA+N-1. See Further Details. If N > 0, * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the N-by-N * general distributed matrix sub( A ) to be reduced. On exit, * the upper triangle and the first subdiagonal of sub( A ) are * overwritten with the upper Hessenberg matrix H, and the ele- * ments below the first subdiagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16 array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are * set to zero. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*NB + NB*MAX( IHIP+1, IHLP+INLQ ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ), * ICOFFA = MOD( JA-1, NB ), IOFF = MOD( IA+ILO-2, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ), * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ), * IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ), * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ), * INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of (ihi-ilo) elementary * reflectors * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:I) = 0, v(I+1) = 1 and v(IHI+1:N) = 0; v(I+2:IHI) is stored on * exit in A(IA+ILO+I:IA+IHI-1,JA+ILO+I-2), and tau in TAU(JA+ILO+I-2). * * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follow- * ing example, with N = 7, ILO = 2 and IHI = 6: * * on entry on exit * * ( a a a a a a a ) ( a a h h h h a ) * ( a a a a a a ) ( a h h h h a ) * ( a a a a a a ) ( h h h h h h ) * ( a a a a a a ) ( v2 h h h h h ) * ( a a a a a a ) ( v2 v3 h h h h ) * ( a a a a a a ) ( v2 v3 v4 h h h ) * ( a ) ( a ) * * where a denotes an element of the original matrix sub( A ), H denotes * a modified element of the upper Hessenberg matrix H, and vi denotes * an element of the vector defining H(JA+ILO+I-2). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, IB, ICOFFA, ICTXT, IHIP, $ IHLP, IIA, IINFO, ILCOL, ILROW, IMCOL, INLQ, $ IOFF, IPT, IPW, IPY, IROFFA, J, JJ, JJA, JY, $ K, L, LWMIN, MYCOL, MYROW, NB, NPCOL, NPROW, $ NQ COMPLEX*16 EI * .. * .. Local Arrays .. INTEGER DESCY( DLEN_ ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG1L, $ INFOG2L, PCHK1MAT, PB_TOPGET, PB_TOPSET, $ PXERBLA, PZGEMM, PZGEHD2, PZLAHRD, PZLARFB * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, NB ) ICOFFA = MOD( JA-1, NB ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ) IOFF = MOD( IA+ILO-2, NB ) ILROW = INDXG2P( IA+ILO-1, NB, MYROW, DESCA( RSRC_ ), $ NPROW ) IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ) ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ) LWMIN = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN INFO = -2 ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = ILO IDUM2( 1 ) = 2 IDUM1( 2 ) = IHI IDUM2( 2 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEHRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Set elements JA:JA+ILO-2 and JA+JHI-1:JA+N-2 of TAU to zero. * NQ = NUMROC( JA+N-2, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) CALL INFOG1L( JA+ILO-2, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 10 J = JJA, MIN( JJ, NQ ) TAU( J ) = ZERO 10 CONTINUE * CALL INFOG1L( JA+IHI-1, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ, $ IMCOL ) DO 20 J = JJ, NQ TAU( J ) = ZERO 20 CONTINUE * * Quick return if possible * IF( IHI-ILO.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPT = 1 IPY = IPT + NB * NB IPW = IPY + IHIP * NB CALL DESCSET( DESCY, IHI+IROFFA, NB, NB, NB, IAROW, ILCOL, ICTXT, $ MAX( 1, IHIP ) ) * K = ILO IB = NB - IOFF JY = IOFF + 1 * * Loop over remaining block of columns * DO 30 L = 1, IHI-ILO+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns j:j+ib-1 to Hessenberg form, returning the * matrices V and T of the block reflector H = I - V*T*V' * which performs the reduction, and also the matrix Y = A*V*T * CALL PZLAHRD( IHI, K, IB, A, IA, J, DESCA, TAU, WORK( IPT ), $ WORK( IPY ), 1, JY, DESCY, WORK( IPW ) ) * * Apply the block reflector H to A(ia:ia+ihi-1,j+ib:ja+ihi-1) * from the right, computing A := A - Y * V'. * V(i+ib,ib-1) must be set to 1. * CALL PZELSET2( EI, A, I+IB, J+IB-1, DESCA, ONE ) CALL PZGEMM( 'No transpose', 'Conjugate transpose', IHI, $ IHI-K-IB+1, IB, -ONE, WORK( IPY ), 1, JY, DESCY, $ A, I+IB, J, DESCA, ONE, A, IA, J+IB, DESCA ) CALL PZELSET( A, I+IB, J+IB-1, DESCA, EI ) * * Apply the block reflector H to A(i+1:ia+ihi-1,j+ib:ja+n-1) from * the left * CALL PZLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', IHI-K, N-K-IB+1, IB, A, I+1, J, $ DESCA, WORK( IPT ), A, I+1, J+IB, DESCA, $ WORK( IPY ) ) * K = K + IB IB = NB JY = 1 DESCY( CSRC_ ) = MOD( DESCY( CSRC_ ) + 1, NPCOL ) * 30 CONTINUE * * Use unblocked code to reduce the rest of the matrix * CALL PZGEHD2( N, K, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEHRD * END scalapack-1.8.0/SRC/psgesv.f0000640000175000017500000002307710367447133015444 0ustar muammarmuammar SUBROUTINE PSGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * Jan 30, 2006 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSGESV computes the solution to a real system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed * matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS * distributed matrices. * * The LU decomposition with partial pivoting and row interchanges is * used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu- * tation matrix, L is unit lower triangular, and U is upper triangular. * L and U are stored in sub( A ). The factored form of sub( A ) is then * used to solve the system of equations sub( A ) * X = sub( B ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the N-by-N distributed matrix * sub( A ) to be factored. On exit, this array contains the * local pieces of the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand side * distributed matrix sub( B ). On exit, if INFO = 0, sub( B ) * is overwritten by the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, so the solution could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSGETRF, $ PSGETRS, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P EXTERNAL INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( N, 1, NRHS, 2, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( IBROW.NE.IAROW .OR. ICOFFA.NE.IROFFB ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1100+CTXT_) END IF END IF CALL PCHK2MAT( N, 1, N, 1, IA, JA, DESCA, 6, N, 1, NRHS, 2, $ IB, JB, DESCB, 11, 0, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGESV', -INFO ) RETURN END IF * * Compute the LU factorization of sub( A ). * CALL PSGETRF( N, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ), overwriting sub( B ) * with X. * CALL PSGETRS( 'No transpose', N, NRHS, A, IA, JA, DESCA, IPIV, $ B, IB, JB, DESCB, INFO ) * END IF * RETURN * * End of PSGESV * END scalapack-1.8.0/SRC/zdttrsv.f0000640000175000017500000001315310363532303015635 0ustar muammarmuammar SUBROUTINE ZDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, $ B, LDB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from ZGTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * ZDTTRSV solves one of the systems of equations * L * X = B, L**T * X = B, or L**H * X = B, * U * X = B, U**T * X = B, or U**H * X = B, * with factors of the tridiagonal matrix A from the LU factorization * computed by ZDTTRF. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether to solve with L or U. * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * DL (input) COMPLEX array, dimension (N-1) * The (n-1) multipliers that define the matrix L from the * LU factorization of A. * * D (input) COMPLEX array, dimension (N) * The n diagonal elements of the upper triangular matrix U from * the LU factorization of A. * * DU (input) COMPLEX array, dimension (N-1) * The (n-1) elements of the first superdiagonal of U. * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, B is overwritten by the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL LOWER, NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) LOWER = LSAME( UPLO, 'L' ) IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN INFO = -9 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( NOTRAN ) THEN * IF( LOWER ) THEN * Solve L*X = B, overwriting B with X. * DO 35 J = 1, NRHS * * Solve L*x = b. * DO 10 I = 1, N - 1 B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) 10 CONTINUE 35 CONTINUE * ELSE * Solve U*x = b. * DO 30 J = 1, NRHS B( N, J ) = B( N, J ) / D( N ) IF( N.GT.1 ) $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / $ D( N-1 ) DO 20 I = N - 2, 1, -1 B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J ) ) / D( I ) 20 CONTINUE 30 CONTINUE * ENDIF * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * IF( .NOT. LOWER ) THEN * Solve U**T * X = B, overwriting B with X. * DO 65 J = 1, NRHS * * Solve U**T * x = b. * B( 1, J ) = B( 1, J ) / D( 1 ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) DO 40 I = 3, N B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J ) ) / D( I ) 40 CONTINUE 65 CONTINUE * ELSE * * Solve L**T * X = B, overwriting B with X. DO 60 J = 1, NRHS * * Solve L**T * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) 50 CONTINUE 60 CONTINUE ENDIF ELSE * IF( .NOT. LOWER ) THEN * Solve U**H * X = B, overwriting B with X. * DO 95 J = 1, NRHS * * Solve U**H * x = b. * B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) / $ DCONJG( D( 2 ) ) DO 70 I = 3, N B( I, J ) = ( B( I, J ) $ -DCONJG( DU( I-1 ) )*B( I-1, J ) ) / $ DCONJG( D( I ) ) 70 CONTINUE 95 CONTINUE * ELSE * * Solve L**H * X = B, overwriting B with X. DO 90 J = 1, NRHS * * Solve L**H * x = b. * DO 80 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - DCONJG( DL( I ) )*B( I+1, J ) 80 CONTINUE 90 CONTINUE ENDIF END IF * * End of ZDTTRSV * END scalapack-1.8.0/SRC/pcgetri.f0000640000175000017500000003517310430435051015555 0ustar muammarmuammar SUBROUTINE PCGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.4) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * v1.7.4: May 10, 2006 * v1.7: May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ), IWORK( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCGETRI computes the inverse of a distributed matrix using the LU * factorization computed by PCGETRF. This method inverts U and then * computes the inverse of sub( A ) = A(IA:IA+N-1,JA:JA+N-1) denoted * InvA by solving the system InvA*L = inv(U) for InvA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the L and U obtained by the * factorization sub( A ) = P*L*U computed by PCGETRF. On * exit, if INFO = 0, sub( A ) contains the inverse of the * original distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A * keeps track of the pivoting information. IPIV(i) is the * global row index the local row i was swapped with. This * array is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = LOCr(N+MOD(IA-1,MB_A))*NB_A. WORK is used to keep a * copy of at most an entire column block of sub( A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK used as workspace for * physically transposing the pivots. * LIWORK is local input and must be at least * if NPROW == NPCOL then * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + NB_A, * else * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + * MAX( CEIL(CEIL(LOCr(M_A)/MB_A)/(LCM/NPROW)), * NB_A ) * where LCM is the least common multiple of process * rows and columns (NPROW and NPCOL). * end if * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,IA+K-1) is exactly zero; the * matrix is singular and its inverse could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IROFF, IW, J, $ JB, JN, LCM, LIWMIN, LWMIN, MP, MYCOL, MYROW, $ NN, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PCGEMM, PCLACPY, PCLASET, PCLAPIV, $ PCTRSM, PCTRTRI, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) LWMIN = NP * DESCA( NB_ ) * MP = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) NQ = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) IF( NPROW.EQ.NPCOL ) THEN LIWMIN = NQ + DESCA( NB_ ) ELSE * * Use the formula for the workspace given in PxLAPIV * to compute the minimum size LIWORK for IWORK * * The formula in PxLAPIV is * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * * where * M_P is the global length of the pivot vector * MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW * I_P is IA * I_P = IA * MB_P is the block size use for the block cyclic distribution of the * pivot vector * MB_P = DESCA (MB_ ) * LOCc ( . ) * NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) * LOCr ( . ) * NUMROC ( . , DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) * CEIL ( X / Y ) * ICEIL( X, Y ) * LCM * LCM = ILCM( NPROW, NPCOL ) * LCM = ILCM( NPROW, NPCOL ) LIWMIN = NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW $ + MOD ( IA - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -8 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 8 IF( LIWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGETRI', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Form inv(U). If INFO > 0 from PCTRTRI, then U is singular, * and the inverse is not computed. * CALL PCTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) IF( INFO.GT.0 ) $ RETURN * * Define array descriptor for working array WORK * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) NN = ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1 IACOL = INDXG2P( NN, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, NP ) ) IW = IROFF + 1 * * Solve the equation inv(A)*L=inv(U) for inv(A) using blocked code. * DO 10 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Copy current block column of L to WORK and replace with zeros. * CALL PCLACPY( 'Lower', JA+N-1-J, JB, A, I+1, J, DESCA, $ WORK, IW+J-JA+1, 1, DESCW ) CALL PCLASET( 'Lower', JA+N-1-J, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute current block column of inv(A). * IF( J+JB.LE.JA+N-1 ) $ CALL PCGEMM( 'No transpose', 'No transpose', N, JB, $ JA+N-J-JB, -ONE, A, IA, J+JB, DESCA, WORK, $ IW+J+JB-JA, 1, DESCW, ONE, A, IA, J, DESCA ) CALL PCTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW+J-JA, 1, DESCW, A, IA, J, DESCA ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 * * Copy current block column of L to WORK and replace with zeros. * CALL PCLACPY( 'Lower', N-1, JB, A, IA+1, JA, DESCA, WORK, IW+1, $ 1, DESCW ) CALL PCLASET( 'Lower', N-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Compute current block column of inv(A). * IF( JA+JB.LE.JA+N-1 ) $ CALL PCGEMM( 'No transpose', 'No transpose', N, JB, $ N-JB, -ONE, A, IA, JA+JB, DESCA, WORK, IW+JB, 1, $ DESCW, ONE, A, IA, JA, DESCA ) CALL PCTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW, 1, DESCW, A, IA, JA, DESCA ) * * Use the row pivots and apply them to the columns of the global * matrix. * CALL DESCSET( DESCW, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ MP+DESCA( MB_ ) ) CALL PCLAPIV( 'Backward', 'Columns', 'Column', N, N, A, IA, $ JA, DESCA, IPIV, IA, 1, DESCW, IWORK ) * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PCGETRI * END scalapack-1.8.0/SRC/pdlaset.f0000640000175000017500000002173310363532303015554 0ustar muammarmuammar SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N DOUBLE PRECISION ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLASET initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) DOUBLE PRECISION * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) DOUBLE PRECISION * The constant to which the diagonal elements are to be set. * * A (local output) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBLK, IN, ITMP, J, JAA, JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PDLASE2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) CALL PDLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) JAA = JA + ITMP CALL PDLASE2( UPLO, IBLK, N-ITMP, ALPHA, BETA, $ A, I, JAA, DESCA ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL PDLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IAA = IA + JTMP CALL PDLASE2( UPLO, M-JTMP, JBLK, ALPHA, BETA, A, IAA, $ J, DESCA ) 20 CONTINUE ELSE IF( M.LE.N ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), $ IA+M-1 ) CALL PDLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) CALL PDLASE2( UPLO, IBLK, I-IA, ALPHA, ALPHA, A, I, $ JA, DESCA ) CALL PDLASE2( UPLO, IBLK, N-I+IA, ALPHA, BETA, A, I, $ JA+I-IA, DESCA ) 30 CONTINUE ELSE JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA+N-1 ) CALL PDLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) CALL PDLASE2( UPLO, J-JA, JBLK, ALPHA, ALPHA, A, IA, $ J, DESCA ) CALL PDLASE2( UPLO, M-J+JA, JBLK, ALPHA, BETA, A, $ IA+J-JA, J, DESCA ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PDLASET * END scalapack-1.8.0/SRC/psorg2l.f0000640000175000017500000002556510363532303015517 0ustar muammarmuammar SUBROUTINE PSORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORG2L generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PSGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PSGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0 REAL TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLASET, PSSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja:ja+n-k-1 to columns of the unit matrix * CALL PSLASET( 'All', M-N, N-K, ZERO, ZERO, A, IA, JA, DESCA ) CALL PSLASET( 'All', N, N-K, ZERO, ONE, A, IA+M-N, JA, DESCA ) * TAUJ = ZERO NQA0 = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) ) DO 10 J = JA+N-K, JA+N-1 * * Apply H(j) to A(ia:ia+m-n+j-ja,ja:j) from the left * CALL PSELSET( A, IA+M-N+J-JA, J, DESCA, ONE ) CALL PSLARF( 'Left', M-N+J-JA+1, J-JA, A, IA, J, DESCA, 1, TAU, $ A, IA, JA, DESCA, WORK ) * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, NQA0 ) ) CALL PSSCAL( M-N+J-JA, -TAUJ, A, IA, J, DESCA, 1 ) CALL PSELSET( A, IA+M-N+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia+m-n+j-ja+1:ia+m-1,j) to zero * CALL PSLASET( 'All', JA+N-1-J, 1, ZERO, ZERO, A, IA+M-N+J-JA+1, $ J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORG2L * END scalapack-1.8.0/SRC/psgeqr2.f0000640000175000017500000002743710363532303015512 0ustar muammarmuammar SUBROUTINE PSGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQR2 computes a QR factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 * and v(i) = 1; v(i+1:m) is stored on exit in A(ia+i:ia+m-1,ja+i-1), * and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, II, IACOL, IAROW, ICTXT, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ REAL AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PSELSET, PSLARF, PSLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA, SGEBR2D, SGEBS2D, $ SLARFG, SSCAL * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) I = II+(JJ-1)*DESCA( LLD_ ) IF( MYCOL.EQ.IACOL ) THEN AJJ = A( I ) CALL SLARFG( 1, AJJ, A( I ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL SSCAL( NQ-JJ, ALPHA, A( I+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL SSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), 1, $ IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA, JA+K-1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(i+1:ia+m-1,j) * CALL PSLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(j)' to A(i:ia+m-1,j+1:ja+n-1) from the left * CALL PSELSET( A, I, J, DESCA, ONE ) * CALL PSLARF( 'Left', M-J+JA, N-J+JA-1, A, I, J, DESCA, 1, $ TAU, A, I, J+1, DESCA, WORK ) END IF CALL PSELSET( A, I, J, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEQR2 * END scalapack-1.8.0/SRC/psstein.f0000640000175000017500000006030610602576752015620 0ustar muammarmuammar SUBROUTINE PSSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N REAL ORFAC * .. * .. Array Arguments .. INTEGER DESCZ( * ), IBLOCK( * ), ICLUSTR( * ), $ IFAIL( * ), ISPLIT( * ), IWORK( * ) REAL D( * ), E( * ), GAP( * ), W( * ), WORK( * ), $ Z( * ) * .. * * Purpose * ======= * * PSSTEIN computes the eigenvectors of a symmetric tridiagonal matrix * in parallel, using inverse iteration. The eigenvectors found * correspond to user specified eigenvalues. PSSTEIN does not * orthogonalize vectors that are on different processes. The extent * of orthogonalization is controlled by the input parameter LWORK. * Eigenvectors that are to be orthogonalized are computed by the same * process. PSSTEIN decides on the allocation of work among the * processes and then calls SSTEIN2 (modified LAPACK routine) on each * individual process. If insufficient workspace is allocated, the * expected orthogonalization may not be done. * * Note : If the eigenvectors obtained are not orthogonal, increase * LWORK and run the code again. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * P = NPROW * NPCOL is the total number of processes * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input) REAL array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (global input) REAL array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * * M (global input) INTEGER * The total number of eigenvectors to be found. 0 <= M <= N. * * W (global input/global output) REAL array, dim (M) * On input, the first M elements of W contain all the * eigenvalues for which eigenvectors are to be computed. The * eigenvalues should be grouped by split-off block and ordered * from smallest to largest within the block (The output array * W from PSSTEBZ with ORDER='b' is expected here). This * array should be replicated on all processes. * On output, the first M elements contain the input * eigenvalues in ascending order. * * Note : To obtain orthogonal vectors, it is best if * eigenvalues are computed to highest accuracy ( this can be * done by setting ABSTOL to the underflow threshold = * SLAMCH('U') --- ABSTOL is an input parameter * to PSSTEBZ ) * * IBLOCK (global input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W -- 1 for eigenvalues belonging to the * first submatrix from the top, 2 for those belonging to * the second submatrix, etc. (The output array IBLOCK * from PSSTEBZ is expected here). * * ISPLIT (global input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N (The output array * ISPLIT from PSSTEBZ is expected here.) * * ORFAC (global input) REAL * ORFAC specifies which eigenvectors should be orthogonalized. * Eigenvectors that correspond to eigenvalues which are within * ORFAC*||T|| of each other are to be orthogonalized. * However, if the workspace is insufficient (see LWORK), this * tolerance may be decreased until all eigenvectors to be * orthogonalized can be stored in one process. * No orthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) REAL array, * dimension (DESCZ(DLEN_), N/npcol + NB) * Z contains the computed eigenvectors associated with the * specified eigenvalues. Any vector which fails to converge is * set to its current iterate after MAXITS iterations ( See * SSTEIN2 ). * On output, Z is distributed across the P processes in block * cyclic format. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local workspace/global output) REAL array, * dimension ( LWORK ) * On output, WORK(1) gives a lower bound on the * workspace ( LWORK ) that guarantees the user desired * orthogonalization (see ORFAC). * Note that this may overestimate the minimum workspace needed. * * LWORK (local input) integer * LWORK controls the extent of orthogonalization which can be * done. The number of eigenvectors for which storage is * allocated on each process is * NVEC = floor(( LWORK- max(5*N,NP00*MQ00) )/N). * Eigenvectors corresponding to eigenvalue clusters of size * NVEC - ceil(M/P) + 1 are guaranteed to be orthogonal ( the * orthogonality is similar to that obtained from SSTEIN2). * Note : LWORK must be no smaller than: * max(5*N,NP00*MQ00) + ceil(M/P)*N, * and should have the same input value on all processes. * It is the minimum value of LWORK input on different processes * that is significant. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/global output) INTEGER array, * dimension ( 3*N+P+1 ) * On return, IWORK(1) contains the amount of integer workspace * required. * On return, the IWORK(2) through IWORK(P+2) indicate * the eigenvectors computed by each process. Process I computes * eigenvectors indexed IWORK(I+2)+1 thru' IWORK(I+3). * * LIWORK (local input) INTEGER * Size of array IWORK. Must be >= 3*N + P + 1 * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) integer array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after MAXITS * iterations (as in SSTEIN), then INFO > 0 is returned. * If mod(INFO,M+1)>0, then * for I=1 to mod(INFO,M+1), the eigenvector * corresponding to the eigenvalue W(IFAIL(I)) failed to * converge ( W refers to the array of eigenvalues on output ). * * ICLUSTR (global output) integer array, dimension (2*P) * This output array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace (see LWORK, * ORFAC and INFO). Eigenvectors corresponding to clusters of * eigenvalues indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), I = 1 to * INFO/(M+1), could not be orthogonalized due to lack of * workspace. Hence the eigenvectors corresponding to these * clusters may not be orthogonal. ICLUSTR is a zero terminated * array --- ( ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0 ) * if and only if K is the number of clusters. * * GAP (global output) REAL array, dimension (P) * This output array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. The INFO/M output * values in this array correspond to the INFO/(M+1) clusters * indicated by the array ICLUSTR. As a result, the dot product * between eigenvectors corresponding to the I^th cluster may be * as high as ( O(n)*macheps ) / GAP(I). * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * < 0 : if INFO = -I, the I-th argument had an illegal value * > 0 : if mod(INFO,M+1) = I, then I eigenvectors failed to * converge in MAXITS iterations. Their indices are * stored in the array IFAIL. * if INFO/(M+1) = I, then eigenvectors corresponding to * I clusters of eigenvalues could not be orthogonalized * due to insufficient workspace. The indices of the * clusters are stored in the array ICLUSTR. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, IGEBR2D, $ IGEBS2D, PCHK1MAT, PSLAEVSWP, PXERBLA, SGEBR2D, $ SGEBS2D, SLASRT2, SSTEIN2 * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, NEGONE, ODM1, FIVE, ODM3, ODM18 PARAMETER ( ZERO = 0.0E+0, NEGONE = -1.0E+0, $ ODM1 = 1.0E-1, FIVE = 5.0E+0, ODM3 = 1.0E-3, $ ODM18 = 1.0E-18 ) * .. * .. Local Scalars .. LOGICAL LQUERY, SORTED INTEGER B1, BN, BNDRY, CLSIZ, COL, I, IFIRST, IINFO, $ ILAST, IM, INDRW, ITMP, J, K, LGCLSIZ, LLWORK, $ LOAD, LOCINFO, MAXVEC, MQ00, MYCOL, MYROW, $ NBLK, NERR, NEXT, NP00, NPCOL, NPROW, NVS, $ OLNBLK, P, ROW, SELF, TILL, TOTERR REAL DIFF, MINGAP, ONENRM, ORGFAC, ORTOL, TMPFAC * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1200+CTXT_ ) ELSE * * Make sure that NPROW>0 and NPCOL>0 before calling NUMROC * CALL CHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN * * Now we know that our context is good enough to * perform the rest of the checks * NP00 = NUMROC( N, DESCZ( MB_ ), 0, 0, NPROW ) MQ00 = NUMROC( M, DESCZ( NB_ ), 0, 0, NPCOL ) P = NPROW*NPCOL * * Compute the maximum number of vectors per process * LLWORK = LWORK CALL IGAMN2D( DESCZ( CTXT_ ), 'A', ' ', 1, 1, LLWORK, 1, 1, $ 1, -1, -1, -1 ) INDRW = MAX( 5*N, NP00*MQ00 ) IF( N.NE.0 ) $ MAXVEC = ( LLWORK-INDRW ) / N LOAD = ICEIL( M, P ) IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN TMPFAC = ORFAC CALL SGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1 ) ELSE CALL SGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1, 0, 0 ) END IF * LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( MAXVEC.LT.LOAD .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.3*N+P+1 .AND. .NOT.LQUERY ) THEN INFO = -16 ELSE DO 10 I = 2, M IF( IBLOCK( I ).LT.IBLOCK( I-1 ) ) THEN INFO = -6 GO TO 20 END IF IF( IBLOCK( I ).EQ.IBLOCK( I-1 ) .AND. W( I ).LT. $ W( I-1 ) ) THEN INFO = -5 GO TO 20 END IF 10 CONTINUE 20 CONTINUE IF( INFO.EQ.0 ) THEN IF( ABS( TMPFAC-ORFAC ).GT.FIVE*ABS( TMPFAC ) ) $ INFO = -8 END IF END IF * END IF IDUM1( 1 ) = M IDUM2( 1 ) = 4 CALL PCHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, 1, IDUM1, IDUM2, $ INFO ) WORK( 1 ) = REAL( MAX( 5*N, NP00*MQ00 )+ICEIL( M, P )*N ) IWORK( 1 ) = 3*N + P + 1 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PSSTEIN', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN RETURN END IF * DO 30 I = 1, M IFAIL( I ) = 0 30 CONTINUE DO 40 I = 1, P + 1 IWORK( I ) = 0 40 CONTINUE DO 50 I = 1, P GAP( I ) = NEGONE ICLUSTR( 2*I-1 ) = 0 ICLUSTR( 2*I ) = 0 50 CONTINUE * * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC ELSE TMPFAC = ODM3 END IF ORGFAC = TMPFAC * * Allocate the work among the processes * ILAST = M / LOAD IF( MOD( M, LOAD ).EQ.0 ) $ ILAST = ILAST - 1 OLNBLK = -1 NVS = 0 NEXT = 1 IM = 0 ONENRM = ZERO DO 100 I = 0, ILAST - 1 NEXT = NEXT + LOAD J = NEXT - 1 IF( J.GT.NVS ) THEN NBLK = IBLOCK( NEXT ) IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN * * Compute orthogonalization criterion * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 60 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 60 CONTINUE OLNBLK = NBLK END IF TILL = NVS + MAXVEC 70 CONTINUE J = NEXT - 1 IF( TMPFAC.GT.ODM18 ) THEN ORTOL = TMPFAC*ONENRM DO 80 J = NEXT - 1, MIN( TILL, M-1 ) IF( IBLOCK( J+1 ).NE.IBLOCK( J ) .OR. W( J+1 )- $ W( J ).GE.ORTOL ) THEN GO TO 90 END IF 80 CONTINUE IF( J.EQ.M .AND. TILL.GE.M ) $ GO TO 90 TMPFAC = TMPFAC*ODM1 GO TO 70 END IF 90 CONTINUE J = MIN( J, TILL ) END IF IF( SELF.EQ.I ) $ IM = MAX( 0, J-NVS ) * IWORK( I+1 ) = NVS NVS = MAX( J, NVS ) 100 CONTINUE IF( SELF.EQ.ILAST ) $ IM = M - NVS IWORK( ILAST+1 ) = NVS DO 110 I = ILAST + 2, P + 1 IWORK( I ) = M 110 CONTINUE * CLSIZ = 1 LGCLSIZ = 1 ILAST = 0 NBLK = 0 BNDRY = 2 K = 1 DO 140 I = 1, M IF( IBLOCK( I ).NE.NBLK ) THEN NBLK = IBLOCK( I ) IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 120 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 120 CONTINUE * END IF IF( I.GT.1 ) THEN DIFF = W( I ) - W( I-1 ) IF( IBLOCK( I ).NE.IBLOCK( I-1 ) .OR. I.EQ.M .OR. DIFF.GT. $ ORGFAC*ONENRM ) THEN IFIRST = ILAST IF( I.EQ.M ) THEN IF( IBLOCK( M ).NE.IBLOCK( M-1 ) .OR. DIFF.GT.ORGFAC* $ ONENRM ) THEN ILAST = M - 1 ELSE ILAST = M END IF ELSE ILAST = I - 1 END IF CLSIZ = ILAST - IFIRST IF( CLSIZ.GT.1 ) THEN IF( LGCLSIZ.LT.CLSIZ ) $ LGCLSIZ = CLSIZ MINGAP = ONENRM 130 CONTINUE IF( BNDRY.GT.P+1 ) $ GO TO 150 IF( IWORK( BNDRY ).GT.IFIRST .AND. IWORK( BNDRY ).LT. $ ILAST ) THEN MINGAP = MIN( W( IWORK( BNDRY )+1 )- $ W( IWORK( BNDRY ) ), MINGAP ) ELSE IF( IWORK( BNDRY ).GE.ILAST ) THEN IF( MINGAP.LT.ONENRM ) THEN ICLUSTR( 2*K-1 ) = IFIRST + 1 ICLUSTR( 2*K ) = ILAST GAP( K ) = MINGAP / ONENRM K = K + 1 END IF GO TO 140 END IF BNDRY = BNDRY + 1 GO TO 130 END IF END IF END IF 140 CONTINUE 150 CONTINUE INFO = ( K-1 )*( M+1 ) * * Call SSTEIN2 to find the eigenvectors * CALL SSTEIN2( N, D, E, IM, W( IWORK( SELF+1 )+1 ), $ IBLOCK( IWORK( SELF+1 )+1 ), ISPLIT, ORGFAC, $ WORK( INDRW+1 ), N, WORK, IWORK( P+2 ), $ IFAIL( IWORK( SELF+1 )+1 ), LOCINFO ) * * Redistribute the eigenvector matrix to conform with the block * cyclic distribution of the input matrix * * DO 160 I = 1, M IWORK( P+1+I ) = I 160 CONTINUE * CALL SLASRT2( 'I', M, W, IWORK( P+2 ), IINFO ) * DO 170 I = 1, M IWORK( M+P+1+IWORK( P+1+I ) ) = I 170 CONTINUE * * DO 180 I = 1, LOCINFO ITMP = IWORK( SELF+1 ) + I IFAIL( ITMP ) = IFAIL( ITMP ) + ITMP - I IFAIL( ITMP ) = IWORK( M+P+1+IFAIL( ITMP ) ) 180 CONTINUE * DO 190 I = 1, K - 1 ICLUSTR( 2*I-1 ) = IWORK( M+P+1+ICLUSTR( 2*I-1 ) ) ICLUSTR( 2*I ) = IWORK( M+P+1+ICLUSTR( 2*I ) ) 190 CONTINUE * * * Still need to apply the above permutation to IFAIL * * TOTERR = 0 DO 210 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, LOCINFO, 1 ) IF( LOCINFO.NE.0 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', LOCINFO, 1, $ IFAIL( IWORK( I )+1 ), LOCINFO ) DO 200 J = 1, LOCINFO IFAIL( TOTERR+J ) = IFAIL( IWORK( I )+J ) 200 CONTINUE TOTERR = TOTERR + LOCINFO END IF ELSE * ROW = ( I-1 ) / NPCOL COL = MOD( I-1, NPCOL ) * CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, NERR, 1, $ ROW, COL ) IF( NERR.NE.0 ) THEN CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', NERR, 1, $ IFAIL( TOTERR+1 ), NERR, ROW, COL ) TOTERR = TOTERR + NERR END IF END IF 210 CONTINUE INFO = INFO + TOTERR * * CALL PSLAEVSWP( N, WORK( INDRW+1 ), N, Z, IZ, JZ, DESCZ, IWORK, $ IWORK( M+P+2 ), WORK, INDRW ) * DO 220 I = 2, P IWORK( I ) = IWORK( M+P+1+IWORK( I ) ) 220 CONTINUE * * * Sort the IWORK array * * 230 CONTINUE SORTED = .TRUE. DO 240 I = 2, P - 1 IF( IWORK( I ).GT.IWORK( I+1 ) ) THEN ITMP = IWORK( I+1 ) IWORK( I+1 ) = IWORK( I ) IWORK( I ) = ITMP SORTED = .FALSE. END IF 240 CONTINUE IF( .NOT.SORTED ) $ GO TO 230 * DO 250 I = P + 1, 1, -1 IWORK( I+1 ) = IWORK( I ) 250 CONTINUE * WORK( 1 ) = ( LGCLSIZ+LOAD-1 )*N + INDRW IWORK( 1 ) = 3*N + P + 1 * * End of PSSTEIN * END scalapack-1.8.0/SRC/pslaed3.f0000640000175000017500000002636610363532303015462 0ustar muammarmuammar SUBROUTINE PSLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, $ INDXR, INDXC, CTOT, NPCOL, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL REAL RHO * .. * .. Array Arguments .. INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ), $ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * ) REAL BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ), $ W( * ), Z( * ) * .. * * Purpose * ======= * * PSLAED3 finds the roots of the secular equation, as defined by the * values in D, W, and RHO, between 1 and K. It makes the * appropriate calls to SLAED4 * * This code makes very mild assumptions about floating point * arithmetic. It will work on machines with a guard digit in * add/subtract, or on those binary machines without guard digits * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. * It could conceivably fail on hexadecimal or decimal machines * without guard digits, but we know of none. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * K (output) INTEGER * The number of non-deflated eigenvalues, and the order of the * related secular equation. 0 <= K <=N. * * N (input) INTEGER * The dimension of the symmetric tridiagonal matrix. N >= 0. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * D (input/output) REAL array, dimension (N) * On entry, D contains the eigenvalues of the two submatrices to * be combined. * On exit, D contains the trailing (N-K) updated eigenvalues * (those which were deflated) sorted into increasing order. * * DROW (global input) INTEGER * The process row over which the first row of the matrix D is * distributed. 0 <= DROW < NPROW. * * DCOL (global input) INTEGER * The process column over which the first column of the * matrix D is distributed. 0 <= DCOL < NPCOL. * * Q (input/output) REAL array, dimension (LDQ, N) * On entry, Q contains the eigenvectors of two submatrices in * the two square blocks with corners at (1,1), (N1,N1) * and (N1+1, N1+1), (N,N). * On exit, Q contains the trailing (N-K) updated eigenvectors * (those which were deflated) in its last N-K columns. * * LDQ (input) INTEGER * The leading dimension of the array Q. LDQ >= max(1,NQ). * * RHO (global input/output) REAL * On entry, the off-diagonal element associated with the rank-1 * cut which originally split the two submatrices which are now * being recombined. * On exit, RHO has been modified to the value required by * PSLAED3. * * DLAMDA (global output) REAL array, dimension (N) * A copy of the first K eigenvalues which will be used by * SLAED3 to form the secular equation. * * W (global output) REAL array, dimension (N) * The first k values of the final deflation-altered z-vector * which will be passed to SLAED3. * * Z (global input) REAL array, dimension (N) * On entry, Z contains the updating vector (the last * row of the first sub-eigenvector matrix and the first row of * the second sub-eigenvector matrix). * On exit, the contents of Z have been destroyed by the updating * process. * * U (global output) REAL array * global dimension (N, N), local dimension (LDU, NQ). * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * * LDU (input) INTEGER * The leading dimension of the array U. * * QBUF (workspace) REAL array, dimension 3*N * * * INDX (workspace) INTEGER array, dimension (N) * The permutation used to sort the contents of DLAMDA into * ascending order. * * INDCOL (workspace) INTEGER array, dimension (N) * * * INDROW (workspace) INTEGER array, dimension (N) * * * INDXR (workspace) INTEGER array, dimension (N) * * * INDXC (workspace) INTEGER array, dimension (N) * * CTOT (workspace) INTEGER array, dimension( NPCOL, 4) * * NPCOL (global input) INTEGER * The total number of columns over which the distributed * submatrix is distributed. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value. * > 0: The algorithm failed to compute the ith eigenvalue. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E0 ) * .. * .. Local Scalars .. INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU, $ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW, $ NPROW, PDC, PDR, ROW REAL AUX, TEMP * .. * .. External Functions .. INTEGER INDXG2L REAL SLAMC3, SNRM2 EXTERNAL INDXG2L, SLAMC3, SNRM2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D, SLAED4 * .. * .. Intrinsic Functions .. INTRINSIC MOD, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * IINFO = 0 * * Quick return if possible * IF( K.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * ROW = DROW COL = DCOL DO 20 I = 1, N, NB DO 10 J = 0, NB - 1 INDROW( I+J ) = ROW INDCOL( I+J ) = COL 10 CONTINUE ROW = MOD( ROW+1, NPROW ) COL = MOD( COL+1, NPCOL ) 20 CONTINUE * MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 ) KLR = MYKL / NPROW IF( MYROW.EQ.DROW ) THEN MYKLR = KLR + MOD( MYKL, NPROW ) ELSE MYKLR = KLR END IF PDC = 1 COL = DCOL 30 CONTINUE IF( MYCOL.NE.COL ) THEN PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) COL = MOD( COL+1, NPCOL ) GO TO 30 END IF PDR = PDC KL = KLR + MOD( MYKL, NPROW ) ROW = DROW 40 CONTINUE IF( MYROW.NE.ROW ) THEN PDR = PDR + KL KL = KLR ROW = MOD( ROW+1, NPROW ) GO TO 40 END IF * DO 50 I = 1, K DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) Z( I ) = ONE 50 CONTINUE IF( MYKLR.GT.0 ) THEN KK = PDR DO 80 I = 1, MYKLR CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO ) IF( IINFO.NE.0 ) THEN INFO = KK END IF * * ..Compute part of z * DO 60 J = 1, KK - 1 Z( J ) = Z( J )*( BUF( J ) / $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 60 CONTINUE Z( KK ) = Z( KK )*BUF( KK ) DO 70 J = KK + 1, K Z( J ) = Z( J )*( BUF( J ) / $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 70 CONTINUE KK = KK + 1 80 CONTINUE * IF( MYROW.NE.DROW ) THEN CALL SCOPY( K, Z, 1, BUF, 1 ) CALL SGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL ) ELSE IPD = 2*K + 1 CALL SCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) IF( KLR.GT.0 ) THEN IPD = MYKLR + IPD ROW = MOD( DROW+1, NPROW ) DO 100 I = 1, NPROW - 1 CALL SGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW, $ MYCOL ) CALL SCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) DO 90 J = 1, K Z( J ) = Z( J )*BUF( J ) 90 CONTINUE IPD = IPD + KLR ROW = MOD( ROW+1, NPROW ) 100 CONTINUE END IF END IF END IF * IF( MYROW.EQ.DROW ) THEN IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN CALL SCOPY( K, Z, 1, BUF, 1 ) CALL SCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) CALL SGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL ) ELSE IF( MYCOL.EQ.DCOL ) THEN IPD = 2*K + 1 COL = DCOL KL = MYKL DO 120 I = 1, NPCOL - 1 IPD = IPD + KL COL = MOD( COL+1, NPCOL ) KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) IF( KL.NE.0 ) THEN CALL SGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL ) CALL SCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 ) DO 110 J = 1, K Z( J ) = Z( J )*BUF( J ) 110 CONTINUE END IF 120 CONTINUE DO 130 I = 1, K Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) ) 130 CONTINUE * END IF END IF * * Diffusion * IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN CALL SCOPY( K, Z, 1, BUF, 1 ) CALL SCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) CALL SGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL ) CALL SCOPY( K, BUF, 1, Z, 1 ) END IF * * Copy of D at the good place * KLC = 0 KLR = 0 DO 140 I = 1, K GI = INDX( I ) D( GI ) = BUF( K+I ) COL = INDCOL( GI ) ROW = INDROW( GI ) IF( COL.EQ.MYCOL ) THEN KLC = KLC + 1 INDXC( KLC ) = I END IF IF( ROW.EQ.MYROW ) THEN KLR = KLR + 1 INDXR( KLR ) = I END IF 140 CONTINUE * * Compute eigenvectors of the modified rank-1 modification. * IF( MYKL.NE.0 ) THEN DO 180 J = 1, MYKL KK = INDXC( J ) JU = INDX( KK ) JJU = INDXG2L( JU, NB, J, J, NPCOL ) CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO ) IF( IINFO.NE.0 ) THEN INFO = KK END IF IF( K.EQ.1 .OR. K.EQ.2 ) THEN DO 150 I = 1, KLR KK = INDXR( I ) IU = INDX( KK ) IIU = INDXG2L( IU, NB, J, J, NPROW ) U( IIU, JJU ) = BUF( KK ) 150 CONTINUE GO TO 180 END IF * DO 160 I = 1, K BUF( I ) = Z( I ) / BUF( I ) 160 CONTINUE TEMP = SNRM2( K, BUF, 1 ) DO 170 I = 1, KLR KK = INDXR( I ) IU = INDX( KK ) IIU = INDXG2L( IU, NB, J, J, NPROW ) U( IIU, JJU ) = BUF( KK ) / TEMP 170 CONTINUE 180 CONTINUE END IF * 190 CONTINUE RETURN * * End of PSLAED3 * END scalapack-1.8.0/SRC/cdbtrf.f0000640000175000017500000002514310363532303015363 0ustar muammarmuammar SUBROUTINE CDBTRF( M, N, KL, KU, AB, LDAB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from CGBTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. COMPLEX AB( LDAB, * ) * .. * * Purpose * ======= * * Cdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting or row interchanges. * * This is the blocked version of the algorithm, calling Level 3 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) REAL array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER NBMAX, LDWORK PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) * .. * .. Local Scalars .. INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP, $ JU, KM, KV, NB, NW * .. * .. Local Arrays .. COMPLEX WORK13( LDWORK, NBMAX ), $ WORK31( LDWORK, NBMAX ) * .. * .. External Functions .. INTEGER ILAENV, ISAMAX EXTERNAL ILAENV, ISAMAX * .. * .. External Subroutines .. EXTERNAL CCOPY, CDBTF2, CGEMM, CGERU, CSCAL, $ CSWAP, CTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U * KV = KU * * Test the input parameters. * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 ) THEN INFO = -3 ELSE IF( KU.LT.0 ) THEN INFO = -4 ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Determine the block size for this environment * NB = ILAENV( 1, 'CDBTRF', ' ', M, N, KL, KU ) * * The block size must not exceed the limit set by the size of the * local arrays WORK13 and WORK31. * NB = MIN( NB, NBMAX ) * IF( NB.LE.1 .OR. NB.GT.KL ) THEN * * Use unblocked code * CALL CDBTF2( M, N, KL, KU, AB, LDAB, INFO ) ELSE * * Use blocked code * * Zero the superdiagonal elements of the work array WORK13 * DO 20 J = 1, NB DO 10 I = 1, J - 1 WORK13( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * * Zero the subdiagonal elements of the work array WORK31 * DO 40 J = 1, NB DO 30 I = J + 1, NB WORK31( I, J ) = ZERO 30 CONTINUE 40 CONTINUE * * JU is the index of the last column affected by the current * stage of the factorization * JU = 1 * DO 180 J = 1, MIN( M, N ), NB JB = MIN( NB, MIN( M, N )-J+1 ) * * The active part of the matrix is partitioned * * A11 A12 A13 * A21 A22 A23 * A31 A32 A33 * * Here A11, A21 and A31 denote the current block of JB columns * which is about to be factorized. The number of rows in the * partitioning are JB, I2, I3 respectively, and the numbers * of columns are JB, J2, J3. The superdiagonal elements of A13 * and the subdiagonal elements of A31 lie outside the band. * I2 = MIN( KL-JB, M-J-JB+1 ) I3 = MIN( JB, M-J-KL+1 ) * * J2 and J3 are computed after JU has been updated. * * Factorize the current block of JB columns * DO 80 JJ = J, J + JB - 1 * * Find pivot and test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-JJ ) JP = 1 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) * * Compute multipliers * CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), $ 1 ) * * Update trailing submatrix within the band and within * the current block. JM is the index of the last column * which needs to be updated. * JM = MIN( JU, J+JB-1 ) IF( JM.GT.JJ ) THEN CALL CGERU( KM, JM-JJ, -CONE, AB( KV+2, JJ ), 1, $ AB( KV, JJ+1 ), LDAB-1, $ AB( KV+1, JJ+1 ), LDAB-1 ) END IF END IF * * Copy current column of A31 into the work array WORK31 * NW = MIN( JJ-J+1, I3 ) IF( NW.GT.0 ) $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, $ WORK31( 1, JJ-J+1 ), 1 ) 80 CONTINUE IF( J+JB.LE.N ) THEN * * Apply the row interchanges to the other blocks. * J2 = MIN( JU-J+1, KV ) - JB J3 = MAX( 0, JU-J-KV+1 ) * * Update the relevant part of the trailing submatrix * IF( J2.GT.0 ) THEN * * Update A12 * CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J2, CONE, AB( KV+1, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1 ) * IF( I2.GT.0 ) THEN * * Update A22 * CALL CGEMM( 'No transpose', 'No transpose', I2, J2, $ JB, -CONE, AB( KV+1+JB, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1, CONE, $ AB( KV+1, J+JB ), LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A32 * CALL CGEMM( 'No transpose', 'No transpose', I3, J2, $ JB, -CONE, WORK31, LDWORK, $ AB( KV+1-JB, J+JB ), LDAB-1, CONE, $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) END IF END IF * IF( J3.GT.0 ) THEN * * Copy the lower triangle of A13 into the work array * WORK13 * DO 130 JJ = 1, J3 DO 120 II = JJ, JB WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 120 CONTINUE 130 CONTINUE * * Update A13 in the work array * CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J3, CONE, AB( KV+1, J ), LDAB-1, $ WORK13, LDWORK ) * IF( I2.GT.0 ) THEN * * Update A23 * CALL CGEMM( 'No transpose', 'No transpose', I2, J3, $ JB, -CONE, AB( KV+1+JB, J ), LDAB-1, $ WORK13, LDWORK, CONE, AB( 1+JB, J+KV ), $ LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A33 * CALL CGEMM( 'No transpose', 'No transpose', I3, J3, $ JB, -CONE, WORK31, LDWORK, WORK13, $ LDWORK, CONE, AB( 1+KL, J+KV ), LDAB-1 ) END IF * * Copy the lower triangle of A13 back into place * DO 150 JJ = 1, J3 DO 140 II = JJ, JB AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 140 CONTINUE 150 CONTINUE END IF ELSE END IF * * copy the upper triangle of A31 back into place * DO 170 JJ = J + JB - 1, J, -1 * * Copy the current column of A31 back into place * NW = MIN( I3, JJ-J+1 ) IF( NW.GT.0 ) $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1, $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 170 CONTINUE 180 CONTINUE END IF * RETURN * * End of CDBTRF * END scalapack-1.8.0/SRC/pzlacpy.f0000640000175000017500000002237010363532303015600 0ustar muammarmuammar SUBROUTINE PZLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZLACPY copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PZLACPY * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBB, IBLK, IN, ITMP, J, JAA, JBB, $ JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PZLACP2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PZLACP2( UPLO, M, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN CALL PZLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB + ITMP JBB = JB + ITMP JAA = JA + ITMP CALL PZLACP2( UPLO, IBLK, N-ITMP, A, I, JAA, DESCA, $ B, IBB, JBB, DESCB ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN CALL PZLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IBB = IB + JTMP JBB = JB + JTMP IAA = IA + JTMP CALL PZLACP2( UPLO, M-JTMP, JBLK, A, IAA, J, DESCA, $ B, IBB, JBB, DESCB ) 20 CONTINUE ELSE IF( M.LE.N ) THEN CALL PZLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB+ITMP CALL PZLACP2( UPLO, IBLK, N, A, I, JA, DESCA, $ B, IBB, JB, DESCB ) 30 CONTINUE ELSE CALL PZLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) JBB = JB+JTMP CALL PZLACP2( UPLO, M, JBLK, A, IA, J, DESCA, $ B, IB, JBB, DESCB ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PZLACPY * END scalapack-1.8.0/SRC/pspoequ.f0000640000175000017500000003117010363532303015610 0ustar muammarmuammar SUBROUTINE PSPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, N REAL AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), SC( * ), SR( * ) * .. * * Purpose * ======= * * PSPOEQU computes row and column scalings intended to * equilibrate a distributed symmetric positive definite matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) and reduce its condition number * (with respect to the two-norm). SR and SC contain the scale * factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled distri- * buted matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on * the diagonal. This choice of SR and SC puts the condition number * of B within a factor N of the smallest possible condition number * over all possible diagonal scalings. * * The scaling factor are stored along process rows in SR and along * process columns in SC. The duplication of information simplifies * greatly the application of the factors. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) REAL pointer into the local memory to an * array of local dimension ( LLD_A, LOCc(JA+N-1) ), the * N-by-N symmetric positive definite distributed matrix * sub( A ) whose scaling factors are to be computed. Only the * diagonal elements of sub( A ) are referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local output) REAL array, dimension LOCr(M_A) * If INFO = 0, SR(IA:IA+N-1) contains the row scale factors * for sub( A ). SR is aligned with the distributed matrix A, * and replicated across every process column. SR is tied to the * distributed matrix A. * * SC (local output) REAL array, dimension LOCc(N_A) * If INFO = 0, SC(JA:JA+N-1) contains the column scale factors * for A(IA:IA+M-1,JA:JA+N-1). SC is aligned with the distribu- * ted matrix A, and replicated down every process row. SC is * tied to the distributed matrix A. * * SCOND (global output) REAL * If INFO = 0, SCOND contains the ratio of the smallest SR(i) * (or SC(j)) to the largest SR(i) (or SC(j)), with * IA <= i <= IA+N-1 and JA <= j <= JA+N-1. If SCOND >= 0.1 * and AMAX is neither too large nor too small, it is not worth * scaling by SR (or SC). * * AMAX (global output) REAL * Absolute value of largest matrix element. If AMAX is very * close to overflow or very close to underflow, the matrix * should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the K-th diagonal entry of sub( A ) is * nonpositive. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER ALLCTOP, COLCTOP, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ IDUMM, II, IIA, IOFFA, IOFFD, IROFF, J, JB, JJ, $ JJA, JN, LDA, LL, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ REAL AII, SMIN * .. * .. Local Arrays .. INTEGER DESCSC( DLEN_ ), DESCSR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGAMN2D, $ INFOG2L, PCHK1MAT, PB_TOPGET, PXERBLA, $ SGAMN2D, SGAMX2D, SGSUM2D * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', ALLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Compute some local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) * * Assign descriptors for SR and SC arrays * CALL DESCSET( DESCSR, N, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCSC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Initialize the scaling factors to zero. * DO 10 II = IIA, IIA+NP-1 SR( II ) = ZERO 10 CONTINUE * DO 20 JJ = JJA, JJA+NQ-1 SC( JJ ) = ZERO 20 CONTINUE * * Find the minimum and maximum diagonal elements. * Handle first block separately. * II = IIA JJ = JJA JB = JN-JA+1 SMIN = ONE / PSLAMCH( ICTXT, 'S' ) AMAX = ZERO * IOFFA = II+(JJ-1)*LDA IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFD = IOFFA DO 30 LL = 0, JB-1 AII = A( IOFFD ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = LL + 1 IOFFD = IOFFD + LDA + 1 30 CONTINUE END IF * IF( MYROW.EQ.IAROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFD = IOFFA DO 40 LL = 0, JB-1 AII = A( IOFFD ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = J + LL - JA + 1 IOFFD = IOFFD + LDA + 1 40 CONTINUE END IF * IF( MYROW.EQ.ICURROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * * Compute scaling factors * CALL SGSUM2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, SC( JJA ), $ 1, -1, MYCOL ) CALL SGSUM2D( ICTXT, 'Rowwise', ROWCTOP, NP, 1, SR( IIA ), $ MAX( 1, NP ), -1, MYCOL ) * CALL SGAMX2D( ICTXT, 'All', ALLCTOP, 1, 1, AMAX, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, SMIN, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) * IF( SMIN.LE.ZERO ) THEN * * Find the first non-positive diagonal element and return. * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) RETURN * ELSE * * Set the scale factors to the reciprocals * of the diagonal elements. * DO 60 II = IIA, IIA+NP-1 SR( II ) = ONE / SQRT( SR( II ) ) 60 CONTINUE * DO 70 JJ = JJA, JJA+NQ-1 SC( JJ ) = ONE / SQRT( SC( JJ ) ) 70 CONTINUE * * Compute SCOND = min(S(I)) / max(S(I)) * SCOND = SQRT( SMIN ) / SQRT( AMAX ) * END IF * RETURN * * End of PSPOEQU * END scalapack-1.8.0/SRC/pcdttrs.f0000640000175000017500000006730710363532303015612 0ustar muammarmuammar SUBROUTINE PCDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PCDTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PCDTTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal diagonally dominant-like distributed * matrix. * * Routine PCDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDTTRF and this is stored in AF. If a linear system * is to be solved using PCDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, $ RETURN_CODE, STORE_M_B, STORE_N_A, TEMP, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, $ PCDTTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX CDOTC EXTERNAL CDOTC, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCDTTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDTTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ 10*NPCOL+4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PCDTTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB(5) PARAM_CHECK( 14, 1 ) = DESCB(4) PARAM_CHECK( 13, 1 ) = DESCB(3) PARAM_CHECK( 12, 1 ) = DESCB(2) PARAM_CHECK( 11, 1 ) = DESCB(1) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 15, 2 ) = 1105 PARAM_CHECK( 14, 2 ) = 1104 PARAM_CHECK( 13, 2 ) = 1103 PARAM_CHECK( 12, 2 ) = 1102 PARAM_CHECK( 11, 2 ) = 1101 PARAM_CHECK( 10, 2 ) = 10 PARAM_CHECK( 9, 2 ) = 805 PARAM_CHECK( 8, 2 ) = 804 PARAM_CHECK( 7, 2 ) = 803 PARAM_CHECK( 6, 2 ) = 801 PARAM_CHECK( 5, 2 ) = 7 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PCDTTRSV( 'L', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PCDTTRSV( 'U', 'C', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( TRANS, 'C' ) ) THEN * CALL PCDTTRSV( 'L', 'C', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PCDTTRSV( 'U', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCDTTRS * END scalapack-1.8.0/SRC/pdtrrfs.f0000640000175000017500000007363010363532303015607 0ustar muammarmuammar SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LIWORK, LWORK, $ N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ), IWORK( * ) DOUBLE PRECISION A( * ), B( * ), BERR( * ), FERR( * ), $ WORK( * ), X( * ) * .. * * Purpose * ======= * * PDTRRFS provides error bounds and backward error estimates for the * solution to a system of linear equations with a triangular * coefficient matrix. * * The solution matrix X must be computed by PDTRTRS or some other * means before entering this routine. PDTRRFS does not do iterative * refinement because doing so cannot improve the backward error. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**T * sub( X ) = sub( B ) * (Conjugate transpose = Transpose) * * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_A,LOCc(JA+N-1) ). This * array contains the local pieces of the original triangular * distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * If DIAG = 'U', the diagonal elements of sub( A ) are also * not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The estimated forward error bounds for * each solution vector of sub( X ). If XTRUE is the true * solution, FERR bounds the magnitude of the largest entry * in (sub( X ) - XTRUE) divided by the magnitude of the * largest entry in sub( X ). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD( IA-1, MB_A ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices sub( X ) and sub( B ) should be * distributed the same way on the same processes. These conditions * ensure that sub( X ) and sub( B ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER CHARACTER TRANST INTEGER IAROW, IXBCOL, IXBROW, IXCOL, IXROW, ICOFFA, $ ICOFFB, ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, $ IIW, IOFFXB, IPB, IPR, IPV, IROFFA, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ DGEBR2D, DGEBS2D, INFOG2L, PCHK1MAT, $ PCHK2MAT, PDATRMV, PDAXPY, PDCOPY, $ PDLACON, PDTRMV, PDTRSV, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IX, JX, DESCX, 17, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3*NPMOD WORK( 1 ) = DBLE( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( NRHS.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -15 ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -16 ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1700+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1700+NB_ ) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1700+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -21 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 21 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 23 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, 0, IDUM1, IDUM2, $ INFO ) CALL PCHK2MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, N, 4, NRHS, 5, $ IX, JX, DESCX, 17, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRRFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = maximum number of nonzero entries in each row of A, plus 1 * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) )*DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 90 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PDCOPY( N, X, IX, JX+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PDAXPY( N, -ONE, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, $ JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the matrix * or vector Z. If the i-th component of the denominator is less * than SAFE2, then SAFE1 is added to the i-th components of the * numerator and denominator before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 20 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 20 CONTINUE END IF END IF * CALL PDATRMV( UPLO, TRANS, DIAG, N, ONE, A, IA, JA, DESCA, X, $ IX, JX+K, DESCX, 1, ONE, WORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 30 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix or * vector Z * NZ is the maximum number of nonzeros in any row of A, plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 40 CONTINUE END IF END IF * KASE = 0 50 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PDTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 60 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 60 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF CALL PDTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 50 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 80 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 90 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 180 J = JN + 1, JB + NRHS - 1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 170 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PDCOPY( N, X, IX, J+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PDAXPY( N, -ONE, B, IB, J+K, DESCB, 1, WORK( IPR ), $ IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 100 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 100 CONTINUE END IF END IF * CALL PDATRMV( UPLO, TRANS, DIAG, N, ONE, A, IA, JA, DESCA, $ X, IX, J+K, DESCX, 1, ONE, WORK( IPB ), IW, $ JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 110 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 110 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))/norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of A, * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIW - 1, IIW + NP - 2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 120 CONTINUE END IF END IF * KASE = 0 130 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PDTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 140 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 150 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 150 CONTINUE END IF END IF CALL PDTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 130 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 160 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 170 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDTRRFS * END scalapack-1.8.0/SRC/sstein2.f0000640000175000017500000002637510363532303015516 0ustar muammarmuammar* * SUBROUTINE SSTEIN2( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, LDZ, $ WORK, IWORK, IFAIL, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INFO, LDZ, M, N REAL ORFAC * .. * .. Array Arguments .. INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), $ IWORK( * ) REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * SSTEIN2 computes the eigenvectors of a real symmetric tridiagonal * matrix T corresponding to specified eigenvalues, using inverse * iteration. * * The maximum number of iterations allowed for each eigenvector is * specified by an internal parameter MAXITS (currently set to 5). * * Arguments * ========= * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input) REAL array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (input) REAL array, dimension (N) * The (n-1) subdiagonal elements of the tridiagonal matrix * T, in elements 1 to N-1. E(N) need not be set. * * M (input) INTEGER * The number of eigenvectors to be found. 0 <= M <= N. * * W (input) REAL array, dimension (N) * The first M elements of W contain the eigenvalues for * which eigenvectors are to be computed. The eigenvalues * should be grouped by split-off block and ordered from * smallest to largest within the block. ( The output array * W from SSTEBZ with ORDER = 'B' is expected here. ) * * IBLOCK (input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to * the first submatrix from the top, =2 if W(i) belongs to * the second submatrix, etc. ( The output array IBLOCK * from SSTEBZ is expected here. ) * * ISPLIT (input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to * ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 * through ISPLIT( 2 ), etc. * ( The output array ISPLIT from SSTEBZ is expected here. ) * * ORFAC (input) REAL * ORFAC specifies which eigenvectors should be * orthogonalized. Eigenvectors that correspond to eigenvalues * which are within ORFAC*||T|| of each other are to be * orthogonalized. * * Z (output) REAL array, dimension (LDZ, M) * The computed eigenvectors. The eigenvector associated * with the eigenvalue W(i) is stored in the i-th column of * Z. Any vector which fails to converge is set to its current * iterate after MAXITS iterations. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= max(1,N). * * WORK (workspace) REAL array, dimension (5*N) * * IWORK (workspace) INTEGER array, dimension (N) * * IFAIL (output) INTEGER array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after * MAXITS iterations, then their indices are stored in * array IFAIL. * * INFO (output) INTEGER * = 0: successful exit. * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, then i eigenvectors failed to converge * in MAXITS iterations. Their indices are stored in * array IFAIL. * * Internal Parameters * =================== * * MAXITS INTEGER, default = 5 * The maximum number of iterations performed. * * EXTRA INTEGER, default = 2 * The number of iterations performed after norm growth * criterion is satisfied, should be at least 1. * * ===================================================================== * * .. Parameters .. REAL ZERO, ONE, TEN, ODM1 PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1, $ ODM1 = 1.0E-1 ) INTEGER MAXITS, EXTRA PARAMETER ( MAXITS = 5, EXTRA = 2 ) * .. * .. Local Scalars .. INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, $ JBLK, JMAX, NBLK, NRMCHK REAL EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, SCL, $ SEP, STPCRT, TOL, XJ, XJM, ZTR * .. * .. Local Arrays .. INTEGER ISEED( 4 ) * .. * .. External Functions .. INTEGER ISAMAX REAL SASUM, SDOT, SLAMCH, SNRM2 EXTERNAL ISAMAX, SASUM, SDOT, SLAMCH, SNRM2 * .. * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 DO 10 I = 1, M IFAIL( I ) = 0 10 CONTINUE * IF( N.LT.0 ) THEN INFO = -1 ELSE IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( ORFAC.LT.ZERO ) THEN INFO = -8 ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN INFO = -10 ELSE DO 20 J = 2, M IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN INFO = -6 GO TO 30 END IF IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) $ THEN INFO = -5 GO TO 30 END IF 20 CONTINUE 30 CONTINUE END IF * IF( INFO.NE.0 ) THEN CALL XERBLA( 'SSTEIN2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) THEN RETURN ELSE IF( N.EQ.1 ) THEN Z( 1, 1 ) = ONE RETURN END IF * * Get machine constants. * EPS = SLAMCH( 'Precision' ) * * Initialize seed for random number generator SLARNV. * DO 40 I = 1, 4 ISEED( I ) = 1 40 CONTINUE * * Initialize pointers. * INDRV1 = 0 INDRV2 = INDRV1 + N INDRV3 = INDRV2 + N INDRV4 = INDRV3 + N INDRV5 = INDRV4 + N * * Compute eigenvectors of matrix blocks. * J1 = 1 DO 160 NBLK = 1, IBLOCK( M ) * * Find starting and ending indices of block nblk. * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) BLKSIZ = BN - B1 + 1 IF( BLKSIZ.EQ.1 ) $ GO TO 60 GPIND = J1 * * Compute reorthogonalization criterion and stopping criterion. * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 50 I = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ $ ABS( E( I ) ) ) 50 CONTINUE ORTOL = ORFAC*ONENRM * STPCRT = SQRT( ODM1 / BLKSIZ ) * * Loop through eigenvalues of block nblk. * 60 CONTINUE JBLK = 0 DO 150 J = J1, M IF( IBLOCK( J ).NE.NBLK ) THEN J1 = J GO TO 160 END IF JBLK = JBLK + 1 XJ = W( J ) * * Skip all the work if the block size is one. * IF( BLKSIZ.EQ.1 ) THEN WORK( INDRV1+1 ) = ONE GO TO 120 END IF * * If eigenvalues j and j-1 are too close, add a relatively * small perturbation. * IF( JBLK.GT.1 ) THEN EPS1 = ABS( EPS*XJ ) PERTOL = TEN*EPS1 SEP = XJ - XJM IF( SEP.LT.PERTOL ) $ XJ = XJM + PERTOL END IF * ITS = 0 NRMCHK = 0 * * Get random starting vector. * CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) * * Copy the matrix T so it won't be destroyed in factorization. * CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) * * Compute LU factors with partial pivoting ( PT = LU ) * TOL = ZERO CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, $ IINFO ) * * Update iteration count. * 70 CONTINUE ITS = ITS + 1 IF( ITS.GT.MAXITS ) $ GO TO 100 * * Normalize and scale the righthand side vector Pb. * SCL = BLKSIZ*ONENRM*MAX( EPS, $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / $ SASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) * * Solve the system LU = Pb. * CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, $ WORK( INDRV1+1 ), TOL, IINFO ) * * Reorthogonalize by modified Gram-Schmidt if eigenvalues are * close enough. * IF( JBLK.EQ.1 ) $ GO TO 90 IF( ABS( XJ-XJM ).GT.ORTOL ) $ GPIND = J * IF( GPIND.NE.J ) THEN DO 80 I = GPIND, J - 1 ZTR = -SDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), $ 1 ) CALL SAXPY( BLKSIZ, ZTR, Z( B1, I ), 1, $ WORK( INDRV1+1 ), 1 ) 80 CONTINUE END IF * * Check the infinity norm of the iterate. * 90 CONTINUE JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) NRM = ABS( WORK( INDRV1+JMAX ) ) * * Continue for additional iterations after norm reaches * stopping criterion. * IF( NRM.LT.STPCRT ) $ GO TO 70 NRMCHK = NRMCHK + 1 IF( NRMCHK.LT.EXTRA+1 ) $ GO TO 70 * GO TO 110 * * If stopping criterion was not satisfied, update info and * store eigenvector number in array ifail. * 100 CONTINUE INFO = INFO + 1 IFAIL( INFO ) = J * * Accept iterate as jth eigenvector. * 110 CONTINUE SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) IF( WORK( INDRV1+JMAX ).LT.ZERO ) $ SCL = -SCL CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) 120 CONTINUE DO 130 I = 1, N Z( I, J ) = ZERO 130 CONTINUE DO 140 I = 1, BLKSIZ Z( B1+I-1, J ) = WORK( INDRV1+I ) 140 CONTINUE * * Save the shift to check eigenvalue spacing at next * iteration. * XJM = XJ * 150 CONTINUE 160 CONTINUE * RETURN * * End of SSTEIN2 * END scalapack-1.8.0/SRC/pzlauu2.f0000640000175000017500000002042510363532303015517 0ustar muammarmuammar SUBROUTINE PZLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAUU2 computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the unblocked form of the algorithm, calling Level 2 BLAS. * No communication is performed by this routine, the matrix to operate * on should be strictly local to one process. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the matrix * sub( A ) is upper or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the order of the triangular factor U or L. N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICURR, IDIAG, IIA, IOFFA, JJA, $ LDA, MYCOL, MYROW, NA, NPCOL, NPROW DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZDSCAL, ZGEMV, $ ZLACGV * .. * .. External Functions .. LOGICAL LSAME COMPLEX*16 ZDOTC EXTERNAL LSAME, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, DBLE * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * DO 10 NA = N-1, 1, -1 AII = A( IDIAG ) ICURR = IDIAG + LDA A( IDIAG ) = AII*AII + DBLE( ZDOTC( NA, A( ICURR ), LDA, $ A( ICURR ), LDA ) ) CALL ZLACGV( NA, A( ICURR ), LDA ) CALL ZGEMV( 'No transpose', N-NA-1, NA, ONE, $ A( IOFFA+LDA ), LDA, A( ICURR ), LDA, $ DCMPLX( AII ), A( IOFFA ), 1 ) CALL ZLACGV( NA, A( ICURR ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE AII = A( IDIAG ) CALL ZDSCAL( N, AII, A( IOFFA ), 1 ) * ELSE * * Compute the product L' * L. * DO 20 NA = 1, N-1 AII = A( IDIAG ) ICURR = IDIAG + 1 A( IDIAG ) = AII*AII + DBLE( ZDOTC( N-NA, A( ICURR ), 1, $ A( ICURR ), 1 ) ) CALL ZLACGV( NA-1, A( IOFFA ), LDA ) CALL ZGEMV( 'Conjugate transpose', N-NA, NA-1, ONE, $ A( IOFFA+1 ), LDA, A( ICURR ), 1, $ DCMPLX( AII ), A( IOFFA ), LDA ) CALL ZLACGV( NA-1, A( IOFFA ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 20 CONTINUE AII = A( IDIAG ) CALL ZDSCAL( N, AII, A( IOFFA ), LDA ) * END IF * END IF * RETURN * * End of PZLAUU2 * END scalapack-1.8.0/SRC/psormql.f0000640000175000017500000004217510363532303015620 0ustar muammarmuammar SUBROUTINE PSORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMQL overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PSGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PSGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, IPW, $ IROFFA, IROFFC, J, J1, J2, J3, JB, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSLARFB, $ PSLARFT, PSORM2L, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+K-1 ) + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN JB = J1 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PSORM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PSLARFT( 'Backward', 'Columnwise', NQ-K+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+j+jb-ja-1,jc:jc+n-1) * MI = M - K + J + JB - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+j+jb-ja-1) * NI = N - K + J + JB - JA END IF * * Apply H or H' * CALL PSLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, $ JB, A, IA, J, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN JB = J2 - JA IF( LEFT ) THEN MI = M - K + JB ELSE NI = N - K + JB END IF CALL PSORM2L( SIDE, TRANS, MI, NI, JB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMQL * END scalapack-1.8.0/SRC/pzung2r.f0000640000175000017500000002600110363532303015520 0ustar muammarmuammar SUBROUTINE PZUNG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNG2R generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PZGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PZGEQRF in the K columns of its array * argument A(IA:*,JA:JA+K-1). On exit, this array contains * the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, KQ, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX*16 TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, PZLARF, $ PZLASET, PZSCAL * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja+k:ja+n-1 to columns of the unit matrix * CALL PZLASET( 'All', K, N-K, ZERO, ZERO, A, IA, JA+K, DESCA ) CALL PZLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, DESCA ) * TAUJ = ZERO KQ = MAX( 1, NUMROC( JA+K-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) DO 10 J = JA+K-1, JA, -1 * * Apply H(j) to A(ia+j-ja:ia+m-1,j:ja+n-1) from the left * IF( J.LT.JA+N-1 ) THEN CALL PZELSET( A, IA+J-JA, J, DESCA, ONE ) CALL PZLARF( 'Left', M-J+JA, JA+N-J-1, A, IA+J-JA, J, DESCA, $ 1, TAU, A, IA+J-JA, J+1, DESCA, WORK ) END IF * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, KQ ) ) IF( J-JA.LT.M-1 ) $ CALL PZSCAL( M-J+JA-1, -TAUJ, A, IA+J-JA+1, J, DESCA, 1 ) CALL PZELSET( A, IA+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia:ia+j-ja-1,j) to zero * CALL PZLASET( 'All', J-JA, 1, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNG2R * END scalapack-1.8.0/SRC/pzdttrf.f0000640000175000017500000010751110363532303015614 0ustar muammarmuammar SUBROUTINE PZDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 AF( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDTTRF computes a LU factorization * of an N-by-N complex tridiagonal * diagonally dominant-like distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PZDTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * DL (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDTTRF and this is stored in AF. If a linear system * is to be solved using PZDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZAXPY, $ ZGEMM, ZGERV2D, ZGESD2D, ZLACPY, ZLATCPY, $ ZPBTRF, ZPOTRF, ZSYRK, ZTBTRS, ZTRMM, ZTRRV2D, $ ZTRSD2D, ZTRSM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX*16 ZDOTC EXTERNAL LSAME, NUMROC, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LWORK .LT. -1) THEN INFO = -10 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -1 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, $ 'PZDTTRF, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDTTRF, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check auxiliary storage size * LAF_MIN = (12*NPCOL+3*NB) * IF( LAF .LT. LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, $ 'PZDTTRF: auxiliary storage error ', $ -INFO ) RETURN ENDIF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, $ 'PZDTTRF: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA(5) PARAM_CHECK( 6, 1 ) = DESCA(4) PARAM_CHECK( 5, 1 ) = DESCA(3) PARAM_CHECK( 4, 1 ) = DESCA(1) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 605 PARAM_CHECK( 6, 2 ) = 604 PARAM_CHECK( 5, 2 ) = 603 PARAM_CHECK( 4, 2 ) = 601 PARAM_CHECK( 3, 2 ) = 5 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * Zero out space for fillin * DO 10 I=1, LAF_MIN AF( I ) = CZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF ( MYCOL .LT. NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL ZTRSD2D( ICTXT, 'U', 'N', 1, 1, $ DU( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * ENDIF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL ZDTTRF( ODD_SIZE, DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL+1 GOTO 1500 ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * * Apply factorization to lower connection block BL_i * Apply factorization to upper connection block BU_i * * * Perform the triangular solve {U_i}^C{BL'}_i^C = {BL_i}^C * * DL( PART_OFFSET+ODD_SIZE+1 ) = $ ( DL( PART_OFFSET+ODD_SIZE+1 ) ) $ / ( D( PART_OFFSET+ODD_SIZE ) ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 )- $ DL( PART_OFFSET+ODD_SIZE+1 )*DU( PART_OFFSET+ODD_SIZE ) * ENDIF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 1500 CONTINUE * If the processor could not locally factor, it jumps here. * IF ( MYCOL .NE. 0 ) THEN * * Move entry that causes spike to auxiliary storage * AF( WORK_U+1 ) = ( DL( PART_OFFSET+1 ) ) * IF (INFO.EQ.0) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL ZDTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( WORK_U+1 ), ODD_SIZE, $ INFO ) * * * Calculate the "spike" fillin, ${U_i}^C {{GL}_i}^C = {DU_i}^C$ * CALL ZTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * AF( 1 ) = DCONJG( AF( 1 ) ) * CALL ZDTTRSV( 'U', 'C', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), $ AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * AF( ODD_SIZE+3 ) = -CONE * $ ZDOTC( ODD_SIZE, AF( 1 ), 1, AF( WORK_U+1 ), 1 ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF ( MYCOL .LT. NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in conjugate transpose form. * AF( ODD_SIZE+1 ) = -CONE $ * DCONJG( DL( PART_OFFSET+ODD_SIZE+1 ) $ * AF( WORK_U+ODD_SIZE ) ) * * AF(WORK_U+(ODD_SIZE)+1 ) = -CONE $ * DU( PART_OFFSET+ODD_SIZE ) $ * DCONJG( AF( ODD_SIZE ) ) * ENDIF * ENDIF * End of "if ( MYCOL .ne. 0 )..." * ENDIF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * IF ( INFO.NE.0 ) THEN GOTO 1000 ENDIF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( (MOD( MYCOL+1, 2 ) .EQ. 0) .AND. ( MYCOL .GT. 0 ) ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * ENDIF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = $ DCMPLX( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, $ MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+AF( ODD_SIZE+3 ) * ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 )+WORK( 1 ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ) .EQ. CZERO ) THEN INFO = NPCOL + MYCOL ENDIF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST .EQ. 1 )THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( WORK_U+ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * AF( ODD_SIZE+3 ) = AF( WORK_U+ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST/2 ENDIF * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO .EQ. 0 ) THEN * * * Modify lower off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) $ / DCONJG( AF( ODD_SIZE+2 ) ) * ENDIF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*DCONJG( AF( ODD_SIZE+1 ) )* $ AF( WORK_U+(ODD_SIZE)+1 ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST .GT. 1)THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL ZGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST/2 ) * ENDIF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+3 ) $ / ( AF( ODD_SIZE+2 ) ) * * ENDIF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ *DCONJG( AF( WORK_U+ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) .EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST ENDIF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( WORK_U+ODD_SIZE+3 ) $ * AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 ) $ * AF( WORK_U+ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * ENDIF * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * * 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, $ -1, 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) ENDIF * * RETURN * * End of PZDTTRF * END scalapack-1.8.0/SRC/pdgbsv.f0000640000175000017500000004542310363532303015407 0ustar muammarmuammar SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) DOUBLE PRECISION A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDGBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination with pivoting * is used to factor a reordering * of the matrix into P L U. * * See PDGBTRF and PDGBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * +max(NRHS*(NB+2*bwl+4*bwu), 1) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PDGBTRF, PDGBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDGBTRF and PDGBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PDGBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Factor the matrix * CALL PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PDGBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) RETURN END IF * RETURN * * End of PDGBSV * END scalapack-1.8.0/SRC/pzlasmsub.f0000640000175000017500000003163710602576752016160 0ustar muammarmuammar SUBROUTINE PZLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, K, L, LWORK DOUBLE PRECISION SMLNUM * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), BUF( * ) * .. * * Purpose * ======= * * PZLASMSUB looks for a small subdiagonal element from the bottom * of the matrix that it can safely set to zero. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) COMPLEX*16 array, dimension (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * K (global output) INTEGER * On exit, this yields the bottom portion of the unreduced * submatrix. This will satisfy: L <= M <= I-1. * * SMLNUM (global input) DOUBLE PRECISION * On entry, a "small number" for the given matrix. * Unchanged on exit. * * BUF (local output) COMPLEX*16 array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * This code is basically a parallelization of the following snip * of LAPACK code from ZLAHQR: * * Look for a single small subdiagonal element. * * DO 20 K = I, L + 1, -1 * TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) * IF( TST1.EQ.ZERO ) * $ TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) * IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) * $ GO TO 30 * 20 CONTINUE * 30 CONTINUE * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2, $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC, $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA, $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, $ RIGHT, UP DOUBLE PRECISION TST1, ULP COMPLEX*16 CDUM, H10, H11, H22 * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, INFOG1L, INFOG2L, $ ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MAX, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS * ISTR1 = 0 ISTR2 = ( ( I-L ) / HBL ) IF( ISTR2*HBL.LT.( I-L ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.2*ISTR2 ) THEN * * Error! * RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY * UP THE TRIDIAGONAL * IBUF1 = 0 IBUF2 = 0 IRCV1 = 0 IRCV2 = 0 DO 10 K = I, L + 1, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * ADD UP THE RECEIVES * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT. $ 1 ) ) ) THEN * * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN * * WE MUST RECEIVE H(K ,K-1) FROM LEFT * IRCV2 = IRCV2 + 1 END IF END IF * * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND * IF( IBUF1.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW, $ RIGHT ) END IF * * RECEIVE APPROPRIATE DATA IF THERE IS ANY * IF( IRCV1.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW, $ LEFT ) END IF * * START MAIN LOOP * IBUF1 = 0 IBUF2 = 0 CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT. * * Start loop for subdiagonal search * DO 40 K = I, L + 1, -1 IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN * * Grab information from WORK array * IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H11 = BUF( ISTR1+IBUF1 ) ELSE H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF2 = IBUF2 + 1 H10 = BUF( ISTR2+IBUF2 ) ELSE H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF ELSE * * Information is local * H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF H22 = A( ( ICOL1-1 )*LDA+IROW1 ) TST1 = CABS1( H11 ) + CABS1( H22 ) IF( TST1.EQ.ZERO ) THEN * * FIND SOME NORM OF THE LOCAL H(L:I,L:I) * CALL INFOG1L( L, HBL, NPROW, MYROW, 0, ITMP1, III ) IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW ) CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ITMP2, III ) ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL ) DO 30 III = ITMP1, IROW2 DO 20 JJJ = ITMP2, ICOL2 TST1 = TST1 + CABS1( A( ( JJJ-1 )*LDA+III ) ) 20 CONTINUE 30 CONTINUE END IF IF( CABS1( H10 ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 50 IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN II = MOD( II+NPROW-1, NPROW ) JJ = MOD( JJ+NPCOL-1, NPCOL ) CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) END IF 40 CONTINUE 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) RETURN * * End of PZLASMSUB * END scalapack-1.8.0/SRC/psgetf2.f0000640000175000017500000002263210363532303015471 0ustar muammarmuammar SUBROUTINE PSGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ) * .. * * Purpose * ======= * * PSGETF2 computes an LU factorization of a general M-by-N * distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using * partial pivoting with row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal * elements (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). * * This is the right-looking Parallel Level 2 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). * NB_A-MOD(JA-1, NB_A) >= N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ). On exit, this array contains * the local pieces of the factors L and U from the factoriza- * tion sub( A ) = P*L*U; the unit diagonal elements of L are * not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IROFF, J, $ JJA, MN, MYCOL, MYROW, NPCOL, NPROW REAL GMAX * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PSAMAX, PSGER, $ PSSCAL, PSSWAP, PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * MN = MIN( M, N ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) * IF( MYCOL.EQ.IACOL ) THEN DO 10 J = JA, JA+MN-1 I = IA + J - JA * * Find pivot and test for singularity. * CALL PSAMAX( M-J+JA, GMAX, IPIV( IIA+J-JA ), A, I, J, $ DESCA, 1 ) IF( GMAX.NE.ZERO ) THEN * * Apply the row interchanges to columns JA:JA+N-1 * CALL PSSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, $ IPIV( IIA+J-JA ), JA, DESCA, DESCA( M_ ) ) * * Compute elements I+1:IA+M-1 of J-th column. * IF( J-JA+1.LT.M ) $ CALL PSSCAL( M-J+JA-1, ONE / GMAX, A, I+1, J, $ DESCA, 1 ) ELSE IF( INFO.EQ.0 ) THEN INFO = J - JA + 1 END IF * * Update trailing submatrix * IF( J-JA+1.LT.MN ) THEN CALL PSGER( M-J+JA-1, N-J+JA-1, -ONE, A, I+1, J, DESCA, $ 1, A, I, J+1, DESCA, DESCA( M_ ), A, I+1, $ J+1, DESCA ) END IF 10 CONTINUE * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MN, 1, IPIV( IIA ), $ MN, MYROW, IACOL ) * END IF * RETURN * * End of PSGETF2 * END scalapack-1.8.0/SRC/pcgeqlf.f0000640000175000017500000002733310363532303015543 0ustar muammarmuammar SUBROUTINE PCGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQLF computes a QL factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, IINFO, ICTXT, IPW, J, JB, JL, JN, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCGEQL2, $ PCLARFB, PCLARFT, PB_TOPGET, PB_TOPSET, $ PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEQLF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( (JA+N-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( JL.GE.JN+1 ) THEN * * Use blocked code initially * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Compute the QL factorization of the current block * A(ia:ia+m-n+j+jb-ja-1,j:j+jb-1) * CALL PCGEQL2( M-N+J+JB-JA, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( J.GT.JA ) THEN * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PCLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the * left * CALL PCLARFB( 'Left', 'Conjugate transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = M - N + JN - JA + 1 NU = JN - JA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PCGEQL2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEQLF * END scalapack-1.8.0/SRC/pssyntrd.f0000640000175000017500000005043310363532303016005 0ustar muammarmuammar SUBROUTINE PSSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * Bugs * ==== * * * Support for UPLO='U' is limited to calling the old, slow, PSSYTRD * code. * * * Purpose * ======= * * PSSYNTRD is a prototype version of PSSYTRD which uses tailored * codes (either the serial, SSYTRD, or the parallel code, PSSYTTRD) * when the workspace provided by the user is adequate. * * * PSSYNTRD reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Features * ======== * * PSSYNTRD is faster than PSSYTRD on almost all matrices, * particularly small ones (i.e. N < 500 * sqrt(P) ), provided that * enough workspace is available to use the tailored codes. * * The tailored codes provide performance that is essentially * independent of the input data layout. * * The tailored codes place no restrictions on IA, JA, MB or NB. * At present, IA, JA, MB and NB are restricted to those values allowed * by PSSYTRD to keep the interface simple. These restrictions are * documented below. (Search for "restrictions".) * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * For optimal performance, greater workspace is needed, i.e. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS + 4 ) * NPS * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) * SQNPC = INT( SQRT( REAL( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER ANB, CTXTB, I, IACOL, IAROW, ICOFFA, ICTXT, $ IINFO, INDB, INDD, INDE, INDTAU, INDW, IPW, $ IROFFA, J, JB, JX, K, KK, LLWORK, LWMIN, MINSZ, $ MYCOL, MYCOLB, MYROW, MYROWB, NB, NP, NPCOL, $ NPCOLB, NPROW, NPROWB, NPS, NQ, ONEPMIN, SQNPC, $ TTLWMIN * .. * .. Local Arrays .. INTEGER DESCB( DLEN_ ), DESCW( DLEN_ ), IDUM1( 2 ), $ IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ BLACS_GRIDINIT, CHK1MAT, DESCSET, IGAMN2D, $ PCHK1MAT, PSELSET, PSLAMR1D, PSLATRD, PSSYR2K, $ PSSYTD2, PSSYTTRD, PSTRMR2D, PB_TOPGET, $ PB_TOPSET, PXERBLA, SSYTRD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC, PJLAENV EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PJLAENV * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, INT, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( ( NP+1 )*NB, 3*NB ) ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) MINSZ = PJLAENV( ICTXT, 5, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( REAL( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) TTLWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS * WORK( 1 ) = REAL( TTLWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 * * The following two restrictions are not necessary provided * that either of the tailored codes are used. * ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 600+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYNTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * ONEPMIN = N*N + 3*N + 1 LLWORK = LWORK CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LLWORK, 1, 1, -1, -1, -1, $ -1 ) * * * * Use the serial, LAPACK, code: STRD on small matrices if we * we have enough space. * NPROWB = 0 IF( ( N.LT.MINSZ .OR. SQNPC.EQ.1 ) .AND. LLWORK.GE.ONEPMIN .AND. $ .NOT.UPPER ) THEN NPROWB = 1 NPS = N ELSE IF( LLWORK.GE.TTLWMIN .AND. .NOT.UPPER ) THEN NPROWB = SQNPC END IF END IF * IF( NPROWB.GE.1 ) THEN NPCOLB = NPROWB SQNPC = NPROWB INDB = 1 INDD = INDB + NPS*NPS INDE = INDD + NPS INDTAU = INDE + NPS INDW = INDTAU + NPS LLWORK = LLWORK - INDW + 1 * CALL BLACS_GET( ICTXT, 10, CTXTB ) CALL BLACS_GRIDINIT( CTXTB, 'Row major', SQNPC, SQNPC ) CALL BLACS_GRIDINFO( CTXTB, NPROWB, NPCOLB, MYROWB, MYCOLB ) CALL DESCSET( DESCB, N, N, 1, 1, 0, 0, CTXTB, NPS ) * CALL PSTRMR2D( UPLO, 'N', N, N, A, IA, JA, DESCA, WORK( INDB ), $ 1, 1, DESCB, ICTXT ) * * * Only those processors in context CTXTB are needed for a while * IF( NPROWB.GT.0 ) THEN * IF( NPROWB.EQ.1 ) THEN CALL SSYTRD( UPLO, N, WORK( INDB ), NPS, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDW ), $ LLWORK, INFO ) ELSE * CALL PSSYTTRD( 'L', N, WORK( INDB ), 1, 1, DESCB, $ WORK( INDD ), WORK( INDE ), $ WORK( INDTAU ), WORK( INDW ), LLWORK, $ INFO ) * END IF END IF * * All processors participate in moving the data back to the * way that PSSYNTRD expects it. * CALL PSLAMR1D( N-1, WORK( INDE ), 1, 1, DESCB, E, 1, JA, $ DESCA ) * CALL PSLAMR1D( N, WORK( INDD ), 1, 1, DESCB, D, 1, JA, DESCA ) * CALL PSLAMR1D( N, WORK( INDTAU ), 1, 1, DESCB, TAU, 1, JA, $ DESCA ) * CALL PSTRMR2D( UPLO, 'N', N, N, WORK( INDB ), 1, 1, DESCB, A, $ IA, JA, DESCA, ICTXT ) * IF( MYROWB.GE.0 ) $ CALL BLACS_GRIDEXIT( CTXTB ) * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP*NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, $ INDXG2P( JA+N-KK, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ), ICTXT, MAX( 1, NP ) ) * DO 10 K = N - KK + 1, NB + 1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PSLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, $ TAU, WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PSSYR2K( UPLO, 'No transpose', K-1, JB, -ONE, A, IA, $ J, DESCA, WORK, 1, 1, DESCW, ONE, A, IA, $ JA, DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PSELSET( A, I-1, J, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+NPCOL-1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PSSYTD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N - NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PSLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PSSYR2K( UPLO, 'No transpose', N-K-NB+1, NB, -ONE, $ A, I+NB, J, DESCA, WORK, K+NB, 1, DESCW, $ ONE, A, I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PSELSET( A, I+NB, J+NB-1, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PSSYTD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, TAU, $ WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * END IF * WORK( 1 ) = REAL( TTLWMIN ) * RETURN * * End of PSSYNTRD * END scalapack-1.8.0/SRC/pcunmrz.f0000640000175000017500000004333610363532303015621 0ustar muammarmuammar SUBROUTINE PCUNMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMRZ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JAA, $ JCC, LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCLARZB, $ PCLARZT, PCUNMR3, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IDUM1( 4 ) = L IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMRZ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PCUNMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PCLARZT( 'Backward', 'Rowwise', L, IB, A, I, JAA, DESCA, $ TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PCLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, IB, $ L, A, I, JAA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PCUNMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMRZ * END scalapack-1.8.0/SRC/psgetrs.f0000640000175000017500000002607010363532303015606 0ustar muammarmuammar SUBROUTINE PSGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSGETRS solves a system of distributed linear equations * * op( sub( A ) ) * X = sub( B ) * * with a general N-by-N distributed matrix sub( A ) using the LU * factorization computed by PSGETRF. * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), op( A ) = A or A**T and * sub( B ) denotes B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block data decomposition ( MB_A=NB_A ). * * Arguments * ========= * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': sub( A ) * X = sub( B ) (No transpose) * = 'T': sub( A )**T * X = sub( B ) (Transpose) * = 'C': sub( A )**T * X = sub( B ) (Transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) REAL pointer into the local * memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the factors * L and U from the factorization sub( A ) = P*L*U; the unit * diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand sides * sub( B ). On exit, sub( B ) is overwritten by the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PSLAPIV, PSTRSM, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE NOTRAN = LSAME( TRANS, 'N' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1200+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1200+CTXT_) END IF END IF IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, 3, $ IB, JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGETRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ DESCA( MB_ ) + NUMROC( DESCA( M_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) ) * IF( NOTRAN ) THEN * * Solve sub( A ) * X = sub( B ). * * Apply row interchanges to the right hand sides. * CALL PSLAPIV( 'Forward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A )' * X = sub( B ). * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Apply row interchanges to the solution vectors. * CALL PSLAPIV( 'Backward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * END IF * RETURN * * End of PSGETRS * END scalapack-1.8.0/SRC/psorgr2.f0000640000175000017500000002560310363532303015516 0ustar muammarmuammar SUBROUTINE PSORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGR2 generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1) H(2) . . . H(k) * * as returned by PSGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PSGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, LWMIN, MP, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 REAL TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLASET, PSSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia:ia+m-k-1 to rows of the unit matrix * CALL PSLASET( 'All', M-K, N-M, ZERO, ZERO, A, IA, JA, DESCA ) CALL PSLASET( 'All', M-K, M, ZERO, ONE, A, IA, JA+N-M, DESCA ) * END IF * TAUI = ZERO MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+M-K, IA+M-1 * * Apply H(i) to A(ia:i,ja:ja+n-k+i-1) from the right * CALL PSELSET( A, I, JA+N-M+I-IA, DESCA, ONE ) CALL PSLARF( 'Right', I-IA, I-IA+N-M+1, A, I, JA, DESCA, $ DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, MP ) ) CALL PSSCAL( I-IA+N-M, -TAUI, A, I, JA, DESCA, DESCA( M_ ) ) CALL PSELSET( A, I, JA+N-M+I-IA, DESCA, ONE-TAUI ) * * Set A(i,ja+n-m+i-ia+1:ja+n-1) to zero * CALL PSLASET( 'All', 1, IA+M-1-I, ZERO, ZERO, A, I, $ JA+N-M+I-IA+1, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGR2 * END scalapack-1.8.0/SRC/pddbtrsv.f0000640000175000017500000014460010363532303015747 0ustar muammarmuammar SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDDBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Gaussian elimination code PD@(dom_pre)BTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PDDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDBTRF and this is stored in AF. If a linear system * is to be solved using PDDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MAX_BW, MBW2, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 18, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ DGEMM, DGERV2D, DGESD2D, DLACPY, DMATADD, $ DTBTRS, DTRMM, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 9*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 12*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 12*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 12*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 12*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX( BWL, BWU ) MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 9*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -4 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -5 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 9*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 9*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 12*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 12*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -6 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -8 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 9*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PDDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = MAX( BWL, BWU )*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PDDBTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 18, 1 ) = DESCB( 5 ) PARAM_CHECK( 17, 1 ) = DESCB( 4 ) PARAM_CHECK( 16, 1 ) = DESCB( 3 ) PARAM_CHECK( 15, 1 ) = DESCB( 2 ) PARAM_CHECK( 14, 1 ) = DESCB( 1 ) PARAM_CHECK( 13, 1 ) = IB PARAM_CHECK( 12, 1 ) = DESCA( 5 ) PARAM_CHECK( 11, 1 ) = DESCA( 4 ) PARAM_CHECK( 10, 1 ) = DESCA( 3 ) PARAM_CHECK( 9, 1 ) = DESCA( 1 ) PARAM_CHECK( 8, 1 ) = JA PARAM_CHECK( 7, 1 ) = NRHS PARAM_CHECK( 6, 1 ) = BWU PARAM_CHECK( 5, 1 ) = BWL PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 18, 2 ) = 1205 PARAM_CHECK( 17, 2 ) = 1204 PARAM_CHECK( 16, 2 ) = 1203 PARAM_CHECK( 15, 2 ) = 1202 PARAM_CHECK( 14, 2 ) = 1201 PARAM_CHECK( 13, 2 ) = 11 PARAM_CHECK( 12, 2 ) = 905 PARAM_CHECK( 11, 2 ) = 904 PARAM_CHECK( 10, 2 ) = 903 PARAM_CHECK( 9, 2 ) = 901 PARAM_CHECK( 8, 2 ) = 8 PARAM_CHECK( 7, 2 ) = 6 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 18, PARAM_CHECK, 18, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 200 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW END IF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'N', 'U', ODD_SIZE, BWL, NRHS, $ A( OFST+1+BWU ), LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BWL, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWL+1 ), LLDB, $ WORK( 1 ), MAX_BW ) * CALL DTRMM( 'L', 'U', 'N', 'N', BWL, NRHS, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, WORK( 1 ), MAX_BW ) * CALL DMATADD( BWL, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Clear garbage out of workspace block * DO 10 IDUM1 = 1, WORK_SIZE_MIN WORK( IDUM1 ) = 0.0 10 CONTINUE * * IF( MYCOL.NE.0 ) THEN * * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'N', 'N', BWU, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ BWU, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+MAX_BW-BWU ), MAX_BW ) * END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 40 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 20 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 30 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 20 30 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 40 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 90 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 50 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 60 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 50 60 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( ( ODD_SIZE )*BWU+1 ), MAX_BW, WORK( 1 ), $ MAX_BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTBTRS( 'L', 'T', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * ***Modification Loop ******* * 70 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 80 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 70 80 CONTINUE * [End of GOTO Loop] * 90 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'T', 'N', ODD_SIZE, NRHS, BWU, -ONE, AF( 1 ), $ BWU, WORK( 1+MAX_BW-BWU ), MAX_BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BWL, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL DTRMM( 'L', 'U', 'T', 'N', BWL, NRHS, -ONE, $ A( ( OFST+( BWL+BWU+1 )+( ODD_SIZE-BWL )* $ LLDA ) ), LLDA-1, WORK( 1+MAX_BW-BWL ), $ MAX_BW ) * CALL DMATADD( BWL, NRHS, ONE, WORK( 1+MAX_BW-BWL ), $ MAX_BW, ONE, B( PART_OFFSET+ODD_SIZE-BWL+ $ 1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'T', 'U', ODD_SIZE, BWL, NRHS, $ A( OFST+1+BWU ), LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'T', 'N', ODD_SIZE, BWU, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BWU, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWU+1 ), LLDB, $ WORK( 1 ), MAX_BW ) * CALL DTRMM( 'L', 'L', 'T', 'N', BWU, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1 ), MAX_BW ) * CALL DMATADD( BWU, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Clear garbage out of workspace block * DO 100 IDUM1 = 1, WORK_SIZE_MIN WORK( IDUM1 ) = 0.0 100 CONTINUE * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'N', 'N', BWL, NRHS, ODD_SIZE, -ONE, $ AF( WORK_U+1 ), BWL, B( PART_OFFSET+1 ), $ LLDB, ZERO, WORK( 1+MAX_BW-BWL ), MAX_BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 130 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 110 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 120 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DMATADD( MAX_BW, NRHS, ONE, WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 110 120 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTBTRS( 'U', 'T', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 130 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 180 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 140 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 140 150 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -ONE, $ AF( WORK_U+( ODD_SIZE )*BWL+1 ), MAX_BW, $ WORK( 1 ), MAX_BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTBTRS( 'U', 'N', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), $ NRHS, AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, $ MAX_BW-1 ) ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 190 END IF * * * ***Modification Loop ******* * 160 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 170 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 160 170 CONTINUE * [End of GOTO Loop] * 180 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, MAX_BW, NRHS, WORK( 1 ), MAX_BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'T', 'N', ODD_SIZE, NRHS, BWL, -ONE, $ AF( WORK_U+1 ), BWL, WORK( 1+MAX_BW-BWL ), $ MAX_BW, ONE, B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BWU, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL DTRMM( 'L', 'L', 'N', 'N', BWU, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL DMATADD( BWU, NRHS, ONE, WORK( 1+MAX_BW-BWU ), $ MAX_BW+BWL, ONE, B( PART_OFFSET+ODD_SIZE- $ BWU+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'N', 'N', ODD_SIZE, BWU, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 190 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 200 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDDBTRSV * END scalapack-1.8.0/SRC/pzhetrd.f0000640000175000017500000004040410363532303015574 0ustar muammarmuammar SUBROUTINE PZHETRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZHETRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IINFO, IPW, $ IROFFA, J, JB, JX, K, KK, LWMIN, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZHER2K, $ PZHETD2, PZLATRD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( (NP+1)*NB, 3*NB ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHETRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP * NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, INDXG2P( JA+N-KK, $ NB, MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, NP ) ) * DO 10 K = N-KK+1, NB+1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PZLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, TAU, $ WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PZHER2K( UPLO, 'No transpose', K-1, JB, -CONE, A, IA, $ J, DESCA, WORK, 1, 1, DESCW, ONE, A, IA, JA, $ DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PZELSET( A, I-1, J, DESCA, DCMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PZHETD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PZLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PZHER2K( UPLO, 'No transpose', N-K-NB+1, NB, -CONE, A, $ I+NB, J, DESCA, WORK, K+NB, 1, DESCW, ONE, A, $ I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PZELSET( A, I+NB, J+NB-1, DESCA, DCMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PZHETD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZHETRD * END scalapack-1.8.0/SRC/psorm2r.f0000640000175000017500000004055110363532303015523 0ustar muammarmuammar SUBROUTINE PSORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORM2R overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PSGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 REAL AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PSELSET, PSELSET2, PSLARF, PB_TOPGET, $ PB_TOPSET, PXERBLA, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D, SSCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL SSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL SSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL SGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL SGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL SSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * DO 10 J = J1, J2, J3 IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H(j) or H(j)' * CALL PSELSET2( AJJ, A, IA+J-JA, J, DESCA, ONE ) CALL PSLARF( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, C, $ ICC, JCC, DESCC, WORK ) CALL PSELSET( A, IA+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORM2R * END scalapack-1.8.0/SRC/pcgecon.f0000640000175000017500000003743110363532303015540 0ustar muammarmuammar SUBROUTINE PCGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, INFO, JA, LRWORK, LWORK, N REAL ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL RWORK( * ) COMPLEX A( * ), WORK( * ) * .. * * Purpose * ======= * * PCGECON estimates the reciprocal of the condition number of a general * distributed complex matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm, using the LU factorization computed by * PCGETRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm * = 'I': Infinity-norm * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, * this array contains the local pieces of the factors L and U * from the factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U; the * unit diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) REAL * If NORM = '1' or 'O', the 1-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * If NORM = 'I', the infinity-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*CEIL(NPROW-1,NPCOL),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(NPCOL-1,NPROW)) ). * * LOCr and LOCc values can be computed using the ScaLAPACK * tool function NUMROC; NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 2*LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, ONENRM CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, JX, $ KASE, KASE1, LRWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPMOD, NPROW, NQ, NQMOD REAL AINVNM, SCALE, SL, SMLNUM, SU COMPLEX WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, CGEBR2D, $ CGEBS2D, INFOG2L, PCAMAX, PCHK1MAT, $ PCLATRS, PCLACON, PCSRSCL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, ICHAR, MAX, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600 + CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LRWMIN = 2*NQMOD RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGECON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPNL = 1 IPNU = IPNL + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the norm of inv(A). * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 * 10 CONTINUE CALL PCLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, RWORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), IX, $ JX, DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PCLATRS( 'Lower', 'Conjugate transpose', 'Unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SL, RWORK( IPNL ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Divide X by 1/(SL*SU) if doing so will not cause overflow. * SCALE = SL*SU NORMIN = 'Y' IF( SCALE.NE.ONE ) THEN CALL PCAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL CGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL CGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PCSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PCGECON * END scalapack-1.8.0/SRC/psorglq.f0000640000175000017500000003062210363532303015604 0ustar muammarmuammar SUBROUTINE PSORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGLQ generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k) . . . H(2) H(1) * * as returned by PSGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PSGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ J, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLARFB, $ PSLARFT, PSLASET, PSORGL2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IL = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * CALL PSLASET( 'All', IA+M-IL, IL-IA, ZERO, ZERO, A, IL, JA, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PSORGL2( IA+M-IL, N-IL+IA, IA+K-IL, A, IL, JA+IL-IA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of rows to loop over ? * IF( IL.GT.IN+1 ) THEN * * Use blocked code * DO 10 I = IL-DESCA( MB_ ), IN+1, -DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+M-I ) J = JA + I - IA * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PSLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-I+IA, IB, A, I, J, DESCA, $ WORK, A, I+IB, J, DESCA, WORK( IPW ) ) END IF * * Apply H' to columns j:ja+n-1 of current block * CALL PSORGL2( IB, N-I+IA, IB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set columns ia:i-1 of current block to zero * CALL PSLASET( 'All', IB, I-IA, ZERO, ZERO, A, I, JA, DESCA ) 10 CONTINUE * END IF * * Handle first block separately * IF( IL.GT.IA ) THEN * IB = IN - IA + 1 * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PSLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PSLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', M-IB, $ N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, JA, $ DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-1 of current block * CALL PSORGL2( IB, N, IB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGLQ * END scalapack-1.8.0/SRC/pclacpy.f0000640000175000017500000002236210363532303015552 0ustar muammarmuammar SUBROUTINE PCLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCLACPY copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PCLACPY * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBB, IBLK, IN, ITMP, J, JAA, JBB, $ JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PCLACP2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PCLACP2( UPLO, M, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN CALL PCLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB + ITMP JBB = JB + ITMP JAA = JA + ITMP CALL PCLACP2( UPLO, IBLK, N-ITMP, A, I, JAA, DESCA, $ B, IBB, JBB, DESCB ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN CALL PCLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IBB = IB + JTMP JBB = JB + JTMP IAA = IA + JTMP CALL PCLACP2( UPLO, M-JTMP, JBLK, A, IAA, J, DESCA, $ B, IBB, JBB, DESCB ) 20 CONTINUE ELSE IF( M.LE.N ) THEN CALL PCLACP2( UPLO, IN-IA+1, N, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) IBB = IB+ITMP CALL PCLACP2( UPLO, IBLK, N, A, I, JA, DESCA, $ B, IBB, JB, DESCB ) 30 CONTINUE ELSE CALL PCLACP2( UPLO, M, JN-JA+1, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) JBB = JB+JTMP CALL PCLACP2( UPLO, M, JBLK, A, IA, J, DESCA, $ B, IB, JBB, DESCB ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PCLACPY * END scalapack-1.8.0/SRC/pssygvx.f0000640000175000017500000010455210377154001015644 0ustar muammarmuammar SUBROUTINE PSSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, $ WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, $ GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, $ LIWORK, LWORK, M, N, NZ REAL ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCZ( * ), $ ICLUSTR( * ), IFAIL( * ), IWORK( * ) REAL A( * ), B( * ), GAP( * ), W( * ), WORK( * ), $ Z( * ) * .. * * Purpose * * ======= * * PSSYGVX computes all the eigenvalues, and optionally, * the eigenvectors * of a real generalized SY-definite eigenproblem, of the form * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or * sub( B )*sub( A )*x=(lambda)*x. * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be * SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed * to be symmetric positive definite. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * JOBZ (global input) CHARACTER*1 * = 'N': Compute eigenvalues only; * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored; * = 'L': Lower triangles of sub( A ) and sub( B ) are stored. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix. * * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains * the distributed matrix Z of eigenvectors. The eigenvectors * are normalized as follows: * if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I; * if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I. * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') * or the lower triangle (if UPLO='L') of sub( A ), including * the diagonal, is destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PSSYGVX cannot guarantee * correct error reporting. * * B (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( B ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( B ) contains * the lower triangular part of the matrix. * * On exit, if INFO <= N, the part of sub( B ) containing the * matrix is overwritten by the triangular factor U or L from * the Cholesky factorization sub( B ) = U**T*U or * sub( B ) = L*L**T. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * DESCB( CTXT_ ) must equal DESCA( CTXT_ ) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) REAL * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PSLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PSLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PSSYGVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PSSYGVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) REAL * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * The row index in the global array Z indicating the first * row of sub( Z ). * * JZ (global input) INTEGER * The column index in the global array Z indicating the * first column of sub( Z ). * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) REAL array, * dimension max(3,LWORK) * if JOBZ='N' WORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' WORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = * DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LWORK is too small: * If LWORK is too small to guarantee orthogonality, * PSSYGVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-23 * is returned. Note that when RANGE='V', PSSYGVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LWORK is large enough to allow PSSYGVX to * compute the eigenvalues, PSSYGVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * Greater performance can be achieved if adequate workspace * is provided. On the other hand, in some situations, * performance can decrease as the workspace provided * increases above the workspace amount shown below: * * For optimal performance, greater workspace may be * needed, i.e. * LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT, * NSYGST_LWOPT ) * Where: * LWORK, as defined previously, depends upon the number * of eigenvectors requested, and * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 3 ) * NPS * NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB * * ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L', * 0, 0, 0, 0) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * NB = DESCA( MB_ ) * NP0 = NUMROC( N, NB, 0, 0, NPROW ) * NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * For large N, no extra workspace is needed, however the * biggest boost in performance comes for small N, so it * is wise to provide the extra workspace (typically less * than a Megabyte per process). * * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PSSTEIN will perform no better than SSTEIN on 1 processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance on all work arrays. * Each of these values is returned in the first entry of the * corresponding work array, and no error message is issued by * PXERBLA. * * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (output) INTEGER array, dimension (N) * IFAIL provides additional information when INFO .NE. 0 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * * If neither of the above error conditions hold and JOBZ = 'V', * then the first M elements of IFAIL are set to zero. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) REAL array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PSSYGVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to * compute eigenvalues. * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/16,2).NE.0), then B was not positive * definite. IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1), * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties, * namely the following expressions should be true: * * DESCA(MB_) = DESCA(NB_) * IA = IB = IZ * JA = IB = JZ * DESCA(M_) = DESCB(M_) =DESCZ(M_) * DESCA(N_) = DESCB(N_)= DESCZ(N_) * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_) * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_) * MOD( IA-1, DESCA( MB_ ) ) = 0 * MOD( JA-1, DESCA( NB_ ) ) = 0 * MOD( IB-1, DESCB( MB_ ) ) = 0 * MOD( JB-1, DESCB( NB_ ) ) = 0 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL FIVE, ZERO PARAMETER ( FIVE = 5.0E+0, ZERO = 0.0E+0 ) INTEGER IERRNPD PARAMETER ( IERRNPD = 16 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ CHARACTER TRANS INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA, $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN, $ LWOPT, MQ0, MYCOL, MYROW, NB, NEIG, NN, NP0, $ NPCOL, NPROW, NPS, NQ0, NSYGST_LWOPT, $ NSYTRD_LWOPT, SQNPC REAL EPS, SCALE * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV REAL PSLAMCH EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCHK2MAT, $ PSPOTRF, PSSYEVX, PSSYNGST, PSTRMM, PSTRSM, $ PXERBLA, SGEBR2D, SGEBS2D, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, REAL, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2600+CTXT_ ) ELSE * * Get machine constants. * EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) * WANTZ = LSAME( JOBZ, 'V' ) UPPER = LSAME( UPLO, 'U' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( VALEIG ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, $ 0, 0 ) END IF IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) $ LQUERY = .TRUE. * LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) * NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) ELSE LWOPT = LWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD and GST algorithms * ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS NB = DESCA( MB_ ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB LWOPT = MAX( LWOPT, N+NSYTRD_LWOPT, NSYGST_LWOPT ) * * Version 1.0 Limitations * IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -2 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -3 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN INFO = -( 1300+M_ ) ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN INFO = -( 1300+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -( 1300+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN INFO = -( 1300+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN INFO = -( 1300+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2200+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2200+CTXT_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -11 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -12 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -15 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -16 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -17 ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -14 ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -15 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) $ THEN INFO = -18 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( WANTZ ) THEN IDUM1( 2 ) = ICHAR( 'V' ) ELSE IDUM1( 2 ) = ICHAR( 'N' ) END IF IDUM2( 2 ) = 2 IF( UPPER ) THEN IDUM1( 3 ) = ICHAR( 'U' ) ELSE IDUM1( 3 ) = ICHAR( 'L' ) END IF IDUM2( 3 ) = 3 IF( ALLEIG ) THEN IDUM1( 4 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 4 ) = ICHAR( 'I' ) ELSE IDUM1( 4 ) = ICHAR( 'V' ) END IF IDUM2( 4 ) = 4 IF( LQUERY ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 5 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, $ JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, $ INFO ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = REAL( LWOPT ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYGVX ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Form a Cholesky factorization of sub( B ). * CALL PSPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) IF( INFO.NE.0 ) THEN IWORK( 1 ) = LIWMIN WORK( 1 ) = REAL( LWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD RETURN END IF * * Transform problem to standard eigenvalue problem and solve. * CALL PSSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) CALL PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, $ IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, $ LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO ) * IF( WANTZ ) THEN * * Backtransform eigenvectors to the original problem. * NEIG = M IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN * * For sub( A )*x=(lambda)*sub( B )*x and * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors: * x = inv(L)'*y or inv(U)*y * IF( UPPER ) THEN TRANS = 'N' ELSE TRANS = 'T' END IF * CALL PSTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * For sub( B )*sub( A )*x=(lambda)*x; * backtransform eigenvectors: x = L*y or U'*y * IF( UPPER ) THEN TRANS = 'T' ELSE TRANS = 'N' END IF * CALL PSTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) END IF END IF * IF( SCALE.NE.ONE ) THEN CALL SSCAL( N, SCALE, W, 1 ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = REAL( LWOPT ) RETURN * * End of PSSYGVX * END scalapack-1.8.0/SRC/pdlacon.f0000640000175000017500000003233110363532303015534 0ustar muammarmuammar SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, $ EST, KASE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N DOUBLE PRECISION EST * .. * .. Array Arguments .. INTEGER DESCV( * ), DESCX( * ), ISGN( * ) DOUBLE PRECISION V( * ), X( * ) * .. * * Purpose * ======= * * PDLACON estimates the 1-norm of a square, real distributed matrix A. * Reverse communication is used for evaluating matrix-vector products. * X and V are aligned with the distributed matrix A, this information * is implicitly contained within IV, IX, DESCV, and DESCX. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vectors V and X. N >= 0. * * V (local workspace) DOUBLE PRECISION pointer into the local * memory to an array of dimension LOCr(N+MOD(IV-1,MB_V)). On * the final return, V = A*W, where EST = norm(V)/norm(W) * (W is not returned). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * X (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension * LOCr(N+MOD(IX-1,MB_X)). On an intermediate return, X * should be overwritten by * A * X, if KASE=1, * A' * X, if KASE=2, * PDLACON must be re-called with all the other parameters * unchanged. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * ISGN (local workspace) INTEGER array, dimension * LOCr(N+MOD(IX-1,MB_X)). ISGN is aligned with X and V. * * * EST (global output) DOUBLE PRECISION * An estimate (a lower bound) for norm(A). * * KASE (local input/local output) INTEGER * On the initial call to PDLACON, KASE should be 0. * On an intermediate return, KASE will be 1 or 2, indicating * whether X should be overwritten by A * X or A' * X. * On the final return from PDLACON, KASE will again be 0. * * Further Details * =============== * * The serial version DLACON has been contributed by Nick Higham, * University of Manchester. It was originally named SONEST, dated * March 16, 1988. * * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of * a real or complex matrix, with applications to condition estimation", * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ZERO, ONE, TWO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IFLAG, IIVX, IMAXROW, IOFFVX, IROFF, $ ITER, IVXCOL, IVXROW, J, JLAST, JJVX, JUMP, $ K, MYCOL, MYROW, NP, NPCOL, NPROW DOUBLE PRECISION ALTSGN, ESTOLD, JLMAX, TEMP, XMAX * .. * .. Local Arrays .. DOUBLE PRECISION WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, $ IGSUM2D, INFOG2L, PDAMAX, PDASUM, $ PDELGET * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, INDXL2G, NUMROC EXTERNAL INDXG2L, INDXG2P, INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MOD, NINT, SIGN * .. * .. Save statement .. SAVE * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) IF( MYCOL.NE.IVXCOL ) $ RETURN IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) $ NP = NP - IROFF IOFFVX = IIVX + (JJVX-1)*DESCX( LLD_ ) * IF( KASE.EQ.0 ) THEN DO 10 I = IOFFVX, IOFFVX+NP-1 X( I ) = ONE / DBLE( N ) 10 CONTINUE KASE = 1 JUMP = 1 RETURN END IF * GO TO ( 20, 40, 70, 110, 140 )JUMP * * ................ ENTRY (JUMP = 1) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X * 20 CONTINUE IF( N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN V( IOFFVX ) = X( IOFFVX ) EST = ABS( V( IOFFVX ) ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF * ... QUIT GO TO 150 END IF CALL PDASUM( N, EST, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * DO 30 I = IOFFVX, IOFFVX+NP-1 X( I ) = SIGN( ONE, X( I ) ) ISGN( I ) = NINT( X( I ) ) 30 CONTINUE KASE = 2 JUMP = 2 RETURN * * ................ ENTRY (JUMP = 2) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X * 40 CONTINUE CALL PDAMAX( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = DBLE( J ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( WORK( 2 ) ) END IF END IF ITER = 2 * * MAIN LOOP - ITERATIONS 2, 3,...,ITMAX * 50 CONTINUE DO 60 I = IOFFVX, IOFFVX+NP-1 X( I ) = ZERO 60 CONTINUE IMAXROW = INDXG2P( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) IF( MYROW.EQ.IMAXROW ) THEN I = INDXG2L( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) X( I ) = ONE END IF KASE = 1 JUMP = 3 RETURN * * ................ ENTRY (JUMP = 3) * X HAS BEEN OVERWRITTEN BY A*X * 70 CONTINUE CALL DCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) ESTOLD = EST CALL PDASUM( N, EST, V, IV, JV, DESCV, 1 ) IF( DESCV( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF IFLAG = 0 DO 80 I = IOFFVX, IOFFVX+NP-1 IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) THEN IFLAG = 1 GO TO 90 END IF 80 CONTINUE * 90 CONTINUE CALL IGSUM2D( ICTXT, 'C', ' ', 1, 1, IFLAG, 1, -1, MYCOL ) * * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. * ALONG WITH IT, TEST FOR CYCLING. * IF( IFLAG.EQ.0 .OR. EST.LE.ESTOLD ) $ GO TO 120 * DO 100 I = IOFFVX, IOFFVX+NP-1 X( I ) = SIGN( ONE, X( I ) ) ISGN( I ) = NINT( X( I ) ) 100 CONTINUE KASE = 2 JUMP = 4 RETURN * * ................ ENTRY (JUMP = 4) * X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X * 110 CONTINUE JLAST = J CALL PDAMAX( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = DBLE( J ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( WORK( 2 ) ) END IF END IF CALL PDELGET( 'Columnwise', ' ', JLMAX, X, JLAST, JX, DESCX ) IF( ( JLMAX.NE.ABS( XMAX ) ).AND.( ITER.LT.ITMAX ) ) THEN ITER = ITER + 1 GO TO 50 END IF * * ITERATION COMPLETE. FINAL STAGE. * 120 CONTINUE DO 130 I = IOFFVX, IOFFVX+NP-1 K = INDXL2G( I-IOFFVX+IIVX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW )-IX+1 IF( MOD( K, 2 ).EQ.0 ) THEN ALTSGN = -ONE ELSE ALTSGN = ONE END IF X( I ) = ALTSGN*( ONE+DBLE( K-1 ) / DBLE( N-1 ) ) 130 CONTINUE KASE = 1 JUMP = 5 RETURN * * ................ ENTRY (JUMP = 5) * X HAS BEEN OVERWRITTEN BY A*X * 140 CONTINUE CALL PDASUM( N, TEMP, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1, $ IVXROW, MYCOL ) END IF END IF TEMP = TWO*( TEMP / DBLE( 3*N ) ) IF( TEMP.GT.EST ) THEN CALL DCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) EST = TEMP END IF * 150 CONTINUE KASE = 0 * RETURN * * End of PDLACON * END scalapack-1.8.0/SRC/pcunm2l.f0000640000175000017500000004144310363532303015500 0ustar muammarmuammar SUBROUTINE PCUNM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNM2L overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PCGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PCGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 COMPLEX AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CGEBR2D, $ CGEBS2D, CGERV2D, CGESD2D, CHK1MAT, $ CSCAL, INFOG2L, PCELSET, PCELSET2, $ PCLARF, PCLARFC, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - CONJG( TAU( JJ ) ) END IF CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL CSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL CSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - CONJG( TAU( JJ ) ) END IF END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL CGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL CGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL CSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * DO 10 J = J1, J2, J3 * IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic:ic+m-k+j-ja,jc:jc+n-1) * MI = M - K + J - JA + 1 ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc:jc+n-k+j-ja) * NI = N - K + J - JA + 1 END IF * * Apply H(j) or H(j)' * CALL PCELSET2( AJJ, A, IA+NQ-K+J-JA, J, DESCA, ONE ) IF( NOTRAN ) THEN CALL PCLARF( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, $ IC, JC, DESCC, WORK ) ELSE CALL PCLARFC( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, $ IC, JC, DESCC, WORK ) END IF CALL PCELSET( A, IA+NQ-K+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNM2L * END scalapack-1.8.0/SRC/psporfs.f0000640000175000017500000010050310363532303015605 0ustar muammarmuammar SUBROUTINE PSPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, $ LIWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IWORK( * ) REAL A( * ), AF( * ), B( * ), $ BERR( * ), FERR( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PSPORFS improves the computed solution to a system of linear * equations when the coefficient matrix is symmetric positive definite * and provides error bounds and backward error estimates for the * solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) REAL pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1) ). * This array contains the local pieces of the N-by-N symmetric * distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) REAL pointer into the local memory * to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * On entry, this array contains the factors L or U from the * Cholesky factorization sub( A ) = L*L**T or U**T*U, as * computed by PSPOTRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * B (local input) REAL pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) REAL pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). On exit, it contains the * improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD( IA-1, MB_A ) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) REAL TWO, THREE PARAMETER ( TWO = 2.0E+0, THREE = 3.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK2MAT, PSASYMV, PSAXPY, PSCOPY, $ PSLACON, PSPOTRS, PSSYMV, PXERBLA, $ SGAMX2D, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 19, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3 * NPMOD LIWMIN = NPMOD WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -13 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1500 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1500 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1900 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -17 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1900 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -18 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1900 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -25 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 23 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 25 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 0, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 15, N, 2, NRHS, 3, $ IX, JX, DESCX, 19, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPORFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X) * CALL PSCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSSYMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i))/(abs(sub(A))*abs(sub(X))+abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to * the i-th components of the numerator and denominator * before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PSASYMV( UPLO, N, ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J) is larger than machine epsilon, and * 2) BERR(J) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PSAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + * NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B)) ))) / norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub( B ) - sub( A )*sub( X ). * CALL PSCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PSSYMV( UPLO, N, -ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPR ), IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / * ( abs(sub(A))*abs(sub(X)) + abs(sub(B)) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PSASYMV( UPLO, N, ONE, A, IA, JA, DESCA, X, IX, J+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PSAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(sub(A)))* * ( abs(R) + NZ*EPS*( * abs(sub(A))*abs(sub(X))+abs(sub(B)) )))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(sub(A)) is the inverse of sub(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(sub(A))*abs(sub(X)) * +abs(sub(B))) is incremented by SAFE1 if the i-th component * of abs(sub(A))*abs(sub(X)) + abs(sub(B)) is less than SAFE2. * * Use PSLACON to estimate the infinity-norm of the matrix * inv(sub(A)) * diag(W), where * W = abs(R) + NZ*EPS*( abs(sub(A))*abs(sub(X))+abs(sub(B))))) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PSLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(sub(A)'). * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(sub(A))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PSPOTRS( UPLO, N, 1, AF, IAF, JAF, DESCAF, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSPORFS * END scalapack-1.8.0/SRC/pslarfb.f0000640000175000017500000010363510363532303015553 0ustar muammarmuammar SUBROUTINE PSLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) REAL C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARFB applies a real block reflector Q or its transpose Q**T to a * real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) * from the left or the right. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * DIRECT (global input) CHARACTER * Indicates how Q is formed from a product of elementary * reflectors * = 'F': Q = H(1) H(2) . . . H(k) (Forward) * = 'B': Q = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * V (local input) REAL pointer into the local memory * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and * SIDE = 'R'. It contains the local pieces of the distributed * vectors V representing the Householder transformation. * See further details. * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1)); * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1)); * if STOREV = 'R', LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) REAL array, dimension MB_V by MB_V * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian- * gular matrix T in the representation of the block reflector. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) REAL array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL FORWARD CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW, $ ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV, $ ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV, $ IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV, $ ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND, $ JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC, $ MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV, $ NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG1L, INFOG2L, PB_TOPGET, $ PBSTRAN, SGEBR2D, SGEBS2D, SGEMM, $ SGSUM2D, SLACPY, SLASET, STRBR2D, $ STRBS2D, STRMM * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF FORWARD = LSAME( DIRECT, 'F' ) IF( FORWARD ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) IROFFV = MOD( IV-1, MBV ) ICOFFV = MOD( JV-1, NBV ) MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYCOL.EQ.ICCOL ) $ NQC = NQC - ICOFFC IF( MYROW.EQ.ICROW ) $ MPC = MPC - IROFFC JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFC = IIC + ( JJC-1 ) * LDC IOFFV = IIV + ( JJV-1 ) * LDV * IF( LSAME( STOREV, 'C' ) ) THEN * * V is stored columnwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV ) * IPV = 1 IPW = IPV + MPC * K LV = MAX( 1, MPC ) LW = MAX( 1, NQC ) * * Broadcast V to the other process columns. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ V( IOFFV ), LDV ) IF( MYROW.EQ.IVROW ) $ CALL STRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV ) CALL SLACPY( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ WORK( IPV ), LV, MYROW, IVCOL ) IF( MYROW.EQ.IVROW ) $ CALL STRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV, MYROW, IVCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPV) = ( V1 ) where V1 is unit lower triangular, * ( V2 ) zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + MPC - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 10 CONTINUE IF( K-ITOP .GT.0 ) THEN CALL SLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 10 END IF * ELSE * * WORK(IPV) = ( V1 ) where V2 is unit upper triangular, * ( V2 ) zeroes lower triangular part of V2 * JJ = JJV IOFF = MOD( IV+M-K-1, MBV ) CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ), $ II, ILASTROW ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 20 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL SLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL SLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPV+II-IIV+ITOP*LV ), LV ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 20 END IF * END IF * * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K * IF( MPC.GT.0 ) THEN CALL SGEMM( 'Transpose', 'No transpose', NQC, K, MPC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * CALL STRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, NBV, WORK( IPW ), LW ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL SGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form sub( C )*Q or sub( C )*Q' * * ICOFFC = IROFFV is required by the current transposition * routine PBSTRAN * NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NPV = NPV0 - IROFFV ELSE NPV = NPV0 END IF IF( MYCOL.EQ.ICCOL ) THEN NQC0 = NQC + ICOFFC ELSE NQC0 = NQC END IF * * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]' * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]' * WORK( IPT ) is the workspace for PBSTRAN * IPV = 1 IPW = IPV + K * NQC0 IPT = IPW + NPV0 * K LV = MAX( 1, K ) LW = MAX( 1, NPV0 ) * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN CALL SLASET( 'All', IROFFV, K, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + IROFFV CALL SLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL SLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower * triangular, zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + NPV - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 30 CONTINUE IF( ( K-ITOP ).GT.0 ) THEN CALL SLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ), $ LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 30 END IF * ELSE * * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper * triangular, zeroes lower triangular part of V2. * JJ = JJV CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW, $ DESCV( RSRC_ ), II, ILASTROW ) IOFF = MOD( IV+N-K-1, MBV ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 40 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL SLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL SLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPW1+II-IIV+ITOP*LW ), LW ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 40 END IF END IF END IF * CALL PBSTRAN( ICTXT, 'Columnwise', 'Transpose', N+IROFFV, K, $ MBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV, $ IVROW, IVCOL, -1, ICCOL, WORK( IPT ) ) * * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC * IF( MYCOL.EQ.ICCOL ) $ IPV = IPV + ICOFFC * LV * * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K * LW = MAX( 1, MPC ) * IF( NQC.GT.0 ) THEN CALL SGEMM( 'No transpose', 'Transpose', MPC, K, NQC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN * * Broadcast the block reflector to the other rows. * CALL STRBS2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV ) ELSE CALL STRBR2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV, IVROW, MYCOL ) END IF CALL STRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, NBV, WORK( IPW ), LW ) * CALL SGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V' * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL SGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) END IF * ELSE * * V is stored rowwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC = ICOFFV is required by the current transposition * routine PBSTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW ) THEN MPC0 = MPC + IROFFC ELSE MPC0 = MPC END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBSTRAN * IPV = 1 IPW = IPV + MPC0 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC0 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL SLASET( 'All', K, ICOFFV, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + ICOFFV * LW CALL SLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL SLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + MQV - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 50 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL SLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ), $ LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 50 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL, $ DESCV( CSRC_ ), JJ, ILASTCOL ) IOFF = MOD( JV+M-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 60 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL SLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL SLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 60 END IF END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K * CALL PBSTRAN( ICTXT, 'Rowwise', 'Transpose', K, M+ICOFFV, $ NBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV, $ IVROW, IVCOL, ICROW, -1, WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K * IF( MYROW.EQ.ICROW ) $ IPV = IPV + IROFFC * * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V' * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K * LW = MAX( 1, NQC ) * IF( MPC.GT.0 ) THEN CALL SGEMM( 'Transpose', 'No transpose', NQC, K, MPC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL STRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV ) ELSE CALL STRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL STRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V' * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL SGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC LV = MAX( 1, K ) LW = MAX( 1, MPC ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL STRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV ) CALL SLACPY( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL STRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + NQC - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 70 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL SLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ), $ LV ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 70 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ), $ JJ, ILASTCOL ) IOFF = MOD( JV+N-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 80 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL SLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL SLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 80 END IF * END IF * * WORK( IPV ) is K x NQC = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K * IF( NQC.GT.0 ) THEN CALL SGEMM( 'No Transpose', 'Transpose', MPC, K, NQC, $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL SLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL STRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL SGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) * END IF * END IF * RETURN * * End of PSLARFB * END scalapack-1.8.0/SRC/pzhegst.f0000640000175000017500000004234110363532303015602 0ustar muammarmuammar* * SUBROUTINE PZHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N DOUBLE PRECISION SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZHEGST reduces a complex Hermitian-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PZPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PZPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) DOUBLE PRECISION * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE, HALF PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), $ HALF = ( 0.5D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, K, KB, MYCOL, MYROW, NB, $ NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZHEGS2, PZHEMM, PZHER2K, PZTRMM, PZTRSM * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * SCALE = ONE * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHEGST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN * * Compute inv(U')*sub( A )*inv(U) * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 10 CONTINUE * * Update the upper triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PZHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, IB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PZTRSM( 'Left', UPLO, 'Conjugate Transpose', $ 'Non-unit', KB, N-K-KB+1, CONE, B, IB+K-1, $ JB+K-1, DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, CONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PZHER2K( UPLO, 'Conjugate Transpose', N-K-KB+1, KB, $ -CONE, A, IA+K-1, JA+K+KB-1, DESCA, B, $ IB+K-1, JB+K+KB-1, DESCB, ONE, A, $ IA+K+KB-1, JA+K+KB-1, DESCA ) CALL PZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, CONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PZTRSM( 'Right', UPLO, 'No transpose', 'Non-unit', $ KB, N-K-KB+1, CONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 10 * ELSE * * Compute inv(L)*sub( A )*inv(L') * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 20 CONTINUE * * Update the lower triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PZHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PZTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-unit', N-K-KB+1, KB, CONE, B, IB+K-1, $ JB+K-1, DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, CONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PZHER2K( UPLO, 'No transpose', N-K-KB+1, KB, -CONE, $ A, IA+K+KB-1, JA+K-1, DESCA, B, IB+K+KB-1, $ JB+K-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, CONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PZTRSM( 'Left', UPLO, 'No transpose', 'Non-unit', $ N-K-KB+1, KB, CONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 20 * END IF * ELSE * IF( UPPER ) THEN * * Compute U*sub( A )*U' * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 30 CONTINUE * * Update the upper triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PZTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', K-1, $ KB, CONE, B, IB, JB, DESCB, A, IA, JA+K-1, $ DESCA ) CALL PZHEMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, CONE, A, $ IA, JA+K-1, DESCA ) CALL PZHER2K( UPLO, 'No transpose', K-1, KB, CONE, A, IA, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA, DESCA ) CALL PZHEMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, CONE, A, $ IA, JA+K-1, DESCA ) CALL PZTRMM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-unit', K-1, KB, CONE, B, IB+K-1, JB+K-1, $ DESCB, A, IA, JA+K-1, DESCA ) CALL PZHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 30 * ELSE * * Compute L'*sub( A )*L * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 40 CONTINUE * * Update the lower triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PZTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', KB, $ K-1, CONE, B, IB, JB, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PZHEMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, CONE, A, IA+K-1, $ JA, DESCA ) CALL PZHER2K( UPLO, 'Conjugate transpose', K-1, KB, CONE, A, $ IA+K-1, JA, DESCA, B, IB+K-1, JB, DESCB, ONE, $ A, IA, JA, DESCA ) CALL PZHEMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, CONE, A, IA+K-1, $ JA, DESCA ) CALL PZTRMM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-unit', KB, K-1, CONE, B, IB+K-1, JB+K-1, $ DESCB, A, IA+K-1, JA, DESCA ) CALL PZHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 40 * END IF * END IF * RETURN * * End of PZHEGST * END scalapack-1.8.0/SRC/pclaevswp.f0000640000175000017500000002573210363532303016127 0ustar muammarmuammar* * SUBROUTINE PCLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ RWORK, LRWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LRWORK, N * .. * .. Array Arguments .. INTEGER DESCZ( * ), KEY( * ), NVS( * ) REAL RWORK( * ), ZIN( LDZI, * ) COMPLEX Z( * ) * .. * * Purpose * ======= * * PCLAEVSWP moves the eigenvectors (potentially unsorted) from * where they are computed, to a ScaLAPACK standard block cyclic * array, sorted so that the corresponding eigenvalues are sorted. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * ZIN (local input) REAL array, * dimension ( LDZI, NVS(iam) ) * The eigenvectors on input. Each eigenvector resides entirely * in one process. Each process holds a contiguous set of * NVS(iam) eigenvectors. The first eigenvector which the * process holds is: sum for i=[0,iam-1) of NVS(i) * * LDZI (locl input) INTEGER * leading dimension of the ZIN array * * Z (local output) COMPLEX array * global dimension (N, N), local dimension (DESCZ(DLEN_), NQ) * The eigenvectors on output. The eigenvectors are distributed * in a block cyclic manner in both dimensions, with a * block size of NB. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * NVS (global input) INTEGER array, dimension( nprocs+1 ) * nvs(i) = number of processes * number of eigenvectors held by processes [0,i-1) * nvs(1) = number of eigen vectors held by [0,1-1) == 0 * nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) == * total number of eigenvectors * * KEY (global input) INTEGER array, dimension( N ) * Indicates the actual index (after sorting) for each of the * eigenvectors. * * RWORK (local workspace) REAL array, dimension (LRWORK) * * LRWORK (local input) INTEGER dimension of RWORK * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J, $ MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB, $ NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL, $ RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P EXTERNAL INDXG2L, INDXG2P * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * NB = DESCZ( MB_ ) * NPROCS = NPROW*NPCOL * * If PxSTEIN operates on a sub-matrix of a global matrix, the * key [] that contains the indicies of the eigenvectors is refe- * renced to the dimensions of the sub-matrix and not the global * distrubited matrix. Because of this, PxLAEVSWP will incorrectly * map the eigenvectors to the global eigenvector matrix, Z, unless * the key[] elements are shifted as below. * DO 10 J = DESCZ( N_ ), 1, -1 KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 ) 10 CONTINUE * DO 110 DIST = 0, NPROCS - 1 * SENDTO = MOD( IAM+DIST, NPROCS ) RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS ) * SENDROW = MOD( SENDTO, NPROW ) SENDCOL = SENDTO / NPROW RECVROW = MOD( RECVFROM, NPROW ) RECVCOL = RECVFROM / NPROW * SENDROW = SENDTO / NPCOL SENDCOL = MOD( SENDTO, NPCOL ) RECVROW = RECVFROM / NPCOL RECVCOL = MOD( RECVFROM, NPCOL ) * * Figure out what I have that process "sendto" wants * NBUFSIZE = 0 * * We are looping through the eigenvectors that I presently own. * DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( SENDCOL.EQ.PCOL ) THEN MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )* $ DESCZ( MB_ ) + 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 30 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 ) DO 20 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 RWORK( NBUFSIZE ) = ZIN( I+1-IZ, $ J-NVS( 1+IAM )+1-JZ ) 20 CONTINUE 30 CONTINUE END IF 40 CONTINUE * * IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL ) $ CALL SGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, RWORK, NBUFSIZE, $ SENDROW, SENDCOL ) * * * Figure out what process "recvfrom" has that I want * NBUFSIZE = 0 DO 70 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 60 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 50 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 50 CONTINUE 60 CONTINUE END IF 70 CONTINUE * * * IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL ) $ CALL SGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, RWORK, 1, $ RECVROW, RECVCOL ) * NBUFSIZE = 0 DO 100 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1, $ NPCOL ) CYCLIC_I = 1 MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 90 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1, $ NPROW ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 80 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) ) $ = CMPLX( RWORK( NBUFSIZE ) ) CYCLIC_I = CYCLIC_I + 1 80 CONTINUE 90 CONTINUE END IF 100 CONTINUE * 110 CONTINUE RETURN * * End of PCLAEVSWP * END scalapack-1.8.0/SRC/pdlaedz.f0000640000175000017500000001225010363532303015535 0ustar muammarmuammar SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER ID, IQ, JQ, LDQ, N, N1 * .. * .. Array Arguments .. INTEGER DESCQ( * ) DOUBLE PRECISION Q( LDQ, * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDLAEDZ Form the z-vector which consists of the last row of Q_1 * and the first row of Q_2. * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. * INTEGER COL, I, IBUF, ICTXT, IIQ, IIZ1, IIZ2, IQCOL, $ IQROW, IZ, IZ1, IZ1COL, IZ1ROW, IZ2, IZ2COL, $ IZ2ROW, J, JJQ, JJZ1, JJZ2, MYCOL, MYROW, N2, $ NB, NBLOC, NPCOL, NPROW, NQ1, NQ2, ZSIZ * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, $ DGERV2D, DGESD2D, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( ID, ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) N2 = N - N1 * * Form z1 which consist of the last row of Q1 * CALL INFOG2L( IQ-1+( ID+N1-1 ), JQ-1+ID, DESCQ, NPROW, NPCOL, $ MYROW, MYCOL, IIZ1, JJZ1, IZ1ROW, IZ1COL ) NQ1 = NUMROC( N1, NB, MYCOL, IZ1COL, NPCOL ) IF( ( MYROW.EQ.IZ1ROW ) .AND. ( NQ1.NE.0 ) ) THEN CALL DCOPY( NQ1, Q( IIZ1, JJZ1 ), LDQ, WORK, 1 ) IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) $ CALL DGESD2D( ICTXT, NQ1, 1, WORK, NQ1, IQROW, IQCOL ) END IF * * Proc (IQROW, IQCOL) receive the parts of z1 * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN COL = IZ1COL DO 20 I = 0, NPCOL - 1 NQ1 = NUMROC( N1, NB, COL, IZ1COL, NPCOL ) IF( NQ1.GT.0 ) THEN IF( IZ1ROW.NE.IQROW .OR. COL.NE.IQCOL ) THEN IBUF = N1 + 1 CALL DGERV2D( ICTXT, NQ1, 1, WORK( IBUF ), NQ1, $ IZ1ROW, COL ) ELSE IBUF = 1 END IF IZ1 = 0 IZ = I*NB + 1 NBLOC = ( NQ1-1 ) / NB + 1 DO 10 J = 1, NBLOC ZSIZ = MIN( NB, NQ1-IZ1 ) CALL DCOPY( ZSIZ, WORK( IBUF+IZ1 ), 1, Z( IZ ), 1 ) IZ1 = IZ1 + NB IZ = IZ + NB*NPCOL 10 CONTINUE END IF COL = MOD( COL+1, NPCOL ) 20 CONTINUE END IF * * Form z2 which consist of the first row of Q2 * CALL INFOG2L( IQ-1+( ID+N1 ), JQ-1+( ID+N1 ), DESCQ, NPROW, NPCOL, $ MYROW, MYCOL, IIZ2, JJZ2, IZ2ROW, IZ2COL ) NQ2 = NUMROC( N2, NB, MYCOL, IZ2COL, NPCOL ) IF( ( MYROW.EQ.IZ2ROW ) .AND. ( NQ2.NE.0 ) ) THEN CALL DCOPY( NQ2, Q( IIZ2, JJZ2 ), LDQ, WORK, 1 ) IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) $ CALL DGESD2D( ICTXT, NQ2, 1, WORK, NQ2, IQROW, IQCOL ) END IF * * Proc (IQROW, IQCOL) receive the parts of z2 * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN COL = IZ2COL DO 40 I = 0, NPCOL - 1 NQ2 = NUMROC( N2, NB, COL, IZ2COL, NPCOL ) IF( NQ2.GT.0 ) THEN IF( IQROW.NE.IZ2ROW .OR. IQCOL.NE.COL ) THEN IBUF = 1 + N2 CALL DGERV2D( ICTXT, NQ2, 1, WORK( IBUF ), NQ2, $ IZ2ROW, COL ) ELSE IBUF = 1 END IF IZ2 = 0 IZ = NB*I + N1 + 1 NBLOC = ( NQ2-1 ) / NB + 1 DO 30 J = 1, NBLOC ZSIZ = MIN( NB, NQ2-IZ2 ) CALL DCOPY( ZSIZ, WORK( IBUF+IZ2 ), 1, Z( IZ ), 1 ) IZ2 = IZ2 + NB IZ = IZ + NB*NPCOL 30 CONTINUE END IF COL = MOD( COL+1, NPCOL ) 40 CONTINUE END IF * * proc(IQROW,IQCOL) broadcast Z=(Z1,Z2) * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', N, 1, Z, N ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', N, 1, Z, N, IQROW, IQCOL ) END IF * RETURN * * End of PDLAEDZ * END scalapack-1.8.0/SRC/pzlassq.f0000640000175000017500000002434610363532303015620 0ustar muammarmuammar SUBROUTINE PZLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SCALE, SUMSQ * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZLASSQ returns the values scl and smsq such that * * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, * * where x( i ) = sub( X ) = abs( X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) ). * The value of sumsq is assumed to be at least unity and the value of * ssq will then satisfy * * 1.0 .le. ssq .le. ( sumsq + 2*n ). * * scale is assumed to be non-negative and scl returns the value * * scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), * i * * scale and sumsq must be supplied in SCALE and SUMSQ respectively. * SCALE and SUMSQ are overwritten by scl and ssq respectively. * * The routine makes only one pass through the vector sub( X ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * The result are only available in the scope of sub( X ), i.e if * sub( X ) is distributed along a process row, the correct results are * only available in this process row of the grid. Similarly if sub( X ) * is distributed along a process column, the correct results are only * available in this process column of the grid. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (input) COMPLEX*16 * The vector for which a scaled sum of squares is computed. * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= n. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * SCALE (local input/local output) DOUBLE PRECISION * On entry, the value scale in the equation above. * On exit, SCALE is overwritten with scl , the scaling factor * for the sum of squares. * * SUMSQ (local input/local output) DOUBLE PRECISION * On entry, the value sumsq in the equation above. * On exit, SUMSQ is overwritten with smsq , the basic sum of * squares from which scl has been factored out. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, ICOFF, ICTXT, IIX, IOFF, IROFF, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ DOUBLE PRECISION TEMP1 * .. * .. Local Arrays .. DOUBLE PRECISION WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, INFOG2L, PDTREECOMB * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFF * * Code direct from LAPACK's ZLASSQ, (save subroutine call) * IF( NQ.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 10 I = 1, NQ IF( DBLE( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( DBLE( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ * ( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IF( DIMAG( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( DIMAG( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ * ( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + LDX 10 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PDTREECOMB( ICTXT, 'Rowwise', 2, WORK, -1, IXCOL, $ DCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * ELSE IF( INCX.EQ.1 ) THEN * * X is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF * * Code direct from LAPACK's ZLASSQ, (save subroutine call) * IF( NP.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 20 I = 1, NP IF( DBLE( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( DBLE( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IF( DIMAG( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( DIMAG( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + 1 20 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PDTREECOMB( ICTXT, 'Columnwise', 2, WORK, -1, IXCOL, $ DCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * END IF * RETURN * * End of PZLASSQ * END scalapack-1.8.0/SRC/pcrot.c0000640000175000017500000004225310602576752015260 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- Mark R. Fahey * June 28, 2000 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pcrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s ) /* * Mark Fahey * June 22, 2000 */ /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; float * c; complex * s; /* * .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex X[], Y[]; { /* * Purpose * ======= * * PCROT applies a plane rotation, where the cos (C) is real and the * sin (S) is complex, and the vectors CX and CY are complex, i.e., * * [ sub( X ) ] := [ C S ] [ sub( X ) ] * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ] * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y, * * and where C*C + S*CONJG(S) = 1.0. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * Arguments * ========= * * N (input) INTEGER * The number of elements in the vectors CX and CY. * * X (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * On output, CX is overwritten with C*X + S*Y. * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * On output, CY is overwritten with -CONJG(S)*X + C*Y. * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * C (input) pointer to FLOAT * S (input) pointer COMPLEX * C and S define a rotation * [ C S ] * [ -conjg(S) C ] * where C*C + S*CONJG(S) = 1.0. * * ===================================================================== * * .. Local Scalars .. */ int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, mycol, myrow, nn, np, np0, nprow, npcol, nq, nz, ione=1, tmp1, wksz; complex xwork[1], ywork[1], zero; /* .. * .. PBLAS Buffer .. */ complex * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void cgerv2d_(); void cgesd2d_(); void pbchkvect(); void PB_Cabort(); char * getpbbuf(); F_INTG_FCT pbctrnv_(); F_INTG_FCT crot_(); F_INTG_FCT ilcm_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(500+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -9; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -8; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1000+MB_+1); } else { info = -11; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -9; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -8; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1000+MB_+1); } else { info = -11; } } else { info = -6; } } if( ictxt != desc_Y[CTXT_] ) info = -(1000+CTXT_+1); } } if( info ) { PB_Cabort( ictxt, "PCROT", info ); return; } /* if( info ) { pberror_( &ictxt, "PCROT", &info ); return; } */ /* * Quick return if possible. */ zero.re = ZERO; zero.im = ZERO; if( *n == 0 ) return; /* * rotation */ if( *n == 1 ) { if( ( myrow == ixrow ) && ( mycol == ixcol ) ) { buff = &X[iix-1+(jjx-1)*desc_X[LLD_]]; if( ( myrow != iyrow ) || ( mycol != iycol ) ) { cgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol ); cgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol ); } else *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]]; crot_( n, buff, n, ywork, n, c, s ); X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff; if( ( myrow == iyrow ) && ( mycol == iycol ) ) Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork; } else if( ( myrow == iyrow ) && ( mycol == iycol ) ) { cgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, &ixrow, &ixcol ); cgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol ); crot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s ); } return; } if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) { crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } else { if( myrow == ixrow ) { cgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) ); cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol ); crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], buff, &ione, c, s ); } else if( myrow == iyrow ) { cgesd2d_( &ictxt, &ione, &nq, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &mycol ); buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) ); cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol ); crot_( &nq, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) { crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } else { if( mycol == ixcol ) { cgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) ); cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol ); crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } else if( mycol == iycol ) { cgesd2d_( &ictxt, &np, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &myrow, &ixcol ); buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) ); cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol ); crot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmp = lcm / nprow; nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow ); tmp1 = np0 / desc_X[MB_]; wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp ); wksz = np + wksz; buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) ); if( mycol == iycol ) jjy -= nz; if( myrow == ixrow ) np -= nz; pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol, &ixrow, &ixcol, buff+np ); if( mycol == ixcol ) { crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, buff, &ione, &zero, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &ixcol, &iyrow, &iycol, buff+np ); } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*jx-1) % desc_X[NB_]; nn = *n + nz; tmp1 = nn / desc_X[MB_]; np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) ); if( myrow == iyrow ) np -= nz; pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { crot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, buff, &ione, &zero, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &iycol, &ixrow, &ixcol, buff+np ); } } } scalapack-1.8.0/SRC/psorgl2.f0000640000175000017500000002601310363532303015504 0ustar muammarmuammar SUBROUTINE PSORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGL2 generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k) . . . H(2) H(1) * * as returned by PSGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PSGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, J, KP, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 REAL TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSLARF, PSLASET, PSSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia+k:ia+m-1 to rows of the unit matrix * CALL PSLASET( 'All', M-K, K, ZERO, ZERO, A, IA+K, JA, DESCA ) CALL PSLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, $ DESCA ) * END IF * TAUI = ZERO KP = NUMROC( IA+K-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+K-1, IA, -1 * * Apply H(i) to A(i:ia+m-1,ja+i-ia:ja+n-1) from the right * J = JA + I - IA II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, KP ) ) IF( J.LT.JA+N-1 ) THEN IF( I.LT.IA+M-1 ) THEN CALL PSELSET( A, I, J, DESCA, ONE ) CALL PSLARF( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PSSCAL( N-J+JA-1, -TAUI, A, I, J+1, DESCA, $ DESCA( M_ ) ) END IF CALL PSELSET( A, I, J, DESCA, ONE-TAUI ) * * Set A(i,ja:j-1) to zero * CALL PSLASET( 'All', 1, J-JA, ZERO, ZERO, A, I, JA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGL2 * END scalapack-1.8.0/SRC/pztrrfs.f0000640000175000017500000007446310363532303015642 0ustar muammarmuammar SUBROUTINE PZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LRWORK, LWORK, $ N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), DESCX( * ) DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) COMPLEX*16 A( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PZTRRFS provides error bounds and backward error estimates for the * solution to a system of linear equations with a triangular * coefficient matrix. * * The solution matrix X must be computed by PZTRTRS or some other * means before entering this routine. PZTRRFS does not do iterative * refinement because doing so cannot improve the backward error. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**H * sub( X ) = sub( B ) * (Conjugate transpose) * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_A,LOCc(JA+N-1) ). This * array contains the local pieces of the original triangular * distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. * If DIAG = 'U', the diagonal elements of sub( A ) are also * not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_B, LOCc(JB+NRHS-1) ). * On entry, this array contains the the local pieces of the * right hand sides sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input) COMPLEX*16 pointer into the local memory * to an array of local dimension (LLD_X, LOCc(JX+NRHS-1) ). * On entry, this array contains the the local pieces of the * solution vectors sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The estimated forward error bounds for * each solution vector of sub( X ). If XTRUE is the true * solution, FERR bounds the magnitude of the largest entry * in (sub( X ) - XTRUE) divided by the magnitude of the * largest entry in sub( X ). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD( IA-1, MB_A ) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD( IB-1, MB_B ) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices sub( X ) and sub( B ) should be * distributed the same way on the same processes. These conditions * ensure that sub( X ) and sub( B ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, RONE PARAMETER ( ZERO = 0.0D+0, RONE = 1.0D+0 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER CHARACTER TRANSN, TRANST INTEGER IAROW, IXBCOL, IXBROW, IXCOL, IXROW, ICOFFA, $ ICOFFB, ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, $ IIW, IOFFXB, IPB, IPR, IPV, IROFFA, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX*16 ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ INFOG2L, PCHK1MAT, PCHK2MAT, PXERBLA, PZATRMV, $ PZAXPY, PZCOPY, PZLACON, PZTRMV, $ PZTRSV, ZGEBR2D, ZGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DIMAG, ICHAR, MAX, MIN, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IX, JX, DESCX, 17, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2*NPMOD WORK( 1 ) = DBLE( LWMIN ) LRWMIN = NPMOD RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( N.LT.0 ) THEN INFO = -4 ELSE IF( NRHS.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -15 ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -16 ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 1700+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 1700+NB_ ) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 1700+CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -21 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -23 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 21 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 23 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, 0, IDUM1, IDUM2, $ INFO ) CALL PCHK2MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, N, 4, NRHS, 5, $ IX, JX, DESCX, 17, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTRRFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANSN = 'N' TRANST = 'C' ELSE TRANSN = 'C' TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = maximum number of nonzero entries in each row of A, plus 1 * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) )*DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 90 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PZCOPY( N, X, IX, JX+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PZAXPY( N, -ONE, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, $ JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the matrix * or vector Z. If the i-th component of the denominator is less * than SAFE2, then SAFE1 is added to the i-th components of the * numerator and denominator before dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 20 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 20 CONTINUE END IF END IF * CALL PZATRMV( UPLO, TRANS, DIAG, N, RONE, A, IA, JA, DESCA, X, $ IX, JX+K, DESCX, 1, RONE, RWORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 30 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix or * vector Z * NZ is the maximum number of nonzeros in any row of A, plus 1 * EPS is machine epsilon * * The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 40 CONTINUE END IF END IF * KASE = 0 50 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PZTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 60 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 60 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF CALL PZTRSV( UPLO, TRANSN, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 50 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 80 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 90 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 180 J = JN + 1, JB + NRHS - 1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 170 K = 0, JBRHS - 1 * * Compute residual R = B - op(A) * X, * where op(A) = A or A', depending on TRANS. * CALL PZCOPY( N, X, IX, J+K, DESCX, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PZTRMV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) CALL PZAXPY( N, -ONE, B, IB, J+K, DESCB, 1, WORK( IPR ), $ IW, JW, DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 100 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 100 CONTINUE END IF END IF * CALL PZATRMV( UPLO, TRANS, DIAG, N, RONE, A, IA, JA, DESCA, $ X, IX, J+K, DESCX, 1, RONE, RWORK( IPB ), IW, $ JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 110 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 110 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Bound error from formula * * norm(X - XTRUE) / norm(X) .le. FERR = * norm( abs(inv(op(A)))* * ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))/norm(X) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(A)) is the inverse of op(A) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of A, * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) * is incremented by SAFE1 if the i-th component of * abs(op(A))*abs(X) + abs(B) is less than SAFE2. * * Use PZLACON to estimate the infinity-norm of the matrix * inv(op(A)) * diag(W), * where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIW - 1, IIW + NP - 2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 120 CONTINUE END IF END IF * KASE = 0 130 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PZLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(A)'). * CALL PZTRSV( UPLO, TRANST, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 140 CONTINUE END IF END IF ELSE * * Multiply by inv(op(A))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 150 II = IIW - 1, IIW + NP - 2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 150 CONTINUE END IF END IF CALL PZTRSV( UPLO, TRANSN, DIAG, N, A, IA, JA, DESCA, $ WORK( IPR ), IW, JW, DESCW, 1 ) END IF GO TO 130 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIXB, IIXB + NP - 1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 160 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, $ IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 170 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) RWORK( 1 ) = DBLE( LRWMIN ) * RETURN * * End of PZTRRFS * END scalapack-1.8.0/SRC/pzdrscl.f0000640000175000017500000001630410363532303015577 0ustar muammarmuammar SUBROUTINE PZDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SA * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 SX( * ) * .. * * Purpose * ======= * * PZDRSCL multiplies an N-element complex distributed vector * sub( X ) by the real scalar 1/a. This is done without overflow or * underflow as long as the final sub( X )/a does not overflow or * underflow. * * where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1, * X(IX:IX,JX:JX+N-1), if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector descA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * SA (global input) DOUBLE PRECISION * The scalar a which is used to divide each component of * sub( X ). SA must be >= 0, or the subroutine will divide by * zero. * * SX (local input/local output) COMPLEX*16 array * containing the local pieces of a distributed matrix of * dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER ICTXT, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PDLABAD, PZDSCAL * .. * .. External Functions .. DOUBLE PRECISION PDLAMCH EXTERNAL PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL PDLABAD( ICTXT, SMLNUM, BIGNUM ) * * Initialize the denominator to SA and the numerator to 1. * CDEN = SA CNUM = ONE * 10 CONTINUE CDEN1 = CDEN*SMLNUM CNUM1 = CNUM / BIGNUM IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN * * Pre-multiply sub( X ) by SMLNUM if CDEN is large compared to * CNUM. * MUL = SMLNUM DONE = .FALSE. CDEN = CDEN1 ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN * * Pre-multiply sub( X ) by BIGNUM if CDEN is small compared to * CNUM. * MUL = BIGNUM DONE = .FALSE. CNUM = CNUM1 ELSE * * Multiply sub( X ) by CNUM / CDEN and return. * MUL = CNUM / CDEN DONE = .TRUE. END IF * * Scale the vector sub( X ) by MUL * CALL PZDSCAL( N, MUL, SX, IX, JX, DESCX, INCX ) * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PZDRSCL * END scalapack-1.8.0/SRC/pslahrd.f0000640000175000017500000002404110602576752015564 0ustar muammarmuammar SUBROUTINE PSLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * January 30, 2006 * * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCY( * ) REAL A( * ), T( * ), TAU( * ), WORK( * ), Y( * ) * .. * * Purpose * ======= * * PSLAHRD reduces the first NB columns of a real general N-by-(N-K+1) * distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that elements below the * k-th subdiagonal are zero. The reduction is performed by an orthogo- * nal similarity transformation Q' * A * Q. The routine returns the * matrices V and T which determine Q as a block reflector I - V*T*V', * and also the matrix Y = A * V * T. * * This is an auxiliary routine called by PSGEHRD. In the following * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). * N >= 0. * * K (global input) INTEGER * The offset for the reduction. Elements below the k-th * subdiagonal in the first NB columns are reduced to zero. * * NB (global input) INTEGER * The number of columns to be reduced. * * A (local input/local output) REAL pointer into * the local memory to an array of dimension (LLD_A, * LOCc(JA+N-K)). On entry, this array contains the the local * pieces of the N-by-(N-K+1) general distributed matrix * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above * the k-th subdiagonal in the first NB columns are overwritten * with the corresponding elements of the reduced distributed * matrix; the elements below the k-th subdiagonal, with the * array TAU, represent the matrix Q as a product of elementary * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are * unchanged. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). TAU is tied to the distributed matrix A. * * T (local output) REAL array, dimension (NB_A,NB_A) * The upper triangular matrix T. * * Y (local output) REAL pointer into the local memory * to an array of dimension (LLD_Y,NB_A). On exit, this array * contains the local pieces of the N-by-NB distributed * matrix Y. LLD_Y >= LOCr(IA+N-1). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) REAL array, dimension (NB) * * Further Details * =============== * * The matrix Q is represented as a product of nb elementary reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the (n-k+1)-by-nb matrix * V which is needed, with T and Y, to apply the transformation to the * unreduced part of the matrix, using an update of the form: * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V'). * * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the * following example with n = 7, k = 3 and nb = 2: * * ( a h a a a ) * ( a h a a a ) * ( a h a a a ) * ( h h a a a ) * ( v1 h a a a ) * ( v1 v2 a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper * Hessenberg matrix H, and vi denotes an element of the vector * defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL IPROC INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL, $ JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ REAL EI * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PSELSET, $ PSGEMV, PSLARFG, PSSCAL, SAXPY, $ SCOPY, SSCAL, STRMV * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.1 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) * IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - IOFF * EI = ZERO JW = IOFF + 1 CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW, $ IACOL, ICTXT, 1 ) * DO 10 L = 1, NB I = IA + K + L - 2 J = JA + L - 1 * IF( L.GT.1 ) THEN * * Update A(ia:ia+n-1,j) * * Compute i-th column of A - Y * V' * CALL PSGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J, $ DESCA, 1 ) * * Apply I - V * T' * V' to this column (call it b) from the * left, using the last column of T as workspace * * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) * ( V2 ) ( b2 ) * * where V1 is unit lower triangular * * w := V1' * b1 * IF( IPROC ) THEN CALL SCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1, $ WORK( JW ), 1 ) CALL STRMV( 'Lower', 'Transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) END IF * * w := w + V2'*b2 * CALL PSGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA, $ DESCA, A, I+1, J, DESCA, 1, ONE, WORK, 1, JW, $ DESCW, DESCW( M_ ) ) * * w := T'*w * IF( IPROC ) $ CALL STRMV( 'Upper', 'Transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), WORK( JW ), 1 ) * * b2 := b2 - V2*w * CALL PSGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE, $ A, I+1, J, DESCA, 1 ) * * b1 := b1 - V1*w * IF( IPROC ) THEN CALL STRMV( 'Lower', 'No transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) CALL SAXPY( L-1, -ONE, WORK( JW ), 1, $ A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 ) END IF CALL PSELSET( A, I, J-1, DESCA, EI ) END IF * * Generate the elementary reflector H(i) to annihilate * A(ia+k+i:ia+n-1,j) * CALL PSLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PSELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(iy:y+n-1,jy+l-1) * CALL PSGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1, $ DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1, $ DESCY, 1 ) CALL PSGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA, DESCA, $ A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW, DESCW, $ DESCW( M_ ) ) CALL PSGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY, $ JY+L-1, DESCY, 1 ) JL = MIN( JJ+L-1, JA+NQ-1 ) CALL PSSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 ) * * Compute T(1:i,i) * IF( IPROC ) THEN JT = ( L-1 ) * DESCA( NB_ ) CALL SSCAL( L-1, -TAU( JL ), WORK( JW ), 1 ) CALL SCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 ) CALL STRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), T( JT+1 ), 1 ) T( JT+L ) = TAU( JL ) END IF 10 CONTINUE * CALL PSELSET( A, K+NB+IA-1, J, DESCA, EI ) * RETURN * * End of PSLAHRD * END scalapack-1.8.0/SRC/psgecon.f0000640000175000017500000003711310363532303015555 0ustar muammarmuammar SUBROUTINE PSGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, INFO, JA, LIWORK, LWORK, N REAL ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSGECON estimates the reciprocal of the condition number of a general * distributed real matrix A(IA:IA+N-1,JA:JA+N-1), in either the 1-norm * or the infinity-norm, using the LU factorization computed by PSGETRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm * = 'I': Infinity-norm * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, * this array contains the local pieces of the factors L and U * from the factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U; the * unit diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) REAL * If NORM = '1' or 'O', the 1-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * If NORM = 'I', the infinity-norm of the original distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) REAL * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + 2*LOCc(N+MOD(JA-1,NB_A)) * + MAX( 2, MAX( NB_A*MAX( 1, CEIL(NPROW-1,NPCOL) ), * LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX( 1, CEIL(NPCOL-1,NPROW) ) ). * * LOCr and LOCc values can be computed using the ScaLAPACK * tool function NUMROC; NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, ONENRM CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, JX, $ KASE, KASE1, LIWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPMOD, NPROW, NQ, NQMOD REAL AINVNM, SCALE, SL, SMLNUM, SU, WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PCHK1MAT, PSAMAX, PSLATRS, PSLACON, $ PSRSCL, PB_TOPGET, PB_TOPSET, PXERBLA, SGEBR2D, $ SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600 + CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + 2*NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = REAL( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LIWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGECON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPNL = IPV + NP IPNU = IPNL + NQ IPW = IPNU + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the norm of inv(A). * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 * 10 CONTINUE CALL PSLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PSLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Divide X by 1/(SL*SU) if doing so will not cause overflow. * SCALE = SL*SU NORMIN = 'Y' IF( SCALE.NE.ONE ) THEN CALL PSAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL SGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL SGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PSRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PSGECON * END scalapack-1.8.0/SRC/pzungr2.f0000640000175000017500000002621310363532303015525 0ustar muammarmuammar SUBROUTINE PZUNGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGR2 generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PZGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, LWMIN, MP, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX*16 TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLACGV, PZLARFC, PZLASET, PZSCAL * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia:ia+m-k-1 to rows of the unit matrix * CALL PZLASET( 'All', M-K, N-M, ZERO, ZERO, A, IA, JA, DESCA ) CALL PZLASET( 'All', M-K, M, ZERO, ONE, A, IA, JA+N-M, DESCA ) * END IF * TAUI = ZERO MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+M-K, IA+M-1 * * Apply H(i)' to A(ia:i,ja:ja+n-m+i-ia) from the right * CALL PZLACGV( I-IA+N-M, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZELSET( A, I, JA+N-M+I-IA, DESCA, ONE ) CALL PZLARFC( 'Right', I-IA, I-IA+N-M+1, A, I, JA, DESCA, $ DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, MP ) ) CALL PZSCAL( I-IA+N-M, -TAUI, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZLACGV( I-IA+N-M, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZELSET( A, I, JA+N-M+I-IA, DESCA, ONE-DCONJG( TAUI ) ) * * Set A(i,ja+n-m+i-ia+1:ja+n-1) to zero * CALL PZLASET( 'All', 1, IA+M-1-I, ZERO, ZERO, A, I, $ JA+N-M+I-IA+1, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGR2 * END scalapack-1.8.0/SRC/pcgeequ.f0000640000175000017500000003302110363532303015542 0ustar muammarmuammar SUBROUTINE PCGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N REAL AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL C( * ), R( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCGEEQU computes row and column scalings intended to equilibrate an * M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and * reduce its condition number. R returns the row scale factors and C * the column scale factors, chosen to try to make the largest entry in * each row and column of the distributed matrix B with elements * B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1. * * R(i) and C(j) are restricted to be between SMLNUM = smallest safe * number and BIGNUM = largest safe number. Use of these scaling * factors is not guaranteed to reduce the condition number of * sub( A ) but works well in practice. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the * local pieces of the M-by-N distributed matrix whose * equilibration factors are to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local output) REAL array, dimension LOCr(M_A) * If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row * scale factors for sub( A ). R is aligned with the distributed * matrix A, and replicated across every process column. R is * tied to the distributed matrix A. * * C (local output) REAL array, dimension LOCc(N_A) * If INFO = 0, C(JA:JA+N-1) contains the column scale factors * for sub( A ). C is aligned with the distributed matrix A, and * replicated down every process row. C is tied to the distri- * buted matrix A. * * ROWCND (global output) REAL * If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of * the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1). * If ROWCND >= 0.1 and AMAX is neither too large nor too small, * it is not worth scaling by R(IA:IA+M-1). * * COLCND (global output) REAL * If INFO = 0, COLCND contains the ratio of the smallest C(j) * to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it * is not worth scaling by C(JA:JA+N-1). * * AMAX (global output) REAL * Absolute value of largest distributed matrix element. If * AMAX is very close to overflow or very close to underflow, * the matrix should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, and i is * <= M: the i-th row of the distributed matrix sub( A ) * is exactly zero, * > M: the (i-M)-th column of the distributed * matrix sub( A ) is exactly zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA, $ IOFFA, IROFF, J, JJA, LDA, MP, MYCOL, MYROW, $ NPCOL, NPROW, NQ REAL BIGNUM, RCMAX, RCMIN, SMLNUM COMPLEX ZDUM * .. * .. Local Arrays .. INTEGER DESCC( DLEN_ ), DESCR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGAMX2D, $ INFOG2L, PCHK1MAT, PB_TOPGET, PXERBLA, SGAMN2D, $ SGAMX2D * .. * .. External Functions .. INTEGER INDXL2G, NUMROC REAL PSLAMCH EXTERNAL INDXL2G, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MAX, MIN, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN ROWCND = ONE COLCND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Get machine constants and local indexes. * SMLNUM = PSLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Assign descriptors for R and C arrays * CALL DESCSET( DESCR, M, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Compute row scale factors. * DO 10 I = IIA, IIA+MP-1 R( I ) = ZERO 10 CONTINUE * * Find the maximum element in each row. * IOFFA = (JJA-1)*LDA DO 30 J = JJA, JJA+NQ-1 DO 20 I = IIA, IIA+MP-1 R( I ) = MAX( R( I ), CABS1( A( IOFFA + I ) ) ) 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE CALL SGAMX2D( ICTXT, 'Rowwise', ROWCTOP, MP, 1, R( IIA ), $ MAX( 1, MP ), IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 40 I = IIA, IIA+MP-1 RCMAX = MAX( RCMAX, R( I ) ) RCMIN = MIN( RCMIN, R( I ) ) 40 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) AMAX = RCMAX * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 50 I = IIA, IIA+MP-1 IF( R( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = INDXL2G( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) - IA + 1 50 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 60 I = IIA, IIA+MP-1 R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) 60 CONTINUE * * Compute ROWCND = min(R(I)) / max(R(I)) * ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * * Compute column scale factors * DO 70 J = JJA, JJA+NQ-1 C( J ) = ZERO 70 CONTINUE * * Find the maximum element in each column, * assuming the row scaling computed above. * IOFFA = (JJA-1)*LDA DO 90 J = JJA, JJA+NQ-1 DO 80 I = IIA, IIA+MP-1 C( J ) = MAX( C( J ), CABS1( A( IOFFA + I ) )*R( I ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, C( JJA ), $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) * * Find the maximum and minimum scale factors. * RCMIN = BIGNUM RCMAX = ZERO DO 100 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 100 CONTINUE CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM, $ IDUMM, -1, -1, MYCOL ) * IF( RCMIN.EQ.ZERO ) THEN * * Find the first zero scale factor and return an error code. * DO 110 J = JJA, JJA+NQ-1 IF( C( J ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = M + INDXL2G( J, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) - JA + 1 110 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN ELSE * * Invert the scale factors. * DO 120 J = JJA, JJA+NQ-1 C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) 120 CONTINUE * * Compute COLCND = min(C(J)) / max(C(J)) * COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF * RETURN * * End of PCGEEQU * END scalapack-1.8.0/SRC/pdlaed2.f0000640000175000017500000003513510363532303015434 0ustar muammarmuammar SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, $ NN1, NN2, IB1, IB2 ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N, $ N1, NB, NN, NN1, NN2, NPCOL DOUBLE PRECISION RHO * .. * .. Array Arguments .. INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ), $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ), $ PSM( 0: NPCOL-1, 4 ) DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * ) * .. * * Purpose * ======= * * PDLAED2 sorts the two sets of eigenvalues together into a single * sorted set. Then it tries to deflate the size of the problem. * There are two ways in which deflation can occur: when two or more * eigenvalues are close together or if there is a tiny entry in the * Z vector. For each such occurrence the order of the related secular * equation problem is reduced by one. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle, indicating the global context of * the operation on the matrix. The context itself is global. * * K (output) INTEGER * The number of non-deflated eigenvalues, and the order of the * related secular equation. 0 <= K <=N. * * N (input) INTEGER * The dimension of the symmetric tridiagonal matrix. N >= 0. * * N1 (input) INTEGER * The location of the last eigenvalue in the leading sub-matrix. * min(1,N) < N1 < N. * * NB (global input) INTEGER * The blocking factor used to distribute the columns of the * matrix. NB >= 1. * * D (input/output) DOUBLE PRECISION array, dimension (N) * On entry, D contains the eigenvalues of the two submatrices to * be combined. * On exit, D contains the trailing (N-K) updated eigenvalues * (those which were deflated) sorted into increasing order. * * DROW (global input) INTEGER * The process row over which the first row of the matrix D is * distributed. 0 <= DROW < NPROW. * * DCOL (global input) INTEGER * The process column over which the first column of the * matrix D is distributed. 0 <= DCOL < NPCOL. * * Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) * On entry, Q contains the eigenvectors of two submatrices in * the two square blocks with corners at (1,1), (N1,N1) * and (N1+1, N1+1), (N,N). * On exit, Q contains the trailing (N-K) updated eigenvectors * (those which were deflated) in its last N-K columns. * * LDQ (input) INTEGER * The leading dimension of the array Q. LDQ >= max(1,NQ). * * RHO (global input/output) DOUBLE PRECISION * On entry, the off-diagonal element associated with the rank-1 * cut which originally split the two submatrices which are now * being recombined. * On exit, RHO has been modified to the value required by * PDLAED3. * * Z (global input) DOUBLE PRECISION array, dimension (N) * On entry, Z contains the updating vector (the last * row of the first sub-eigenvector matrix and the first row of * the second sub-eigenvector matrix). * On exit, the contents of Z have been destroyed by the updating * process. * * DLAMDA (global output) DOUBLE PRECISION array, dimension (N) * A copy of the first K eigenvalues which will be used by * SLAED3 to form the secular equation. * * W (global output) DOUBLE PRECISION array, dimension (N) * The first k values of the final deflation-altered z-vector * which will be passed to SLAED3. * * Q2 (output) DOUBLE PRECISION array, dimension (LDQ2, NQ) * A copy of the first K eigenvectors which will be used by * * LDQ2 (input) INTEGER * The leading dimension of the array Q2. * * QBUF (workspace) DOUBLE PRECISION array, dimension 3*N * * CTOT (workspace) INTEGER array, dimension( NPCOL, 4) * * PSM (workspace) INTEGER array, dimension( NPCOL, 4) * * NPCOL (global input) INTEGER * The total number of columns over which the distributed * submatrix is distributed. * * INDX (workspace) INTEGER array, dimension (N) * The permutation used to sort the contents of DLAMDA into * ascending order. * * INDXC (output) INTEGER array, dimension (N) * The permutation used to arrange the columns of the deflated * Q matrix into three groups: the first group contains non-zero * elements only at and above N1, the second contains * non-zero elements only below N1, and the third is dense. * * INDXP (workspace) INTEGER array, dimension (N) * The permutation used to place deflated values of D at the end * of the array. INDXP(1:K) points to the nondeflated D-values * and INDXP(K+1:N) points to the deflated eigenvalues. * * INDCOL (workspace) INTEGER array, dimension (N) * * COLTYP (workspace/output) INTEGER array, dimension (N) * During execution, a label which will indicate which of the * following types a column in the Q2 matrix is: * 1 : non-zero in the upper half only; * 2 : dense; * 3 : non-zero in the lower half only; * 4 : deflated. * * NN (global output) INTEGER, the order of matrix U, (PDLAED1). * NN1 (global output) INTEGER, the order of matrix Q1, (PDLAED1). * NN2 (global output) INTEGER, the order of matrix Q2, (PDLAED1). * IB1 (global output) INTEGER, pointeur on Q1, (PDLAED1). * IB2 (global output) INTEGER, pointeur on Q2, (PDLAED1). * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, $ TWO = 2.0D0, EIGHT = 8.0D0 ) * .. * .. Local Scalars .. INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2, $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ, $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ DOUBLE PRECISION C, EPS, S, T, TAU, TOL * .. * .. External Functions .. INTEGER IDAMAX, INDXG2L, INDXL2G, NUMROC DOUBLE PRECISION DLAPY2, PDLAMCH EXTERNAL IDAMAX, INDXG2L, INDXL2G, NUMROC, PDLAMCH, $ DLAPY2 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, BLACS_PINFO, DCOPY, DGERV2D, $ DGESD2D, DLAPST, DROT, DSCAL, INFOG1L * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. External Functions .. * .. * .. Local Arrays .. INTEGER PTT( 4 ) * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_PINFO( IAM, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NUMROC( N, NB, MYROW, DROW, NPROW ) * N2 = N - N1 N1P1 = N1 + 1 * IF( RHO.LT.ZERO ) THEN CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) END IF * * Normalize z so that norm(z) = 1. Since z is the concatenation of * two normalized vectors, norm2(z) = sqrt(2). * T = ONE / SQRT( TWO ) CALL DSCAL( N, T, Z, 1 ) * * RHO = ABS( norm(z)**2 * RHO ) * RHO = ABS( TWO*RHO ) * * Calculate the allowable deflation tolerance * IMAX = IDAMAX( N, Z, 1 ) JMAX = IDAMAX( N, D, 1 ) EPS = PDLAMCH( ICTXT, 'Epsilon' ) TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) * * If the rank-1 modifier is small enough, no more needs to be done * except to reorganize Q so that its columns correspond with the * elements in D. * IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN K = 0 GO TO 220 END IF * * If there are multiple eigenvalues then the problem deflates. Here * the number of equal eigenvalues are found. As each equal * eigenvalue is found, an elementary reflector is computed to rotate * the corresponding eigensubspace so that the corresponding * components of Z are zero in this new basis. * * CALL DLAPST( 'I', N, D, INDX, INFO ) * DO 10 I = 1, N1 COLTYP( I ) = 1 10 CONTINUE DO 20 I = N1P1, N COLTYP( I ) = 3 20 CONTINUE COL = DCOL DO 40 I = 1, N, NB DO 30 J = 0, NB - 1 IF( I+J.LE.N ) $ INDCOL( I+J ) = COL 30 CONTINUE COL = MOD( COL+1, NPCOL ) 40 CONTINUE * K = 0 K2 = N + 1 DO 50 J = 1, N NJ = INDX( J ) IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN * * Deflate due to small z component. * K2 = K2 - 1 COLTYP( NJ ) = 4 INDXP( K2 ) = NJ IF( J.EQ.N ) $ GO TO 80 ELSE PJ = NJ GO TO 60 END IF 50 CONTINUE 60 CONTINUE J = J + 1 NJ = INDX( J ) IF( J.GT.N ) $ GO TO 80 IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN * * Deflate due to small z component. * K2 = K2 - 1 COLTYP( NJ ) = 4 INDXP( K2 ) = NJ ELSE * * Check if eigenvalues are close enough to allow deflation. * S = Z( PJ ) C = Z( NJ ) * * Find sqrt(a**2+b**2) without overflow or * destructive underflow. * TAU = DLAPY2( C, S ) T = D( NJ ) - D( PJ ) C = C / TAU S = -S / TAU IF( ABS( T*C*S ).LE.TOL ) THEN * * Deflation is possible. * Z( NJ ) = TAU Z( PJ ) = ZERO IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) $ COLTYP( NJ ) = 2 COLTYP( PJ ) = 4 CALL INFOG1L( NJ, NB, NPCOL, MYCOL, DCOL, NJJ, NJCOL ) CALL INFOG1L( PJ, NB, NPCOL, MYCOL, DCOL, PJJ, PJCOL ) IF( INDCOL( PJ ).EQ.INDCOL( NJ ) .AND. MYCOL.EQ.NJCOL ) THEN CALL DROT( NP, Q( 1, PJJ ), 1, Q( 1, NJJ ), 1, C, S ) ELSE IF( MYCOL.EQ.PJCOL ) THEN CALL DGESD2D( ICTXT, NP, 1, Q( 1, PJJ ), NP, MYROW, $ NJCOL ) CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, NJCOL ) CALL DROT( NP, Q( 1, PJJ ), 1, QBUF, 1, C, S ) ELSE IF( MYCOL.EQ.NJCOL ) THEN CALL DGESD2D( ICTXT, NP, 1, Q( 1, NJJ ), NP, MYROW, $ PJCOL ) CALL DGERV2D( ICTXT, NP, 1, QBUF, NP, MYROW, PJCOL ) CALL DROT( NP, QBUF, 1, Q( 1, NJJ ), 1, C, S ) END IF T = D( PJ )*C**2 + D( NJ )*S**2 D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 D( PJ ) = T K2 = K2 - 1 I = 1 70 CONTINUE IF( K2+I.LE.N ) THEN IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN INDXP( K2+I-1 ) = INDXP( K2+I ) INDXP( K2+I ) = PJ I = I + 1 GO TO 70 ELSE INDXP( K2+I-1 ) = PJ END IF ELSE INDXP( K2+I-1 ) = PJ END IF PJ = NJ ELSE K = K + 1 DLAMDA( K ) = D( PJ ) W( K ) = Z( PJ ) INDXP( K ) = PJ PJ = NJ END IF END IF GO TO 60 80 CONTINUE * * Record the last eigenvalue. * K = K + 1 DLAMDA( K ) = D( PJ ) W( K ) = Z( PJ ) INDXP( K ) = PJ * * Count up the total number of the various types of columns, then * form a permutation which positions the four column types into * four uniform groups (although one or more of these groups may be * empty). * DO 100 J = 1, 4 DO 90 I = 0, NPCOL - 1 CTOT( I, J ) = 0 90 CONTINUE PTT( J ) = 0 100 CONTINUE DO 110 J = 1, N CT = COLTYP( J ) COL = INDCOL( J ) CTOT( COL, CT ) = CTOT( COL, CT ) + 1 110 CONTINUE * * PSM(*) = Position in SubMatrix (of types 1 through 4) * DO 120 COL = 0, NPCOL - 1 PSM( COL, 1 ) = 1 PSM( COL, 2 ) = 1 + CTOT( COL, 1 ) PSM( COL, 3 ) = PSM( COL, 2 ) + CTOT( COL, 2 ) PSM( COL, 4 ) = PSM( COL, 3 ) + CTOT( COL, 3 ) 120 CONTINUE PTT( 1 ) = 1 DO 140 I = 2, 4 CT = 0 DO 130 J = 0, NPCOL - 1 CT = CT + CTOT( J, I-1 ) 130 CONTINUE PTT( I ) = PTT( I-1 ) + CT 140 CONTINUE * * Fill out the INDXC array so that the permutation which it induces * will place all type-1 columns first, all type-2 columns next, * then all type-3's, and finally all type-4's. * DO 150 J = 1, N JS = INDXP( J ) COL = INDCOL( JS ) CT = COLTYP( JS ) I = INDXL2G( PSM( COL, CT ), NB, COL, DCOL, NPCOL ) INDX( J ) = I INDXC( PTT( CT ) ) = I PSM( COL, CT ) = PSM( COL, CT ) + 1 PTT( CT ) = PTT( CT ) + 1 150 CONTINUE * * DO 160 J = 1, N JS = INDXP( J ) JJS = INDXG2L( JS, NB, J, J, NPCOL ) COL = INDCOL( JS ) IF( COL.EQ.MYCOL ) THEN I = INDX( J ) JJQ2 = INDXG2L( I, NB, J, J, NPCOL ) CALL DCOPY( NP, Q( 1, JJS ), 1, Q2( 1, JJQ2 ), 1 ) END IF 160 CONTINUE * * * The deflated eigenvalues and their corresponding vectors go back * into the last N - K slots of D and Q respectively. * CALL DCOPY( N, D, 1, Z, 1 ) DO 170 J = K + 1, N JS = INDXP( J ) I = INDX( J ) D( I ) = Z( JS ) 170 CONTINUE * PTT( 1 ) = 1 DO 190 I = 2, 4 CT = 0 DO 180 J = 0, NPCOL - 1 CT = CT + CTOT( J, I-1 ) 180 CONTINUE PTT( I ) = PTT( I-1 ) + CT 190 CONTINUE * * IB1 = INDXC( 1 ) IE1 = IB1 IB2 = INDXC( PTT( 2 ) ) IE2 = IB2 DO 200 I = 2, PTT( 3 ) - 1 IB1 = MIN( IB1, INDXC( I ) ) IE1 = MAX( IE1, INDXC( I ) ) 200 CONTINUE DO 210 I = PTT( 2 ), PTT( 4 ) - 1 IB2 = MIN( IB2, INDXC( I ) ) IE2 = MAX( IE2, INDXC( I ) ) 210 CONTINUE NN1 = IE1 - IB1 + 1 NN2 = IE2 - IB2 + 1 NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1 220 CONTINUE RETURN * * End of PDLAED2 * END scalapack-1.8.0/SRC/pcunmr3.f0000640000175000017500000003544710363532303015516 0ustar muammarmuammar SUBROUTINE PCUNMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMR3 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JAA, JCC, LCM, $ LCMP, LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCLARZ, $ PCLARZC, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( L.LT.0 .OR. L.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic+i-ia:icc+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * IF( NOTRAN ) THEN CALL PCLARZ( SIDE, MI, NI, L, A, I, JAA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) ELSE CALL PCLARZC( SIDE, MI, NI, L, A, I, JAA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMR3 * END scalapack-1.8.0/SRC/pdlatrz.f0000640000175000017500000002162310363532303015576 0ustar muammarmuammar SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDLATRZ reduces the M-by-N ( M<=N ) real upper trapezoidal matrix * sub( A ) = [ A(IA:IA+M-1,JA:JA+M-1) A(IA:IA+M-1,JA+N-L:JA+N-1) ] to * upper triangular form by means of orthogonal transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N orthogonal matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. L > 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements N-L+1 to N of the first M rows * of sub( A ), with the array TAU, represent the orthogonal * matrix Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), which is used to introduce zeros into * the (m - k + 1)th row of sub( A ), is given in the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, ICTXT, II, J, J1, MP, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL INFOG1L, PDELSET, PDLARFG, PDLARZ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, IAROW ) DO 10 I = II, MP TAU( I ) = ZERO 10 CONTINUE * ELSE * J1 = JA + N - L DO 20 I = IA+M-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * [ A(i, j) A(i,j1:ja+n-1) ] * CALL PDLARFG( L+1, AII, I, J, A, I, J1, DESCA, DESCA( M_ ), $ TAU ) * * Apply H(i) to A(ia:i-1,j:ja+n-1) from the right * CALL PDLARZ( 'Right', I-IA, JA+N-J, L, A, I, J1, DESCA, $ DESCA( M_ ), TAU, A, IA, J, DESCA, WORK ) CALL PDELSET( A, I, J, DESCA, AII ) * 20 CONTINUE * END IF * RETURN * * End of PDLATRZ * END scalapack-1.8.0/SRC/pclauum.f0000640000175000017500000002117210363532303015563 0ustar muammarmuammar SUBROUTINE PCLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLAUUM computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the blocked form of the algorithm, calling Level 3 PBLAS. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the * distributed matrix sub( A ) is upper or lower triangular: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the triangular factor U or L. N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, J, JB, JN * .. * .. External Subroutines .. EXTERNAL PCGEMM, PCHERK, PCLAUU2, PCTRMM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * * Handle first block separately * JB = JN-JA+1 CALL PCLAUU2( 'Upper', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PCHERK( 'Upper', 'No transpose', JB, N-JB, ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PCTRMM( 'Right', 'Upper', 'Conjugate transpose', $ 'Non-unit', J-JA, JB, CONE, A, I, J, DESCA, $ A, IA, J, DESCA ) CALL PCLAUU2( 'Upper', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PCGEMM( 'No transpose', 'Conjugate transpose', $ J-JA, JB, N-J-JB+JA, CONE, A, IA, J+JB, $ DESCA, A, I, J+JB, DESCA, CONE, A, IA, $ J, DESCA ) CALL PCHERK( 'Upper', 'No transpose', JB, N-J-JB+JA, ONE, $ A, I, J+JB, DESCA, ONE, A, I, J, DESCA ) END IF 10 CONTINUE ELSE * * Compute the product L' * L. * * Handle first block separately * JB = JN-JA+1 CALL PCLAUU2( 'Lower', JB, A, IA, JA, DESCA ) IF( JB.LE.N-1 ) THEN CALL PCHERK( 'Lower', 'Conjugate transpose', JB, N-JB, ONE, $ A, IA+JB, JA, DESCA, ONE, A, IA, JA, DESCA ) END IF * * Loop over remaining block of columns * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA CALL PCTRMM( 'Left', 'Lower', 'Conjugate Transpose', $ 'Non-unit', JB, J-JA, CONE, A, I, J, DESCA, A, $ I, JA, DESCA ) CALL PCLAUU2( 'Lower', JB, A, I, J, DESCA ) IF( J+JB.LE.JA+N-1 ) THEN CALL PCGEMM( 'Conjugate transpose', 'No transpose', JB, $ J-JA, N-J-JB+JA, CONE, A, I+JB, J, DESCA, $ A, I+JB, JA, DESCA, CONE, A, I, JA, DESCA ) CALL PCHERK( 'Lower', 'Conjugate transpose', JB, $ N-J-JB+JA, ONE, A, I+JB, J, DESCA, ONE, $ A, I, J, DESCA ) END IF 20 CONTINUE END IF * RETURN * * End of PCLAUUM * END scalapack-1.8.0/SRC/pclarfg.f0000640000175000017500000002432510363532303015536 0ustar muammarmuammar SUBROUTINE PCLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N COMPLEX ALPHA * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX TAU( * ), X( * ) * .. * * Purpose * ======= * * PCLARFG generates a complex elementary reflector H of order n, such * that * * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I. * ( x ) ( 0 ) * * where alpha is a real scalar, and sub( X ) is an (N-1)-element * complex distributed vector X(IX:IX+N-2,JX) if INCX = 1 and * X(IX,JX:JX+N-2) if INCX = DESCX(M_). H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v' ) , * ( v ) * * where tau is a complex scalar and v is a complex (N-1)-element * vector. Note that H is not Hermitian. * * If the elements of sub( X ) are all zero and X(IAX,JAX) is real, * then tau = 0 and H is taken to be the unit matrix. * * Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The global order of the elementary reflector. N >= 0. * * ALPHA (local output) COMPLEX * On exit, alpha is computed in the process scope having the * vector sub( X ). * * IAX (global input) INTEGER * The global row index in X of X(IAX,JAX). * * JAX (global input) INTEGER * The global column index in X of X(IAX,JAX). * * X (local input/local output) COMPLEX, pointer into the * local memory to an array of dimension (LLD_X,*). This array * contains the local pieces of the distributed vector sub( X ). * Before entry, the incremented array sub( X ) must contain * the vector x. On exit, it is overwritten with the vector v. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * TAU (local output) COMPLEX, array, dimension LOCc(JX) * if INCX = 1, and LOCr(IX) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX, $ KNT, MYCOL, MYROW, NPCOL, NPROW REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, PCSCAL, $ PCSSCAL, INFOG2L, PSCNRM2 * .. * .. External Functions .. REAL SLAMCH, SLAPY3 COMPLEX CLADIV EXTERNAL CLADIV, SLAPY3, SLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is distributed across a process row. * CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYROW.NE.IXROW ) $ RETURN * * Broadcast X(IAX,JAX) across the process row. * IF( MYCOL.EQ.IXCOL ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1, $ MYROW, IXCOL ) END IF * INDXTAU = IIAX * ELSE * * sub( X ) is distributed across a process column. * CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYCOL.NE.IXCOL ) $ RETURN * * Broadcast X(IAX,JAX) across the process column. * IF( MYROW.EQ.IXROW ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, ALPHA, 1, $ IXROW, MYCOL ) END IF * INDXTAU = JJAX * END IF * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO RETURN END IF * CALL PSCNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) ALPHR = REAL( ALPHA ) ALPHI = AIMAG( ALPHA ) * IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN * * H = I * TAU( INDXTAU ) = ZERO * ELSE * * General case * BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) SAFMIN = SLAMCH( 'S' ) RSAFMN = ONE / SAFMIN IF( ABS( BETA ).LT.SAFMIN ) THEN * * XNORM, BETA may be inaccurate; scale X and recompute them * KNT = 0 10 CONTINUE KNT = KNT + 1 CALL PCSSCAL( N-1, RSAFMN, X, IX, JX, DESCX, INCX ) BETA = BETA*RSAFMN ALPHI = ALPHI*RSAFMN ALPHR = ALPHR*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN * CALL PSCNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) ALPHA = CMPLX( ALPHR, ALPHI ) BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) TAU( INDXTAU ) = CMPLX( ( BETA-ALPHR ) / BETA, $ -ALPHI / BETA ) ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) CALL PCSCAL( N-1, ALPHA, X, IX, JX, DESCX, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * ALPHA = BETA DO 20 J = 1, KNT ALPHA = ALPHA*SAFMIN 20 CONTINUE ELSE TAU( INDXTAU ) = CMPLX( ( BETA-ALPHR ) / BETA, $ -ALPHI / BETA ) ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) CALL PCSCAL( N-1, ALPHA, X, IX, JX, DESCX, INCX ) ALPHA = BETA END IF END IF * RETURN * * End of PCLARFG * END scalapack-1.8.0/SRC/pdlamch.f0000640000175000017500000000507110363532303015525 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER CMACH INTEGER ICTXT * .. * * Purpose * ======= * * PDLAMCH determines double precision machine parameters. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle in which the computation takes * place. * * CMACH (global input) CHARACTER*1 * Specifies the value to be returned by PDLAMCH: * = 'E' or 'e', PDLAMCH := eps * = 'S' or 's , PDLAMCH := sfmin * = 'B' or 'b', PDLAMCH := base * = 'P' or 'p', PDLAMCH := eps*base * = 'N' or 'n', PDLAMCH := t * = 'R' or 'r', PDLAMCH := rnd * = 'M' or 'm', PDLAMCH := emin * = 'U' or 'u', PDLAMCH := rmin * = 'L' or 'l', PDLAMCH := emax * = 'O' or 'o', PDLAMCH := rmax * * where * * eps = relative machine precision * sfmin = safe minimum, such that 1/sfmin does not overflow * base = base of the machine * prec = eps*base * t = number of (base) digits in the mantissa * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise * emin = minimum exponent before (gradual) underflow * rmin = underflow threshold - base**(emin-1) * emax = largest exponent before overflow * rmax = overflow threshold - (base**emax)*(1-eps) * * ===================================================================== * * .. Local Scalars .. INTEGER IDUMM DOUBLE PRECISION TEMP * .. * .. External Subroutines .. EXTERNAL DGAMN2D, DGAMX2D * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH, LSAME * .. * .. Executable Statements .. * TEMP = DLAMCH( CMACH ) * IF( LSAME( CMACH, 'E' ).OR.LSAME( CMACH, 'S' ).OR. $ LSAME( CMACH, 'M' ).OR.LSAME( CMACH, 'U' ) ) THEN CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) ELSE IF( LSAME( CMACH, 'L' ).OR.LSAME( CMACH, 'O' ) ) THEN CALL DGAMN2D( ICTXT, 'All', ' ', 1, 1, TEMP, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * PDLAMCH = TEMP * * End of PDLAMCH * END scalapack-1.8.0/SRC/pcgerfs.f0000640000175000017500000010265010363532303015547 0ustar muammarmuammar SUBROUTINE PCGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, $ JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, $ LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX, $ LRWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ) REAL BERR( * ), FERR( * ), RWORK( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PCGERFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**H * sub( X ) = sub( B ) * (Conjugate transpose) * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) COMPLEX pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) COMPLEX pointer into the local * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * factors of the matrix sub( A ) = P * L * U as computed by * PCGETRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF. * This array contains the pivoting information as computed * by PCGETRF. IPIV(i) -> The global row local row i * was swapped with. This array is tied to the distributed * matrix A. * * B (local input) COMPLEX pointer into the local * memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local * pieces of the distributed matrix of right hand sides * sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input and output) COMPLEX pointer into the * local memory to an array of local dimension * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains * the local pieces of the distributed matrix solution * sub( X ). On exit, the improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) REAL array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr( N + MOD(IA-1,MB_A) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCr( N + MOD(IB-1,MB_B) ). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) REAL ZERO, RONE, TWO, THREE PARAMETER ( ZERO = 0.0E+0, RONE = 1.0E+0, TWO = 2.0E+0, $ THREE = 3.0E+0 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN CHARACTER TRANSN, TRANST INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LRWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN COMPLEX ZDUM * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC REAL PSLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CHK1MAT, $ DESCSET, INFOG2L, PCAGEMV, PCAXPY, PCHK2MAT, $ PCCOPY, PCGEMV, PCGETRS, PCLACON, $ PXERBLA, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * NOTRAN = LSAME( TRANS, 'N' ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 20, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 2 * NPMOD LRWMIN = NPMOD WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( ( .NOT.NOTRAN ) .AND. ( .NOT.LSAME( TRANS, 'T' ) ) .AND. $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -14 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1600 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1600 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 2000 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -18 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 2000 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -19 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 2000 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -24 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -26 END IF END IF * IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 24 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 26 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 5, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, N, 2, NRHS, 3, $ IX, JX, DESCX, 20, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGERFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANSN = 'N' TRANST = 'C' ELSE TRANSN = 'C' TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = 1 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PSLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PCCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, IX, $ JX+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PCAGEMV( TRANS, N, N, RONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, RONE, RWORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PCGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PCGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PCGETRS( TRANSN, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PCCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PCGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, $ IX, J+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) + * abs(sub(B)))(i)) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 RWORK( IIW+II-IIXB ) = CABS1( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PCAGEMV( TRANS, N, N, RONE, A, IA, JA, DESCA, X, IX, $ J+K, DESCX, 1, RONE, RWORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, CABS1( WORK( IPR+II ) ) / $ RWORK( IPB+II ) ) ELSE S = MAX( S, ( CABS1( WORK( IPR+II ) )+SAFE1 ) / $ ( RWORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PCGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PCAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PCLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( RWORK( IPB+II ).GT.SAFE2 ) THEN RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) ELSE RWORK( IPB+II ) = CABS1( WORK( IPR+II ) ) + $ NZ*EPS*RWORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL CGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PCLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PCGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = RWORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PCGETRS( TRANSN, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, $ INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, CABS1( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL SGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, $ 1, IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) RWORK( 1 ) = REAL( LRWMIN ) * RETURN * * End of PCGERFS * END scalapack-1.8.0/SRC/pzgetri.f0000640000175000017500000003520110430435051015574 0ustar muammarmuammar SUBROUTINE PZGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.4) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * v1.7.4: May 10, 2006 * v1.7: May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ), IWORK( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZGETRI computes the inverse of a distributed matrix using the LU * factorization computed by PZGETRF. This method inverts U and then * computes the inverse of sub( A ) = A(IA:IA+N-1,JA:JA+N-1) denoted * InvA by solving the system InvA*L = inv(U) for InvA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the L and U obtained by the * factorization sub( A ) = P*L*U computed by PZGETRF. On * exit, if INFO = 0, sub( A ) contains the inverse of the * original distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A * keeps track of the pivoting information. IPIV(i) is the * global row index the local row i was swapped with. This * array is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = LOCr(N+MOD(IA-1,MB_A))*NB_A. WORK is used to keep a * copy of at most an entire column block of sub( A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK used as workspace for * physically transposing the pivots. * LIWORK is local input and must be at least * if NPROW == NPCOL then * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + NB_A, * else * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + * MAX( CEIL(CEIL(LOCr(M_A)/MB_A)/(LCM/NPROW)), * NB_A ) * where LCM is the least common multiple of process * rows and columns (NPROW and NPCOL). * end if * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,IA+K-1) is exactly zero; the * matrix is singular and its inverse could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IROFF, IW, J, $ JB, JN, LCM, LIWMIN, LWMIN, MP, MYCOL, MYROW, $ NN, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PZGEMM, PZLACPY, PZLASET, PZLAPIV, $ PZTRSM, PZTRTRI, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) LWMIN = NP * DESCA( NB_ ) * MP = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) NQ = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) IF( NPROW.EQ.NPCOL ) THEN LIWMIN = NQ + DESCA( NB_ ) ELSE * * Use the formula for the workspace given in PxLAPIV * to compute the minimum size LIWORK for IWORK * * The formula in PxLAPIV is * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * * where * M_P is the global length of the pivot vector * MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW * I_P is IA * I_P = IA * MB_P is the block size use for the block cyclic distribution of the * pivot vector * MB_P = DESCA (MB_ ) * LOCc ( . ) * NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) * LOCr ( . ) * NUMROC ( . , DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) * CEIL ( X / Y ) * ICEIL( X, Y ) * LCM * LCM = ILCM( NPROW, NPCOL ) * LCM = ILCM( NPROW, NPCOL ) LIWMIN = NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW $ + MOD ( IA - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -8 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 8 IF( LIWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGETRI', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Form inv(U). If INFO > 0 from PZTRTRI, then U is singular, * and the inverse is not computed. * CALL PZTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) IF( INFO.GT.0 ) $ RETURN * * Define array descriptor for working array WORK * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) NN = ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1 IACOL = INDXG2P( NN, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, NP ) ) IW = IROFF + 1 * * Solve the equation inv(A)*L=inv(U) for inv(A) using blocked code. * DO 10 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Copy current block column of L to WORK and replace with zeros. * CALL PZLACPY( 'Lower', JA+N-1-J, JB, A, I+1, J, DESCA, $ WORK, IW+J-JA+1, 1, DESCW ) CALL PZLASET( 'Lower', JA+N-1-J, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute current block column of inv(A). * IF( J+JB.LE.JA+N-1 ) $ CALL PZGEMM( 'No transpose', 'No transpose', N, JB, $ JA+N-J-JB, -ONE, A, IA, J+JB, DESCA, WORK, $ IW+J+JB-JA, 1, DESCW, ONE, A, IA, J, DESCA ) CALL PZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW+J-JA, 1, DESCW, A, IA, J, DESCA ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 * * Copy current block column of L to WORK and replace with zeros. * CALL PZLACPY( 'Lower', N-1, JB, A, IA+1, JA, DESCA, WORK, IW+1, $ 1, DESCW ) CALL PZLASET( 'Lower', N-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Compute current block column of inv(A). * IF( JA+JB.LE.JA+N-1 ) $ CALL PZGEMM( 'No transpose', 'No transpose', N, JB, $ N-JB, -ONE, A, IA, JA+JB, DESCA, WORK, IW+JB, 1, $ DESCW, ONE, A, IA, JA, DESCA ) CALL PZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW, 1, DESCW, A, IA, JA, DESCA ) * * Use the row pivots and apply them to the columns of the global * matrix. * CALL DESCSET( DESCW, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ MP+DESCA( MB_ ) ) CALL PZLAPIV( 'Backward', 'Columns', 'Column', N, N, A, IA, $ JA, DESCA, IPIV, IA, 1, DESCW, IWORK ) * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PZGETRI * END scalapack-1.8.0/SRC/pclassq.f0000640000175000017500000002431110363532303015561 0ustar muammarmuammar SUBROUTINE PCLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N REAL SCALE, SUMSQ * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX X( * ) * .. * * Purpose * ======= * * PCLASSQ returns the values scl and smsq such that * * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, * * where x( i ) = sub( X ) = abs( X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ) ). * The value of sumsq is assumed to be at least unity and the value of * ssq will then satisfy * * 1.0 .le. ssq .le. ( sumsq + 2*n ). * * scale is assumed to be non-negative and scl returns the value * * scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), * i * * scale and sumsq must be supplied in SCALE and SUMSQ respectively. * SCALE and SUMSQ are overwritten by scl and ssq respectively. * * The routine makes only one pass through the vector sub( X ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * The result are only available in the scope of sub( X ), i.e if * sub( X ) is distributed along a process row, the correct results are * only available in this process row of the grid. Similarly if sub( X ) * is distributed along a process column, the correct results are only * available in this process column of the grid. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (input) COMPLEX * The vector for which a scaled sum of squares is computed. * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= n. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * SCALE (local input/local output) REAL * On entry, the value scale in the equation above. * On exit, SCALE is overwritten with scl , the scaling factor * for the sum of squares. * * SUMSQ (local input/local output) REAL * On entry, the value sumsq in the equation above. * On exit, SUMSQ is overwritten with smsq , the basic sum of * squares from which scl has been factored out. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, ICOFF, ICTXT, IIX, IOFF, IROFF, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ REAL TEMP1 * .. * .. Local Arrays .. REAL WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSTREECOMB, SCOMBSSQ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFF * * Code direct from LAPACK's CLASSQ, (save subroutine call) * IF( NQ.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 10 I = 1, NQ IF( REAL( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( REAL( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ * ( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IF( AIMAG( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( AIMAG( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + LDX 10 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PSTREECOMB( ICTXT, 'Rowwise', 2, WORK, -1, IXCOL, $ SCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * ELSE IF( INCX.EQ.1 ) THEN * * X is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF * * Code direct from LAPACK's CLASSQ, (save subroutine call) * IF( NP.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 20 I = 1, NP IF( REAL( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( REAL( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IF( AIMAG( X( IOFF ) ).NE.ZERO ) THEN TEMP1 = ABS( AIMAG( X( IOFF ) ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + 1 20 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PSTREECOMB( ICTXT, 'Columnwise', 2, WORK, -1, IXCOL, $ SCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * END IF * RETURN * * End of PCLASSQ * END scalapack-1.8.0/SRC/pzgetrs.f0000640000175000017500000002611010363532303015610 0ustar muammarmuammar SUBROUTINE PZGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZGETRS solves a system of distributed linear equations * * op( sub( A ) ) * X = sub( B ) * * with a general N-by-N distributed matrix sub( A ) using the LU * factorization computed by PZGETRF. * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), op( A ) = A, A**T or A**H * and sub( B ) denotes B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block data decomposition ( MB_A=NB_A ). * * Arguments * ========= * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': sub( A ) * X = sub( B ) (No transpose) * = 'T': sub( A )**T * X = sub( B ) (Transpose) * = 'C': sub( A )**H * X = sub( B ) (Conjugate transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local * memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the factors * L and U from the factorization sub( A ) = P*L*U; the unit * diagonal elements of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand sides * sub( B ). On exit, sub( B ) is overwritten by the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCIP( DLEN_ ), IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ PXERBLA, PZLAPIV, PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE NOTRAN = LSAME( TRANS, 'N' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1200+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1200+CTXT_) END IF END IF IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, 3, $ IB, JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGETRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ DESCA( MB_ ) + NUMROC( DESCA( M_ ), DESCA( MB_ ), $ MYROW, DESCA( RSRC_ ), NPROW ) ) * IF( NOTRAN ) THEN * * Solve sub( A ) * X = sub( B ). * * Apply row interchanges to the right hand sides. * CALL PZLAPIV( 'Forward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A )' * X = sub( B ). * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, $ A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Apply row interchanges to the solution vectors. * CALL PZLAPIV( 'Backward', 'Row', 'Col', N, NRHS, B, IB, JB, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * END IF * RETURN * * End of PZGETRS * END scalapack-1.8.0/SRC/pchetrd.f0000640000175000017500000004033610363532303015551 0ustar muammarmuammar SUBROUTINE PCHETRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCHETRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IINFO, IPW, $ IROFFA, J, JB, JX, K, KK, LWMIN, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHER2K, $ PCHETD2, PCHK1MAT, PCLATRD, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( (NP+1)*NB, 3*NB ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHETRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP * NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, INDXG2P( JA+N-KK, $ NB, MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, NP ) ) * DO 10 K = N-KK+1, NB+1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PCLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, TAU, $ WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PCHER2K( UPLO, 'No transpose', K-1, JB, -CONE, A, IA, $ J, DESCA, WORK, 1, 1, DESCW, ONE, A, IA, JA, $ DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PCELSET( A, I-1, J, DESCA, CMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PCHETD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PCLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PCHER2K( UPLO, 'No transpose', N-K-NB+1, NB, -CONE, A, $ I+NB, J, DESCA, WORK, K+NB, 1, DESCW, ONE, A, $ I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PCELSET( A, I+NB, J+NB-1, DESCA, CMPLX( E( JX ) ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PCHETD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCHETRD * END scalapack-1.8.0/SRC/pclange.f0000640000175000017500000002664710363532303015542 0ustar muammarmuammar REAL FUNCTION PCLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLANGE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * distributed matrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1). * * PCLANGE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+M-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PCLANGE as described * above. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PCLANGE * is set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PCLANGE is set to zero. N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, ICOFF, IOFFA, $ IROFF, J, JJ, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASSQ, INFOG2L, SCOMBSSQ, $ SGEBR2D, SGEBS2D, SGAMX2D, SGSUM2D, $ PSTREECOMB * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, NUMROC EXTERNAL LSAME, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO IF( NQ.GT.0 .AND. MP.GT.0 ) THEN IOFFA = (JJ-1)*LDA DO 20 J = JJ, JJ+NQ-1 DO 10 I = II, MP+II-1 VALUE = MAX( VALUE, ABS( A( IOFFA+I ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE END IF CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, J, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * * Find norm1( sub( A ) ). * IF( NQ.GT.0 ) THEN IOFFA = ( JJ - 1 ) * LDA DO 40 J = JJ, JJ+NQ-1 SUM = ZERO IF( MP.GT.0 ) THEN DO 30 I = II, MP+II-1 SUM = SUM + ABS( A( IOFFA+I ) ) 30 CONTINUE END IF IOFFA = IOFFA + LDA WORK( J-JJ+1 ) = SUM 40 CONTINUE END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, J, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * * Find normI( sub( A ) ). * IF( MP.GT.0 ) THEN IOFFA = II + ( JJ - 1 ) * LDA DO 60 I = II, II+MP-1 SUM = ZERO IF( NQ.GT.0 ) THEN DO 50 J = IOFFA, IOFFA + NQ*LDA - 1, LDA SUM = SUM + ABS( A( J ) ) 50 CONTINUE END IF WORK( I-II+1 ) = SUM IOFFA = IOFFA + 1 60 CONTINUE END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for supnorm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( ISAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, I, $ J, -1, 0, 0 ) END IF * ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE IOFFA = II + ( JJ - 1 ) * LDA IF( NQ.GT.0 ) THEN DO 70 J = IOFFA, IOFFA + NQ*LDA - 1, LDA CALL CLASSQ( MP, A( J ), 1, SCALE, SUM ) 70 CONTINUE END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PCLANGE = VALUE * RETURN * * End of PCLANGE * END scalapack-1.8.0/SRC/pzpotf2.f0000640000175000017500000003133710363532303015525 0ustar muammarmuammar SUBROUTINE PZPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZPOTF2 computes the Cholesky factorization of a complex hermitian * positive definite distributed matrix sub( A )=A(IA:IA+N-1,JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires N <= NB_A-MOD(JA-1, NB_A) and square block * decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURR, IDIAG, IIA, $ IOFFA, IROFF, J, JJA, LDA, MYCOL, MYROW, $ NPCOL, NPROW DOUBLE PRECISION AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, $ IGEBS2D, INFOG2L, PB_TOPGET, PXERBLA, ZGEMV, $ ZLACGV, ZDSCAL * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD, SQRT * .. * .. External Functions .. LOGICAL LSAME COMPLEX*16 ZDOTC EXTERNAL LSAME, ZDOTC * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( N+ICOFF.GT.DESCA( NB_ ) ) THEN INFO = -2 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Compute local information * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF ( UPPER ) THEN * * Process (IAROW, IACOL) owns block to be factorized * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN * * Compute the Cholesky factorization A = U'*U. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 10 J = JA, JA+N-1 * * Compute U(J,J) and test for non-positive-definiteness. * AJJ = DBLE( A( IDIAG ) ) - $ ZDOTC( J-JA, A( IOFFA ), 1, A( IOFFA ), 1 ) IF( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 20 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of row J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + LDA CALL ZLACGV( J-JA, A( IOFFA ), 1 ) CALL ZGEMV( 'Transpose', J-JA, JA+N-J-1, -CONE, $ A( IOFFA+LDA ), LDA, A( IOFFA ), 1, $ CONE, A( ICURR ), LDA ) CALL ZLACGV( J-JA, A( IOFFA ), 1 ) CALL ZDSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), $ LDA ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE * 20 CONTINUE * * Broadcast INFO to all processes in my IAROW. * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) END IF * * IAROW bcasts along columns so that everyone has INFO * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, 1, $ IAROW, MYCOL ) * END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Compute the Cholesky factorization A = L*L'. * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * DO 30 J = JA, JA+N-1 * * Compute L(J,J) and test for non-positive-definiteness. * AJJ = DBLE( A( IDIAG ) ) - $ ZDOTC( J-JA, A( IOFFA ), LDA, A( IOFFA ), LDA ) IF ( AJJ.LE.ZERO ) THEN A( IDIAG ) = AJJ INFO = J - JA + 1 GO TO 40 END IF AJJ = SQRT( AJJ ) A( IDIAG ) = AJJ * * Compute elements J+1:JA+N-1 of column J. * IF( J.LT.JA+N-1 ) THEN ICURR = IDIAG + 1 CALL ZLACGV( J-JA, A( IOFFA ), LDA ) CALL ZGEMV( 'No transpose', JA+N-J-1, J-JA, -CONE, $ A( IOFFA+1 ), LDA, A( IOFFA ), LDA, $ CONE, A( ICURR ), 1 ) CALL ZLACGV( J-JA, A( IOFFA ), LDA ) CALL ZDSCAL( JA+N-J-1, ONE / AJJ, A( ICURR ), 1 ) END IF IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 30 CONTINUE * 40 CONTINUE * * Broadcast INFO to everyone in IACOL * CALL IGEBS2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Columnwise', COLBTOP, 1, 1, INFO, $ 1, IAROW, MYCOL ) * END IF * * IACOL bcasts INFO along rows so that everyone has it * CALL IGEBS2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1 ) * ELSE * CALL IGEBR2D( ICTXT, 'Rowwise', ROWBTOP, 1, 1, INFO, 1, $ MYROW, IACOL ) * END IF * END IF * RETURN * * End of PZPOTF2 * END scalapack-1.8.0/SRC/pdlared1d.f0000640000175000017500000001507510367447133015774 0ustar muammarmuammar SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 12, 2005 * * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESC( * ) DOUBLE PRECISION BYALL( * ), BYCOL( * ), WORK( LWORK ) * .. * * Purpose * ======= * * PDLARED1D redistributes a 1D array * * It assumes that the input array, BYCOL, is distributed across * rows and that all process columns contain the same copy of * BYCOL. The output array, BYALL, will be identical on all processes * and will contain the entire array. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NP = Number of local rows in BYCOL() * * N (global input) INTEGER * The number of elements to be redistributed. N >= 0. * * IA (global input) INTEGER * IA must be equal to 1 * * JA (global input) INTEGER * JA must be equal to 1 * * DESC (global/local input) INTEGER Array of dimension DLEN_ * A 2D array descriptor, which describes BYCOL * * BYCOL (local input) distributed block cyclic DOUBLE PRECISION array * global dimension (N), local dimension (NP) * BYCOL is distributed across the process rows * All process columns are assumed to contain the same value * * BYALL (global output) DOUBLE PRECISION global dimension( N ) * local dimension (N) * BYALL is exactly duplicated on all processes * It contains the same values as BYCOL, but it is replicated * across all processes rather than being distributed * * BYALL(i) = BYCOL( NUMROC(i,DESC( NB_ ),MYROW,0,NPROW ) on the procs * whose MYROW == mod((i-1)/DESC( NB_ ),NPROW) * * WORK (local workspace) DOUBLE PRECISION dimension (LWORK) * Used to hold the buffers sent from one process to another * * LWORK (local input) INTEGER size of WORK array * LWORK >= NUMROC(N, DESC( NB_ ), 0, 0, NPCOL) * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ALLI, BUFLEN, I, II, MYCOL, MYROW, NB, NPCOL, $ NPROW, PCOL * .. * .. External Functions .. * INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. * EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) NB = DESC( MB_ ) * * DO 30 PCOL = 0, NPCOL - 1 BUFLEN = NUMROC( N, NB, PCOL, 0, NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CALL DCOPY( BUFLEN, BYCOL, 1, WORK, 1 ) CALL DGEBS2D( DESC( CTXT_ ), 'R', ' ', 1, BUFLEN, WORK, 1 ) ELSE CALL DGEBR2D( DESC( CTXT_ ), 'R', ' ', 1, BUFLEN, WORK, 1, $ MYROW, PCOL ) END IF * ALLI = PCOL*NB DO 20 II = 1, BUFLEN, NB DO 10 I = 1, MIN( NB, BUFLEN-II+1 ) BYALL( ALLI+I ) = WORK( II-1+I ) 10 CONTINUE ALLI = ALLI + NB*NPCOL 20 CONTINUE 30 CONTINUE * RETURN * * End of PDLARED1D * END scalapack-1.8.0/SRC/psdtsv.f0000640000175000017500000004611210363532303015441 0ustar muammarmuammar SUBROUTINE PSDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL B( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PSDTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal diagonally dominant-like distributed * matrix. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PSDTTRF and PSDTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) REAL pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL+3*NB) * +max(10*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PSDTTRF, PSDTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PSDTTRF and PSDTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PSDTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL+3*NB) * * Factor the matrix * CALL PSDTTRF( N, DL, D, DU, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PSDTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PSDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSDTSV', -INFO ) RETURN END IF * RETURN * * End of PSDTSV * END scalapack-1.8.0/SRC/pdorgqr.f0000640000175000017500000003074110363532303015575 0ustar muammarmuammar SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGQR generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PDGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PDGEQRF in the K columns of its distributed * matrix argument A(IA:*,JA:JA+K-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+K-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JL, $ JN, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDLARFB, $ PDLARFT, PDLASET, PDORG2R, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL PDLASET( 'All', JL-JA, JA+N-JL, ZERO, ZERO, A, IA, JL, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PDORG2R( M-JL+JA, JA+N-JL, JA+K-JL, A, IA+JL-JA, JL, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of columns to loop over ? * IF( JL.GT.JN+1 ) THEN * * Use blocked code * DO 10 J = JL-DESCA( NB_ ), JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PDLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, $ J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PDLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-I+IA, N-J-JB+JA, JB, A, I, $ J, DESCA, WORK, A, I, J+JB, DESCA, $ WORK( IPW ) ) END IF * * Apply H to rows i:ia+m-1 of current block * CALL PDORG2R( M-I+IA, JB, JB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia:i-1 of current block to zero * CALL PDLASET( 'All', I-IA, JB, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * END IF * * Handle first block separately * IF( JL.GT.JA ) THEN * JB = JN - JA + 1 * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PDLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PDLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, $ JA+JB, DESCA, WORK( IPW ) ) * * Apply H to rows ia:ia+m-1 of current block * CALL PDORG2R( M, JB, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGQR * END scalapack-1.8.0/SRC/psorgrq.f0000640000175000017500000002675010363532303015621 0ustar muammarmuammar SUBROUTINE PSORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGRQ generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1) H(2) . . . H(k) * * as returned by PSGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. * N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PSGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLARFB, $ PSLARFT, PSLASET, PSORGR2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Set A(ia:in,ja+n-m+in-ia+1:ja-n+1) to zero. * CALL PSLASET( 'All', IN-IA+1, M-IN+IA-1, ZERO, ZERO, A, IA, $ JA+N-M+IN-IA+1, DESCA ) * * Use unblocked code for the first or only block. * CALL PSORGR2( IN-IA+1, N-M+IN-IA+1, IN-IA-M+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PSLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the right * CALL PSLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', $ I-IA, N-M+I+IB-IA, IB, A, I, JA, DESCA, WORK, A, $ IA, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-m+i+ib-ia-1 of current block * CALL PSORGR2( IB, N-M+I+IB-IA, IB, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows i:i+ib-1,ja+n-m+i+ib-ia:ja+n-1 of current block to * zero * CALL PSLASET( 'All', IB, M-I-IB+IA, ZERO, ZERO, A, I, $ JA+N-M+I+IB-IA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGRQ * END scalapack-1.8.0/SRC/pzposvx.f0000640000175000017500000006571410363532303015660 0ustar muammarmuammar SUBROUTINE PZPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, $ WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LRWORK, $ LWORK, N, NRHS DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), DESCX( * ) DOUBLE PRECISION BERR( * ), FERR( * ), SC( * ), $ SR( * ), RWORK( * ) COMPLEX*16 A( * ), AF( * ), $ B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to * compute the solution to a complex system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. In the following comments Y denotes Y(IY:IY+M-1,JY:JY+K-1) * a M-by-K matrix where Y can be A, AF, B and X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * diag(SR) * A * diag(SC) * inv(diag(SC)) * X = diag(SR) * B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(SR)*A*diag(SC) and B by diag(SR)*B. * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as * A = U**T* U, if UPLO = 'U', or * A = L * L**T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If equilibration was used, the matrix X is premultiplied by * diag(SR) so that it solves the original system before * equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix A is * supplied on entry, and if not, whether the matrix A should be * equilibrated before it is factored. * = 'F': On entry, AF contains the factored form of A. * If EQUED = 'Y', the matrix A has been equilibrated * with scaling factors given by S. A and AF will not * be modified. * = 'N': The matrix A will be copied to AF and factored. * = 'E': The matrix A will be equilibrated if necessary, then * copied to AF and factored. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A is stored; * = 'L': Lower triangle of A is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices B and X. NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * the local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). * On entry, the Hermitian matrix A, except if FACT = 'F' and * EQUED = 'Y', then A must contain the equilibrated matrix * diag(SR)*A*diag(SC). If UPLO = 'U', the leading * N-by-N upper triangular part of A contains the upper * triangular part of the matrix A, and the strictly lower * triangular part of A is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper * triangular part of A is not referenced. A is not modified if * FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. * * On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by * diag(SR)*A*diag(SC). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) COMPLEX*16 pointer * into the local memory to an array of local dimension * ( LLD_AF, LOCc(JA+N-1)). * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(SR)*A*diag(SC). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the original * matrix A. * * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * EQUED (global input/global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'Y': Equilibration was done, i.e., A has been replaced by * diag(SR) * A * diag(SC). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * SR (local input/local output) COMPLEX*16 array, * dimension (LLD_A) * The scale factors for A distributed across process rows; * not accessed if EQUED = 'N'. SR is an input variable if * FACT = 'F'; otherwise, SR is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SR must be * positive. * * SC (local input/local output) COMPLEX*16 array, * dimension (LOC(N_A)) * The scale factors for A distributed across * process columns; not accessed if EQUED = 'N'. SC is an input * variable if FACT = 'F'; otherwise, SC is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SC must be * positive. * * B (local input/local output) COMPLEX*16 pointer into * the local memory to an array of local dimension * ( LLD_B, LOCc(JB+NRHS-1) ). * On entry, the N-by-NRHS right-hand side matrix B. * On exit, if EQUED = 'N', B is not modified; if TRANS = 'N' * and EQUED = 'R' or 'B', B is overwritten by diag(R)*B; if * TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is overwritten * by diag(C)*B. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) COMPLEX*16 pointer into * the local memory to an array of local dimension * ( LLD_X, LOCc(JX+NRHS-1) ). * If INFO = 0, the N-by-NRHS solution matrix X to the original * system of equations. Note that A and B are modified on exit * if EQUED .ne. 'N', and the solution to the equilibrated * system is inv(diag(SC))*X if TRANS = 'N' and EQUED = 'C' or * 'B', or inv(diag(SR))*X if TRANS = 'T' or 'C' and EQUED = 'R' * or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) DOUBLE PRECISION * The estimate of the reciprocal condition number of the matrix * A after equilibration (if done). If RCOND is less than the * machine precision (in particular, if RCOND = 0), the matrix * is singular to working precision. This condition is * indicated by a return code of INFO > 0, and the solution and * error bounds are not computed. * * FERR (local output) DOUBLE PRECISION array, dimension (LOC(N_B)) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution, FERR(j) bounds the magnitude * of the largest entry in (X(j) - XTRUE) divided by * the magnitude of the largest entry in X(j). The quality of * the error bound depends on the quality of the estimate of * norm(inv(A)) computed in the code; if the estimate of * norm(inv(A)) is accurate, the error bound is guaranteed. * * BERR (local output) DOUBLE PRECISION array, dimension (LOC(N_B)) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A or B that makes X(j) an exact solution). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PZPOCON( LWORK ), PZPORFS( LWORK ) ) * + LOCr( N_A ). * LWORK = 3*DESCA( LLD_ ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK = 2*LOCc(N_A). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: if INFO = i, the leading minor of order i of A * is not positive definite, so the factorization * could not be completed, and the solution and error * bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the matrix * is singular to working precision, and the solution * and error bounds have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL EQUIL, LQUERY, NOFACT, RCEQU INTEGER I, IACOL, IAROW, IAFROW, IBROW, IBCOL, ICOFF, $ ICOFFA, ICTXT, IDUMM, IIA, IIB, IIX, INFEQU, $ IROFF, IROFFA, IROFFAF, IROFFB, IROFFX, IXCOL, $ IXROW, J, JJA, JJB, JJX, LDB, LDX, LRWMIN, $ LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NRHSQ, $ NQ DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, $ DGAMN2D, DGAMX2D, INFOG2L, $ PXERBLA, PZPOCON, PZPOEQU, $ PZPORFS, PZPOTRF, PZPOTRS, $ PZLACPY, PZLAQSY * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PZLANHE EXTERNAL PDLAMCH, INDXG2P, LSAME, NUMROC, PZLANHE * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA LWMIN = 3*DESCA( LLD_ ) LRWMIN = MAX( 2*NQ, NP ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' RCEQU = .FALSE. ELSE RCEQU = LSAME( EQUED, 'Y' ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( RCEQU ) THEN * SMIN = BIGNUM SMAX = ZERO DO 10 J = IIA, IIA + NP - 1 SMIN = MIN( SMIN, SR( J ) ) SMAX = MAX( SMAX, SR( J ) ) 10 CONTINUE CALL DGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, SMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, SMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( SMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) ELSE SCOND = ONE END IF END IF END IF END IF * WORK( 1 ) = DBLE( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( UPLO ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 13 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 28 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 5, IDUM1, IDUM2, $ INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 28 IF( LRWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 4, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOSVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PZPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFEQU ) * IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PZLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) RCEQU = LSAME( EQUED, 'Y' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) LDB = DESCB( LLD_ ) IROFF = MOD( IB-1, DESCB( MB_ ) ) ICOFF = MOD( JB-1, DESCB( NB_ ) ) NP = NUMROC( N+IROFF, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 30 J = JJB, JJB+NRHSQ-1 DO 20 I = IIB, IIB+NP-1 B( I + ( J-1 )*LDB ) = SR( I )*B( I + ( J-1 )*LDB ) 20 CONTINUE 30 CONTINUE END IF * IF( NOFACT .OR. EQUIL ) THEN * * Compute the Cholesky factorization A = U'*U or A = L*L'. * CALL PZLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PZPOTRF( UPLO, N, AF, IAF, JAF, DESCAF, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * ANORM = PZLANHE( '1', UPLO, N, A, IA, JA, DESCA, RWORK ) * * Compute the reciprocal of the condition number of A. * CALL PZPOCON( UPLO, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PZLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PZPOTRS( UPLO, N, NRHS, AF, IAF, JAF, DESCAF, X, IX, JX, $ DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PZPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX( MB_ ) ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 50 J = JJX, JJX+NRHSQ-1 DO 40 I = IIX, IIX+NP-1 X( I + ( J-1 )*LDX ) = SR( I )*X( I + ( J-1 )*LDX ) 40 CONTINUE 50 CONTINUE DO 60 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / SCOND 60 CONTINUE END IF * WORK( 1 ) = DBLE( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) RETURN * * End of PZPOSVX * END scalapack-1.8.0/SRC/pcunm2r.f0000640000175000017500000004161010363532303015502 0ustar muammarmuammar SUBROUTINE PCUNM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNM2R overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PCGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PCGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 COMPLEX AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CGEBR2D, $ CGEBS2D, CGERV2D, CGESD2D, CHK1MAT, $ CSCAL, INFOG2L, PCELSET, PCELSET2, $ PCLARF, PCLARFC, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - CONJG( TAU( JJ ) ) END IF CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL CSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL CSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - CONJG( TAU( JJ ) ) END IF END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL CGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL CGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL CSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * DO 10 J = J1, J2, J3 IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H(j) or H(j)' * CALL PCELSET2( AJJ, A, IA+J-JA, J, DESCA, ONE ) IF( NOTRAN ) THEN CALL PCLARF( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, $ C, ICC, JCC, DESCC, WORK ) ELSE CALL PCLARFC( SIDE, MI, NI, A, IA+J-JA, J, DESCA, 1, TAU, $ C, ICC, JCC, DESCC, WORK ) END IF CALL PCELSET( A, IA+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNM2R * END scalapack-1.8.0/SRC/zlanv2.f0000640000175000017500000000674010363532303015335 0ustar muammarmuammar SUBROUTINE ZLANV2( A, B, C, D, RT1, RT2, CS, SN ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. DOUBLE PRECISION CS COMPLEX*16 A, B, C, D, RT1, RT2, SN * .. * * Purpose * ======= * * ZLANV2 computes the Schur factorization of a complex 2-by-2 * nonhermitian matrix in standard form: * * [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] * [ C D ] [ SN CS ] [ 0 DD ] [-SN CS ] * * Arguments * ========= * * A (input/output) COMPLEX*16 * B (input/output) COMPLEX*16 * C (input/output) COMPLEX*16 * D (input/output) COMPLEX*16 * On entry, the elements of the input matrix. * On exit, they are overwritten by the elements of the * standardised Schur form. * * RT1 (output) COMPLEX*16 * RT2 (output) COMPLEX*16 * The two eigenvalues. * * CS (output) DOUBLE PRECISION * SN (output) COMPLEX*16 * Parameters of the rotation matrix. * * Further Details * =============== * * Implemented by Mark R. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION RZERO, HALF, RONE PARAMETER ( RZERO = 0.0D+0, HALF = 0.5D+0, $ RONE = 1.0D+0 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. COMPLEX*16 AA, BB, DD, T, TEMP, TEMP2, U, X, Y * .. * .. External Functions .. COMPLEX*16 ZLADIV EXTERNAL ZLADIV * .. * .. External Subroutines .. EXTERNAL ZLARTG * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG, SQRT * .. * .. Executable Statements .. * * Initialize CS and SN * CS = RONE SN = ZERO * IF( C.EQ.ZERO ) THEN GO TO 10 * ELSE IF( B.EQ.ZERO ) THEN * * Swap rows and columns * CS = RZERO SN = ONE TEMP = D D = A A = TEMP B = -C C = ZERO GO TO 10 ELSE IF( ( A-D ).EQ.ZERO ) THEN TEMP = SQRT( B*C ) A = A + TEMP D = D - TEMP IF( ( B+C ).EQ.ZERO ) THEN CS = SQRT( HALF ) SN = DCMPLX( RZERO, RONE )*CS ELSE TEMP = SQRT( B+C ) TEMP2 = ZLADIV( SQRT( B ), TEMP ) CS = DBLE( TEMP2 ) SN = ZLADIV( SQRT( C ), TEMP ) END IF B = B - C C = ZERO GO TO 10 ELSE * * Compute eigenvalue closest to D * T = D U = B*C X = HALF*( A-T ) Y = SQRT( X*X+U ) IF( DBLE( X )*DBLE( Y )+DIMAG( X )*DIMAG( Y ).LT.RZERO ) $ Y = -Y T = T - ZLADIV( U, ( X+Y ) ) * * Do one QR step with exact shift T - resulting 2 x 2 in * triangular form. * CALL ZLARTG( A-T, C, CS, SN, AA ) * D = D - T BB = CS*B + SN*D DD = -DCONJG( SN )*B + CS*D * A = AA*CS + BB*DCONJG( SN ) + T B = -AA*SN + BB*CS C = ZERO D = T * END IF * 10 CONTINUE * * Store eigenvalues in RT1 and RT2. * RT1 = A RT2 = D RETURN * * End of ZLANV2 * END scalapack-1.8.0/SRC/pzdbtrsv.f0000640000175000017500000014521610363532303016001 0ustar muammarmuammar SUBROUTINE PZDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Gaussian elimination code PZ@(dom_pre)BTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PZDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDBTRF and this is stored in AF. If a linear system * is to be solved using PZDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MAX_BW, MBW2, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 18, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZGEMM, $ ZGERV2D, ZGESD2D, ZLACPY, ZMATADD, ZTBTRS, $ ZTRMM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 12*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 12*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 12*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 12*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) MBW2 = MAX_BW * MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -4 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -5 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 9*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 12*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 12*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -6 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -8 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 9*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PZDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ MAX(BWL,BWU)*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PZDBTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 18, 1 ) = DESCB(5) PARAM_CHECK( 17, 1 ) = DESCB(4) PARAM_CHECK( 16, 1 ) = DESCB(3) PARAM_CHECK( 15, 1 ) = DESCB(2) PARAM_CHECK( 14, 1 ) = DESCB(1) PARAM_CHECK( 13, 1 ) = IB PARAM_CHECK( 12, 1 ) = DESCA(5) PARAM_CHECK( 11, 1 ) = DESCA(4) PARAM_CHECK( 10, 1 ) = DESCA(3) PARAM_CHECK( 9, 1 ) = DESCA(1) PARAM_CHECK( 8, 1 ) = JA PARAM_CHECK( 7, 1 ) = NRHS PARAM_CHECK( 6, 1 ) = BWU PARAM_CHECK( 5, 1 ) = BWL PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 18, 2 ) = 1205 PARAM_CHECK( 17, 2 ) = 1204 PARAM_CHECK( 16, 2 ) = 1203 PARAM_CHECK( 15, 2 ) = 1202 PARAM_CHECK( 14, 2 ) = 1201 PARAM_CHECK( 13, 2 ) = 11 PARAM_CHECK( 12, 2 ) = 905 PARAM_CHECK( 11, 2 ) = 904 PARAM_CHECK( 10, 2 ) = 903 PARAM_CHECK( 9, 2 ) = 901 PARAM_CHECK( 8, 2 ) = 8 PARAM_CHECK( 7, 2 ) = 6 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 18, PARAM_CHECK, 18, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'N', 'U', ODD_SIZE, $ BWL, NRHS, $ A( OFST+1+BWU ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BWL, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWL+1), LLDB, $ WORK( 1 ), MAX_BW ) * CALL ZTRMM( 'L', 'U', 'N', 'N', BWL, NRHS, -CONE, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, WORK( 1 ), MAX_BW ) * CALL ZMATADD( BWL, NRHS, CONE, WORK( 1 ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * Clear garbage out of workspace block * DO 10 IDUM1=1, WORK_SIZE_MIN WORK( IDUM1 )=0.0 10 CONTINUE * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', BWU, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+MAX_BW-BWU ), MAX_BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTBTRS( 'L', 'C', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, BWU, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+MAX_BW-BWU ), MAX_BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BWL, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL ZTRMM( 'L', 'U', 'C', 'N', BWL, NRHS, -CONE, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL ZMATADD( BWL, NRHS, CONE, WORK( 1+MAX_BW-BWL ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE-BWL+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'C', 'U', ODD_SIZE, $ BWL, NRHS, $ A( OFST+1+BWU ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BWU, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BWU, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWU+1), LLDB, $ WORK( 1 ), MAX_BW ) * CALL ZTRMM( 'L', 'L', 'C', 'N', BWU, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, WORK( 1 ), $ MAX_BW ) * CALL ZMATADD( BWU, NRHS, CONE, WORK( 1 ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * Clear garbage out of workspace block * DO 20 IDUM1=1, WORK_SIZE_MIN WORK( IDUM1 )=0.0 20 CONTINUE * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', BWL, NRHS, ODD_SIZE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, CZERO, WORK( 1+MAX_BW-BWL ), MAX_BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTBTRS( 'U', 'C', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 ) ), $ MAX_BW+1, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTBTRS( 'U', 'N', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 ) ), $ MAX_BW+1, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, BWL, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, WORK( 1+MAX_BW-BWL ), $ MAX_BW, CONE, B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL ZLACPY( 'N', BWU, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL ZTRMM( 'L', 'L', 'N', 'N', BWU, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL ZMATADD( BWU, NRHS, CONE, WORK( 1+MAX_BW-BWU ), $ MAX_BW+BWL, CONE, $ B( PART_OFFSET+ODD_SIZE-BWU+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BWU, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZDBTRSV * END scalapack-1.8.0/SRC/pclarf.f0000640000175000017500000007013610363532303015370 0ustar muammarmuammar SUBROUTINE PCLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARF applies a complex elementary reflector Q to a complex M-by-N * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the * left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST COMPLEX TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CGEBR2D, CGEBS2D, $ CGEMV, CGERC, CGERV2D, CGESD2D, $ CGSUM2D, CLASET, INFOG2L, PB_TOPGET, $ PBCTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL CCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL CCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MP+1 CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFV.GT.0 .AND. IOFFC.GT.0 ) $ CALL CGERC( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL CCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 IF( IOFFV.GT.0 ) $ CALL CCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQ+1 CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFC.GT.0 ) $ CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PCLARF * END scalapack-1.8.0/SRC/pslasmsub.f0000640000175000017500000003116710602576752016147 0ustar muammarmuammar SUBROUTINE PSLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER I, K, L, LWORK REAL SMLNUM * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), BUF( * ) * .. * * Purpose * ======= * * PSLASMSUB looks for a small subdiagonal element from the bottom * of the matrix that it can safely set to zero. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) REAL array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * K (global output) INTEGER * On exit, this yields the bottom portion of the unreduced * submatrix. This will satisfy: L <= M <= I-1. * * SMLNUM (global input) REAL * On entry, a "small number" for the given matrix. * Unchanged on exit. * * BUF (local output) REAL array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * This code is basically a parallelization of the following snip * of LAPACK code from SLAHQR: * * Look for a single small subdiagonal element. * * DO 20 K = I, L + 1, -1 * TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) * IF( TST1.EQ.ZERO ) * $ TST1 = SLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) * IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) * $ GO TO 30 * 20 CONTINUE * 30 CONTINUE * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2, $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC, $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA, $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, $ RIGHT, UP REAL H10, H11, H22, TST1, ULP * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH EXTERNAL ILCM, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGERV2D, SGESD2D, IGAMX2D, $ INFOG1L, INFOG2L * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS * ISTR1 = 0 ISTR2 = ( ( I-L ) / HBL ) IF( ISTR2*HBL.LT.( I-L ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.2*ISTR2 ) THEN * * Error! * RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY * UP THE TRIDIAGONAL * IBUF1 = 0 IBUF2 = 0 IRCV1 = 0 IRCV2 = 0 DO 10 K = I, L + 1, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) ) THEN * * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * ADD UP THE RECEIVES * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT. $ 1 ) ) ) THEN * * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN * * WE MUST RECEIVE H(K ,K-1) FROM LEFT * IRCV2 = IRCV2 + 1 END IF END IF * * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND * IF( IBUF1.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW, $ RIGHT ) END IF * * RECEIVE APPROPRIATE DATA IF THERE IS ANY * IF( IRCV1.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW, $ LEFT ) END IF * * START MAIN LOOP * IBUF1 = 0 IBUF2 = 0 CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-1+HBL, HBL ) * * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT. * * Start loop for subdiagonal search * DO 40 K = I, L + 1, -1 IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN * * Grab information from WORK array * IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H11 = BUF( ISTR1+IBUF1 ) ELSE H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF2 = IBUF2 + 1 H10 = BUF( ISTR2+IBUF2 ) ELSE H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF ELSE * * Information is local * H11 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF H22 = A( ( ICOL1-1 )*LDA+IROW1 ) TST1 = ABS( H11 ) + ABS( H22 ) IF( TST1.EQ.ZERO ) THEN * * FIND SOME NORM OF THE LOCAL H(L:I,L:I) * CALL INFOG1L( L, HBL, NPROW, MYROW, 0, ITMP1, III ) IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW ) CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ITMP2, III ) ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL ) DO 30 III = ITMP1, IROW2 DO 20 JJJ = ITMP2, ICOL2 TST1 = TST1 + ABS( A( ( JJJ-1 )*LDA+III ) ) 20 CONTINUE 30 CONTINUE END IF IF( ABS( H10 ).LE.MAX( ULP*TST1, SMLNUM ) ) $ GO TO 50 IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN II = MOD( II+NPROW-1, NPROW ) JJ = MOD( JJ+NPCOL-1, NPCOL ) CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) END IF 40 CONTINUE 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) RETURN * * End of PSLASMSUB * END scalapack-1.8.0/SRC/pzgeqrf.f0000640000175000017500000002756410363532303015606 0ustar muammarmuammar SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQRF computes a QR factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1), and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IINFO, IPW, J, $ JB, JN, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZGEQR2, PZLARFB, $ PZLARFT * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JB = JN - JA + 1 * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) * CALL PZGEQR2( M, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( JA+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ja) H(ja+1) . . . H(jn) * CALL PZLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PZLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', M, N-JB, JB, A, IA, JA, DESCA, $ WORK, A, IA, JA+JB, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of columns * DO 10 J = JN+1, JA+K-1, DESCA( NB_ ) JB = MIN( K-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) * CALL PZGEQR2( M-J+JA, JB, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PZLARFT( 'Forward', 'Columnwise', M-J+JA, JB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PZLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', M-J+JA, N-J-JB+JA, JB, A, I, J, $ DESCA, WORK, A, I, J+JB, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEQRF * END scalapack-1.8.0/SRC/pzpttrsv.f0000640000175000017500000014021110363532303016025 0ustar muammarmuammar SUBROUTINE PZPTTRSV( UPLO, TRANS, N, NRHS, D, E, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 AF( * ), B( * ), E( * ), WORK( * ) DOUBLE PRECISION D( * ) * .. * * * Purpose * ======= * * PZPTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Cholesky factorization code PZPTTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PZPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPTTRF and this is stored in AF. If a linear system * is to be solved using PZPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZGEMM, $ ZGERV2D, ZGESD2D, ZLACPY, ZMATADD, ZTBTRS, $ ZTRMM, ZTRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -4 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PZPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PZPTTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1105 PARAM_CHECK( 15, 2 ) = 1104 PARAM_CHECK( 14, 2 ) = 1103 PARAM_CHECK( 13, 2 ) = 1102 PARAM_CHECK( 12, 2 ) = 1101 PARAM_CHECK( 11, 2 ) = 10 PARAM_CHECK( 10, 2 ) = 805 PARAM_CHECK( 9, 2 ) = 804 PARAM_CHECK( 8, 2 ) = 803 PARAM_CHECK( 7, 2 ) = 801 PARAM_CHECK( 6, 2 ) = 7 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZPTTRSV( UPLO, 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'C', 'N', 1, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTRTRS( 'L', 'C', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, 1, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-1 ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -DCONJG( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZPTTRSV( UPLO, 'C', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL ZPTTRSV( UPLO, 'C', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -DCONJG( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL ZGEMM( 'T', 'N', 1, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * CALL ZMATADD( INT_ONE, NRHS, CONE, $ WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL ZTRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL ZGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( ODD_SIZE*1+2+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), $ INT_ONE, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE, $ AF( (ODD_SIZE)*1+1 ), $ INT_ONE, $ WORK( 1 ), $ INT_ONE, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL ZTRTRS( 'L', 'C', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL ZGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL ZGERV2D( ICTXT, INT_ONE, NRHS, $ WORK( 1 ), INT_ONE, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL ZGEMM( 'C', 'N', ODD_SIZE, NRHS, 1, -CONE, AF( 1 ), $ INT_ONE, WORK( 1 ), INT_ONE, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL ZAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL ZPTTRSV( UPLO, 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZPTTRSV * END scalapack-1.8.0/SRC/psgbtrf.f0000640000175000017500000011021610363532303015562 0ustar muammarmuammar SUBROUTINE PSGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ), AF( * ), WORK( * ) * .. * * Purpose * ======= * * PSGBTRF computes a LU factorization * of an N-by-N real banded * distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PSGBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) Q = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P and Q are permutation matrices. * The matrix Q represents reordering of columns * for parallelism's sake, while P represents * reordering of rows for numerical stability using * classic partial pivoting. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSGBTRF and this is stored in AF. If a linear system * is to be solved using PSGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 1 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian National University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW, $ CSRC, DBPTR, FIRST_PROC, I, I1, I2, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, J, JA_NEW, JPTR, $ L, LAF_MIN, LBWL, LBWU, LDB, LDBB, LLDA, LM, $ LMJ, LN, LNJ, LPTR, MYCOL, MYROW, MY_NUM_COLS, $ NB, NEICOL, NP, NPACT, NPCOL, NPROW, NPSTR, $ NP_SAVE, NRHS, ODD_N, ODD_SIZE, ODPTR, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ SGBTRF, SGEMM, SGER, SGERV2D, SGESD2D, SGETRF, $ SLACPY, SLASWP, SLATCPY, SSWAP, STRRV2D, $ STRSD2D, STRSM, GLOBCHK, IGAMX2D, IGEBR2D, $ IGEBS2D, PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -11 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -2 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( 2*BWL+2*BWU+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * BW = BWU + BWL * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PSGBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PSGBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( NB+BWU )*( BWL+BWU ) + 6*( BWL+BWU )*( BWL+2*BWU ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -9 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PSGBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 1 * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -11 * put minimum value of work into work( 1 ) WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, 'PSGBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 11 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSGBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 210 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * DO 30 J = 1, ODD_SIZE DO 20 I = 1, BW A( I+( J-1 )*LLDA ) = ZERO 20 CONTINUE 30 CONTINUE * * Begin main code * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Transfer triangle B_i of local matrix to next processor * for fillin. Overlap the send with the factorization of A_i. * IF( MYCOL.LE.NPCOL-2 ) THEN * * The last processor does not need to send anything. * BIPTR = location of triangle B_i in memory BIPTR = ( NB-BW )*LLDA + 2*BW + 1 * CALL STRSD2D( ICTXT, 'U', 'N', $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0, $ NPCOL ) ), BW, A( BIPTR ), LLDA-1, 0, MYCOL+1 ) * END IF * * Factor main partition P_i A_i = L_i U_i on each processor * * LBWL, LBWU: lower and upper bandwidth of local solver * Note that for MYCOL > 0 one has lower triangular blocks! * LM is the number of rows which is usually NB except for * MYCOL = 0 where it is BWU less and MYCOL=NPCOL-1 where it * is NR+BWU where NR is the number of columns on the last processor * Finally APTR is the pointer to the first element of A. As LAPACK * has a slightly different matrix format than Scalapack the pointer * has to be adjusted on processor MYCOL=0. * IF( MYCOL.NE.0 ) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1 + BWU END IF * IF( MYCOL.NE.NPCOL-1 ) THEN LM = NB - LBWU LN = NB - BW ELSE IF( MYCOL.NE.0 ) THEN LM = ODD_SIZE + BWU LN = MAX( ODD_SIZE-BW, 0 ) ELSE LM = N LN = MAX( N-BW, 0 ) END IF * IF( LN.GT.0 ) THEN * CALL SGBTRF( LM, LN, LBWL, LBWU, A( APTR ), LLDA, IPIV, INFO ) * IF( INFO.NE.0 ) THEN INFO = INFO + NB*MYCOL GO TO 80 END IF * NRHS = BW LDB = LLDA - 1 * * Update the last BW columns of A_i (code modified from DGBTRS) * * Only the eliminations of unknowns > LN-BW have an effect on * the last BW columns. Loop over them... * DO 40 J = MAX( LN-BW+1, 1 ), LN * LMJ = MIN( LBWL, LM-J ) LNJ = MIN( BW, J+BW-LN+APTR-1 ) * L = IPIV( J ) * JPTR = J - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA * IF( L.NE.J ) THEN * * Element (L,LN+1) is swapped with element (J,LN+1) etc * Furthermore, the elements in the same row are LDB=LLDA-1 apart * The complicated formulas are to cope with the banded * data format: * LPTR = L - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA * CALL SSWAP( LNJ, A( LPTR ), LDB, A( JPTR ), LDB ) * END IF * * LPTR is the pointer to the beginning of the * coefficients of L * LPTR = BW + 1 + APTR + ( J-1 )*LLDA * CALL SGER( LMJ, LNJ, -ONE, A( LPTR ), 1, A( JPTR ), LDB, $ A( JPTR+1 ), LDB ) 40 CONTINUE * END IF * * Compute spike fill-in, L_i F_i = P_i B_{i-1} * * Receive triangle B_{i-1} from previous processor * IF( MYCOL.GT.0 ) THEN CALL STRRV2D( ICTXT, 'U', 'N', MIN( BW, LM ), BW, AF( 1 ), BW, $ 0, MYCOL-1 ) * * Transpose transmitted upper triangular (trapezoidal) matrix * DO 60 I2 = 1, MIN( BW, LM ) DO 50 I1 = I2 + 1, BW AF( I1+( I2-1 )*BW ) = AF( I2+( I1-1 )*BW ) AF( I2+( I1-1 )*BW ) = ZERO 50 CONTINUE 60 CONTINUE * * Permutation and forward elimination (triang. solve) * DO 70 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L.NE.J ) THEN CALL SSWAP( BW, AF( ( L-1 )*BW+1 ), 1, $ AF( ( J-1 )*BW+1 ), 1 ) END IF * LPTR = BW + 1 + APTR + ( J-1 )*LLDA * CALL SGER( NRHS, LMJ, -ONE, AF( ( J-1 )*BW+1 ), 1, $ A( LPTR ), 1, AF( J*BW+1 ), BW ) * 70 CONTINUE * END IF * 80 CONTINUE * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU BN = BW ELSE BM = MIN( BW, ODD_SIZE ) + BWU BN = MIN( BW, ODD_SIZE ) END IF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = ( NB+BWU )*BW + 1 LDBB = 2*BW + BWU * * Copy from A and AF into block bidiagonal matrix (tail of AF) * * DBPTR = Pointer to diagonal blocks in A DBPTR = BW + 1 + LBWU + LN*LLDA * CALL SLACPY( 'G', BM, BN, A( DBPTR ), LLDA-1, AF( BBPTR+BW*LDBB ), $ LDBB ) * * Zero out any junk entries that were copied * DO 100 J = 1, BM DO 90 I = J + LBWL, BM - 1 AF( BBPTR+BW*LDBB+( J-1 )*LDBB+I ) = ZERO 90 CONTINUE 100 CONTINUE * IF( MYCOL.NE.0 ) THEN * * ODPTR = Pointer to offdiagonal blocks in A * ODPTR = ( LM-BM )*BW + 1 CALL SLATCPY( 'G', BW, BM, AF( ODPTR ), BW, $ AF( BBPTR+2*BW*LDBB ), LDBB ) END IF * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. CALL SGETRF( N-LN, N-LN, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) * END IF * * Loop over levels ... only occurs if npcol > 1 * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) are used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels * 110 CONTINUE IF( NPACT.LE.1 ) $ GO TO 190 * * Test if processor is active * IF( MOD( MYCOL, NPSTR ).EQ.0 ) THEN * * Send/Receive blocks * * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * * This node will potentially do more work later * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW ELSE IF( NEICOL / NPSTR.EQ.NPACT-1 ) THEN ODD_N = NUMROC( N, NB, NPCOL-1, 0, NPCOL ) BMN = MIN( BW, ODD_N ) + BWU ELSE * * Last processor skips to next level GO TO 180 END IF * * BM1 = M for 1st block on proc pair, BM2 2nd block * BM1 = BM BM2 = BMN * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * CALL SGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ 0, NEICOL ) * CALL SGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BM ), LDBB, 0, $ NEICOL ) * IF( NPACT.EQ.2 ) THEN * * Copy diagonal block to align whole system * CALL SLACPY( 'G', BMN, BW, AF( BBPTR+BM ), LDBB, $ AF( BBPTR+2*BW*LDBB+BM ), LDBB ) END IF * END IF * ELSE * * This node stops work after this stage -- an extra copy * is required to make the odd and even frontal matrices * look identical * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * BM1 = BMN BM2 = BM * CALL SGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, 0, $ NEICOL ) * CALL SLACPY( 'G', BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ AF( BBPTR+BMN ), LDBB ) * DO 130 J = BBPTR + 2*BW*LDBB, BBPTR + 3*BW*LDBB - 1, LDBB DO 120 I = 0, LDBB - 1 AF( I+J ) = ZERO 120 CONTINUE 130 CONTINUE * CALL SGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ 0, NEICOL ) * IF( NPACT.EQ.2 ) THEN * * Copy diagonal block to align whole system * CALL SLACPY( 'G', BM, BW, AF( BBPTR+BMN ), LDBB, $ AF( BBPTR+2*BW*LDBB+BMN ), LDBB ) END IF * END IF * * LU factorization with partial pivoting * IF( NPACT.NE.2 ) THEN * CALL SGETRF( BM+BMN, BW, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) * * Backsolve left side * DO 150 J = BBPTR, BBPTR + BW*LDBB - 1, LDBB DO 140 I = 0, BM1 - 1 AF( I+J ) = ZERO 140 CONTINUE 150 CONTINUE * CALL SLASWP( BW, AF( BBPTR ), LDBB, 1, BW, IPIV( LN+1 ), 1 ) * CALL STRSM( 'L', 'L', 'N', 'U', BW, BW, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, AF( BBPTR ), LDBB ) * * Use partial factors to update remainder * CALL SGEMM( 'N', 'N', BM+BMN-BW, BW, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, AF( BBPTR ), LDBB, $ ONE, AF( BBPTR+BW ), LDBB ) * * Backsolve right side * NRHS = BW * CALL SLASWP( NRHS, AF( BBPTR+2*BW*LDBB ), LDBB, 1, BW, $ IPIV( LN+1 ), 1 ) * CALL STRSM( 'L', 'L', 'N', 'U', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB ) * * Use partial factors to update remainder * CALL SGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB, ONE, $ AF( BBPTR+2*BW*LDBB+BW ), LDBB ) * * * Test if processor is active in next round * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * * Reset BM * BM = BM1 + BM2 - BW * * Local copying in the block bidiagonal area * * CALL SLACPY( 'G', BM, BW, AF( BBPTR+BW ), LDBB, $ AF( BBPTR+BW*LDBB ), LDBB ) CALL SLACPY( 'G', BM, BW, AF( BBPTR+2*BW*LDBB+BW ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB ) * * Zero out space that held original copy * DO 170 J = 0, BW - 1 DO 160 I = 0, BM - 1 AF( BBPTR+2*BW*LDBB+BW+J*LDBB+I ) = ZERO 160 CONTINUE 170 CONTINUE * END IF * ELSE * * Factor the final 2 by 2 block matrix * CALL SGETRF( BM+BMN, BM+BMN, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) END IF * END IF * * Last processor in an odd-sized NPACT skips to here * 180 CONTINUE * NPACT = ( NPACT+1 ) / 2 NPSTR = NPSTR*2 GO TO 110 * 190 CONTINUE * End loop over levels * 200 CONTINUE * If error was found in Phase 1, processors jump here. * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 210 CONTINUE * If this processor did not hold part of the grid it * jumps here. * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PSGBTRF * END scalapack-1.8.0/SRC/pzlarfc.f0000640000175000017500000007010410363532303015555 0ustar muammarmuammar SUBROUTINE PZLARFC( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARFC applies a complex elementary reflector Q**H to a * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), * from either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q**H * sub( C ), * = 'R': form sub( C ) * Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or * sub( C ) * Q**H if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST COMPLEX*16 TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBZTRNV, $ ZCOPY, ZGEBR2D, ZGEBS2D, ZGEMV, $ ZGERC, ZGERV2D, ZGESD2D, ZGSUM2D, $ ZLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = DCONJG( TAU( JJV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL ZCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = DCONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), 1, $ C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL ZCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * IPW = MP+1 CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = DCONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = DCONJG( TAU( IIV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL ZCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = DCONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL ZCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = DCONJG( TAU( IIV ) ) * ELSE * IPW = NQ+1 CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = DCONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = DCONJG( TAU( JJV ) ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) TAULOC = DCONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL ZGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PZLARFC * END scalapack-1.8.0/SRC/psgbsv.f0000640000175000017500000004535710363532303015434 0ustar muammarmuammar SUBROUTINE PSGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) REAL A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSGBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * banded distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination with pivoting * is used to factor a reordering * of the matrix into P L U. * * See PSGBTRF and PSGBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * +max(NRHS*(NB+2*bwl+4*bwu), 1) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PSGBTRF, PSGBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PSGBTRF and PSGBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PSGBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Factor the matrix * CALL PSGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PSGBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PSGBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGBSV', -INFO ) RETURN END IF * RETURN * * End of PSGBSV * END scalapack-1.8.0/SRC/pdlaed0.f0000640000175000017500000001756210363532303015436 0ustar muammarmuammar SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER INFO, IQ, JQ, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) DOUBLE PRECISION D( * ), E( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAED0 computes all eigenvalues and corresponding eigenvectors of a * symmetric tridiagonal matrix using the divide and conquer method. * * * Arguments * ========= * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input/output) DOUBLE PRECISION array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in descending order. * * E (global input/output) DOUBLE PRECISION array, dimension (N-1) * On entry, the subdiagonal elements of the tridiagonal matrix. * On exit, E has been destroyed. * * Q (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * On output, Q is distributed across the P processes in block * cyclic format. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * * WORK (local workspace ) DOUBLE PRECISION array, dimension (LWORK) * LWORK = 6*N + 2*NP*NQ, with * NP = NUMROC( N, MB_Q, MYROW, IQROW, NPROW ) * NQ = NUMROC( N, NB_Q, MYCOL, IQCOL, NPCOL ) * IQROW = INDXG2P( IQ, NB_Q, MYROW, RSRC_Q, NPROW ) * IQCOL = INDXG2P( JQ, MB_Q, MYCOL, CSRC_Q, NPCOL ) * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * LIWORK = 2 + 7*N + 8*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ID, IDCOL, IDROW, IID, IINFO, IIQ, IM1, IM2, $ IPQ, IQCOL, IQROW, J, JJD, JJQ, LDQ, MATSIZ, $ MYCOL, MYROW, N1, NB, NBL, NBL1, NPCOL, NPROW, $ SUBPBS, TSUBPBS * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D, $ DGESD2D, DSTEQR, INFOG2L, PDLAED1, PXERBLA * .. * .. External Functions .. * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 IF( DESCQ( NB_ ).GT.N .OR. N.LT.2 ) $ INFO = -1 IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED0', -INFO ) RETURN END IF * NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) CALL INFOG2L( IQ, JQ, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) * * Determine the size and placement of the submatrices, and save in * the leading elements of IWORK. * TSUBPBS = ( N-1 ) / NB + 1 IWORK( 1 ) = TSUBPBS SUBPBS = 1 10 CONTINUE IF( IWORK( SUBPBS ).GT.1 ) THEN DO 20 J = SUBPBS, 1, -1 IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 IWORK( 2*J-1 ) = IWORK( J ) / 2 20 CONTINUE SUBPBS = 2*SUBPBS GO TO 10 END IF DO 30 J = 2, SUBPBS IWORK( J ) = IWORK( J ) + IWORK( J-1 ) 30 CONTINUE * * Divide the matrix into TSUBPBS submatrices of size at most NB * using rank-1 modifications (cuts). * DO 40 I = NB + 1, N, NB IM1 = I - 1 D( IM1 ) = D( IM1 ) - ABS( E( IM1 ) ) D( I ) = D( I ) - ABS( E( IM1 ) ) 40 CONTINUE * * Solve each submatrix eigenproblem at the bottom of the divide and * conquer tree. D is the same on each process. * DO 50 ID = 1, N, NB CALL INFOG2L( IQ-1+ID, JQ-1+ID, DESCQ, NPROW, NPCOL, MYROW, $ MYCOL, IID, JJD, IDROW, IDCOL ) MATSIZ = MIN( NB, N-ID+1 ) IF( MYROW.EQ.IDROW .AND. MYCOL.EQ.IDCOL ) THEN IPQ = IID + ( JJD-1 )*LDQ CALL DSTEQR( 'I', MATSIZ, D( ID ), E( ID ), Q( IPQ ), LDQ, $ WORK, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'DSTEQR', -INFO ) RETURN END IF IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) THEN CALL DGESD2D( DESCQ( CTXT_ ), MATSIZ, 1, D( ID ), MATSIZ, $ IQROW, IQCOL ) END IF ELSE IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL DGERV2D( DESCQ( CTXT_ ), MATSIZ, 1, D( ID ), MATSIZ, $ IDROW, IDCOL ) END IF 50 CONTINUE * IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) THEN CALL DGEBS2D( DESCQ( CTXT_ ), 'A', ' ', N, 1, D, N ) ELSE CALL DGEBR2D( DESCQ( CTXT_ ), 'A', ' ', N, 1, D, N, IQROW, $ IQCOL ) END IF * * Successively merge eigensystems of adjacent submatrices * into eigensystem for the corresponding larger matrix. * * while ( SUBPBS > 1 ) * 60 CONTINUE IF( SUBPBS.GT.1 ) THEN IM2 = SUBPBS - 2 DO 80 I = 0, IM2, 2 IF( I.EQ.0 ) THEN NBL = IWORK( 2 ) NBL1 = IWORK( 1 ) IF( NBL1.EQ.0 ) $ GO TO 70 ID = 1 MATSIZ = MIN( N, NBL*NB ) N1 = NBL1*NB ELSE NBL = IWORK( I+2 ) - IWORK( I ) NBL1 = NBL / 2 IF( NBL1.EQ.0 ) $ GO TO 70 ID = IWORK( I )*NB + 1 MATSIZ = MIN( NB*NBL, N-ID+1 ) N1 = NBL1*NB END IF * * Merge lower order eigensystems (of size N1 and MATSIZ - N1) * into an eigensystem of size MATSIZ. * CALL PDLAED1( MATSIZ, N1, D( ID ), ID, Q, IQ, JQ, DESCQ, $ E( ID+N1-1 ), WORK, IWORK( SUBPBS+1 ), IINFO ) IF( IINFO.NE.0 ) THEN INFO = IINFO*( N+1 ) + ID END IF * 70 CONTINUE IWORK( I / 2+1 ) = IWORK( I+2 ) 80 CONTINUE SUBPBS = SUBPBS / 2 * GO TO 60 END IF * * end while * 90 CONTINUE RETURN * * End of PDLAED0 * END scalapack-1.8.0/SRC/pzpotrs.f0000640000175000017500000002402510363532303015636 0ustar muammarmuammar SUBROUTINE PZPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZPOTRS solves a system of linear equations * * sub( A ) * X = sub( B ) * A(IA:IA+N-1,JA:JA+N-1)*X = B(IB:IB+N-1,JB:JB+NRHS-1) * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a N-by-N * hermitian positive definite distributed matrix using the Cholesky * factorization sub( A ) = U**H*U or L*L**H computed by PZPOTRF. * sub( B ) denotes the distributed matrix B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into local memory to * an array of dimension (LLD_A, LOCc(JA+N-1)). On entry, this * array contains the factors L or U from the Cholesky facto- * rization sub( A ) = L*L**H or U**H*U, as computed by PZPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * the local pieces of the right hand sides sub( B ). * On exit, this array contains the local pieces of the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICTXT, IROFFA, IROFFB, ICOFFA, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 11, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( UPPER ) THEN * * Solve sub( A ) * X = sub( B ) where sub( A ) = U'*U. * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Upper', 'Conjugate transpose', $ 'Non-unit', N, NRHS, ONE, A, IA, JA, DESCA, B, IB, $ JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A ) *X = sub( B ) where sub( A ) = L*L'. * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PZTRSM( 'Left', 'Lower', 'Conjugate transpose', $ 'Non-unit', N, NRHS, ONE, A, IA, JA, DESCA, B, IB, $ JB, DESCB ) END IF * RETURN * * End of PZPOTRS * END scalapack-1.8.0/SRC/psdbtrs.f0000640000175000017500000006402210363532303015577 0ustar muammarmuammar SUBROUTINE PSDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PSDBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PSDBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Routine PSDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDBTRF and this is stored in AF. If a linear system * is to be solved using PSDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PSDBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -15 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -3 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( BWL+BWU+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PSDBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PSDBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( MAX( BWL, BWU )*NRHS ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, 'PSDBTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PSDBTRSV( 'L', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PSDBTRSV( 'U', 'T', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF * * Call backsolve routine * IF( ( LSAME( TRANS, 'C' ) ) .OR. ( LSAME( TRANS, 'T' ) ) ) THEN * CALL PSDBTRSV( 'L', 'T', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PSDBTRSV( 'U', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSDBTRS * END scalapack-1.8.0/SRC/pclatrd.f0000640000175000017500000004422210363532303015547 0ustar muammarmuammar SUBROUTINE PCLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCW( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAU( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PCLATRD reduces NB rows and columns of a complex Hermitian * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) to complex * tridiagonal form by an unitary similarity transformation * Q' * sub( A ) * Q, and returns the matrices V and W which are * needed to apply the transformation to the unreduced part of sub( A ). * * If UPLO = 'U', PCLATRD reduces the last NB rows and columns of a * matrix, of which the upper triangle is supplied; * if UPLO = 'L', PCLATRD reduces the first NB rows and columns of a * matrix, of which the lower triangle is supplied. * * This is an auxiliary routine called by PCHETRD. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NB (global input) INTEGER * The number of rows and columns to be reduced. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. * On exit, if UPLO = 'U', the last NB columns have been reduced * to tridiagonal form, with the diagonal elements overwriting * the diagonal elements of sub( A ); the elements above the * diagonal with the array TAU, represent the unitary matrix Q * as a product of elementary reflectors. If UPLO = 'L', the * first NB columns have been reduced to tridiagonal form, with * the diagonal elements overwriting the diagonal elements of * sub( A ); the elements below the diagonal with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors; See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * W (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_W,NB_W), This array contains * the local pieces of the N-by-NB_W matrix W required to * update the unreduced part of sub( A ). * * IW (global input) INTEGER * The row index in the global array W indicating the first * row of sub( W ). * * JW (global input) INTEGER * The column index in the global array W indicating the * first column of sub( W ). * * DESCW (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix W. * * WORK (local workspace) COMPLEX array, dimension (NB_A) * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n) H(n-1) . . . H(n-nb+1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the N-by-NB matrix V * which is needed, with W, to apply the transformation to the unreduced * part of the matrix, using a Hermitian rank-2k update of the form: * sub( A ) := sub( A ) - V*W' - W*V'. * * The contents of A on exit are illustrated by the following examples * with n = 5 and nb = 2: * * if UPLO = 'U': if UPLO = 'L': * * ( a a a v4 v5 ) ( d ) * ( a a v4 v5 ) ( 1 d ) * ( a 1 v5 ) ( v1 1 a ) * ( d 1 ) ( v1 v2 a a ) * ( d ) ( v1 v2 a a a ) * * where d denotes a diagonal element of the reduced matrix, a denotes * an element of the original matrix that is unchanged, and vi denotes * an element of the vector defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX HALF, ONE, ZERO PARAMETER ( HALF = ( 0.5E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, JP, JWK, K, $ KW, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX AII, ALPHA, BETA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ), DESCWK( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESCSET, INFOG2L, PCAXPY, $ PCDOTC, PCELGET, PCELSET, PCGEMV, $ PCHEMV, PCLACGV, PCLARFG, PCSCAL, $ PSELSET, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, REAL * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) AII = ZERO BETA = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * CALL INFOG2L( N+IA-NB, N+JA-NB, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, II, JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce last NB columns of upper triangle * DO 10 J = JA+N-1, JA+N-NB, -1 I = IA + J - JA K = J - JA + 1 KW = MOD( K-1, DESCA( MB_ ) ) + 1 * * Update A(IA:I,I) * CALL PCELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( AII ) ) ) CALL PCLACGV( N-K, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ) ) CALL PCGEMV( 'No transpose', K, N-K, -ONE, A, IA, J+1, $ DESCA, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ), $ ONE, A, IA, J, DESCA, 1 ) CALL PCLACGV( N-K, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ) ) CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCGEMV( 'No transpose', K, N-K, -ONE, W, IW, JW+KW, $ DESCW, A, I, J+1, DESCA, DESCA( M_ ), ONE, A, $ IA, J, DESCA, 1 ) CALL PCLACGV( N-K, A, I, J+1, DESCA, DESCA( M_ ) ) CALL PCELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( AII ) ) ) IF( N-K.GT.0 ) $ CALL PCELSET( A, I, J+1, DESCA, CMPLX( E( JP ) ) ) * * Generate elementary reflector H(i) to annihilate * A(IA:I-2,I) * JP = MIN( JJ+KW-1, NQ ) CALL PCLARFG( K-1, BETA, I-1, J, A, IA, J, DESCA, 1, $ TAU ) CALL PSELSET( E, 1, J, DESCE, REAL( BETA ) ) CALL PCELSET( A, I-1, J, DESCA, ONE ) * * Compute W(IW:IW+K-2,JW+KW-1) * CALL PCHEMV( 'Upper', K-1, ONE, A, IA, JA, DESCA, A, IA, J, $ DESCA, 1, ZERO, W, IW, JW+KW-1, DESCW, 1 ) * JWK = MOD( K-1, DESCWK( NB_ ) ) + 2 CALL PCGEMV( 'Conjugate transpose', K-1, N-K, ONE, W, IW, $ JW+KW, DESCW, A, IA, J, DESCA, 1, ZERO, WORK, $ 1, JWK, DESCWK, DESCWK( M_ ) ) CALL PCGEMV( 'No transpose', K-1, N-K, -ONE, A, IA, J+1, $ DESCA, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', K-1, N-K, ONE, A, IA, $ J+1, DESCA, A, IA, J, DESCA, 1, ZERO, WORK, 1, $ JWK, DESCWK, DESCWK( M_ ) ) CALL PCGEMV( 'No transpose', K-1, N-K, -ONE, W, IW, JW+KW, $ DESCW, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE, $ W, IW, JW+KW-1, DESCW, 1 ) CALL PCSCAL( K-1, TAU( JP ), W, IW, JW+KW-1, DESCW, 1 ) * CALL PCDOTC( K-1, ALPHA, W, IW, JW+KW-1, DESCW, 1, A, IA, J, $ DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PCAXPY( K-1, ALPHA, A, IA, J, DESCA, 1, W, IW, JW+KW-1, $ DESCW, 1 ) CALL PCELGET( 'E', ' ', BETA, A, I, J, DESCA ) CALL PSELSET( D, 1, J, DESCD, REAL( BETA ) ) * 10 CONTINUE * ELSE * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW, $ IACOL, ICTXT, 1 ) CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) * * Reduce first NB columns of lower triangle * DO 20 J = JA, JA+NB-1 I = IA + J - JA K = J - JA + 1 * * Update A(J:JA+N-1,J) * CALL PCELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( AII ) ) ) CALL PCLACGV( K-1, W, IW+K-1, JW, DESCW, DESCW( M_ ) ) CALL PCGEMV( 'No transpose', N-K+1, K-1, -ONE, A, I, JA, $ DESCA, W, IW+K-1, JW, DESCW, DESCW( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PCLACGV( K-1, W, IW+K-1, JW, DESCW, DESCW( M_ ) ) CALL PCLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCGEMV( 'No transpose', N-K+1, K-1, -ONE, W, IW+K-1, $ JW, DESCW, A, I, JA, DESCA, DESCA( M_ ), ONE, $ A, I, J, DESCA, 1 ) CALL PCLACGV( K-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCELGET( 'E', ' ', AII, A, I, J, DESCA ) CALL PCELSET( A, I, J, DESCA, CMPLX( REAL( AII ) ) ) IF( K.GT.1 ) $ CALL PCELSET( A, I, J-1, DESCA, CMPLX( E( JP ) ) ) * * * Generate elementary reflector H(i) to annihilate * A(I+2:IA+N-1,I) * JP = MIN( JJ+K-1, NQ ) CALL PCLARFG( N-K, BETA, I+1, J, A, I+2, J, DESCA, 1, $ TAU ) CALL PSELSET( E, 1, J, DESCE, REAL( BETA ) ) CALL PCELSET( A, I+1, J, DESCA, ONE ) * * Compute W(IW+K:IW+N-1,JW+K-1) * CALL PCHEMV( 'Lower', N-K, ONE, A, I+1, J+1, DESCA, A, I+1, $ J, DESCA, 1, ZERO, W, IW+K, JW+K-1, DESCW, 1 ) * CALL PCGEMV( 'Conjugate Transpose', N-K, K-1, ONE, W, IW+K, $ JW, DESCW, A, I+1, J, DESCA, 1, ZERO, WORK, 1, $ 1, DESCWK, DESCWK( M_ ) ) CALL PCGEMV( 'No transpose', N-K, K-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PCGEMV( 'Conjugate transpose', N-K, K-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, $ 1, DESCWK, DESCWK( M_ ) ) CALL PCGEMV( 'No transpose', N-K, K-1, -ONE, W, IW+K, JW, $ DESCW, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W, $ IW+K, JW+K-1, DESCW, 1 ) CALL PCSCAL( N-K, TAU( JP ), W, IW+K, JW+K-1, DESCW, 1 ) CALL PCDOTC( N-K, ALPHA, W, IW+K, JW+K-1, DESCW, 1, A, I+1, $ J, DESCA, 1 ) IF( MYCOL.EQ.IACOL ) $ ALPHA = -HALF*TAU( JP )*ALPHA CALL PCAXPY( N-K, ALPHA, A, I+1, J, DESCA, 1, W, IW+K, $ JW+K-1, DESCW, 1 ) CALL PCELGET( 'E', ' ', BETA, A, I, J, DESCA ) CALL PSELSET( D, 1, J, DESCD, REAL( BETA ) ) * 20 CONTINUE * END IF * * Broadcast columnwise the diagonal elements into D. * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1, $ IAROW, MYCOL ) END IF END IF * RETURN * * End of PCLATRD * END scalapack-1.8.0/SRC/psormr2.f0000640000175000017500000003444710363532303015532 0ustar muammarmuammar SUBROUTINE PSORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMR2 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IROFFC, LCM, LCMP, LWMIN, MI, $ MPC0, MYCOL, MYROW, NI, NPCOL, NPROW, NQ, NQC0 REAL AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PSELSET, $ PSELSET2, PSLARF, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic:ic+m-k+i-ia,jc:jc+n-1) * MI = M - K + I - IA + 1 ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc:jc+n-k+i-ia+1) * NI = N - K + I - IA + 1 END IF * * Apply H(i) or H(i)' * CALL PSELSET2( AII, A, I, JA+NQ-K+I-IA, DESCA, ONE ) CALL PSLARF( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) CALL PSELSET( A, I, JA+NQ-K+I-IA, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMR2 * END scalapack-1.8.0/SRC/pslared1d.f0000640000175000017500000001502710367447133016010 0ustar muammarmuammar SUBROUTINE PSLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 12, 2005 * * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESC( * ) REAL BYALL( * ), BYCOL( * ), WORK( LWORK ) * .. * * Purpose * ======= * * PSLARED1D redistributes a 1D array * * It assumes that the input array, BYCOL, is distributed across * rows and that all process columns contain the same copy of * BYCOL. The output array, BYALL, will be identical on all processes * and will contain the entire array. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NP = Number of local rows in BYCOL() * * N (global input) INTEGER * The number of elements to be redistributed. N >= 0. * * IA (global input) INTEGER * IA must be equal to 1 * * JA (global input) INTEGER * JA must be equal to 1 * * DESC (global/local input) INTEGER Array of dimension DLEN_ * A 2D array descriptor, which describes BYCOL * * BYCOL (local input) distributed block cyclic REAL array * global dimension (N), local dimension (NP) * BYCOL is distributed across the process rows * All process columns are assumed to contain the same value * * BYALL (global output) REAL global dimension( N ) * local dimension (N) * BYALL is exactly duplicated on all processes * It contains the same values as BYCOL, but it is replicated * across all processes rather than being distributed * * BYALL(i) = BYCOL( NUMROC(i,DESC( NB_ ),MYROW,0,NPROW ) on the procs * whose MYROW == mod((i-1)/DESC( NB_ ),NPROW) * * WORK (local workspace) REAL dimension (LWORK) * Used to hold the buffers sent from one process to another * * LWORK (local input) INTEGER size of WORK array * LWORK >= NUMROC(N, DESC( NB_ ), 0, 0, NPCOL) * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ALLI, BUFLEN, I, II, MYCOL, MYROW, NB, NPCOL, $ NPROW, PCOL * .. * .. External Functions .. * INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. * EXTERNAL BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) NB = DESC( MB_ ) * DO 30 PCOL = 0, NPCOL - 1 BUFLEN = NUMROC( N, NB, PCOL, 0, NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CALL SCOPY( BUFLEN, BYCOL, 1, WORK, 1 ) CALL SGEBS2D( DESC( CTXT_ ), 'R', ' ', 1, BUFLEN, WORK, 1 ) ELSE CALL SGEBR2D( DESC( CTXT_ ), 'R', ' ', 1, BUFLEN, WORK, 1, $ MYROW, PCOL ) END IF * ALLI = PCOL*NB DO 20 II = 1, BUFLEN, NB DO 10 I = 1, MIN( NB, BUFLEN-II+1 ) BYALL( ALLI+I ) = WORK( II-1+I ) 10 CONTINUE ALLI = ALLI + NB*NPCOL 20 CONTINUE 30 CONTINUE * RETURN * * End of PSLARED1D * END scalapack-1.8.0/SRC/pxsyevx.h0000640000175000017500000000452110363532303015644 0ustar muammarmuammar /* * These macros define how C routines will be called. ADD_ assumes that * they will be called by fortran, which expects C routines to have an * underscore postfixed to the name (Suns, and the Intel expect this). * NOCHANGE indicates that fortran will be calling, and that it expects * the name called by fortran to be identical to that compiled by the C * (RS6K's do this). UPCASE says it expects C routines called by fortran * to be in all upcase (CRAY wants this). */ #define ADD_ 0 #define NOCHANGE 1 #define UPCASE 2 #define C_CALL 3 #ifdef UpCase #define F77_CALL_C UPCASE #endif #ifdef NoChange #define F77_CALL_C NOCHANGE #endif #ifdef Add_ #define F77_CALL_C ADD_ #endif #ifndef F77_CALL_C #define F77_CALL_C ADD_ #endif #if (F77_CALL_C == ADD_) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * No redefinition necessary to have following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm_(...) * * This is the default. */ #endif #if (F77_CALL_C == UPCASE) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void PDGEMM(...) */ /* TOOLS */ #define pdlasnbt_ PDLASNBT #define pdlachkieee_ PDLACHKIEEE #define pdlaiectl_ PDLAIECTL #define pdlaiectb_ PDLAIECTB #define pslasnbt_ PSLASNBT #define pslachkieee_ PSLACHKIEEE #define pslaiect_ PSLAIECT #endif #if (F77_CALL_C == NOCHANGE) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * for following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm(...) */ /* TOOLS */ #define pdlasnbt_ pdlasnbt #define pdlachkieee_ pdlachkieee #define pdlaiectl_ pdlaiectl #define pdlaiectb_ pdlaiectb #define pslasnbt_ pslasnbt #define pslachkieee_ pslachkieee #define pslaiect_ pslaiect #endif scalapack-1.8.0/SRC/pzgerqf.f0000640000175000017500000002727610363532303015606 0ustar muammarmuammar SUBROUTINE PZGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGERQF computes a RQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IL, IN, IPW, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZGERQ2, PZLARFB, $ PZLARFT * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGERQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( IL.GE.IN+1 ) THEN * * Use blocked code initially * DO 10 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Compute the RQ factorization of the current block * A(i:i+ib-1,ja:ja+n-m+i+ib-ia-1) * CALL PZGERQ2( IB, N-M+I+IB-IA, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PZLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, $ I, JA, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the * right * CALL PZLARFB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = IN - IA + 1 NU = N - M + IN - IA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PZGERQ2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGERQF * END scalapack-1.8.0/SRC/zlaref.f0000640000175000017500000003160310363532303015400 0ustar muammarmuammar SUBROUTINE ZLAREF( TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1, $ ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ, $ LIHIZ, VECS, V2, V3, T1, T2, T3 ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. LOGICAL BLOCK, WANTZ CHARACTER TYPE INTEGER ICOL1, IROW1, ISTART, ISTOP, ITMP1, ITMP2, LDA, $ LDZ, LIHIZ, LILOZ COMPLEX*16 T1, T2, T3, V2, V3 * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), VECS( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * ZLAREF applies one or several Householder reflectors of size 3 * to one or two matrices (if column is specified) on either their * rows or columns. * * Arguments * ========= * * TYPE (global input) CHARACTER*1 * If 'R': Apply reflectors to the rows of the matrix * (apply from left) * Otherwise: Apply reflectors to the columns of the matrix * Unchanged on exit. * * A (global input/output) COMPLEX*16 array, (LDA,*) * On entry, the matrix to receive the reflections. * The updated matrix on exit. * * LDA (local input) INTEGER * On entry, the leading dimension of A. Unchanged on exit. * * WANTZ (global input) LOGICAL * If .TRUE., then apply any column reflections to Z as well. * If .FALSE., then do no additional work on Z. * * Z (global input/output) COMPLEX*16 array, (LDZ,*) * On entry, the second matrix to receive column reflections. * This is changed only if WANTZ is set. * * LDZ (local input) INTEGER * On entry, the leading dimension of Z. Unchanged on exit. * * BLOCK (global input) LOGICAL * If .TRUE., then apply several reflectors at once and read * their data from the VECS array. * If .FALSE., apply the single reflector given by V2, V3, * T1, T2, and T3. * * IROW1 (local input/output) INTEGER * On entry, the local row element of A. * Undefined on output. * * * ICOL1 (local input/output) INTEGER * On entry, the local column element of A. * Undefined on output. * * ISTART (global input) INTEGER * Specifies the "number" of the first reflector. This is * used as an index into VECS if BLOCK is set. * ISTART is ignored if BLOCK is .FALSE.. * * ISTOP (global input) INTEGER * Specifies the "number" of the last reflector. This is * used as an index into VECS if BLOCK is set. * ISTOP is ignored if BLOCK is .FALSE.. * * ITMP1 (local input) INTEGER * Starting range into A. For rows, this is the local * first column. For columns, this is the local first row. * * ITMP2 (local input) INTEGER * Ending range into A. For rows, this is the local last * column. For columns, this is the local last row. * * LILOZ * LIHIZ (local input) INTEGER * These serve the same purpose as ITMP1,ITMP2 but for Z * when WANTZ is set. * * VECS (global input) COMPLEX*16 array of size 3*N (matrix size) * This holds the size 3 reflectors one after another and this * is only accessed when BLOCK is .TRUE. * * V2 * V3 * T1 * T2 * T3 (global input/output) COMPLEX*16 * This holds information on a single size 3 Householder * reflector and is read when BLOCK is .FALSE., and * overwritten when BLOCK is .TRUE. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Local Scalars .. INTEGER J, K COMPLEX*16 A1, A11, A2, A22, A3, A4, A5, B1, B2, B3, B4, $ B5, H11, H22, SUM, SUM1, SUM2, SUM3, T12, T13, $ T22, T23, T32, T33, TMP1, TMP2, TMP3, V22, V23, $ V32, V33 * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MOD * .. * .. Executable Statements .. * IF( LSAME( TYPE, 'R' ) ) THEN IF( BLOCK ) THEN DO 30 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 10 J = ITMP1, ITMP2 - MOD( ITMP2-ITMP1+1, 2 ), 2 A1 = A( IROW1, J ) A2 = A( IROW1+1, J ) A3 = A( IROW1+2, J ) A4 = A( IROW1+3, J ) A5 = A( IROW1+4, J ) B1 = A( IROW1, J+1 ) B2 = A( IROW1+1, J+1 ) B3 = A( IROW1+2, J+1 ) B4 = A( IROW1+3, J+1 ) B5 = A( IROW1+4, J+1 ) SUM1 = DCONJG( T1 )*A1 + DCONJG( T2 )*A2 + $ DCONJG( T3 )*A3 A( IROW1, J ) = A1 - SUM1 H11 = A2 - SUM1*V2 H22 = A3 - SUM1*V3 TMP1 = DCONJG( T1 )*B1 + DCONJG( T2 )*B2 + $ DCONJG( T3 )*B3 A( IROW1, J+1 ) = B1 - TMP1 A11 = B2 - TMP1*V2 A22 = B3 - TMP1*V3 SUM2 = DCONJG( T12 )*H11 + DCONJG( T22 )*H22 + $ DCONJG( T32 )*A4 A( IROW1+1, J ) = H11 - SUM2 H11 = H22 - SUM2*V22 H22 = A4 - SUM2*V32 TMP2 = DCONJG( T12 )*A11 + DCONJG( T22 )*A22 + $ DCONJG( T32 )*B4 A( IROW1+1, J+1 ) = A11 - TMP2 A11 = A22 - TMP2*V22 A22 = B4 - TMP2*V32 SUM3 = DCONJG( T13 )*H11 + DCONJG( T23 )*H22 + $ DCONJG( T33 )*A5 A( IROW1+2, J ) = H11 - SUM3 A( IROW1+3, J ) = H22 - SUM3*V23 A( IROW1+4, J ) = A5 - SUM3*V33 TMP3 = DCONJG( T13 )*A11 + DCONJG( T23 )*A22 + $ DCONJG( T33 )*B5 A( IROW1+2, J+1 ) = A11 - TMP3 A( IROW1+3, J+1 ) = A22 - TMP3*V23 A( IROW1+4, J+1 ) = B5 - TMP3*V33 10 CONTINUE DO 20 J = ITMP2 - MOD( ITMP2-ITMP1+1, 2 ) + 1, ITMP2 SUM = DCONJG( T1 )*A( IROW1, J ) + $ DCONJG( T2 )*A( IROW1+1, J ) + $ DCONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM H11 = A( IROW1+1, J ) - SUM*V2 H22 = A( IROW1+2, J ) - SUM*V3 SUM = DCONJG( T12 )*H11 + DCONJG( T22 )*H22 + $ DCONJG( T32 )*A( IROW1+3, J ) A( IROW1+1, J ) = H11 - SUM H11 = H22 - SUM*V22 H22 = A( IROW1+3, J ) - SUM*V32 SUM = DCONJG( T13 )*H11 + DCONJG( T23 )*H22 + $ DCONJG( T33 )*A( IROW1+4, J ) A( IROW1+2, J ) = H11 - SUM A( IROW1+3, J ) = H22 - SUM*V23 A( IROW1+4, J ) = A( IROW1+4, J ) - SUM*V33 20 CONTINUE IROW1 = IROW1 + 3 30 CONTINUE DO 50 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 40 J = ITMP1, ITMP2 SUM = DCONJG( T1 )*A( IROW1, J ) + $ DCONJG( T2 )*A( IROW1+1, J ) + $ DCONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3 40 CONTINUE IROW1 = IROW1 + 1 50 CONTINUE ELSE DO 60 J = ITMP1, ITMP2 SUM = DCONJG( T1 )*A( IROW1, J ) + $ DCONJG( T2 )*A( IROW1+1, J ) + $ DCONJG( T3 )*A( IROW1+2, J ) A( IROW1, J ) = A( IROW1, J ) - SUM A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2 A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3 60 CONTINUE END IF ELSE * * Do column transforms * IF( BLOCK ) THEN DO 90 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3 V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) V22 = VECS( ( K-1 )*3+4 ) V32 = VECS( ( K-1 )*3+5 ) T12 = VECS( ( K-1 )*3+6 ) V23 = VECS( ( K-1 )*3+7 ) V33 = VECS( ( K-1 )*3+8 ) T13 = VECS( ( K-1 )*3+9 ) T2 = T1*V2 T3 = T1*V3 T22 = T12*V22 T32 = T12*V32 T23 = T13*V23 T33 = T13*V33 DO 70 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM H11 = A( J, ICOL1+1 ) - SUM*DCONJG( V2 ) H22 = A( J, ICOL1+2 ) - SUM*DCONJG( V3 ) SUM = T12*H11 + T22*H22 + T32*A( J, ICOL1+3 ) A( J, ICOL1+1 ) = H11 - SUM H11 = H22 - SUM*DCONJG( V22 ) H22 = A( J, ICOL1+3 ) - SUM*DCONJG( V32 ) SUM = T13*H11 + T23*H22 + T33*A( J, ICOL1+4 ) A( J, ICOL1+2 ) = H11 - SUM A( J, ICOL1+3 ) = H22 - SUM*DCONJG( V23 ) A( J, ICOL1+4 ) = A( J, ICOL1+4 ) - SUM*DCONJG( V33 ) 70 CONTINUE IF( WANTZ ) THEN DO 80 J = LILOZ, LIHIZ SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) + $ T3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM H11 = Z( J, ICOL1+1 ) - SUM*DCONJG( V2 ) H22 = Z( J, ICOL1+2 ) - SUM*DCONJG( V3 ) SUM = T12*H11 + T22*H22 + T32*Z( J, ICOL1+3 ) Z( J, ICOL1+1 ) = H11 - SUM H11 = H22 - SUM*DCONJG( V22 ) H22 = Z( J, ICOL1+3 ) - SUM*DCONJG( V32 ) SUM = T13*H11 + T23*H22 + T33*Z( J, ICOL1+4 ) Z( J, ICOL1+2 ) = H11 - SUM Z( J, ICOL1+3 ) = H22 - SUM*DCONJG( V23 ) Z( J, ICOL1+4 ) = Z( J, ICOL1+4 ) - $ SUM*DCONJG( V33 ) 80 CONTINUE END IF ICOL1 = ICOL1 + 3 90 CONTINUE DO 120 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP V2 = VECS( ( K-1 )*3+1 ) V3 = VECS( ( K-1 )*3+2 ) T1 = VECS( ( K-1 )*3+3 ) T2 = T1*V2 T3 = T1*V3 DO 100 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*DCONJG( V2 ) A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*DCONJG( V3 ) 100 CONTINUE IF( WANTZ ) THEN DO 110 J = LILOZ, LIHIZ SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) + $ T3*Z( J, ICOL1+2 ) Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM Z( J, ICOL1+1 ) = Z( J, ICOL1+1 ) - $ SUM*DCONJG( V2 ) Z( J, ICOL1+2 ) = Z( J, ICOL1+2 ) - $ SUM*DCONJG( V3 ) 110 CONTINUE END IF ICOL1 = ICOL1 + 1 120 CONTINUE ELSE DO 130 J = ITMP1, ITMP2 SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) + $ T3*A( J, ICOL1+2 ) A( J, ICOL1 ) = A( J, ICOL1 ) - SUM A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*DCONJG( V2 ) A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*DCONJG( V3 ) 130 CONTINUE END IF END IF RETURN * * End of ZLAREF * END scalapack-1.8.0/SRC/pcunmrq.f0000640000175000017500000004240610363532303015605 0ustar muammarmuammar SUBROUTINE PCUNMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMRQ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PCGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, LCM, $ LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, NI, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCLARFB, $ PCLARFT, PCUNMR2, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PCUNMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PCLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-IA, IB, $ A, I, JA, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+i+ib-ia-1,jc:jc+n-1) * MI = M - K + I + IB - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+i+ib-ia-1) * NI = N - K + I + IB - IA END IF * * Apply H or H' * CALL PCLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, $ IB, A, I, JA, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PCUNMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMRQ * END scalapack-1.8.0/SRC/pzlacgv.f0000640000175000017500000001541210363532303015563 0ustar muammarmuammar SUBROUTINE PZLACGV( N, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INCX, IX, JX, N * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZLACGV conjugates a complex vector of length N, sub( X ), where * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = DESCX( M_ ) and * X(IX:IX+N-1,JX) if INCX = 1, and * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_X,*). * On entry the vector to be conjugated * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= N. * On exit the conjugated vector. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICOFFX, ICTXT, IIX, IOFFX, IROFFX, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIX, JJX, IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFFX = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFFX * IF( NQ.GT.0 ) THEN IOFFX = IIX+(JJX-1)*LDX DO 10 I = 1, NQ X( IOFFX ) = DCONJG( X( IOFFX ) ) IOFFX = IOFFX + LDX 10 CONTINUE END IF * ELSE IF( INCX.EQ.1 ) THEN * * sub( X ) is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFFX = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFFX * IF( NP.GT.0 ) THEN IOFFX = IIX+(JJX-1)*LDX DO 20 I = IOFFX, IOFFX+NP-1 X( I ) = DCONJG( X( I ) ) 20 CONTINUE END IF * END IF * RETURN * * End of PZLACGV * END scalapack-1.8.0/SRC/pcpbtrsv.f0000640000175000017500000014172310363532303015765 0ustar muammarmuammar SUBROUTINE PCPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, $ IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCPBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Cholesky factorization code PCPBTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PCPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPBTRF and this is stored in AF. If a linear system * is to be solved using PCPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MBW2, MYCOL, MYROW, MY_NUM_COLS, NB, NP, $ NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CGEMM, CGERV2D, CGESD2D, CLACPY, CMATADD, $ CTBTRS, CTRMM, CTRTRS, DESC_CONVERT, GLOBCHK, $ PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW * BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PCPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ BW*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, $ 'PCPBTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BW PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW ENDIF * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1), LLDB, $ WORK( 1 ), BW ) * CALL CTRMM( 'L', 'U', 'N', 'N', BW, NRHS, -CONE, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ WORK( 1 ), BW ) * CALL CMATADD( BW, NRHS, CONE, WORK( 1 ), BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', BW, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+BW-BW ), BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTRTRS( 'L', 'N', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTRTRS( 'L', 'C', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+BW-BW ), BW ) * CALL CTRMM( 'L', 'U', 'C', 'N', BW, NRHS, -CONE, $ A(( OFST+(BW+1)+(ODD_SIZE-BW)*LLDA )), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL CMATADD( BW, NRHS, CONE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1), LLDB, $ WORK( 1 ), BW ) * CALL CTRMM( 'L', 'L', 'C', 'N', BW, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, WORK( 1 ), $ BW ) * CALL CMATADD( BW, NRHS, CONE, WORK( 1 ), BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', BW, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+BW-BW ), BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( BW, NRHS, CONE, $ WORK( 1 ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTRTRS( 'L', 'N', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', BW, NRHS, BW, -CONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), $ BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', BW, NRHS, BW, -CONE, $ AF( (ODD_SIZE)*BW+1 ), $ BW, $ WORK( 1 ), $ BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTRTRS( 'L', 'C', 'N', BW, NRHS, AF( ODD_SIZE*BW+MBW2+1 ), $ BW, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, BW, NRHS, $ WORK( 1 ), BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+BW-BW ), BW ) * CALL CTRMM( 'L', 'L', 'N', 'N', BW, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL CMATADD( BW, NRHS, CONE, WORK( 1+BW-BW ), BW, CONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BW, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCPBTRSV * END scalapack-1.8.0/SRC/sdttrsv.f0000640000175000017500000001135710363532303015632 0ustar muammarmuammar SUBROUTINE SDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, $ B, LDB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from SGTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. REAL B( LDB, * ), D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * SDTTRSV solves one of the systems of equations * L * X = B, L**T * X = B, or L**H * X = B, * U * X = B, U**T * X = B, or U**H * X = B, * with factors of the tridiagonal matrix A from the LU factorization * computed by SDTTRF. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether to solve with L or U. * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * DL (input) COMPLEX array, dimension (N-1) * The (n-1) multipliers that define the matrix L from the * LU factorization of A. * * D (input) COMPLEX array, dimension (N) * The n diagonal elements of the upper triangular matrix U from * the LU factorization of A. * * DU (input) COMPLEX array, dimension (N-1) * The (n-1) elements of the first superdiagonal of U. * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, B is overwritten by the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL LOWER, NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) LOWER = LSAME( UPLO, 'L' ) IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN INFO = -9 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( NOTRAN ) THEN * IF( LOWER ) THEN * Solve L*X = B, overwriting B with X. * DO 35 J = 1, NRHS * * Solve L*x = b. * DO 10 I = 1, N - 1 B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) 10 CONTINUE 35 CONTINUE * ELSE * Solve U*x = b. * DO 30 J = 1, NRHS B( N, J ) = B( N, J ) / D( N ) IF( N.GT.1 ) $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / $ D( N-1 ) DO 20 I = N - 2, 1, -1 B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J ) ) / D( I ) 20 CONTINUE 30 CONTINUE * ENDIF * ELSE * IF( .NOT. LOWER ) THEN * Solve U**T * X = B, overwriting B with X. * DO 65 J = 1, NRHS * * Solve U**T * x = b. * B( 1, J ) = B( 1, J ) / D( 1 ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) DO 40 I = 3, N B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J ) ) / D( I ) 40 CONTINUE 65 CONTINUE * ELSE * * Solve L**T * X = B, overwriting B with X. DO 60 J = 1, NRHS * * Solve L**T * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) 50 CONTINUE 60 CONTINUE ENDIF END IF * * End of SDTTRSV * END scalapack-1.8.0/SRC/pzlaswp.f0000640000175000017500000002034210363532303015613 0ustar muammarmuammar SUBROUTINE PZLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ) * .. * * Purpose: * ======== * * PZLASWP performs a series of row or column interchanges on * the distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1). One * interchange is initiated for each of rows or columns K1 trough K2 of * sub( A ). This routine assumes that the pivoting information has * already been broadcast along the process row or column. * Also note that this routine will only work for K1-K2 being in the * same MB (or NB) block. If you want to pivot a full matrix, use * PZLAPIV. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) * = 'B' (Backward) * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are permuted: * = 'R' (Rows) * = 'C' (Columns) * * N (global input) INTEGER * If ROWCOL = 'R', the length of the rows of the distributed * matrix A(*,JA:JA+N-1) to be permuted; * If ROWCOL = 'C', the length of the columns of the distributed * matrix A(IA:IA+N-1,*) to be permuted. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, * ). * On entry, this array contains the local pieces of the distri- * buted matrix to which the row/columns interchanges will be * applied. On exit the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * K1 (global input) INTEGER * The first element of IPIV for which a row or column inter- * change will be done. * * K2 (global input) INTEGER * The last element of IPIV for which a row or column inter- * change will be done. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A for * row pivoting and LOCc(N_A)+NB_A for column pivoting. This * array is tied to the matrix A, IPIV(K) = L implies rows * (or columns) K and L are to be interchanged. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICURCOL, ICURROW, IIA, IP, J, JJA, JP, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PZSWAP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( K1, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 10 I = K1, K2 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PZSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 10 CONTINUE ELSE CALL INFOG2L( K2, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 20 I = K2, K1, -1 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PZSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 20 CONTINUE END IF ELSE IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( IA, K1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 30 J = K1, K2 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PZSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 30 CONTINUE ELSE CALL INFOG2L( IA, K2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 40 J = K2, K1, -1 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PZSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 40 CONTINUE END IF END IF * RETURN * * End PZLASWP * END scalapack-1.8.0/SRC/ddbtf2.f0000640000175000017500000001214510363532303015262 0ustar muammarmuammar SUBROUTINE DDBTF2( M, N, KL, KU, AB, LDAB, INFO ) * * Modified by Andrew J. Cleary in November, 96 from: * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. DOUBLE PRECISION AB( LDAB, * ) * .. * * Purpose * ======= * * Ddbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting with row interchanges. * * This is the unblocked version of the algorithm, calling Level 2 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine; elements marked * + need not be set on entry, but are required by the routine to store * elements of U, because of fill-in resulting from the row * interchanges. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER J, JP, JU, KM, KV * .. * .. External Functions .. INTEGER ISAMAX EXTERNAL ISAMAX * .. * .. External Subroutines .. EXTERNAL DGER, DSCAL, DSWAP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U, allowing for * fill-in. * KV = KU * * Test the input parameters. * INFO = 0 *ECA IF( M.LT.0 ) THEN *ECA INFO = -1 *ECA ELSE IF( N.LT.0 ) THEN *ECA INFO = -2 *ECA ELSE IF( KL.LT.0 ) THEN *ECA INFO = -3 *ECA ELSE IF( KU.LT.0 ) THEN *ECA INFO = -4 *ECA ELSE IF( LDAB.LT.KL+KV+1 ) THEN *ECA INFO = -6 *ECA END IF *ECA IF( INFO.NE.0 ) THEN *ECA CALL XERBLA( 'DDBTF2', -INFO ) *ECA RETURN *ECA END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Gaussian elimination without partial pivoting * * JU is the index of the last column affected by the current stage * of the factorization. * JU = 1 * DO 40 J = 1, MIN( M, N ) * * Test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-J ) JP = 1 IF( AB( KV+1, J ).NE.ZERO ) THEN JU = MAX( JU, MIN( J+KU, N ) ) * IF( KM.GT.0 ) THEN * * Compute multipliers. * CALL DSCAL( KM, ONE / AB( KU+1, J ), AB( KU+2, J ), 1 ) * * Update trailing submatrix within the band. * IF( JU.GT.J ) THEN CALL DGER( KM, JU-J, -ONE, AB( KU+2, J ), 1, $ AB( KU, J+1 ), LDAB-1, AB( KU+1, J+1 ), $ LDAB-1 ) END IF END IF ELSE * IF( INFO.EQ.0 ) $ INFO = J END IF 40 CONTINUE RETURN * * End of DDBTF2 * END scalapack-1.8.0/SRC/pspotrs.f0000640000175000017500000002371510363532303015634 0ustar muammarmuammar SUBROUTINE PSPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSPOTRS solves a system of linear equations * * sub( A ) * X = sub( B ) * A(IA:IA+N-1,JA:JA+N-1)*X = B(IB:IB+N-1,JB:JB+NRHS-1) * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a N-by-N * symmetric positive definite distributed matrix using the Cholesky * factorization sub( A ) = U**T*U or L*L**T computed by PSPOTRF. * sub( B ) denotes the distributed matrix B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) REAL pointer into local memory to * an array of dimension (LLD_A, LOCc(JA+N-1)). On entry, this * array contains the factors L or U from the Cholesky facto- * rization sub( A ) = L*L**T or U**T*U, as computed by PSPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * the local pieces of the right hand sides sub( B ). * On exit, this array contains the local pieces of the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICTXT, IROFFA, IROFFB, ICOFFA, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 11, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( UPPER ) THEN * * Solve sub( A ) * X = sub( B ) where sub( A ) = U'*U. * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A ) *X = sub( B ) where sub( A ) = L*L'. * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PSTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) END IF * RETURN * * End of PSPOTRS * END scalapack-1.8.0/SRC/pdpotri.f0000640000175000017500000001716110363532303015601 0ustar muammarmuammar SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDPOTRI computes the inverse of a real symmetric positive definite * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the * Cholesky factorization sub( A ) = U**T*U or L*L**T computed by * PDPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor U or L * from the Cholesky factorization of the distributed matrix * sub( A ) = U**T*U or L*L**T, as computed by PDPOTRF. * On exit, the local pieces of the upper or lower triangle of * the (symmetric) inverse of sub( A ), overwriting the input * factor U or L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the (i,i) element of the factor U or L is * zero, and the inverse could not be computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER ICOFF, ICTXT, IROFF, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PDLAUUM, $ PDTRTRI, PXERBLA * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.NE.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Invert the triangular Cholesky factor U or L. * CALL PDTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * IF( INFO.GT.0 ) $ RETURN * * Form inv(U)*inv(U)' or inv(L)'*inv(L). * CALL PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * RETURN * * End of PDPOTRI * END scalapack-1.8.0/SRC/pzheevd.f0000640000175000017500000003734710363532303015575 0ustar muammarmuammar SUBROUTINE PZHEEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 25, 2002 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) DOUBLE PRECISION RWORK( * ), W( * ) COMPLEX*16 A( * ), WORK( * ), Z( * ) * * * Purpose * ======= * * PZHEEVD computes all the eigenvalues and eigenvectors of a Hermitian * matrix A by using a divide and conquer algorithm. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only; (NOT IMPLEMENTED YET) * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX*16 array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PZHEEV cannot guarantee * correct error reporting. * * W (global output) DOUBLE PRECISION array, dimension (N) * If INFO=0, the eigenvalues in ascending order. * * Z (local output) COMPLEX*16 array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * Z contains the orthonormal eigenvectors of the matrix A. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX*16 array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed for the * computation. * * LWORK (local input) INTEGER * If eigenvectors are requested: * LWORK = N + ( NP0 + MQ0 + NB ) * NB, * with NP0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine calculates the size for all * work arrays. Each of these values is returned in the first * entry of the corresponding work array, and no error message * is issued by PXERBLA. * * RWORK (local workspace/output) DOUBLE PRECISION array, * dimension (LRWORK) * On output RWORK(1) returns the real workspace needed to * guarantee completion. If the input parameters are incorrect, * RWORK(1) may also be incorrect. * * LRWORK (local input) INTEGER * Size of RWORK array. * LRWORK >= 1 + 9*N + 3*NP*NQ, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On output IWORK(1) returns the integer workspace needed. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in PDLAED3. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ), sub( Z ) must verify * some alignment properties, namely the following expression * should be true: * ( MB_A.EQ.NB_A.EQ.MB_Z.EQ.NB_Z .AND. IROFFA.EQ.ICOFFA .AND. * IROFFA.EQ.0 .AND.IROFFA.EQ.IROFFZ. AND. IAROW.EQ.IZROW) * with IROFFA = MOD( IA-1, MB_A ) * and ICOFFA = MOD( JA-1, NB_A ). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LOWER, LQUERY INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, IINFO, IIZ, $ INDD, INDE, INDE2, INDRWORK, INDTAU, INDWORK, $ INDZ, IPR, IPZ, IROFFA, IROFFZ, ISCALE, IZCOL, $ IZROW, J, JJZ, LDR, LDZ, LIWMIN, LLRWORK, $ LLWORK, LRWMIN, LWMIN, MB_A, MYCOL, MYROW, NB, $ NB_A, NN, NP0, NPCOL, NPROW, NQ, NQ0, OFFSET, $ RSRC_A DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCRZ( 9 ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC DOUBLE PRECISION PZLANHE, PDLAMCH EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC, PZLANHE, $ PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCINIT, INFOG2L, $ PZELGET, PZHETRD, PCHK2MAT, PZLASCL, PZLASET, $ PZUNMTR, PDLARED1D, PDLASET, PDSTEDC, PXERBLA, $ DSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCMPLX, ICHAR, MAX, MIN, MOD, DBLE, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * INFO = 0 * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( N, 2, N, 2, IZ, JZ, DESCZ, 11, INFO ) IF( INFO.EQ.0 ) THEN LOWER = LSAME( UPLO, 'L' ) NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( IA, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( JA, MB_A, MYCOL, CSRC_A, NPCOL ) NP0 = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ0 = NUMROC( N, NB, MYCOL, IACOL, NPCOL ) IROFFZ = MOD( IZ-1, MB_A ) CALL INFOG2L( IZ, JZ, DESCZ, NPROW, NPCOL, MYROW, MYCOL, $ IIZ, JJZ, IZROW, IZCOL ) LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * * Compute the total amount of space needed * NN = MAX( N, NB, 2 ) NQ = NUMROC( NN, NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+NQ+NB )*NB LRWMIN = 1 + 9*N + 3*NP0*NQ0 LIWMIN = 7*N + 8*NPCOL + 2 WORK( 1 ) = DCMPLX( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN IF( .NOT.LSAME( JOBZ, 'V' ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -16 ELSE IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( LOWER ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'U' ) END IF IDUM2( 1 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, $ JZ, DESCZ, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZHEEVD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Get machine constants. * SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * * Set up pointers into the WORK array * INDTAU = 1 INDWORK = INDTAU + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDE = 1 INDD = INDE + N INDE2 = INDD + N INDRWORK = INDE2 + N LLRWORK = LRWORK - INDRWORK + 1 * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PZLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PZLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce Hermitian matrix to tridiagonal form. * CALL PZHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE2 ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet workMx Mawith arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PDLARED1D( N, IA, JA, DESCA, RWORK( INDD ), W, $ RWORK( INDRWORK ), LLRWORK ) * CALL PDLARED1D( N, IA, JA, DESCA, RWORK( INDE2 ), $ RWORK( INDE ), RWORK( INDRWORK ), LLRWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PZELGET( 'A', ' ', WORK( INDWORK ), A, I+IA-1, I+JA-1, $ DESCA ) W( I ) = DBLE( WORK( INDWORK ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDWORK ), A, I+IA-1, I+JA, $ DESCA ) RWORK( INDE+I-1 ) = DBLE( WORK( INDWORK ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDWORK ), A, I+IA, I+JA-1, $ DESCA ) RWORK( INDE+I-1 ) = DBLE( WORK( INDWORK ) ) 30 CONTINUE END IF END IF * * Call PDSTEDC to compute eigenvalues and eigenvectors. * INDZ = INDE + N INDRWORK = INDZ + NP0*NQ0 LLRWORK = LRWORK - INDRWORK + 1 LDR = MAX( 1, NP0 ) CALL DESCINIT( DESCRZ, DESCZ( M_ ), DESCZ( N_ ), DESCZ( MB_ ), $ DESCZ( NB_ ), DESCZ( RSRC_ ), DESCZ( CSRC_ ), $ DESCZ( CTXT_ ), LDR, INFO ) CALL PZLASET( 'Full', N, N, CZERO, CONE, Z, IZ, JZ, DESCZ ) CALL PDLASET( 'Full', N, N, ZERO, ONE, RWORK( INDZ ), 1, 1, $ DESCRZ ) CALL PDSTEDC( 'I', N, W, RWORK( INDE+OFFSET ), RWORK( INDZ ), IZ, $ JZ, DESCRZ, RWORK( INDRWORK ), LLRWORK, IWORK, $ LIWORK, IINFO ) * LDZ = DESCZ( LLD_ ) LDR = DESCRZ( LLD_ ) IIZ = INDXG2L( IZ, NB, MYROW, MYROW, NPROW ) JJZ = INDXG2L( JZ, NB, MYCOL, MYCOL, NPCOL ) IPZ = IIZ + ( JJZ-1 )*LDZ IPR = INDZ - 1 + IIZ + ( JJZ-1 )*LDR DO 50 J = 0, NQ0 - 1 DO 40 I = 0, NP0 - 1 Z( IPZ+I+J*LDZ ) = RWORK( IPR+I+J*LDR ) 40 CONTINUE 50 CONTINUE * * Z = Q * Z * CALL PZUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, WORK( INDWORK ), $ LLWORK, IINFO ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL DSCAL( N, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = DCMPLX( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PZHEEVD * END scalapack-1.8.0/SRC/pclamr1d.f0000640000175000017500000001067110363532303015622 0ustar muammarmuammar SUBROUTINE PCLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Bugs * ==== * * I am not sure that this works correctly when IB and JB are not equal * to 1. Indeed, I suspect that IB should always be set to 1 or ignored * with 1 used in its place. * * PCLAMR1D has not been tested except withint the contect of * PCHEPTRD, the prototype reduction to tridiagonal form code. * * Purpose * * ======= * * PCLAMR1D redistributes a one-dimensional row vector from one data * decomposition to another. * * This is an auxiliary routine called by PCHETRD to redistribute D, E * and TAU. * * Notes * ===== * * Although all processes call PCGEMR2D, only the processes that own * the first column of A send data and only processes that own the * first column of B receive data. The calls to CGEBS2D/CGEBR2D * spread the data down. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * A (local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JA+N-1)). * On output, A is replicated across all processes in * this processor column. * * IA (global input) INTEGER * A's global row index, which points to the beginning of * the submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JB+N-1)). * * IB (global input) INTEGER * B's global row index, NOT USED * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCBB( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, PCGEMR2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return if possible * IF( N.LE.0 ) $ RETURN * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) DESCBB( I ) = DESCB( I ) 10 CONTINUE * DESCAA( M_ ) = 1 DESCBB( M_ ) = 1 DESCAA( LLD_ ) = 1 DESCBB( LLD_ ) = 1 * ICTXT = DESCB( CTXT_ ) CALL PCGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN CALL CGEBS2D( ICTXT, 'C', ' ', NQ, 1, B, NQ ) ELSE CALL CGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * RETURN * * End of PCLAMR1D * END scalapack-1.8.0/SRC/pddttrsv.f0000640000175000017500000014201610363532303015770 0ustar muammarmuammar SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PDDTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Gaussian elimination code PD@(dom_pre)TTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PDDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDTTRF and this is stored in AF. If a linear system * is to be solved using PDDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DAXPY, DDTTRSV, $ DESC_CONVERT, DGEMM, DGERV2D, DGESD2D, DMATADD, $ DTBTRS, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 9*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 12*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 12*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 12*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 12*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 9*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 12*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 12*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -4 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -8 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 9*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PDDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PDDTTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB( 5 ) PARAM_CHECK( 15, 1 ) = DESCB( 4 ) PARAM_CHECK( 14, 1 ) = DESCB( 3 ) PARAM_CHECK( 13, 1 ) = DESCB( 2 ) PARAM_CHECK( 12, 1 ) = DESCB( 1 ) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA( 5 ) PARAM_CHECK( 9, 1 ) = DESCA( 4 ) PARAM_CHECK( 8, 1 ) = DESCA( 3 ) PARAM_CHECK( 7, 1 ) = DESCA( 1 ) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1205 PARAM_CHECK( 15, 2 ) = 1204 PARAM_CHECK( 14, 2 ) = 1203 PARAM_CHECK( 13, 2 ) = 1202 PARAM_CHECK( 12, 2 ) = 1201 PARAM_CHECK( 11, 2 ) = 11 PARAM_CHECK( 10, 2 ) = 905 PARAM_CHECK( 9, 2 ) = 904 PARAM_CHECK( 8, 2 ) = 903 PARAM_CHECK( 7, 2 ) = 901 PARAM_CHECK( 6, 2 ) = 8 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 180 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -DL( PART_OFFSET+ODD_SIZE+1 ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, ODD_SIZE, -ONE, $ AF( 1 ), ODD_SIZE, B( PART_OFFSET+1 ), LLDB, $ ZERO, WORK( 1+INT_ONE-INT_ONE ), INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTBTRS( 'L', 'N', 'U', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTBTRS( 'L', 'T', 'U', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -ONE, $ AF( 1 ), ODD_SIZE, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE, ONE, B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -( DL( PART_OFFSET+ODD_SIZE+1 ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DDTTRSV( UPLO, 'T', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DDTTRSV( UPLO, 'T', ODD_SIZE, NRHS, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, ODD_SIZE, -ONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, ZERO, WORK( 1+INT_ONE-INT_ONE ), $ INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 110 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 90 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 100 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * CALL DMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 90 100 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTBTRS( 'U', 'T', 'N', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 110 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 160 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 120 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 130 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 120 130 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE, $ WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTBTRS( 'U', 'N', 'N', INT_ONE, $ MIN( INT_ONE, INT_ONE-1 ), NRHS, $ AF( ODD_SIZE+2 ), INT_ONE+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 140 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 140 150 CONTINUE * [End of GOTO Loop] * 160 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, $ 0, MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -ONE, $ AF( WORK_U+1 ), ODD_SIZE, $ WORK( 1+INT_ONE-INT_ONE ), INT_ONE, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL DAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, $ DU( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 170 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 180 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDDTTRSV * END scalapack-1.8.0/SRC/pdlarz.f0000640000175000017500000010177210363532303015416 0ustar muammarmuammar SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) DOUBLE PRECISION C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PDLARZ applies a real elementary reflector Q (or Q**T) to a real * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from * either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a real scalar and v is a real vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PDTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q, Q = Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST DOUBLE PRECISION TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DAXPY, DCOPY, DGEBR2D, $ DGEBS2D, DGEMV, DGER, DGERV2D, $ DGESD2D, DGSUM2D, DLASET, INFOG2L, $ PB_TOPGET, PBDTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL DGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL DGER( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL DGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL DLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL DGER( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL DCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL DGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL DGER( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL DGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL DGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL DCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MPV+1 CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL DGEMV( 'Transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL DAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL DGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL DGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL DLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * IF( MPC2.GT.0 .AND. NQV.GT.0 ) $ CALL DGER( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL DCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL DGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL DGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL DCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQV+1 CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL DGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL DGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL DLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL DAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL DGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PDLARZ * END scalapack-1.8.0/SRC/pdgerfs.f0000640000175000017500000010212010363532303015540 0ustar muammarmuammar SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, $ JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, $ LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX, $ LIWORK, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ),IPIV( * ), IWORK( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), BERR( * ), FERR( * ), $ WORK( * ), X( * ) * .. * * Purpose * ======= * * PDGERFS improves the computed solution to a system of linear * equations and provides error bounds and backward error estimates for * the solutions. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * In the following comments, sub( A ), sub( X ) and sub( B ) denote * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and * B(IB:IB+N-1,JB:JB+NRHS-1). * * Arguments * ========= * * TRANS (global input) CHARACTER*1 * Specifies the form of the system of equations. * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose) * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose) * = 'C': sub( A )**T * sub( X ) = sub( B ) * (Conjugate transpose = Transpose) * * * N (global input) INTEGER * The order of the matrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrices sub( B ) and sub( X ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input) DOUBLE PRECISION pointer into the local * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * factors of the matrix sub( A ) = P * L * U as computed by * PDGETRF. * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF. * This array contains the pivoting information as computed * by PDGETRF. IPIV(i) -> The global row local row i * was swapped with. This array is tied to the distributed * matrix A. * * B (local input) DOUBLE PRECISION pointer into the local * memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local * pieces of the distributed matrix of right hand sides * sub( B ). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input and output) DOUBLE PRECISION pointer into the * local memory to an array of local dimension * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains * the local pieces of the distributed matrix solution * sub( X ). On exit, the improved solution vectors. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * FERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). * The estimated forward error bound for each solution vector * of sub( X ). If XTRUE is the true solution corresponding * to sub( X ), FERR is an estimated upper bound for the * magnitude of the largest element in (sub( X ) - XTRUE) * divided by the magnitude of the largest element in sub( X ). * The estimate is as reliable as the estimate for RCOND, and * is almost always a slight overestimate of the true error. * This array is tied to the distributed matrix X. * * BERR (local output) DOUBLE PRECISION array of local dimension * LOCc(JB+NRHS-1). The componentwise relative backward * error of each solution vector (i.e., the smallest re- * lative change in any entry of sub( A ) or sub( B ) * that makes sub( X ) an exact solution). * This array is tied to the distributed matrix X. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*LOCr( N + MOD(IA-1,MB_A) ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr( N + MOD(IB-1,MB_B) ). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Internal Parameters * =================== * * ITMAX is the maximum number of steps of iterative refinement. * * Notes * ===== * * This routine temporarily returns when N <= 1. * * The distributed submatrices op( A ) and op( AF ) (respectively * sub( X ) and sub( B ) ) should be distributed the same way on the * same processes. These conditions ensure that sub( A ) and sub( AF ) * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned. * * Moreover, this routine requires the distributed submatrices sub( A ), * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary, * i.e., if f(x,y) = MOD( x-1, y ): * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0, * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0, * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) DOUBLE PRECISION TWO, THREE PARAMETER ( TWO = 2.0D+0, THREE = 3.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOTRAN CHARACTER TRANST INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL, $ IXBROW, IXCOL, IXROW, ICOFFA, ICOFFAF, ICOFFB, $ ICOFFX, ICTXT, ICURCOL, IDUM, II, IIXB, IIW, $ IOFFXB, IPB, IPR, IPV, IROFFA, IROFFAF, IROFFB, $ IROFFX, IW, J, JBRHS, JJ, JJFBE, JJXB, JN, JW, $ K, KASE, LDXB, LIWMIN, LWMIN, MYCOL, MYRHS, $ MYROW, NP, NP0, NPCOL, NPMOD, NPROW, NZ DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMX2D, $ DGEBR2D, DGEBS2D, INFOG2L, PCHK2MAT, $ PDAGEMV, PDAXPY, PDCOPY, PDGEMV, $ PDGETRS, PDLACON, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * NOTRAN = LSAME( TRANS, 'N' ) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, N, 2, IAF, JAF, DESCAF, 11, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IX, JX, DESCX, 20, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFAF = MOD( JAF-1, DESCAF( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFCOL = INDXG2P( JAF, DESCAF( NB_ ), MYCOL, $ DESCAF( CSRC_ ), NPCOL ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, $ IIXB, JJXB, IXBROW, IXBCOL ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IXCOL = INDXG2P( JX, DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LWMIN = 3 * NPMOD LIWMIN = NPMOD WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( ( .NOT.NOTRAN ) .AND. ( .NOT.LSAME( TRANS, 'T' ) ) .AND. $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700 + NB_ ) ELSE IF( DESCA( MB_ ).NE.DESCAF( MB_ ) ) THEN INFO = -( 1100 + MB_ ) ELSE IF( IROFFAF.NE.0 .OR. IAROW.NE.IAFROW ) THEN INFO = -9 ELSE IF( DESCA( NB_ ).NE.DESCAF( NB_ ) ) THEN INFO = -( 1100 + NB_ ) ELSE IF( ICOFFAF.NE.0 .OR. IACOL.NE.IAFCOL ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -( 1100 + CTXT_ ) ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IXBROW ) THEN INFO = -14 ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1600 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1600 + CTXT_ ) ELSE IF( DESCB( MB_ ).NE.DESCX( MB_ ) ) THEN INFO = -( 2000 + MB_ ) ELSE IF( IROFFX.NE.0 .OR. IXBROW.NE.IXROW ) THEN INFO = -18 ELSE IF( DESCB( NB_ ).NE.DESCX( NB_ ) ) THEN INFO = -( 2000 + NB_ ) ELSE IF( ICOFFB.NE.ICOFFX .OR. IXBCOL.NE.IXCOL ) THEN INFO = -19 ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -( 2000 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -24 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -26 END IF END IF * IF( NOTRAN ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 1 ) = ICHAR( 'T' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IDUM1( 2 ) = N IDUM2( 2 ) = 2 IDUM1( 3 ) = NRHS IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 24 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 26 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, N, 2, IAF, $ JAF, DESCAF, 11, 5, IDUM1, IDUM2, INFO ) CALL PCHK2MAT( N, 2, NRHS, 3, IB, JB, DESCB, 16, N, 2, NRHS, 3, $ IX, JX, DESCX, 20, 5, IDUM1, IDUM2, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERFS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * JJFBE = JJXB MYRHS = NUMROC( JB+NRHS-1, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) * * Quick return if possible * IF( N.LE.1 .OR. NRHS.EQ.0 ) THEN DO 10 JJ = JJFBE, MYRHS FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE RETURN END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * NP0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IXBROW, NPROW ) CALL DESCSET( DESCW, N+IROFFB, 1, DESCA( MB_ ), 1, IXBROW, IXBCOL, $ ICTXT, MAX( 1, NP0 ) ) IPB = 1 IPR = IPB + NP0 IPV = IPR + NP0 IF( MYROW.EQ.IXBROW ) THEN IIW = 1 + IROFFB NP = NP0 - IROFFB ELSE IIW = 1 NP = NP0 END IF IW = 1 + IROFFB JW = 1 LDXB = DESCB( LLD_ ) IOFFXB = ( JJXB-1 )*LDXB * * NZ = 1 + maximum number of nonzero entries in each row of sub( A ) * NZ = N + 1 EPS = PDLAMCH( ICTXT, 'Epsilon' ) SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) SAFE1 = NZ*SAFMIN SAFE2 = SAFE1 / EPS JN = MIN( ICEIL( JB, DESCB( NB_ ) ) * DESCB( NB_ ), JB+NRHS-1 ) * * Handle first block separately * JBRHS = JN - JB + 1 DO 100 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 20 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PDCOPY( N, B, IB, JB+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, IX, $ JX+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 30 II = IIXB, IIXB + NP - 1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 30 CONTINUE END IF END IF * CALL PDAGEMV( TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, JX+K, $ DESCX, 1, ONE, WORK( IPB ), IW, JW, DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 40 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 40 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.IXBCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during the * last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PDGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PDAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, IX, $ JX+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 20 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 50 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 50 CONTINUE END IF END IF * KASE = 0 60 CONTINUE IF( MYCOL.EQ.IXBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, IXBCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = IXBCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PDGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 70 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 70 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 80 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )*WORK( IPR+II ) 80 CONTINUE END IF END IF * CALL PDGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) END IF GO TO 60 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.IXBCOL ) THEN IF( NP.GT.0 ) THEN DO 90 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 90 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, 1, IDUM, $ IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 100 CONTINUE * ICURCOL = MOD( IXBCOL+1, NPCOL ) * * Do for each right hand side * DO 200 J = JN+1, JB+NRHS-1, DESCB( NB_ ) JBRHS = MIN( JB+NRHS-J, DESCB( NB_ ) ) DESCW( CSRC_ ) = ICURCOL * DO 190 K = 0, JBRHS-1 * COUNT = 1 LSTRES = THREE 110 CONTINUE * * Loop until stopping criterion is satisfied. * * Compute residual R = sub(B) - op(sub(A)) * sub(X), * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H), * depending on TRANS. * CALL PDCOPY( N, B, IB, J+K, DESCB, 1, WORK( IPR ), IW, JW, $ DESCW, 1 ) CALL PDGEMV( TRANS, N, N, -ONE, A, IA, JA, DESCA, X, $ IX, J+K, DESCX, 1, ONE, WORK( IPR ), IW, JW, $ DESCW, 1 ) * * Compute componentwise relative backward error from formula * * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) + * abs(sub(B)))(i)) * * where abs(Z) is the componentwise absolute value of the * matrix or vector Z. If the i-th component of the * denominator is less than SAFE2, then SAFE1 is added to the * i-th components of the numerator and denominator before * dividing. * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 120 II = IIXB, IIXB+NP-1 WORK( IIW+II-IIXB ) = ABS( B( II+IOFFXB ) ) 120 CONTINUE END IF END IF * CALL PDAGEMV( TRANS, N, N, ONE, A, IA, JA, DESCA, X, IX, $ J+K, DESCX, 1, ONE, WORK( IPB ), IW, JW, $ DESCW, 1 ) * S = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 )THEN DO 130 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN S = MAX( S, ABS( WORK( IPR+II ) ) / $ WORK( IPB+II ) ) ELSE S = MAX( S, ( ABS( WORK( IPR+II ) )+SAFE1 ) / $ ( WORK( IPB+II )+SAFE1 ) ) END IF 130 CONTINUE END IF END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, S, 1, IDUM, IDUM, 1, $ -1, MYCOL ) IF( MYCOL.EQ.ICURCOL ) $ BERR( JJFBE ) = S * * Test stopping criterion. Continue iterating if * 1) The residual BERR(J+K) is larger than machine epsilon, * and * 2) BERR(J+K) decreased by at least a factor of 2 during * the last iteration, and * 3) At most ITMAX iterations tried. * IF( S.GT.EPS .AND. TWO*S.LE.LSTRES .AND. $ COUNT.LE.ITMAX ) THEN * * Update solution and try again. * CALL PDGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, IPIV, $ WORK( IPR ), IW, JW, DESCW, INFO ) CALL PDAXPY( N, ONE, WORK( IPR ), IW, JW, DESCW, 1, X, $ IX, J+K, DESCX, 1 ) LSTRES = S COUNT = COUNT + 1 GO TO 110 END IF * * Bound error from formula * * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR = * norm( abs(inv(op(sub(A))))* * ( abs(R) + NZ*EPS*( * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X)) * * where * norm(Z) is the magnitude of the largest component of Z * inv(op(sub(A))) is the inverse of op(sub(A)) * abs(Z) is the componentwise absolute value of the matrix * or vector Z * NZ is the maximum number of nonzeros in any row of sub(A), * plus 1 * EPS is machine epsilon * * The i-th component of * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))) * is incremented by SAFE1 if the i-th component of * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than * SAFE2. * * Use PDLACON to estimate the infinity-norm of the matrix * inv(op(sub(A))) * diag(W), where * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 140 II = IIW-1, IIW+NP-2 IF( WORK( IPB+II ).GT.SAFE2 ) THEN WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) ELSE WORK( IPB+II ) = ABS( WORK( IPR+II ) ) + $ NZ*EPS*WORK( IPB+II ) + SAFE1 END IF 140 CONTINUE END IF END IF * KASE = 0 150 CONTINUE IF( MYCOL.EQ.ICURCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IPR ), $ DESCW( LLD_ ), MYROW, ICURCOL ) END IF DESCW( CSRC_ ) = MYCOL CALL PDLACON( N, WORK( IPV ), IW, JW, DESCW, WORK( IPR ), $ IW, JW, DESCW, IWORK, EST, KASE ) DESCW( CSRC_ ) = ICURCOL * IF( KASE.NE.0 ) THEN IF( KASE.EQ.1 ) THEN * * Multiply by diag(W)*inv(op(sub(A))'). * CALL PDGETRS( TRANST, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, INFO ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 160 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 160 CONTINUE END IF END IF ELSE * * Multiply by inv(op(sub(A)))*diag(W). * IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 170 II = IIW-1, IIW+NP-2 WORK( IPR+II ) = WORK( IPB+II )* $ WORK( IPR+II ) 170 CONTINUE END IF END IF * CALL PDGETRS( TRANS, N, 1, AF, IAF, JAF, DESCAF, $ IPIV, WORK( IPR ), IW, JW, DESCW, $ INFO ) END IF GO TO 150 END IF * * Normalize error. * LSTRES = ZERO IF( MYCOL.EQ.ICURCOL ) THEN IF( NP.GT.0 ) THEN DO 180 II = IIXB, IIXB+NP-1 LSTRES = MAX( LSTRES, ABS( X( IOFFXB+II ) ) ) 180 CONTINUE END IF CALL DGAMX2D( ICTXT, 'Column', ' ', 1, 1, LSTRES, $ 1, IDUM, IDUM, 1, -1, MYCOL ) IF( LSTRES.NE.ZERO ) $ FERR( JJFBE ) = EST / LSTRES * JJXB = JJXB + 1 JJFBE = JJFBE + 1 IOFFXB = IOFFXB + LDXB * END IF * 190 CONTINUE * ICURCOL = MOD( ICURCOL+1, NPCOL ) * 200 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDGERFS * END scalapack-1.8.0/SRC/pslaswp.f0000640000175000017500000002033410363532303015605 0ustar muammarmuammar SUBROUTINE PSLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) REAL A( * ) * .. * * Purpose: * ======== * * PSLASWP performs a series of row or column interchanges on * the distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1). One * interchange is initiated for each of rows or columns K1 trough K2 of * sub( A ). This routine assumes that the pivoting information has * already been broadcast along the process row or column. * Also note that this routine will only work for K1-K2 being in the * same MB (or NB) block. If you want to pivot a full matrix, use * PSLAPIV. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) * = 'B' (Backward) * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are permuted: * = 'R' (Rows) * = 'C' (Columns) * * N (global input) INTEGER * If ROWCOL = 'R', the length of the rows of the distributed * matrix A(*,JA:JA+N-1) to be permuted; * If ROWCOL = 'C', the length of the columns of the distributed * matrix A(IA:IA+N-1,*) to be permuted. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, * ). * On entry, this array contains the local pieces of the distri- * buted matrix to which the row/columns interchanges will be * applied. On exit the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * K1 (global input) INTEGER * The first element of IPIV for which a row or column inter- * change will be done. * * K2 (global input) INTEGER * The last element of IPIV for which a row or column inter- * change will be done. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A for * row pivoting and LOCc(N_A)+NB_A for column pivoting. This * array is tied to the matrix A, IPIV(K) = L implies rows * (or columns) K and L are to be interchanged. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICURCOL, ICURROW, IIA, IP, J, JJA, JP, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSSWAP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( K1, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 10 I = K1, K2 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PSSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 10 CONTINUE ELSE CALL INFOG2L( K2, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 20 I = K2, K1, -1 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PSSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 20 CONTINUE END IF ELSE IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( IA, K1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 30 J = K1, K2 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PSSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 30 CONTINUE ELSE CALL INFOG2L( IA, K2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 40 J = K2, K1, -1 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PSSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 40 CONTINUE END IF END IF * RETURN * * End PSLASWP * END scalapack-1.8.0/SRC/psorm2l.f0000640000175000017500000004041410363532303015513 0ustar muammarmuammar SUBROUTINE PSORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORM2L overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PSGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PSGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 REAL AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PSELSET, PSELSET2, PSLARF, PB_TOPGET, $ PB_TOPSET, PXERBLA, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D, SSCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL SSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL SSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL SGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL SGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL SSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * DO 10 J = J1, J2, J3 * IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic:ic+m-k+j-ja,jc:jc+n-1) * MI = M - K + J - JA + 1 ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc:jc+n-k+j-ja) * NI = N - K + J - JA + 1 END IF * * Apply H(j) or H(j)' * CALL PSELSET2( AJJ, A, IA+NQ-K+J-JA, J, DESCA, ONE ) CALL PSLARF( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, IC, $ JC, DESCC, WORK ) CALL PSELSET( A, IA+NQ-K+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORM2L * END scalapack-1.8.0/SRC/pzlaset.f0000640000175000017500000002171110363532303015576 0ustar muammarmuammar SUBROUTINE PZLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N COMPLEX*16 ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASET initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) COMPLEX*16 * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) COMPLEX*16 * The constant to which the diagonal elements are to be set. * * A (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBLK, IN, ITMP, J, JAA, JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PZLASE2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PZLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) CALL PZLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) JAA = JA + ITMP CALL PZLASE2( UPLO, IBLK, N-ITMP, ALPHA, BETA, $ A, I, JAA, DESCA ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL PZLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IAA = IA + JTMP CALL PZLASE2( UPLO, M-JTMP, JBLK, ALPHA, BETA, A, IAA, $ J, DESCA ) 20 CONTINUE ELSE IF( M.LE.N ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), $ IA+M-1 ) CALL PZLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) CALL PZLASE2( UPLO, IBLK, I-IA, ALPHA, ALPHA, A, I, $ JA, DESCA ) CALL PZLASE2( UPLO, IBLK, N-I+IA, ALPHA, BETA, A, I, $ JA+I-IA, DESCA ) 30 CONTINUE ELSE JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA+N-1 ) CALL PZLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) CALL PZLASE2( UPLO, J-JA, JBLK, ALPHA, ALPHA, A, IA, $ J, DESCA ) CALL PZLASE2( UPLO, M-J+JA, JBLK, ALPHA, BETA, A, $ IA+J-JA, J, DESCA ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PZLASET * END scalapack-1.8.0/SRC/pddttrf.f0000640000175000017500000010605310363532303015566 0ustar muammarmuammar SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION AF( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PDDTTRF computes a LU factorization * of an N-by-N real tridiagonal * diagonally dominant-like distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PDDTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * DL (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDTTRF and this is stored in AF. If a linear system * is to be solved using PDDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DDTTRF, $ DDTTRSV, DESC_CONVERT, DGERV2D, DGESD2D, $ DTRRV2D, DTRSD2D, GLOBCHK, IGAMX2D, IGEBR2D, $ IGEBS2D, PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION DDOT EXTERNAL NUMROC, DDOT * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( 12*NPCOL+3*NB ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDTTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PDDTTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA( 5 ) PARAM_CHECK( 6, 1 ) = DESCA( 4 ) PARAM_CHECK( 5, 1 ) = DESCA( 3 ) PARAM_CHECK( 4, 1 ) = DESCA( 1 ) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 605 PARAM_CHECK( 6, 2 ) = 604 PARAM_CHECK( 5, 2 ) = 603 PARAM_CHECK( 4, 2 ) = 601 PARAM_CHECK( 3, 2 ) = 5 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 70 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL DTRSD2D( ICTXT, 'U', 'N', 1, 1, $ DU( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL DDTTRF( ODD_SIZE, DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 20 END IF * * IF( MYCOL.LT.NP-1 ) THEN * * Apply factorization to lower connection block BL_i * Apply factorization to upper connection block BU_i * * * Perform the triangular solve {U_i}^T{BL'}_i^T = {BL_i}^T * * DL( PART_OFFSET+ODD_SIZE+1 ) = ( DL( PART_OFFSET+ODD_SIZE+1 ) ) $ / ( D( PART_OFFSET+ODD_SIZE ) ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 ) - $ DL( PART_OFFSET+ODD_SIZE+1 )* $ DU( PART_OFFSET+ODD_SIZE ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 20 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * * Move entry that causes spike to auxiliary storage * AF( WORK_U+1 ) = ( DL( PART_OFFSET+1 ) ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL DDTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( WORK_U+1 ), ODD_SIZE, $ INFO ) * * * Calculate the "spike" fillin, ${U_i}^T {{GL}_i}^T = {DU_i}^T$ * CALL DTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * CALL DDTTRSV( 'U', 'T', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * AF( ODD_SIZE+3 ) = -ONE*DDOT( ODD_SIZE, AF( 1 ), 1, $ AF( WORK_U+1 ), 1 ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * AF( ODD_SIZE+1 ) = -ONE*( DL( PART_OFFSET+ODD_SIZE+1 )* $ AF( WORK_U+ODD_SIZE ) ) * * AF( WORK_U+( ODD_SIZE )+1 ) = -ONE* $ DU( PART_OFFSET+ODD_SIZE )*( AF( ODD_SIZE ) ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 60 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 50 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = DBLE( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + AF( ODD_SIZE+3 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 30 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 40 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 30 40 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ).EQ.ZERO ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( WORK_U+ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * AF( ODD_SIZE+3 ) = AF( WORK_U+ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify lower off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) / ( AF( ODD_SIZE+2 ) ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*( AF( ODD_SIZE+1 ) )* $ AF( WORK_U+( ODD_SIZE )+1 ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+2+1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST / 2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+3 ) / ( AF( ODD_SIZE+2 ) ) * * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*( AF( WORK_U+ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( WORK_U+ODD_SIZE+3 )*AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( WORK_U+ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 50 CONTINUE * * 60 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 70 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PDDTTRF * END scalapack-1.8.0/SRC/psstebz.f0000640000175000017500000014377410363532303015624 0ustar muammarmuammar SUBROUTINE PSSTEBZ( ICTXT, RANGE, ORDER, N, VL, VU, IL, IU, $ ABSTOL, D, E, M, NSPLIT, W, IBLOCK, ISPLIT, $ WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER ORDER, RANGE INTEGER ICTXT, IL, INFO, IU, LIWORK, LWORK, M, N, $ NSPLIT REAL ABSTOL, VL, VU * .. * .. Array Arguments .. INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) REAL D( * ), E( * ), W( * ), WORK( * ) * .. * * Purpose * ======= * * PSSTEBZ computes the eigenvalues of a symmetric tridiagonal matrix in * parallel. The user may ask for all eigenvalues, all eigenvalues in * the interval [VL, VU], or the eigenvalues indexed IL through IU. A * static partitioning of work is done at the beginning of PSSTEBZ which * results in all processes finding an (almost) equal number of * eigenvalues. * * NOTE : It is assumed that the user is on an IEEE machine. If the user * is not on an IEEE mchine, set the compile time flag NO_IEEE * to 1 (in SLmake.inc). The features of IEEE arithmetic that * are needed for the "fast" Sturm Count are : (a) infinity * arithmetic (b) the sign bit of a double precision floating * point number is assumed be in the 32nd or 64th bit position * (c) the sign of negative zero. * * See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal * Matrix", Report CS41, Computer Science Dept., Stanford * University, July 21, 1966. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle. * * RANGE (global input) CHARACTER * Specifies which eigenvalues are to be found. * = 'A': ("All") all eigenvalues will be found. * = 'V': ("Value") all eigenvalues in the interval * [VL, VU] will be found. * = 'I': ("Index") the IL-th through IU-th eigenvalues (of the * entire matrix) will be found. * * ORDER (global input) CHARACTER * Specifies the order in which the eigenvalues and their block * numbers are stored in W and IBLOCK. * = 'B': ("By Block") the eigenvalues will be grouped by * split-off block (see IBLOCK, ISPLIT) and * ordered from smallest to largest within * the block. * = 'E': ("Entire matrix") * the eigenvalues for the entire matrix * will be ordered from smallest to largest. * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Eigenvalues less than VL will not be * returned. Not referenced if RANGE='A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Eigenvalues greater than VU will not be * returned. VU must be greater than VL. Not referenced if * RANGE='A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL must be at least 1. * Not referenced if RANGE='A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. IU must be at least IL * and no greater than N. Not referenced if RANGE='A' or 'V'. * * ABSTOL (global input) REAL * The absolute tolerance for the eigenvalues. An eigenvalue * (or cluster) is considered to be located if it has been * determined to lie in an interval whose width is ABSTOL or * less. If ABSTOL is less than or equal to zero, then ULP*|T| * will be used, where |T| means the 1-norm of T. * Eigenvalues will be computed most accurately when ABSTOL is * set to the underflow threshold SLAMCH('U'), not zero. * Note : If eigenvectors are desired later by inverse iteration * ( PSSTEIN ), ABSTOL should be set to 2*PSLAMCH('S'). * * D (global input) REAL array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. To * avoid overflow, the matrix must be scaled so that its largest * entry is no greater than overflow**(1/2) * underflow**(1/4) * in absolute value, and for greatest accuracy, it should not * be much smaller than that. * * E (global input) REAL array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * To avoid overflow, the matrix must be scaled so that its * largest entry is no greater than overflow**(1/2) * * underflow**(1/4) in absolute value, and for greatest * accuracy, it should not be much smaller than that. * * M (global output) INTEGER * The actual number of eigenvalues found. 0 <= M <= N. * (See also the description of INFO=2) * * NSPLIT (global output) INTEGER * The number of diagonal blocks in the matrix T. * 1 <= NSPLIT <= N. * * W (global output) REAL array, dimension (N) * On exit, the first M elements of W contain the eigenvalues * on all processes. * * IBLOCK (global output) INTEGER array, dimension (N) * At each row/column j where E(j) is zero or small, the * matrix T is considered to split into a block diagonal * matrix. On exit IBLOCK(i) specifies which block (from 1 * to the number of blocks) the eigenvalue W(i) belongs to. * NOTE: in the (theoretically impossible) event that bisection * does not converge for some or all eigenvalues, INFO is set * to 1 and the ones for which it did not are identified by a * negative block number. * * ISPLIT (global output) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. * (Only the first NSPLIT elements will actually be used, but * since the user cannot know a priori what value NSPLIT will * have, N words must be reserved for ISPLIT.) * * WORK (local workspace) REAL array, dimension ( MAX( 5*N, 7 ) ) * * LWORK (local input) INTEGER * size of array WORK must be >= MAX( 5*N, 7 ) * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace) INTEGER array, dimension ( MAX( 4*N, 14 ) ) * * LIWORK (local input) INTEGER * size of array IWORK must be >= MAX( 4*N, 14, NPROCS ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0 : successful exit * < 0 : if INFO = -i, the i-th argument had an illegal value * > 0 : some or all of the eigenvalues failed to converge or * were not computed: * = 1 : Bisection failed to converge for some eigenvalues; * these eigenvalues are flagged by a negative block * number. The effect is that the eigenvalues may not * be as accurate as the absolute and relative * tolerances. This is generally caused by arithmetic * which is less accurate than PSLAMCH says. * = 2 : There is a mismatch between the number of * eigenvalues output and the number desired. * = 3 : RANGE='i', and the Gershgorin interval initially * used was incorrect. No eigenvalues were computed. * Probable cause: your machine has sloppy floating * point arithmetic. * Cure: Increase the PARAMETER "FUDGE", recompile, * and try again. * * Internal Parameters * =================== * * RELFAC REAL, default = 2.0 * The relative tolerance. An interval [a,b] lies within * "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), * where "ulp" is the machine precision (distance from 1 to * the next larger floating point number.) * * FUDGE REAL, default = 2.0 * A "fudge factor" to widen the Gershgorin intervals. Ideally, * a value of 1 should work, but on machines with sloppy * arithmetic, this needs to be larger. The default for * publicly released versions should be large enough to handle * the worst machine around. Note that this has no effect * on the accuracy of the solution. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. External Functions .. LOGICAL LSAME INTEGER BLACS_PNUM REAL PSLAMCH EXTERNAL LSAME, BLACS_PNUM, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_FREEBUFF, BLACS_GET, BLACS_GRIDEXIT, $ BLACS_GRIDINFO, BLACS_GRIDMAP, GLOBCHK, $ IGEBR2D, IGEBS2D, IGERV2D, IGESD2D, IGSUM2D, $ PSLAEBZ, PSLAIECT, PSLAPDCT, PSLASNBT, PXERBLA, $ SGEBR2D, SGEBS2D, SGERV2D, SGESD2D, SLASRT2 * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER BIGNUM, DESCMULT PARAMETER ( BIGNUM = 10000, DESCMULT = 100 ) REAL ZERO, ONE, TWO, FIVE, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, $ FIVE = 5.0E+0, HALF = 1.0E+0 / TWO ) REAL FUDGE, RELFAC PARAMETER ( FUDGE = 2.0E+0, RELFAC = 2.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER BLKNO, FOUND, I, IBEGIN, IEFLAG, IEND, IFRST, $ IINFO, ILAST, ILOAD, IM, IMYLOAD, IN, INDRIW1, $ INDRIW2, INDRW1, INDRW2, INXTLOAD, IOFF, $ IORDER, IOUT, IRANGE, IRECV, IREM, ITMP1, $ ITMP2, J, JB, K, LAST, LEXTRA, LREQ, MYCOL, $ MYROW, NALPHA, NBETA, NCMP, NEIGINT, NEXT, NGL, $ NGLOB, NGU, NINT, NPCOL, NPROW, OFFSET, $ ONEDCONTEXT, P, PREV, REXTRA, RREQ, SELF, $ TORECV REAL ALPHA, ATOLI, BETA, BNORM, DRECV, DSEND, GL, $ GU, INITVL, INITVU, LSAVE, MID, PIVMIN, RELTOL, $ SAFEMN, TMP1, TMP2, TNORM, ULP * .. * .. Local Arrays .. INTEGER IDUM( 5, 2 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Set up process grid * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * INFO = 0 M = 0 * * Decode RANGE * IF( LSAME( RANGE, 'A' ) ) THEN IRANGE = 1 ELSE IF( LSAME( RANGE, 'V' ) ) THEN IRANGE = 2 ELSE IF( LSAME( RANGE, 'I' ) ) THEN IRANGE = 3 ELSE IRANGE = 0 END IF * * Decode ORDER * IF( LSAME( ORDER, 'B' ) ) THEN IORDER = 2 ELSE IF( LSAME( ORDER, 'E' ) .OR. LSAME( ORDER, 'A' ) ) THEN IORDER = 1 ELSE IORDER = 0 END IF * * Check for Errors * IF( NPROW.EQ.-1 ) THEN INFO = -1 ELSE * * Get machine constants * SAFEMN = PSLAMCH( ICTXT, 'S' ) ULP = PSLAMCH( ICTXT, 'P' ) RELTOL = ULP*RELFAC IDUM( 1, 1 ) = ICHAR( RANGE ) IDUM( 1, 2 ) = 2 IDUM( 2, 1 ) = ICHAR( ORDER ) IDUM( 2, 2 ) = 3 IDUM( 3, 1 ) = N IDUM( 3, 2 ) = 4 NGLOB = 5 IF( IRANGE.EQ.3 ) THEN IDUM( 4, 1 ) = IL IDUM( 4, 2 ) = 7 IDUM( 5, 1 ) = IU IDUM( 5, 2 ) = 8 ELSE IDUM( 4, 1 ) = 0 IDUM( 4, 2 ) = 0 IDUM( 5, 1 ) = 0 IDUM( 5, 2 ) = 0 END IF IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( IRANGE.EQ.2 ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL SGEBS2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL SGEBR2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3, 0, 0 ) END IF LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IRANGE.EQ.0 ) THEN INFO = -2 ELSE IF( IORDER.EQ.0 ) THEN INFO = -3 ELSE IF( IRANGE.EQ.2 .AND. VL.GE.VU ) THEN INFO = -5 ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, $ N ) ) ) THEN INFO = -6 ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, $ IL ) .OR. IU.GT.N ) ) THEN INFO = -7 ELSE IF( LWORK.LT.MAX( 5*N, 7 ) .AND. .NOT.LQUERY ) THEN INFO = -18 ELSE IF( LIWORK.LT.MAX( 4*N, 14, NPROW*NPCOL ) .AND. .NOT. $ LQUERY ) THEN INFO = -20 ELSE IF( IRANGE.EQ.2 .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE* $ ULP*ABS( VL ) ) ) THEN INFO = -5 ELSE IF( IRANGE.EQ.2 .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE* $ ULP*ABS( VU ) ) ) THEN INFO = -6 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*ULP*ABS( ABSTOL ) ) $ THEN INFO = -9 END IF END IF IF( INFO.EQ.0 ) $ INFO = BIGNUM CALL GLOBCHK( ICTXT, NGLOB, IDUM, 5, IWORK, INFO ) IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF END IF WORK( 1 ) = REAL( MAX( 5*N, 7 ) ) IWORK( 1 ) = MAX( 4*N, 14, NPROW*NPCOL ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSTEBZ', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .AND. LIWORK.EQ.-1 ) THEN RETURN END IF * * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * K = 1 DO 20 I = 0, NPROW - 1 DO 10 J = 0, NPCOL - 1 IWORK( K ) = BLACS_PNUM( ICTXT, I, J ) K = K + 1 10 CONTINUE 20 CONTINUE * P = NPROW*NPCOL NPROW = 1 NPCOL = P * CALL BLACS_GET( ICTXT, 10, ONEDCONTEXT ) CALL BLACS_GRIDMAP( ONEDCONTEXT, IWORK, NPROW, NPROW, NPCOL ) CALL BLACS_GRIDINFO( ONEDCONTEXT, I, J, K, SELF ) * * Simplifications: * IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) $ IRANGE = 1 * NEXT = MOD( SELF+1, P ) PREV = MOD( P+SELF-1, P ) * * Compute squares of off-diagonals, splitting points and pivmin. * Interleave diagonals and off-diagonals. * INDRW1 = MAX( 2*N, 4 ) INDRW2 = INDRW1 + 2*N INDRIW1 = MAX( 2*N, 8 ) NSPLIT = 1 WORK( INDRW1+2*N ) = ZERO PIVMIN = ONE * DO 30 I = 1, N - 1 TMP1 = E( I )**2 J = 2*I WORK( INDRW1+J-1 ) = D( I ) IF( ABS( D( I+1 )*D( I ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN ISPLIT( NSPLIT ) = I NSPLIT = NSPLIT + 1 WORK( INDRW1+J ) = ZERO ELSE WORK( INDRW1+J ) = TMP1 PIVMIN = MAX( PIVMIN, TMP1 ) END IF 30 CONTINUE WORK( INDRW1+2*N-1 ) = D( N ) ISPLIT( NSPLIT ) = N PIVMIN = PIVMIN*SAFEMN * * Compute Gershgorin interval [gl,gu] for entire matrix * GU = D( 1 ) GL = D( 1 ) TMP1 = ZERO * DO 40 I = 1, N - 1 TMP2 = ABS( E( I ) ) GU = MAX( GU, D( I )+TMP1+TMP2 ) GL = MIN( GL, D( I )-TMP1-TMP2 ) TMP1 = TMP2 40 CONTINUE GU = MAX( GU, D( N )+TMP1 ) GL = MIN( GL, D( N )-TMP1 ) TNORM = MAX( ABS( GL ), ABS( GU ) ) GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN * IF( ABSTOL.LE.ZERO ) THEN ATOLI = ULP*TNORM ELSE ATOLI = ABSTOL END IF * * Find out if on an IEEE machine, the sign bit is the * 32nd bit (Big Endian) or the 64th bit (Little Endian) * IF( IRANGE.EQ.1 .OR. NSPLIT.EQ.1 ) THEN CALL PSLASNBT( IEFLAG ) ELSE IEFLAG = 0 END IF LEXTRA = 0 REXTRA = 0 * * Form Initial Interval containing desired eigenvalues * IF( IRANGE.EQ.1 ) THEN INITVL = GL INITVU = GU WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = 0 IWORK( 2 ) = N IFRST = 1 ILAST = N ELSE IF( IRANGE.EQ.2 ) THEN IF( VL.GT.GL ) THEN IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( VL, N, WORK( INDRW1+1 ), PIVMIN, IFRST ) ELSE CALL PSLAIECT( VL, N, WORK( INDRW1+1 ), IFRST ) END IF IFRST = IFRST + 1 INITVL = VL ELSE INITVL = GL IFRST = 1 END IF IF( VU.LT.GU ) THEN IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( VU, N, WORK( INDRW1+1 ), PIVMIN, ILAST ) ELSE CALL PSLAIECT( VU, N, WORK( INDRW1+1 ), ILAST ) END IF INITVU = VU ELSE INITVU = GU ILAST = N END IF WORK( 1 ) = INITVL WORK( 2 ) = INITVU IWORK( 1 ) = IFRST - 1 IWORK( 2 ) = ILAST ELSE IF( IRANGE.EQ.3 ) THEN WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = 0 IWORK( 2 ) = N IWORK( 5 ) = IL - 1 IWORK( 6 ) = IU CALL PSLAEBZ( 0, N, 2, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+1 ), IWORK( 5 ), WORK, IWORK, NINT, $ LSAVE, IEFLAG, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 3 GO TO 230 END IF IF( NINT.GT.1 ) THEN IF( IWORK( 5 ).EQ.IL-1 ) THEN WORK( 2 ) = WORK( 4 ) IWORK( 2 ) = IWORK( 4 ) ELSE WORK( 1 ) = WORK( 3 ) IWORK( 1 ) = IWORK( 3 ) END IF IF( IWORK( 1 ).LT.0 .OR. IWORK( 1 ).GT.IL-1 .OR. $ IWORK( 2 ).LE.MIN( IU-1, IWORK( 1 ) ) .OR. $ IWORK( 2 ).GT.N ) THEN INFO = 3 GO TO 230 END IF END IF LEXTRA = IL - 1 - IWORK( 1 ) REXTRA = IWORK( 2 ) - IU INITVL = WORK( 1 ) INITVU = WORK( 2 ) IFRST = IL ILAST = IU END IF * NVL = IFRST - 1 * NVU = ILAST GL = INITVL GU = INITVU NGL = IWORK( 1 ) NGU = IWORK( 2 ) IM = 0 FOUND = 0 INDRIW2 = INDRIW1 + NGU - NGL IEND = 0 IF( IFRST.GT.ILAST ) $ GO TO 100 IF( IFRST.EQ.1 .AND. ILAST.EQ.N ) $ IRANGE = 1 * * Find Eigenvalues -- Loop Over Blocks * DO 90 JB = 1, NSPLIT IOFF = IEND IBEGIN = IOFF + 1 IEND = ISPLIT( JB ) IN = IEND - IOFF IF( JB.NE.1 ) THEN IF( IRANGE.NE.1 ) THEN FOUND = IM * * Find total number of eigenvalues found thus far * CALL IGSUM2D( ONEDCONTEXT, 'All', ' ', 1, 1, FOUND, 1, $ -1, -1 ) ELSE FOUND = IOFF END IF END IF * IF( SELF.GE.P ) * $ GO TO 30 IF( IN.NE.N ) THEN * * Compute Gershgorin interval [gl,gu] for split matrix * GU = D( IBEGIN ) GL = D( IBEGIN ) TMP1 = ZERO * DO 50 J = IBEGIN, IEND - 1 TMP2 = ABS( E( J ) ) GU = MAX( GU, D( J )+TMP1+TMP2 ) GL = MIN( GL, D( J )-TMP1-TMP2 ) TMP1 = TMP2 50 CONTINUE * GU = MAX( GU, D( IEND )+TMP1 ) GL = MIN( GL, D( IEND )-TMP1 ) BNORM = MAX( ABS( GL ), ABS( GU ) ) GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN * * Compute ATOLI for the current submatrix * IF( ABSTOL.LE.ZERO ) THEN ATOLI = ULP*BNORM ELSE ATOLI = ABSTOL END IF * IF( GL.LT.INITVL ) THEN GL = INITVL IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( GL, IN, WORK( INDRW1+2*IOFF+1 ), $ PIVMIN, NGL ) ELSE CALL PSLAIECT( GL, IN, WORK( INDRW1+2*IOFF+1 ), NGL ) END IF ELSE NGL = 0 END IF IF( GU.GT.INITVU ) THEN GU = INITVU IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( GU, IN, WORK( INDRW1+2*IOFF+1 ), $ PIVMIN, NGU ) ELSE CALL PSLAIECT( GU, IN, WORK( INDRW1+2*IOFF+1 ), NGU ) END IF ELSE NGU = IN END IF IF( NGL.GE.NGU ) $ GO TO 90 WORK( 1 ) = GL WORK( 2 ) = GU IWORK( 1 ) = NGL IWORK( 2 ) = NGU END IF OFFSET = FOUND - NGL BLKNO = JB * * Do a static partitioning of work so that each process * has to find an (almost) equal number of eigenvalues * NCMP = NGU - NGL ILOAD = NCMP / P IREM = NCMP - ILOAD*P ITMP1 = MOD( SELF-FOUND, P ) IF( ITMP1.LT.0 ) $ ITMP1 = ITMP1 + P IF( ITMP1.LT.IREM ) THEN IMYLOAD = ILOAD + 1 ELSE IMYLOAD = ILOAD END IF IF( IMYLOAD.EQ.0 ) THEN GO TO 90 ELSE IF( IN.EQ.1 ) THEN WORK( INDRW2+IM+1 ) = WORK( INDRW1+2*IOFF+1 ) IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = OFFSET + 1 IM = IM + 1 GO TO 90 ELSE INXTLOAD = ILOAD ITMP2 = MOD( SELF+1-FOUND, P ) IF( ITMP2.LT.0 ) $ ITMP2 = ITMP2 + P IF( ITMP2.LT.IREM ) $ INXTLOAD = INXTLOAD + 1 LREQ = NGL + ITMP1*ILOAD + MIN( IREM, ITMP1 ) RREQ = LREQ + IMYLOAD IWORK( 5 ) = LREQ IWORK( 6 ) = RREQ TMP1 = WORK( 1 ) ITMP1 = IWORK( 1 ) CALL PSLAEBZ( 1, IN, 1, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+2*IOFF+1 ), IWORK( 5 ), WORK, $ IWORK, NINT, LSAVE, IEFLAG, IINFO ) ALPHA = WORK( 1 ) BETA = WORK( 2 ) NALPHA = IWORK( 1 ) NBETA = IWORK( 2 ) DSEND = BETA IF( NBETA.GT.RREQ+INXTLOAD ) THEN NBETA = RREQ DSEND = ALPHA END IF LAST = MOD( FOUND+MIN( NGU-NGL, P )-1, P ) IF( LAST.LT.0 ) $ LAST = LAST + P IF( SELF.NE.LAST ) THEN CALL SGESD2D( ONEDCONTEXT, 1, 1, DSEND, 1, 0, NEXT ) CALL IGESD2D( ONEDCONTEXT, 1, 1, NBETA, 1, 0, NEXT ) END IF IF( SELF.NE.MOD( FOUND, P ) ) THEN CALL SGERV2D( ONEDCONTEXT, 1, 1, DRECV, 1, 0, PREV ) CALL IGERV2D( ONEDCONTEXT, 1, 1, IRECV, 1, 0, PREV ) ELSE DRECV = TMP1 IRECV = ITMP1 END IF WORK( 1 ) = MAX( LSAVE, DRECV ) IWORK( 1 ) = IRECV ALPHA = MAX( ALPHA, WORK( 1 ) ) NALPHA = MAX( NALPHA, IRECV ) IF( BETA-ALPHA.LE.MAX( ATOLI, RELTOL*MAX( ABS( ALPHA ), $ ABS( BETA ) ) ) ) THEN MID = HALF*( ALPHA+BETA ) DO 60 J = OFFSET + NALPHA + 1, OFFSET + NBETA WORK( INDRW2+IM+1 ) = MID IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = J IM = IM + 1 60 CONTINUE WORK( 2 ) = ALPHA IWORK( 2 ) = NALPHA END IF END IF NEIGINT = IWORK( 2 ) - IWORK( 1 ) IF( NEIGINT.LE.0 ) $ GO TO 90 * * Call the main computational routine * CALL PSLAEBZ( 2, IN, NEIGINT, 1, ATOLI, RELTOL, PIVMIN, $ WORK( INDRW1+2*IOFF+1 ), IWORK, WORK, IWORK, $ IOUT, LSAVE, IEFLAG, IINFO ) IF( IINFO.NE.0 ) THEN INFO = 1 END IF DO 80 I = 1, IOUT MID = HALF*( WORK( 2*I-1 )+WORK( 2*I ) ) IF( I.GT.IOUT-IINFO ) $ BLKNO = -BLKNO DO 70 J = OFFSET + IWORK( 2*I-1 ) + 1, $ OFFSET + IWORK( 2*I ) WORK( INDRW2+IM+1 ) = MID IWORK( INDRIW1+IM+1 ) = BLKNO IWORK( INDRIW2+IM+1 ) = J IM = IM + 1 70 CONTINUE 80 CONTINUE 90 CONTINUE * * Find out total number of eigenvalues computed * 100 CONTINUE M = IM CALL IGSUM2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, M, 1, -1, -1 ) * * Move the eigenvalues found to their final destinations * DO 130 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, IM, 1 ) IF( IM.NE.0 ) THEN CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ IWORK( INDRIW2+1 ), IM ) CALL SGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ WORK( INDRW2+1 ), IM ) CALL IGEBS2D( ONEDCONTEXT, 'ALL', ' ', IM, 1, $ IWORK( INDRIW1+1 ), IM ) DO 110 J = 1, IM W( IWORK( INDRIW2+J ) ) = WORK( INDRW2+J ) IBLOCK( IWORK( INDRIW2+J ) ) = IWORK( INDRIW1+J ) 110 CONTINUE END IF ELSE CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', 1, 1, TORECV, 1, 0, $ I-1 ) IF( TORECV.NE.0 ) THEN CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, IWORK, $ TORECV, 0, I-1 ) CALL SGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, WORK, $ TORECV, 0, I-1 ) CALL IGEBR2D( ONEDCONTEXT, 'ALL', ' ', TORECV, 1, $ IWORK( N+1 ), TORECV, 0, I-1 ) DO 120 J = 1, TORECV W( IWORK( J ) ) = WORK( J ) IBLOCK( IWORK( J ) ) = IWORK( N+J ) 120 CONTINUE END IF END IF 130 CONTINUE IF( NSPLIT.GT.1 .AND. IORDER.EQ.1 ) THEN * * Sort the eigenvalues * * DO 140 I = 1, M IWORK( M+I ) = I 140 CONTINUE CALL SLASRT2( 'I', M, W, IWORK( M+1 ), IINFO ) DO 150 I = 1, M IWORK( I ) = IBLOCK( I ) 150 CONTINUE DO 160 I = 1, M IBLOCK( I ) = IWORK( IWORK( M+I ) ) 160 CONTINUE END IF IF( IRANGE.EQ.3 .AND. ( LEXTRA.GT.0 .OR. REXTRA.GT.0 ) ) THEN * * Discard unwanted eigenvalues (occurs only when RANGE = 'I', * and eigenvalues IL, and/or IU are in a cluster) * DO 170 I = 1, M WORK( I ) = W( I ) IWORK( I ) = I IWORK( M+I ) = I 170 CONTINUE DO 190 I = 1, LEXTRA ITMP1 = I DO 180 J = I + 1, M IF( WORK( J ).LT.WORK( ITMP1 ) ) THEN ITMP1 = J END IF 180 CONTINUE TMP1 = WORK( I ) WORK( I ) = WORK( ITMP1 ) WORK( ITMP1 ) = TMP1 IWORK( IWORK( M+ITMP1 ) ) = I IWORK( IWORK( M+I ) ) = ITMP1 ITMP2 = IWORK( M+I ) IWORK( M+I ) = IWORK( M+ITMP1 ) IWORK( M+ITMP1 ) = ITMP2 190 CONTINUE DO 210 I = 1, REXTRA ITMP1 = M - I + 1 DO 200 J = M - I, LEXTRA + 1, -1 IF( WORK( J ).GT.WORK( ITMP1 ) ) THEN ITMP1 = J END IF 200 CONTINUE TMP1 = WORK( M-I+1 ) WORK( M-I+1 ) = WORK( ITMP1 ) WORK( ITMP1 ) = TMP1 IWORK( IWORK( M+ITMP1 ) ) = M - I + 1 IWORK( IWORK( 2*M-I+1 ) ) = ITMP1 ITMP2 = IWORK( 2*M-I+1 ) IWORK( 2*M-I+1 ) = IWORK( M+ITMP1 ) IWORK( M+ITMP1 ) = ITMP2 * IWORK( ITMP1 ) = 1 210 CONTINUE J = 0 DO 220 I = 1, M IF( IWORK( I ).GT.LEXTRA .AND. IWORK( I ).LE.M-REXTRA ) THEN J = J + 1 W( J ) = WORK( IWORK( I ) ) IBLOCK( J ) = IBLOCK( I ) END IF 220 CONTINUE M = M - LEXTRA - REXTRA END IF IF( M.NE.ILAST-IFRST+1 ) THEN INFO = 2 END IF * 230 CONTINUE CALL BLACS_FREEBUFF( ONEDCONTEXT, 1 ) CALL BLACS_GRIDEXIT( ONEDCONTEXT ) RETURN * * End of PSSTEBZ * END * SUBROUTINE PSLAEBZ( IJOB, N, MMAX, MINP, ABSTOL, RELTOL, PIVMIN, $ D, NVAL, INTVL, INTVLCT, MOUT, LSAVE, IEFLAG, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER IEFLAG, IJOB, INFO, MINP, MMAX, MOUT, N REAL ABSTOL, LSAVE, PIVMIN, RELTOL * .. * .. Array Arguments .. INTEGER INTVLCT( * ), NVAL( * ) REAL D( * ), INTVL( * ) * .. * * Purpose * ======= * * PSLAEBZ contains the iteration loop which computes the eigenvalues * contained in the input intervals [ INTVL(2*j-1), INTVL(2*j) ] where * j = 1,...,MINP. It uses and computes the function N(w), which is * the count of eigenvalues of a symmetric tridiagonal matrix less than * or equal to its argument w. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * IJOB (input) INTEGER * Specifies the computation done by PSLAEBZ * = 0 : Find an interval with desired values of N(w) at the * endpoints of the interval. * = 1 : Find a floating point number contained in the initial * interval with a desired value of N(w). * = 2 : Perform bisection iteration to find eigenvalues of T. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * MMAX (input) INTEGER * The maximum number of intervals that may be generated. If * more than MMAX intervals are generated, then PSLAEBZ will * quit with INFO = MMAX+1. * * MINP (input) INTEGER * The initial number of intervals. MINP <= MMAX. * * ABSTOL (input) REAL * The minimum (absolute) width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * This must be at least zero. * * RELTOL (input) REAL * The minimum relative width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This should be at least radix*machine epsilon. * * PIVMIN (input) REAL * The minimum absolute of a "pivot" in the "paranoid" * implementation of the Sturm sequence loop. This must be at * least max_j |e(j)^2| *safe_min, and at least safe_min, where * safe_min is at least the smallest number that can divide 1.0 * without overflow. * See PSLAPDCT for the "paranoid" implementation of the Sturm * sequence loop. * * D (input) REAL array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * NVAL (input/output) INTEGER array, dimension (4) * If IJOB = 0, the desired values of N(w) are in NVAL(1) and * NVAL(2). * If IJOB = 1, NVAL(2) is the desired value of N(w). * If IJOB = 2, not referenced. * This array will, in general, be reordered on output. * * INTVL (input/output) REAL array, dimension (2*MMAX) * The endpoints of the intervals. INTVL(2*j-1) is the left * endpoint of the j-th interval, and INTVL(2*j) is the right * endpoint of the j-th interval. The input intervals will, * in general, be modified, split and reordered by the * calculation. * On input, INTVL contains the MINP input intervals. * On output, INTVL contains the converged intervals. * * INTVLCT (input/output) INTEGER array, dimension (2*MMAX) * The counts at the endpoints of the intervals. INTVLCT(2*j-1) * is the count at the left endpoint of the j-th interval, i.e., * the function value N(INTVL(2*j-1)), and INTVLCT(2*j) is the * count at the right endpoint of the j-th interval. * On input, INTVLCT contains the counts at the endpoints of * the MINP input intervals. * On output, INTVLCT contains the counts at the endpoints of * the converged intervals. * * MOUT (output) INTEGER * The number of intervals output. * * LSAVE (output) REAL * If IJOB = 0 or 2, not referenced. * If IJOB = 1, this is the largest floating point number * encountered which has count N(w) = NVAL(1). * * IEFLAG (input) INTEGER * A flag which indicates whether N(w) should be speeded up by * exploiting IEEE Arithmetic. * * INFO (output) INTEGER * = 0 : All intervals converged. * = 1 - MMAX : The last INFO intervals did not converge. * = MMAX + 1 : More than MMAX intervals were generated. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, INT, LOG, MAX, MIN * .. * .. External Subroutines .. EXTERNAL PSLAECV, PSLAIECT, PSLAPDCT * .. * .. Parameters .. REAL ZERO, TWO, HALF PARAMETER ( ZERO = 0.0E+0, TWO = 2.0E+0, $ HALF = 1.0E+0 / TWO ) * .. * .. Local Scalars .. INTEGER I, ITMAX, J, K, KF, KL, KLNEW, L, LCNT, LREQ, $ NALPHA, NBETA, NMID, RCNT, RREQ REAL ALPHA, BETA, MID * .. * .. Executable Statements .. * KF = 1 KL = MINP + 1 INFO = 0 IF( INTVL( 2 )-INTVL( 1 ).LE.ZERO ) THEN INFO = MINP MOUT = KF RETURN END IF IF( IJOB.EQ.0 ) THEN * * Check if some input intervals have "converged" * CALL PSLAECV( 0, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 * * Compute upper bound on number of iterations needed * ITMAX = INT( ( LOG( INTVL( 2 )-INTVL( 1 )+PIVMIN )- $ LOG( PIVMIN ) ) / LOG( TWO ) ) + 2 * * Iteration Loop * DO 20 I = 1, ITMAX KLNEW = KL DO 10 J = KF, KL - 1 K = 2*J * * Bisect the interval and find the count at that point * MID = HALF*( INTVL( K-1 )+INTVL( K ) ) IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE CALL PSLAIECT( MID, N, D, NMID ) END IF LREQ = NVAL( K-1 ) RREQ = NVAL( K ) IF( KL.EQ.1 ) $ NMID = MIN( INTVLCT( K ), $ MAX( INTVLCT( K-1 ), NMID ) ) IF( NMID.LE.NVAL( K-1 ) ) THEN INTVL( K-1 ) = MID INTVLCT( K-1 ) = NMID END IF IF( NMID.GE.NVAL( K ) ) THEN INTVL( K ) = MID INTVLCT( K ) = NMID END IF IF( NMID.GT.LREQ .AND. NMID.LT.RREQ ) THEN L = 2*KLNEW INTVL( L-1 ) = MID INTVL( L ) = INTVL( K ) INTVLCT( L-1 ) = NVAL( K ) INTVLCT( L ) = INTVLCT( K ) INTVL( K ) = MID INTVLCT( K ) = NVAL( K-1 ) NVAL( L-1 ) = NVAL( K ) NVAL( L ) = NVAL( L-1 ) NVAL( K ) = NVAL( K-1 ) KLNEW = KLNEW + 1 END IF 10 CONTINUE KL = KLNEW CALL PSLAECV( 0, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 20 CONTINUE ELSE IF( IJOB.EQ.1 ) THEN ALPHA = INTVL( 1 ) BETA = INTVL( 2 ) NALPHA = INTVLCT( 1 ) NBETA = INTVLCT( 2 ) LSAVE = ALPHA LREQ = NVAL( 1 ) RREQ = NVAL( 2 ) 30 CONTINUE IF( NBETA.NE.RREQ .AND. BETA-ALPHA.GT. $ MAX( ABSTOL, RELTOL*MAX( ABS( ALPHA ), ABS( BETA ) ) ) ) $ THEN * * Bisect the interval and find the count at that point * MID = HALF*( ALPHA+BETA ) IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE CALL PSLAIECT( MID, N, D, NMID ) END IF NMID = MIN( NBETA, MAX( NALPHA, NMID ) ) IF( NMID.GE.RREQ ) THEN BETA = MID NBETA = NMID ELSE ALPHA = MID NALPHA = NMID IF( NMID.EQ.LREQ ) $ LSAVE = ALPHA END IF GO TO 30 END IF KL = KF INTVL( 1 ) = ALPHA INTVL( 2 ) = BETA INTVLCT( 1 ) = NALPHA INTVLCT( 2 ) = NBETA ELSE IF( IJOB.EQ.2 ) THEN * * Check if some input intervals have "converged" * CALL PSLAECV( 1, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 * * Compute upper bound on number of iterations needed * ITMAX = INT( ( LOG( INTVL( 2 )-INTVL( 1 )+PIVMIN )- $ LOG( PIVMIN ) ) / LOG( TWO ) ) + 2 * * Iteration Loop * DO 50 I = 1, ITMAX KLNEW = KL DO 40 J = KF, KL - 1 K = 2*J MID = HALF*( INTVL( K-1 )+INTVL( K ) ) IF( IEFLAG.EQ.0 ) THEN CALL PSLAPDCT( MID, N, D, PIVMIN, NMID ) ELSE CALL PSLAIECT( MID, N, D, NMID ) END IF LCNT = INTVLCT( K-1 ) RCNT = INTVLCT( K ) NMID = MIN( RCNT, MAX( LCNT, NMID ) ) * * Form New Interval(s) * IF( NMID.EQ.LCNT ) THEN INTVL( K-1 ) = MID ELSE IF( NMID.EQ.RCNT ) THEN INTVL( K ) = MID ELSE IF( KLNEW.LT.MMAX+1 ) THEN L = 2*KLNEW INTVL( L-1 ) = MID INTVL( L ) = INTVL( K ) INTVLCT( L-1 ) = NMID INTVLCT( L ) = INTVLCT( K ) INTVL( K ) = MID INTVLCT( K ) = NMID KLNEW = KLNEW + 1 ELSE INFO = MMAX + 1 RETURN END IF 40 CONTINUE KL = KLNEW CALL PSLAECV( 1, KF, KL, INTVL, INTVLCT, NVAL, $ MAX( ABSTOL, PIVMIN ), RELTOL ) IF( KF.GE.KL ) $ GO TO 60 50 CONTINUE END IF 60 CONTINUE INFO = MAX( KL-KF, 0 ) MOUT = KL - 1 RETURN * * End of PSLAEBZ * END * * SUBROUTINE PSLAECV( IJOB, KF, KL, INTVL, INTVLCT, NVAL, ABSTOL, $ RELTOL ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER IJOB, KF, KL REAL ABSTOL, RELTOL * .. * .. Array Arguments .. INTEGER INTVLCT( * ), NVAL( * ) REAL INTVL( * ) * .. * * Purpose * ======= * * PSLAECV checks if the input intervals [ INTVL(2*i-1), INTVL(2*i) ], * i = KF, ... , KL-1, have "converged". * PSLAECV modifies KF to be the index of the last converged interval, * i.e., on output, all intervals [ INTVL(2*i-1), INTVL(2*i) ], i < KF, * have converged. Note that the input intervals may be reordered by * PSLAECV. * * This is a SCALAPACK internal procedure and arguments are not checked * for unreasonable values. * * Arguments * ========= * * IJOB (input) INTEGER * Specifies the criterion for "convergence" of an interval. * = 0 : When an interval is narrower than ABSTOL, or than * RELTOL times the larger (in magnitude) endpoint, then * it is considered to have "converged". * = 1 : When an interval is narrower than ABSTOL, or than * RELTOL times the larger (in magnitude) endpoint, or if * the counts at the endpoints are identical to the counts * specified by NVAL ( see NVAL ) then the interval is * considered to have "converged". * * KF (input/output) INTEGER * On input, the index of the first input interval is 2*KF-1. * On output, the index of the last converged interval * is 2*KF-3. * * KL (input) INTEGER * The index of the last input interval is 2*KL-3. * * INTVL (input/output) REAL array, dimension (2*(KL-KF)) * The endpoints of the intervals. INTVL(2*j-1) is the left * oendpoint f the j-th interval, and INTVL(2*j) is the right * endpoint of the j-th interval. The input intervals will, * in general, be reordered on output. * On input, INTVL contains the KL-KF input intervals. * On output, INTVL contains the converged intervals, 1 thru' * KF-1, and the unconverged intervals, KF thru' KL-1. * * INTVLCT (input/output) INTEGER array, dimension (2*(KL-KF)) * The counts at the endpoints of the intervals. INTVLCT(2*j-1) * is the count at the left endpoint of the j-th interval, i.e., * the function value N(INTVL(2*j-1)), and INTVLCT(2*j) is the * count at the right endpoint of the j-th interval. This array * will, in general, be reordered on output. * See the comments in PSLAEBZ for more on the function N(w). * * NVAL (input/output) INTEGER array, dimension (2*(KL-KF)) * The desired counts, N(w), at the endpoints of the * corresponding intervals. This array will, in general, * be reordered on output. * * ABSTOL (input) REAL * The minimum (absolute) width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This must be at least zero. * * RELTOL (input) REAL * The minimum relative width of an interval. When an interval * is narrower than ABSTOL, or than RELTOL times the larger (in * magnitude) endpoint, then it is considered to be sufficiently * small, i.e., converged. * Note : This should be at least radix*machine epsilon. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. * .. Local Scalars .. LOGICAL CONDN INTEGER I, ITMP1, ITMP2, J, K, KFNEW REAL TMP1, TMP2, TMP3, TMP4 * .. * .. Executable Statements .. * KFNEW = KF DO 10 I = KF, KL - 1 K = 2*I TMP3 = INTVL( K-1 ) TMP4 = INTVL( K ) TMP1 = ABS( TMP4-TMP3 ) TMP2 = MAX( ABS( TMP3 ), ABS( TMP4 ) ) CONDN = TMP1.LT.MAX( ABSTOL, RELTOL*TMP2 ) IF( IJOB.EQ.0 ) $ CONDN = CONDN .OR. ( ( INTVLCT( K-1 ).EQ.NVAL( K-1 ) ) .AND. $ INTVLCT( K ).EQ.NVAL( K ) ) IF( CONDN ) THEN IF( I.GT.KFNEW ) THEN * * Reorder Intervals * J = 2*KFNEW TMP1 = INTVL( K-1 ) TMP2 = INTVL( K ) ITMP1 = INTVLCT( K-1 ) ITMP2 = INTVLCT( K ) INTVL( K-1 ) = INTVL( J-1 ) INTVL( K ) = INTVL( J ) INTVLCT( K-1 ) = INTVLCT( J-1 ) INTVLCT( K ) = INTVLCT( J ) INTVL( J-1 ) = TMP1 INTVL( J ) = TMP2 INTVLCT( J-1 ) = ITMP1 INTVLCT( J ) = ITMP2 IF( IJOB.EQ.0 ) THEN ITMP1 = NVAL( K-1 ) NVAL( K-1 ) = NVAL( J-1 ) NVAL( J-1 ) = ITMP1 ITMP1 = NVAL( K ) NVAL( K ) = NVAL( J ) NVAL( J ) = ITMP1 END IF END IF KFNEW = KFNEW + 1 END IF 10 CONTINUE KF = KFNEW RETURN * * End of PSLAECV * END * SUBROUTINE PSLAPDCT( SIGMA, N, D, PIVMIN, COUNT ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * * .. Scalar Arguments .. INTEGER COUNT, N REAL PIVMIN, SIGMA * .. * .. Array Arguments .. REAL D( * ) * .. * * Purpose * ======= * * PSLAPDCT counts the number of negative eigenvalues of (T - SIGMA I). * This implementation of the Sturm Sequence loop has conditionals in * the innermost loop to avoid overflow and determine the sign of a * floating point number. PSLAPDCT will be referred to as the "paranoid" * implementation of the Sturm Sequence loop. * * This is a SCALAPACK internal procedure and arguments are not checked * for unreasonable values. * * Arguments * ========= * * SIGMA (input) REAL * The shift. PSLAPDCT finds the number of eigenvalues of T less * than or equal to SIGMA. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * D (input) REAL array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * PIVMIN (input) REAL * The minimum absolute of a "pivot" in this "paranoid" * implementation of the Sturm sequence loop. This must be at * least max_j |e(j)^2| *safe_min, and at least safe_min, where * safe_min is at least the smallest number that can divide 1.0 * without overflow. * * COUNT (output) INTEGER * The count of the number of eigenvalues of T less than or * equal to SIGMA. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I REAL TMP * .. * .. Executable Statements .. * TMP = D( 1 ) - SIGMA IF( ABS( TMP ).LE.PIVMIN ) $ TMP = -PIVMIN COUNT = 0 IF( TMP.LE.ZERO ) $ COUNT = 1 DO 10 I = 3, 2*N - 1, 2 TMP = D( I ) - D( I-1 ) / TMP - SIGMA IF( ABS( TMP ).LE.PIVMIN ) $ TMP = -PIVMIN IF( TMP.LE.ZERO ) $ COUNT = COUNT + 1 10 CONTINUE * RETURN * * End of PSLAPDCT * END scalapack-1.8.0/SRC/pcpoequ.f0000640000175000017500000003125010363532303015567 0ustar muammarmuammar SUBROUTINE PCPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, N REAL AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL SC( * ), SR( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCPOEQU computes row and column scalings intended to * equilibrate a distributed Hermitian positive definite matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) and reduce its condition number * (with respect to the two-norm). SR and SC contain the scale * factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled distri- * buted matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on * the diagonal. This choice of SR and SC puts the condition number * of B within a factor N of the smallest possible condition number * over all possible diagonal scalings. * * The scaling factor are stored along process rows in SR and along * process columns in SC. The duplication of information simplifies * greatly the application of the factors. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory to an * array of local dimension ( LLD_A, LOCc(JA+N-1) ), the * N-by-N Hermitian positive definite distributed matrix * sub( A ) whose scaling factors are to be computed. Only the * diagonal elements of sub( A ) are referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local output) REAL array, dimension LOCr(M_A) * If INFO = 0, SR(IA:IA+N-1) contains the row scale factors * for sub( A ). SR is aligned with the distributed matrix A, * and replicated across every process column. SR is tied to the * distributed matrix A. * * SC (local output) REAL array, dimension LOCc(N_A) * If INFO = 0, SC(JA:JA+N-1) contains the column scale factors * for A(IA:IA+M-1,JA:JA+N-1). SC is aligned with the distribu- * ted matrix A, and replicated down every process row. SC is * tied to the distributed matrix A. * * SCOND (global output) REAL * If INFO = 0, SCOND contains the ratio of the smallest SR(i) * (or SC(j)) to the largest SR(i) (or SC(j)), with * IA <= i <= IA+N-1 and JA <= j <= JA+N-1. If SCOND >= 0.1 * and AMAX is neither too large nor too small, it is not worth * scaling by SR (or SC). * * AMAX (global output) REAL * Absolute value of largest matrix element. If AMAX is very * close to overflow or very close to underflow, the matrix * should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the K-th diagonal entry of sub( A ) is * nonpositive. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER ALLCTOP, COLCTOP, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ IDUMM, II, IIA, IOFFA, IOFFD, IROFF, J, JB, JJ, $ JJA, JN, LDA, LL, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ REAL AII, SMIN * .. * .. Local Arrays .. INTEGER DESCSC( DLEN_ ), DESCSR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, IGAMN2D, $ INFOG2L, PCHK1MAT, PB_TOPGET, PXERBLA, $ SGAMN2D, SGAMX2D, SGSUM2D * .. * .. External Functions .. INTEGER ICEIL, NUMROC REAL PSLAMCH EXTERNAL ICEIL, NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', ALLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Compute some local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) * * Assign descriptors for SR and SC arrays * CALL DESCSET( DESCSR, N, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCSC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Initialize the scaling factors to zero. * DO 10 II = IIA, IIA+NP-1 SR( II ) = ZERO 10 CONTINUE * DO 20 JJ = JJA, JJA+NQ-1 SC( JJ ) = ZERO 20 CONTINUE * * Find the minimum and maximum diagonal elements. * Handle first block separately. * II = IIA JJ = JJA JB = JN-JA+1 SMIN = ONE / PSLAMCH( ICTXT, 'S' ) AMAX = ZERO * IOFFA = II+(JJ-1)*LDA IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFD = IOFFA DO 30 LL = 0, JB-1 AII = REAL( A( IOFFD ) ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = LL + 1 IOFFD = IOFFD + LDA + 1 30 CONTINUE END IF * IF( MYROW.EQ.IAROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFD = IOFFA DO 40 LL = 0, JB-1 AII = REAL( A( IOFFD ) ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = J + LL - JA + 1 IOFFD = IOFFD + LDA + 1 40 CONTINUE END IF * IF( MYROW.EQ.ICURROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * * Compute scaling factors * CALL SGSUM2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, SC( JJA ), $ 1, -1, MYCOL ) CALL SGSUM2D( ICTXT, 'Rowwise', ROWCTOP, NP, 1, SR( IIA ), $ MAX( 1, NP ), -1, MYCOL ) * CALL SGAMX2D( ICTXT, 'All', ALLCTOP, 1, 1, AMAX, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) CALL SGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, SMIN, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) * IF( SMIN.LE.ZERO ) THEN * * Find the first non-positive diagonal element and return. * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) RETURN * ELSE * * Set the scale factors to the reciprocals * of the diagonal elements. * DO 60 II = IIA, IIA+NP-1 SR( II ) = ONE / SQRT( SR( II ) ) 60 CONTINUE * DO 70 JJ = JJA, JJA+NQ-1 SC( JJ ) = ONE / SQRT( SC( JJ ) ) 70 CONTINUE * * Compute SCOND = min(S(I)) / max(S(I)) * SCOND = SQRT( SMIN ) / SQRT( AMAX ) * END IF * RETURN * * End of PCPOEQU * END scalapack-1.8.0/SRC/psstedc.f0000640000175000017500000002274110363532303015565 0ustar muammarmuammar SUBROUTINE PSSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, IQ, JQ, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) REAL D( * ), E( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * PSSTEDC computes all eigenvalues and eigenvectors of a * symmetric tridiagonal matrix in parallel, using the divide and * conquer algorithm. * * This code makes very mild assumptions about floating point * arithmetic. It will work on machines with a guard digit in * add/subtract, or on those binary machines without guard digits * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. * It could conceivably fail on hexadecimal or decimal machines * without guard digits, but we know of none. See SLAED3 for details. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. (NOT IMPLEMENTED YET) * = 'I': Compute eigenvectors of tridiagonal matrix also. * = 'V': Compute eigenvectors of original dense symmetric * matrix also. On entry, Z contains the orthogonal * matrix used to reduce the original matrix to * tridiagonal form. (NOT IMPLEMENTED YET) * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input/output) REAL array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in descending order. * * E (global input/output) REAL array, dimension (N-1) * On entry, the subdiagonal elements of the tridiagonal matrix. * On exit, E has been destroyed. * * Q (local output) REAL array, * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * On output, Q is distributed across the P processes in block * cyclic format. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * * WORK (local workspace/output) REAL array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed. * * LWORK (local input/output) INTEGER, * the dimension of the array WORK. * LWORK = 6*N + 2*NP*NQ * NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) * NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * IWORK (local workspace/output) INTEGER array, dimension (LIWORK) * On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. * * LIWORK (input) INTEGER * The dimension of the array IWORK. * LIWORK = 2 + 7*N + 8*NPCOL * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the INFO/(N+1) th * eigenvalue while working on the submatrix lying in * global rows and columns mod(INFO,N+1). * * Further Details * ======= ======= * * Contributed by Francoise Tisseur, University of Manchester. * * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and * Conquer Algorithm for the Symmetric Eigenvalue Problem * on Distributed Memory Architectures", * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236. * (see also LAPACK Working Note 132) * http://www.netlib.org/lapack/lawns/lawn132.ps * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER ICOFFQ, IIQ, IPQ, IQCOL, IQROW, IROFFQ, JJQ, $ LDQ, LIWMIN, LWMIN, MYCOL, MYROW, NB, NP, $ NPCOL, NPROW, NQ REAL ORGNRM * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL SLANST EXTERNAL INDXG2P, LSAME, NUMROC, SLANST * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG2L, PSLAED0, $ PSLASRT, PXERBLA, SLASCL, SSTEDC * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) LDQ = DESCQ( LLD_ ) NB = DESCQ( NB_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) NQ = NUMROC( N, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE CALL CHK1MAT( N, 2, N, 2, IQ, JQ, DESCQ, 8, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCQ( NB_ ) IROFFQ = MOD( IQ-1, DESCQ( MB_ ) ) ICOFFQ = MOD( JQ-1, DESCQ( NB_ ) ) IQROW = INDXG2P( IQ, NB, MYROW, DESCQ( RSRC_ ), NPROW ) IQCOL = INDXG2P( JQ, NB, MYCOL, DESCQ( CSRC_ ), NPCOL ) LWMIN = 6*N + 2*NP*NQ LIWMIN = 2 + 7*N + 8*NPCOL WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( .NOT.LSAME( COMPZ, 'I' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( IROFFQ.NE.ICOFFQ .OR. ICOFFQ.NE.0 ) THEN INFO = -5 ELSE IF( DESCQ( MB_ ).NE.DESCQ( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PSSTEDC', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return * IF( N.EQ.0 ) $ GO TO 10 CALL INFOG2L( IQ, JQ, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) IF( N.EQ.1 ) THEN IF( MYROW.EQ.IQROW .AND. MYCOL.EQ.IQCOL ) $ Q( 1 ) = ONE GO TO 10 END IF * * If N is smaller than the minimum divide size NB, then * solve the problem with the serial divide and conquer * code locally. * IF( N.LE.NB ) THEN IF( ( MYROW.EQ.IQROW ) .AND. ( MYCOL.EQ.IQCOL ) ) THEN IPQ = IIQ + ( JJQ-1 )*LDQ CALL SSTEDC( 'I', N, D, E, Q( IPQ ), LDQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) IF( INFO.NE.0 ) THEN INFO = ( N+1 ) + N GO TO 10 END IF END IF GO TO 10 END IF * * If P=NPROW*NPCOL=1, solve the problem with SSTEDC. * IF( NPCOL*NPROW.EQ.1 ) THEN IPQ = IIQ + ( JJQ-1 )*LDQ CALL SSTEDC( 'I', N, D, E, Q( IPQ ), LDQ, WORK, LWORK, IWORK, $ LIWORK, INFO ) GO TO 10 END IF * * Scale matrix to allowable range, if necessary. * ORGNRM = SLANST( 'M', N, D, E ) IF( ORGNRM.NE.ZERO ) THEN CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N-1, 1, E, N-1, INFO ) END IF * CALL PSLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * Sort eigenvalues and corresponding eigenvectors * CALL PSLASRT( 'I', N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, IWORK, $ LIWORK, INFO ) * * Scale back. * IF( ORGNRM.NE.ZERO ) $ CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) * 10 CONTINUE * IF( LWORK.GT.0 ) $ WORK( 1 ) = REAL( LWMIN ) IF( LIWORK.GT.0 ) $ IWORK( 1 ) = LIWMIN RETURN * * End of PSSTEDC * END scalapack-1.8.0/SRC/pdlarzb.f0000640000175000017500000005562410363532303015564 0ustar muammarmuammar SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PDLARZB applies a real block reflector Q or its transpose Q**T to * a real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) * from the left or the right. * * Q is a product of k elementary reflectors as returned by PDTZRZF. * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * DIRECT (global input) CHARACTER * Indicates how H is formed from a product of elementary * reflectors * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise (not supported yet) * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L', * (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local * pieces of the distributed vectors V representing the * Householder transformation as returned by PDTZRZF. * LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V * The lower triangular matrix T in the representation of the * block reflector. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LEFT CHARACTER COLBTOP, TRANST INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2, $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV, $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL, $ IVROW, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV, $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20, $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV, $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, DGEBR2D, $ DGEBS2D,DGEMM, DGSUM2D, DLACPY, $ DLASET, DTRBR2D, DTRBS2D, DTRMM, $ INFOG2L, PBDMATADD, PBDTRAN, PB_TOPGET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -4 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * LEFT = LSAME( SIDE, 'L' ) IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) ICOFFV = MOD( JV-1, NBV ) NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFV = IIV + ( JJV-1 ) * LDV MBC = DESCC( MB_ ) NBC = DESCC( NB_ ) NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1, $ JJC1, ICROW1, ICCOL1 ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NQCALL ) ) * IF( LEFT ) THEN IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, MBC ) MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = ICOFFC1 NQC2 = NQC1 ELSE IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = IROFFC1 MPC2 = MPC1 ICOFFC2 = MOD( JC+N-L-1, NBC ) NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NQCALL ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC2 = ICOFFV is required by the current transposition * routine PBDTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW2 ) THEN MPC20 = MPC2 + IROFFC2 ELSE MPC20 = MPC2 END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBDTRAN * IPV = 1 IPW = IPV + MPC20 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC20 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL DLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW+ICOFFV*LW ), LW ) ELSE CALL DLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW ), LW ) END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K * CALL PBDTRAN( ICTXT, 'Rowwise', 'Transpose', K, M+ICOFFV, $ DESCV( NB_ ), WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K * IF( MYROW.EQ.ICROW2 ) $ IPV = IPV + IROFFC2 * * WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V' * WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K * LW = MAX( 1, NQC2 ) * IF( MPC2.GT.0 ) THEN CALL DGEMM( 'Transpose', 'No transpose', NQC2, K, MPC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', NQC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 10 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBDMATADD( ICTXT, 'Transpose', NQC2, IINXT-IIBEG+1, $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE, $ WORK( IPW+ITOP ), LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 10 END IF END IF * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL DTRBS2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV ) ELSE CALL DTRBR2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL DTRMM( 'Right', 'Lower', TRANST, 'Non unit', NQC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 20 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBDMATADD( ICTXT, 'Transpose', IINXT-IIBEG+1, NQC2, $ -ONE, WORK( IPW+ITOP ), LW, ONE, $ C( IIBEG+(JJC1-1)*LDC ), LDC ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 20 END IF END IF * * C2 C2 - V' * W' * C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )' * MPC2 x NQC2 MPC2 x K K x NQC2 * CALL DGEMM( 'No transpose', 'Transpose', MPC2, NQC2, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC2 ), LDC ) * ELSE * * Form sub( C ) * Q or sub( C ) * Q' * * Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2 * WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC2 LV = MAX( 1, K ) LW = MAX( 1, MPC2 ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL DTRBS2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV ) CALL DLACPY( 'All', K, NQC2, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL DTRBR2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * * WORK( IPV ) is K x NQC2 = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K * IF( NQC2.GT.0 ) THEN CALL DGEMM( 'No Transpose', 'Transpose', MPC2, K, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL DLASET( 'All', MPC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 30 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBDMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC, ONE, $ WORK( IPW+ILEFT*LW ), LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 30 END IF END IF * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL DTRMM( 'Right', 'Lower', TRANS, 'Non unit', MPC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 40 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBDMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, -ONE, $ WORK( IPW+ILEFT*LW ), LW, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 40 END IF END IF * * C2 C2 - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC2 x NQC2 MPC2 x K K x NQC2 * IF( IOFFC2.GT.0 ) $ CALL DGEMM( 'No transpose', 'No transpose', MPC2, NQC2, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC2 ), LDC ) * END IF * RETURN * * End of PDLARZB * END scalapack-1.8.0/SRC/pcgelqf.f0000640000175000017500000002747410363532303015551 0ustar muammarmuammar SUBROUTINE PCGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGELQF computes a LQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1)' H(ia+k-2)' . . . H(ia)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in * A(ia+i-1,ja+i:ja+n-1), and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ IROFF, J, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCGELQ2, $ PCLARFB, PCLARFT, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGELQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Handle the first block of rows separately * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IB = IN - IA + 1 * * Compute the LQ factorization of the first block A(ia:in:ja:ja+n-1) * CALL PCGELQ2( IB, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( IA+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ia) H(ia+1) . . . H(in) * CALL PCLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PCLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-IB, N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, $ JA, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of rows * DO 10 I = IN+1, IA+K-1, DESCA( MB_ ) IB = MIN( K-I+IA, DESCA( MB_ ) ) J = JA + I - IA * * Compute the LQ factorization of the current block * A(i:i+ib-1:j:ja+n-1) * CALL PCGELQ2( IB, N-I+IA, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PCLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PCLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-J+JA, IB, A, I, J, DESCA, WORK, $ A, I+IB, J, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGELQF * END scalapack-1.8.0/SRC/pdpbtrs.f0000640000175000017500000006322510363532303015600 0ustar muammarmuammar SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PDPBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded symmetric positive definite distributed * matrix with bandwidth BW. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PDPBTRF. * * Routine PDPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPBTRF and this is stored in AF. If a linear system * is to be solved using PDPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PDPBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 7*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 10*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 10*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 10*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 10*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 7*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 7*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 7*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 10*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 10*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -6 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 7*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = ( BW*NRHS ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB( 5 ) PARAM_CHECK( 15, 1 ) = DESCB( 4 ) PARAM_CHECK( 14, 1 ) = DESCB( 3 ) PARAM_CHECK( 13, 1 ) = DESCB( 2 ) PARAM_CHECK( 12, 1 ) = DESCB( 1 ) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA( 5 ) PARAM_CHECK( 9, 1 ) = DESCA( 4 ) PARAM_CHECK( 8, 1 ) = DESCA( 3 ) PARAM_CHECK( 7, 1 ) = DESCA( 1 ) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PDPBTRSV( 'L', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PDPBTRSV( 'U', 'T', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PDPBTRSV( 'L', 'T', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PDPBTRSV( 'U', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDPBTRS * END scalapack-1.8.0/SRC/pspbtrf.f0000640000175000017500000014150010363532303015573 0ustar muammarmuammar SUBROUTINE PSPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), AF( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPBTRF computes a Cholesky factorization * of an N-by-N real banded * symmetric positive definite distributed matrix * with bandwidth BW: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PSPBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' U , if UPLO = 'U', or * * P A(1:N, JA:JA+N-1) P^T = L L', if UPLO = 'L' * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPBTRF and this is stored in AF. If a linear system * is to be solved using PSPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * bw*bw * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MBW2, MYCOL, MYROW, $ MY_NUM_COLS, NB, NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, OFST, PART_OFFSET, PART_SIZE, $ PREV_TRI_SIZE_M, PREV_TRI_SIZE_N, RETURN_CODE, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, PXERBLA, $ RESHAPE, SAXPY, SGEMM, SGERV2D, SGESD2D, $ SLACPY, SLATCPY, SPBTRF, SPOTRF, SSYRK, STBTRS, $ STRMM, STRRV2D, STRSD2D, STRSM, STRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PSPBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PSPBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( NB+2*BW )*BW * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PSPBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = BW*BW * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PSPBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 10 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 120 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW END IF * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Zero out space for work * DO 20 I = 1, WORK_SIZE_MIN WORK( I ) = ZERO 20 CONTINUE * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL STRSD2D( ICTXT, 'U', 'N', NEXT_TRI_SIZE_M, $ NEXT_TRI_SIZE_N, A( OFST+ODD_SIZE*LLDA+( BW+ $ 1 ) ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {L_i}^T in each processor * CALL SPBTRF( UPLO, ODD_SIZE, BW, A( OFST+1 ), LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 30 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * transpose the connection block in preparation. * CALL SLATCPY( 'U', BW, BW, A( ( OFST+( BW+1 )+( ODD_SIZE- $ BW )*LLDA ) ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * Perform the triangular system solve {L_i}{{B'}_i}^T = {B_i}^T * CALL STRTRS( 'L', 'N', 'N', BW, BW, $ A( OFST+1+( ODD_SIZE-BW )*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * * transpose resulting block to its location * in main storage. * CALL SLATCPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A( ( OFST+( BW+1 )+( ODD_SIZE-BW )* $ LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^T * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL SSYRK( UPLO, 'T', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+1+ODD_SIZE*LLDA ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 30 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL STRRV2D( ICTXT, 'U', 'N', PREV_TRI_SIZE_M, $ PREV_TRI_SIZE_N, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * CALL STBTRS( 'L', 'N', 'N', ODD_SIZE, BW, BW, $ A( OFST+1 ), LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * CALL SSYRK( 'L', 'T', BW, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, ZERO, AF( 1+( ODD_SIZE+2*BW )*BW ), $ BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL SGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine STRMM. * Since we have G_i^T stored, transpose * H_i^T to H_i. * CALL SLATCPY( 'N', BW, BW, AF( ODD_SIZE-BW+1 ), $ ODD_SIZE, AF( ( ODD_SIZE )*BW+1 ), BW ) * CALL STRMM( 'R', 'U', 'T', 'N', BW, BW, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )* $ LLDA ) ), LLDA-1, AF( ( ODD_SIZE )*BW+1 ), $ BW ) * * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, $ 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 110 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 60 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL SGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * CALL SLACPY( 'N', BW, BW, A( OFST+ODD_SIZE*LLDA+1 ), LLDA-1, $ AF( ODD_SIZE*BW+MBW2+1 ), BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL SAXPY( MBW2, ONE, AF( ODD_SIZE*BW+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL SGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL SPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL SLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL STRSM( 'L', 'L', 'N', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL SSYRK( 'L', 'T', BW, BW, -ONE, AF( ( ODD_SIZE )*BW+1 ), $ BW, ZERO, WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL STRSM( 'R', 'L', 'T', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL SSYRK( 'L', 'N', BW, BW, -ONE, $ AF( ( ODD_SIZE+2*BW )*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL SGEMM( 'N', 'N', BW, BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, ZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 60 CONTINUE * ELSE * * CASE UPLO = 'U' * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Sizes of the extra triangles communicated bewtween processors * IF( MYCOL.GT.0 ) THEN PREV_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) PREV_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL-1, 0, $ NPCOL ) ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN NEXT_TRI_SIZE_M = MIN( BW, NUMROC( N, PART_SIZE, MYCOL+1, 0, $ NPCOL ) ) NEXT_TRI_SIZE_N = MIN( BW, NUMROC( N, PART_SIZE, MYCOL, 0, $ NPCOL ) ) END IF * * * * Factor main partition A_i^T = U_i {U_i}^T in each processor * CALL SPBTRF( UPLO, ODD_SIZE, BW, A( OFST+1 ), LLDA, INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 70 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * Move the connection block in preparation. * CALL SLACPY( 'L', BW, BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), $ LLDA-1, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), BW ) * * * Perform the triangular solve {L_i}{{B'}_i}^T = {B_i}^T * CALL STRTRS( 'U', 'T', 'N', BW, BW, $ A( OFST+BW+1+( ODD_SIZE-BW )*LLDA ), LLDA-1, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, INFO ) * * Move the resulting block back to its location in main storage. * CALL SLACPY( 'L', BW, BW, AF( ODD_SIZE*BW+2*MBW2+1+BW-BW ), $ BW, A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1 ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i^T = {C_i}^T-{{B'}_i}^T{{B'}_i} * * The following method uses more flops than necessary but * does not necessitate the writing of a new BLAS routine. * * CALL SSYRK( UPLO, 'T', BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, ONE, $ A( OFST+BW+1+ODD_SIZE*LLDA ), LLDA-1 ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 70 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * Discard temporary matrix stored beginning in * AF( (odd_size+2*bw)*bw+1 ) and use for * off_diagonal block of reduced system. * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * * * Copy D block into AF storage for solve. * CALL SLATCPY( 'L', PREV_TRI_SIZE_N, PREV_TRI_SIZE_M, $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE ) * IF( INFO.EQ.0 ) THEN * CALL STBTRS( 'U', 'T', 'N', ODD_SIZE, BW, BW, $ A( OFST+1 ), LLDA, AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * CALL SSYRK( 'L', 'T', BW, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, ZERO, AF( 1+( ODD_SIZE+2*BW )*BW ), $ BW ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL SGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * * Copy matrix H_i (the last bw cols of G_i) to AF storage * as per requirements of BLAS routine STRMM. * Since we have G_i^T stored, transpose * H_i^T to H_i. * CALL SLATCPY( 'N', BW, BW, AF( ODD_SIZE-BW+1 ), $ ODD_SIZE, AF( ( ODD_SIZE )*BW+1 ), BW ) * CALL STRMM( 'R', 'L', 'N', 'N', BW, BW, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ AF( ( ODD_SIZE )*BW+1 ), BW ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, $ 0, 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 110 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 100 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL SGESD2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ MYCOL-1 ) * END IF * * Transpose last diagonal block into AF storage for subsequent * operations. * CALL SLATCPY( 'U', BW, BW, A( OFST+ODD_SIZE*LLDA+1+BW ), $ LLDA-1, AF( ODD_SIZE*BW+MBW2+1 ), BW ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ 0, MYCOL+1 ) * * Add contribution to diagonal block * CALL SAXPY( MBW2, ONE, AF( ODD_SIZE*BW+2*MBW2+1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 80 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 90 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL SGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL SAXPY( MBW2, ONE, WORK( 1 ), 1, $ AF( ODD_SIZE*BW+MBW2+1 ), 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 80 90 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... * * Factor diagonal block * CALL SPOTRF( 'L', BW, AF( ODD_SIZE*BW+MBW2+1 ), BW, INFO ) * IF( INFO.NE.0 ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * CALL SLACPY( 'N', BW, BW, AF( ODD_SIZE*BW+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+1 ), BW, 0, $ COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * CALL STRSM( 'L', 'L', 'N', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * CALL SSYRK( 'L', 'T', BW, BW, -ONE, AF( ( ODD_SIZE )*BW+1 ), $ BW, ZERO, WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, BW, BW, AF( ODD_SIZE*BW+2*MBW2+1 ), $ BW, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * CALL STRSM( 'R', 'L', 'T', 'N', BW, BW, ONE, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * CALL SSYRK( 'L', 'N', BW, BW, -ONE, $ AF( ( ODD_SIZE+2*BW )*BW+1 ), BW, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * CALL SGEMM( 'N', 'N', BW, BW, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ AF( ODD_SIZE*BW+1 ), BW, ZERO, WORK( 1 ), $ BW ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, BW, BW, WORK( 1 ), BW, 0, $ COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 100 CONTINUE * END IF * 110 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 120 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PSPBTRF * END scalapack-1.8.0/SRC/pzlamr1d.f0000640000175000017500000001067110363532303015651 0ustar muammarmuammar SUBROUTINE PZLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Bugs * ==== * * I am not sure that this works correctly when IB and JB are not equal * to 1. Indeed, I suspect that IB should always be set to 1 or ignored * with 1 used in its place. * * PZLAMR1D has not been tested except withint the contect of * PZHEPTRD, the prototype reduction to tridiagonal form code. * * Purpose * * ======= * * PZLAMR1D redistributes a one-dimensional row vector from one data * decomposition to another. * * This is an auxiliary routine called by PZHETRD to redistribute D, E * and TAU. * * Notes * ===== * * Although all processes call PZGEMR2D, only the processes that own * the first column of A send data and only processes that own the * first column of B receive data. The calls to ZGEBS2D/ZGEBR2D * spread the data down. * * Arguments * ========= * * N (global input) INTEGER * The size of the matrix to be transposed. * * A (local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JA+N-1)). * On output, A is replicated across all processes in * this processor column. * * IA (global input) INTEGER * A's global row index, which points to the beginning of * the submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LOCc(JB+N-1)). * * IB (global input) INTEGER * B's global row index, NOT USED * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace) COMPLEX*16 array, dimension ( LWORK ) * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB * NUMROC( N, 1, 0, 0, NPROW ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICTXT, MYCOL, MYROW, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCAA( DLEN_ ), DESCBB( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, PZGEMR2D, ZGEBR2D, ZGEBS2D * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return if possible * IF( N.LE.0 ) $ RETURN * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) DESCBB( I ) = DESCB( I ) 10 CONTINUE * DESCAA( M_ ) = 1 DESCBB( M_ ) = 1 DESCAA( LLD_ ) = 1 DESCBB( LLD_ ) = 1 * ICTXT = DESCB( CTXT_ ) CALL PZGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN CALL ZGEBS2D( ICTXT, 'C', ' ', NQ, 1, B, NQ ) ELSE CALL ZGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * RETURN * * End of PZLAMR1D * END scalapack-1.8.0/SRC/pzpttrs.f0000640000175000017500000006724310363532303015654 0ustar muammarmuammar SUBROUTINE PZPTTRS( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 AF( * ), B( * ), E( * ), WORK( * ) DOUBLE PRECISION D( * ) * .. * * * Purpose * ======= * * PZPTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PZPTTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal symmetric positive definite distributed * matrix. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'D *U or L*D L' as computed by PZPTTRF. * * Routine PZPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * IMPORTANT NOTE: The current version of this code supports * only IB=JA * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZPTTRF and this is stored in AF. If a linear system * is to be solved using PZPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, I, ICTXT, ICTXT_NEW, $ ICTXT_SAVE, IDUM1, IDUM3, JA_NEW, LLDA, LLDB, $ MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ PZPTTRSV, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 6*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 9*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 9*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 9*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -13 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 6*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 9*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -5 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 6*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZPTTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, $ 'PZPTTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (10+2*MIN(100,NRHS))*NPCOL+4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -13 CALL PXERBLA( ICTXT, $ 'PZPTTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB(5) PARAM_CHECK( 14, 1 ) = DESCB(4) PARAM_CHECK( 13, 1 ) = DESCB(3) PARAM_CHECK( 12, 1 ) = DESCB(2) PARAM_CHECK( 11, 1 ) = DESCB(1) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 15, 2 ) = 905 PARAM_CHECK( 14, 2 ) = 904 PARAM_CHECK( 13, 2 ) = 903 PARAM_CHECK( 12, 2 ) = 902 PARAM_CHECK( 11, 2 ) = 901 PARAM_CHECK( 10, 2 ) = 8 PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 13 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PZPTTRSV( 'L', 'N', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ELSE * CALL PZPTTRSV( 'U', 'C', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ENDIF * * Divide by the main diagonal: B <- D^{-1} B * * The main partition is first * DO 10 I=PART_OFFSET+1, PART_OFFSET+ODD_SIZE CALL ZSCAL( NRHS, DCMPLX( CONE/D( I ) ), B( I ), LLDB ) 10 CONTINUE * * Reduced system is next * IF( MYCOL .LT. NPCOL-1 ) THEN I=PART_OFFSET+ODD_SIZE+1 CALL ZSCAL( NRHS, CONE/AF( ODD_SIZE+2 ), B( I ), LLDB ) ENDIF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PZPTTRSV( 'L', 'C', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ELSE * CALL PZPTTRSV( 'U', 'N', N, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), JA_NEW, DESCA_1XP, B, IB, $ DESCB_PX1, AF, LAF, WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZPTTRS * END scalapack-1.8.0/SRC/pcpbtrs.f0000640000175000017500000006406110363532303015576 0ustar muammarmuammar SUBROUTINE PCPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCPBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PCPBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded symmetric positive definite distributed * matrix with bandwidth BW. * Depending on the value of UPLO, A stores either U or L in the equn * A(1:N, JA:JA+N-1) = U'*U or L*L' as computed by PCPBTRF. * * Routine PCPBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCPBTRF and this is stored in AF. If a linear system * is to be solved using PCPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 16, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, $ PCPBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 7*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 10*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 10*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 10*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 10*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -14 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF(( BW .GT. N-1 ) .OR. $ ( BW .LT. 0 ) ) THEN INFO = -3 ENDIF * IF( LLDA .LT. (BW+1) ) THEN INFO = -( 7*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 7*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 10*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 10*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -6 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 7*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCPBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*BW )) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, $ 'PCPBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (BW*NRHS) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, $ 'PCPBTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 16, 1 ) = DESCB(5) PARAM_CHECK( 15, 1 ) = DESCB(4) PARAM_CHECK( 14, 1 ) = DESCB(3) PARAM_CHECK( 13, 1 ) = DESCB(2) PARAM_CHECK( 12, 1 ) = DESCB(1) PARAM_CHECK( 11, 1 ) = IB PARAM_CHECK( 10, 1 ) = DESCA(5) PARAM_CHECK( 9, 1 ) = DESCA(4) PARAM_CHECK( 8, 1 ) = DESCA(3) PARAM_CHECK( 7, 1 ) = DESCA(1) PARAM_CHECK( 6, 1 ) = JA PARAM_CHECK( 5, 1 ) = NRHS PARAM_CHECK( 4, 1 ) = BW PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 16, 2 ) = 1005 PARAM_CHECK( 15, 2 ) = 1004 PARAM_CHECK( 14, 2 ) = 1003 PARAM_CHECK( 13, 2 ) = 1002 PARAM_CHECK( 12, 2 ) = 1001 PARAM_CHECK( 11, 2 ) = 9 PARAM_CHECK( 10, 2 ) = 705 PARAM_CHECK( 9, 2 ) = 704 PARAM_CHECK( 8, 2 ) = 703 PARAM_CHECK( 7, 2 ) = 701 PARAM_CHECK( 6, 2 ) = 6 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCPBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PCPBTRSV( 'L', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PCPBTRSV( 'U', 'C', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( UPLO, 'L' ) ) THEN * CALL PCPBTRSV( 'L', 'C', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PCPBTRSV( 'U', 'N', N, BW, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCPBTRS * END scalapack-1.8.0/SRC/pzlaqge.f0000640000175000017500000002341010363532303015555 0ustar muammarmuammar SUBROUTINE PZLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED INTEGER IA, JA, M, N DOUBLE PRECISION AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION C( * ), R( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAQGE equilibrates a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using the row and scaling * factors in the vectors R and C. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)) * containing on entry the M-by-N matrix sub( A ). On exit, * the equilibrated distributed matrix. See EQUED for the * form of the equilibrated distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local input) DOUBLE PRECISION array, dimension LOCr(M_A) * The row scale factors for sub( A ). R is aligned with the * distributed matrix A, and replicated across every process * column. R is tied to the distributed matrix A. * * C (local input) DOUBLE PRECISION array, dimension LOCc(N_A) * The column scale factors of sub( A ). C is aligned with the * distributed matrix A, and replicated down every process * row. C is tied to the distributed matrix A. * * ROWCND (global input) DOUBLE PRECISION * The global ratio of the smallest R(i) to the largest R(i), * IA <= i <= IA+M-1. * * COLCND (global input) DOUBLE PRECISION * The global ratio of the smallest C(i) to the largest C(i), * JA <= j <= JA+N-1. * * AMAX (global input) DOUBLE PRECISION * Absolute value of largest distributed submatrix entry. * * EQUED (global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration * = 'R': Row equilibration, i.e., sub( A ) has been pre- * multiplied by diag(R(IA:IA+M-1)), * = 'C': Column equilibration, i.e., sub( A ) has been post- * multiplied by diag(C(JA:JA+N-1)), * = 'B': Both row and column equilibration, i.e., sub( A ) * has been replaced by * diag(R(IA:IA+M-1)) * sub( A ) * diag(C(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if row or column scaling * should be done based on the ratio of the row or column scaling * factors. If ROWCND < THRESH, row scaling is done, and if * COLCND < THRESH, column scaling is done. * * LARGE and SMALL are threshold values used to decide if row scaling * should be done based on the absolute size of the largest matrix * element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, THRESH PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, $ IROFF, J, JJA, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PDLAMCH( ICTXT, 'Safe minimum' ) / $ PDLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) $ THEN * * No row scaling * IF( COLCND.GE.THRESH ) THEN * * No column scaling * EQUED = 'N' * ELSE * * Column scaling * IOFFA = (JJA-1)*LDA DO 20 J = JJA, JJA+NQ-1 CJ = C( J ) DO 10 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*A( IOFFA + I ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE EQUED = 'C' END IF * ELSE IF( COLCND.GE.THRESH ) THEN * * Row scaling, no column scaling * IOFFA = (JJA-1)*LDA DO 40 J = JJA, JJA+NQ-1 DO 30 I = IIA, IIA+MP-1 A( IOFFA + I ) = R( I )*A( IOFFA + I ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE EQUED = 'R' * ELSE * * Row and column scaling * IOFFA = (JJA-1)*LDA DO 60 J = JJA, JJA+NQ-1 CJ = C( J ) DO 50 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*R( I )*A( IOFFA + I ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE EQUED = 'B' * END IF * RETURN * * End of PZLAQGE * END scalapack-1.8.0/SRC/pslassq.f0000640000175000017500000002237710363532303015613 0ustar muammarmuammar SUBROUTINE PSLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IX, INCX, JX, N REAL SCALE, SUMSQ * .. * .. Array Arguments .. INTEGER DESCX( * ) REAL X( * ) * .. * * Purpose * ======= * * PSLASSQ returns the values scl and smsq such that * * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, * * where x( i ) = sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ). * The value of sumsq is assumed to be non-negative and scl returns the * value * * scl = max( scale, abs( x( i ) ) ). * * scale and sumsq must be supplied in SCALE and SUMSQ respectively. * SCALE and SUMSQ are overwritten by scl and ssq respectively. * * The routine makes only one pass through the vector sub( X ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * The result are only available in the scope of sub( X ), i.e if * sub( X ) is distributed along a process row, the correct results are * only available in this process row of the grid. Similarly if sub( X ) * is distributed along a process column, the correct results are only * available in this process column of the grid. * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vector sub( X ). * * X (input) REAL * The vector for which a scaled sum of squares is computed. * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= n. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * SCALE (local input/local output) REAL * On entry, the value scale in the equation above. * On exit, SCALE is overwritten with scl , the scaling factor * for the sum of squares. * * SUMSQ (local input/local output) REAL * On entry, the value sumsq in the equation above. * On exit, SUMSQ is overwritten with smsq , the basic sum of * squares from which scl has been factored out. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, ICOFF, ICTXT, IIX, IOFF, IROFF, IXCOL, $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ REAL TEMP1 * .. * .. Local Arrays .. REAL WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSTREECOMB, SCOMBSSQ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * LDX = DESCX( LLD_ ) IF( INCX.EQ.DESCX( M_ ) ) THEN * * X is rowwise distributed. * IF( MYROW.NE.IXROW ) $ RETURN ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ - ICOFF * * Code direct from LAPACK's SLASSQ, (save subroutine call) * IF( NQ.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 10 I = 1, NQ IF( X( IOFF ).NE.ZERO ) THEN TEMP1 = ABS( X( IOFF ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ * ( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + LDX 10 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PSTREECOMB( ICTXT, 'Rowwise', 2, WORK, -1, IXCOL, $ SCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * ELSE IF( INCX.EQ.1 ) THEN * * X is columnwise distributed. * IF( MYCOL.NE.IXCOL ) $ RETURN IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP - IROFF * * Code direct from LAPACK's SLASSQ, (save subroutine call) * IF( NP.GT.0 ) THEN IOFF = IIX + ( JJX - 1 ) * LDX DO 20 I = 1, NP IF( X( IOFF ).NE.ZERO ) THEN TEMP1 = ABS( X( IOFF ) ) IF( SCALE.LT.TEMP1 ) THEN SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 SCALE = TEMP1 ELSE SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 END IF END IF IOFF = IOFF + 1 20 CONTINUE END IF * * Take local result and find global * WORK( 1 ) = SCALE WORK( 2 ) = SUMSQ * CALL PSTREECOMB( ICTXT, 'Columnwise', 2, WORK, -1, IXCOL, $ SCOMBSSQ ) * SCALE = WORK( 1 ) SUMSQ = WORK( 2 ) * END IF * RETURN * * End of PSLASSQ * END scalapack-1.8.0/SRC/pzunml2.f0000640000175000017500000003624610363532303015534 0ustar muammarmuammar SUBROUTINE PZUNML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNML2 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PZGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JCC, LCM, LCMP, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 COMPLEX*16 AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZELSET2, PZLACGV, PZLARF, PZLARFC * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K -1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(i:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * IF( I-IA+1.LT.NQ ) $ CALL PZLACGV( NQ-I+IA-1, A, I, JA+I-IA+1, DESCA, $ DESCA( M_ ) ) CALL PZELSET2( AII, A, I, JA+I-IA, DESCA, ONE ) IF( NOTRAN ) THEN CALL PZLARFC( SIDE, MI, NI, A, I, JA+I-IA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) ELSE CALL PZLARF( SIDE, MI, NI, A, I, JA+I-IA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) END IF CALL PZELSET( A, I, JA+I-IA, DESCA, AII ) IF( I-IA+1.LT.NQ ) $ CALL PZLACGV( NQ-I+IA-1, A, I, JA+I-IA+1, DESCA, $ DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNML2 * END scalapack-1.8.0/SRC/psgesvd.f0000640000175000017500000005515410377355407015614 0ustar muammarmuammar SUBROUTINE PSGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, + VT,IVT,JVT,DESCVT,WORK,LWORK,INFO) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Oak Ridge National Laboratory * and Univ. of California Berkeley. * Jan 2006 * * .. Scalar Arguments .. CHARACTER JOBU,JOBVT INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N * .. * .. Array Arguments .. INTEGER DESCA(*),DESCU(*),DESCVT(*) REAL A(*),U(*),VT(*),WORK(*) REAL S(*) * .. * * Purpose * ======= * * PSGESVD computes the singular value decomposition (SVD) of an * M-by-N matrix A, optionally computing the left and/or right * singular vectors. The SVD is written as * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A and the columns of U and V are the * corresponding right and left singular vectors, respectively. The * singular values are returned in array S in decreasing order and * only the first min(M,N) columns of U and rows of VT = V**T are * computed. * * Notes * ===== * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, and * assume that its process grid has dimension r x c. LOCr( K ) denotes * the number of elements of K that a process would receive if K were * distributed over the r processes of its process column. Similarly, * LOCc( K ) denotes the number of elements of K that a process would * receive if K were distributed over the c processes of its process * row. The values of LOCr() and LOCc() may be determined via a call * to the ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * MP = number of local rows in A and U * NQ = number of local columns in A and VT * SIZE = min( M, N ) * SIZEQ = number of local columns in U * SIZEP = number of local rows in VT * * JOBU (global input) CHARACTER*1 * Specifies options for computing U: * = 'V': the first SIZE columns of U (the left singular * vectors) are returned in the array U; * = 'N': no columns of U (no left singular vectors) are * computed. * * JOBVT (global input) CHARACTER*1 * Specifies options for computing V**T: * = 'V': the first SIZE rows of V**T (the right singular * vectors) are returned in the array VT; * = 'N': no rows of V**T (no right singular vectors) are * computed. * * M (global input) INTEGER * The number of rows of the input matrix A. M >= 0. * * N (global input) INTEGER * The number of columns of the input matrix A. N >= 0. * * A (local input/workspace) block cyclic REAL * array, * global dimension (M, N), local dimension (MP, NQ) * On exit, the contents of A are destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix A. * * S (global output) REAL array, dimension SIZE * The singular values of A, sorted so that S(i) >= S(i+1). * * U (local output) REAL array, local dimension * (MP, SIZEQ), global dimension (M, SIZE) * if JOBU = 'V', U contains the first min(m,n) columns of U * if JOBU = 'N', U is not referenced. * * IU (global input) INTEGER * The row index in the global array U indicating the first * row of sub( U ). * * JU (global input) INTEGER * The column index in the global array U indicating the * first column of sub( U ). * * DESCU (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix U. * * VT (local output) REAL array, local dimension * (SIZEP, NQ), global dimension (SIZE, N). * If JOBVT = 'V', VT contains the first SIZE rows of * V**T. If JOBVT = 'N', VT is not referenced. * * IVT (global input) INTEGER * The row index in the global array VT indicating the first * row of sub( VT ). * * JVT (global input) INTEGER * The column index in the global array VT indicating the * first column of sub( VT ). * * DESCVT (global input) INTEGER array of dimension DLEN_ * The array descriptor for the distributed matrix VT. * * WORK (local workspace/output) REAL array, dimension * (LWORK) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (local input) INTEGER * The dimension of the array WORK. * * LWORK >= 1 + 6*SIZEB + MAX(WATOBD, WBDTOSVD), * * where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer, * respectively, to the workspace required to bidiagonalize * the matrix A and to go from the bidiagonal matrix to the * singular value decomposition U*S*VT. * * For WATOBD, the following holds: * * WATOBD = MAX(MAX(WPSLANGE,WPSGEBRD), * MAX(WPSLARED2D,WP(pre)LARED1D)), * * where WPSLANGE, WPSLARED1D, WPSLARED2D, WPSGEBRD are the * workspaces required respectively for the subprograms * PSLANGE, PSLARED1D, PSLARED2D, PSGEBRD. Using the * standard notation * * MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW), * NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL), * * the workspaces required for the above subprograms are * * WPSLANGE = MP, * WPSLARED1D = NQ0, * WPSLARED2D = MP0, * WPSGEBRD = NB*(MP + NQ + 1) + NQ, * * where NQ0 and MP0 refer, respectively, to the values obtained * at MYCOL = 0 and MYROW = 0. In general, the upper limit for * the workspace is given by a workspace required on * processor (0,0): * * WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0. * * In case of a homogeneous process grid this upper limit can * be used as an estimate of the minimum workspace for every * processor. * * For WBDTOSVD, the following holds: * * WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) + * MAX(WSBDSQR, * MAX(WANTU*WPSORMBRQLN, WANTVT*WPSORMBRPRT)), * * where * * 1, if left(right) singular vectors are wanted * WANTU(WANTVT) = * 0, otherwise * * and WSBDSQR, WPSORMBRQLN and WPSORMBRPRT refer respectively * to the workspace required for the subprograms SBDSQR, * PSORMBR(QLN), and PSORMBR(PRT), where QLN and PRT are the * values of the arguments VECT, SIDE, and TRANS in the call * to PSORMBR. NRU is equal to the local number of rows of * the matrix U when distributed 1-dimensional "column" of * processes. Analogously, NCVT is equal to the local number * of columns of the matrix VT when distributed across * 1-dimensional "row" of processes. Calling the LAPACK * procedure SBDSQR requires * * WSBDSQR = MAX(1, 4*SIZE ) * * on every processor. Finally, * * WPSORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB, * WPSORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB, * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the work array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if SBDSQR did not converge * If INFO = MIN(M,N) + 1, then PSGESVD has detected * heterogeneity by finding that eigenvalues were not * identical across the process grid. In this case, the * accuracy of the results from PSGESVD cannot be * guaranteed. * * ===================================================================== * * The results of PSGEBRD, and therefore PSGESVD, may vary slightly * from run to run with the same input data. If repeatability is an * issue, call BLACS_SET with the appropriate option after defining * the process grid. * * Alignment requirements * ====================== * * The routine PSGESVD inherits the same alignement requirement as * the routine PSGEBRD, namely: * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * * ===================================================================== * * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_, + CSRC_,LLD_,ITHVAL PARAMETER (BLOCK_CYCLIC_2D=1,DLEN_=9,DTYPE_=1,CTXT_=2,M_=3,N_=4, + MB_=5,NB_=6,RSRC_=7,CSRC_=8,LLD_=9,ITHVAL=10) REAL ZERO,ONE PARAMETER (ZERO= (0.0E+0),ONE= (1.0E+0)) * .. * .. Local Scalars .. CHARACTER UPLO INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ, + INDU,INDV,INDWORK,IOFFD,IOFFE,ISCALE,J,K,LDU,LDVT,LLWORK, + LWMIN,MAXIM,MB,MP,MYPCOL,MYPCOLC,MYPCOLR,MYPROW,MYPROWC, + MYPROWR,NB,NCVT,NPCOL,NPCOLC,NPCOLR,NPROCS,NPROW,NPROWC, + NPROWR,NQ,NRU,SIZE,SIZEB,SIZEP,SIZEPOS,SIZEQ,WANTU,WANTVT, + WATOBD,WBDTOSVD,WSBDSQR,WPSGEBRD,WPSLANGE,WPSORMBRPRT, + WPSORMBRQLN REAL ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM * .. * .. Local Arrays .. INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3) REAL C(1,1) * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC REAL PSLAMCH,PSLANGE EXTERNAL LSAME,NUMROC,PDLAMCH,PZLANGE * .. * .. External Subroutines .. EXTERNAL BLACS_GET,BLACS_GRIDEXIT,BLACS_GRIDINFO,BLACS_GRIDINIT, + CHK1MAT,SBDSQR,DESCINIT,SGAMN2D,SGAMX2D,SSCAL,IGAMX2D, + IGEBR2D,IGEBS2D,PCHK1MAT,PSGEBRD,PSGEMR2D,PSLARED1D, + PSLARED2D,PSLASCL,PSLASET,PSORMBR,PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,REAL * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) ISCALE = 0 INFO = 0 * IF (NPROW.EQ.-1) THEN INFO = - (800+CTXT_) ELSE * SIZE = MIN(M,N) SIZEB = MAX(M,N) NPROCS = NPROW*NPCOL IF (M.GE.N) THEN IOFFD = JA - 1 IOFFE = IA - 1 SIZEPOS = 1 ELSE IOFFD = IA - 1 IOFFE = JA - 1 SIZEPOS = 3 END IF * IF (LSAME(JOBU,'V')) THEN WANTU = 1 ELSE WANTU = 0 END IF IF (LSAME(JOBVT,'V')) THEN WANTVT = 1 ELSE WANTVT = 0 END IF * CALL CHK1MAT(M,3,N,4,IA,JA,DESCA,8,INFO) IF (WANTU.EQ.1) THEN CALL CHK1MAT(M,3,SIZE,SIZEPOS,IU,JU,DESCU,13,INFO) END IF IF (WANTVT.EQ.1) THEN CALL CHK1MAT(SIZE,SIZEPOS,N,4,IVT,JVT,DESCVT,17,INFO) END IF CALL IGAMX2D(DESCA(CTXT_),'A',' ',1,1,INFO,1,1,1,-1,-1,0) * IF (INFO.EQ.0) THEN * * Set up pointers into the WORK array. * INDD = 2 INDE = INDD + SIZEB + IOFFD INDD2 = INDE + SIZEB + IOFFE INDE2 = INDD2 + SIZEB + IOFFD * INDTAUQ = INDE2 + SIZEB + IOFFE INDTAUP = INDTAUQ + SIZEB + JA - 1 INDWORK = INDTAUP + SIZEB + IA - 1 LLWORK = LWORK - INDWORK + 1 * * Initialize contexts for "column" and "row" process matrices. * CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTC) CALL BLACS_GRIDINIT(CONTEXTC,'R',NPROCS,1) CALL BLACS_GRIDINFO(CONTEXTC,NPROWC,NPCOLC,MYPROWC, + MYPCOLC) CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTR) CALL BLACS_GRIDINIT(CONTEXTR,'R',1,NPROCS) CALL BLACS_GRIDINFO(CONTEXTR,NPROWR,NPCOLR,MYPROWR, + MYPCOLR) * * Set local dimensions of matrices (this is for MB=NB=1). * NRU = NUMROC(M,1,MYPROWC,0,NPROCS) NCVT = NUMROC(N,1,MYPCOLR,0,NPROCS) NB = DESCA(NB_) MB = DESCA(MB_) MP = NUMROC(M,MB,MYPROW,DESCA(RSRC_),NPROW) NQ = NUMROC(N,NB,MYPCOL,DESCA(CSRC_),NPCOL) IF (WANTVT.EQ.1) THEN SIZEP = NUMROC(SIZE,DESCVT(MB_),MYPROW,DESCVT(RSRC_), + NPROW) ELSE SIZEP = 0 END IF IF (WANTU.EQ.1) THEN SIZEQ = NUMROC(SIZE,DESCU(NB_),MYPCOL,DESCU(CSRC_), + NPCOL) ELSE SIZEQ = 0 END IF * * Transmit MAX(NQ0, MP0). * IF (MYPROW.EQ.0 .AND. MYPCOL.EQ.0) THEN MAXIM = MAX(NQ,MP) CALL IGEBS2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1) ELSE CALL IGEBR2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1,0,0) END IF * WPSLANGE = MP WPSGEBRD = NB* (MP+NQ+1) + NQ WATOBD = MAX(MAX(WPSLANGE,WPSGEBRD),MAXIM) * WSBDSQR = MAX(1,4*SIZE) WPSORMBRQLN = MAX((NB* (NB-1))/2, (SIZEQ+MP)*NB) + NB*NB WPSORMBRPRT = MAX((MB* (MB-1))/2, (SIZEP+NQ)*MB) + MB*MB WBDTOSVD = SIZE* (WANTU*NRU+WANTVT*NCVT) + + MAX(WSBDSQR,MAX(WANTU*WPSORMBRQLN, + WANTVT*WPSORMBRPRT)) * * Finally, calculate required workspace. * LWMIN = 1 + 6*SIZEB + MAX(WATOBD,WBDTOSVD) WORK(1) = REAL(LWMIN) * IF (WANTU.NE.1 .AND. .NOT. (LSAME(JOBU,'N'))) THEN INFO = -1 ELSE IF (WANTVT.NE.1 .AND. .NOT. (LSAME(JOBVT,'N'))) THEN INFO = -2 ELSE IF (LWORK.LT.LWMIN .AND. LWORK.NE.-1) THEN INFO = -19 END IF * END IF * IDUM1(1) = WANTU IDUM1(2) = WANTVT IF (LWORK.EQ.-1) THEN IDUM1(3) = -1 ELSE IDUM1(3) = 1 END IF IDUM2(1) = 1 IDUM2(2) = 2 IDUM2(3) = 19 CALL PCHK1MAT(M,3,N,4,IA,JA,DESCA,8,3,IDUM1,IDUM2,INFO) IF (INFO.EQ.0) THEN IF (WANTU.EQ.1) THEN CALL PCHK1MAT(M,3,SIZE,4,IU,JU,DESCU,13,0,IDUM1,IDUM2, + INFO) END IF IF (WANTVT.EQ.1) THEN CALL PCHK1MAT(SIZE,3,N,4,IVT,JVT,DESCVT,17,0,IDUM1, + IDUM2,INFO) END IF END IF * END IF * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PSGESVD',-INFO) RETURN ELSE IF (LWORK.EQ.-1) THEN GO TO 40 END IF * * Quick return if possible. * IF (M.LE.0 .OR. N.LE.0) GO TO 40 * * Get machine constants. * SAFMIN = PSLAMCH(DESCA(CTXT_),'Safe minimum') EPS = PSLAMCH(DESCA(CTXT_),'Precision') SMLNUM = SAFMIN/EPS BIGNUM = ONE/SMLNUM RMIN = SQRT(SMLNUM) RMAX = MIN(SQRT(BIGNUM),ONE/SQRT(SQRT(SAFMIN))) * * Scale matrix to allowable range, if necessary. * ANRM = PSLANGE('1',M,N,A,IA,JA,DESCA,WORK(INDWORK)) IF (ANRM.GT.ZERO .AND. ANRM.LT.RMIN) THEN ISCALE = 1 SIGMA = RMIN/ANRM ELSE IF (ANRM.GT.RMAX) THEN ISCALE = 1 SIGMA = RMAX/ANRM END IF * IF (ISCALE.EQ.1) THEN CALL PSLASCL('G',ONE,SIGMA,M,N,A,IA,JA,DESCA,INFO) END IF * CALL PSGEBRD(M,N,A,IA,JA,DESCA,WORK(INDD),WORK(INDE), + WORK(INDTAUQ),WORK(INDTAUP),WORK(INDWORK),LLWORK, + INFO) * * Copy D and E to all processes. * Array D is in local array of dimension: * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * Array E is in local array of dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * IF (M.GE.N) THEN * Distribute D CALL PSLARED1D(N+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PSLARED2D(M+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2), + WORK(INDWORK),LLWORK) ELSE * Distribute D CALL PSLARED2D(M+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2), + WORK(INDWORK),LLWORK) * Distribute E CALL PSLARED1D(N+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2), + WORK(INDWORK),LLWORK) END IF * * Prepare for calling PSBDSQR. * IF (M.GE.N) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * INDU = INDWORK INDV = INDU + SIZE*NRU*WANTU INDWORK = INDV + SIZE*NCVT*WANTVT * LDU = MAX(1,NRU) LDVT = MAX(1,SIZE) * CALL DESCINIT(DESCTU,M,SIZE,1,1,0,0,CONTEXTC,LDU,INFO) CALL DESCINIT(DESCTVT,SIZE,N,1,1,0,0,CONTEXTR,LDVT,INFO) * IF (WANTU.EQ.1) THEN CALL PSLASET('Full',M,SIZE,ZERO,ONE,WORK(INDU),1,1,DESCTU) ELSE NRU = 0 END IF * IF (WANTVT.EQ.1) THEN CALL PSLASET('Full',SIZE,N,ZERO,ONE,WORK(INDV),1,1,DESCTVT) ELSE NCVT = 0 END IF * CALL SBDSQR(UPLO,SIZE,NCVT,NRU,0,WORK(INDD2+IOFFD), + WORK(INDE2+IOFFE),WORK(INDV),SIZE,WORK(INDU),LDU,C,1, + WORK(INDWORK),INFO) * * Redistribute elements of U and VT in the block-cyclic fashion. * IF (WANTU.EQ.1) CALL PSGEMR2D(M,SIZE,WORK(INDU),1,1,DESCTU,U,IU, + JU,DESCU,DESCU(CTXT_)) * IF (WANTVT.EQ.1) CALL PSGEMR2D(SIZE,N,WORK(INDV),1,1,DESCTVT,VT, + IVT,JVT,DESCVT,DESCVT(CTXT_)) * * Set to ZERO "non-square" elements of the larger matrices U, VT. * IF (M.GT.N .AND. WANTU.EQ.1) THEN CALL PSLASET('Full',M-SIZE,SIZE,ZERO,ZERO,U,IA+SIZE,JU,DESCU) ELSE IF (N.GT.M .AND. WANTVT.EQ.1) THEN CALL PSLASET('Full',SIZE,N-SIZE,ZERO,ZERO,VT,IVT,JVT+SIZE, + DESCVT) END IF * * Multiply Householder rotations from bidiagonalized matrix. * IF (WANTU.EQ.1) CALL PSORMBR('Q','L','N',M,SIZE,N,A,IA,JA,DESCA, + WORK(INDTAUQ),U,IU,JU,DESCU, + WORK(INDWORK),LLWORK,INFO) * IF (WANTVT.EQ.1) CALL PSORMBR('P','R','T',SIZE,N,M,A,IA,JA,DESCA, + WORK(INDTAUP),VT,IVT,JVT,DESCVT, + WORK(INDWORK),LLWORK,INFO) * * Copy singular values into output array S. * DO 10 I = 1,SIZE S(I) = WORK(INDD2+IOFFD+I-1) 10 CONTINUE * * If matrix was scaled, then rescale singular values appropriately. * IF (ISCALE.EQ.1) THEN CALL SSCAL(SIZE,ONE/SIGMA,S,1) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF (SIZE.LE.ITHVAL) THEN J = SIZE K = 1 ELSE J = SIZE/ITHVAL K = ITHVAL END IF * DO 20 I = 1,J WORK(I+INDE) = S((I-1)*K+1) WORK(I+INDD2) = S((I-1)*K+1) 20 CONTINUE * CALL SGAMN2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDE),J,1,1,-1,-1,0) CALL SGAMX2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDD2),J,1,1,-1,-1,0) * DO 30 I = 1,J IF ((WORK(I+INDE)-WORK(I+INDD2)).NE.ZERO) THEN INFO = SIZE + 1 END IF 30 CONTINUE * 40 CONTINUE * CALL BLACS_GRIDEXIT(CONTEXTC) CALL BLACS_GRIDEXIT(CONTEXTR) * * End of PSGESVD * RETURN END scalapack-1.8.0/SRC/psorgql.f0000640000175000017500000002674510363532303015617 0ustar muammarmuammar SUBROUTINE PSORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORGQL generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PSGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PSGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JN, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSLARFB, $ PSLARFT, PSLASET, PSORG2L, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORGQL', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Set A(ia+m-n+jn-ja+1:ia-m+1,ja:jn) to zero. * CALL PSLASET( 'All', N-JN+JA-1, JN-JA+1, ZERO, ZERO, A, $ IA+M-N+JN-JA+1, JA, DESCA ) * * Use unblocked code for the first or only block. * CALL PSORG2L( M-N+JN-JA+1, JN-JA+1, JN-JA-N+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PSLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the left * CALL PSLARFB( 'Left', 'No transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H to rows ia:m-k+i+ib-1 of current block * CALL PSORG2L( M-N+J+JB-JA, JB, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia+m-n+j+jb-ja:ia+m-1,j:j+jb-1 of current block to * zero * CALL PSLASET( 'All', N-J-JB+JA, JB, ZERO, ZERO, A, $ IA+M-N+J+JB-JA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORGQL * END scalapack-1.8.0/SRC/pcdbsv.f0000640000175000017500000004535310363532303015405 0ustar muammarmuammar SUBROUTINE PCDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PCDBTRF and PCDBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * +max((max(bwl,bwu)*NRHS), max(bwl,bwu)*max(bwl,bwu)) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PCDBTRF, PCDBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PCDBTRF and PCDBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PCDBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU) * * Factor the matrix * CALL PCDBTRF( N, BWL, BWU, A, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PCDBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PCDBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBSV', -INFO ) RETURN END IF * RETURN * * End of PCDBSV * END scalapack-1.8.0/SRC/psgebrd.f0000640000175000017500000004007310363532303015544 0ustar muammarmuammar SUBROUTINE PSGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PSGEBRD reduces a real general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an orthogonal transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * orthogonal matrix Q as a product of elementary reflectors, * and the elements above the first superdiagonal, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. If M < N, the diagonal and the * first subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the orthogonal matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) REAL array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0 * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY, $ IW, J, JB, JS, JW, K, L, LWMIN, MN, MP, MYCOL, $ MYROW, NB, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ), $ IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PSELSET, PSGEBD2, PSGEMM, PSLABRD, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( MB_ ) IOFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) LWMIN = NB*( MP+NQ+1 ) + NQ * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IOFF.NE.MOD( JA-1, DESCA( NB_ ) ) ) THEN INFO = -5 ELSE IF( NB.NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEBRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * MN = MIN( M, N ) IF( MN.EQ.0 ) $ RETURN * * Initialize parameters. * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPY = MP * NB + 1 IPW = NQ * NB + IPY * CALL DESCSET( DESCWX, M+IOFF, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCWY, NB, N+IOFF, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * MP = NUMROC( M+IA-1, NB, MYROW, DESCA( RSRC_ ), NPROW ) NQ = NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) K = 1 JB = NB - IOFF IW = IOFF + 1 JW = IOFF + 1 * DO 10 L = 1, MN+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce rows and columns i:i+nb-1 to bidiagonal form and return * the matrices X and Y which are needed to update the unreduced * part of the matrix. * CALL PSLABRD( M-K+1, N-K+1, JB, A, I, J, DESCA, D, E, TAUQ, $ TAUP, WORK, IW, JW, DESCWX, WORK( IPY ), IW, $ JW, DESCWY, WORK( IPW ) ) * * Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using * an update of the form A := A - V*Y' - X*U'. * CALL PSGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, A, I+JB, J, DESCA, $ WORK( IPY ), IW, JW+JB, DESCWY, ONE, A, I+JB, $ J+JB, DESCA ) CALL PSGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, WORK, IW+JB, JW, DESCWX, A, I, $ J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * * Copy last off-diagonal elements of B back into sub( A ). * IF( M.GE.N ) THEN JS = MIN( INDXG2L( I+JB-1, NB, 0, DESCA( RSRC_ ), NPROW ), $ MP ) IF( JS.GT.0 ) $ CALL PSELSET( A, I+JB-1, J+JB, DESCA, E( JS ) ) ELSE JS = MIN( INDXG2L( J+JB-1, NB, 0, DESCA( CSRC_ ), NPCOL ), $ NQ ) IF( JS.GT.0 ) $ CALL PSELSET( A, I+JB, J+JB-1, DESCA, E( JS ) ) END IF * K = K + JB JB = NB IW = 1 JW = 1 DESCWX( M_ ) = DESCWX( M_ ) - JB DESCWX( RSRC_ ) = MOD( DESCWX( RSRC_ ) + 1, NPROW ) DESCWX( CSRC_ ) = MOD( DESCWX( CSRC_ ) + 1, NPCOL ) DESCWY( N_ ) = DESCWY( N_ ) - JB DESCWY( RSRC_ ) = MOD( DESCWY( RSRC_ ) + 1, NPROW ) DESCWY( CSRC_ ) = MOD( DESCWY( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the remainder of the matrix. * CALL PSGEBD2( M-K+1, N-K+1, A, IA+K-1, JA+K-1, DESCA, D, E, TAUQ, $ TAUP, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEBRD * END scalapack-1.8.0/SRC/pzlatrz.f0000640000175000017500000002272710363532303015632 0ustar muammarmuammar SUBROUTINE PZLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZLATRZ reduces the M-by-N ( M<=N ) complex upper trapezoidal * matrix sub( A ) = [A(IA:IA+M-1,JA:JA+M-1) A(IA:IA+M-1,JA+N-L:JA+N-1)] * to upper triangular form by means of unitary transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N unitary matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. L > 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements N-L+1 to N of the first M rows * of sub( A ), with the array TAU, represent the unitary matrix * Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), whose conjugate transpose is used to * introduce zeros into the (m - k + 1)th row of sub( A ), is given in * the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, IAROW, ICTXT, II, J, J1, MP, MYCOL, MYROW, $ NPCOL, NPROW COMPLEX*16 AII * .. * .. Local Arrays .. INTEGER DESCTAU( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL DESCSET, INFOG1L, PZELSET, PZLACGV, $ PZLARFG, PZLARZ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX * .. * .. Executable Statements .. * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) * CALL DESCSET( DESCTAU, DESCA( M_ ), 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, ICTXT, MAX( 1, MP ) ) * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, IAROW ) DO 10 I = II, MP TAU( I ) = ZERO 10 CONTINUE * ELSE * AII = ZERO * J1 = JA + N - L DO 20 I = IA+M-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * [ A(i, j) A(i,j1:ja+n-1) ] * CALL PZLACGV( 1, A, I, J, DESCA, DESCA( M_ ) ) CALL PZLACGV( L, A, I, J1, DESCA, DESCA( M_ ) ) CALL PZLARFG( L+1, AII, I, J, A, I, J1, DESCA, DESCA( M_ ), $ TAU ) * * Apply H(i) to A(ia:i-1,j:ja+n-1) from the right * CALL PZLARZ( 'Right', I-IA, JA+N-J, L, A, I, J1, DESCA, $ DESCA( M_ ), TAU, A, IA, J, DESCA, WORK ) CALL PZELSET( A, I, J, DESCA, DCONJG( AII ) ) * 20 CONTINUE * CALL PZLACGV( M, TAU, IA, 1, DESCTAU, 1 ) * END IF * RETURN * * End of PZLATRZ * END scalapack-1.8.0/SRC/pcgbtrs.f0000640000175000017500000011300110363532303015552 0ustar muammarmuammar SUBROUTINE PCGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWU, BWL, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV(*) COMPLEX A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCGBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PCGBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded distributed * matrix with bandwidth BWL, BWU. * * Routine PCGBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCGBTRF and this is stored in AF. If a linear system * is to be solved using PCGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NRHS*(NB+2*bwl+4*bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Marbwus Hegland, Australian Natonal University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC, $ FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, J, JA_NEW, L, LBWL, LBWU, LDBB, $ LDW, LLDA, LLDB, LM, LMJ, LN, LPTR, MYCOL, $ MYROW, NB, NEICOL, NP, NPACT, NPCOL, NPROW, $ NPSTR, NP_SAVE, ODD_SIZE, PART_OFFSET, $ RECOVERY_VAL, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WPTR * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (2*BWL+2*BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * BW = BWU+BWL * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCGBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.(BWL+BWU+1) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCGBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * * Check worksize * WORK_SIZE_MIN = NRHS*(NB+2*BWL+4*BWU) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PCGBTRS: worksize error ', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 16 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCGBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * * Move data into workspace - communicate/copy (overlap) * IF (MYCOL .LT. NPCOL-1) THEN CALL CGESD2D( ICTXT, BWU, NRHS, B(NB-BWU+1), LLDB, $ 0, MYCOL + 1) ENDIF * IF (MYCOL .LT. NPCOL-1) THEN LM = NB-BWU ELSE LM = NB ENDIF * IF (MYCOL .GT. 0) THEN WPTR = BWU+1 ELSE WPTR = 1 ENDIF * LDW = NB+BWU + 2*BW+BWU * CALL CLACPY( 'G', LM, NRHS, B(1), LLDB, WORK( WPTR ), LDW ) * * Zero out rest of work * DO 1501 J=1, NRHS DO 1502 L=WPTR+LM, LDW WORK( (J-1)*LDW+L ) = CZERO 1502 CONTINUE 1501 CONTINUE * IF (MYCOL .GT. 0) THEN CALL CGERV2D( ICTXT, BWU, NRHS, WORK(1), LDW, $ 0, MYCOL-1) ENDIF * ******************************************************************** * PHASE 1: Local computation phase -- Solve L*X = B ******************************************************************** * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF (MYCOL .NE. 0) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1+BWU ENDIF * IF (MYCOL .NE. NPCOL-1) THEN LM = NB - LBWU LN = NB - BW ELSE IF (MYCOL .NE. 0) THEN LM = ODD_SIZE + BWU LN = MAX(ODD_SIZE-BW,0) ELSE LM = N LN = MAX( N-BW, 0 ) ENDIF * DO 21 J = 1, LN * LMJ = MIN(LBWL,LM-J) L = IPIV( J ) * IF( L.NE.J ) THEN CALL CSWAP(NRHS, WORK(L), LDW, WORK(J), LDW) ENDIF * LPTR = BW+1 + (J-1)*LLDA + APTR * CALL CGERU(LMJ,NRHS,-CONE, A(LPTR),1, WORK(J),LDW, $ WORK(J+1),LDW) * 21 CONTINUE * ******************************************************************** * PHASE 2: Global computation phase -- Solve L*X = B ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU BN = BW ELSE BM = MIN(BW,ODD_SIZE) + BWU BN = MIN(BW,ODD_SIZE) ENDIF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = (NB+BWU)*BW + 1 LDBB = 2*BW + BWU * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. CALL CGETRS( 'N', N-LN, NRHS, AF(BBPTR+BW*LDBB), LDBB, $ IPIV(LN+1), WORK( LN+1 ), LDW, INFO) * ENDIF * * Loop over levels ... * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) is used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels 200 IF (NPACT .LE. 1) GOTO 300 * * Test if processor is active IF (MOD(MYCOL,NPSTR) .EQ. 0) THEN * * Send/Receive blocks * IF (MOD(MYCOL,2*NPSTR) .EQ. 0) THEN * NEICOL = MYCOL + NPSTR * IF (NEICOL/NPSTR .LE. NPACT-1) THEN * IF (NEICOL/NPSTR .LT. NPACT-1) THEN BMN = BW ELSE BMN = MIN(BW,NUMROC(N, NB, NEICOL, 0, NPCOL))+BWU ENDIF * CALL CGESD2D( ICTXT, BM, NRHS, $ WORK(LN+1), LDW, 0, NEICOL ) * IF( NPACT .NE. 2 )THEN * * Receive answers back from partner processor * CALL CGERV2D(ICTXT, BM+BMN-BW, NRHS, $ WORK( LN+1 ), LDW, 0, NEICOL ) * BM = BM+BMN-BW * ENDIF * ENDIF * ELSE * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * CALL CLACPY( 'G', BM, NRHS, WORK(LN+1), LDW, $ WORK(NB+BWU+BMN+1), LDW ) * CALL CGERV2D( ICTXT, BMN, NRHS, WORK( NB+BWU+1 ), $ LDW, 0, NEICOL ) * * and do the permutations and eliminations * IF (NPACT .NE. 2) THEN * * Solve locally for BW variables * CALL CLASWP( NRHS, WORK(NB+BWU+1), LDW, 1, BW, $ IPIV(LN+1), 1) * CALL CTRSM('L','L','N','U', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) * * Use soln just calculated to update RHS * CALL CGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, $ -CONE, AF(BBPTR+BW*LDBB+BW), LDBB, $ WORK(NB+BWU+1), LDW, $ CONE, WORK(NB+BWU+1+BW), LDW ) * * Give answers back to partner processor * CALL CGESD2D( ICTXT, BM+BMN-BW, NRHS, $ WORK(NB+BWU+1+BW), LDW, 0, NEICOL ) * ELSE * * Finish up calculations for final level * CALL CLASWP( NRHS, WORK(NB+BWU+1), LDW, 1, BM+BMN, $ IPIV(LN+1), 1) * CALL CTRSM('L','L','N','U', BM+BMN, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) ENDIF * ENDIF * NPACT = (NPACT + 1)/2 NPSTR = NPSTR * 2 GOTO 200 * ENDIF * 300 CONTINUE * * ************************************** * BACKSOLVE ******************************************************************** * PHASE 2: Global computation phase -- Solve U*Y = X ******************************************************************** * IF (NPCOL.EQ.1) THEN * * In this case the loop over the levels will not be * performed. * In fact, the backsolve portion was done in the call to * CGETRS in the frontsolve. * ENDIF * * Compute variable needed to reverse loop structure in * reduced system. * RECOVERY_VAL = NPACT*NPSTR - NPCOL * * Loop over levels * Terminal values of NPACT and NPSTR from frontsolve are used * 2200 IF( NPACT .GE. NPCOL ) GOTO 2300 * NPSTR = NPSTR/2 * NPACT = NPACT*2 * * Have to adjust npact for non-power-of-2 * NPACT = NPACT-MOD( (RECOVERY_VAL/NPSTR), 2 ) * * Find size of submatrix in this proc at this level * IF( MYCOL/NPSTR .LT. NPACT-1 ) THEN BN = BW ELSE BN = MIN(BW, NUMROC(N, NB, NPCOL-1, 0, NPCOL) ) ENDIF * * If this processor is even in this level... * IF( MOD( MYCOL, 2*NPSTR ) .EQ. 0 ) THEN * NEICOL = MYCOL+NPSTR * IF( NEICOL/NPSTR .LE. NPACT-1 ) THEN * IF( NEICOL/NPSTR .LT. NPACT-1 ) THEN BMN = BW BNN = BW ELSE BMN = MIN(BW,NUMROC(N, NB, NEICOL, 0, NPCOL))+BWU BNN = MIN(BW, NUMROC(N, NB, NEICOL, 0, NPCOL) ) ENDIF * IF( NPACT .GT. 2 ) THEN * CALL CGESD2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * CALL CGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * ELSE * CALL CGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * ENDIF * ENDIF * ELSE * This processor is odd on this level * NEICOL = MYCOL - NPSTR * IF (NEICOL .EQ. 0) THEN BMN = BW - BWU ELSE BMN = BW ENDIF * IF( NEICOL .LT. NPCOL-1 ) THEN BNN = BW ELSE BNN = MIN(BW, NUMROC(N, NB, NEICOL, 0, NPCOL) ) ENDIF * IF( NPACT .GT. 2 ) THEN * * Move RHS to make room for received solutions * CALL CLACPY( 'G', BW, NRHS, WORK(NB+BWU+1), $ LDW, WORK(NB+BWU+BW+1), LDW ) * CALL CGERV2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * CALL CGEMM( 'N', 'N', BW, NRHS, BN, $ -CONE, AF(BBPTR), LDBB, $ WORK(LN+1), LDW, $ CONE, WORK(NB+BWU+BW+1), LDW ) * * IF( MYCOL .GT. NPSTR ) THEN * CALL CGEMM( 'N', 'N', BW, NRHS, BW, $ -CONE, AF(BBPTR+2*BW*LDBB), LDBB, $ WORK(LN+BW+1), LDW, $ CONE, WORK(NB+BWU+BW+1), LDW ) * ENDIF * CALL CTRSM('L','U','N','N', BW, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+BW+1), LDW) * * Send new solution to neighbor * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK( NB+BWU+BW+1 ), LDW, 0, NEICOL ) * * Copy new solution into expected place * CALL CLACPY( 'G', BW, NRHS, WORK(NB+BWU+1+BW), $ LDW, WORK(LN+BW+1), LDW ) * ELSE * * Solve with local diagonal block * CALL CTRSM( 'L','U','N','N', BN+BNN, NRHS, CONE, $ AF(BBPTR+BW*LDBB), LDBB, WORK(NB+BWU+1), LDW) * * Send new solution to neighbor * CALL CGESD2D( ICTXT, BW, NRHS, $ WORK(NB+BWU+1), LDW, 0, NEICOL ) * * Shift solutions into expected positions * CALL CLACPY( 'G', BNN+BN-BW, NRHS, WORK(NB+BWU+1+BW), $ LDW, WORK(LN+1), LDW ) * * IF( (NB+BWU+1) .NE. (LN+1+BW) ) THEN * * Copy one row at a time since spaces may overlap * DO 1064 J=1, BW CALL CCOPY( NRHS, WORK(NB+BWU+J), LDW, $ WORK(LN+BW+J), LDW ) 1064 CONTINUE * ENDIF * ENDIF * ENDIF * GOTO 2200 * 2300 CONTINUE * End of loop over levels * ******************************************************************** * PHASE 1: (Almost) Local computation phase -- Solve U*Y = X ******************************************************************** * * Reset BM to value it had before reduced system frontsolve... * IF (MYCOL .NE. NPCOL-1) THEN BM = BW - LBWU ELSE BM = MIN(BW,ODD_SIZE) + BWU ENDIF * * First metastep is to account for the fillin blocks AF * IF( MYCOL .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, BW, NRHS, WORK( NB-BW+1 ), $ LDW, 0, MYCOL+1 ) * ENDIF * IF( MYCOL .GT. 0 ) THEN * CALL CGERV2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), $ LDW, 0, MYCOL-1 ) * * Modify local right hand sides with received rhs's * CALL CGEMM( 'N', 'N', LM-BM, NRHS, BW, -CONE, $ AF( 1 ), LM, WORK( NB+BWU+1 ), LDW, CONE, $ WORK( 1 ), LDW ) * ENDIF * DO 2021 J = LN, 1, -1 * LMJ = MIN( BW, ODD_SIZE-1 ) * LPTR = BW-1+J*LLDA+APTR * * In the following, the TRANS=T option is used to reverse * the order of multiplication, not as a true transpose * CALL CGEMV( 'T', LMJ, NRHS, -CONE, WORK( J+1), LDW, $ A( LPTR ), LLDA-1, CONE, WORK( J ), LDW ) * * Divide by diagonal element * CALL CSCAL( NRHS, CONE/A( LPTR-LLDA+1 ), $ WORK( J ), LDW ) 2021 CONTINUE * * * CALL CLACPY( 'G', ODD_SIZE, NRHS, WORK( 1 ), LDW, $ B( 1 ), LLDB ) * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * RETURN * * End of PCGBTRS * END scalapack-1.8.0/SRC/psggrqf.f0000640000175000017500000003623710363532303015576 0ustar muammarmuammar SUBROUTINE PSGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PSGGRQF computes a generalized RQ factorization of * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1): * * sub( A ) = R*Q, sub( B ) = Z*T*Q, * * where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal * matrix, and R and T assume one of the forms: * * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, * N-M M ( R21 ) N * N * * where R12 or R21 is upper triangular, and * * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, * ( 0 ) P-N P N-P * N * * where T11 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GRQ * factorization of sub( A ) and sub( B ) implicitly gives the RQ * factorization of sub( A )*inv( sub( B ) ): * * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z' * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( B ). P >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and sub( B ). * N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAUA, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors which represent the orthogonal unitary matrix Q. * TAUA is tied to the distributed matrix A (see Further * Details). * * B (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, the local pieces of the P-by-N distributed matrix * sub( B ) which is to be factored. On exit, the elements on * and above the diagonal of sub( B ) contain the min(P,N) by N * upper trapezoidal matrix T (T is upper triangular if P >= N); * the elements below the diagonal, with the array TAUB, * represent the orthogonal matrix Z as a product of elementary * reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) REAL, array, dimension * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors * TAUB of the elementary reflectors which represent the * orthogonal matrix Z. TAUB is tied to the distributed matrix * B (see Further Details). * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ), * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) + * MB_A * MB_A, * NB_B * ( PpB0 + NqB0 + NB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1). * To form Q explicitly, use ScaLAPACK subroutine PSORGRQ. * To use Q to update another matrix, use ScaLAPACK subroutine PSORMRQ. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a real scalar, and v is a real vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1). * To form Z explicitly, use ScaLAPACK subroutine PSORGQR. * To use Z to update another matrix, use ScaLAPACK subroutine PSORMQR. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0, NQB0, PPB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSGEQRF, $ PSGERQF, PSORMRQ, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC INT, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( M, 1, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( P, 2, N, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) PPB0 = NUMROC( P+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NQB0 = NUMROC( N+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ), $ MAX( MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( PPB0 + NQB0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ), $ DESCB( NB_ ) * ( PPB0 + NQB0 + DESCB( NB_ ) ) ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IACOL.NE.IBCOL .OR. ICOFFA.NE.ICOFFB ) THEN INFO = -11 ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -1204 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( M, 1, N, 3, IA, JA, DESCA, 7, P, 2, N, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGGRQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q * CALL PSGERQF( M, N, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := sub( B )*Q' * CALL PSORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), A, $ MAX( IA, IA+M-N ), JA, DESCA, TAUA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) LWMIN = MAX( LWMIN, INT( WORK( 1 ) ) ) * * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T * CALL PSGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = REAL( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * RETURN * * End of PSGGRQF * END scalapack-1.8.0/SRC/psormbr.f0000640000175000017500000005427610363532303015614 0ustar muammarmuammar SUBROUTINE PSORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * If VECT = 'Q', PSORMBR overwrites the general real distributed M-by-N * matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * If VECT = 'P', PSORMBR overwrites sub( C ) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': P * sub( C ) sub( C ) * P * TRANS = 'T': P**T * sub( C ) sub( C ) * P**T * * Here Q and P**T are the orthogonal distributed matrices determined by * PSGEBRD when reducing a real distributed matrix A(IA:*,JA:*) to * bidiagonal form: A(IA:*,JA:*) = Q * B * P**T. Q and P**T are defined * as products of elementary reflectors H(i) and G(i) respectively. * * Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the * order of the orthogonal matrix Q or P**T that is applied. * * If VECT = 'Q', A(IA:*,JA:*) is assumed to have been an NQ-by-K * matrix: * if nq >= k, Q = H(1) H(2) . . . H(k); * if nq < k, Q = H(1) H(2) . . . H(nq-1). * * If VECT = 'P', A(IA:*,JA:*) is assumed to have been a K-by-NQ * matrix: * if k < nq, P = G(1) G(2) . . . G(k); * if k >= nq, P = G(1) G(2) . . . G(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * VECT (global input) CHARACTER * = 'Q': apply Q or Q**T; * = 'P': apply P or P**T. * * SIDE (global input) CHARACTER * = 'L': apply Q, Q**T, P or P**T from the Left; * = 'R': apply Q, Q**T, P or P**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q or P; * = 'T': Transpose, apply Q**T or P**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * If VECT = 'Q', the number of columns in the original * distributed matrix reduced by PSGEBRD. * If VECT = 'P', the number of rows in the original * distributed matrix reduced by PSGEBRD. * K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+MIN(NQ,K)-1)) if * VECT='Q', and (LLD_A,LOCc(JA+NQ-1)) if VECT = 'P'. NQ = M * if SIDE = 'L', and NQ = N otherwise. The vectors which * define the elementary reflectors H(i) and G(i), whose * products determine the matrices Q and P, as returned by * PSGEBRD. * If VECT = 'Q', LLD_A >= max(1,LOCr(IA+NQ-1)); * if VECT = 'P', LLD_A >= max(1,LOCr(IA+MIN(NQ,K)-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL array, dimension * LOCc(JA+MIN(NQ,K)-1) if VECT = 'Q', LOCr(IA+MIN(NQ,K)-1) if * VECT = 'P', TAU(i) must contain the scalar factor of the * elementary reflector H(i) or G(i), which determines Q or P, * as returned by PDGEBRD in its array argument TAUQ or TAUP. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, if VECT='Q', sub( C ) is overwritten by Q*sub( C ) * or Q'*sub( C ) or sub( C )*Q' or sub( C )*Q; if VECT='P, * sub( C ) is overwritten by P*sub( C ) or P'*sub( C ) or * sub( C )*P or sub( C )*P'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * NQ = M; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA+1; JAA=JA; MI=M-1; NI=N; ICC=IC+1; JCC=JC; * end if * else if SIDE = 'R', * NQ = N; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA; JAA=JA+1; MI=M; NI=N-1; ICC=IC; JCC=JC+1; * end if * end if * * If VECT = 'Q', * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * else if VECT <> 'Q', * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( MI+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * end if * * where LCMP = LCM / NPROW, LCMQ = LCM / NPCOL, with * LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JAA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( MI+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If VECT = 'Q', * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * else * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN CHARACTER TRANST INTEGER IAA, IACOL, IAROW, ICC, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IINFO, IROFFA, IROFFC, JAA, JCC, $ LCM, LCMP, LCMQ, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSORMLQ, $ PSORMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE APPLYQ = LSAME( VECT, 'Q' ) LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q or P * IF( LEFT ) THEN NQ = M IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA + 1 JAA = JA MI = M - 1 NI = N ICC = IC + 1 JCC = JC END IF * IF( APPLYQ ) THEN CALL CHK1MAT( M, 4, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, M, 4, IA, JA, DESCA, 10, INFO ) END IF ELSE NQ = N IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA JAA = JA + 1 MI = M NI = N - 1 ICC = IC JCC = JC + 1 END IF * IF( APPLYQ ) THEN CALL CHK1MAT( N, 5, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, N, 5, IA, JA, DESCA, 10, INFO ) END IF END IF CALL CHK1MAT( M, 4, N, 5, IC, JC, DESCC, 15, INFO ) * IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) ICOFFA = MOD( JAA-1, DESCA( NB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JAA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( APPLYQ ) THEN IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF ELSE * IF( LEFT ) THEN MQA0 = NUMROC( MI+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ MI+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN INFO = -1 ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -2 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -3 ELSE IF( K.LT.0 ) THEN INFO = -6 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( APPLYQ .AND. LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( APPLYQ .AND. LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( APPLYQ ) THEN IDUM1( 1 ) = ICHAR( 'Q' ) ELSE IDUM1( 1 ) = ICHAR( 'P' ) END IF IDUM2( 1 ) = 1 IF( LEFT ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'R' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'T' ) END IF IDUM2( 3 ) = 3 IDUM1( 4 ) = K IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( APPLYQ ) THEN IF( LEFT ) THEN CALL PCHK2MAT( M, 4, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( N, 5, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF ELSE IF( LEFT ) THEN CALL PCHK2MAT( K, 6, M, 4, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( K, 6, N, 5, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( APPLYQ ) THEN * * Apply Q * IF( NQ.GE.K ) THEN * * Q was determined by a call to PSGEBRD with nq >= k * CALL PSORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * Q was determined by a call to PSGEBRD with nq < k * CALL PSORMQR( SIDE, TRANS, MI, NI, NQ-1, A, IA+1, JA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) END IF ELSE * * Apply P * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF IF( NQ.GT.K ) THEN * * P was determined by a call to PSGEBRD with nq > k * CALL PSORMLQ( SIDE, TRANST, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * P was determined by a call to PSGEBRD with nq <= k * CALL PSORMLQ( SIDE, TRANST, MI, NI, NQ-1, A, IA, JA+1, $ DESCA, TAU, C, ICC, JCC, DESCC, WORK, LWORK, $ IINFO ) END IF END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMBR * END scalapack-1.8.0/SRC/pdlatra.f0000640000175000017500000001545610363532303015554 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLATRA computes the trace of an N-by-N distributed matrix sub( A ) * denoting A( IA:IA+N-1, JA:JA+N-1 ). The result is left on every * process of the grid. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix the trace * is to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER ICURCOL, ICURROW, II, IOFFA, J, JB, JJ, JN, $ LDA, LL, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION TRACE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGSUM2D, INFOG2L * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * TRACE = ZERO IF( N.EQ.0 ) THEN PDLATRA = TRACE RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ ICURROW, ICURCOL ) * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first diagonal block separately * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over the remaining block of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * CALL DGSUM2D( DESCA( CTXT_ ), 'All', ' ', 1, 1, TRACE, 1, -1, $ MYCOL ) * PDLATRA = TRACE * RETURN * * End of PDLATRA * END scalapack-1.8.0/SRC/pcstein.f0000640000175000017500000006031010602576752015573 0ustar muammarmuammar SUBROUTINE PCSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N REAL ORFAC * .. * .. Array Arguments .. INTEGER DESCZ( * ), IBLOCK( * ), ICLUSTR( * ), $ IFAIL( * ), ISPLIT( * ), IWORK( * ) REAL D( * ), E( * ), GAP( * ), W( * ), WORK( * ) COMPLEX Z( * ) * .. * * Purpose * ======= * * PCSTEIN computes the eigenvectors of a symmetric tridiagonal matrix * in parallel, using inverse iteration. The eigenvectors found * correspond to user specified eigenvalues. PCSTEIN does not * orthogonalize vectors that are on different processes. The extent * of orthogonalization is controlled by the input parameter LWORK. * Eigenvectors that are to be orthogonalized are computed by the same * process. PCSTEIN decides on the allocation of work among the * processes and then calls SSTEIN2 (modified LAPACK routine) on each * individual process. If insufficient workspace is allocated, the * expected orthogonalization may not be done. * * Note : If the eigenvectors obtained are not orthogonal, increase * LWORK and run the code again. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * P = NPROW * NPCOL is the total number of processes * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input) REAL array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (global input) REAL array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * * M (global input) INTEGER * The total number of eigenvectors to be found. 0 <= M <= N. * * W (global input/global output) REAL array, dim (M) * On input, the first M elements of W contain all the * eigenvalues for which eigenvectors are to be computed. The * eigenvalues should be grouped by split-off block and ordered * from smallest to largest within the block (The output array * W from PSSTEBZ with ORDER='b' is expected here). This * array should be replicated on all processes. * On output, the first M elements contain the input * eigenvalues in ascending order. * * Note : To obtain orthogonal vectors, it is best if * eigenvalues are computed to highest accuracy ( this can be * done by setting ABSTOL to the underflow threshold = * SLAMCH('U') --- ABSTOL is an input parameter * to PSSTEBZ ) * * IBLOCK (global input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W -- 1 for eigenvalues belonging to the * first submatrix from the top, 2 for those belonging to * the second submatrix, etc. (The output array IBLOCK * from PSSTEBZ is expected here). * * ISPLIT (global input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N (The output array * ISPLIT from PSSTEBZ is expected here.) * * ORFAC (global input) REAL * ORFAC specifies which eigenvectors should be orthogonalized. * Eigenvectors that correspond to eigenvalues which are within * ORFAC*||T|| of each other are to be orthogonalized. * However, if the workspace is insufficient (see LWORK), this * tolerance may be decreased until all eigenvectors to be * orthogonalized can be stored in one process. * No orthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX array, * dimension (DESCZ(DLEN_), N/npcol + NB) * Z contains the computed eigenvectors associated with the * specified eigenvalues. Any vector which fails to converge is * set to its current iterate after MAXITS iterations ( See * SSTEIN2 ). * On output, Z is distributed across the P processes in block * cyclic format. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local workspace/global output) REAL array, * dimension ( LWORK ) * On output, WORK(1) gives a lower bound on the * workspace ( LWORK ) that guarantees the user desired * orthogonalization (see ORFAC). * Note that this may overestimate the minimum workspace needed. * * LWORK (local input) integer * LWORK controls the extent of orthogonalization which can be * done. The number of eigenvectors for which storage is * allocated on each process is * NVEC = floor(( LWORK- max(5*N,NP00*MQ00) )/N). * Eigenvectors corresponding to eigenvalue clusters of size * NVEC - ceil(M/P) + 1 are guaranteed to be orthogonal ( the * orthogonality is similar to that obtained from CSTEIN2). * Note : LWORK must be no smaller than: * max(5*N,NP00*MQ00) + ceil(M/P)*N, * and should have the same input value on all processes. * It is the minimum value of LWORK input on different processes * that is significant. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/global output) INTEGER array, * dimension ( 3*N+P+1 ) * On return, IWORK(1) contains the amount of integer workspace * required. * On return, the IWORK(2) through IWORK(P+2) indicate * the eigenvectors computed by each process. Process I computes * eigenvectors indexed IWORK(I+2)+1 thru' IWORK(I+3). * * LIWORK (local input) INTEGER * Size of array IWORK. Must be >= 3*N + P + 1 * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) integer array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after MAXITS * iterations (as in CSTEIN), then INFO > 0 is returned. * If mod(INFO,M+1)>0, then * for I=1 to mod(INFO,M+1), the eigenvector * corresponding to the eigenvalue W(IFAIL(I)) failed to * converge ( W refers to the array of eigenvalues on output ). * * ICLUSTR (global output) integer array, dimension (2*P) * This output array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace (see LWORK, * ORFAC and INFO). Eigenvectors corresponding to clusters of * eigenvalues indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), I = 1 to * INFO/(M+1), could not be orthogonalized due to lack of * workspace. Hence the eigenvectors corresponding to these * clusters may not be orthogonal. ICLUSTR is a zero terminated * array --- ( ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0 ) * if and only if K is the number of clusters. * * GAP (global output) REAL array, dimension (P) * This output array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. The INFO/M output * values in this array correspond to the INFO/(M+1) clusters * indicated by the array ICLUSTR. As a result, the dot product * between eigenvectors corresponding to the I^th cluster may be * as high as ( O(n)*macheps ) / GAP(I). * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * < 0 : if INFO = -I, the I-th argument had an illegal value * > 0 : if mod(INFO,M+1) = I, then I eigenvectors failed to * converge in MAXITS iterations. Their indices are * stored in the array IFAIL. * if INFO/(M+1) = I, then eigenvectors corresponding to * I clusters of eigenvalues could not be orthogonalized * due to insufficient workspace. The indices of the * clusters are stored in the array ICLUSTR. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, IGEBR2D, $ IGEBS2D, PCHK1MAT, PCLAEVSWP, PXERBLA, SGEBR2D, $ SGEBS2D, SLASRT2, SSTEIN2 * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, NEGONE, ODM1, FIVE, ODM3, ODM18 PARAMETER ( ZERO = 0.0E+0, NEGONE = -1.0E+0, $ ODM1 = 1.0E-1, FIVE = 5.0E+0, ODM3 = 1.0E-3, $ ODM18 = 1.0E-18 ) * .. * .. Local Scalars .. LOGICAL LQUERY, SORTED INTEGER B1, BN, BNDRY, CLSIZ, COL, I, IFIRST, IINFO, $ ILAST, IM, INDRW, ITMP, J, K, LGCLSIZ, LLWORK, $ LOAD, LOCINFO, MAXVEC, MQ00, MYCOL, MYROW, $ NBLK, NERR, NEXT, NP00, NPCOL, NPROW, NVS, $ OLNBLK, P, ROW, SELF, TILL, TOTERR REAL DIFF, MINGAP, ONENRM, ORGFAC, ORTOL, TMPFAC * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1200+CTXT_ ) ELSE * * Make sure that NPROW>0 and NPCOL>0 before calling NUMROC * CALL CHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN * * Now we know that our context is good enough to * perform the rest of the checks * NP00 = NUMROC( N, DESCZ( MB_ ), 0, 0, NPROW ) MQ00 = NUMROC( M, DESCZ( NB_ ), 0, 0, NPCOL ) P = NPROW*NPCOL * * Compute the maximum number of vectors per process * LLWORK = LWORK CALL IGAMN2D( DESCZ( CTXT_ ), 'A', ' ', 1, 1, LLWORK, 1, 1, $ 1, -1, -1, -1 ) INDRW = MAX( 5*N, NP00*MQ00 ) IF( N.NE.0 ) $ MAXVEC = ( LLWORK-INDRW ) / N LOAD = ICEIL( M, P ) IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN TMPFAC = ORFAC CALL SGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1 ) ELSE CALL SGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1, 0, 0 ) END IF * LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( MAXVEC.LT.LOAD .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.3*N+P+1 .AND. .NOT.LQUERY ) THEN INFO = -16 ELSE DO 10 I = 2, M IF( IBLOCK( I ).LT.IBLOCK( I-1 ) ) THEN INFO = -6 GO TO 20 END IF IF( IBLOCK( I ).EQ.IBLOCK( I-1 ) .AND. W( I ).LT. $ W( I-1 ) ) THEN INFO = -5 GO TO 20 END IF 10 CONTINUE 20 CONTINUE IF( INFO.EQ.0 ) THEN IF( ABS( TMPFAC-ORFAC ).GT.FIVE*ABS( TMPFAC ) ) $ INFO = -8 END IF END IF * END IF IDUM1( 1 ) = M IDUM2( 1 ) = 4 CALL PCHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, 1, IDUM1, IDUM2, $ INFO ) WORK( 1 ) = REAL( MAX( 5*N, NP00*MQ00 )+ICEIL( M, P )*N ) IWORK( 1 ) = 3*N + P + 1 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PCSTEIN', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN RETURN END IF * DO 30 I = 1, M IFAIL( I ) = 0 30 CONTINUE DO 40 I = 1, P + 1 IWORK( I ) = 0 40 CONTINUE DO 50 I = 1, P GAP( I ) = NEGONE ICLUSTR( 2*I-1 ) = 0 ICLUSTR( 2*I ) = 0 50 CONTINUE * * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC ELSE TMPFAC = ODM3 END IF ORGFAC = TMPFAC * * Allocate the work among the processes * ILAST = M / LOAD IF( MOD( M, LOAD ).EQ.0 ) $ ILAST = ILAST - 1 OLNBLK = -1 NVS = 0 NEXT = 1 IM = 0 ONENRM = ZERO DO 100 I = 0, ILAST - 1 NEXT = NEXT + LOAD J = NEXT - 1 IF( J.GT.NVS ) THEN NBLK = IBLOCK( NEXT ) IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN * * Compute orthogonalization criterion * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 60 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 60 CONTINUE OLNBLK = NBLK END IF TILL = NVS + MAXVEC 70 CONTINUE J = NEXT - 1 IF( TMPFAC.GT.ODM18 ) THEN ORTOL = TMPFAC*ONENRM DO 80 J = NEXT - 1, MIN( TILL, M-1 ) IF( IBLOCK( J+1 ).NE.IBLOCK( J ) .OR. W( J+1 )- $ W( J ).GE.ORTOL ) THEN GO TO 90 END IF 80 CONTINUE IF( J.EQ.M .AND. TILL.GE.M ) $ GO TO 90 TMPFAC = TMPFAC*ODM1 GO TO 70 END IF 90 CONTINUE J = MIN( J, TILL ) END IF IF( SELF.EQ.I ) $ IM = MAX( 0, J-NVS ) * IWORK( I+1 ) = NVS NVS = MAX( J, NVS ) 100 CONTINUE IF( SELF.EQ.ILAST ) $ IM = M - NVS IWORK( ILAST+1 ) = NVS DO 110 I = ILAST + 2, P + 1 IWORK( I ) = M 110 CONTINUE * CLSIZ = 1 LGCLSIZ = 1 ILAST = 0 NBLK = 0 BNDRY = 2 K = 1 DO 140 I = 1, M IF( IBLOCK( I ).NE.NBLK ) THEN NBLK = IBLOCK( I ) IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 120 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 120 CONTINUE * END IF IF( I.GT.1 ) THEN DIFF = W( I ) - W( I-1 ) IF( IBLOCK( I ).NE.IBLOCK( I-1 ) .OR. I.EQ.M .OR. DIFF.GT. $ ORGFAC*ONENRM ) THEN IFIRST = ILAST IF( I.EQ.M ) THEN IF( IBLOCK( M ).NE.IBLOCK( M-1 ) .OR. DIFF.GT.ORGFAC* $ ONENRM ) THEN ILAST = M - 1 ELSE ILAST = M END IF ELSE ILAST = I - 1 END IF CLSIZ = ILAST - IFIRST IF( CLSIZ.GT.1 ) THEN IF( LGCLSIZ.LT.CLSIZ ) $ LGCLSIZ = CLSIZ MINGAP = ONENRM 130 CONTINUE IF( BNDRY.GT.P+1 ) $ GO TO 150 IF( IWORK( BNDRY ).GT.IFIRST .AND. IWORK( BNDRY ).LT. $ ILAST ) THEN MINGAP = MIN( W( IWORK( BNDRY )+1 )- $ W( IWORK( BNDRY ) ), MINGAP ) ELSE IF( IWORK( BNDRY ).GE.ILAST ) THEN IF( MINGAP.LT.ONENRM ) THEN ICLUSTR( 2*K-1 ) = IFIRST + 1 ICLUSTR( 2*K ) = ILAST GAP( K ) = MINGAP / ONENRM K = K + 1 END IF GO TO 140 END IF BNDRY = BNDRY + 1 GO TO 130 END IF END IF END IF 140 CONTINUE 150 CONTINUE INFO = ( K-1 )*( M+1 ) * * Call SSTEIN2 to find the eigenvectors * CALL SSTEIN2( N, D, E, IM, W( IWORK( SELF+1 )+1 ), $ IBLOCK( IWORK( SELF+1 )+1 ), ISPLIT, ORGFAC, $ WORK( INDRW+1 ), N, WORK, IWORK( P+2 ), $ IFAIL( IWORK( SELF+1 )+1 ), LOCINFO ) * * Redistribute the eigenvector matrix to conform with the block * cyclic distribution of the input matrix * * DO 160 I = 1, M IWORK( P+1+I ) = I 160 CONTINUE * CALL SLASRT2( 'I', M, W, IWORK( P+2 ), IINFO ) * DO 170 I = 1, M IWORK( M+P+1+IWORK( P+1+I ) ) = I 170 CONTINUE * * DO 180 I = 1, LOCINFO ITMP = IWORK( SELF+1 ) + I IFAIL( ITMP ) = IFAIL( ITMP ) + ITMP - I IFAIL( ITMP ) = IWORK( M+P+1+IFAIL( ITMP ) ) 180 CONTINUE * DO 190 I = 1, K - 1 ICLUSTR( 2*I-1 ) = IWORK( M+P+1+ICLUSTR( 2*I-1 ) ) ICLUSTR( 2*I ) = IWORK( M+P+1+ICLUSTR( 2*I ) ) 190 CONTINUE * * * Still need to apply the above permutation to IFAIL * * TOTERR = 0 DO 210 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, LOCINFO, 1 ) IF( LOCINFO.NE.0 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', LOCINFO, 1, $ IFAIL( IWORK( I )+1 ), LOCINFO ) DO 200 J = 1, LOCINFO IFAIL( TOTERR+J ) = IFAIL( IWORK( I )+J ) 200 CONTINUE TOTERR = TOTERR + LOCINFO END IF ELSE * ROW = ( I-1 ) / NPCOL COL = MOD( I-1, NPCOL ) * CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, NERR, 1, $ ROW, COL ) IF( NERR.NE.0 ) THEN CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', NERR, 1, $ IFAIL( TOTERR+1 ), NERR, ROW, COL ) TOTERR = TOTERR + NERR END IF END IF 210 CONTINUE INFO = INFO + TOTERR * * CALL PCLAEVSWP( N, WORK( INDRW+1 ), N, Z, IZ, JZ, DESCZ, IWORK, $ IWORK( M+P+2 ), WORK, INDRW ) * DO 220 I = 2, P IWORK( I ) = IWORK( M+P+1+IWORK( I ) ) 220 CONTINUE * * * Sort the IWORK array * * 230 CONTINUE SORTED = .TRUE. DO 240 I = 2, P - 1 IF( IWORK( I ).GT.IWORK( I+1 ) ) THEN ITMP = IWORK( I+1 ) IWORK( I+1 ) = IWORK( I ) IWORK( I ) = ITMP SORTED = .FALSE. END IF 240 CONTINUE IF( .NOT.SORTED ) $ GO TO 230 * DO 250 I = P + 1, 1, -1 IWORK( I+1 ) = IWORK( I ) 250 CONTINUE * WORK( 1 ) = ( LGCLSIZ+LOAD-1 )*N + INDRW IWORK( 1 ) = 3*N + P + 1 * * End of PCSTEIN * END scalapack-1.8.0/SRC/pslase2.f0000640000175000017500000003735210363532303015475 0ustar muammarmuammar SUBROUTINE PSLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASE2 initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. PSLASE2 requires that only dimension of the matrix * operand is distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) REAL * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) REAL * The constant to which the diagonal elements are to be set. * * A (local output) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, ICOFFA, II, IIA, $ IIBEG, IIEND, IINXT, ILEFT, IRIGHT, IROFFA, $ ITOP, JJ, JJA, JJBEG, JJEND, JJNXT, LDA, MBA, $ MP, MPA, MYCOL, MYDIST, MYROW, NBA, NPCOL, $ NPROW, NQ, NQA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, SLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MB_A * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MPA.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEG = IIA IIEND = IIA + MPA - 1 IINXT = MIN( ICEIL( IIBEG, MBA ) * MBA, IIEND ) * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL SLASET( UPLO, IINXT-IIBEG+1, N-ITOP, ALPHA, BETA, $ A( IIBEG+(JJA+ITOP-1)*LDA ), LDA ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEG = IINXT +1 IINXT = MIN( IINXT+MBA, IIEND ) GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL SLASET( 'All', MP, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL SLASET( UPLO, MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * II = IIA JJ = JJA MP = MPA IBASE = MIN( ITOP+MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 30 CONTINUE IF( JJ.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL SLASET( 'All', MPA, ITOP-JJ+JJA, ALPHA, ALPHA, $ A( IIA+(JJ-1)*LDA ), LDA ) CALL SLASET( 'All', MPA-MP, HEIGHT, ALPHA, ALPHA, $ A( IIA+(JJA+ITOP-1)*LDA ), LDA ) CALL SLASET( 'All', MP, HEIGHT, ALPHA, BETA, $ A( II+(JJA+ITOP-1)*LDA ), LDA ) MP = MAX( 0, MP - HEIGHT ) II = II + HEIGHT JJ = JJA + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 30 END IF * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQA.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEG = JJA JJEND = JJA + NQA - 1 JJNXT = MIN( ICEIL( JJBEG, NBA ) * NBA, JJEND ) * 40 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL SLASET( UPLO, M-ILEFT, JJNXT-JJBEG+1, ALPHA, $ BETA, A( IIA+ILEFT+(JJBEG-1)*LDA ), LDA ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBA, JJEND ) GO TO 40 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 50 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL SLASET( 'All', ILEFT-II+IIA, NQ, ALPHA, ALPHA, $ A( II+(JJ-1)*LDA ), LDA ) CALL SLASET( UPLO, WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 50 END IF * ELSE * II = IIA JJ = JJA NQ = NQA IRIGHT = MIN( ILEFT+NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 60 CONTINUE IF( II.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL SLASET( 'All', ILEFT-II+IIA, NQA, ALPHA, ALPHA, $ A( II+(JJA-1)*LDA ), LDA ) CALL SLASET( 'All', WIDE, NQA-NQ, ALPHA, ALPHA, $ A( IIA+ILEFT+(JJA-1)*LDA ), LDA ) CALL SLASET( 'All', WIDE, NQ, ALPHA, BETA, $ A( IIA+ILEFT+(JJ-1)*LDA ), LDA ) NQ = MAX( 0, NQ - WIDE ) II = IIA + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 60 END IF * END IF * END IF * END IF * RETURN * * End of PSLASE2 * END scalapack-1.8.0/SRC/pztrcon.f0000640000175000017500000004044610363532303015621 0ustar muammarmuammar SUBROUTINE PZTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LRWORK, LWORK, N DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZTRCON estimates the reciprocal of the condition number of a * triangular distributed matrix A(IA:IA+N-1,JA:JA+N-1), in either the * 1-norm or the infinity-norm. * * The norm of A(IA:IA+N-1,JA:JA+N-1) is computed and an estimate is * obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), then the reciprocal * of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies whether the 1-norm condition number or the * infinity-norm condition number is required: * = '1' or 'O': 1-norm; * = 'I': Infinity-norm. * * UPLO (global input) CHARACTER * = 'U': A(IA:IA+N-1,JA:JA+N-1) is upper triangular; * = 'L': A(IA:IA+N-1,JA:JA+N-1) is lower triangular. * * DIAG (global input) CHARACTER * = 'N': A(IA:IA+N-1,JA:JA+N-1) is non-unit triangular; * = 'U': A(IA:IA+N-1,JA:JA+N-1) is unit triangular. * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the triangular distributed * matrix A(IA:IA+N-1,JA:JA+N-1). If UPLO = 'U', the leading * N-by-N upper triangular part of this distributed matrix con- * tains the upper triangular matrix, and its strictly lower * triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of this ditributed * matrix contains the lower triangular matrix, and the strictly * upper triangular part is not referenced. If DIAG = 'U', the * diagonal elements of A(IA:IA+N-1,JA:JA+N-1) are also not * referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*CEIL(P-1,Q),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(Q-1,P)) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, NOUNIT, ONENRM, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPN, IPV, IPW, $ IPX, IROFF, IV, IX, IXX, JJA, JV, JX, KASE, $ KASE1, LRWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, $ NPMOD, NPROW, NQMOD DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM COMPLEX*16 WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 5 ), $ IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PCHK1MAT, $ PZAMAX, PZLATRS, PZLACON, PZDRSCL, $ ZGEBR2D, ZGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PZLANTR EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH, $ PZLANTR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, ICHAR, MAX, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) NOUNIT = LSAME( DIAG, 'N' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LRWMIN = NQMOD RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -13 END IF END IF * IF( ONENRM ) THEN IDUM1( 1 ) = ICHAR( '1' ) ELSE IDUM1( 1 ) = ICHAR( 'I' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'U' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 11 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 13 CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 5, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTRCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * RCOND = ZERO SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' )*DBLE( MAX( 1, N ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPN = 1 * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Compute the norm of the triangular matrix A. * ANORM = PZLANTR( NORM, UPLO, DIAG, N, N, A, IA, JA, DESCA, RWORK ) * * Continue only if ANORM > 0. * IF( ANORM.GT.ZERO ) THEN * * Estimate the norm of the inverse of A. * AINVNM = ZERO NORMIN = 'N' IF( ONENRM ) THEN KASE1 = 1 ELSE KASE1 = 2 END IF KASE = 0 10 CONTINUE CALL PZLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), $ IX, JX, DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( KASE.EQ.KASE1 ) THEN * * Multiply by inv(A). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( UPLO, 'No transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, RWORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(A'). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SCALE, RWORK( IPN ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF NORMIN = 'Y' * * Multiply by 1/SCALE if doing so will not cause overflow. * IF( SCALE.NE.ONE ) THEN CALL PZAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, $ DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL ZGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1 ) ELSE CALL ZGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, $ 1, IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PZDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / ANORM ) / AINVNM END IF * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PZTRCON * END scalapack-1.8.0/SRC/pzdttrs.f0000640000175000017500000006733210363532303015637 0ustar muammarmuammar SUBROUTINE PZDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PZDTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PZDTTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal diagonally dominant-like distributed * matrix. * * Routine PZDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDTTRF and this is stored in AF. If a linear system * is to be solved using PZDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, $ RETURN_CODE, STORE_M_B, STORE_N_A, TEMP, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ PZDTTRSV, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC COMPLEX*16 ZDOTC EXTERNAL LSAME, NUMROC, ZDOTC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP .EQ. 502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 ENDIF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -3 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZDTTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDTTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ 10*NPCOL+4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PZDTTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB(5) PARAM_CHECK( 14, 1 ) = DESCB(4) PARAM_CHECK( 13, 1 ) = DESCB(3) PARAM_CHECK( 12, 1 ) = DESCB(2) PARAM_CHECK( 11, 1 ) = DESCB(1) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA(5) PARAM_CHECK( 8, 1 ) = DESCA(4) PARAM_CHECK( 7, 1 ) = DESCA(3) PARAM_CHECK( 6, 1 ) = DESCA(1) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 15, 2 ) = 1105 PARAM_CHECK( 14, 2 ) = 1104 PARAM_CHECK( 13, 2 ) = 1103 PARAM_CHECK( 12, 2 ) = 1102 PARAM_CHECK( 11, 2 ) = 1101 PARAM_CHECK( 10, 2 ) = 10 PARAM_CHECK( 9, 2 ) = 805 PARAM_CHECK( 8, 2 ) = 804 PARAM_CHECK( 7, 2 ) = 803 PARAM_CHECK( 6, 2 ) = 801 PARAM_CHECK( 5, 2 ) = 7 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PZDTTRSV( 'L', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PZDTTRSV( 'U', 'C', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( TRANS, 'C' ) ) THEN * CALL PZDTTRSV( 'L', 'C', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PZDTTRSV( 'U', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZDTTRS * END scalapack-1.8.0/SRC/psposv.f0000640000175000017500000002431110363532303015445 0ustar muammarmuammar SUBROUTINE PSPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSPOSV computes the solution to a real system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is an N-by-N * symmetric distributed positive definite matrix and X and sub( B ) * denoting B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS distributed * matrices. * * The Cholesky decomposition is used to factor sub( A ) as * * sub( A ) = U**T * U, if UPLO = 'U', or * * sub( A ) = L * L**T, if UPLO = 'L', * * where U is an upper triangular matrix and L is a lower triangular * matrix. The factored form of sub( A ) is then used to solve the * system of equations. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if INFO = 0, this array contains the * local pieces of the factor U or L from the Cholesky factori- * zation sub( A ) = U**T*U or L*L**T. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_B,LOC(JB+NRHS-1)). * On entry, the local pieces of the right hand sides distribu- * ted matrix sub( B ). On exit, if INFO = 0, sub( B ) is over- * written with the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed, and the * solution has not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSPOTRF, $ PSPOTRS, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1000+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSPOSV', -INFO ) RETURN END IF * * Compute the Cholesky factorization sub( A ) = U'*U or L*L'. * CALL PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ) overwriting sub( B ) * with X. * CALL PSPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * END IF * RETURN * * End of PSPOSV * END scalapack-1.8.0/SRC/psgebd2.f0000640000175000017500000004246610363532303015454 0ustar muammarmuammar SUBROUTINE PSGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAUP( * ), TAUQ( * ), $ WORK( * ) * .. * * Purpose * ======= * * PSGEBD2 reduces a real general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an orthogonal transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * orthogonal matrix Q as a product of elementary reflectors, * and the elements above the first superdiagonal, with the * array TAUP, represent the orthogonal matrix P as a product * of elementary reflectors. If M < N, the diagonal and the * first subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the orthogonal matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) REAL array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix Q. TAUQ * is tied to the distributed matrix A. See Further Details. * * TAUP (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the orthogonal matrix P. TAUP * is tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MpA0, NqA0 ) * * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ) * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+IROFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are real scalars, and v and u are real vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, II, IROFFA, J, $ JJ, K, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 REAL ALPHA * .. * .. Local Arrays .. INTEGER DESCD( DLEN_ ), DESCE( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DESCSET, $ INFOG2L, PSLARF, PSLARFG, PSELSET, $ PXERBLA, SGEBR2D, SGEBS2D, SLARFG * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Test the input parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) LWMIN = MAX( MPA0, NQA0 ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( M.EQ.1 .AND. N.EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN I = II+(JJ-1)*DESCA( LLD_ ) CALL SLARFG( 1, A( I ), A( I ), 1, TAUQ( JJ ) ) D( JJ ) = A( I ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1 ) CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1 ) ELSE CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, D( JJ ), $ 1, IAROW, IACOL ) CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAUQ( JJ ), $ 1, IAROW, IACOL ) END IF END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO RETURN END IF * ALPHA = ZERO * IF( M.GE.N ) THEN * * Reduce to upper bidiagonal form * CALL DESCSET( DESCD, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) CALL DESCSET( DESCE, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) DO 10 K = 1, N I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector H(j) to annihilate * A(ia+i:ia+m-1,j) * CALL PSLARFG( M-K+1, ALPHA, I, J, A, MIN( I+1, M+IA-1 ), $ J, DESCA, 1, TAUQ ) CALL PSELSET( D, 1, J, DESCD, ALPHA ) CALL PSELSET( A, I, J, DESCA, ONE ) * * Apply H(i) to A(i:ia+m-1,i+1:ja+n-1) from the left * CALL PSLARF( 'Left', M-K+1, N-K, A, I, J, DESCA, 1, TAUQ, A, $ I, J+1, DESCA, WORK ) CALL PSELSET( A, I, J, DESCA, ALPHA ) * IF( K.LT.N ) THEN * * Generate elementary reflector G(i) to annihilate * A(i,ja+j+1:ja+n-1) * CALL PSLARFG( N-K, ALPHA, I, J+1, A, I, $ MIN( J+2, JA+N-1 ), DESCA, DESCA( M_ ), $ TAUP ) CALL PSELSET( E, I, 1, DESCE, ALPHA ) CALL PSELSET( A, I, J+1, DESCA, ONE ) * * Apply G(i) to A(i+1:ia+m-1,i+1:ja+n-1) from the right * CALL PSLARF( 'Right', M-K, N-K, A, I, J+1, DESCA, $ DESCA( M_ ), TAUP, A, I+1, J+1, DESCA, $ WORK ) CALL PSELSET( A, I, J+1, DESCA, ALPHA ) ELSE CALL PSELSET( TAUP, I, 1, DESCE, ZERO ) END IF 10 CONTINUE * ELSE * * Reduce to lower bidiagonal form * CALL DESCSET( DESCD, IA+MIN(M,N)-1, 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, DESCA( CTXT_ ), $ DESCA( LLD_ ) ) CALL DESCSET( DESCE, 1, JA+MIN(M,N)-1, 1, DESCA( NB_ ), MYROW, $ DESCA( CSRC_ ), DESCA( CTXT_ ), 1 ) DO 20 K = 1, M I = IA + K - 1 J = JA + K - 1 * * Generate elementary reflector G(i) to annihilate * A(i,ja+j:ja+n-1) * CALL PSLARFG( N-K+1, ALPHA, I, J, A, I, $ MIN( J+1, JA+N-1 ), DESCA, DESCA( M_ ), TAUP ) CALL PSELSET( D, I, 1, DESCD, ALPHA ) CALL PSELSET( A, I, J, DESCA, ONE ) * * Apply G(i) to A(i:ia+m-1,j:ja+n-1) from the right * CALL PSLARF( 'Right', M-K, N-K+1, A, I, J, DESCA, $ DESCA( M_ ), TAUP, A, MIN( I+1, IA+M-1 ), J, $ DESCA, WORK ) CALL PSELSET( A, I, J, DESCA, ALPHA ) * IF( K.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:ia+m-1,j) * CALL PSLARFG( M-K, ALPHA, I+1, J, A, $ MIN( I+2, IA+M-1 ), J, DESCA, 1, TAUQ ) CALL PSELSET( E, 1, J, DESCE, ALPHA ) CALL PSELSET( A, I+1, J, DESCA, ONE ) * * Apply H(i) to A(i+1:ia+m-1,j+1:ja+n-1) from the left * CALL PSLARF( 'Left', M-K, N-K, A, I+1, J, DESCA, 1, TAUQ, $ A, I+1, J+1, DESCA, WORK ) CALL PSELSET( A, I+1, J, DESCA, ALPHA ) ELSE CALL PSELSET( TAUQ, 1, J, DESCE, ZERO ) END IF 20 CONTINUE END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEBD2 * END scalapack-1.8.0/SRC/pdlaed1.f0000640000175000017500000002253410363532303015432 0ustar muammarmuammar SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, $ IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER ID, INFO, IQ, JQ, N, N1 DOUBLE PRECISION RHO * .. * .. Array Arguments .. INTEGER DESCQ( * ), IWORK( * ) DOUBLE PRECISION D( * ), Q( * ), WORK( * ) * .. * * Purpose * ======= * * PDLAED1 computes the updated eigensystem of a diagonal * matrix after modification by a rank-one symmetric matrix, * in parallel. * * T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) * * where Z = Q'u, u is a vector of length N with ones in the * N1 and N1 + 1 th elements and zeros elsewhere. * * The eigenvectors of the original matrix are stored in Q, and the * eigenvalues are in D. The algorithm consists of three stages: * * The first stage consists of deflating the size of the problem * when there are multiple eigenvalues or if there is a zero in * the Z vector. For each such occurence the dimension of the * secular equation problem is reduced by one. This stage is * performed by the routine PDLAED2. * * The second stage consists of calculating the updated * eigenvalues. This is done by finding the roots of the secular * equation via the routine SLAED4 (as called by PDLAED3). * This routine also calculates the eigenvectors of the current * problem. * * The final stage consists of computing the updated eigenvectors * directly using the updated eigenvalues. The eigenvectors for * the current problem are multiplied with the eigenvectors from * the overall problem. * * Arguments * ========= * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * * N1 (input) INTEGER * The location of the last eigenvalue in the leading * sub-matrix. * min(1,N) <= N1 <= N. * * D (global input/output) DOUBLE PRECISION array, dimension (N) * On entry,the eigenvalues of the rank-1-perturbed matrix. * On exit, the eigenvalues of the repaired matrix. * * ID (global input) INTEGER * Q's global row/col index, which points to the beginning * of the submatrix which is to be operated on. * * Q (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Q, LOCc(JQ+N-1)) * Q contains the orthonormal eigenvectors of the symmetric * tridiagonal matrix. * * IQ (global input) INTEGER * Q's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JQ (global input) INTEGER * Q's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCQ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * RHO (input) DOUBLE PRECISION * The subdiagonal entry used to create the rank-1 modification. * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension 6*N + 2*NP*NQ * * IWORK (local workspace/output) INTEGER array, * dimension 7*N + 8*NPCOL + 2 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: The algorithm failed to compute the ith eigenvalue. * * ===================================================================== * * .. Parameters .. * INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER COL, COLTYP, IBUF, ICTOT, ICTXT, IDLMDA, IIQ, $ INDCOL, INDROW, INDX, INDXC, INDXP, INDXR, INQ, $ IPQ, IPQ2, IPSM, IPU, IPWORK, IQ1, IQ2, IQCOL, $ IQQ, IQROW, IW, IZ, J, JC, JJ2C, JJC, JJQ, JNQ, $ K, LDQ, LDQ2, LDU, MYCOL, MYROW, NB, NN, NN1, $ NN2, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCQ2( DLEN_ ), DESCU( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DESCINIT, INFOG1L, $ INFOG2L, PDGEMM, PDLAED2, PDLAED3, PDLAEDZ, $ PDLASET, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE IF( N.LT.0 ) THEN INFO = -1 ELSE IF( ID.GT.DESCQ( N_ ) ) THEN INFO = -4 ELSE IF( N1.GE.N ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED1', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * The following values are integer pointers which indicate * the portion of the workspace used by a particular array * in PDLAED2 and PDLAED3. * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) LDQ = DESCQ( LLD_ ) * CALL INFOG2L( IQ-1+ID, JQ-1+ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, $ IIQ, JJQ, IQROW, IQCOL ) * NP = NUMROC( N, DESCQ( MB_ ), MYROW, IQROW, NPROW ) NQ = NUMROC( N, DESCQ( NB_ ), MYCOL, IQCOL, NPCOL ) * LDQ2 = MAX( NP, 1 ) LDU = LDQ2 * IZ = 1 IDLMDA = IZ + N IW = IDLMDA + N IPQ2 = IW + N IPU = IPQ2 + LDQ2*NQ IBUF = IPU + LDU*NQ * (IBUF est de taille 3*N au maximum) * ICTOT = 1 IPSM = ICTOT + NPCOL*4 INDX = IPSM + NPCOL*4 INDXC = INDX + N INDXP = INDXC + N INDCOL = INDXP + N COLTYP = INDCOL + N INDROW = COLTYP + N INDXR = INDROW + N * CALL DESCINIT( DESCQ2, N, N, NB, NB, IQROW, IQCOL, ICTXT, LDQ2, $ INFO ) CALL DESCINIT( DESCU, N, N, NB, NB, IQROW, IQCOL, ICTXT, LDU, $ INFO ) * * Form the z-vector which consists of the last row of Q_1 and the * first row of Q_2. * IPWORK = IDLMDA CALL PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, WORK( IZ ), $ WORK( IPWORK ) ) * * Deflate eigenvalues. * IPQ = IIQ + ( JJQ-1 )*LDQ CALL PDLAED2( ICTXT, K, N, N1, NB, D, IQROW, IQCOL, Q( IPQ ), LDQ, $ RHO, WORK( IZ ), WORK( IW ), WORK( IDLMDA ), $ WORK( IPQ2 ), LDQ2, WORK( IBUF ), IWORK( ICTOT ), $ IWORK( IPSM ), NPCOL, IWORK( INDX ), IWORK( INDXC ), $ IWORK( INDXP ), IWORK( INDCOL ), IWORK( COLTYP ), $ NN, NN1, NN2, IQ1, IQ2 ) * * * Solve Secular Equation. * IF( K.NE.0 ) THEN CALL PDLASET( 'A', N, N, ZERO, ONE, WORK( IPU ), 1, 1, DESCU ) CALL PDLAED3( ICTXT, K, N, NB, D, IQROW, IQCOL, RHO, $ WORK( IDLMDA ), WORK( IW ), WORK( IZ ), $ WORK( IPU ), LDQ2, WORK( IBUF ), IWORK( INDX ), $ IWORK( INDCOL ), IWORK( INDROW ), IWORK( INDXR ), $ IWORK( INDXC ), IWORK( ICTOT ), NPCOL, INFO ) * * Compute the updated eigenvectors. * IQQ = MIN( IQ1, IQ2 ) IF( NN1.GT.0 ) THEN INQ = IQ - 1 + ID JNQ = JQ - 1 + ID + IQQ - 1 CALL PDGEMM( 'N', 'N', N1, NN, NN1, ONE, WORK( IPQ2 ), 1, $ IQ1, DESCQ2, WORK( IPU ), IQ1, IQQ, DESCU, $ ZERO, Q, INQ, JNQ, DESCQ ) END IF IF( NN2.GT.0 ) THEN INQ = IQ - 1 + ID + N1 JNQ = JQ - 1 + ID + IQQ - 1 CALL PDGEMM( 'N', 'N', N-N1, NN, NN2, ONE, WORK( IPQ2 ), $ N1+1, IQ2, DESCQ2, WORK( IPU ), IQ2, IQQ, $ DESCU, ZERO, Q, INQ, JNQ, DESCQ ) END IF * DO 10 J = K + 1, N JC = IWORK( INDX+J-1 ) CALL INFOG1L( JQ-1+JC, NB, NPCOL, MYCOL, IQCOL, JJC, COL ) CALL INFOG1L( JC, NB, NPCOL, MYCOL, IQCOL, JJ2C, COL ) IF( MYCOL.EQ.COL ) THEN IQ2 = IPQ2 + ( JJ2C-1 )*LDQ2 INQ = IPQ + ( JJC-1 )*LDQ CALL DCOPY( NP, WORK( IQ2 ), 1, Q( INQ ), 1 ) END IF 10 CONTINUE END IF * 20 CONTINUE RETURN * * End of PDLAED1 * END scalapack-1.8.0/SRC/psgesvx.f0000640000175000017500000010452410363532303015617 0ustar muammarmuammar SUBROUTINE PSGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, IPIV, EQUED, R, C, B, IB, $ JB, DESCB, X, IX, JX, DESCX, RCOND, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, TRANS INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, $ LWORK, N, NRHS REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ), IWORK( * ) REAL A( * ), AF( * ), B( * ), BERR( * ), C( * ), $ FERR( * ), R( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PSGESVX uses the LU factorization to compute the solution to a real * system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * In the following description, A denotes A(IA:IA+N-1,JA:JA+N-1), * B denotes B(IB:IB+N-1,JB:JB+NRHS-1) and X denotes * X(IX:IX+N-1,JX:JX+NRHS-1). * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') * or diag(C)*B (if TRANS = 'T' or 'C'). * * 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if FACT = 'E') as * A = P * L * U, * where P is a permutation matrix, L is a unit lower triangular * matrix, and U is upper triangular. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If FACT = 'E' and equilibration was used, the matrix X is * premultiplied by diag(C) (if TRANS = 'N') or diag(R) (if * TRANS = 'T' or 'C') so that it solves the original system * before equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix * A(IA:IA+N-1,JA:JA+N-1) is supplied on entry, and if not, * whether the matrix A(IA:IA+N-1,JA:JA+N-1) should be * equilibrated before it is factored. * = 'F': On entry, AF(IAF:IAF+N-1,JAF:JAF+N-1) and IPIV con- * tain the factored form of A(IA:IA+N-1,JA:JA+N-1). * If EQUED is not 'N', the matrix * A(IA:IA+N-1,JA:JA+N-1) has been equilibrated with * scaling factors given by R and C. * A(IA:IA+N-1,JA:JA+N-1), AF(IAF:IAF+N-1,JAF:JAF+N-1), * and IPIV are not modified. * = 'N': The matrix A(IA:IA+N-1,JA:JA+N-1) will be copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * = 'E': The matrix A(IA:IA+N-1,JA:JA+N-1) will be equili- * brated if necessary, then copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': A(IA:IA+N-1,JA:JA+N-1) * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (No transpose) * = 'T': A(IA:IA+N-1,JA:JA+N-1)**T * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * = 'C': A(IA:IA+N-1,JA:JA+N-1)**H * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right-hand sides, i.e., the number of columns * of the distributed submatrices B(IB:IB+N-1,JB:JB+NRHS-1) and * X(IX:IX+N-1,JX:JX+NRHS-1). NRHS >= 0. * * A (local input/local output) REAL pointer into * the local memory to an array of local dimension * (LLD_A,LOCc(JA+N-1)). On entry, the N-by-N matrix * A(IA:IA+N-1,JA:JA+N-1). If FACT = 'F' and EQUED is not 'N', * then A(IA:IA+N-1,JA:JA+N-1) must have been equilibrated by * the scaling factors in R and/or C. A(IA:IA+N-1,JA:JA+N-1) is * not modified if FACT = 'F' or 'N', or if FACT = 'E' and * EQUED = 'N' on exit. * * On exit, if EQUED .ne. 'N', A(IA:IA+N-1,JA:JA+N-1) is scaled * as follows: * EQUED = 'R': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * EQUED = 'C': A(IA:IA+N-1,JA:JA+N-1) := * A(IA:IA+N-1,JA:JA+N-1) * diag(C) * EQUED = 'B': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) REAL pointer * into the local memory to an array of local dimension * (LLD_AF,LOCc(JA+N-1)). If FACT = 'F', then * AF(IAF:IAF+N-1,JAF:JAF+N-1) is an input argument and on * entry contains the factors L and U from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by PSGETRF. * If EQUED .ne. 'N', then AF is the factored form of the * equilibrated matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'N', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original * matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equili- * brated matrix A(IA:IA+N-1,JA:JA+N-1) (see the description of * A(IA:IA+N-1,JA:JA+N-1) for the form of the equilibrated * matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input or local output) INTEGER array, dimension * LOCr(M_A)+MB_A. If FACT = 'F', then IPIV is an input argu- * ment and on entry contains the pivot indices from the fac- * torization A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by * PSGETRF; IPIV(i) -> The global row local row i was * swapped with. This array must be aligned with * A( IA:IA+N-1, * ). * * If FACT = 'N', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original matrix * A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equilibrated matrix * A(IA:IA+N-1,JA:JA+N-1). * * EQUED (global input or global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'R': Row equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) has * been premultiplied by diag(R). * = 'C': Column equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) * has been postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., * A(IA:IA+N-1,JA:JA+N-1) has been replaced by * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * R (local input or local output) REAL array, * dimension LOCr(M_A). * The row scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'R' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the left by diag(R); if EQUED='N' or 'C', R is not acces- * sed. R is an input variable if FACT = 'F'; otherwise, R is * an output variable. * If FACT = 'F' and EQUED = 'R' or 'B', each element of R must * be positive. * R is replicated in every process column, and is aligned * with the distributed matrix A. * * C (local input or local output) REAL array, * dimension LOCc(N_A). * The column scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'C' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the right by diag(C); if EQUED = 'N' or 'R', C is not * accessed. C is an input variable if FACT = 'F'; otherwise, * C is an output variable. If FACT = 'F' and EQUED = 'C' or * 'B', each element of C must be positive. * C is replicated in every process row, and is aligned with * the distributed matrix A. * * B (local input/local output) REAL pointer * into the local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1) ). On entry, the N-by-NRHS right-hand * side matrix B(IB:IB+N-1,JB:JB+NRHS-1). On exit, if * EQUED = 'N', B(IB:IB+N-1,JB:JB+NRHS-1) is not modified; if * TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by * diag(R)*B(IB:IB+N-1,JB:JB+NRHS-1); if TRANS = 'T' or 'C' * and EQUED = 'C' or 'B', B(IB:IB+N-1,JB:JB+NRHS-1) is over- * written by diag(C)*B(IB:IB+N-1,JB:JB+NRHS-1). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) REAL pointer * into the local memory to an array of local dimension * (LLD_X, LOCc(JX+NRHS-1)). If INFO = 0, the N-by-NRHS * solution matrix X(IX:IX+N-1,JX:JX+NRHS-1) to the original * system of equations. Note that A(IA:IA+N-1,JA:JA+N-1) and * B(IB:IB+N-1,JB:JB+NRHS-1) are modified on exit if * EQUED .ne. 'N', and the solution to the equilibrated system * is inv(diag(C))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'N' * and EQUED = 'C' or 'B', or * inv(diag(R))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'T' or 'C' * and EQUED = 'R' or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) REAL * The estimate of the reciprocal condition number of the matrix * A(IA:IA+N-1,JA:JA+N-1) after equilibration (if done). If * RCOND is less than the machine precision (in particular, if * RCOND = 0), the matrix is singular to working precision. * This condition is indicated by a return code of INFO > 0. * * FERR (local output) REAL array, dimension LOCc(N_B) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix * X(IX:IX+N-1,JX:JX+NRHS-1). If XTRUE is the true solution, * FERR(j) bounds the magnitude of the largest entry in * (X(j) - XTRUE) divided by the magnitude of the largest entry * in X(j). The estimate is as reliable as the estimate for * RCOND, and is almost always a slight overestimate of the * true error. FERR is replicated in every process row, and is * aligned with the matrices B and X. * * BERR (local output) REAL array, dimension LOCc(N_B). * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A(IA:IA+N-1,JA:JA+N-1) or * B(IB:IB+N-1,JB:JB+NRHS-1) that makes X(j) an exact solution). * BERR is replicated in every process row, and is aligned * with the matrices B and X. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PSGECON( LWORK ), PSGERFS( LWORK ) ) * + LOCr( N_A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK = LOCr(N_A). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: U(IA+I-1,IA+I-1) is exactly zero. The * factorization has been completed, but the * factor U is exactly singular, so the solution * and error bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the * matrix is singular to working precision, and * the solution and error bounds have not been * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL COLEQU, EQUIL, LQUERY, NOFACT, NOTRAN, ROWEQU CHARACTER NORM INTEGER CONWRK, I, IACOL, IAROW, IAFROW, IBROW, IBCOL, $ ICOFFA, ICOFFB, ICOFFX, ICTXT, IDUMM, $ IIA, IIB, IIX, $ INFEQU, IROFFA, IROFFAF, IROFFB, $ IROFFX, IXCOL, IXROW, J, JJA, JJB, JJX, $ LCM, LCMQ, $ LIWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, $ NQ, NQB, NRHSQ, RFSWRK REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, $ ROWCND, SMLNUM * .. * .. Local Arrays .. INTEGER CDESC( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ INFOG2L, PSGECON, PSGEEQU, PSGERFS, $ PSGETRF, PSGETRS, PSLACPY, $ PSLAQGE, PSCOPY, PXERBLA, SGEBR2D, $ SGEBS2D, SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC REAL PSLAMCH, PSLANGE EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC, PSLANGE, $ PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IX, JX, DESCX, 24, INFO ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) NOTRAN = LSAME( TRANS, 'N' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' ROWEQU = .FALSE. COLEQU = .FALSE. ELSE ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA NQB = ICEIL( N+IROFFA, DESCA( NB_ )*NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL CONWRK = 2*NP + 2*NQ + MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQ + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) RFSWRK = 3*NP IF( LSAME( TRANS, 'N' ) ) THEN RFSWRK = RFSWRK + NP + NQ + $ ICEIL( NQB, LCMQ )*DESCA( NB_ ) ELSE IF( LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) ) THEN RFSWRK = RFSWRK + NP + NQ END IF LWMIN = MAX( CONWRK, RFSWRK ) WORK( 1 ) = REAL( LWMIN ) LIWMIN = NP IWORK( 1 ) = LIWMIN IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT. LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( ROWEQU ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 10 J = IIA, IIA + NP - 1 RCMIN = MIN( RCMIN, R( J ) ) RCMAX = MAX( RCMAX, R( J ) ) 10 CONTINUE CALL SGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN ROWCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE ROWCND = ONE END IF END IF IF( COLEQU .AND. INFO.EQ.0 ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 20 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 20 CONTINUE CALL SGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -15 ELSE IF( N.GT.0 ) THEN COLCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE COLCND = ONE END IF END IF END IF END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( DESCX( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2400+NB_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -29 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -31 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( TRANS ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 14 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 29 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 5, IDUM1, $ IDUM2, INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 29 IF( LIWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 4, IDUM1, $ IDUM2, INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGESVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PSGEEQU( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFEQU ) IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PSLAQGE( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) NP = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFB IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFB * IF( NOTRAN ) THEN IF( ROWEQU ) THEN DO 40 J = JJB, JJB+NRHSQ-1 DO 30 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = R( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 30 CONTINUE 40 CONTINUE END IF ELSE IF( COLEQU ) THEN * * Transpose the Column scale factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PSCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), WORK, IB, JB, $ DESCB, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IIB ), $ DESCB( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IIB ), $ DESCB( LLD_ ), MYROW, IBCOL ) END IF DO 60 J = JJB, JJB+NRHSQ-1 DO 50 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = WORK( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 50 CONTINUE 60 CONTINUE END IF * IF( NOFACT.OR.EQUIL ) THEN * * Compute the LU factorization of A. * CALL PSLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PSGETRF( N, N, AF, IAF, JAF, DESCAF, IPIV, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * IF( NOTRAN ) THEN NORM = '1' ELSE NORM = 'I' END IF ANORM = PSLANGE( NORM, N, N, A, IA, JA, DESCA, WORK ) * * Compute the reciprocal of the condition number of A. * CALL PSGECON( NORM, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PSLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PSLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PSGETRS( TRANS, N, NRHS, AF, IAF, JAF, DESCAF, IPIV, X, IX, $ JX, DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PSGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFX IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFX * IF( NOTRAN ) THEN IF( COLEQU ) THEN * * Transpose the column scaling factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PSCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), WORK, IX, $ JX, DESCX, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL SGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, $ WORK( IIX ), DESCX( LLD_ ) ) ELSE CALL SGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, $ WORK( IIX ), DESCX( LLD_ ), MYROW, IBCOL ) END IF * DO 80 J = JJX, JJX+NRHSQ-1 DO 70 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = WORK( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 70 CONTINUE 80 CONTINUE DO 90 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / COLCND 90 CONTINUE END IF ELSE IF( ROWEQU ) THEN DO 110 J = JJX, JJX+NRHSQ-1 DO 100 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = R( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 100 CONTINUE 110 CONTINUE DO 120 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / ROWCND 120 CONTINUE END IF * WORK( 1 ) = REAL( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PSGESVX * END scalapack-1.8.0/SRC/pbchkvect.c0000640000175000017500000002052110363532303016060 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- PBLAS routine (version 1.5) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 17, 1995 * * --------------------------------------------------------------------- */ /* * Include files */ #include "tools.h" void pbchkvect( n, npos0, ix, jx, desc_X, incx, dpos0, iix, jjx, ixrow, ixcol, nprow, npcol, myrow, mycol, info ) /* * .. Scalar Arguments .. */ int dpos0, * iix, incx, * info, ix, * ixcol, * ixrow, * jjx, jx, myrow, mycol, npcol, nprow, n, npos0; /* * .. Array Arguments .. */ int desc_X[]; { /* * * Purpose * ======= * * pbchkvect checks the validity of a descriptor vector DESCX, the * related global indexes IX, JX and the global increment INCX. It also * computes the starting local indexes (IIX,JJX) corresponding to the * submatrix starting globally at the entry pointed by (IX,JX). * Moreover, this routine returns the coordinates in the grid of the * process owning the global matrix entry of indexes (IX,JX), namely * (IXROW,IXCOL). The routine prevents out-of-bound memory access * by performing the appropriate MIN operation on iix and JJX. Finally, * if an inconsistency is found among its parameters IX, JX, DESCX and * INCX, the routine returns an error code in info. * * Arguments * ========= * * N (global input) INTEGER * The length of the vector X being operated on. * * NPOS0 (global input) INTEGER * Where in the calling routine's parameter list N appears. * * IX (global input) INTEGER * X's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JX (global input) INTEGER * X's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * DPOS0 (global input) INTEGER * Where in the calling routine's parameter list DESCX * appears. Note that we assume IX and JX are respectively 2 * and 1 entries behind DESCX, and INCX is 1 entry after DESCX. * * IIX (local output) pointer to INTEGER * The local rows starting index of the submatrix. * * JJX (local output) pointer to INTEGER * The local columns starting index of the submatrix. * * IXROW (global output) pointer to INTEGER * The row coordinate of the process that possesses the first * row and column of the submatrix. * * IXCOL (global output) pointer to INTEGER * The column coordinate of the process that possesses the * first row and column of the submatrix. * * NPROW (global input) INTEGER * The total number of process rows over which the distributed * matrix is distributed. * * NPCOL (global input) INTEGER * The total number of process columns over which the * distributed matrix is distributed. * * MYROW (local input) INTEGER * The row coordinate of the process calling this routine. * * MYCOL (local input) INTEGER * The column coordinate of the process calling this routine. * * INFO (local input/local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. */ #define DESCMULT 100 #define BIGNUM 10000 /* .. * .. Local Scalars .. */ int descpos, ExtraColBlock, ExtraRowBlock, icpos, ixpos, jxpos, MyColBlock, MyColDist, MyRowBlock, MyRowDist, NColBlock, np, npos, nq, NRowBlock; /* .. * .. External Functions .. */ F_INTG_FCT numroc_(); /* * .. Executable Statements .. */ if( *info >= 0 ) *info = BIGNUM; else if( *info < -DESCMULT ) *info = -(*info); else *info = -(*info) * DESCMULT; /* * Figure where in parameter list each parameter was, factoring in * descriptor multiplier */ npos = npos0 * DESCMULT; ixpos = ( dpos0 - 2 ) * DESCMULT; jxpos = ( dpos0 - 1 ) * DESCMULT; icpos = ( dpos0 + 1 ) * DESCMULT; descpos = dpos0 * DESCMULT + 1; /* * Check that we have a legal descriptor type */ if(desc_X[DT_] != BLOCK_CYCLIC_2D) *info = MIN( *info, descpos + DT_ ); /* * Check that matrix values make sense from local viewpoint */ if( n < 0 ) *info = MIN( *info, npos ); else if( ix < 1 ) *info = MIN( *info, ixpos ); else if( jx < 1 ) *info = MIN( *info, jxpos ); else if( desc_X[MB_] < 1 ) *info = MIN( *info, descpos + MB_ ); else if( desc_X[NB_] < 1 ) *info = MIN( *info, descpos + NB_ ); else if( ( desc_X[RSRC_] < 0 ) || ( desc_X[RSRC_] >= nprow ) ) *info = MIN( *info, descpos + RSRC_ ); else if( ( desc_X[CSRC_] < 0 ) || ( desc_X[CSRC_] >= npcol ) ) *info = MIN( *info, descpos + CSRC_ ); else if( incx != 1 && incx != desc_X[M_] ) *info = MIN( *info, icpos ); else if( desc_X[LLD_] < 1 ) *info = MIN( *info, descpos + LLD_ ); if( n == 0 ) { /* * NULL matrix, relax some checks */ if( desc_X[M_] < 0 ) *info = MIN( *info, descpos + M_ ); if( desc_X[N_] < 0 ) *info = MIN( *info, descpos + N_ ); } else { /* * more rigorous checks for non-degenerate matrices */ if( desc_X[M_] < 1 ) *info = MIN( *info, descpos + M_ ); else if( desc_X[N_] < 1 ) *info = MIN( *info, descpos + N_ ); else if( ( incx == desc_X[M_] ) && ( jx+n-1 > desc_X[N_] ) ) *info = MIN( *info, jxpos ); else if( ( incx == 1 ) && ( incx != desc_X[M_] ) && ( ix+n-1 > desc_X[M_] ) ) *info = MIN( *info, ixpos ); else { if( ix > desc_X[M_] ) *info = MIN( *info, ixpos ); else if( jx > desc_X[N_] ) *info = MIN( *info, jxpos ); } } /* * Retrieve local information for vector X, and prepare output: * set info = 0 if no error, and divide by DESCMULT if error is not * in a descriptor entry. */ if( *info == BIGNUM ) { MyRowDist = ( myrow + nprow - desc_X[RSRC_] ) % nprow; MyColDist = ( mycol + npcol - desc_X[CSRC_] ) % npcol; NRowBlock = desc_X[M_] / desc_X[MB_]; NColBlock = desc_X[N_] / desc_X[NB_]; np = ( NRowBlock / nprow ) * desc_X[MB_]; nq = ( NColBlock / npcol ) * desc_X[NB_]; ExtraRowBlock = NRowBlock % nprow; ExtraColBlock = NColBlock % npcol; ix--; jx--; MyRowBlock = ix / desc_X[MB_]; MyColBlock = jx / desc_X[NB_]; *ixrow = ( MyRowBlock + desc_X[RSRC_] ) % nprow; *ixcol = ( MyColBlock + desc_X[CSRC_] ) % npcol; *iix = ( MyRowBlock / nprow + 1 ) * desc_X[MB_] + 1; *jjx = ( MyColBlock / npcol + 1 ) * desc_X[NB_] + 1; if( MyRowDist >= ( MyRowBlock % nprow ) ) { if( myrow == *ixrow ) *iix += ix % desc_X[MB_]; *iix -= desc_X[MB_]; } if( MyRowDist < ExtraRowBlock ) np += desc_X[MB_]; else if( MyRowDist == ExtraRowBlock ) np += ( desc_X[M_] % desc_X[MB_] ); np = MAX( 1, np ); if( MyColDist >= ( MyColBlock % npcol ) ) { if( mycol == *ixcol ) *jjx += jx % desc_X[NB_]; *jjx -= desc_X[NB_]; } if( MyColDist < ExtraColBlock ) nq += desc_X[NB_]; else if( MyColDist == ExtraColBlock ) nq += ( desc_X[N_] % desc_X[NB_] ); nq = MAX( 1, nq ); *iix = MIN( *iix, np ); *jjx = MIN( *jjx, nq ); if( desc_X[LLD_] < np ) { if( numroc_(&desc_X[N_], &desc_X[NB_], &mycol, &desc_X[CSRC_], &npcol) ) *info = -( descpos + LLD_ ); else *info = 0; } else *info = 0; } else if( *info % DESCMULT == 0 ) { *info = -(*info) / DESCMULT; } else { *info = -(*info); } } scalapack-1.8.0/SRC/psdttrf.f0000640000175000017500000010575710363532303015617 0ustar muammarmuammar SUBROUTINE PSDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL AF( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PSDTTRF computes a LU factorization * of an N-by-N real tridiagonal * diagonally dominant-like distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PSDTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = L U * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * DL (local input/local output) REAL pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSDTTRF and this is stored in AF. If a linear system * is to be solved using PSDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, JA_NEW, LAF_MIN, $ LEVEL_DIST, LLDA, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ TEMP, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, PXERBLA, $ RESHAPE, SDTTRF, SDTTRSV, SGERV2D, SGESD2D, $ STRRV2D, STRSD2D * .. * .. External Functions .. INTEGER NUMROC REAL SDOT EXTERNAL NUMROC, SDOT * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -10 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PSDTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PSDTTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( 12*NPCOL+3*NB ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -8 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PSDTTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -10 CALL PXERBLA( ICTXT, 'PSDTTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA( 5 ) PARAM_CHECK( 6, 1 ) = DESCA( 4 ) PARAM_CHECK( 5, 1 ) = DESCA( 3 ) PARAM_CHECK( 4, 1 ) = DESCA( 1 ) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 605 PARAM_CHECK( 6, 2 ) = 604 PARAM_CHECK( 5, 2 ) = 603 PARAM_CHECK( 4, 2 ) = 601 PARAM_CHECK( 3, 2 ) = 5 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 10 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 70 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * Offset to workspace for Upper triangular factor * WORK_U = INT_ONE*ODD_SIZE + 3 * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL STRSD2D( ICTXT, 'U', 'N', 1, 1, $ DU( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, $ MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {U_i} in each processor * CALL SDTTRF( ODD_SIZE, DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 20 END IF * * IF( MYCOL.LT.NP-1 ) THEN * * Apply factorization to lower connection block BL_i * Apply factorization to upper connection block BU_i * * * Perform the triangular solve {U_i}^T{BL'}_i^T = {BL_i}^T * * DL( PART_OFFSET+ODD_SIZE+1 ) = ( DL( PART_OFFSET+ODD_SIZE+1 ) ) $ / ( D( PART_OFFSET+ODD_SIZE ) ) * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{BL'}_i}{{BU'}_i} * * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 ) - $ DL( PART_OFFSET+ODD_SIZE+1 )* $ DU( PART_OFFSET+ODD_SIZE ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 20 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * * Move entry that causes spike to auxiliary storage * AF( WORK_U+1 ) = ( DL( PART_OFFSET+1 ) ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{GU}_i} = {DL_i}$ . * CALL SDTTRSV( 'L', 'N', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( WORK_U+1 ), ODD_SIZE, $ INFO ) * * * Calculate the "spike" fillin, ${U_i}^T {{GL}_i}^T = {DU_i}^T$ * CALL STRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * CALL SDTTRSV( 'U', 'T', ODD_SIZE, INT_ONE, $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ), $ DU( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * * Calculate the update block for previous proc, E_i = GL_i{GU_i} * AF( ODD_SIZE+3 ) = -ONE*SDOT( ODD_SIZE, AF( 1 ), 1, $ AF( WORK_U+1 ), 1 ) * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * AF( ODD_SIZE+1 ) = -ONE*( DL( PART_OFFSET+ODD_SIZE+1 )* $ AF( WORK_U+ODD_SIZE ) ) * * AF( WORK_U+( ODD_SIZE )+1 ) = -ONE* $ DU( PART_OFFSET+ODD_SIZE )*( AF( ODD_SIZE ) ) * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 60 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 50 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = REAL( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + AF( ODD_SIZE+3 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 30 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 40 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 30 40 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ).EQ.ZERO ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( WORK_U+ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * AF( ODD_SIZE+3 ) = AF( WORK_U+ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( WORK_U+ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify lower off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) / ( AF( ODD_SIZE+2 ) ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*( AF( ODD_SIZE+1 ) )* $ AF( WORK_U+( ODD_SIZE )+1 ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, $ AF( WORK_U+ODD_SIZE+2+1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST / 2 ) * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL SGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+3 ) / ( AF( ODD_SIZE+2 ) ) * * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*( AF( WORK_U+ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( WORK_U+ODD_SIZE+3 )*AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( WORK_U+ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 50 CONTINUE * * 60 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 70 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PSDTTRF * END scalapack-1.8.0/SRC/pcdbtrsv.f0000640000175000017500000014520210363532303015745 0ustar muammarmuammar SUBROUTINE PCDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDBTRSV solves a banded triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a banded * triangular matrix factor produced by the * Gaussian elimination code PC@(dom_pre)BTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H * is dictated by the user by the parameter TRANS. * * Routine PCDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDBTRF and this is stored in AF. If a linear system * is to be solved using PCDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MAX_BW, MBW2, MYCOL, MYROW, MY_NUM_COLS, $ NB, NP, NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WORK_U * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 18, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO, $ CGEMM, CGERV2D, CGESD2D, CLACPY, CMATADD, $ CTBTRS, CTRMM, CTRTRS, DESC_CONVERT, GLOBCHK, $ PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 9*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 12*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 12*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 12*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 12*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Size of separator blocks is maximum of bandwidths * MAX_BW = MAX(BWL,BWU) MBW2 = MAX_BW * MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF ( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -2 END IF * IF( LWORK .LT. -1) THEN INFO = -16 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -3 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -4 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -5 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 9*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 9*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 12*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 12*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -6 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -8 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 9*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PCDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ MAX(BWL,BWU)*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, $ 'PCDBTRSV: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 18, 1 ) = DESCB(5) PARAM_CHECK( 17, 1 ) = DESCB(4) PARAM_CHECK( 16, 1 ) = DESCB(3) PARAM_CHECK( 15, 1 ) = DESCB(2) PARAM_CHECK( 14, 1 ) = DESCB(1) PARAM_CHECK( 13, 1 ) = IB PARAM_CHECK( 12, 1 ) = DESCA(5) PARAM_CHECK( 11, 1 ) = DESCA(4) PARAM_CHECK( 10, 1 ) = DESCA(3) PARAM_CHECK( 9, 1 ) = DESCA(1) PARAM_CHECK( 8, 1 ) = JA PARAM_CHECK( 7, 1 ) = NRHS PARAM_CHECK( 6, 1 ) = BWU PARAM_CHECK( 5, 1 ) = BWL PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 18, 2 ) = 1205 PARAM_CHECK( 17, 2 ) = 1204 PARAM_CHECK( 16, 2 ) = 1203 PARAM_CHECK( 15, 2 ) = 1202 PARAM_CHECK( 14, 2 ) = 1201 PARAM_CHECK( 13, 2 ) = 11 PARAM_CHECK( 12, 2 ) = 905 PARAM_CHECK( 11, 2 ) = 904 PARAM_CHECK( 10, 2 ) = 903 PARAM_CHECK( 9, 2 ) = 901 PARAM_CHECK( 8, 2 ) = 8 PARAM_CHECK( 7, 2 ) = 6 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 16 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 18, PARAM_CHECK, 18, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF ( MYCOL .EQ. 0 ) THEN PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE ) ENDIF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF ( MYCOL .LT. NP-1 ) THEN ODD_SIZE = ODD_SIZE - MAX_BW ENDIF * * Offset to workspace for Upper triangular factor * WORK_U = BWU*ODD_SIZE + 3*MBW2 * * * * Begin main code * IF ( LSAME( UPLO, 'L' ) ) THEN * IF ( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'N', 'U', ODD_SIZE, $ BWL, NRHS, $ A( OFST+1+BWU ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BWL, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWL+1), LLDB, $ WORK( 1 ), MAX_BW ) * CALL CTRMM( 'L', 'U', 'N', 'N', BWL, NRHS, -CONE, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, WORK( 1 ), MAX_BW ) * CALL CMATADD( BWL, NRHS, CONE, WORK( 1 ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * Clear garbage out of workspace block * DO 10 IDUM1=1, WORK_SIZE_MIN WORK( IDUM1 )=0.0 10 CONTINUE * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', BWU, NRHS, ODD_SIZE, -CONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO, $ WORK( 1+MAX_BW-BWU ), MAX_BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 14 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 12 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 12 11 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTBTRS( 'L', 'N', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 14 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 24 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 27 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 27 26 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( ODD_SIZE*BWU+2*MBW2+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( (ODD_SIZE)*BWU+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTBTRS( 'L', 'C', 'U', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 22 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 21 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 22 21 CONTINUE * [End of GOTO Loop] * 24 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, BWU, -CONE, AF( 1 ), $ ODD_SIZE, WORK( 1+MAX_BW-BWU ), MAX_BW, CONE, $ B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BWL, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL CTRMM( 'L', 'U', 'C', 'N', BWL, NRHS, -CONE, $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )), $ LLDA-1, WORK( 1+MAX_BW-BWL ), MAX_BW ) * CALL CMATADD( BWL, NRHS, CONE, WORK( 1+MAX_BW-BWL ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE-BWL+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'C', 'U', ODD_SIZE, $ BWL, NRHS, $ A( OFST+1+BWU ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF ( LSAME( TRANS, 'C' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'C', 'N', ODD_SIZE, $ BWU, NRHS, $ A( OFST+1 ), LLDA, $ B( PART_OFFSET+1 ), LLDB, INFO ) * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BWU, NRHS, $ B( PART_OFFSET+ODD_SIZE-BWU+1), LLDB, $ WORK( 1 ), MAX_BW ) * CALL CTRMM( 'L', 'L', 'C', 'N', BWU, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, WORK( 1 ), $ MAX_BW ) * CALL CMATADD( BWU, NRHS, CONE, WORK( 1 ), MAX_BW, $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * ENDIF * * Clear garbage out of workspace block * DO 20 IDUM1=1, WORK_SIZE_MIN WORK( IDUM1 )=0.0 20 CONTINUE * * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL CGEMM( 'C', 'N', BWL, NRHS, ODD_SIZE, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ), $ LLDB, CZERO, WORK( 1+MAX_BW-BWL ), MAX_BW ) ENDIF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL + 1 ) * * Combine contribution to locally stored right hand sides * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 44 ENDIF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 42 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * CALL CMATADD( MAX_BW, NRHS, CONE, $ WORK( 1 ), MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB ) * ENDIF * LEVEL_DIST = LEVEL_DIST*2 * GOTO 42 41 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL CTBTRS( 'U', 'C', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 ) ), $ MAX_BW+1, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * * ********* IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL CGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, CZERO, $ WORK( 1 ), $ MAX_BW ) * * Send contribution to diagonal block's owning processor. * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 44 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL .EQ. NPCOL-1 ) THEN GOTO 54 ENDIF * * Determine number of steps in tree loop * LEVEL_DIST = 1 57 CONTINUE IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56 * LEVEL_DIST = LEVEL_DIST*2 * GOTO 57 56 CONTINUE * * IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND. $ ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL CGEMM( 'C', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) ENDIF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN * * Receive solution from processor to right * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), $ MAX_BW, 0, MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL CGEMM( 'N', 'N', MAX_BW, NRHS, MAX_BW, -CONE, $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), $ MAX_BW, $ WORK( 1 ), $ MAX_BW, CONE, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * ENDIF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL CTBTRS( 'U', 'N', 'N', MAX_BW, MIN( BWU, MAX_BW-1 ), NRHS, $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 ) ), $ MAX_BW+1, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ INFO ) * IF( INFO.NE.0 ) THEN GO TO 1000 ENDIF * * * ***Modification Loop ******* * 52 CONTINUE IF( LEVEL_DIST .EQ. 1 ) GOTO 51 * LEVEL_DIST = LEVEL_DIST/2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL+LEVEL_DIST ) * ENDIF * * Send solution to left * IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, 0, MYCOL-LEVEL_DIST ) * ENDIF * GOTO 52 51 CONTINUE * [End of GOTO Loop] * 54 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL .LT. NPCOL-1) THEN * CALL CGESD2D( ICTXT, MAX_BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ 0, MYCOL +1 ) * ENDIF * * Receive modifications to processor's right hand sides * IF( MYCOL .GT. 0) THEN * CALL CGERV2D( ICTXT, MAX_BW, NRHS, $ WORK( 1 ), MAX_BW, $ 0, MYCOL - 1 ) * ENDIF * * * ********************************************** * Local computation phase ********************************************** * IF ( MYCOL .NE. 0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL CGEMM( 'N', 'N', ODD_SIZE, NRHS, BWL, -CONE, $ AF( WORK_U+1 ), ODD_SIZE, WORK( 1+MAX_BW-BWL ), $ MAX_BW, CONE, B( PART_OFFSET+1 ), LLDB ) * ENDIF * * IF ( MYCOL .LT. NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL CLACPY( 'N', BWU, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL CTRMM( 'L', 'L', 'N', 'N', BWU, NRHS, -CONE, $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL ) * CALL CMATADD( BWU, NRHS, CONE, WORK( 1+MAX_BW-BWU ), $ MAX_BW+BWL, CONE, $ B( PART_OFFSET+ODD_SIZE-BWU+1 ), LLDB ) * ENDIF * * Use main partition in each processor to solve locally * CALL CTBTRS( UPLO, 'N', 'N', ODD_SIZE, $ BWU, NRHS, $ A( OFST+1 ), $ LLDA, B( PART_OFFSET+1 ), $ LLDB, INFO ) * ENDIF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ENDIF * End of "IF( LSAME( UPLO, 'L' ) )"... 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCDBTRSV * END scalapack-1.8.0/SRC/pcdbtrs.f0000640000175000017500000006460310363532303015564 0ustar muammarmuammar SUBROUTINE PCDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCDBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PCDBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Routine PCDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PCDBTRF and this is stored in AF. If a linear system * is to be solved using PCDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, $ PCDBTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PCDBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PCDBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (MAX(BWL,BWU)*NRHS) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PCDBTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCDBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PCDBTRSV( 'L', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PCDBTRSV( 'U', 'C', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( TRANS, 'C' ) ) THEN * CALL PCDBTRSV( 'L', 'C', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PCDBTRSV( 'U', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PCDBTRS * END scalapack-1.8.0/SRC/pstrtri.f0000640000175000017500000003067610363532303015635 0ustar muammarmuammar SUBROUTINE PSTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSTRTRI computes the inverse of a upper or lower triangular * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the distributed matrix sub( A ) is upper * or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) * is unit triangular: * = 'N': Non-unit triangular, * = 'U': Unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * triangular matrix sub( A ). If UPLO = 'U', the leading * N-by-N upper triangular part of the matrix sub( A ) contains * the upper triangular matrix to be inverted, and the strictly * lower triangular part of sub( A ) is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * the matrix sub( A ) contains the lower triangular matrix, * and the strictly upper triangular part of sub( A ) is not * referenced. * On exit, the (triangular) inverse of the original matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, A(IA+K-1,JA+K-1) is exactly zero. The * triangular matrix sub( A ) is singular and its * inverse can not be computed. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER I, ICOFF, ICTXT, IROFF, ICURCOL, ICURROW, $ IDUMMY, II, IOFFA, J, JB, JJ, JN, LDA, MYCOL, $ MYROW, NN, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK1MAT, PSTRTI2, PSTRMM, PSTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) * CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOUNIT ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 * CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTRTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) * * Handle first block separately * JB = JN-JA+1 LDA = DESCA( LLD_ ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 10 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 IOFFA = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 20 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 IOFFA = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Use blocked code * IF( UPPER ) THEN * * Compute inverse of upper triangular matrix * JB = JN-JA+1 * * Handle first block of column separately * CALL PSTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * * Loop over remaining block of columns * DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Compute rows 1:j-1 of current block column * CALL PSTRMM( 'Left', UPLO, 'No transpose', DIAG, J-JA, JB, $ ONE, A, IA, JA, DESCA, A, IA, J, DESCA ) CALL PSTRSM( 'Right', UPLO, 'No transpose', DIAG, J-JA, $ JB, -ONE, A, I, J, DESCA, A, IA, J, DESCA ) * * Compute inverse of current diagonal block * CALL PSTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 40 CONTINUE * ELSE * * Compute inverse of lower triangular matrix * NN = ( ( JA+N-2 ) / DESCA( NB_ ) )*DESCA( NB_ ) + 1 DO 50 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA IF( J+JB.LE.JA+N-1 ) THEN * * Compute rows j+jb:ja+n-1 of current block column * CALL PSTRMM( 'Left', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, ONE, A, I+JB, J+JB, DESCA, $ A, I+JB, J, DESCA ) CALL PSTRSM( 'Right', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, -ONE, A, I, J, DESCA, $ A, I+JB, J, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PSTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 50 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 IF( JA+JB.LE.JA+N-1 ) THEN * * Compute rows ja+jb:ja+n-1 of current block column * CALL PSTRMM( 'Left', UPLO, 'No transpose', DIAG, N-JB, JB, $ ONE, A, IA+JB, JA+JB, DESCA, A, IA+JB, JA, $ DESCA ) CALL PSTRSM( 'Right', UPLO, 'No transpose', DIAG, N-JB, JB, $ -ONE, A, IA, JA, DESCA, A, IA+JB, JA, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PSTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * END IF * RETURN * * End PSTRTRI * END scalapack-1.8.0/SRC/pzlahqr.f0000640000175000017500000033332310602576752015616 0ustar muammarmuammar SUBROUTINE PZLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ, $ IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7.3) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * 1.7.3: March 22, 2006 * modification suggested by Mark Fahey and Greg Henry * 1.7.0: July 31, 2001 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) COMPLEX*16 A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZLAHQR is an auxiliary routine used to find the Schur decomposition * and or eigenvalues of a matrix already in Hessenberg form from * cols ILO to IHI. * If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ, * with Z'Z=I, and H in Schur form. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCp() and LOCq() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * WANTT (global input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (global input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (global input) INTEGER * The order of the Hessenberg matrix A (and Z if WANTZ). * N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that A is already upper quasi-triangular in * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless * ILO = 1). PZLAHQR works primarily with the Hessenberg * submatrix in rows and columns ILO to IHI, but applies * transformations to all of H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * A (global input/output) COMPLEX*16 array, dimension * (DESCA(LLD_),*) * On entry, the upper Hessenberg matrix A. * On exit, if WANTT is .TRUE., A is upper triangular in rows * and columns ILO:IHI. If WANTT is .FALSE., the contents of * A are unspecified on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * W (global replicated output) COMPLEX*16 array, dimension (N) * The computed eigenvalues ILO to IHI are stored in the * corresponding elements of W. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in A. A may be returned with * larger diagonal blocks until the next release. * * ILOZ (global input) INTEGER * IHIZ (global input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (global input/output) COMPLEX*16 array. * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations accumulated by PZHSEQR, and on * exit Z has been updated; transformations are applied only to * the submatrix Z(ILOZ:IHIZ,ILO:IHI). * If WANTZ is .FALSE., Z is not referenced. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local output) COMPLEX*16 array of size LWORK * (Unless LWORK=-1, in which case WORK must be at least size 1) * * LWORK (local input) INTEGER * WORK(LWORK) is a local array and LWORK is assumed big enough * so that LWORK >= 3*N + * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N), * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) + * MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 ) * If LWORK=-1, then WORK(1) gets set to the above number and * the code returns immediately. * * IWORK (global and local input) INTEGER array of size ILWORK * This will hold some of the IBLK integer arrays. * This is held as a place holder for a future release. * Currently unreferenced. * * ILWORK (local input) INTEGER * This will hold the size of the IWORK array. * This is held as a place holder for a future release. * Currently unreferenced. * * INFO (global output) INTEGER * < 0: parameter number -INFO incorrect or inconsistent * = 0: successful exit * > 0: PZLAHQR failed to compute all the eigenvalues ILO to IHI * in a total of 30*(IHI-ILO+1) iterations; if INFO = i, * elements i+1:ihi of W contains those eigenvalues * which have been successfully computed. * * Logic: * This algorithm is very similar to DLAHQR. Unlike DLAHQR, * instead of sending one double shift through the largest * unreduced submatrix, this algorithm sends multiple double shifts * and spaces them apart so that there can be parallelism across * several processor row/columns. Another critical difference is * that this algorithm aggregrates multiple transforms together in * order to apply them in a block fashion. * * Important Local Variables: * IBLK = The maximum number of bulges that can be computed. * Currently fixed. Future releases this won't be fixed. * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) * ROTN = The number of transforms to block together * NBULGE = The number of bulges that will be attempted on the * current submatrix. * IBULGE = The current number of bulges started. * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). * * Subroutines: * From LAPACK, this routine calls: * ZLAHQR -> Serial QR used to determine shifts and * eigenvalues * ZLARFG -> Determine the Householder transforms * * This ScaLAPACK, this routine calls: * PZLACONSB -> To determine where to start each iteration * ZLAMSH -> Sends multiple shifts through a small * submatrix to see how the consecutive * subdiagonals change (if PZLACONSB indicates * we can start a run in the middle) * PZLAWIL -> Given the shift, get the transformation * PZLACP3 -> Parallel array to local replicated array copy * & back. * ZLAREF -> Row/column reflector applier. Core routine * here. * PZLASMSUB -> Finds negligible subdiagonal elements. * * Current Notes and/or Restrictions: * 1.) This code requires the distributed block size to be square * and at least six (6); unlike simpler codes like LU, this * algorithm is extremely sensitive to block size. Unwise * choices of too small a block size can lead to bad * performance. * 2.) This code requires A and Z to be distributed identically * and have identical contxts. A future version may allow Z to * have a different contxt to 1D row map it to all nodes (so no * communication on Z is necessary.) * 3.) This code does not currently block the initial transforms * so that none of the rows or columns for any bulge are * completed until all are started. To offset pipeline * start-up it is recommended that at least 2*LCM(NPROW,NPCOL) * bulges are used (if possible) * 4.) The maximum number of bulges currently supported is fixed at * 32. In future versions this will be limited only by the * incoming WORK and IWORK array. * 5.) The matrix A must be in upper Hessenberg form. If elements * below the subdiagonal are nonzero, the resulting transforms * may be nonsimilar. This is also true with the LAPACK * routine ZLAHQR. * 6.) For this release, this code has only been tested for * RSRC_=CSRC_=0, but it has been written for the general case. * 7.) Currently, all the eigenvalues are distributed to all the * nodes. Future releases will probably distribute the * eigenvalues by the column partitioning. * 8.) The internals of this routine are subject to change. * 9.) To optimize this for your architecture, try tuning ZLAREF. * 10.) This code has only been tested for WANTZ = .TRUE. and may * behave unpredictably for WANTZ set to .FALSE. * * Further Details * =============== * * Contributed by Mark Fahey, June, 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION RONE PARAMETER ( RONE = 1.0D+0 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) DOUBLE PRECISION CONST PARAMETER ( CONST = 1.50D+0 ) INTEGER IBLK PARAMETER ( IBLK = 32 ) * .. * .. Local Scalars .. LOGICAL SKIP INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE, $ ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II, $ IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART, $ ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP, $ ITERMAX, ITMP1, ITMP2, ITN, ITS, IZBUF, J, $ JAFIRST, JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ, $ LEFT, LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1, $ LOCALI2, LOCALK, LOCALM, M, MODKM1, MYCOL, $ MYROW, NBULGE, NH, NODE, NPCOL, NPROW, NQ, NR, $ NUM, NZ, RIGHT, ROTN, UP, VECSIDX DOUBLE PRECISION CS, OVFL, S, SMLNUM, ULP, UNFL COMPLEX*16 CDUM, H10, H11, H22, H33, H43H34, H44, SN, SUM, $ T1, T1COPY, T2, T3, V1SAVE, V2, V2SAVE, V3, $ V3SAVE * .. * .. Local Arrays .. INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ), $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ), $ KP2ROW( IBLK ), KROW( IBLK ) COMPLEX*16 S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ), $ VCOPY( 3 ) * .. * .. External Functions .. INTEGER ILCM, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, NUMROC, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMN2D, IGEBR2D, IGEBS2D, $ INFOG1L, INFOG2L, PDLABAD, PXERBLA, PZLACONSB, $ PZLACP3, PZLASMSUB, PZLAWIL, PZROT, ZCOPY, $ ZGEBR2D, ZGEBS2D, ZGERV2D, ZGESD2D, ZGSUM2D, $ ZLAHQR2, ZLAMSH, ZLANV2, ZLAREF, ZLARFG * .. * .. Intrinsic Functions .. * INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) IF( N.EQ.0 ) $ RETURN * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) IAFIRST = DESCA( RSRC_ ) JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) LCMRC = ILCM( NPROW, NPCOL ) IF( ( NPROW.LE.3 ) .OR. ( NPCOL.LE.3 ) ) THEN SKIP = .TRUE. ELSE SKIP = .FALSE. END IF * * Determine the number of columns we have so we can check workspace * NQ = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL ) JJ = N / HBL IF( JJ*HBL.LT.N ) $ JJ = JJ + 1 JJ = 7*JJ / LCMRC JJ = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, JJ ) JJ = JJ + MAX( 2*N, ( 8*LCMRC+2 )**2 ) IF( LWORK.EQ.-1 ) THEN WORK( 1 ) = JJ RETURN END IF IF( LWORK.LT.JJ ) THEN INFO = -14 END IF IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) END IF IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1300+NB_ ) END IF IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1300+MB_ ) END IF IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN INFO = -( 700+RSRC_ ) END IF IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN INFO = -( 1300+RSRC_ ) END IF IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN INFO = -4 END IF IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN INFO = -5 END IF IF( HBL.LT.5 ) THEN INFO = -( 700+MB_ ) END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PZLAHQR', -INFO ) RETURN END IF * * Set work array indices * VECSIDX = 0 IDIA = 3*N ISUB = 3*N ISUP = 3*N IRBUF = 3*N ICBUF = 3*N IZBUF = 5*N * * Find a value for ROTN * ROTN = HBL / 3 ROTN = MIN( ROTN, HBL-2 ) ROTN = MAX( ROTN, 1 ) * IF( ILO.EQ.IHI ) THEN CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN W( ILO ) = A( ( ICOL-1 )*LDA+IROW ) ELSE W( ILO ) = ZERO END IF RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, IAFIRST, LILOZ, LIHIZ ) LIHIZ = NUMROC( IHIZ, HBL, MYROW, IAFIRST, NPROW ) * * Set machine-dependent constants for the stopping criterion. * If NORM(H) <= SQRT(OVFL), overflow should not occur. * UNFL = PDLAMCH( CONTXT, 'SAFE MINIMUM' ) OVFL = RONE / UNFL CALL PDLABAD( CONTXT, UNFL, OVFL ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = ITERMAX * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of our schur block size (<=2*IBLK). Each * iteration of the loop works with the active submatrix in rows * and columns L to I. Eigenvalues I+1 to IHI have already * converged. Either L = ILO or the global A(L,L-1) is negligible * so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 570 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 540 ITS = 0, ITN * * Look for a single small subdiagonal element. * CALL PZLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ), $ LWORK-IRBUF ) L = K * IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN A( ( ICOL-1 )*LDA+IROW ) = ZERO END IF WORK( ISUB+L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * IF( WANTT ) THEN * For Schur form, use 2x2 blocks IF( L.GE.I-1 ) THEN GO TO 550 END IF ELSE * If we don't want the Schur form, use bigger blocks. IF( L.GE.I-( 2*IBLK-1 ) ) THEN GO TO 550 END IF END IF * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * * Copy submatrix of size 2*JBLK and prepare to do generalized * Wilkinson shift or an exceptional shift * JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 ) IF( JBLK.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * JBLK = JBLK - MOD( JBLK, LCMRC ) END IF JBLK = MIN( JBLK, 2*LCMRC ) JBLK = MAX( JBLK, 1 ) * CALL PZLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1, $ 0 ) IF( ( ITS.EQ.20 .OR. ITS.EQ.40 ) .AND. ( JBLK.GT.1 ) ) THEN * * Exceptional shift. * DO 20 II = 2*JBLK, 2, -1 S1( II, II ) = CONST*( CABS1( S1( II, II ) )+ $ CABS1( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE S1( 1, 1 ) = CONST*CABS1( S1( 1, 1 ) ) ELSE CALL ZLAHQR2( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1, $ 2*IBLK, WORK( IRBUF+1 ), 1, 2*JBLK, Z, LDZ, $ IERR ) * * Prepare to use Wilkinson's double shift * H44 = S1( 2*JBLK, 2*JBLK ) H33 = S1( 2*JBLK-1, 2*JBLK-1 ) H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 ) * END IF * * Look for two consecutive small subdiagonal elements: * PZLACONSB is the routine that does this. * CALL PZLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, $ WORK( IRBUF+1 ), LWORK-IRBUF ) * * Double-shift QR step * * NBULGE is the number of bulges that will be attempted * ISTOP = MIN( M+ROTN-1-MOD( M-( M / HBL )*HBL-1, ROTN ), I-2 ) ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) ) ISTOP = MIN( ISTOP, I2-2 ) ISTOP = MAX( ISTOP, M ) NBULGE = ( I-1-ISTOP ) / HBL * * Do not exceed maximum determined. * NBULGE = MIN( NBULGE, JBLK ) IF( NBULGE.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * NBULGE = NBULGE - MOD( NBULGE, LCMRC ) END IF NBULGE = MAX( NBULGE, 1 ) * * If we are starting in the middle because of consecutive small * subdiagonal elements, we need to see how many bulges we * can send through without breaking the consecutive small * subdiagonal property. * IF( ( NBULGE.GT.1 ) .AND. ( M.GT.L ) ) THEN * * Copy a chunk of elements from global A(M-1:,M-1:) * CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) II = MIN( 4*NBULGE+2, N-M+2 ) CALL PZLACP3( II, M-1, A, DESCA, WORK( IRBUF+1 ), II, ITMP1, $ ITMP2, 0 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN * * Find a new NBULGE based on the bulges we have. * CALL ZLAMSH( S1, 2*IBLK, NBULGE, JBLK, WORK( IRBUF+1 ), $ II, II, ULP ) IF( NUM.GT.1 ) THEN CALL IGEBS2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1 ) END IF ELSE * * Everyone needs to receive the new NBULGE * CALL IGEBR2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1, ITMP1, $ ITMP2 ) END IF END IF * * IBULGE is the number of bulges going so far * IBULGE = 1 * * "A" row defs : main row transforms from LOCALK to LOCALI2 * CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, ITMP1, LOCALK ) LOCALK = NQ CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ICOL1, LOCALI2 ) LOCALI2 = NUMROC( I2, HBL, MYCOL, JAFIRST, NPCOL ) * * "A" col defs : main col transforms from LOCALI1 to LOCALM * CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, LOCALI1, ICOL1 ) CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, LOCALM, ICOL1 ) ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, IAFIRST, NPROW ) * * Which row & column will start the bulges * ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST * CALL INFOG1L( M, HBL, NPROW, MYROW, IAFIRST, II, ITMP2 ) CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, JJ, ITMP2 ) CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, ISTOP, $ KP2ROW( 1 ) ) KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, IAFIRST, NPROW ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ISTOP, $ KP2COL( 1 ) ) KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, JAFIRST, NPCOL ) * * Set all values for bulges. All bulges are stored in * intermediate steps as loops over KI. Their current "task" * over the global M to I-1 values is always K1(KI) to K2(KI). * However, because there are many bulges, K1(KI) & K2(KI) might * go past that range while later bulges (KI+1,KI+2,etc..) are * finishing up. Even if ROTN=1, in order to minimize border * communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both * border messages can be handled at once. * * Rules: * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER*1 * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER*1 * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise * = 'R': rowwise * * N (global input) INTEGER * The order of the block reflector H. N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) DOUBLE PRECISION pointer into the local memory * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1)) * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if * STOREV = 'R'. The distributed matrix V contains the * Householder vectors. See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) DOUBLE PRECISION array, dimension (NB_V,NB_V) * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains * the k-by-k triangular factor of the block reflector asso- * ciated with V. If DIRECT = 'F', T is upper triangular; * if DIRECT = 'B', T is lower triangular. * * WORK (local workspace) DOUBLE PRECISION array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 ) * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 ) * ( v1 v2 1 ) ( 1 v3 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 ) * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 ) * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) * ( 1 v3 ) * ( 1 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL FORWARD INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJ, JJV, LDV, MICOL, MIROW, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION VII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOPY, DGEMV, DGSUM2D, $ DLASET, DTRMV, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 .OR. K.LE.0 ) $ RETURN * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( LSAME( STOREV, 'C' ) .AND. MYCOL.EQ.IVCOL ) THEN * IW = 1 LDV = DESCV( LLD_ ) IROFF = MOD( IV-1, DESCV( MB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NP = NP - IROFF II = IIV + 1 ELSE II = IIV END IF IF( IROFF+1.EQ.DESCV( MB_ ) ) THEN MIROW = MOD( IVROW+1, NPROW ) ELSE MIROW = IVROW END IF ITMP0 = 0 * DO 10 JJ = JJV+1, JJV+K-1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( NP-II+IIV.GT.0 ) THEN CALL DGEMV( 'Transpose', NP-II+IIV, ITMP0, $ -TAU( JJ ), V( II+(JJV-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL DLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II + 1 END IF * IF( MOD( IV+ITMP0, DESCV( MB_ ) ).EQ.0 ) $ MIROW = MOD( MIROW+1, NPROW ) * 10 CONTINUE * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( JJV ) * DO 20 JJ = JJV+1, JJV+K-1 * * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( NB_ ) CALL DCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( NB_ ), T( ITMP1 ), 1 ) T(ITMP1+ITMP0) = TAU( JJ ) * 20 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF-1, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ NP = NP - IROFF MIROW = INDXG2P( IV+N-2, DESCV( MB_ ), MYROW, $ DESCV( RSRC_ ), NPROW ) II = IIV + NP - 1 ITMP0 = 0 * DO 30 JJ = JJV+K-2, JJV, -1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( II-IIV+1.GT.0 ) THEN CALL DGEMV( 'Transpose', II-IIV+1, ITMP0, -TAU( JJ ), $ V( IIV+JJ*LDV ), LDV, $ V( IIV+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL DLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II - 1 END IF * IF( MOD( IV+N-ITMP0-2, DESCV(MB_) ).EQ.0 ) $ MIROW = MOD( MIROW+NPROW-1, NPROW ) * 30 CONTINUE * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( NB_ ) * T( ITMP1-1 ) = TAU( JJV+K-1 ) * DO 40 JJ = JJV+K-2, JJV, -1 * * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( NB_ ) - 1 CALL DCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( NB_ ) ), $ DESCV( NB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( JJ ) * 40 CONTINUE * END IF * END IF * ELSE IF( LSAME( STOREV, 'R' ) .AND. MYROW.EQ.IVROW ) THEN * IW = 1 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN NQ = NQ - ICOFF JJ = JJV + 1 ELSE JJ = JJV END IF IF( ICOFF+1.EQ.DESCV( NB_ ) ) THEN MICOL = MOD( IVCOL+1, NPCOL ) ELSE MICOL = IVCOL END IF ITMP0 = 0 * DO 50 II = IIV+1, IIV+K-1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( iv+i-1 ) * * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ-JJ+JJV.GT.0 ) THEN CALL DGEMV( 'No transpose', ITMP0, NQ-JJ+JJV, $ -TAU(II), V( IIV+(JJ-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) ELSE CALL DLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ + 1 END IF * IF( MOD( JV+ITMP0, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+1, NPCOL ) * 50 CONTINUE * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( IIV ) * DO 60 II = IIV+1, IIV+K-1 * * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( MB_ ) CALL DCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1+ITMP0 ) = TAU( II ) * 60 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF-1, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF MICOL = INDXG2P( JV+N-2, DESCV( NB_ ), MYCOL, $ DESCV( CSRC_ ), NPCOL ) JJ = JJV + NQ - 1 ITMP0 = 0 * DO 70 II = IIV+K-2, IIV, -1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)' * ITMP0 = ITMP0 + 1 IF( JJ-JJV+1.GT.0 ) THEN CALL DGEMV( 'No transpose', ITMP0, JJ-JJV+1, $ -TAU( II ), V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) ELSE CALL DLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ - 1 END IF * IF( MOD( JV+N-ITMP0-2, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+NPCOL-1, NPCOL ) * 70 CONTINUE * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 80 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL DCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( MB_ ) ), $ DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 80 CONTINUE * END IF * END IF * END IF * RETURN * * End of PDLARFT * END scalapack-1.8.0/SRC/psgels.f0000640000175000017500000005417510363532303015423 0ustar muammarmuammar SUBROUTINE PSGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PSGELS solves overdetermined or underdetermined real linear * systems involving an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1), * or its transpose, using a QR or LQ factorization of sub( A ). It is * assumed that sub( A ) has full rank. * * The following options are provided: * * 1. If TRANS = 'N' and m >= n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )*X ||. * * 2. If TRANS = 'N' and m < n: find the minimum norm solution of * an underdetermined system sub( A ) * X = sub( B ). * * 3. If TRANS = 'T' and m >= n: find the minimum norm solution of * an undetermined system sub( A )**T * X = sub( B ). * * 4. If TRANS = 'T' and m < n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )**T * X ||. * * where sub( B ) denotes B( IB:IB+M-1, JB:JB+NRHS-1 ) when TRANS = 'N' * and B( IB:IB+N-1, JB:JB+NRHS-1 ) otherwise. Several right hand side * vectors b and solution vectors x can be handled in a single call; * When TRANS = 'N', the solution vectors are stored as the columns of * the N-by-NRHS right hand side matrix sub( B ) and the M-by-NRHS * right hand side matrix sub( B ) otherwise. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER * = 'N': the linear system involves sub( A ); * = 'T': the linear system involves sub( A )**T. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e. the number of columns * of the distributed submatrices sub( B ) and X. NRHS >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). On entry, the M-by-N matrix A. * if M >= N, sub( A ) is overwritten by details of its QR * factorization as returned by PSGEQRF; * if M < N, sub( A ) is overwritten by details of its LQ * factorization as returned by PSGELQF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) REAL pointer into the * local memory to an array of local dimension * (LLD_B, LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributed matrix B of right hand side * vectors, stored columnwise; * sub( B ) is M-by-NRHS if TRANS='N', and N-by-NRHS otherwise. * On exit, sub( B ) is overwritten by the solution vectors, * stored columnwise: if TRANS = 'N' and M >= N, rows 1 to N * of sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements N+1 to M in that * column; if TRANS = 'N' and M < N, rows 1 to N of sub( B ) * contain the minimum norm solution vectors; if TRANS = 'T' * and M >= N, rows 1 to M of sub( B ) contain the minimum norm * solution vectors; if TRANS = 'T' and M < N, rows 1 to M of * sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements M+1 to N in that * column. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= LTAU + MAX( LWF, LWS ) where * If M >= N, then * LTAU = NUMROC( JA+MIN(M,N)-1, NB_A, MYCOL, CSRC_A, NPCOL ), * LWF = NB_A * ( MpA0 + NqA0 + NB_A ) * LWS = MAX( (NB_A*(NB_A-1))/2, (NRHSqB0 + MpB0)*NB_A ) + * NB_A * NB_A * Else * LTAU = NUMROC( IA+MIN(M,N)-1, MB_A, MYROW, RSRC_A, NPROW ), * LWF = MB_A * ( MpA0 + NqA0 + MB_A ) * LWS = MAX( (MB_A*(MB_A-1))/2, ( NpB0 + MAX( NqA0 + * NUMROC( NUMROC( N+IROFFB, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NRHSqB0 ) )*MB_A ) + * MB_A * MB_A * End if * * where LCMP = LCM / NPROW with LCM = ILCM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * MpB0 = NUMROC( M+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NRHSqB0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, TPSD INTEGER BROW, IACOL, IAROW, IASCL, IBCOL, IBROW, IBSCL, $ ICOFFA, ICOFFB, ICTXT, IPW, IROFFA, IROFFB, $ LCM, LCMP, LTAU, LWF, LWMIN, LWS, MPA0, MPB0, $ MYCOL, MYROW, NPB0, NPCOL, NPROW, NQA0, $ NRHSQB0, SCLLEN REAL ANRM, BIGNUM, BNRM, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) REAL RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM INTEGER INDXG2P, NUMROC REAL PSLANGE, PSLAMCH EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, PSLANGE, $ PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSGELQF, $ PSGEQRF, PSLABAD, PSLASCL, PSLASET, $ PSORMLQ, PSORMQR, PSTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( M, 2, N, 3, IA, JA, DESCA, 8, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( IA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( IB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NRHSQB0 = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, $ NPCOL ) IF( M.GE.N ) THEN MPB0 = NUMROC( M+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( JA+MIN(M,N)-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) LWF = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) LWS = MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPB0 + NRHSQB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ )*DESCA( NB_ ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( IA+MIN(M,N)-1, DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) LWF = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) LWS = MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( NPB0 + MAX( NQA0 + NUMROC( NUMROC( N+IROFFB, $ DESCA( MB_ ), 0, 0, NPROW ), DESCA( MB_ ), 0, 0, $ LCMP ), NRHSQB0 ) )*DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF LWMIN = LTAU + MAX( LWF, LWS ) WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) * TPSD = .TRUE. IF( LSAME( TRANS, 'N' ) ) $ TPSD = .FALSE. * IF( .NOT.( LSAME( TRANS, 'N' ) .OR. $ LSAME( TRANS, 'T' ) ) ) THEN INFO = -1 ELSE IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( M.GE.N .AND. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. IAROW.NE.IBROW ) THEN INFO = -10 ELSE IF( M.LT.N .AND. ICOFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( M.LT.N .AND. DESCA( NB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1200 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF END IF * IF( .NOT.TPSD ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IDUM1( 1 ) = ICHAR( 'T' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( M, 2, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, 4, $ IB, JB, DESCB, 12, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGELS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PSLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, $ IB, JB, DESCB ) RETURN END IF * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'P' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) * * Scale A, B if max entry outside range [SMLNUM,BIGNUM] * ANRM = PSLANGE( 'M', M, N, A, IA, JA, DESCA, RWORK ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PSLASCL( 'G', ANRM, SMLNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 1 ELSE IF( ANRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PSLASCL( 'G', ANRM, BIGNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 2 ELSE IF( ANRM.EQ.ZERO ) THEN * * Matrix all zero. Return zero solution. * CALL PSLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, IB, JB, $ DESCB ) GO TO 10 END IF * BROW = M IF( TPSD ) $ BROW = N * BNRM = PSLANGE( 'M', BROW, NRHS, B, IB, JB, DESCB, RWORK ) * IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PSLASCL( 'G', BNRM, SMLNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 1 ELSE IF( BNRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PSLASCL( 'G', BNRM, BIGNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 2 END IF * IPW = LTAU + 1 * IF( M.GE.N ) THEN * * compute QR factorization of A * CALL PSGEQRF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least N, optimally N*NB * IF( .NOT.TPSD ) THEN * * Least-Squares Problem min || A * X - B || * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q' * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PSORMQR( 'Left', 'Transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+N-1,JB:JB+NRHS-1) := inv(R) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PSTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * SCLLEN = N * ELSE * * Overdetermined system of equations sub( A )' * X = sub( B ) * * sub( B ) := inv(R') * sub( B ) * CALL PSTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * B(IB+N:IB+M-1,JB:JB+NRHS-1) = ZERO * CALL PSLASET( 'All', M-N, NRHS, ZERO, ZERO, B, IB+N, JB, $ DESCB ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q(1:N,:) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PSORMQR( 'Left', 'No transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = M * END IF * ELSE * * Compute LQ factorization of sub( A ) * CALL PSGELQF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least M, optimally M*NB. * IF( .NOT.TPSD ) THEN * * underdetermined system of equations sub( A ) * X = sub( B ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L) * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PSTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * B(IB+M:IB+N-1,JB:JB+NRHS-1) = 0 * CALL PSLASET( 'All', N-M, NRHS, ZERO, ZERO, B, IB+M, JB, $ DESCB ) * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q(1:N,:)' * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PSORMLQ( 'Left', 'Transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = N * ELSE * * overdetermined system min || A' * X - B || * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PSORMLQ( 'Left', 'No transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L') * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PSTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * SCLLEN = M * END IF * END IF * * Undo scaling * IF( IASCL.EQ.1 ) THEN CALL PSLASCL( 'G', ANRM, SMLNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IASCL.EQ.2 ) THEN CALL PSLASCL( 'G', ANRM, BIGNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF IF( IBSCL.EQ.1 ) THEN CALL PSLASCL( 'G', SMLNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IBSCL.EQ.2 ) THEN CALL PSLASCL( 'G', BIGNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF * 10 CONTINUE * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGELS * END scalapack-1.8.0/SRC/sdttrf.f0000640000175000017500000000626510363532303015431 0ustar muammarmuammar SUBROUTINE SDTTRF( N, DL, D, DU, INFO ) * * Written by Andrew J. Cleary, November 1996. * Modified from SGTTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. INTEGER INFO, N * .. * .. Array Arguments .. REAL D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * SDTTRF computes an LU factorization of a complex tridiagonal matrix A * using elimination without partial pivoting. * * The factorization has the form * A = L * U * where L is a product of unit lower bidiagonal * matrices and U is upper triangular with nonzeros in only the main * diagonal and first superdiagonal. * * Arguments * ========= * * N (input) INTEGER * The order of the matrix A. N >= 0. * * DL (input/output) COMPLEX array, dimension (N-1) * On entry, DL must contain the (n-1) subdiagonal elements of * A. * On exit, DL is overwritten by the (n-1) multipliers that * define the matrix L from the LU factorization of A. * * D (input/output) COMPLEX array, dimension (N) * On entry, D must contain the diagonal elements of A. * On exit, D is overwritten by the n diagonal elements of the * upper triangular matrix U from the LU factorization of A. * * DU (input/output) COMPLEX array, dimension (N-1) * On entry, DU must contain the (n-1) superdiagonal elements * of A. * On exit, DU is overwritten by the (n-1) elements of the first * superdiagonal of U. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Local Scalars .. INTEGER I REAL FACT * .. * .. Intrinsic Functions .. INTRINSIC ABS * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Executable Statements .. * INFO = 0 IF( N.LT.0 ) THEN INFO = -1 CALL XERBLA( 'SDTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * DO 20 I = 1, N - 1 IF( DL( I ).EQ.ZERO ) THEN * * Subdiagonal is zero, no elimination is required. * IF( D( I ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I ELSE * FACT = DL( I ) / D( I ) DL( I ) = FACT D( I+1 ) = D( I+1 ) - FACT*DU( I ) END IF 20 CONTINUE IF( D( N ).EQ.ZERO .AND. INFO.EQ.0 ) THEN INFO = N RETURN END IF * RETURN * * End of SDTTRF * END scalapack-1.8.0/SRC/pdstein.f0000640000175000017500000006043210602576752015601 0ustar muammarmuammar SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N DOUBLE PRECISION ORFAC * .. * .. Array Arguments .. INTEGER DESCZ( * ), IBLOCK( * ), ICLUSTR( * ), $ IFAIL( * ), ISPLIT( * ), IWORK( * ) DOUBLE PRECISION D( * ), E( * ), GAP( * ), W( * ), WORK( * ), $ Z( * ) * .. * * Purpose * ======= * * PDSTEIN computes the eigenvectors of a symmetric tridiagonal matrix * in parallel, using inverse iteration. The eigenvectors found * correspond to user specified eigenvalues. PDSTEIN does not * orthogonalize vectors that are on different processes. The extent * of orthogonalization is controlled by the input parameter LWORK. * Eigenvectors that are to be orthogonalized are computed by the same * process. PDSTEIN decides on the allocation of work among the * processes and then calls DSTEIN2 (modified LAPACK routine) on each * individual process. If insufficient workspace is allocated, the * expected orthogonalization may not be done. * * Note : If the eigenvectors obtained are not orthogonal, increase * LWORK and run the code again. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * P = NPROW * NPCOL is the total number of processes * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input) DOUBLE PRECISION array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (global input) DOUBLE PRECISION array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * * M (global input) INTEGER * The total number of eigenvectors to be found. 0 <= M <= N. * * W (global input/global output) DOUBLE PRECISION array, dim (M) * On input, the first M elements of W contain all the * eigenvalues for which eigenvectors are to be computed. The * eigenvalues should be grouped by split-off block and ordered * from smallest to largest within the block (The output array * W from PDSTEBZ with ORDER='b' is expected here). This * array should be replicated on all processes. * On output, the first M elements contain the input * eigenvalues in ascending order. * * Note : To obtain orthogonal vectors, it is best if * eigenvalues are computed to highest accuracy ( this can be * done by setting ABSTOL to the underflow threshold = * DLAMCH('U') --- ABSTOL is an input parameter * to PDSTEBZ ) * * IBLOCK (global input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W -- 1 for eigenvalues belonging to the * first submatrix from the top, 2 for those belonging to * the second submatrix, etc. (The output array IBLOCK * from PDSTEBZ is expected here). * * ISPLIT (global input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N (The output array * ISPLIT from PDSTEBZ is expected here.) * * ORFAC (global input) DOUBLE PRECISION * ORFAC specifies which eigenvectors should be orthogonalized. * Eigenvectors that correspond to eigenvalues which are within * ORFAC*||T|| of each other are to be orthogonalized. * However, if the workspace is insufficient (see LWORK), this * tolerance may be decreased until all eigenvectors to be * orthogonalized can be stored in one process. * No orthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) DOUBLE PRECISION array, * dimension (DESCZ(DLEN_), N/npcol + NB) * Z contains the computed eigenvectors associated with the * specified eigenvalues. Any vector which fails to converge is * set to its current iterate after MAXITS iterations ( See * DSTEIN2 ). * On output, Z is distributed across the P processes in block * cyclic format. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local workspace/global output) DOUBLE PRECISION array, * dimension ( LWORK ) * On output, WORK(1) gives a lower bound on the * workspace ( LWORK ) that guarantees the user desired * orthogonalization (see ORFAC). * Note that this may overestimate the minimum workspace needed. * * LWORK (local input) integer * LWORK controls the extent of orthogonalization which can be * done. The number of eigenvectors for which storage is * allocated on each process is * NVEC = floor(( LWORK- max(5*N,NP00*MQ00) )/N). * Eigenvectors corresponding to eigenvalue clusters of size * NVEC - ceil(M/P) + 1 are guaranteed to be orthogonal ( the * orthogonality is similar to that obtained from DSTEIN2). * Note : LWORK must be no smaller than: * max(5*N,NP00*MQ00) + ceil(M/P)*N, * and should have the same input value on all processes. * It is the minimum value of LWORK input on different processes * that is significant. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/global output) INTEGER array, * dimension ( 3*N+P+1 ) * On return, IWORK(1) contains the amount of integer workspace * required. * On return, the IWORK(2) through IWORK(P+2) indicate * the eigenvectors computed by each process. Process I computes * eigenvectors indexed IWORK(I+2)+1 thru' IWORK(I+3). * * LIWORK (local input) INTEGER * Size of array IWORK. Must be >= 3*N + P + 1 * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) integer array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after MAXITS * iterations (as in DSTEIN), then INFO > 0 is returned. * If mod(INFO,M+1)>0, then * for I=1 to mod(INFO,M+1), the eigenvector * corresponding to the eigenvalue W(IFAIL(I)) failed to * converge ( W refers to the array of eigenvalues on output ). * * ICLUSTR (global output) integer array, dimension (2*P) * This output array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace (see LWORK, * ORFAC and INFO). Eigenvectors corresponding to clusters of * eigenvalues indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), I = 1 to * INFO/(M+1), could not be orthogonalized due to lack of * workspace. Hence the eigenvectors corresponding to these * clusters may not be orthogonal. ICLUSTR is a zero terminated * array --- ( ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0 ) * if and only if K is the number of clusters. * * GAP (global output) DOUBLE PRECISION array, dimension (P) * This output array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. The INFO/M output * values in this array correspond to the INFO/(M+1) clusters * indicated by the array ICLUSTR. As a result, the dot product * between eigenvectors corresponding to the I^th cluster may be * as high as ( O(n)*macheps ) / GAP(I). * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * < 0 : if INFO = -I, the I-th argument had an illegal value * > 0 : if mod(INFO,M+1) = I, then I eigenvectors failed to * converge in MAXITS iterations. Their indices are * stored in the array IFAIL. * if INFO/(M+1) = I, then eigenvectors corresponding to * I clusters of eigenvalues could not be orthogonalized * due to insufficient workspace. The indices of the * clusters are stored in the array ICLUSTR. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DLASRT2, DSTEIN2, IGAMN2D, IGEBR2D, IGEBS2D, $ PCHK1MAT, PDLAEVSWP, PXERBLA * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, NEGONE, ODM1, FIVE, ODM3, ODM18 PARAMETER ( ZERO = 0.0D+0, NEGONE = -1.0D+0, $ ODM1 = 1.0D-1, FIVE = 5.0D+0, ODM3 = 1.0D-3, $ ODM18 = 1.0D-18 ) * .. * .. Local Scalars .. LOGICAL LQUERY, SORTED INTEGER B1, BN, BNDRY, CLSIZ, COL, I, IFIRST, IINFO, $ ILAST, IM, INDRW, ITMP, J, K, LGCLSIZ, LLWORK, $ LOAD, LOCINFO, MAXVEC, MQ00, MYCOL, MYROW, $ NBLK, NERR, NEXT, NP00, NPCOL, NPROW, NVS, $ OLNBLK, P, ROW, SELF, TILL, TOTERR DOUBLE PRECISION DIFF, MINGAP, ONENRM, ORGFAC, ORTOL, TMPFAC * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1200+CTXT_ ) ELSE * * Make sure that NPROW>0 and NPCOL>0 before calling NUMROC * CALL CHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN * * Now we know that our context is good enough to * perform the rest of the checks * NP00 = NUMROC( N, DESCZ( MB_ ), 0, 0, NPROW ) MQ00 = NUMROC( M, DESCZ( NB_ ), 0, 0, NPCOL ) P = NPROW*NPCOL * * Compute the maximum number of vectors per process * LLWORK = LWORK CALL IGAMN2D( DESCZ( CTXT_ ), 'A', ' ', 1, 1, LLWORK, 1, 1, $ 1, -1, -1, -1 ) INDRW = MAX( 5*N, NP00*MQ00 ) IF( N.NE.0 ) $ MAXVEC = ( LLWORK-INDRW ) / N LOAD = ICEIL( M, P ) IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN TMPFAC = ORFAC CALL DGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1 ) ELSE CALL DGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1, 0, 0 ) END IF * LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( MAXVEC.LT.LOAD .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.3*N+P+1 .AND. .NOT.LQUERY ) THEN INFO = -16 ELSE DO 10 I = 2, M IF( IBLOCK( I ).LT.IBLOCK( I-1 ) ) THEN INFO = -6 GO TO 20 END IF IF( IBLOCK( I ).EQ.IBLOCK( I-1 ) .AND. W( I ).LT. $ W( I-1 ) ) THEN INFO = -5 GO TO 20 END IF 10 CONTINUE 20 CONTINUE IF( INFO.EQ.0 ) THEN IF( ABS( TMPFAC-ORFAC ).GT.FIVE*ABS( TMPFAC ) ) $ INFO = -8 END IF END IF * END IF IDUM1( 1 ) = M IDUM2( 1 ) = 4 CALL PCHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, 1, IDUM1, IDUM2, $ INFO ) WORK( 1 ) = DBLE( MAX( 5*N, NP00*MQ00 )+ICEIL( M, P )*N ) IWORK( 1 ) = 3*N + P + 1 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PDSTEIN', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN RETURN END IF * DO 30 I = 1, M IFAIL( I ) = 0 30 CONTINUE DO 40 I = 1, P + 1 IWORK( I ) = 0 40 CONTINUE DO 50 I = 1, P GAP( I ) = NEGONE ICLUSTR( 2*I-1 ) = 0 ICLUSTR( 2*I ) = 0 50 CONTINUE * * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC ELSE TMPFAC = ODM3 END IF ORGFAC = TMPFAC * * Allocate the work among the processes * ILAST = M / LOAD IF( MOD( M, LOAD ).EQ.0 ) $ ILAST = ILAST - 1 OLNBLK = -1 NVS = 0 NEXT = 1 IM = 0 ONENRM = ZERO DO 100 I = 0, ILAST - 1 NEXT = NEXT + LOAD J = NEXT - 1 IF( J.GT.NVS ) THEN NBLK = IBLOCK( NEXT ) IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN * * Compute orthogonalization criterion * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 60 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 60 CONTINUE OLNBLK = NBLK END IF TILL = NVS + MAXVEC 70 CONTINUE J = NEXT - 1 IF( TMPFAC.GT.ODM18 ) THEN ORTOL = TMPFAC*ONENRM DO 80 J = NEXT - 1, MIN( TILL, M-1 ) IF( IBLOCK( J+1 ).NE.IBLOCK( J ) .OR. W( J+1 )- $ W( J ).GE.ORTOL ) THEN GO TO 90 END IF 80 CONTINUE IF( J.EQ.M .AND. TILL.GE.M ) $ GO TO 90 TMPFAC = TMPFAC*ODM1 GO TO 70 END IF 90 CONTINUE J = MIN( J, TILL ) END IF IF( SELF.EQ.I ) $ IM = MAX( 0, J-NVS ) * IWORK( I+1 ) = NVS NVS = MAX( J, NVS ) 100 CONTINUE IF( SELF.EQ.ILAST ) $ IM = M - NVS IWORK( ILAST+1 ) = NVS DO 110 I = ILAST + 2, P + 1 IWORK( I ) = M 110 CONTINUE * CLSIZ = 1 LGCLSIZ = 1 ILAST = 0 NBLK = 0 BNDRY = 2 K = 1 DO 140 I = 1, M IF( IBLOCK( I ).NE.NBLK ) THEN NBLK = IBLOCK( I ) IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 120 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 120 CONTINUE * END IF IF( I.GT.1 ) THEN DIFF = W( I ) - W( I-1 ) IF( IBLOCK( I ).NE.IBLOCK( I-1 ) .OR. I.EQ.M .OR. DIFF.GT. $ ORGFAC*ONENRM ) THEN IFIRST = ILAST IF( I.EQ.M ) THEN IF( IBLOCK( M ).NE.IBLOCK( M-1 ) .OR. DIFF.GT.ORGFAC* $ ONENRM ) THEN ILAST = M - 1 ELSE ILAST = M END IF ELSE ILAST = I - 1 END IF CLSIZ = ILAST - IFIRST IF( CLSIZ.GT.1 ) THEN IF( LGCLSIZ.LT.CLSIZ ) $ LGCLSIZ = CLSIZ MINGAP = ONENRM 130 CONTINUE IF( BNDRY.GT.P+1 ) $ GO TO 150 IF( IWORK( BNDRY ).GT.IFIRST .AND. IWORK( BNDRY ).LT. $ ILAST ) THEN MINGAP = MIN( W( IWORK( BNDRY )+1 )- $ W( IWORK( BNDRY ) ), MINGAP ) ELSE IF( IWORK( BNDRY ).GE.ILAST ) THEN IF( MINGAP.LT.ONENRM ) THEN ICLUSTR( 2*K-1 ) = IFIRST + 1 ICLUSTR( 2*K ) = ILAST GAP( K ) = MINGAP / ONENRM K = K + 1 END IF GO TO 140 END IF BNDRY = BNDRY + 1 GO TO 130 END IF END IF END IF 140 CONTINUE 150 CONTINUE INFO = ( K-1 )*( M+1 ) * * Call DSTEIN2 to find the eigenvectors * CALL DSTEIN2( N, D, E, IM, W( IWORK( SELF+1 )+1 ), $ IBLOCK( IWORK( SELF+1 )+1 ), ISPLIT, ORGFAC, $ WORK( INDRW+1 ), N, WORK, IWORK( P+2 ), $ IFAIL( IWORK( SELF+1 )+1 ), LOCINFO ) * * Redistribute the eigenvector matrix to conform with the block * cyclic distribution of the input matrix * * DO 160 I = 1, M IWORK( P+1+I ) = I 160 CONTINUE * CALL DLASRT2( 'I', M, W, IWORK( P+2 ), IINFO ) * DO 170 I = 1, M IWORK( M+P+1+IWORK( P+1+I ) ) = I 170 CONTINUE * * DO 180 I = 1, LOCINFO ITMP = IWORK( SELF+1 ) + I IFAIL( ITMP ) = IFAIL( ITMP ) + ITMP - I IFAIL( ITMP ) = IWORK( M+P+1+IFAIL( ITMP ) ) 180 CONTINUE * DO 190 I = 1, K - 1 ICLUSTR( 2*I-1 ) = IWORK( M+P+1+ICLUSTR( 2*I-1 ) ) ICLUSTR( 2*I ) = IWORK( M+P+1+ICLUSTR( 2*I ) ) 190 CONTINUE * * * Still need to apply the above permutation to IFAIL * * TOTERR = 0 DO 210 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, LOCINFO, 1 ) IF( LOCINFO.NE.0 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', LOCINFO, 1, $ IFAIL( IWORK( I )+1 ), LOCINFO ) DO 200 J = 1, LOCINFO IFAIL( TOTERR+J ) = IFAIL( IWORK( I )+J ) 200 CONTINUE TOTERR = TOTERR + LOCINFO END IF ELSE * ROW = ( I-1 ) / NPCOL COL = MOD( I-1, NPCOL ) * CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, NERR, 1, $ ROW, COL ) IF( NERR.NE.0 ) THEN CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', NERR, 1, $ IFAIL( TOTERR+1 ), NERR, ROW, COL ) TOTERR = TOTERR + NERR END IF END IF 210 CONTINUE INFO = INFO + TOTERR * * CALL PDLAEVSWP( N, WORK( INDRW+1 ), N, Z, IZ, JZ, DESCZ, IWORK, $ IWORK( M+P+2 ), WORK, INDRW ) * DO 220 I = 2, P IWORK( I ) = IWORK( M+P+1+IWORK( I ) ) 220 CONTINUE * * * Sort the IWORK array * * 230 CONTINUE SORTED = .TRUE. DO 240 I = 2, P - 1 IF( IWORK( I ).GT.IWORK( I+1 ) ) THEN ITMP = IWORK( I+1 ) IWORK( I+1 ) = IWORK( I ) IWORK( I ) = ITMP SORTED = .FALSE. END IF 240 CONTINUE IF( .NOT.SORTED ) $ GO TO 230 * DO 250 I = P + 1, 1, -1 IWORK( I+1 ) = IWORK( I ) 250 CONTINUE * WORK( 1 ) = ( LGCLSIZ+LOAD-1 )*N + INDRW IWORK( 1 ) = 3*N + P + 1 * * End of PDSTEIN * END scalapack-1.8.0/SRC/cdttrsv.f0000640000175000017500000001314510363532303015607 0ustar muammarmuammar SUBROUTINE CDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, $ B, LDB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from CGTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) * .. * * Purpose * ======= * * CDTTRSV solves one of the systems of equations * L * X = B, L**T * X = B, or L**H * X = B, * U * X = B, U**T * X = B, or U**H * X = B, * with factors of the tridiagonal matrix A from the LU factorization * computed by CDTTRF. * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether to solve with L or U. * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * DL (input) COMPLEX array, dimension (N-1) * The (n-1) multipliers that define the matrix L from the * LU factorization of A. * * D (input) COMPLEX array, dimension (N) * The n diagonal elements of the upper triangular matrix U from * the LU factorization of A. * * DU (input) COMPLEX array, dimension (N-1) * The (n-1) elements of the first superdiagonal of U. * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, B is overwritten by the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL LOWER, NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) LOWER = LSAME( UPLO, 'L' ) IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN INFO = -9 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CDTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( NOTRAN ) THEN * IF( LOWER ) THEN * Solve L*X = B, overwriting B with X. * DO 35 J = 1, NRHS * * Solve L*x = b. * DO 10 I = 1, N - 1 B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) 10 CONTINUE 35 CONTINUE * ELSE * Solve U*x = b. * DO 30 J = 1, NRHS B( N, J ) = B( N, J ) / D( N ) IF( N.GT.1 ) $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / $ D( N-1 ) DO 20 I = N - 2, 1, -1 B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J ) ) / D( I ) 20 CONTINUE 30 CONTINUE * ENDIF * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * IF( .NOT. LOWER ) THEN * Solve U**T * X = B, overwriting B with X. * DO 65 J = 1, NRHS * * Solve U**T * x = b. * B( 1, J ) = B( 1, J ) / D( 1 ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) DO 40 I = 3, N B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J ) ) / D( I ) 40 CONTINUE 65 CONTINUE * ELSE * * Solve L**T * X = B, overwriting B with X. DO 60 J = 1, NRHS * * Solve L**T * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) 50 CONTINUE 60 CONTINUE ENDIF ELSE * IF( .NOT. LOWER ) THEN * Solve U**H * X = B, overwriting B with X. * DO 95 J = 1, NRHS * * Solve U**H * x = b. * B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) IF( N.GT.1 ) $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / $ CONJG( D( 2 ) ) DO 70 I = 3, N B( I, J ) = ( B( I, J ) $ -CONJG( DU( I-1 ) )*B( I-1, J ) ) / $ CONJG( D( I ) ) 70 CONTINUE 95 CONTINUE * ELSE * * Solve L**H * X = B, overwriting B with X. DO 90 J = 1, NRHS * * Solve L**H * x = b. * DO 80 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - CONJG( DL( I ) )*B( I+1, J ) 80 CONTINUE 90 CONTINUE ENDIF END IF * * End of CDTTRSV * END scalapack-1.8.0/SRC/pdlaswp.f0000640000175000017500000002035010363532303015564 0ustar muammarmuammar SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ) * .. * * Purpose: * ======== * * PDLASWP performs a series of row or column interchanges on * the distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1). One * interchange is initiated for each of rows or columns K1 trough K2 of * sub( A ). This routine assumes that the pivoting information has * already been broadcast along the process row or column. * Also note that this routine will only work for K1-K2 being in the * same MB (or NB) block. If you want to pivot a full matrix, use * PDLAPIV. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) * = 'B' (Backward) * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are permuted: * = 'R' (Rows) * = 'C' (Columns) * * N (global input) INTEGER * If ROWCOL = 'R', the length of the rows of the distributed * matrix A(*,JA:JA+N-1) to be permuted; * If ROWCOL = 'C', the length of the columns of the distributed * matrix A(IA:IA+N-1,*) to be permuted. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, * ). * On entry, this array contains the local pieces of the distri- * buted matrix to which the row/columns interchanges will be * applied. On exit the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * K1 (global input) INTEGER * The first element of IPIV for which a row or column inter- * change will be done. * * K2 (global input) INTEGER * The last element of IPIV for which a row or column inter- * change will be done. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A for * row pivoting and LOCc(N_A)+NB_A for column pivoting. This * array is tied to the matrix A, IPIV(K) = L implies rows * (or columns) K and L are to be interchanged. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICURCOL, ICURROW, IIA, IP, J, JJA, JP, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PDSWAP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( K1, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 10 I = K1, K2 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PDSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 10 CONTINUE ELSE CALL INFOG2L( K2, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 20 I = K2, K1, -1 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PDSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 20 CONTINUE END IF ELSE IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( IA, K1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 30 J = K1, K2 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PDSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 30 CONTINUE ELSE CALL INFOG2L( IA, K2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 40 J = K2, K1, -1 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PDSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 40 CONTINUE END IF END IF * RETURN * * End PDLASWP * END scalapack-1.8.0/SRC/pdlaconsb.f0000640000175000017500000005047010363532303016065 0ustar muammarmuammar SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER I, L, LWORK, M DOUBLE PRECISION H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), BUF( * ) * .. * * Purpose * ======= * * PDLACONSB looks for two consecutive small subdiagonal elements by * seeing the effect of starting a double shift QR iteration * given by H44, H33, & H43H34 and see if this would make a * subdiagonal negligible. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) DOUBLE PRECISION array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * M (global output) INTEGER * On exit, this yields the starting location of the QR double * shift. This will satisfy: L <= M <= I-2. * * H44 * H33 * H43H34 (global input) DOUBLE PRECISION * These three values are for the double shift QR iteration. * * BUF (local output) DOUBLE PRECISION array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 7*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Logic: * ====== * * Two consecutive small subdiagonal elements will stall * convergence of a double shift if their product is small * relatively even if each is not very small. Thus it is * necessary to scan the "tridiagonal portion of the matrix." In * the LAPACK algorithm DLAHQR, a loop of M goes from I-2 down to * L and examines * H(m,m),H(m+1,m+1),H(m+1,m),H(m,m+1),H(m-1,m-1),H(m,m-1), and * H(m+2,m-1). Since these elements may be on separate * processors, the first major loop (10) goes over the tridiagonal * and has each node store whatever values of the 7 it has that * the node owning H(m,m) does not. This will occur on a border * and can happen in no more than 3 locations per block assuming * square blocks. There are 5 buffers that each node stores these * values: a buffer to send diagonally down and right, a buffer * to send up, a buffer to send left, a buffer to send diagonally * up and left and a buffer to send right. Each of these buffers * is actually stored in one buffer BUF where BUF(ISTR1+1) starts * the first buffer, BUF(ISTR2+1) starts the second, etc.. After * the values are stored, if there are any values that a node * needs, they will be sent and received. Then the next major * loop passes over the data and searches for two consecutive * small subdiagonals. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, IBUF3, IBUF4, $ IBUF5, ICOL1, II, IRCV1, IRCV2, IRCV3, IRCV4, $ IRCV5, IROW1, ISRC, ISTR1, ISTR2, ISTR3, ISTR4, $ ISTR5, JJ, JSRC, LDA, LEFT, MODKM1, MYCOL, $ MYROW, NPCOL, NPROW, NUM, RIGHT, UP DOUBLE PRECISION H00, H10, H11, H12, H21, H22, H33S, H44S, S, $ TST1, ULP, V1, V2, V3 * .. * .. External Functions .. INTEGER ILCM DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGERV2D, DGESD2D, IGAMX2D, $ INFOG2L, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 starts at BUF(ISTR1+1) and will contain IBUF1 elements * BUFFER2 starts at BUF(ISTR2+1) and will contain IBUF2 elements * BUFFER3 starts at BUF(ISTR3+1) and will contain IBUF3 elements * BUFFER4 starts at BUF(ISTR4+1) and will contain IBUF4 elements * BUFFER5 starts at BUF(ISTR5+1) and will contain IBUF5 elements * ISTR1 = 0 ISTR2 = ( ( I-L-1 ) / HBL ) IF( ISTR2*HBL.LT.( I-L-1 ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PDLACONSB', 10 ) RETURN END IF ISTR3 = 3*ISTR2 ISTR4 = ISTR3 + ISTR2 ISTR5 = ISTR3 + ISTR3 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) * * Copy our relevant pieces of triadiagonal that we owe into * 5 buffers to send to whomever owns H(M,M) as M moves diagonally * up the tridiagonal * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 IRCV1 = 0 IRCV2 = 0 IRCV3 = 0 IRCV4 = 0 IRCV5 = 0 DO 10 M = I - 2, L, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M-1,M-1) and send it diagonal down * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( M-1, M-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M ,M-1) and send it right * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF5 = IBUF5 + 1 BUF( ISTR5+IBUF5 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+1,M) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( MYROW.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M ,M+1) and send it left * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF3 = IBUF3 + 1 BUF( ISTR3+IBUF3 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M+1,M+1) & H(M+2,M+1) and send it * diagonally up * IF( ( UP.NE.MYROW ) .OR. ( LEFT.NE.MYCOL ) ) THEN CALL INFOG2L( M+1, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF4 = IBUF4 + 2 BUF( ISTR4+IBUF4-1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) BUF( ISTR4+IBUF4 ) = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+2,M+1) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * Add up the receives * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( M.GT.L ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M-1,M-1) from diagonal up * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) .AND. ( M.GT.L ) ) $ THEN * * We must receive H(M ,M-1) from left * IRCV5 = IRCV5 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+1,M ) from down * IRCV2 = IRCV2 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPCOL.GT.1 ) ) THEN * * We must receive H(M ,M+1) from right * IRCV3 = IRCV3 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M+1:M+2,M+1) from diagonal down * IRCV4 = IRCV4 + 2 END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+2,M+1) from down * IRCV2 = IRCV2 + 1 END IF END IF * * Possibly change owners (occurs only when MOD(M-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * * Send data on to the appropriate node if there is any data to send * IF( IBUF1.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, UP, $ MYCOL ) END IF IF( IBUF3.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF3, 1, BUF( ISTR3+1 ), IBUF3, MYROW, $ LEFT ) END IF IF( IBUF4.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF4, 1, BUF( ISTR4+1 ), IBUF4, UP, $ LEFT ) END IF IF( IBUF5.GT.0 ) THEN CALL DGESD2D( CONTXT, IBUF5, 1, BUF( ISTR5+1 ), IBUF5, MYROW, $ RIGHT ) END IF * * Receive appropriate data if there is any * IF( IRCV1.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, DOWN, $ MYCOL ) END IF IF( IRCV3.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV3, 1, BUF( ISTR3+1 ), IRCV3, MYROW, $ RIGHT ) END IF IF( IRCV4.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV4, 1, BUF( ISTR4+1 ), IRCV4, DOWN, $ RIGHT ) END IF IF( IRCV5.GT.0 ) THEN CALL DGERV2D( CONTXT, IRCV5, 1, BUF( ISTR5+1 ), IRCV5, MYROW, $ LEFT ) END IF * * Start main loop * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. $ ( MODKM1.NE.HBL-1 ) ) THEN CALL INFOG2L( I-2, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) END IF * * Look for two consecutive small subdiagonal elements. * DO 20 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H00 = BUF( ISTR1+IBUF1 ) ELSE H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF5 = IBUF5 + 1 H10 = BUF( ISTR5+IBUF5 ) ELSE H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF END IF IF( MODKM1.EQ.HBL-1 ) THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) H11 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( NUM.GT.1 ) THEN IBUF4 = IBUF4 + 2 H22 = BUF( ISTR4+IBUF4-1 ) V3 = BUF( ISTR4+IBUF4 ) ELSE H22 = A( ICOL1*LDA+IROW1+1 ) V3 = A( ( ICOL1+1 )*LDA+IROW1+1 ) END IF IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 H21 = BUF( ISTR2+IBUF2 ) ELSE H21 = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF3 = IBUF3 + 1 H12 = BUF( ISTR3+IBUF3 ) ELSE H12 = A( ICOL1*LDA+IROW1 ) END IF IF( M.GT.L ) THEN H00 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF * * Adjust ICOL1 for next iteration where MODKM1=HBL-2 * ICOL1 = ICOL1 + 1 END IF IF( MODKM1.EQ.HBL-2 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 V3 = BUF( ISTR2+IBUF2 ) ELSE V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) END IF H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.LT.HBL-2 ) .AND. ( MODKM1.GT.0 ) ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S IF( M.EQ.L ) $ GO TO 30 TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 ) $ GO TO 30 * * Slide indices diagonally up one for next iteration * IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF IF( M.EQ.L ) THEN * * Stop regardless of which node we are * GO TO 30 END IF * * Possibly change owners if on border * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 20 CONTINUE 30 CONTINUE * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * RETURN * * End of PDLACONSB * END scalapack-1.8.0/SRC/pclattrs.f0000640000175000017500000013024510363532303015753 0ustar muammarmuammar SUBROUTINE PCLATTRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, JA, $ DESCA, X, IX, JX, DESCX, SCALE, CNORM, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, INFO, IX, JA, JX, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCX( * ) REAL CNORM( * ) COMPLEX A( * ), X( * ) * .. * * Purpose * ======= * * PCLATTRS solves one of the triangular systems * * A * x = s*b, A**T * x = s*b, or A**H * x = s*b, * * with scaling to prevent overflow. Here A is an upper or lower * triangular matrix, A**T denotes the transpose of A, A**H denotes the * conjugate transpose of A, x and b are n-element vectors, and s is a * scaling factor, usually less than or equal to 1, chosen so that the * components of x will be less than the overflow threshold. If the * unscaled problem will not cause overflow, the Level 2 PBLAS routine * PCTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j) * then s is set to 0 and a non-trivial solution to A*x = 0 is returned. * * This is very slow relative to PCTRSV. This should only be used * when scaling is necessary to control overflow, or when it is modified * to scale better. * Notes * * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the matrix A is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * TRANS (global input) CHARACTER*1 * Specifies the operation applied to A. * = 'N': Solve A * x = s*b (No transpose) * = 'T': Solve A**T * x = s*b (Transpose) * = 'C': Solve A**H * x = s*b (Conjugate transpose) * * DIAG (global input) CHARACTER*1 * Specifies whether or not the matrix A is unit triangular. * = 'N': Non-unit triangular * = 'U': Unit triangular * * NORMIN (global input) CHARACTER*1 * Specifies whether CNORM has been set or not. * = 'Y': CNORM contains the column norms on entry * = 'N': CNORM is not set on entry. On exit, the norms will * be computed and stored in CNORM. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * A (local input) COMPLEX array, dimension (DESCA(LLD_),*) * The triangular matrix A. If UPLO = 'U', the leading n by n * upper triangular part of the array A contains the upper * triangular matrix, and the strictly lower triangular part of * A is not referenced. If UPLO = 'L', the leading n by n lower * triangular part of the array A contains the lower triangular * matrix, and the strictly upper triangular part of A is not * referenced. If DIAG = 'U', the diagonal elements of A are * also not referenced and are assumed to be 1. * * IA (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix A to operate on. * * JA (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix A to operate on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * X (local input/output) COMPLEX array, * dimension (DESCX(LLD_),*) * On entry, the right hand side b of the triangular system. * On exit, X is overwritten by the solution vector x. * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * SCALE (global output) REAL * The scaling factor s for the triangular system * A * x = s*b, A**T * x = s*b, or A**H * x = s*b. * If SCALE = 0, the matrix A is singular or badly scaled, and * the vector x is an exact or approximate solution to A*x = 0. * * CNORM (global input or global output) REAL array, * dimension (N) * If NORMIN = 'Y', CNORM is an input argument and CNORM(j) * contains the norm of the off-diagonal part of the j-th column * of A. If TRANS = 'N', CNORM(j) must be greater than or equal * to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) * must be greater than or equal to the 1-norm. * * If NORMIN = 'N', CNORM is an output argument and CNORM(j) * returns the 1-norm of the offdiagonal part of the j-th column * of A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -k, the k-th argument had an illegal value * * Further Details * ======= ======= * * A rough bound on x is computed; if that is less than overflow, PCTRSV * is called, otherwise, specific code is used which checks for possible * overflow or divide-by-zero at every operation. * * A columnwise scheme is used for solving A*x = b. The basic algorithm * if A is lower triangular is * * x[1:n] := b[1:n] * for j = 1, ..., n * x(j) := x(j) / A(j,j) * x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] * end * * Define bounds on the components of x after j iterations of the loop: * M(j) = bound on x[1:j] * G(j) = bound on x[j+1:n] * Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. * * Then for iteration j+1 we have * M(j+1) <= G(j) / | A(j+1,j+1) | * G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | * <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) * * where CNORM(j+1) is greater than or equal to the infinity-norm of * column j+1 of A, not counting the diagonal. Hence * * G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) * 1<=i<=j * and * * |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) * 1<=i< j * * Since |x(j)| <= M(j), we use the Level 2 PBLAS routine PCTRSV if the * reciprocal of the largest M(j), j=1,..,n, is larger than * max(underflow, 1/overflow). * * The bound on x(j) is also used to determine when a step in the * columnwise method can be performed without fear of overflow. If * the computed bound is greater than a large constant, x is scaled to * prevent overflow, but if the bound overflows, x is set to 0, x(j) to * 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. * * Similarly, a row-wise scheme is used to solve A**T *x = b or * A**H *x = b. The basic algorithm for A upper triangular is * * for j = 1, ..., n * x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) * end * * We simultaneously compute two bounds * G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j * M(j) = bound on x(i), 1<=i<=j * * The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we * add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. * Then the bound on x(j) is * * M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | * * <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) * 1<=i<=j * * and we can safely call PCTRSV if 1/M(n) and 1/G(n) are both greater * than max(underflow, 1/overflow). * * Last modified by: Mark R. Fahey, August 2000 * * ===================================================================== * * .. Parameters .. REAL ZERO, HALF, ONE, TWO PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, $ TWO = 2.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER CONTXT, CSRC, I, ICOL, ICOLX, IMAX, IROW, $ IROWX, ITMP1, ITMP1X, ITMP2, ITMP2X, J, JFIRST, $ JINC, JLAST, LDA, LDX, MB, MYCOL, MYROW, NB, $ NPCOL, NPROW, RSRC REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, $ XBND, XJ, XMAX COMPLEX CSUMJ, TJJS, USCAL, XJTMP, ZDUM * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX REAL PSLAMCH COMPLEX CLADIV EXTERNAL LSAME, ISAMAX, PSLAMCH, CLADIV * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGSUM2D, SSCAL, INFOG2L, $ PSCASUM, PSLABAD, PXERBLA, PCAMAX, PCAXPY, $ PCDOTC, PCDOTU, PCSSCAL, PCLASET, PCSCAL, $ PCTRSV, CGEBR2D, CGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CMPLX, CONJG, AIMAG, MAX, MIN * .. * .. Statement Functions .. REAL CABS1, CABS2 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2.E0 ) + $ ABS( AIMAG( ZDUM ) / 2.E0 ) * .. * .. Executable Statements .. * INFO = 0 UPPER = LSAME( UPLO, 'U' ) NOTRAN = LSAME( TRANS, 'N' ) NOUNIT = LSAME( DIAG, 'N' ) * CONTXT = DESCA( CTXT_ ) RSRC = DESCA( RSRC_ ) CSRC = DESCA( CSRC_ ) MB = DESCA( MB_ ) NB = DESCA( NB_ ) LDA = DESCA( LLD_ ) LDX = DESCX( LLD_ ) * * Test the input parameters. * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. $ LSAME( NORMIN, 'N' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 END IF * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( CONTXT, 'PCLATTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Determine machine dependent parameters to control overflow. * SMLNUM = PSLAMCH( CONTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( CONTXT, SMLNUM, BIGNUM ) SMLNUM = SMLNUM / PSLAMCH( CONTXT, 'Precision' ) BIGNUM = ONE / SMLNUM SCALE = ONE * * IF( LSAME( NORMIN, 'N' ) ) THEN * * Compute the 1-norm of each column, not including the diagonal. * IF( UPPER ) THEN * * A is upper triangular. * CNORM( 1 ) = ZERO DO 10 J = 2, N CALL PSCASUM( J-1, CNORM( J ), A, IA, JA+J-1, DESCA, 1 ) 10 CONTINUE ELSE * * A is lower triangular. * DO 20 J = 1, N - 1 CALL PSCASUM( N-J, CNORM( J ), A, IA+J, JA+J-1, DESCA, $ 1 ) 20 CONTINUE CNORM( N ) = ZERO END IF CALL SGSUM2D( CONTXT, 'Row', ' ', N, 1, CNORM, 1, -1, -1 ) END IF * * Scale the column norms by TSCAL if the maximum element in CNORM is * greater than BIGNUM/2. * IMAX = ISAMAX( N, CNORM, 1 ) TMAX = CNORM( IMAX ) IF( TMAX.LE.BIGNUM*HALF ) THEN TSCAL = ONE ELSE TSCAL = HALF / ( SMLNUM*TMAX ) CALL SSCAL( N, TSCAL, CNORM, 1 ) END IF * * Compute a bound on the computed solution vector to see if the * Level 2 PBLAS routine PCTRSV can be used. * XMAX = ZERO CALL PCAMAX( N, ZDUM, IMAX, X, IX, JX, DESCX, 1 ) XMAX = CABS2( ZDUM ) CALL SGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, -1, -1 ) XBND = XMAX * IF( NOTRAN ) THEN * * Compute the growth in A * x = b. * IF( UPPER ) THEN JFIRST = N JLAST = 1 JINC = -1 ELSE JFIRST = 1 JLAST = N JINC = 1 END IF * IF( TSCAL.NE.ONE ) THEN GROW = ZERO GO TO 50 END IF * IF( NOUNIT ) THEN * * A is non-unit triangular. * * Compute GROW = 1/G(j) and XBND = 1/M(j). * Initially, G(0) = max{x(i), i=1,...,n}. * GROW = HALF / MAX( XBND, SMLNUM ) XBND = GROW DO 30 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 50 * * TJJS = A( J, J ) CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW ) CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF TJJ = CABS1( TJJS ) * IF( TJJ.GE.SMLNUM ) THEN * * M(j) = G(j-1) / abs(A(j,j)) * XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) ELSE * * M(j) could overflow, set XBND to 0. * XBND = ZERO END IF * IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN * * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) * GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) ELSE * * G(j) could overflow, set GROW to 0. * GROW = ZERO END IF 30 CONTINUE GROW = XBND ELSE * * A is unit triangular. * * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. * GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) DO 40 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 50 * * G(j) = G(j-1)*( 1 + CNORM(j) ) * GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) 40 CONTINUE END IF 50 CONTINUE * ELSE * * Compute the growth in A**T * x = b or A**H * x = b. * IF( UPPER ) THEN JFIRST = 1 JLAST = N JINC = 1 ELSE JFIRST = N JLAST = 1 JINC = -1 END IF * IF( TSCAL.NE.ONE ) THEN GROW = ZERO GO TO 80 END IF * IF( NOUNIT ) THEN * * A is non-unit triangular. * * Compute GROW = 1/G(j) and XBND = 1/M(j). * Initially, M(0) = max{x(i), i=1,...,n}. * GROW = HALF / MAX( XBND, SMLNUM ) XBND = GROW DO 60 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 80 * * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) * XJ = ONE + CNORM( J ) GROW = MIN( GROW, XBND / XJ ) * * TJJS = A( J, J ) CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW ) CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF TJJ = CABS1( TJJS ) * IF( TJJ.GE.SMLNUM ) THEN * * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) * IF( XJ.GT.TJJ ) $ XBND = XBND*( TJJ / XJ ) ELSE * * M(j) could overflow, set XBND to 0. * XBND = ZERO END IF 60 CONTINUE GROW = MIN( GROW, XBND ) ELSE * * A is unit triangular. * * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. * GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) DO 70 J = JFIRST, JLAST, JINC * * Exit the loop if the growth factor is too small. * IF( GROW.LE.SMLNUM ) $ GO TO 80 * * G(j) = ( 1 + CNORM(j) )*G(j-1) * XJ = ONE + CNORM( J ) GROW = GROW / XJ 70 CONTINUE END IF 80 CONTINUE END IF * IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN * * Use the Level 2 PBLAS solve if the reciprocal of the bound on * elements of X is not too small. * CALL PCTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX, $ DESCX, 1 ) ELSE * * Use a Level 1 PBLAS solve, scaling intermediate results. * IF( XMAX.GT.BIGNUM*HALF ) THEN * * Scale X so that its components are less than or equal to * BIGNUM in absolute value. * SCALE = ( BIGNUM*HALF ) / XMAX CALL PCSSCAL( N, SCALE, X, IX, JX, DESCX, 1 ) XMAX = BIGNUM ELSE XMAX = XMAX*TWO END IF * IF( NOTRAN ) THEN * * Solve A * x = b * DO 100 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) / A(j,j), scaling x if necessary. * * XJ = CABS1( X( J ) ) CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 90 END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by 1/b(j). * REC = ONE / XJ CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = CLADIV( X( J ), TJJS ) * XJ = CABS1( X( J ) ) XJTMP = CLADIV( XJTMP, TJJS ) XJ = CABS1( XJTMP ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM * to avoid overflow when dividing by A(j,j). * REC = ( TJJ*BIGNUM ) / XJ IF( CNORM( J ).GT.ONE ) THEN * * Scale by 1/CNORM(j) to avoid overflow when * multiplying x(j) times column j. * REC = REC / CNORM( J ) END IF CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = CLADIV( X( J ), TJJS ) * XJ = CABS1( X( J ) ) XJTMP = CLADIV( XJTMP, TJJS ) XJ = CABS1( XJTMP ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0, and compute a solution to A*x = 0. * CALL PCLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = CONE END IF XJTMP = CONE XJ = ONE SCALE = ZERO XMAX = ZERO END IF 90 CONTINUE * * Scale x if necessary to avoid overflow when adding a * multiple of column j of A. * IF( XJ.GT.ONE ) THEN REC = ONE / XJ IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN * * Scale x by 1/(2*abs(x(j))). * REC = REC*HALF CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC END IF ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN * * Scale x by 1/2. * CALL PCSSCAL( N, HALF, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*HALF SCALE = SCALE*HALF END IF * IF( UPPER ) THEN IF( J.GT.1 ) THEN * * Compute the update * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) * ZDUM = -XJTMP*TSCAL CALL PCAXPY( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1, X, $ IX, JX, DESCX, 1 ) CALL PCAMAX( J-1, ZDUM, IMAX, X, IX, JX, DESCX, 1 ) XMAX = CABS1( ZDUM ) CALL SGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, $ -1, -1 ) END IF ELSE IF( J.LT.N ) THEN * * Compute the update * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) * ZDUM = -XJTMP*TSCAL CALL PCAXPY( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) CALL PCAMAX( N-J, ZDUM, I, X, IX+J, JX, DESCX, 1 ) XMAX = CABS1( ZDUM ) CALL SGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, $ -1, -1 ) END IF END IF 100 CONTINUE * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * * Solve A**T * x = b * DO 120 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) - sum A(k,j)*x(k). * k<>j * * XJ = CABS1( X( J ) ) CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) USCAL = CMPLX( TSCAL ) REC = ONE / MAX( XMAX, ONE ) IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN * * If x(j) could overflow, scale x by 1/(2*XMAX). * REC = REC*HALF IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.ONE ) THEN * * Divide by A(j,j) when scaling x if A(j,j) > 1. * REC = MIN( ONE, REC*TJJ ) USCAL = CLADIV( USCAL, TJJS ) END IF IF( REC.LT.ONE ) THEN CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * CSUMJ = CZERO IF( USCAL.EQ.CONE ) THEN * * If the scaling needed for A in the dot product is 1, * call PCDOTU to perform the dot product. * IF( UPPER ) THEN CALL PCDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ELSE IF( J.LT.N ) THEN CALL PCDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL CGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL CGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF ELSE * * Otherwise, scale column of A by USCAL before dot * product. Below is not the best way to do it. * IF( UPPER ) THEN * DO 130 I = 1, J - 1 * CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) * 130 CONTINUE ZDUM = CONJG( USCAL ) CALL PCSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) CALL PCDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ZDUM = CLADIV( ZDUM, USCAL ) CALL PCSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) ELSE IF( J.LT.N ) THEN * DO 140 I = J + 1, N * CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) * 140 CONTINUE ZDUM = CONJG( USCAL ) CALL PCSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) CALL PCDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) ZDUM = CLADIV( ZDUM, USCAL ) CALL PCSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL CGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL CGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF END IF * IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN * * Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) * was not used to scale the dotproduct. * * X( J ) = X( J ) - CSUMJ * XJ = CABS1( X( J ) ) XJTMP = XJTMP - CSUMJ XJ = CABS1( XJTMP ) * IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) * $ X( IROWX ) = XJTMP IF( NOUNIT ) THEN * TJJS = A( J, J )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = A( ( ICOL-1 )*LDA+IROW )*TSCAL CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 110 END IF * * Compute x(j) = x(j) / A(j,j), scaling if necessary. * TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale X by 1/abs(x(j)). * REC = ONE / XJ CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = CLADIV( X( J ), TJJS ) XJTMP = CLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. * REC = ( TJJ*BIGNUM ) / XJ CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = CLADIV( X( J ), TJJS ) XJTMP = CLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0 and compute a solution to A**T *x = 0. * CALL PCLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = CONE END IF XJTMP = CONE SCALE = ZERO XMAX = ZERO END IF 110 CONTINUE ELSE * * Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot * product has already been divided by 1/A(j,j). * * X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ XJTMP = CLADIV( XJTMP, TJJS ) - CSUMJ IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ THEN X( IROWX ) = XJTMP END IF END IF XMAX = MAX( XMAX, CABS1( XJTMP ) ) 120 CONTINUE * ELSE * * Solve A**H * x = b * DO 140 J = JFIRST, JLAST, JINC * * Compute x(j) = b(j) - sum A(k,j)*x(k). * k<>j * CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, $ MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) THEN XJTMP = X( IROWX ) CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, $ ITMP1X, ITMP2X ) END IF XJ = CABS1( XJTMP ) USCAL = TSCAL REC = ONE / MAX( XMAX, ONE ) IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN * * If x(j) could overflow, scale x by 1/(2*XMAX). * REC = REC*HALF IF( NOUNIT ) THEN * TJJS = CONJG( A( J, J ) )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = CONJG( A( ( ICOL-1 )*LDA+IROW ) )*TSCAL CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL END IF TJJ = CABS1( TJJS ) IF( TJJ.GT.ONE ) THEN * * Divide by A(j,j) when scaling x if A(j,j) > 1. * REC = MIN( ONE, REC*TJJ ) USCAL = CLADIV( USCAL, TJJS ) END IF IF( REC.LT.ONE ) THEN CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * CSUMJ = CZERO IF( USCAL.EQ.CONE ) THEN * * If the scaling needed for A in the dot product is 1, * call PCDOTC to perform the dot product. * IF( UPPER ) THEN CALL PCDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ELSE IF( J.LT.N ) THEN CALL PCDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL CGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL CGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF ELSE * * Otherwise, scale column of A by USCAL before dot * product. Below is not the best way to do it. * IF( UPPER ) THEN * DO 180 I = 1, J - 1 * CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* * $ X( I ) * 180 CONTINUE ZDUM = CONJG( USCAL ) CALL PCSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) CALL PCDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1, $ X, IX, JX, DESCX, 1 ) ZDUM = CLADIV( CONE, ZDUM ) CALL PCSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 ) ELSE IF( J.LT.N ) THEN * DO 190 I = J + 1, N * CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* * $ X( I ) * 190 CONTINUE ZDUM = CONJG( USCAL ) CALL PCSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) CALL PCDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1, $ X, IX+J, JX, DESCX, 1 ) ZDUM = CLADIV( CONE, ZDUM ) CALL PCSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 ) END IF IF( MYCOL.EQ.ITMP2X ) THEN CALL CGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 ) ELSE CALL CGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, $ MYROW, ITMP2X ) END IF END IF * IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN * * Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) * was not used to scale the dotproduct. * * X( J ) = X( J ) - CSUMJ * XJ = CABS1( X( J ) ) XJTMP = XJTMP - CSUMJ XJ = CABS1( XJTMP ) * IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) * $ X( IROWX ) = XJTMP IF( NOUNIT ) THEN * TJJS = CONJG( A( J, J ) )*TSCAL CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, $ MYROW, MYCOL, IROW, ICOL, ITMP1, $ ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) $ THEN TJJS = CONJG( A( ( ICOL-1 )*LDA+IROW ) )*TSCAL CALL CGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, $ 1 ) ELSE CALL CGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, $ ITMP1, ITMP2 ) END IF ELSE TJJS = TSCAL IF( TSCAL.EQ.ONE ) $ GO TO 130 END IF * * Compute x(j) = x(j) / A(j,j), scaling if necessary. * TJJ = CABS1( TJJS ) IF( TJJ.GT.SMLNUM ) THEN * * abs(A(j,j)) > SMLNUM: * IF( TJJ.LT.ONE ) THEN IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale X by 1/abs(x(j)). * REC = ONE / XJ CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF END IF * X( J ) = CLADIV( X( J ), TJJS ) XJTMP = CLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP ELSE IF( TJJ.GT.ZERO ) THEN * * 0 < abs(A(j,j)) <= SMLNUM: * IF( XJ.GT.TJJ*BIGNUM ) THEN * * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. * REC = ( TJJ*BIGNUM ) / XJ CALL PCSSCAL( N, REC, X, IX, JX, DESCX, 1 ) XJTMP = XJTMP*REC SCALE = SCALE*REC XMAX = XMAX*REC END IF * X( J ) = CLADIV( X( J ), TJJS ) XJTMP = CLADIV( XJTMP, TJJS ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and * scale = 0 and compute a solution to A**H *x = 0. * CALL PCLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, $ DESCX ) IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = CONE XJTMP = CONE SCALE = ZERO XMAX = ZERO END IF 130 CONTINUE ELSE * * Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot * product has already been divided by 1/A(j,j). * * X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ XJTMP = CLADIV( XJTMP, TJJS ) - CSUMJ IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) $ X( IROWX ) = XJTMP END IF XMAX = MAX( XMAX, CABS1( XJTMP ) ) 140 CONTINUE END IF SCALE = SCALE / TSCAL END IF * * Scale the column norms by 1/TSCAL for return. * IF( TSCAL.NE.ONE ) THEN CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) END IF * RETURN * * End of PCLATTRS * END scalapack-1.8.0/SRC/pzgelqf.f0000640000175000017500000002754110363532303015573 0ustar muammarmuammar SUBROUTINE PZGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGELQF computes a LQ factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = L * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and below the diagonal of sub( A ) contain the M by min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); * the elements above the diagonal, with the array TAU, repre- * sent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCr(IA+MIN(M,N)-1). This array contains the scalar factors * of the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia+k-1)' H(ia+k-2)' . . . H(ia)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in * A(ia+i-1,ja+i:ja+n-1), and tau in TAU(ia+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ IROFF, J, K, LWMIN, MP0, MYCOL, MYROW, NPCOL, $ NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZGELQ2, PZLARFB, $ PZLARFT * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGELQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Handle the first block of rows separately * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) IB = IN - IA + 1 * * Compute the LQ factorization of the first block A(ia:in:ja:ja+n-1) * CALL PZGELQ2( IB, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IINFO ) * IF( IA+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(ia) H(ia+1) . . . H(in) * CALL PZLARFT( 'Forward', 'Rowwise', N, IB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia+ib:ia+m-1,ja:ja+n-1) from the right * CALL PZLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-IB, N, IB, A, IA, JA, DESCA, WORK, A, IA+IB, $ JA, DESCA, WORK( IPW ) ) END IF * * Loop over the remaining blocks of rows * DO 10 I = IN+1, IA+K-1, DESCA( MB_ ) IB = MIN( K-I+IA, DESCA( MB_ ) ) J = JA + I - IA * * Compute the LQ factorization of the current block * A(i:i+ib-1:j:ja+n-1) * CALL PZGELQ2( IB, N-I+IA, A, I, J, DESCA, TAU, WORK, LWORK, $ IINFO ) * IF( I+IB.LE.IA+M-1 ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PZLARFT( 'Forward', 'Rowwise', N-I+IA, IB, A, I, J, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i+ib:ia+m-1,j:ja+n-1) from the right * CALL PZLARFB( 'Right', 'No transpose', 'Forward', 'Rowwise', $ M-I-IB+IA, N-J+JA, IB, A, I, J, DESCA, WORK, $ A, I+IB, J, DESCA, WORK( IPW ) ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGELQF * END scalapack-1.8.0/SRC/pzmax1.f0000640000175000017500000003330510363532303015336 0ustar muammarmuammar SUBROUTINE PZMAX1( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER INDX, INCX, IX, JX, N COMPLEX*16 AMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX*16 X( * ) * .. * * Purpose * ======= * * PZMAX1 computes the global index of the maximum element in absolute * value of a distributed vector sub( X ). The global index is returned * in INDX and the value is returned in AMAX, * * where sub( X ) denotes X(IX:IX+N-1,JX) if INCX = 1, * X(IX,JX:JX+N-1) if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * When the result of a vector-oriented PBLAS call is a scalar, it will * be made available only within the scope which owns the vector(s) * being operated on. Let X be a generic term for the input vector(s). * Then, the processes which receive the answer will be (note that if * an operation involves more than one vector, the processes which re- * ceive the result will be the union of the following calculation for * each vector): * * If N = 1, M_X = 1 and INCX = 1, then one can't determine if a process * row or process column owns the vector operand, therefore only the * process of coordinate {RSRC_X, CSRC_X} receives the result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process part of this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process part of this column receives the result; * * Based on PZAMAX from Level 1 PBLAS. The change is to use the * 'genuine' absolute value. * * The serial version was contributed to LAPACK by Nick Higham for use * with ZLACON. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * AMAX (global output) pointer to DOUBLE PRECISION * The absolute value of the largest entry of the distributed * vector sub( X ) only in the scope of sub( X ). * * INDX (global output) pointer to INTEGER * The global index of the element of the distributed vector * sub( X ) whose real part has maximum absolute value. * * X (local input) COMPLEX*16 array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. CHARACTER CBTOP, CCTOP, RBTOP, RCTOP INTEGER ICOFF, ICTXT, IDUMM, IIX, IROFF, IXCOL, IXROW, $ JJX, LCINDX, LDX, MAXPOS, MYCOL, MYROW, NP, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. COMPLEX*16 WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, INFOG2L, $ PB_TOPGET, PZTREECOMB, ZCOMBAMAX1, ZGAMX2D * .. * .. External Functions .. LOGICAL LSAME INTEGER IZMAX1, INDXL2G, NUMROC EXTERNAL IZMAX1, INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, MOD, NINT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * INDX = 0 AMAX = ZERO IF( N.LE.0 ) $ RETURN * * Retrieve local information for vector X. * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN INDX = JX AMAX = X( IIX+(JJX-1)*LDX ) RETURN END IF * * Find the maximum value and its index * IF( INCX.EQ.DESCX( M_ ) ) THEN * IF( MYROW.EQ.IXROW ) THEN * ICOFF = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ-ICOFF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', RBTOP ) * IF( LSAME( RBTOP, ' ' ) ) THEN * IF( NQ.GT.0 ) THEN LCINDX = JJX-1+IZMAX1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) WORK( 1 ) = X( IIX+(LCINDX-1)*LDX ) WORK( 2 ) = DCMPLX( DBLE( INDXL2G( LCINDX, $ DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), NPCOL ) ) ) ELSE WORK( 1 ) = ZERO WORK( 2 ) = ZERO END IF * CALL PZTREECOMB( ICTXT, 'Row', 2, WORK, -1, MYCOL, $ ZCOMBAMAX1 ) * AMAX = WORK( 1 ) IF( AMAX.EQ.ZERO ) THEN INDX = JX ELSE INDX = NINT( DBLE( WORK( 2 ) ) ) END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP ) * IF( NQ.GT.0 ) THEN LCINDX = JJX-1+IZMAX1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) AMAX = X( IIX + (LCINDX-1)*LDX ) ELSE AMAX = ZERO END IF * * Find the maximum value * CALL ZGAMX2D( ICTXT, 'Rowwise', RCTOP, 1, 1, AMAX, 1, $ IDUMM, MAXPOS, 1, -1, MYROW ) * IF( AMAX.NE.ZERO ) THEN * * Broadcast corresponding global index * IF( MYCOL.EQ.MAXPOS ) THEN INDX = INDXL2G( LCINDX, DESCX( NB_ ), MYCOL, $ DESCX( CSRC_ ), NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', RBTOP, 1, 1, INDX, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', RBTOP, 1, 1, INDX, $ 1, MYROW, MAXPOS ) END IF * ELSE * INDX = JX * END IF * END IF * END IF * ELSE * IF( MYCOL.EQ.IXCOL ) THEN * IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP-IROFF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) * IF( LSAME( CBTOP, ' ' ) ) THEN * IF( NP.GT.0 ) THEN LCINDX = IIX-1+IZMAX1( NP, X( IIX+(JJX-1)*LDX ), 1 ) WORK( 1 ) = X( LCINDX + (JJX-1)*LDX ) WORK( 2 ) = DCMPLX( DBLE( INDXL2G( LCINDX, $ DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) ) ) ELSE WORK( 1 ) = ZERO WORK( 2 ) = ZERO END IF * CALL PZTREECOMB( ICTXT, 'Column', 2, WORK, -1, MYCOL, $ ZCOMBAMAX1 ) * AMAX = WORK( 1 ) IF( AMAX.EQ.ZERO ) THEN INDX = IX ELSE INDX = NINT( DBLE( WORK( 2 ) ) ) END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP ) * IF( NP.GT.0 ) THEN LCINDX = IIX-1+IZMAX1( NP, X( IIX+(JJX-1)*LDX ), 1 ) AMAX = X( LCINDX + (JJX-1)*LDX ) ELSE AMAX = ZERO END IF * * Find the maximum value * CALL ZGAMX2D( ICTXT, 'Columnwise', CCTOP, 1, 1, AMAX, 1, $ MAXPOS, IDUMM, 1, -1, MYCOL ) * IF( AMAX.NE.ZERO ) THEN * * Broadcast corresponding global index * IF( MYROW.EQ.MAXPOS ) THEN INDX = INDXL2G( LCINDX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW ) CALL IGEBS2D( ICTXT, 'Columnwise', CBTOP, 1, 1, $ INDX, 1 ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', CBTOP, 1, 1, $ INDX, 1, MAXPOS, MYCOL ) END IF * ELSE * INDX = IX * END IF * END IF * END IF * END IF * RETURN * * End of PZMAX1 * END * SUBROUTINE ZCOMBAMAX1 ( V1, V2 ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. COMPLEX*16 V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * ZCOMBAMAX1 finds the element having maximum real part absolute * value as well as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) COMPLEX*16 array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) COMPLEX*16 array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, DBLE * .. * .. Executable Statements .. * IF( ABS( DBLE( V1( 1 ) ) ).LT.ABS( DBLE( V2( 1 ) ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of ZCOMBAMAX1 * END scalapack-1.8.0/SRC/psormrz.f0000640000175000017500000004325110363532303015633 0ustar muammarmuammar SUBROUTINE PSORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMRZ overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PSTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PSTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JAA, $ JCC, LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSLARZB, $ PSLARZT, PSORMR3, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IDUM1( 4 ) = L IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMRZ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'T' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PSORMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PSLARZT( 'Backward', 'Rowwise', L, IB, A, I, JAA, DESCA, $ TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PSLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, IB, $ L, A, I, JAA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PSORMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMRZ * END scalapack-1.8.0/SRC/pzpocon.f0000640000175000017500000003672010363532303015612 0ustar muammarmuammar SUBROUTINE PZPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LRWORK, LWORK, N DOUBLE PRECISION ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION RWORK( * ) COMPLEX*16 A( * ), WORK( * ) * .. * * Purpose * ======= * * PZPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a complex Hermitian positive definite distributed matrix * using the Cholesky factorization A = U**H*U or A = L*L**H computed by * PZPOTRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the factor stored in * A(IA:IA+N-1,JA:JA+N-1) is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory to * an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, this * array contains the local pieces of the factors L or U from * the Cholesky factorization A(IA:IA+N-1,JA:JA+N-1) = U'*U or * L*L', as computed by PZPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) DOUBLE PRECISION * The 1-norm (or infinity-norm) of the hermitian distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + * MAX( 2, MAX(NB_A*MAX(1,CEIL(P-1,Q)),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*MAX(1,CEIL(Q-1,P))) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK >= 2*LOCc(N+MOD(JA-1,NB_A)). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, $ JX, KASE, LRWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPROW, NPMOD, NQ, NQMOD DOUBLE PRECISION AINVNM, SCALE, SL, SU, SMLNUM COMPLEX*16 WMAX, ZDUM * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PCHK1MAT, $ PZAMAX, PZLATRS, PZLACON, PZDRSCL, $ ZGEBR2D, ZGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, ICHAR, MAX, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LRWMIN = 2*NQMOD RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LRWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPW = IPV + NP IPNL = 1 IPNU = IPNL + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the 1-norm (or I-norm) of inv(A). * AINVNM = ZERO KASE = 0 NORMIN = 'N' * 10 CONTINUE CALL PZLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( UPPER ) THEN * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SL, RWORK( IPNL ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, RWORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, RWORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PZLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', $ NORMIN, N, A, IA, JA, DESCA, WORK( IPX ), $ IX, JX, DESCX, SU, RWORK( IPNU ), $ WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Multiply by 1/SCALE if doing so will not cause overflow. * SCALE = SL*SU IF( SCALE.NE.ONE ) THEN CALL PZAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL ZGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL ZGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.CABS1( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PZDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PZPOCON * END scalapack-1.8.0/SRC/pdgetri.f0000640000175000017500000003542410430435051015555 0ustar muammarmuammar SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.4) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * v1.7.4: May 10, 2006 * v1.7: May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LIWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ), IWORK( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDGETRI computes the inverse of a distributed matrix using the LU * factorization computed by PDGETRF. This method inverts U and then * computes the inverse of sub( A ) = A(IA:IA+N-1,JA:JA+N-1) denoted * InvA by solving the system InvA*L = inv(U) for InvA. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the L and U obtained by the * factorization sub( A ) = P*L*U computed by PDGETRF. On * exit, if INFO = 0, sub( A ) contains the inverse of the * original distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A * keeps track of the pivoting information. IPIV(i) is the * global row index the local row i was swapped with. This * array is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = LOCr(N+MOD(IA-1,MB_A))*NB_A. WORK is used to keep a * copy of at most an entire column block of sub( A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK used as workspace for * physically transposing the pivots. * LIWORK is local input and must be at least * if NPROW == NPCOL then * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + NB_A, * else * LIWORK = LOCc( N_A + MOD(JA-1, NB_A) ) + * MAX( CEIL(CEIL(LOCr(M_A)/MB_A)/(LCM/NPROW)), * NB_A ) * where LCM is the least common multiple of process * rows and columns (NPROW and NPCOL). * end if * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,IA+K-1) is exactly zero; the * matrix is singular and its inverse could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IROFF, IW, J, $ JB, JN, LCM, LIWMIN, LWMIN, MP, MYCOL, MYROW, $ NN, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PDGEMM, PDLACPY, PDLASET, PDLAPIV, $ PDTRSM, PDTRTRI, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) LWMIN = NP * DESCA( NB_ ) * MP = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) NQ = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) IF( NPROW.EQ.NPCOL ) THEN LIWMIN = NQ + DESCA( NB_ ) ELSE * * Use the formula for the workspace given in PxLAPIV * to compute the minimum size LIWORK for IWORK * * The formula in PxLAPIV is * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * * where * M_P is the global length of the pivot vector * MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW * I_P is IA * I_P = IA * MB_P is the block size use for the block cyclic distribution of the * pivot vector * MB_P = DESCA (MB_ ) * LOCc ( . ) * NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) * LOCr ( . ) * NUMROC ( . , DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) * CEIL ( X / Y ) * ICEIL( X, Y ) * LCM * LCM = ILCM( NPROW, NPCOL ) * LCM = ILCM( NPROW, NPCOL ) LIWMIN = NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW $ + MOD ( IA - 1, DESCA( MB_ ) ), DESCA ( NB_ ), $ MYCOL, DESCA( CSRC_ ), NPCOL ) + $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL( $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW, $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ), $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) ) * END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -4 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -8 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 8 IF( LIWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRI', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Form inv(U). If INFO > 0 from PDTRTRI, then U is singular, * and the inverse is not computed. * CALL PDTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) IF( INFO.GT.0 ) $ RETURN * * Define array descriptor for working array WORK * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) NN = ( ( JA+N-2 ) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1 IACOL = INDXG2P( NN, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) CALL DESCSET( DESCW, N+IROFF, DESCA( NB_ ), DESCA( MB_ ), $ DESCA( NB_ ), IAROW, IACOL, ICTXT, MAX( 1, NP ) ) IW = IROFF + 1 * * Solve the equation inv(A)*L=inv(U) for inv(A) using blocked code. * DO 10 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Copy current block column of L to WORK and replace with zeros. * CALL PDLACPY( 'Lower', JA+N-1-J, JB, A, I+1, J, DESCA, $ WORK, IW+J-JA+1, 1, DESCW ) CALL PDLASET( 'Lower', JA+N-1-J, JB, ZERO, ZERO, A, I+1, J, $ DESCA ) * * Compute current block column of inv(A). * IF( J+JB.LE.JA+N-1 ) $ CALL PDGEMM( 'No transpose', 'No transpose', N, JB, $ JA+N-J-JB, -ONE, A, IA, J+JB, DESCA, WORK, $ IW+J+JB-JA, 1, DESCW, ONE, A, IA, J, DESCA ) CALL PDTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW+J-JA, 1, DESCW, A, IA, J, DESCA ) DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 * * Copy current block column of L to WORK and replace with zeros. * CALL PDLACPY( 'Lower', N-1, JB, A, IA+1, JA, DESCA, WORK, IW+1, $ 1, DESCW ) CALL PDLASET( 'Lower', N-1, JB, ZERO, ZERO, A, IA+1, JA, DESCA ) * * Compute current block column of inv(A). * IF( JA+JB.LE.JA+N-1 ) $ CALL PDGEMM( 'No transpose', 'No transpose', N, JB, $ N-JB, -ONE, A, IA, JA+JB, DESCA, WORK, IW+JB, 1, $ DESCW, ONE, A, IA, JA, DESCA ) CALL PDTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, $ ONE, WORK, IW, 1, DESCW, A, IA, JA, DESCA ) * * Use the row pivots and apply them to the columns of the global * matrix. * * * JL: I do not get why the size of the PIVOT vector is DESCA( M_ ) + DESCA( MB_ )*NPROW * should be DESCA( M_ ) + DESCA( MB_ ) no? * CALL DESCSET( DESCW, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, $ MP+DESCA( MB_ ) ) CALL PDLAPIV( 'Backward', 'Columns', 'Column', N, N, A, IA, $ JA, DESCA, IPIV, IA, 1, DESCW, IWORK ) * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDGETRI * END scalapack-1.8.0/SRC/pcgeql2.f0000640000175000017500000003012610363532303015451 0ustar muammarmuammar SUBROUTINE PCGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQL2 computes a QL factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, CGEBR2D, $ CGEBS2D, CLARFG, CSCAL, $ INFOG2L, PCELSET, PCLARFC, $ PCLARFG, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IACOL = INDXG2P( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN I = II+(JJ+NQ-2)*DESCA( LLD_ ) AJJ = A( I ) CALL CLARFG( 1, AJJ, A( I ), 1, TAU( JJ+NQ-1 ) ) IF( N.GT.1 ) THEN ALPHA = ONE - CONJG( TAU( JJ+NQ-1 ) ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL CSCAL( NQ-1, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL CSCAL( NQ, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1, IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA+K-1, JA, -1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(ia:i+m-k-1,j+n-k) * CALL PCLARFG( M-K+I-IA+1, AJJ, M-K+I, N-K+J, A, IA, $ N-K+J, DESCA, 1, TAU ) * * Apply H(j)' to A(ia:i+m-k,ja:j+n-k-1) from the left * CALL PCELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PCLARFC( 'Left', M-K+I-IA+1, N-K+J-JA, A, IA, N-K+J, $ DESCA, 1, TAU, A, IA, JA, DESCA, WORK ) CALL PCELSET( A, I+M-K, J+N-K, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEQL2 * END scalapack-1.8.0/SRC/pdlapiv.f0000640000175000017500000003365510363532303015565 0ustar muammarmuammar SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ), IWORK( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLAPIV applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), resulting in row or column * pivoting. The pivot vector may be distributed across a process row * or a column. The pivot vector should be aligned with the distributed * matrix A. This routine will transpose the pivot vector if necessary. * For example if the row pivots should be applied to the columns of * sub( A ), pass ROWCOL='C' and PIVROC='C'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Restrictions * ============ * * IPIV must always be a distributed vector (not a matrix). Thus: * IF( ROWPIV .EQ. 'C' ) THEN * JP must be 1 * ELSE * IP must be 1 * END IF * * The following restrictions apply when IPIV must be transposed: * IF( ROWPIV.EQ.'C' .AND. PIVROC.EQ.'C') THEN * DESCIP(MB_) must equal DESCA(NB_) * ELSE IF( ROWPIV.EQ.'R" .AND. PIVROC.EQ.'R') THEN * DESCIP(NB_) must equal DESCA(MB_) * END IF * * Arguments * ========= * * DIREC (global input) CHARACTER*1 * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P*sub( A ). * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P )*sub( A ). * * ROWCOL (global input) CHARACTER*1 * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * PIVROC (global input) CHARACTER*1 * Specifies whether IPIV is distributed over a process row * or column: * = 'R' IPIV distributed over a process row * = 'C' IPIV distributed over a process column * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * distributed submatrix sub( A ) to which the row or column * interchanges will be applied. On exit, the local pieces * of the permuted distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension (LIPIV) where LIPIV is * when ROWCOL='R' or 'r': * >= LOCr( IA+M-1 ) + MB_A if PIVROC='C' or 'c', * >= LOCc( M + MOD(JP-1,NB_P) ) if PIVROC='R' or 'r', and, * when ROWCOL='C' or 'c': * >= LOCr( N + MOD(IP-1,MB_P) ) if PIVROC='C' or 'c', * >= LOCc( JA+N-1 ) + NB_A if PIVROC='R' or 'r'. * This array contains the pivoting information. IPIV(i) is the * global row (column), local row (column) i was swapped with. * When ROWCOL='R' or 'r' and PIVROC='C' or 'c', or ROWCOL='C' * or 'c' and PIVROC='R' or 'r', the last piece of this array of * size MB_A (resp. NB_A) is used as workspace. In those cases, * this array is tied to the distributed matrix A. * * IP (global input) INTEGER * The row index in the global array P indicating the first * row of sub( P ). * * JP (global input) INTEGER * The column index in the global array P indicating the * first column of sub( P ). * * DESCIP (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed vector IPIV. * * IWORK (local workspace) INTEGER array, dimension (LDW) * where LDW is equal to the workspace necessary for * transposition, and the storage of the tranposed IPIV: * * Let LCM be the least common multiple of NPROW and NPCOL. * IF( ROWCOL.EQ.'R' .AND. PIVROC.EQ.'R' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + NB_P * ELSE * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + * NB_P * CEIL( CEIL(LOCc(N_P)/NB_P) / (LCM/NPCOL) ) * END IF * ELSE IF( ROWCOL.EQ.'C' .AND. PIVROC.EQ.'C' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P * ELSE * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * END IF * ELSE * IWORK is not referenced. * END IF * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ROWPVT INTEGER I, ICTXT, ICURCOL, ICURROW, IIP, ITMP, IPT, $ JJP, JPT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCPT( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, $ INFOG2L, PDLAPV2, PICOL2ROW, PIROW2COL * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC, INDXG2P EXTERNAL LSAME, NUMROC, INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'C' ) ) THEN CALL PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PDLAPV2 * ELSE * * Take IPIV distributed over row 0, and store it in * iwork, distributed over column 0 * IPT = MOD( JP-1, DESCA(MB_) ) DESCPT(M_) = M + IPT + NPROW*DESCA(MB_) DESCPT(N_) = 1 DESCPT(MB_) = DESCA(MB_) DESCPT(NB_) = 1 DESCPT(RSRC_) = INDXG2P( IA, DESCA(MB_), IA, DESCA(RSRC_), $ NPROW ) DESCPT(CSRC_) = MYCOL DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = NUMROC( DESCPT(M_), DESCPT(MB_), MYROW, $ DESCPT(RSRC_), NPROW ) ITMP = NUMROC( DESCIP(N_), DESCIP(NB_), MYCOL, $ DESCIP(CSRC_), NPCOL ) CALL INFOG2L( IP, JP-IPT, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) CALL PIROW2COL( ICTXT, M+IPT, 1, DESCIP(NB_), IPIV(JJP), $ ITMP, IWORK, DESCPT(LLD_), 0, ICURCOL, $ DESCPT(RSRC_), $ MYCOL, IWORK(DESCPT(LLD_)-DESCPT(MB_)+1) ) * * Send column-distributed pivots to all columns * ITMP = DESCPT(LLD_) - DESCPT(MB_) IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP, $ MYROW, 0 ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * IPT = IPT + 1 DO 10 I = 1, ITMP IWORK(I) = IWORK(I) - JP + IPT 10 CONTINUE CALL PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ IPT, 1, DESCPT ) END IF * * Otherwise, we're pivoting the columns of sub( A ) * ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'R' ) ) THEN CALL PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PDLAPV2 * ELSE * * Take IPIV distributed over column 0, and store it in * iwork, distributed over row 0 * JPT = MOD( IP-1, DESCA(NB_) ) DESCPT(M_) = 1 DESCPT(N_) = N + JPT + NPCOL*DESCA(NB_) DESCPT(MB_) = 1 DESCPT(NB_) = DESCA(NB_) DESCPT(RSRC_) = MYROW DESCPT(CSRC_) = INDXG2P( JA, DESCA(NB_), JA, DESCA(CSRC_), $ NPCOL ) DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = 1 CALL INFOG2L( IP-JPT, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) ITMP = NUMROC( N+JPT, DESCPT(NB_), MYCOL, DESCPT(CSRC_), $ NPCOL ) CALL PICOL2ROW( ICTXT, N+JPT, 1, DESCIP(MB_), IPIV(IIP), $ DESCIP(LLD_), IWORK, MAX(1, ITMP), ICURROW, $ 0, 0, DESCPT(CSRC_), IWORK(ITMP+1) ) * * Send row-distributed pivots to all rows * IF( MYROW.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP, 0, MYCOL ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * JPT = JPT + 1 DO 20 I = 1, ITMP IWORK(I) = IWORK(I) - IP + JPT 20 CONTINUE CALL PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ 1, JPT, DESCPT ) END IF END IF * RETURN * * End of PDLAPIV * END scalapack-1.8.0/SRC/pdgbtrf.f0000640000175000017500000011027610363532303015551 0ustar muammarmuammar SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) DOUBLE PRECISION A( * ), AF( * ), WORK( * ) * .. * * Purpose * ======= * * PDGBTRF computes a LU factorization * of an N-by-N real banded * distributed matrix * with bandwidth BWL, BWU: A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PDGBTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) Q = L U * * where U is a banded upper triangular matrix and L is banded * lower triangular, and P and Q are permutation matrices. * The matrix Q represents reordering of columns * for parallelism's sake, while P represents * reordering of rows for numerical stability using * classic partial pivoting. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed matrix * A(1:N, JA:JA+N-1) to be factored. * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDGBTRF and this is stored in AF. If a linear system * is to be solved using PDGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 1 * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian National University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW, $ CSRC, DBPTR, FIRST_PROC, I, I1, I2, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, J, JA_NEW, JPTR, $ L, LAF_MIN, LBWL, LBWU, LDB, LDBB, LLDA, LM, $ LMJ, LN, LNJ, LPTR, MYCOL, MYROW, MY_NUM_COLS, $ NB, NEICOL, NP, NPACT, NPCOL, NPROW, NPSTR, $ NP_SAVE, NRHS, ODD_N, ODD_SIZE, ODPTR, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ DGBTRF, DGEMM, DGER, DGERV2D, DGESD2D, DGETRF, $ DLACPY, DLASWP, DLATCPY, DSWAP, DTRRV2D, $ DTRSD2D, DTRSM, GLOBCHK, IGAMX2D, IGEBR2D, $ IGEBS2D, PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 6*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -11 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 6*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -2 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -3 END IF * IF( LLDA.LT.( 2*BWL+2*BWU+1 ) ) THEN INFO = -( 6*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 6*100+4 ) END IF * BW = BWU + BWL * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 6*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( NB+BWU )*( BWL+BWU ) + 6*( BWL+BWU )*( BWL+2*BWU ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -9 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 1 * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -11 * put minimum value of work into work( 1 ) WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = BWU PARAM_CHECK( 3, 1 ) = BWL PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 9, 2 ) = 605 PARAM_CHECK( 8, 2 ) = 604 PARAM_CHECK( 7, 2 ) = 603 PARAM_CHECK( 6, 2 ) = 601 PARAM_CHECK( 5, 2 ) = 5 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 11 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 210 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * DO 30 J = 1, ODD_SIZE DO 20 I = 1, BW A( I+( J-1 )*LLDA ) = ZERO 20 CONTINUE 30 CONTINUE * * Begin main code * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * * Transfer triangle B_i of local matrix to next processor * for fillin. Overlap the send with the factorization of A_i. * IF( MYCOL.LE.NPCOL-2 ) THEN * * The last processor does not need to send anything. * BIPTR = location of triangle B_i in memory BIPTR = ( NB-BW )*LLDA + 2*BW + 1 * CALL DTRSD2D( ICTXT, 'U', 'N', $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0, $ NPCOL ) ), BW, A( BIPTR ), LLDA-1, 0, MYCOL+1 ) * END IF * * Factor main partition P_i A_i = L_i U_i on each processor * * LBWL, LBWU: lower and upper bandwidth of local solver * Note that for MYCOL > 0 one has lower triangular blocks! * LM is the number of rows which is usually NB except for * MYCOL = 0 where it is BWU less and MYCOL=NPCOL-1 where it * is NR+BWU where NR is the number of columns on the last processor * Finally APTR is the pointer to the first element of A. As LAPACK * has a slightly different matrix format than Scalapack the pointer * has to be adjusted on processor MYCOL=0. * IF( MYCOL.NE.0 ) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1 + BWU END IF * IF( MYCOL.NE.NPCOL-1 ) THEN LM = NB - LBWU LN = NB - BW ELSE IF( MYCOL.NE.0 ) THEN LM = ODD_SIZE + BWU LN = MAX( ODD_SIZE-BW, 0 ) ELSE LM = N LN = MAX( N-BW, 0 ) END IF * IF( LN.GT.0 ) THEN * CALL DGBTRF( LM, LN, LBWL, LBWU, A( APTR ), LLDA, IPIV, INFO ) * IF( INFO.NE.0 ) THEN INFO = INFO + NB*MYCOL GO TO 80 END IF * NRHS = BW LDB = LLDA - 1 * * Update the last BW columns of A_i (code modified from DGBTRS) * * Only the eliminations of unknowns > LN-BW have an effect on * the last BW columns. Loop over them... * DO 40 J = MAX( LN-BW+1, 1 ), LN * LMJ = MIN( LBWL, LM-J ) LNJ = MIN( BW, J+BW-LN+APTR-1 ) * L = IPIV( J ) * JPTR = J - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA * IF( L.NE.J ) THEN * * Element (L,LN+1) is swapped with element (J,LN+1) etc * Furthermore, the elements in the same row are LDB=LLDA-1 apart * The complicated formulas are to cope with the banded * data format: * LPTR = L - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA * CALL DSWAP( LNJ, A( LPTR ), LDB, A( JPTR ), LDB ) * END IF * * LPTR is the pointer to the beginning of the * coefficients of L * LPTR = BW + 1 + APTR + ( J-1 )*LLDA * CALL DGER( LMJ, LNJ, -ONE, A( LPTR ), 1, A( JPTR ), LDB, $ A( JPTR+1 ), LDB ) 40 CONTINUE * END IF * * Compute spike fill-in, L_i F_i = P_i B_{i-1} * * Receive triangle B_{i-1} from previous processor * IF( MYCOL.GT.0 ) THEN CALL DTRRV2D( ICTXT, 'U', 'N', MIN( BW, LM ), BW, AF( 1 ), BW, $ 0, MYCOL-1 ) * * Transpose transmitted upper triangular (trapezoidal) matrix * DO 60 I2 = 1, MIN( BW, LM ) DO 50 I1 = I2 + 1, BW AF( I1+( I2-1 )*BW ) = AF( I2+( I1-1 )*BW ) AF( I2+( I1-1 )*BW ) = ZERO 50 CONTINUE 60 CONTINUE * * Permutation and forward elimination (triang. solve) * DO 70 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L.NE.J ) THEN CALL DSWAP( BW, AF( ( L-1 )*BW+1 ), 1, $ AF( ( J-1 )*BW+1 ), 1 ) END IF * LPTR = BW + 1 + APTR + ( J-1 )*LLDA * CALL DGER( NRHS, LMJ, -ONE, AF( ( J-1 )*BW+1 ), 1, $ A( LPTR ), 1, AF( J*BW+1 ), BW ) * 70 CONTINUE * END IF * 80 CONTINUE * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU BN = BW ELSE BM = MIN( BW, ODD_SIZE ) + BWU BN = MIN( BW, ODD_SIZE ) END IF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = ( NB+BWU )*BW + 1 LDBB = 2*BW + BWU * * Copy from A and AF into block bidiagonal matrix (tail of AF) * * DBPTR = Pointer to diagonal blocks in A DBPTR = BW + 1 + LBWU + LN*LLDA * CALL DLACPY( 'G', BM, BN, A( DBPTR ), LLDA-1, AF( BBPTR+BW*LDBB ), $ LDBB ) * * Zero out any junk entries that were copied * DO 100 J = 1, BM DO 90 I = J + LBWL, BM - 1 AF( BBPTR+BW*LDBB+( J-1 )*LDBB+I ) = ZERO 90 CONTINUE 100 CONTINUE * IF( MYCOL.NE.0 ) THEN * * ODPTR = Pointer to offdiagonal blocks in A * ODPTR = ( LM-BM )*BW + 1 CALL DLATCPY( 'G', BW, BM, AF( ODPTR ), BW, $ AF( BBPTR+2*BW*LDBB ), LDBB ) END IF * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. CALL DGETRF( N-LN, N-LN, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) * END IF * * Loop over levels ... only occurs if npcol > 1 * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) are used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels * 110 CONTINUE IF( NPACT.LE.1 ) $ GO TO 190 * * Test if processor is active * IF( MOD( MYCOL, NPSTR ).EQ.0 ) THEN * * Send/Receive blocks * * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * * This node will potentially do more work later * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW ELSE IF( NEICOL / NPSTR.EQ.NPACT-1 ) THEN ODD_N = NUMROC( N, NB, NPCOL-1, 0, NPCOL ) BMN = MIN( BW, ODD_N ) + BWU ELSE * * Last processor skips to next level GO TO 180 END IF * * BM1 = M for 1st block on proc pair, BM2 2nd block * BM1 = BM BM2 = BMN * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * CALL DGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ 0, NEICOL ) * CALL DGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BM ), LDBB, 0, $ NEICOL ) * IF( NPACT.EQ.2 ) THEN * * Copy diagonal block to align whole system * CALL DLACPY( 'G', BMN, BW, AF( BBPTR+BM ), LDBB, $ AF( BBPTR+2*BW*LDBB+BM ), LDBB ) END IF * END IF * ELSE * * This node stops work after this stage -- an extra copy * is required to make the odd and even frontal matrices * look identical * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * BM1 = BMN BM2 = BM * CALL DGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, 0, $ NEICOL ) * CALL DLACPY( 'G', BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ AF( BBPTR+BMN ), LDBB ) * DO 130 J = BBPTR + 2*BW*LDBB, BBPTR + 3*BW*LDBB - 1, LDBB DO 120 I = 0, LDBB - 1 AF( I+J ) = ZERO 120 CONTINUE 130 CONTINUE * CALL DGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, $ 0, NEICOL ) * IF( NPACT.EQ.2 ) THEN * * Copy diagonal block to align whole system * CALL DLACPY( 'G', BM, BW, AF( BBPTR+BMN ), LDBB, $ AF( BBPTR+2*BW*LDBB+BMN ), LDBB ) END IF * END IF * * LU factorization with partial pivoting * IF( NPACT.NE.2 ) THEN * CALL DGETRF( BM+BMN, BW, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) * * Backsolve left side * DO 150 J = BBPTR, BBPTR + BW*LDBB - 1, LDBB DO 140 I = 0, BM1 - 1 AF( I+J ) = ZERO 140 CONTINUE 150 CONTINUE * CALL DLASWP( BW, AF( BBPTR ), LDBB, 1, BW, IPIV( LN+1 ), 1 ) * CALL DTRSM( 'L', 'L', 'N', 'U', BW, BW, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, AF( BBPTR ), LDBB ) * * Use partial factors to update remainder * CALL DGEMM( 'N', 'N', BM+BMN-BW, BW, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, AF( BBPTR ), LDBB, $ ONE, AF( BBPTR+BW ), LDBB ) * * Backsolve right side * NRHS = BW * CALL DLASWP( NRHS, AF( BBPTR+2*BW*LDBB ), LDBB, 1, BW, $ IPIV( LN+1 ), 1 ) * CALL DTRSM( 'L', 'L', 'N', 'U', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB ) * * Use partial factors to update remainder * CALL DGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB, ONE, $ AF( BBPTR+2*BW*LDBB+BW ), LDBB ) * * * Test if processor is active in next round * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * * Reset BM * BM = BM1 + BM2 - BW * * Local copying in the block bidiagonal area * * CALL DLACPY( 'G', BM, BW, AF( BBPTR+BW ), LDBB, $ AF( BBPTR+BW*LDBB ), LDBB ) CALL DLACPY( 'G', BM, BW, AF( BBPTR+2*BW*LDBB+BW ), LDBB, $ AF( BBPTR+2*BW*LDBB ), LDBB ) * * Zero out space that held original copy * DO 170 J = 0, BW - 1 DO 160 I = 0, BM - 1 AF( BBPTR+2*BW*LDBB+BW+J*LDBB+I ) = ZERO 160 CONTINUE 170 CONTINUE * END IF * ELSE * * Factor the final 2 by 2 block matrix * CALL DGETRF( BM+BMN, BM+BMN, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), INFO ) END IF * END IF * * Last processor in an odd-sized NPACT skips to here * 180 CONTINUE * NPACT = ( NPACT+1 ) / 2 NPSTR = NPSTR*2 GO TO 110 * 190 CONTINUE * End loop over levels * 200 CONTINUE * If error was found in Phase 1, processors jump here. * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 210 CONTINUE * If this processor did not hold part of the grid it * jumps here. * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PDGBTRF * END scalapack-1.8.0/SRC/pslaqge.f0000640000175000017500000002325610363532303015556 0ustar muammarmuammar SUBROUTINE PSLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED INTEGER IA, JA, M, N REAL AMAX, COLCND, ROWCND * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), C( * ), R( * ) * .. * * Purpose * ======= * * PSLAQGE equilibrates a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) using the row and scaling * factors in the vectors R and C. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)) * containing on entry the M-by-N matrix sub( A ). On exit, * the equilibrated distributed matrix. See EQUED for the * form of the equilibrated distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * R (local input) REAL array, dimension LOCr(M_A) * The row scale factors for sub( A ). R is aligned with the * distributed matrix A, and replicated across every process * column. R is tied to the distributed matrix A. * * C (local input) REAL array, dimension LOCc(N_A) * The column scale factors of sub( A ). C is aligned with the * distributed matrix A, and replicated down every process * row. C is tied to the distributed matrix A. * * ROWCND (global input) REAL * The global ratio of the smallest R(i) to the largest R(i), * IA <= i <= IA+M-1. * * COLCND (global input) REAL * The global ratio of the smallest C(i) to the largest C(i), * JA <= j <= JA+N-1. * * AMAX (global input) REAL * Absolute value of largest distributed submatrix entry. * * EQUED (global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration * = 'R': Row equilibration, i.e., sub( A ) has been pre- * multiplied by diag(R(IA:IA+M-1)), * = 'C': Column equilibration, i.e., sub( A ) has been post- * multiplied by diag(C(JA:JA+N-1)), * = 'B': Both row and column equilibration, i.e., sub( A ) * has been replaced by * diag(R(IA:IA+M-1)) * sub( A ) * diag(C(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if row or column scaling * should be done based on the ratio of the row or column scaling * factors. If ROWCND < THRESH, row scaling is done, and if * COLCND < THRESH, column scaling is done. * * LARGE and SMALL are threshold values used to decide if row scaling * should be done based on the absolute size of the largest matrix * element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, THRESH PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IIA, IOFFA, $ IROFF, J, JJA, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. INTEGER NUMROC REAL PSLAMCH EXTERNAL NUMROC, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PSLAMCH( ICTXT, 'Safe minimum' ) / $ PSLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) $ THEN * * No row scaling * IF( COLCND.GE.THRESH ) THEN * * No column scaling * EQUED = 'N' * ELSE * * Column scaling * IOFFA = (JJA-1)*LDA DO 20 J = JJA, JJA+NQ-1 CJ = C( J ) DO 10 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*A( IOFFA + I ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE EQUED = 'C' END IF * ELSE IF( COLCND.GE.THRESH ) THEN * * Row scaling, no column scaling * IOFFA = (JJA-1)*LDA DO 40 J = JJA, JJA+NQ-1 DO 30 I = IIA, IIA+MP-1 A( IOFFA + I ) = R( I )*A( IOFFA + I ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE EQUED = 'R' * ELSE * * Row and column scaling * IOFFA = (JJA-1)*LDA DO 60 J = JJA, JJA+NQ-1 CJ = C( J ) DO 50 I = IIA, IIA+MP-1 A( IOFFA + I ) = CJ*R( I )*A( IOFFA + I ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE EQUED = 'B' * END IF * RETURN * * End of PSLAQGE * END scalapack-1.8.0/SRC/pzgetrf.f0000640000175000017500000002645110363532303015603 0ustar muammarmuammar SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZGETRF computes an LU factorization of a general M-by-N distributed * matrix sub( A ) = (IA:IA+M-1,JA:JA+N-1) using partial pivoting with * row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal ele- * ments (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). L and U are stored in sub( A ). * * This is the right-looking Parallel Level 3 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be factored. On exit, this * array contains the local pieces of the factors L and U from * the factorization sub( A ) = P*L*U; the unit diagonal ele- * ments of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, COLCTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZGEMM, PZGETF2, $ PZLASWP, PZTRSM * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUM1, $ IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGETRF', -INFO ) RETURN END IF * * Quick return if possible * IF( DESCA( M_ ).EQ.1 ) THEN IPIV( 1 ) = 1 RETURN ELSE IF( M.EQ.0 .OR. N.EQ.0 ) THEN RETURN END IF * * Split-ring topology for the communication along process rows * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', ' ' ) * * Handle the first block of columns separately * MN = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PZGETF2( M, JB, A, IA, JA, DESCA, IPIV, INFO ) * IF( JB+1.LE.N ) THEN * * Apply interchanges to columns JN+1:JA+N-1. * CALL PZLASWP( 'Forward', 'Rows', N-JB, A, IA, JN+1, DESCA, $ IA, IN, IPIV ) * * Compute block row of U. * CALL PZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-JB, ONE, A, IA, JA, DESCA, A, IA, JN+1, DESCA ) * IF( JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PZGEMM( 'No transpose', 'No transpose', M-JB, N-JB, JB, $ -ONE, A, IN+1, JA, DESCA, A, IA, JN+1, DESCA, $ ONE, A, IN+1, JN+1, DESCA ) * END IF END IF * * Loop over the remaining blocks of columns. * DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PZGETF2( M-J+JA, JB, A, I, J, DESCA, IPIV, IINFO ) * IF( INFO.EQ.0 .AND. IINFO.GT.0 ) $ INFO = IINFO + J - JA * * Apply interchanges to columns JA:J-JA. * CALL PZLASWP( 'Forward', 'Rowwise', J-JA, A, IA, JA, DESCA, $ I, I+JB-1, IPIV ) * IF( J-JA+JB+1.LE.N ) THEN * * Apply interchanges to columns J+JB:JA+N-1. * CALL PZLASWP( 'Forward', 'Rowwise', N-J-JB+JA, A, IA, J+JB, $ DESCA, I, I+JB-1, IPIV ) * * Compute block row of U. * CALL PZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-J-JB+JA, ONE, A, I, J, DESCA, A, I, J+JB, $ DESCA ) * IF( J-JA+JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PZGEMM( 'No transpose', 'No transpose', M-J-JB+JA, $ N-J-JB+JA, JB, -ONE, A, I+JB, J, DESCA, A, $ I, J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * END IF END IF * 10 CONTINUE * IF( INFO.EQ.0 ) $ INFO = MN + 1 CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, INFO, 1, IDUM1, IDUM2, $ -1, -1, MYCOL ) IF( INFO.EQ.MN+1 ) $ INFO = 0 * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * RETURN * * End of PZGETRF * END scalapack-1.8.0/SRC/pslange.f0000640000175000017500000002661410363532303015554 0ustar muammarmuammar REAL FUNCTION PSLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLANGE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * distributed matrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1). * * PSLANGE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+M-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PSLANGE as described * above. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PSLANGE * is set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PSLANGE is set to zero. N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, ICOFF, IOFFA, $ IROFF, J, JJ, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSTREECOMB, $ SCOMBSSQ, SGEBR2D, SGEBS2D, $ SGAMX2D, SGSUM2D, SLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ISAMAX, NUMROC EXTERNAL LSAME, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO IF( NQ.GT.0 .AND. MP.GT.0 ) THEN IOFFA = (JJ-1)*LDA DO 20 J = JJ, JJ+NQ-1 DO 10 I = II, MP+II-1 VALUE = MAX( VALUE, ABS( A( IOFFA+I ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE END IF CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, J, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * * Find norm1( sub( A ) ). * IF( NQ.GT.0 ) THEN IOFFA = ( JJ - 1 ) * LDA DO 40 J = JJ, JJ+NQ-1 SUM = ZERO IF( MP.GT.0 ) THEN DO 30 I = II, MP+II-1 SUM = SUM + ABS( A( IOFFA+I ) ) 30 CONTINUE END IF IOFFA = IOFFA + LDA WORK( J-JJ+1 ) = SUM 40 CONTINUE END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, J, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * * Find normI( sub( A ) ). * IF( MP.GT.0 ) THEN IOFFA = II + ( JJ - 1 ) * LDA DO 60 I = II, II+MP-1 SUM = ZERO IF( NQ.GT.0 ) THEN DO 50 J = IOFFA, IOFFA + NQ*LDA - 1, LDA SUM = SUM + ABS( A( J ) ) 50 CONTINUE END IF WORK( I-II+1 ) = SUM IOFFA = IOFFA + 1 60 CONTINUE END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for supnorm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( ISAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, I, $ J, -1, 0, 0 ) END IF * ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE IOFFA = II + ( JJ - 1 ) * LDA IF( NQ.GT.0 ) THEN DO 70 J = IOFFA, IOFFA + NQ*LDA - 1, LDA CALL SLASSQ( MP, A( J ), 1, SCALE, SUM ) 70 CONTINUE END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PSLANGE = VALUE * RETURN * * End of PSLANGE * END scalapack-1.8.0/SRC/pztrtrs.f0000640000175000017500000003144110363532303015645 0ustar muammarmuammar SUBROUTINE PZTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZTRTRS solves a triangular system of the form * * sub( A ) * X = sub( B ) or sub( A )**T * X = sub( B ) or * * sub( A )**H * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a triangular * distributed matrix of order N, and B(IB:IB+N-1,JB:JB+NRHS-1) is an * N-by-NRHS distributed matrix denoted by sub( B ). A check is made * to verify that sub( A ) is nonsingular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': Solve sub( A ) * X = sub( B ) (No transpose) * = 'T': Solve sub( A )**T * X = sub( B ) (Transpose) * = 'C': Solve sub( A )**H * X = sub( B ) (Conjugate transpose) * * DIAG (global input) CHARACTER * = 'N': sub( A ) is non-unit triangular; * = 'U': sub( A ) is unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed matrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1) ). This array * contains the local pieces of the distributed triangular * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of sub( A ) contains the lower triangular * matrix, and the strictly upper triangular part of sub( A ) * is not referenced. If DIAG = 'U', the diagonal elements of * sub( A ) are also not referenced and are assumed to be 1. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the right hand side distributed matrix * sub( B ). On exit, if INFO = 0, sub( B ) is overwritten by * the solution matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = i, the i-th diagonal element of sub( A ) is * zero, indicating that the submatrix is singular and the * solutions X have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOTRAN, NOUNIT, UPPER INTEGER I, IAROW, IBROW, ICOFFA, ICTXT, ICURCOL, $ ICURROW, IROFFA, IROFFB, IDUM, II, IOFFA, J, $ JBLK, JJ, JN, LDA, LL, MYCOL, MYROW, NPCOL, $ NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK2MAT, PXERBLA, PZTRSM * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL ICEIL, INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) NOTRAN = LSAME( TRANS, 'N' ) * CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, NRHS, 5, IB, JB, DESCB, 13, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN INFO = -8 ELSE IF( IROFFA.NE.IROFFB .OR. IAROW.NE.IBROW ) THEN INFO = -11 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -904 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -1304 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM1( 2 ) = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IF( NOUNIT ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'D' ) END IF IDUM2( 3 ) = 3 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, NRHS, 5, $ IB, JB, DESCB, 13, 3, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTRTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first block separately * JBLK = JN-JA+1 IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 10 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 LL = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JBLK = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN LL = IOFFA DO 20 I = 0, JBLK-1 IF( A( LL ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 LL = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JBLK IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JBLK*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Solve A * x = b, A**T * x = b, or A**H * x = b. * CALL PZTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * RETURN * * End of PZTRTRS * END scalapack-1.8.0/SRC/clamsh.f0000640000175000017500000002272310363532303015367 0ustar muammarmuammar SUBROUTINE CLAMSH( S, LDS, NBULGE, JBLK, H, LDH, N, ULP ) * * -- ScaLAPACK routine (version 1.7) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * May 28, 1999 * * .. Scalar Arguments .. INTEGER JBLK, LDH, LDS, N, NBULGE REAL ULP * .. * .. Array Arguments .. COMPLEX H( LDH, * ), S( LDS, * ) * .. * * Purpose * ======= * * CLAMSH sends multiple shifts through a small (single node) matrix to * see how consecutive small subdiagonal elements are modified by * subsequent shifts in an effort to maximize the number of bulges * that can be sent through. * CLAMSH should only be called when there are multiple shifts/bulges * (NBULGE > 1) and the first shift is starting in the middle of an * unreduced Hessenberg matrix because of two or more consecutive * small subdiagonal elements. * * Arguments * ========= * * S (local input/output) COMPLEX array, ( LDS,* ) * On entry, the matrix of shifts. Only the 2x2 diagonal of S * is referenced. It is assumed that S has JBLK double shifts * (size 2). * On exit, the data is rearranged in the best order for * applying. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * 1 < NBULGE <= JBLK <= LDS/2 * * NBULGE (local input/output) INTEGER * On entry, the number of bulges to send through H ( >1 ). * NBULGE should be less than the maximum determined (JBLK). * 1 < NBULGE <= JBLK <= LDS/2 * On exit, the maximum number of bulges that can be sent * through. * * JBLK (local input) INTEGER * On entry, the number of shifts determined for S. * Unchanged on exit. * * H (local input/output) COMPLEX array ( LDH,N ) * On entry, the local matrix to apply the shifts on. * H should be aligned so that the starting row is 2. * On exit, the data is destroyed. * * LDH (local input) INTEGER * On entry, the leading dimension of H. Unchanged on exit. * * N (local input) INTEGER * On entry, the size of H. If all the bulges are expected to * go through, N should be at least 4*NBULGE+2. * Otherwise, NBULGE may be reduced by this routine. * * ULP (local input) REAL * On entry, machine precision * Unchanged on exit. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. REAL RONE, TEN PARAMETER ( RONE = 1.0E+0, TEN = 10.0E+0 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IBULGE, IVAL, J, K, M, NR REAL DVAL, S1, TST1 COMPLEX CDUM, H00, H10, H11, H12, H21, H22, H33, H33S, $ H43H34, H44, H44S, SUM, T1, T2, T3, V1, V2, V3 * .. * .. Local Arrays .. COMPLEX V( 3 ) * .. * .. External Subroutines .. EXTERNAL CCOPY, CLARFG * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CONJG, AIMAG, MAX, MIN * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * M = 2 DO 50 IBULGE = 1, NBULGE H44 = S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) H33 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) H43H34 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 )* $ S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).GT.ULP*TST1 ) THEN * Find minimum DVAL = ( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ) ) / $ ( ULP*TST1 ) IVAL = IBULGE DO 10 I = IBULGE + 1, NBULGE H44 = S( 2*JBLK-2*I+2, 2*JBLK-2*I+2 ) H33 = S( 2*JBLK-2*I+1, 2*JBLK-2*I+1 ) H43H34 = S( 2*JBLK-2*I+1, 2*JBLK-2*I+2 )* $ S( 2*JBLK-2*I+2, 2*JBLK-2*I+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( ( DVAL.GT.( CABS1( H10 )*( CABS1( V2 )+ $ CABS1( V3 ) ) ) / ( ULP*TST1 ) ) .AND. $ ( DVAL.GT.RONE ) ) THEN DVAL = ( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ) ) / $ ( ULP*TST1 ) IVAL = I END IF 10 CONTINUE IF( ( DVAL.LT.TEN ) .AND. ( IVAL.NE.IBULGE ) ) THEN H44 = S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2 ) H33 = S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+1 ) H43H34 = S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+2 ) H10 = S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1 ) S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2 ) = S( 2*JBLK-2* $ IBULGE+2, 2*JBLK-2*IBULGE+2 ) S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+1 ) = S( 2*JBLK-2* $ IBULGE+1, 2*JBLK-2*IBULGE+1 ) S( 2*JBLK-2*IVAL+1, 2*JBLK-2*IVAL+2 ) = S( 2*JBLK-2* $ IBULGE+1, 2*JBLK-2*IBULGE+2 ) S( 2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1 ) = S( 2*JBLK-2* $ IBULGE+2, 2*JBLK-2*IBULGE+1 ) S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) = H44 S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) = H33 S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 ) = H43H34 S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) = H10 END IF H44 = S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2 ) H33 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+1 ) H43H34 = S( 2*JBLK-2*IBULGE+1, 2*JBLK-2*IBULGE+2 )* $ S( 2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1 ) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) END IF IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).GT.TEN*ULP*TST1 ) $ THEN * IBULGE better not be 1 here or we have a bug! NBULGE = MAX( IBULGE-1, 1 ) RETURN END IF DO 40 K = M, N - 1 NR = MIN( 3, N-K+1 ) IF( K.GT.M ) $ CALL CCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL CLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.N-1 ) $ H( K+2, K-1 ) = ZERO ELSE * H(m,m-1) must be updated, * H( K, K-1 ) = H( K, K-1 ) - CONJG( T1 )*H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 DO 20 J = K, N SUM = CONJG( T1 )*H( K, J ) + $ CONJG( T2 )*H( K+1, J ) + $ CONJG( T3 )*H( K+2, J ) H( K, J ) = H( K, J ) - SUM H( K+1, J ) = H( K+1, J ) - SUM*V2 H( K+2, J ) = H( K+2, J ) - SUM*V3 20 CONTINUE DO 30 J = 1, MIN( K+3, N ) SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) H( J, K+2 ) = H( J, K+2 ) - SUM*CONJG( V3 ) 30 CONTINUE END IF 40 CONTINUE 50 CONTINUE * RETURN * * End of CLAMSH * END scalapack-1.8.0/SRC/psgeql2.f0000640000175000017500000003002510363532303015467 0ustar muammarmuammar SUBROUTINE PSGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQL2 computes a QL factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ REAL AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PSELSET, PSLARF, PSLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA, SGEBR2D, SGEBS2D, $ SLARFG, SSCAL * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IACOL = INDXG2P( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN I = II+(JJ+NQ-2)*DESCA( LLD_ ) AJJ = A( I ) CALL SLARFG( 1, AJJ, A( I ), 1, TAU( JJ+NQ-1 ) ) IF( N.GT.1 ) THEN ALPHA = ONE - TAU( JJ+NQ-1 ) CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL SSCAL( NQ-1, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL SSCAL( NQ, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1, IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA+K-1, JA, -1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(ia:i+m-k-1,j+n-k) * CALL PSLARFG( M-K+I-IA+1, AJJ, M-K+I, N-K+J, A, IA, $ N-K+J, DESCA, 1, TAU ) * * Apply H(j) to A(ia:i+m-k,ja:j+n-k-1) from the left * CALL PSELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PSLARF( 'Left', M-K+I-IA+1, N-K+J-JA, A, IA, N-K+J, $ DESCA, 1, TAU, A, IA, JA, DESCA, WORK ) CALL PSELSET( A, I+M-K, J+N-K, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEQL2 * END scalapack-1.8.0/SRC/pcung2r.f0000640000175000017500000002576510363532303015511 0ustar muammarmuammar SUBROUTINE PCUNG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNG2R generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PCGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PCGEQRF in the K columns of its array * argument A(IA:*,JA:JA+K-1). On exit, this array contains * the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PCGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, KQ, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLARF, PCLASET, PCSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja+k:ja+n-1 to columns of the unit matrix * CALL PCLASET( 'All', K, N-K, ZERO, ZERO, A, IA, JA+K, DESCA ) CALL PCLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, DESCA ) * TAUJ = ZERO KQ = MAX( 1, NUMROC( JA+K-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) DO 10 J = JA+K-1, JA, -1 * * Apply H(j) to A(ia+j-ja:ia+m-1,j:ja+n-1) from the left * IF( J.LT.JA+N-1 ) THEN CALL PCELSET( A, IA+J-JA, J, DESCA, ONE ) CALL PCLARF( 'Left', M-J+JA, JA+N-J-1, A, IA+J-JA, J, DESCA, $ 1, TAU, A, IA+J-JA, J+1, DESCA, WORK ) END IF * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, KQ ) ) IF( J-JA.LT.M-1 ) $ CALL PCSCAL( M-J+JA-1, -TAUJ, A, IA+J-JA+1, J, DESCA, 1 ) CALL PCELSET( A, IA+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia:ia+j-ja-1,j) to zero * CALL PCLASET( 'All', J-JA, 1, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNG2R * END scalapack-1.8.0/SRC/pchegvx.f0000640000175000017500000010575310377154001015571 0ustar muammarmuammar SUBROUTINE PCHEGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, $ IFAIL, ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, $ LIWORK, LRWORK, LWORK, M, N, NZ REAL ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. * INTEGER DESCA( * ), DESCB( * ), DESCZ( * ), $ ICLUSTR( * ), IFAIL( * ), IWORK( * ) REAL GAP( * ), RWORK( * ), W( * ) COMPLEX A( * ), B( * ), WORK( * ), Z( * ) * .. * * Purpose * * ======= * * PCHEGVX computes all the eigenvalues, and optionally, * the eigenvectors * of a complex generalized Hermitian-definite eigenproblem, of the form * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or * sub( B )*sub( A )*x=(lambda)*x. * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be * Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed * to be Hermitian positive definite. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * IBTYPE (global input) INTEGER * Specifies the problem type to be solved: * = 1: sub( A )*x = (lambda)*sub( B )*x * = 2: sub( A )*sub( B )*x = (lambda)*x * = 3: sub( B )*sub( A )*x = (lambda)*x * * JOBZ (global input) CHARACTER*1 * = 'N': Compute eigenvalues only; * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored; * = 'L': Lower triangles of sub( A ) and sub( B ) are stored. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix. * * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains * the distributed matrix Z of eigenvectors. The eigenvectors * are normalized as follows: * if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I; * if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I. * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') * or the lower triangle (if UPLO='L') of sub( A ), including * the diagonal, is destroyed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PCHEGVX cannot guarantee * correct error reporting. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( B ) contains * the upper triangular part of the matrix. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( B ) contains * the lower triangular part of the matrix. * * On exit, if INFO <= N, the part of sub( B ) containing the * matrix is overwritten by the triangular factor U or L from * the Cholesky factorization sub( B ) = U**H*U or * sub( B ) = L*L**H. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * DESCB( CTXT_ ) must equal DESCA( CTXT_ ) * * VL (global input) REAL * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) REAL * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) REAL * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PSLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PSLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PCHEGVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PCHEGVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) REAL * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * The row index in the global array Z indicating the first * row of sub( Z ). * * JZ (global input) INTEGER * The column index in the global array Z indicating the * first column of sub( Z ). * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX array, * dimension (LWORK) * WORK(1) returns the optimal workspace. * * LWORK (local input) INTEGER * Size of WORK array. If only eigenvalues are requested: * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) * If eigenvectors are requested: * LWORK >= N + ( NP0 + MQ0 + NB ) * NB * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). * * For optimal performance, greater workspace is needed, i.e. * LWORK >= MAX( LWORK, N + NHETRD_LWOPT, * NHEGST_LWOPT ) * Where LWORK is as defined above, and * NHETRD_LWORK = 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 1 ) * NPS * NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB * * NB = DESCA( MB_ ) * NP0 = NUMROC( N, NB, 0, 0, NPROW ) * NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) * ICTXT = DESCA( CTXT_ ) * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) * SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the optimal * size for all work arrays. Each of these values is returned * in the first entry of the correspondingwork array, and no * error message is issued by PXERBLA. * * RWORK (local workspace/output) REAL array, * dimension max(3,LRWORK) * On return, RWORK(1) contains the amount of workspace * required for optimal efficiency * if JOBZ='N' RWORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' RWORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required when computing optimal workspace. * * LRWORK (local input) INTEGER * Size of RWORK * See below for definitions of variables used to define LRWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LRWORK >= 5 * NN + 4 * N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LRWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = * DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LRWORK is too small: * If LRWORK is too small to guarantee orthogonality, * PCHEGVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LRWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-25 * is returned. Note that when RANGE='V', PCHEGVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LRWORK is large enough to allow PCHEGVX to * compute the eigenvalues, PCHEGVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PCSTEIN will perform no better than CSTEIN on 1 processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (output) INTEGER array, dimension (N) * IFAIL provides additional information when INFO .NE. 0 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the * indices of the eigenvectors that failed to converge. * * If neither of the above error conditions hold and JOBZ = 'V', * then the first M elements of IFAIL are set to zero. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) REAL array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PCHEGVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to * compute eigenvalues. * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/16,2).NE.0), then B was not positive * definite. IFAIL(1) indicates the order of * the smallest minor which is not positive definite. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1), * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties, * namely the following expressions should be true: * * DESCA(MB_) = DESCA(NB_) * IA = IB = IZ * JA = IB = JZ * DESCA(M_) = DESCB(M_) =DESCZ(M_) * DESCA(N_) = DESCB(N_)= DESCZ(N_) * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_) * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_) * MOD( IA-1, DESCA( MB_ ) ) = 0 * MOD( JA-1, DESCA( NB_ ) ) = 0 * MOD( IB-1, DESCB( MB_ ) ) = 0 * MOD( JB-1, DESCB( NB_ ) ) = 0 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) REAL FIVE, ZERO PARAMETER ( FIVE = 5.0E+0, ZERO = 0.0E+0 ) INTEGER IERRNPD PARAMETER ( IERRNPD = 16 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ CHARACTER TRANS INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA, $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LRWMIN, $ LRWOPT, LWMIN, LWOPT, MQ0, MYCOL, MYROW, NB, $ NEIG, NHEGST_LWOPT, NHETRD_LWOPT, NN, NP0, $ NPCOL, NPROW, NPS, NQ0, SQNPC REAL EPS, SCALE * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV REAL PSLAMCH EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHEEVX, PCHENGST, $ PCHK1MAT, PCHK2MAT, PCPOTRF, PCTRMM, PCTRSM, $ PXERBLA, SGEBR2D, SGEBS2D, SSCAL * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, DBLE, ICHAR, INT, MAX, MIN, MOD, $ REAL, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 900+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2600+CTXT_ ) ELSE * * Get machine constants. * EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) * WANTZ = LSAME( JOBZ, 'V' ) UPPER = LSAME( UPLO, 'U' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN RWORK( 1 ) = ABSTOL IF( VALEIG ) THEN RWORK( 2 ) = VL RWORK( 3 ) = VU ELSE RWORK( 2 ) = ZERO RWORK( 3 ) = ZERO END IF CALL SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, $ 3 ) ELSE CALL SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, 3, $ 0, 0 ) END IF IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) $ LQUERY = .TRUE. * LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) * NB = DESCA( MB_ ) NN = MAX( N, NB, 2 ) NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = N + MAX( NB*( NP0+1 ), 3 ) LWOPT = LWMIN LRWMIN = 5*NN + 4*N IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LRWOPT = 4*N + MAX( 5*NN, NP0*MQ0 ) ELSE LRWOPT = LRWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = N + ( NP0+MQ0+NB )*NB LWOPT = LWMIN LRWMIN = 4*N + MAX( 5*NN, NP0*MQ0 ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LRWOPT = LRWMIN * END IF * * Compute how much workspace is needed to use the * new TRD and GST algorithms * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS NB = DESCA( MB_ ) NP0 = NUMROC( N, NB, 0, 0, NPROW ) NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB LWOPT = MAX( LWOPT, N+NHETRD_LWOPT, NHEGST_LWOPT ) * * Version 1.0 Limitations * IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -2 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -3 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -4 ELSE IF( N.LT.0 ) THEN INFO = -5 ELSE IF( IROFFA.NE.0 ) THEN INFO = -7 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -8 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 900+NB_ ) ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN INFO = -( 1300+M_ ) ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN INFO = -( 1300+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1300+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -( 1300+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN INFO = -( 1300+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN INFO = -( 1300+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2200+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2200+CSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2200+CTXT_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -11 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -12 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -15 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -16 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -17 ELSE IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -14 ELSE IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -15 ELSE IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* $ ABS( ABSTOL ) ) THEN INFO = -18 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -32 END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( WANTZ ) THEN IDUM1( 2 ) = ICHAR( 'V' ) ELSE IDUM1( 2 ) = ICHAR( 'N' ) END IF IDUM2( 2 ) = 2 IF( UPPER ) THEN IDUM1( 3 ) = ICHAR( 'U' ) ELSE IDUM1( 3 ) = ICHAR( 'L' ) END IF IDUM2( 3 ) = 3 IF( ALLEIG ) THEN IDUM1( 4 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 4 ) = ICHAR( 'I' ) ELSE IDUM1( 4 ) = ICHAR( 'V' ) END IF IDUM2( 4 ) = 4 IF( LQUERY ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 5 CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, $ JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, $ INFO ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = CMPLX( REAL( LWOPT ) ) RWORK( 1 ) = REAL( LRWOPT ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHEGVX ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Form a Cholesky factorization of sub( B ). * CALL PCPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) IF( INFO.NE.0 ) THEN IWORK( 1 ) = LIWMIN WORK( 1 ) = CMPLX( REAL( LWOPT ) ) RWORK( 1 ) = REAL( LRWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD RETURN END IF * * Transform problem to standard eigenvalue problem and solve. * CALL PCHENGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) CALL PCHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, $ IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR, $ GAP, INFO ) * IF( WANTZ ) THEN * * Backtransform eigenvectors to the original problem. * NEIG = M IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN * * For sub( A )*x=(lambda)*sub( B )*x and * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors: * x = inv(L)'*y or inv(U)*y * IF( UPPER ) THEN TRANS = 'N' ELSE TRANS = 'C' END IF * CALL PCTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) * ELSE IF( IBTYPE.EQ.3 ) THEN * * For sub( B )*sub( A )*x=(lambda)*x; * backtransform eigenvectors: x = L*y or U'*y * IF( UPPER ) THEN TRANS = 'C' ELSE TRANS = 'N' END IF * CALL PCTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, $ B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) END IF END IF * IF( SCALE.NE.ONE ) THEN CALL SSCAL( N, SCALE, W, 1 ) END IF * IWORK( 1 ) = LIWMIN WORK( 1 ) = CMPLX( REAL( LWOPT ) ) RWORK( 1 ) = REAL( LRWOPT ) RETURN * * End of PCHEGVX * END scalapack-1.8.0/SRC/pdorm2l.f0000640000175000017500000004047210363532303015500 0ustar muammarmuammar SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORM2L overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PDGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PDGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PDGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 DOUBLE PRECISION AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DGEBR2D, $ DGEBS2D, DGERV2D, DGESD2D, DSCAL, $ INFOG2L, PDELSET, PDELSET2, PDLARF, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL DSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL DSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN AJJ = ONE - TAU( JJ ) END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL DGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL DGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL DSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * DO 10 J = J1, J2, J3 * IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic:ic+m-k+j-ja,jc:jc+n-1) * MI = M - K + J - JA + 1 ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc:jc+n-k+j-ja) * NI = N - K + J - JA + 1 END IF * * Apply H(j) or H(j)' * CALL PDELSET2( AJJ, A, IA+NQ-K+J-JA, J, DESCA, ONE ) CALL PDLARF( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, IC, $ JC, DESCC, WORK ) CALL PDELSET( A, IA+NQ-K+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORM2L * END scalapack-1.8.0/SRC/pclacp3.f0000640000175000017500000003023010363532303015435 0ustar muammarmuammar SUBROUTINE PCLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, II, JJ, LDB, M, REV * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), B( LDB, * ) * .. * * Purpose * ======= * * PCLACP3 is an auxiliary routine that copies from a global parallel * array into a local replicated array or vise versa. Notice that * the entire submatrix that is copied gets placed on one node or * more. The receiving node can be specified precisely, or all nodes * can receive, or just one row or column of nodes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * M is the order of the square submatrix that is copied. * M >= 0. * Unchanged on exit * * I (global input) INTEGER * A(I,I) is the global location that the copying starts from. * Unchanged on exit. * * A (global input/output) COMPLEX array, dimension * (DESCA(LLD_),*) * On entry, the parallel matrix to be copied into or from. * On exit, if REV=1, the copied data. * Unchanged on exit if REV=0. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/output) COMPLEX array of size (LDB,M) * If REV=0, this is the global portion of the array * A(I:I+M-1,I:I+M-1). * If REV=1, this is the unchanged on exit. * * LDB (local input) INTEGER * The leading dimension of B. * * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) * receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data * If II>=0,JJ=-1, then all cols in row II receive the data * If II=-1,JJ=-1, then all nodes receive the data * If REV<>0, then II is the source row index for the node(s) * sending the replicated B. * * JJ (global input) INTEGER * Similar description as II above * * REV (global input) INTEGER * Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in * A) into the global A. * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN, $ III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP, $ JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG1L, CGEBR2D, CGEBS2D, $ CGERV2D, CGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.LE.0 ) $ RETURN * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M DO 10 IDJ = 1, M B( IDI, IDJ ) = ZERO 10 CONTINUE 20 CONTINUE END IF * IFIN = I + M - 1 * IF( MOD( I+HBL, HBL ).NE.0 ) THEN ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN ) ELSE ISTOP = I END IF IDJ = I ISTOPJ = ISTOP IF( IDJ.LE.IFIN ) THEN 30 CONTINUE IDI = I ISTOPI = ISTOP IF( IDI.LE.IFIN ) THEN 40 CONTINUE ROW = MOD( ( IDI-1 ) / HBL, NPROW ) COL = MOD( ( IDJ-1 ) / HBL, NPCOL ) CALL INFOG1L( IDI, HBL, NPROW, ROW, 0, IROW1, ITMP ) IROW2 = NUMROC( ISTOPI, HBL, ROW, 0, NPROW ) CALL INFOG1L( IDJ, HBL, NPCOL, COL, 0, ICOL1, ITMP ) ICOL2 = NUMROC( ISTOPJ, HBL, COL, 0, NPCOL ) IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to everyone * IF( REV.EQ.0 ) THEN CALL CGEBS2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.NE.-1 ) ) THEN * * Send the message to Column MYCOL which better be JJ * IF( REV.EQ.0 ) THEN CALL CGEBS2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to Row MYROW which better be II * IF( REV.EQ.0 ) THEN CALL CGEBS2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND. $ ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN * * Recv/Send the message to (II,JJ) * IF( REV.EQ.0 ) THEN CALL CGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ A( ( ICOL1-1 )*LDA+IROW1 ), LDA, II, $ JJ ) ELSE CALL CGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, II, JJ ) END IF END IF IF( REV.EQ.0 ) THEN DO 60 JJJ = ICOL1, ICOL2 DO 50 III = IROW1, IROW2 B( IDI+III-IROW1+1-I, IDJ+JJJ-ICOL1+1-I ) $ = A( ( JJJ-1 )*LDA+III ) 50 CONTINUE 60 CONTINUE ELSE DO 80 JJJ = ICOL1, ICOL2 DO 70 III = IROW1, IROW2 A( ( JJJ-1 )*LDA+III ) = B( IDI+III-IROW1+1-I, $ IDJ+JJJ-ICOL1+1-I ) 70 CONTINUE 80 CONTINUE END IF ELSE IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL CGEBR2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL CGEBR2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL CGEBR2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL CGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) ELSE CALL CGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) * CALL CGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, * $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL) END IF END IF END IF IDI = ISTOPI + 1 ISTOPI = MIN( ISTOPI+HBL, IFIN ) IF( IDI.LE.IFIN ) $ GO TO 40 END IF IDJ = ISTOPJ + 1 ISTOPJ = MIN( ISTOPJ+HBL, IFIN ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF RETURN * * End of PCLACP3 * END scalapack-1.8.0/SRC/psgeqlf.f0000640000175000017500000002725110363532303015562 0ustar muammarmuammar SUBROUTINE PSGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSGEQLF computes a QL factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( Mp0 + Nq0 + NB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, IINFO, ICTXT, IPW, J, JB, JL, JN, $ K, LWMIN, MP0, MU, MYCOL, MYROW, NPCOL, NPROW, $ NQ0, NU * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PSGEQL2, $ PSLARFB, PSLARFT, PB_TOPGET, PB_TOPSET, $ PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MP0 + NQ0 + DESCA( NB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSGEQLF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JL = MAX( ( (JA+N-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( JL.GE.JN+1 ) THEN * * Use blocked code initially * DO 10 J = JL, JN+1, -DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * * Compute the QL factorization of the current block * A(ia:ia+m-n+j+jb-ja-1,j:j+jb-1) * CALL PSGEQL2( M-N+J+JB-JA, JB, A, IA, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * IF( J.GT.JA ) THEN * * Form the triangular factor of the block reflector * H = H(j+jb-1) . . . H(j+1) H(j) * CALL PSLARFT( 'Backward', 'Columnwise', M-N+J+JB-JA, JB, $ A, IA, J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:ia+m-n+j+jb-ja-1,ja:j-1) from the * left * CALL PSLARFB( 'Left', 'Transpose', 'Backward', $ 'Columnwise', M-N+J+JB-JA, J-JA, JB, A, IA, $ J, DESCA, WORK, A, IA, JA, DESCA, $ WORK( IPW ) ) END IF * 10 CONTINUE * MU = M - N + JN - JA + 1 NU = JN - JA + 1 * ELSE * MU = M NU = N * END IF * * Use unblocked code to factor the last or only block * IF( MU.GT.0 .AND. NU.GT.0 ) $ CALL PSGEQL2( MU, NU, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSGEQLF * END scalapack-1.8.0/SRC/pslabad.f0000640000175000017500000000474110363532303015526 0ustar muammarmuammar SUBROUTINE PSLABAD( ICTXT, SMALL, LARGE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT REAL LARGE, SMALL * .. * * Purpose * ======= * * PSLABAD takes as input the values computed by PSLAMCH for underflow * and overflow, and returns the square root of each of these values if * the log of LARGE is sufficiently large. This subroutine is intended * to identify machines with a large exponent range, such as the Crays, * and redefine the underflow and overflow limits to be the square roots * of the values computed by PSLAMCH. This subroutine is needed because * PSLAMCH does not compensate for poor arithmetic in the upper half of * the exponent range, as is found on a Cray. * * In addition, this routine performs a global minimization and maximi- * zation on these values, to support heterogeneous computing networks. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle in which the computation takes * place. * * SMALL (local input/local output) REAL * On entry, the underflow threshold as computed by PSLAMCH. * On exit, if LOG10(LARGE) is sufficiently large, the square * root of SMALL, otherwise unchanged. * * LARGE (local input/local output) REAL * On entry, the overflow threshold as computed by PSLAMCH. * On exit, if LOG10(LARGE) is sufficiently large, the square * root of LARGE, otherwise unchanged. * * ===================================================================== * * .. Local Scalars .. INTEGER IDUMM * .. * .. External Subroutines .. EXTERNAL SGAMN2D, SGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC LOG10, SQRT * .. * .. Executable Statements .. * * If it looks like we're on a Cray, take the square root of * SMALL and LARGE to avoid overflow and underflow problems. * IF( LOG10( LARGE ).GT.2000. ) THEN SMALL = SQRT( SMALL ) LARGE = SQRT( LARGE ) END IF * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, SMALL, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) CALL SGAMN2D( ICTXT, 'All', ' ', 1, 1, LARGE, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) * RETURN * * End of PSLABAD * END scalapack-1.8.0/SRC/pzlascl.f0000640000175000017500000004166010363532303015571 0ustar muammarmuammar SUBROUTINE PZLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N DOUBLE PRECISION CFROM, CTO * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLASCL multiplies the M-by-N complex distributed matrix sub( A ) * denoting A(IA:IA+M-1,JA:JA+N-1) by the real scalar CTO/CFROM. This * is done without over/underflow as long as the final result * CTO * A(I,J) / CFROM does not over/underflow. TYPE specifies that * sub( A ) may be full, upper triangular, lower triangular or upper * Hessenberg. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TYPE (global input) CHARACTER * TYPE indices the storage type of the input distributed * matrix. * = 'G': sub( A ) is a full matrix, * = 'L': sub( A ) is a lower triangular matrix, * = 'U': sub( A ) is an upper triangular matrix, * = 'H': sub( A ) is an upper Hessenberg matrix. * * CFROM (global input) DOUBLE PRECISION * CTO (global input) DOUBLE PRECISION * The distributed matrix sub( A ) is multiplied by CTO/CFROM. * A(I,J) is computed without over/underflow if the final * result CTO * A(I,J) / CFROM can be represented without * over/underflow. CFROM must be nonzero. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * This array contains the local pieces of the distributed * matrix sub( A ). On exit, this array contains the local * pieces of the distributed matrix multiplied by CTO/CFROM. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. * .. Local Scalars .. LOGICAL DONE INTEGER IACOL, IAROW, ICOFFA, ICTXT, ICURCOL, ICURROW, $ IIA, II, INXTROW, IOFFA, IROFFA, ITYPE, J, JB, $ JJA, JJ, JN, KK, LDA, LL, MYCOL, MYROW, MP, $ NPCOL, NPROW, NQ DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * IF( NPROW.EQ.-1 ) THEN INFO = -907 ELSE INFO = 0 CALL CHK1MAT( M, 4, N, 6, IA, JA, DESCA, 9, INFO ) IF( INFO.EQ.0 ) THEN IF( LSAME( TYPE, 'G' ) ) THEN ITYPE = 0 ELSE IF( LSAME( TYPE, 'L' ) ) THEN ITYPE = 1 ELSE IF( LSAME( TYPE, 'U' ) ) THEN ITYPE = 2 ELSE IF( LSAME( TYPE, 'H' ) ) THEN ITYPE = 3 ELSE ITYPE = -1 END IF IF( ITYPE.EQ.-1 ) THEN INFO = -1 ELSE IF( CFROM.EQ.ZERO ) THEN INFO = -4 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZLASCL', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * * Get machine parameters * SMLNUM = PDLAMCH( ICTXT, 'S' ) BIGNUM = ONE / SMLNUM * CFROMC = CFROM CTOC = CTO * * Compute local indexes * LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) MP = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA * 10 CONTINUE CFROM1 = CFROMC*SMLNUM CTO1 = CTOC / BIGNUM IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN MUL = SMLNUM DONE = .FALSE. CFROMC = CFROM1 ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN MUL = BIGNUM DONE = .FALSE. CTOC = CTO1 ELSE MUL = CTOC / CFROMC DONE = .TRUE. END IF * IOFFA = ( JJA - 1 ) * LDA ICURROW = IAROW ICURCOL = IACOL * IF( ITYPE.EQ.0 ) THEN * * Full matrix * DO 30 JJ = JJA, JJA+NQ-1 DO 20 II = IIA, IIA+MP-1 A( IOFFA+II ) = A( IOFFA+II ) * MUL 20 CONTINUE IOFFA = IOFFA + LDA 30 CONTINUE * ELSE IF( ITYPE.EQ.1 ) THEN * * Lower triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 50 LL = JJ, JJ + JB -1 DO 40 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 40 CONTINUE IOFFA = IOFFA + LDA 50 CONTINUE ELSE DO 70 LL = JJ, JJ + JB -1 DO 60 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 120 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 90 LL = JJ, JJ + JB -1 DO 80 KK = II+LL-JJ, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 DO 100 KK = II, IIA+MP-1 A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 120 CONTINUE * ELSE IF( ITYPE.EQ.2 ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JB = JN-JA+1 * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 140 LL = JJ, JJ + JB -1 DO 130 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 130 CONTINUE IOFFA = IOFFA + LDA 140 CONTINUE ELSE DO 160 LL = JJ, JJ + JB -1 DO 150 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 150 CONTINUE IOFFA = IOFFA + LDA 160 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 210 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 180 LL = JJ, JJ + JB -1 DO 170 KK = IIA, MIN(II+LL-JJ,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 170 CONTINUE IOFFA = IOFFA + LDA 180 CONTINUE ELSE DO 200 LL = JJ, JJ + JB -1 DO 190 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 190 CONTINUE IOFFA = IOFFA + LDA 200 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 210 CONTINUE * ELSE IF( ITYPE.EQ.3 ) THEN * * Upper Hessenberg matrix * II = IIA JJ = JJA JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.ICURCOL ) THEN DO 230 LL = JJ, JJ+JB-1 DO 220 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE JJ = JJ + JB END IF * ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 250 LL = JJ, JJ+JB-1 DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK )*MUL 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE JJ = JJ + JB END IF * II = II + JB ICURCOL = MOD( ICURCOL+1, NPCOL ) * 260 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( ICURROW+1, NPROW ) IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 280 LL = JJ, JJ + JB -1 DO 270 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 270 CONTINUE IOFFA = IOFFA + LDA 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 DO 290 KK = IIA, MIN(II-1,IIA+MP-1) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 290 CONTINUE IOFFA = IOFFA + LDA 300 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining block of columns * DO 350 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.ICURCOL ) THEN IF( MYROW.EQ.ICURROW ) THEN DO 320 LL = JJ, JJ + JB -1 DO 310 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 310 CONTINUE IOFFA = IOFFA + LDA 320 CONTINUE ELSE DO 340 LL = JJ, JJ + JB -1 DO 330 KK = IIA, MIN( II-1, IIA+MP-1 ) A( IOFFA+KK ) = A( IOFFA+KK ) * MUL 330 CONTINUE IOFFA = IOFFA + LDA 340 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+MP-1 ) $ A( II+(JJ+JB-2)*LDA ) = A( II+(JJ+JB-2)*LDA ) * $ MUL END IF JJ = JJ + JB END IF * IF( MYROW.EQ.ICURROW ) $ II = II + JB ICURROW = INXTROW ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 350 CONTINUE * END IF * END IF * IF( .NOT.DONE ) $ GO TO 10 * RETURN * * End of PZLASCL * END scalapack-1.8.0/SRC/pdgerq2.f0000640000175000017500000002471410363532303015466 0ustar muammarmuammar SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDGERQ2 computes a RQ factorization of a real distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAU, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL, $ MYROW, NPCOL, NPROW, NQ DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PDELSET, PDLARF, PDLARFG, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQ + MAX( 1, MP ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * K = MIN( M, N ) DO 10 I = IA+K-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * A(i+m-k,ja:j+n-k-1) * CALL PDLARFG( N-K+J-JA+1, AII, I+M-K, J+N-K, A, I+M-K, JA, $ DESCA, DESCA( M_ ), TAU ) * * Apply H(i) to A(ia:i+m-k-1,ja:j+n-k) from the right * CALL PDELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PDLARF( 'Right', M-K+I-IA, N-K+J-JA+1, A, M-K+I, JA, $ DESCA, DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) CALL PDELSET( A, I+M-K, J+N-K, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDGERQ2 * END scalapack-1.8.0/SRC/pslanhs.f0000640000175000017500000006245310363532303015574 0ustar muammarmuammar REAL FUNCTION PSLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLANHS returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * Hessenberg distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PSLANHS returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PSLANHS as described * above. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PSLANHS is set to zero. N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, INXTROW, $ IOFFA, IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSTREECOMB, $ SCOMBSSQ, SGEBR2D, SGEBS2D, $ SGAMX2D, SGSUM2D, SLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL LSAME, ICEIL, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * VALUE = ZERO * * Find max(abs(A(i,j))). * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 20 LL = JJ, JJ+JB-1 DO 10 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 40 LL = JJ, JJ+JB-1 DO 30 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 50 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 70 LL = JJ, JJ + JB -1 DO 60 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE ELSE DO 90 LL = JJ, JJ+JB-1 DO 80 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 140 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 110 LL = JJ, JJ + JB -1 DO 100 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE ELSE DO 130 LL = JJ, JJ + JB -1 DO 120 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, $ ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 140 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 160 LL = JJ, JJ+JB-1 SUM = ZERO DO 150 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 150 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 160 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 190 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 180 LL = JJ, JJ+JB-1 SUM = ZERO DO 170 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 170 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 180 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 190 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 210 LL = JJ, JJ + JB -1 SUM = ZERO DO 200 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 200 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 SUM = ZERO DO 220 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 220 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 230 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 280 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 250 LL = JJ, JJ + JB -1 SUM = ZERO DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 240 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 250 CONTINUE ELSE DO 270 LL = JJ, JJ + JB -1 SUM = ZERO DO 260 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 260 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 270 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 280 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * DO 290 KK = IIA, IIA+NP-1 WORK( KK ) = ZERO 290 CONTINUE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 310 LL = JJ, JJ+JB-1 DO 300 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 300 CONTINUE IOFFA = IOFFA + LDA 310 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 340 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 330 LL = JJ, JJ+JB-1 DO 320 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 320 CONTINUE IOFFA = IOFFA + LDA 330 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 340 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 360 LL = JJ, JJ + JB -1 DO 350 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA 360 CONTINUE ELSE DO 380 LL = JJ, JJ + JB -1 DO 370 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA 380 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 430 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 400 LL = JJ, JJ + JB -1 DO 390 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 390 CONTINUE IOFFA = IOFFA + LDA 400 CONTINUE ELSE DO 420 LL = JJ, JJ + JB -1 DO 410 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS(A(IOFFA+KK)) 410 CONTINUE IOFFA = IOFFA + LDA 420 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 430 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK, MAX( 1, NP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( NP.GT.0 ) THEN VALUE = WORK( ISAMAX( NP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * SCALE = ZERO SUM = ONE II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 440 LL = JJ, JJ+JB-1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 440 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 460 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 450 LL = JJ, JJ+JB-1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 450 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 460 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 470 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 470 CONTINUE ELSE DO 480 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 480 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL SLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 510 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 490 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 490 CONTINUE ELSE DO 500 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 500 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL SLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 510 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PSLANHS = VALUE * RETURN * * End of PSLANHS * END scalapack-1.8.0/SRC/pclaswp.f0000640000175000017500000002033710363532303015570 0ustar muammarmuammar SUBROUTINE PCLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ) * .. * * Purpose: * ======== * * PCLASWP performs a series of row or column interchanges on * the distributed matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1). One * interchange is initiated for each of rows or columns K1 trough K2 of * sub( A ). This routine assumes that the pivoting information has * already been broadcast along the process row or column. * Also note that this routine will only work for K1-K2 being in the * same MB (or NB) block. If you want to pivot a full matrix, use * PCLAPIV. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) * = 'B' (Backward) * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are permuted: * = 'R' (Rows) * = 'C' (Columns) * * N (global input) INTEGER * If ROWCOL = 'R', the length of the rows of the distributed * matrix A(*,JA:JA+N-1) to be permuted; * If ROWCOL = 'C', the length of the columns of the distributed * matrix A(IA:IA+N-1,*) to be permuted. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, * ). * On entry, this array contains the local pieces of the distri- * buted matrix to which the row/columns interchanges will be * applied. On exit the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * K1 (global input) INTEGER * The first element of IPIV for which a row or column inter- * change will be done. * * K2 (global input) INTEGER * The last element of IPIV for which a row or column inter- * change will be done. * * IPIV (local input) INTEGER array, dimension LOCr(M_A)+MB_A for * row pivoting and LOCc(N_A)+NB_A for column pivoting. This * array is tied to the matrix A, IPIV(K) = L implies rows * (or columns) K and L are to be interchanged. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, ICURCOL, ICURROW, IIA, IP, J, JJA, JP, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PCSWAP * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( K1, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 10 I = K1, K2 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PCSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 10 CONTINUE ELSE CALL INFOG2L( K2, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 20 I = K2, K1, -1 IP = IPIV( IIA+I-K1 ) IF( IP.NE.I ) $ CALL PCSWAP( N, A, I, JA, DESCA, DESCA( M_ ), A, IP, $ JA, DESCA, DESCA( M_ ) ) 20 CONTINUE END IF ELSE IF( LSAME( DIREC, 'F' ) ) THEN CALL INFOG2L( IA, K1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 30 J = K1, K2 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PCSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 30 CONTINUE ELSE CALL INFOG2L( IA, K2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, ICURROW, ICURCOL ) DO 40 J = K2, K1, -1 JP = IPIV( JJA+J-K1 ) IF( JP.NE.J ) $ CALL PCSWAP( N, A, IA, J, DESCA, 1, A, IA, JP, $ DESCA, 1 ) 40 CONTINUE END IF END IF * RETURN * * End PCLASWP * END scalapack-1.8.0/SRC/pcgbsv.f0000640000175000017500000004561710363532303015413 0ustar muammarmuammar SUBROUTINE PCGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX A( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PCGBSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * banded distributed * matrix with bandwidth BWL, BWU. * * Gaussian elimination with pivoting * is used to factor a reordering * of the matrix into P L U. * * See PCGBTRF and PCGBTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * On exit, this array contains information containing details * of the factorization. * Note that permutations are performed on the matrix, so that * the factors returned are different from those returned * by LAPACK. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) COMPLEX pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * +max(NRHS*(NB+2*bwl+4*bwu), 1) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * nonsingular, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * nonsingular, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) COMPLEX CONE, CZERO PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PCGBTRF, PCGBTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PCGBTRF and PCGBTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PCGBSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (NB+BWU)*(BWL+BWU)+6*(BWL+BWU)*(BWL+2*BWU) * * Factor the matrix * CALL PCGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PCGBSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PCGBTRS( 'N', N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGBSV', -INFO ) RETURN END IF * RETURN * * End of PCGBSV * END scalapack-1.8.0/SRC/pdtrtri.f0000640000175000017500000003071210363532303015605 0ustar muammarmuammar SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDTRTRI computes the inverse of a upper or lower triangular * distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the distributed matrix sub( A ) is upper * or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) * is unit triangular: * = 'N': Non-unit triangular, * = 'U': Unit triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * triangular matrix sub( A ). If UPLO = 'U', the leading * N-by-N upper triangular part of the matrix sub( A ) contains * the upper triangular matrix to be inverted, and the strictly * lower triangular part of sub( A ) is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * the matrix sub( A ) contains the lower triangular matrix, * and the strictly upper triangular part of sub( A ) is not * referenced. * On exit, the (triangular) inverse of the original matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, A(IA+K-1,JA+K-1) is exactly zero. The * triangular matrix sub( A ) is singular and its * inverse can not be computed. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER I, ICOFF, ICTXT, IROFF, ICURCOL, ICURROW, $ IDUMMY, II, IOFFA, J, JB, JJ, JN, LDA, MYCOL, $ MYROW, NN, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMX2D, INFOG2L, $ PCHK1MAT, PDTRTI2, PDTRMM, PDTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) * CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 ELSE IF( IROFF.NE.ICOFF .OR. IROFF.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( NOUNIT ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 * CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRI', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Check for singularity if non-unit. * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( NOUNIT ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ II, JJ, ICURROW, ICURCOL ) * * Handle first block separately * JB = JN-JA+1 LDA = DESCA( LLD_ ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 10 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = I + 1 IOFFA = IOFFA + LDA + 1 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFA = II+(JJ-1)*LDA DO 20 I = 0, JB-1 IF( A( IOFFA ).EQ.ZERO .AND. INFO.EQ.0 ) $ INFO = J + I - JA + 1 IOFFA = IOFFA + LDA + 1 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + JB IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + JB ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) IF( INFO.NE.0 ) $ RETURN END IF * * Use blocked code * IF( UPPER ) THEN * * Compute inverse of upper triangular matrix * JB = JN-JA+1 * * Handle first block of column separately * CALL PDTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * * Loop over remaining block of columns * DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * * Compute rows 1:j-1 of current block column * CALL PDTRMM( 'Left', UPLO, 'No transpose', DIAG, J-JA, JB, $ ONE, A, IA, JA, DESCA, A, IA, J, DESCA ) CALL PDTRSM( 'Right', UPLO, 'No transpose', DIAG, J-JA, $ JB, -ONE, A, I, J, DESCA, A, IA, J, DESCA ) * * Compute inverse of current diagonal block * CALL PDTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 40 CONTINUE * ELSE * * Compute inverse of lower triangular matrix * NN = ( ( JA+N-2 ) / DESCA( NB_ ) )*DESCA( NB_ ) + 1 DO 50 J = NN, JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA IF( J+JB.LE.JA+N-1 ) THEN * * Compute rows j+jb:ja+n-1 of current block column * CALL PDTRMM( 'Left', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, ONE, A, I+JB, J+JB, DESCA, $ A, I+JB, J, DESCA ) CALL PDTRSM( 'Right', UPLO, 'No transpose', DIAG, $ JA+N-J-JB, JB, -ONE, A, I, J, DESCA, $ A, I+JB, J, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PDTRTI2( UPLO, DIAG, JB, A, I, J, DESCA, INFO ) * 50 CONTINUE * * Handle the last block of columns separately * JB = JN-JA+1 IF( JA+JB.LE.JA+N-1 ) THEN * * Compute rows ja+jb:ja+n-1 of current block column * CALL PDTRMM( 'Left', UPLO, 'No transpose', DIAG, N-JB, JB, $ ONE, A, IA+JB, JA+JB, DESCA, A, IA+JB, JA, $ DESCA ) CALL PDTRSM( 'Right', UPLO, 'No transpose', DIAG, N-JB, JB, $ -ONE, A, IA, JA, DESCA, A, IA+JB, JA, DESCA ) END IF * * Compute inverse of current diagonal block * CALL PDTRTI2( UPLO, DIAG, JB, A, IA, JA, DESCA, INFO ) * END IF * RETURN * * End PDTRTRI * END scalapack-1.8.0/SRC/slamsh.f0000640000175000017500000002120710363532303015403 0ustar muammarmuammar SUBROUTINE SLAMSH ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER LDS, NBULGE, JBLK, LDH, N REAL ULP * .. * .. Array Arguments .. REAL S(LDS,*), H(LDH,*) * .. * * Purpose * ======= * * SLAMSH sends multiple shifts through a small (single node) matrix to * see how consecutive small subdiagonal elements are modified by * subsequent shifts in an effort to maximize the number of bulges * that can be sent through. * SLAMSH should only be called when there are multiple shifts/bulges * (NBULGE > 1) and the first shift is starting in the middle of an * unreduced Hessenberg matrix because of two or more consecutive small * subdiagonal elements. * * Arguments * ========= * * S (local input/output) REAL array, (LDS,*) * On entry, the matrix of shifts. Only the 2x2 diagonal of S is * referenced. It is assumed that S has JBLK double shifts * (size 2). * On exit, the data is rearranged in the best order for * applying. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * 1 < NBULGE <= JBLK <= LDS/2 * * NBULGE (local input/output) INTEGER * On entry, the number of bulges to send through H ( >1 ). * NBULGE should be less than the maximum determined (JBLK). * 1 < NBULGE <= JBLK <= LDS/2 * On exit, the maximum number of bulges that can be sent * through. * * JBLK (local input) INTEGER * On entry, the number of shifts determined for S. * Unchanged on exit. * * H (local input/output) REAL array (LDH,N) * On entry, the local matrix to apply the shifts on. * H should be aligned so that the starting row is 2. * On exit, the data is destroyed. * * LDS (local input) INTEGER * On entry, the leading dimension of S. Unchanged on exit. * * N (local input) INTEGER * On entry, the size of H. If all the bulges are expected to * go through, N should be at least 4*NBULGE+2. * Otherwise, NBULGE may be reduced by this routine. * * ULP (local input) REAL * On entry, machine precision * Unchanged on exit. * * Implemented by: G. Henry, May 1, 1997 * * ===================================================================== * * .. Parameters .. REAL ZERO, TEN PARAMETER ( ZERO = 0.0E+0, TEN = 10.0E+0 ) * .. * .. Local Scalars .. INTEGER K, IBULGE, M, NR, J, IVAL, I REAL H44, H33, H43H34, H11, H22, H21, H12, H44S, $ H33S, V1, V2, V3, H00, H10, TST1, T1, T2, T3, $ SUM, S1, DVAL * .. * .. Local Arrays .. REAL V(3) * .. * .. External Subroutines .. EXTERNAL SLARFG, SCOPY * .. * .. Intrinsic Functions .. INTRINSIC MAX, ABS * .. * .. Executable Statements .. * M = 2 DO 10 IBULGE = 1, NBULGE H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)* $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.ULP*TST1 ) THEN * Find minimum DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1) IVAL = IBULGE DO 15 I = IBULGE+1, NBULGE H44 = S(2*JBLK-2*I+2, 2*JBLK-2*I+2) H33 = S(2*JBLK-2*I+1,2*JBLK-2*I+1) H43H34 = S(2*JBLK-2*I+1,2*JBLK-2*I+2)* $ S(2*JBLK-2*I+2, 2*JBLK-2*I+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF ( (DVAL.GT.(ABS(H10)*(ABS(V2)+ABS(V3)))/(ULP*TST1)) $ .AND. ( DVAL .GT. 1.D0 ) ) THEN DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1) IVAL = I END IF 15 CONTINUE IF ( (DVAL .LT. TEN) .AND. (IVAL .NE. IBULGE) ) THEN H44 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2) H33 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) H43H34 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) H10 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) S(2*JBLK-2*IVAL+2,2*JBLK-2*IVAL+2) = $ S(2*JBLK-2*IBULGE+2,2*JBLK-2*IBULGE+2) S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) = $ S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) = $ S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) = $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) = H44 S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) = H33 S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) = H43H34 S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) = H10 END IF H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)* $ S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) H11 = H( M, M ) H22 = H( M+1, M+1 ) H21 = H( M+1, M ) H12 = H( M, M+1 ) H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S V3 = H( M+2, M+1 ) S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S1 V2 = V2 / S1 V3 = V3 / S1 V( 1 ) = V1 V( 2 ) = V2 V( 3 ) = V3 H00 = H( M-1, M-1 ) H10 = H( M, M-1 ) TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) END IF IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.TEN*ULP*TST1 ) THEN * IBULGE better not be 1 here or we have a bug! NBULGE = MAX(IBULGE -1,1) RETURN END IF DO 120 K = M, N - 1 NR = MIN( 3, N-K+1 ) IF( K.GT.M ) $ CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 ) CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) IF( K.GT.M ) THEN H( K, K-1 ) = V( 1 ) H( K+1, K-1 ) = ZERO IF( K.LT.N-1 ) $ H( K+2, K-1 ) = ZERO ELSE H( K, K-1 ) = -H( K, K-1 ) END IF V2 = V( 2 ) T2 = T1*V2 IF( NR.EQ.3 ) THEN V3 = V( 3 ) T3 = T1*V3 DO 60 J = K, N SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) H( K, J ) = H( K, J ) - SUM*T1 H( K+1, J ) = H( K+1, J ) - SUM*T2 H( K+2, J ) = H( K+2, J ) - SUM*T3 60 CONTINUE DO 70 J = 1, MIN( K+3, N ) SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) H( J, K ) = H( J, K ) - SUM*T1 H( J, K+1 ) = H( J, K+1 ) - SUM*T2 H( J, K+2 ) = H( J, K+2 ) - SUM*T3 70 CONTINUE END IF 120 CONTINUE 10 CONTINUE * RETURN END scalapack-1.8.0/SRC/dlasrt2.f0000640000175000017500000001614310363532303015472 0ustar muammarmuammar* * SUBROUTINE DLASRT2( ID, N, D, KEY, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, N * .. * .. Array Arguments .. INTEGER KEY( * ) DOUBLE PRECISION D( * ) * .. * * Purpose * ======= * * Sort the numbers in D in increasing order (if ID = 'I') or * in decreasing order (if ID = 'D' ). * * Use Quick Sort, reverting to Insertion sort on arrays of * size <= 20. Dimension of STACK limits N to about 2**32. * * Arguments * ========= * * ID (input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. * * N (input) INTEGER * The length of the array D. * * D (input/output) DOUBLE PRECISION array, dimension (N) * On entry, the array to be sorted. * On exit, D has been sorted into increasing order * (D(1) <= ... <= D(N) ) or into decreasing order * (D(1) >= ... >= D(N) ), depending on ID. * * KEY (input/output) INTEGER array, dimension (N) * On entry, KEY contains a key to each of the entries in D() * Typically, KEY(I) = I for all I * On exit, KEY is permuted in exactly the same manner as * D() was permuted from input to output * Therefore, if KEY(I) = I for all I upon input, then * D_out(I) = D_in(KEY(I)) * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. INTEGER SELECT PARAMETER ( SELECT = 20 ) * .. * .. Local Scalars .. INTEGER DIR, ENDD, I, J, START, STKPNT, TMPKEY DOUBLE PRECISION D1, D2, D3, DMNMX, TMP * .. * .. Local Arrays .. INTEGER STACK( 2, 32 ) * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Executable Statements .. * * Test the input paramters. * * INFO = 0 DIR = -1 IF( LSAME( ID, 'D' ) ) THEN DIR = 0 ELSE IF( LSAME( ID, 'I' ) ) THEN DIR = 1 END IF IF( DIR.EQ.-1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DLASRT2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.LE.1 ) $ RETURN * STKPNT = 1 STACK( 1, 1 ) = 1 STACK( 2, 1 ) = N 10 CONTINUE START = STACK( 1, STKPNT ) ENDD = STACK( 2, STKPNT ) STKPNT = STKPNT - 1 IF( ENDD-START.GT.0 ) THEN * * Do Insertion sort on D( START:ENDD ) * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * DO 30 I = START + 1, ENDD DO 20 J = I, START + 1, -1 IF( D( J ).GT.D( J-1 ) ) THEN DMNMX = D( J ) D( J ) = D( J-1 ) D( J-1 ) = DMNMX TMPKEY = KEY( J ) KEY( J ) = KEY( J-1 ) KEY( J-1 ) = TMPKEY ELSE GO TO 30 END IF 20 CONTINUE 30 CONTINUE * ELSE * * Sort into increasing order * DO 50 I = START + 1, ENDD DO 40 J = I, START + 1, -1 IF( D( J ).LT.D( J-1 ) ) THEN DMNMX = D( J ) D( J ) = D( J-1 ) D( J-1 ) = DMNMX TMPKEY = KEY( J ) KEY( J ) = KEY( J-1 ) KEY( J-1 ) = TMPKEY ELSE GO TO 50 END IF 40 CONTINUE 50 CONTINUE * END IF * ELSE IF( ENDD-START.GT.SELECT ) THEN * * Partition D( START:ENDD ) and stack parts, largest one first * * Choose partition entry as median of 3 * D1 = D( START ) D2 = D( ENDD ) I = ( START+ENDD ) / 2 D3 = D( I ) IF( D1.LT.D2 ) THEN IF( D3.LT.D1 ) THEN DMNMX = D1 ELSE IF( D3.LT.D2 ) THEN DMNMX = D3 ELSE DMNMX = D2 END IF ELSE IF( D3.LT.D2 ) THEN DMNMX = D2 ELSE IF( D3.LT.D1 ) THEN DMNMX = D3 ELSE DMNMX = D1 END IF END IF * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * I = START - 1 J = ENDD + 1 60 CONTINUE 70 CONTINUE J = J - 1 IF( D( J ).LT.DMNMX ) $ GO TO 70 80 CONTINUE I = I + 1 IF( D( I ).GT.DMNMX ) $ GO TO 80 IF( I.LT.J ) THEN TMP = D( I ) D( I ) = D( J ) D( J ) = TMP TMPKEY = KEY( J ) KEY( J ) = KEY( I ) KEY( I ) = TMPKEY GO TO 60 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF ELSE * * Sort into increasing order * I = START - 1 J = ENDD + 1 90 CONTINUE 100 CONTINUE J = J - 1 IF( D( J ).GT.DMNMX ) $ GO TO 100 110 CONTINUE I = I + 1 IF( D( I ).LT.DMNMX ) $ GO TO 110 IF( I.LT.J ) THEN TMP = D( I ) D( I ) = D( J ) D( J ) = TMP TMPKEY = KEY( J ) KEY( J ) = KEY( I ) KEY( I ) = TMPKEY GO TO 90 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF END IF END IF IF( STKPNT.GT.0 ) $ GO TO 10 * * RETURN * * End of DLASRT2 * END scalapack-1.8.0/SRC/pzgesv.f0000640000175000017500000002311610367447133015445 0ustar muammarmuammar SUBROUTINE PZGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * Jan 30, 2006 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZGESV computes the solution to a complex system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) = A(IA:IA+N-1,JA:JA+N-1) is an N-by-N distributed * matrix and X and sub( B ) = B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS * distributed matrices. * * The LU decomposition with partial pivoting and row interchanges is * used to factor sub( A ) as sub( A ) = P * L * U, where P is a permu- * tation matrix, L is unit lower triangular, and U is upper triangular. * L and U are stored in sub( A ). The factored form of sub( A ) is then * used to solve the system of equations sub( A ) * X = sub( B ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the N-by-N distributed matrix * sub( A ) to be factored. On exit, this array contains the * local pieces of the factors L and U from the factorization * sub( A ) = P*L*U; the unit diagonal elements of L are not * stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, the right hand side * distributed matrix sub( B ). On exit, if INFO = 0, sub( B ) * is overwritten by the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, so the solution could not be * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZGETRF, PZGETRS * .. * .. External Functions .. INTEGER INDXG2P EXTERNAL INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 6, INFO ) CALL CHK1MAT( N, 1, NRHS, 2, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IF( IROFFA.NE.0 ) THEN INFO = -4 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( IBROW.NE.IAROW .OR. ICOFFA.NE.IROFFB ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(1100+CTXT_) END IF END IF CALL PCHK2MAT( N, 1, N, 1, IA, JA, DESCA, 6, N, 1, NRHS, 2, $ IB, JB, DESCB, 11, 0, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGESV', -INFO ) RETURN END IF * * Compute the LU factorization of sub( A ). * CALL PZGETRF( N, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ), overwriting sub( B ) * with X. * CALL PZGETRS( 'No transpose', N, NRHS, A, IA, JA, DESCA, IPIV, $ B, IB, JB, DESCB, INFO ) * END IF * RETURN * * End of PZGESV * END scalapack-1.8.0/SRC/pdlabad.f0000640000175000017500000000477310363532303015514 0ustar muammarmuammar SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER ICTXT DOUBLE PRECISION LARGE, SMALL * .. * * Purpose * ======= * * PDLABAD takes as input the values computed by PDLAMCH for underflow * and overflow, and returns the square root of each of these values if * the log of LARGE is sufficiently large. This subroutine is intended * to identify machines with a large exponent range, such as the Crays, * and redefine the underflow and overflow limits to be the square roots * of the values computed by PDLAMCH. This subroutine is needed because * PDLAMCH does not compensate for poor arithmetic in the upper half of * the exponent range, as is found on a Cray. * * In addition, this routine performs a global minimization and maximi- * zation on these values, to support heterogeneous computing networks. * * Arguments * ========= * * ICTXT (global input) INTEGER * The BLACS context handle in which the computation takes * place. * * SMALL (local input/local output) DOUBLE PRECISION * On entry, the underflow threshold as computed by PDLAMCH. * On exit, if LOG10(LARGE) is sufficiently large, the square * root of SMALL, otherwise unchanged. * * LARGE (local input/local output) DOUBLE PRECISION * On entry, the overflow threshold as computed by PDLAMCH. * On exit, if LOG10(LARGE) is sufficiently large, the square * root of LARGE, otherwise unchanged. * * ===================================================================== * * .. Local Scalars .. INTEGER IDUMM * .. * .. External Subroutines .. EXTERNAL DGAMN2D, DGAMX2D * .. * .. Intrinsic Functions .. INTRINSIC LOG10, SQRT * .. * .. Executable Statements .. * * If it looks like we're on a Cray, take the square root of * SMALL and LARGE to avoid overflow and underflow problems. * IF( LOG10( LARGE ).GT.2000.D0 ) THEN SMALL = SQRT( SMALL ) LARGE = SQRT( LARGE ) END IF * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, SMALL, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) CALL DGAMN2D( ICTXT, 'All', ' ', 1, 1, LARGE, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) * RETURN * * End of PDLABAD * END scalapack-1.8.0/SRC/pdgesvx.f0000640000175000017500000010471410363532303015601 0ustar muammarmuammar SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, IPIV, EQUED, R, C, B, IB, $ JB, DESCB, X, IX, JX, DESCX, RCOND, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, TRANS INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, $ LWORK, N, NRHS DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ), IWORK( * ) DOUBLE PRECISION A( * ), AF( * ), B( * ), BERR( * ), C( * ), $ FERR( * ), R( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PDGESVX uses the LU factorization to compute the solution to a real * system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * In the following description, A denotes A(IA:IA+N-1,JA:JA+N-1), * B denotes B(IB:IB+N-1,JB:JB+NRHS-1) and X denotes * X(IX:IX+N-1,JX:JX+NRHS-1). * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') * or diag(C)*B (if TRANS = 'T' or 'C'). * * 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if FACT = 'E') as * A = P * L * U, * where P is a permutation matrix, L is a unit lower triangular * matrix, and U is upper triangular. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If FACT = 'E' and equilibration was used, the matrix X is * premultiplied by diag(C) (if TRANS = 'N') or diag(R) (if * TRANS = 'T' or 'C') so that it solves the original system * before equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix * A(IA:IA+N-1,JA:JA+N-1) is supplied on entry, and if not, * whether the matrix A(IA:IA+N-1,JA:JA+N-1) should be * equilibrated before it is factored. * = 'F': On entry, AF(IAF:IAF+N-1,JAF:JAF+N-1) and IPIV con- * tain the factored form of A(IA:IA+N-1,JA:JA+N-1). * If EQUED is not 'N', the matrix * A(IA:IA+N-1,JA:JA+N-1) has been equilibrated with * scaling factors given by R and C. * A(IA:IA+N-1,JA:JA+N-1), AF(IAF:IAF+N-1,JAF:JAF+N-1), * and IPIV are not modified. * = 'N': The matrix A(IA:IA+N-1,JA:JA+N-1) will be copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * = 'E': The matrix A(IA:IA+N-1,JA:JA+N-1) will be equili- * brated if necessary, then copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': A(IA:IA+N-1,JA:JA+N-1) * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (No transpose) * = 'T': A(IA:IA+N-1,JA:JA+N-1)**T * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * = 'C': A(IA:IA+N-1,JA:JA+N-1)**H * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right-hand sides, i.e., the number of columns * of the distributed submatrices B(IB:IB+N-1,JB:JB+NRHS-1) and * X(IX:IX+N-1,JX:JX+NRHS-1). NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * the local memory to an array of local dimension * (LLD_A,LOCc(JA+N-1)). On entry, the N-by-N matrix * A(IA:IA+N-1,JA:JA+N-1). If FACT = 'F' and EQUED is not 'N', * then A(IA:IA+N-1,JA:JA+N-1) must have been equilibrated by * the scaling factors in R and/or C. A(IA:IA+N-1,JA:JA+N-1) is * not modified if FACT = 'F' or 'N', or if FACT = 'E' and * EQUED = 'N' on exit. * * On exit, if EQUED .ne. 'N', A(IA:IA+N-1,JA:JA+N-1) is scaled * as follows: * EQUED = 'R': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * EQUED = 'C': A(IA:IA+N-1,JA:JA+N-1) := * A(IA:IA+N-1,JA:JA+N-1) * diag(C) * EQUED = 'B': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) DOUBLE PRECISION pointer * into the local memory to an array of local dimension * (LLD_AF,LOCc(JA+N-1)). If FACT = 'F', then * AF(IAF:IAF+N-1,JAF:JAF+N-1) is an input argument and on * entry contains the factors L and U from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by PDGETRF. * If EQUED .ne. 'N', then AF is the factored form of the * equilibrated matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'N', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original * matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equili- * brated matrix A(IA:IA+N-1,JA:JA+N-1) (see the description of * A(IA:IA+N-1,JA:JA+N-1) for the form of the equilibrated * matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input or local output) INTEGER array, dimension * LOCr(M_A)+MB_A. If FACT = 'F', then IPIV is an input argu- * ment and on entry contains the pivot indices from the fac- * torization A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by * PDGETRF; IPIV(i) -> The global row local row i was * swapped with. This array must be aligned with * A( IA:IA+N-1, * ). * * If FACT = 'N', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original matrix * A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equilibrated matrix * A(IA:IA+N-1,JA:JA+N-1). * * EQUED (global input or global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'R': Row equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) has * been premultiplied by diag(R). * = 'C': Column equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) * has been postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., * A(IA:IA+N-1,JA:JA+N-1) has been replaced by * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * R (local input or local output) DOUBLE PRECISION array, * dimension LOCr(M_A). * The row scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'R' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the left by diag(R); if EQUED='N' or 'C', R is not acces- * sed. R is an input variable if FACT = 'F'; otherwise, R is * an output variable. * If FACT = 'F' and EQUED = 'R' or 'B', each element of R must * be positive. * R is replicated in every process column, and is aligned * with the distributed matrix A. * * C (local input or local output) DOUBLE PRECISION array, * dimension LOCc(N_A). * The column scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'C' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the right by diag(C); if EQUED = 'N' or 'R', C is not * accessed. C is an input variable if FACT = 'F'; otherwise, * C is an output variable. If FACT = 'F' and EQUED = 'C' or * 'B', each element of C must be positive. * C is replicated in every process row, and is aligned with * the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer * into the local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1) ). On entry, the N-by-NRHS right-hand * side matrix B(IB:IB+N-1,JB:JB+NRHS-1). On exit, if * EQUED = 'N', B(IB:IB+N-1,JB:JB+NRHS-1) is not modified; if * TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by * diag(R)*B(IB:IB+N-1,JB:JB+NRHS-1); if TRANS = 'T' or 'C' * and EQUED = 'C' or 'B', B(IB:IB+N-1,JB:JB+NRHS-1) is over- * written by diag(C)*B(IB:IB+N-1,JB:JB+NRHS-1). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) DOUBLE PRECISION pointer * into the local memory to an array of local dimension * (LLD_X, LOCc(JX+NRHS-1)). If INFO = 0, the N-by-NRHS * solution matrix X(IX:IX+N-1,JX:JX+NRHS-1) to the original * system of equations. Note that A(IA:IA+N-1,JA:JA+N-1) and * B(IB:IB+N-1,JB:JB+NRHS-1) are modified on exit if * EQUED .ne. 'N', and the solution to the equilibrated system * is inv(diag(C))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'N' * and EQUED = 'C' or 'B', or * inv(diag(R))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'T' or 'C' * and EQUED = 'R' or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) DOUBLE PRECISION * The estimate of the reciprocal condition number of the matrix * A(IA:IA+N-1,JA:JA+N-1) after equilibration (if done). If * RCOND is less than the machine precision (in particular, if * RCOND = 0), the matrix is singular to working precision. * This condition is indicated by a return code of INFO > 0. * * FERR (local output) DOUBLE PRECISION array, dimension LOCc(N_B) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix * X(IX:IX+N-1,JX:JX+NRHS-1). If XTRUE is the true solution, * FERR(j) bounds the magnitude of the largest entry in * (X(j) - XTRUE) divided by the magnitude of the largest entry * in X(j). The estimate is as reliable as the estimate for * RCOND, and is almost always a slight overestimate of the * true error. FERR is replicated in every process row, and is * aligned with the matrices B and X. * * BERR (local output) DOUBLE PRECISION array, dimension LOCc(N_B). * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A(IA:IA+N-1,JA:JA+N-1) or * B(IB:IB+N-1,JB:JB+NRHS-1) that makes X(j) an exact solution). * BERR is replicated in every process row, and is aligned * with the matrices B and X. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PDGECON( LWORK ), PDGERFS( LWORK ) ) * + LOCr( N_A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK = LOCr(N_A). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: U(IA+I-1,IA+I-1) is exactly zero. The * factorization has been completed, but the * factor U is exactly singular, so the solution * and error bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the * matrix is singular to working precision, and * the solution and error bounds have not been * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLEQU, EQUIL, LQUERY, NOFACT, NOTRAN, ROWEQU CHARACTER NORM INTEGER CONWRK, I, IACOL, IAROW, IAFROW, IBROW, IBCOL, $ ICOFFA, ICOFFB, ICOFFX, ICTXT, IDUMM, $ IIA, IIB, IIX, $ INFEQU, IROFFA, IROFFAF, IROFFB, $ IROFFX, IXCOL, IXROW, J, JJA, JJB, JJX, $ LCM, LCMQ, $ LIWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, $ NQ, NQB, NRHSQ, RFSWRK DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, $ ROWCND, SMLNUM * .. * .. Local Arrays .. INTEGER CDESC( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ DGEBR2D, DGEBS2D, DGAMN2D, $ DGAMX2D, INFOG2L, PDCOPY, PDGECON, $ PDGEEQU, PDGERFS, PDGETRF, PDGETRS, $ PDLACPY, PDLAQGE, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PDLANGE EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC, PDLANGE, $ PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IX, JX, DESCX, 24, INFO ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) NOTRAN = LSAME( TRANS, 'N' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' ROWEQU = .FALSE. COLEQU = .FALSE. ELSE ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA NQB = ICEIL( N+IROFFA, DESCA( NB_ )*NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL CONWRK = 2*NP + 2*NQ + MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQ + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) RFSWRK = 3*NP IF( LSAME( TRANS, 'N' ) ) THEN RFSWRK = RFSWRK + NP + NQ + $ ICEIL( NQB, LCMQ )*DESCA( NB_ ) ELSE IF( LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) ) THEN RFSWRK = RFSWRK + NP + NQ END IF LWMIN = MAX( CONWRK, RFSWRK ) WORK( 1 ) = DBLE( LWMIN ) LIWMIN = NP IWORK( 1 ) = LIWMIN IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT. LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( ROWEQU ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 10 J = IIA, IIA + NP - 1 RCMIN = MIN( RCMIN, R( J ) ) RCMAX = MAX( RCMAX, R( J ) ) 10 CONTINUE CALL DGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN ROWCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE ROWCND = ONE END IF END IF IF( COLEQU .AND. INFO.EQ.0 ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 20 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 20 CONTINUE CALL DGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -15 ELSE IF( N.GT.0 ) THEN COLCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE COLCND = ONE END IF END IF END IF END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( DESCX( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2400+NB_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -29 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN INFO = -31 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( TRANS ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 14 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 29 IF( LIWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 5, IDUM1, $ IDUM2, INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 29 IF( LIWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 4, IDUM1, $ IDUM2, INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PDGEEQU( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFEQU ) IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PDLAQGE( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) NP = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFB IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFB * IF( NOTRAN ) THEN IF( ROWEQU ) THEN DO 40 J = JJB, JJB+NRHSQ-1 DO 30 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = R( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 30 CONTINUE 40 CONTINUE END IF ELSE IF( COLEQU ) THEN * * Transpose the Column scale factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PDCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), WORK, IB, JB, $ DESCB, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IIB ), $ DESCB( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IIB ), $ DESCB( LLD_ ), MYROW, IBCOL ) END IF DO 60 J = JJB, JJB+NRHSQ-1 DO 50 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = WORK( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 50 CONTINUE 60 CONTINUE END IF * IF( NOFACT.OR.EQUIL ) THEN * * Compute the LU factorization of A. * CALL PDLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PDGETRF( N, N, AF, IAF, JAF, DESCAF, IPIV, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * IF( NOTRAN ) THEN NORM = '1' ELSE NORM = 'I' END IF ANORM = PDLANGE( NORM, N, N, A, IA, JA, DESCA, WORK ) * * Compute the reciprocal of the condition number of A. * CALL PDGECON( NORM, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PDLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PDGETRS( TRANS, N, NRHS, AF, IAF, JAF, DESCAF, IPIV, X, IX, $ JX, DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFX IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFX * IF( NOTRAN ) THEN IF( COLEQU ) THEN * * Transpose the column scaling factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PDCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), WORK, IX, $ JX, DESCX, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, $ WORK( IIX ), DESCX( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, $ WORK( IIX ), DESCX( LLD_ ), MYROW, IBCOL ) END IF * DO 80 J = JJX, JJX+NRHSQ-1 DO 70 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = WORK( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 70 CONTINUE 80 CONTINUE DO 90 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / COLCND 90 CONTINUE END IF ELSE IF( ROWEQU ) THEN DO 110 J = JJX, JJX+NRHSQ-1 DO 100 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = R( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 100 CONTINUE 110 CONTINUE DO 120 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / ROWCND 120 CONTINUE END IF * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDGESVX * END scalapack-1.8.0/SRC/pzlacp2.f0000640000175000017500000003746310363532303015502 0ustar muammarmuammar SUBROUTINE PZLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZLACP2 copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PZLACP2 * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * PZLACP2 requires that only dimension of the matrix operands is * distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW, $ ICOFFA, IIA, IIAA, IIB, IIBB, IIBEGA, IIBEGB, $ IIENDA, IINXTA, IINXTB, ILEFT, IRIGHT, IROFFA, $ ITOP, JJA, JJAA, JJB, JJBB, JJBEGA, JJBEGB, $ JJENDA, JJNXTA, JJNXTB, LDA, LDB, MBA, MP, $ MPAA, MYCOL, MYDIST, MYROW, NBA, NPCOL, NPROW, $ NQ, NQAA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZLACPY * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) LDB = DESCB( LLD_ ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MBA = DESCA( MB_ ) * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MP.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEGA = IIA IIENDA = IIA + MP - 1 IINXTA = MIN( ICEIL( IIBEGA, MBA ) * MBA, IIENDA ) IIBEGB = IIB IINXTB = IIBEGB + IINXTA - IIBEGA * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL ZLACPY( UPLO, IINXTA-IIBEGA+1, N-ITOP, $ A( IIBEGA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBEGB+(JJB+ITOP-1)*LDB ), LDB ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEGA = IINXTA + 1 IINXTA = MIN( IINXTA+MBA, IIENDA ) IIBEGB = IINXTB + 1 IINXTB = IIBEGB + IINXTA - IIBEGA GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * MPAA = MP IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJAA.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL ZLACPY( 'All', MPAA, ITOP-JJAA+JJA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL ZLACPY( UPLO, MPAA, HEIGHT, $ A( IIAA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBB+(JJB+ITOP-1)*LDB ), LDB ) MPAA = MAX( 0, MPAA - HEIGHT ) IIAA = IIAA + HEIGHT JJAA = JJA + IBASE IIBB = IIBB + HEIGHT JJBB = JJB + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * CALL ZLACPY( 'All', MP, N, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQ.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEGA = JJA JJENDA = JJA + NQ - 1 JJNXTA = MIN( ICEIL( JJBEGA, NBA ) * NBA, JJENDA ) JJBEGB = JJB JJNXTB = JJBEGB + JJNXTA - JJBEGA * 30 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL ZLACPY( UPLO, M-ILEFT, JJNXTA-JJBEGA+1, $ A( IIA+ILEFT+(JJBEGA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBEGB-1)*LDB ), LDB ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEGA = JJNXTA +1 JJNXTA = MIN( JJNXTA+NBA, JJENDA ) JJBEGB = JJNXTB +1 JJNXTB = JJBEGB + JJNXTA - JJBEGA GO TO 30 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * NQAA = NQ IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 40 CONTINUE IF( IIAA.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL ZLACPY( 'All', ILEFT-IIAA+IIA, NQAA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL ZLACPY( UPLO, WIDE, NQAA, $ A( IIA+ILEFT+(JJAA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBB-1)*LDB ), LDB ) NQAA = MAX( 0, NQAA - WIDE ) IIAA = IIA + IRIGHT JJAA = JJAA + WIDE IIBB = IIB + IRIGHT JJBB = JJBB + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 40 END IF * ELSE * CALL ZLACPY( 'All', M, NQ, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * END IF * RETURN * * End of PZLACP2 * END scalapack-1.8.0/SRC/pclatra.f0000640000175000017500000001544510363532303015551 0ustar muammarmuammar COMPLEX FUNCTION PCLATRA( N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLATRA computes the trace of an N-by-N distributed matrix sub( A ) * denoting A( IA:IA+N-1, JA:JA+N-1 ). The result is left on every * process of the grid. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix the trace * is to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER ICURCOL, ICURROW, II, IOFFA, J, JB, JJ, JN, $ LDA, LL, MYCOL, MYROW, NPCOL, NPROW COMPLEX TRACE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGSUM2D, INFOG2L * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * TRACE = ZERO IF( N.EQ.0 ) THEN PCLATRA = TRACE RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ ICURROW, ICURCOL ) * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first diagonal block separately * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over the remaining block of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * CALL CGSUM2D( DESCA( CTXT_ ), 'All', ' ', 1, 1, TRACE, 1, -1, $ MYCOL ) * PCLATRA = TRACE * RETURN * * End of PCLATRA * END scalapack-1.8.0/SRC/pzunmr2.f0000640000175000017500000003534110363532303015535 0ustar muammarmuammar SUBROUTINE PZUNMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMR2 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IROFFC, LCM, LCMP, LWMIN, MI, $ MPC0, MYCOL, MYROW, NI, NPCOL, NPROW, NQ, NQC0 COMPLEX*16 AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZELSET2, PZLACGV, PZLARF, PZLARFC * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic:ic+m-k+i-ia,jc:jc+n-1) * MI = M - K + I - IA + 1 ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc:jc+n-k+i-ia+1) * NI = N - K + I - IA + 1 END IF * * Apply H(i) or H(i)' * CALL PZLACGV( NQ-K+I-IA, A, I, JA, DESCA, DESCA( M_ ) ) CALL PZELSET2( AII, A, I, JA+NQ-K+I-IA, DESCA, ONE ) IF( NOTRAN ) THEN CALL PZLARFC( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) ELSE CALL PZLARF( SIDE, MI, NI, A, I, JA, DESCA, DESCA( M_ ), $ TAU, C, IC, JC, DESCC, WORK ) END IF CALL PZELSET( A, I, JA+NQ-K+I-IA, DESCA, AII ) CALL PZLACGV( NQ-K+I-IA, A, I, JA, DESCA, DESCA( M_ ) ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMR2 * END scalapack-1.8.0/SRC/pblas.h0000640000175000017500000010504510604315156015225 0ustar muammarmuammar/* --------------------------------------------------------------------- * * -- ScaLAPACK routine (version 1.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 17, 1996 * * --------------------------------------------------------------------- */ /* * This file includes the standard C libraries, as well as system * dependent include files. All PBLAS routines include this file. */ /* * ======================================================================== * Machine Specific PBLAS macros * ======================================================================== */ #define _HAL_ 0 #define _T3D_ 1 #ifdef T3D #define _MACH_ _T3D_ #endif #ifndef _MACH_ #define _MACH_ _HAL_ #endif /* * ======================================================================== * Include files * ======================================================================== */ #include #include #if( _MACH_ == _T3D_ ) #include #endif /* * ======================================================================== * FORTRAN <-> C interface * ======================================================================== * * These macros define how the PBLAS will be called. _F2C_ADD_ assumes * that they will be called by FORTRAN, which expects C routines to have * an underscore postfixed to the name (Suns, and Intel machines expect * this). _F2C_NOCHANGE indicates that FORTRAN will be calling, and that * it expects the name called by FORTRAN to be identical to that compiled * by the C (RS6K's do this). _F2C_UPCASE says it expects C routines * called by FORTRAN to be in all upcase (CRAY wants this). * _F2C_F77ISF2C indicates that the fortran "compiler" in use is * actually f2c, a FORTRAN to C converter. */ #define _F2C_ADD_ 0 #define _F2C_NOCHANGE 1 #define _F2C_UPCASE 2 #define _F2C_F77ISF2C 3 #ifdef UpCase #define _F2C_CALL_ _F2C_UPCASE #endif #ifdef NoChange #define _F2C_CALL_ _F2C_NOCHANGE #endif #ifdef Add_ #define _F2C_CALL_ _F2C_ADD_ #endif #ifdef f77IsF2C #define _F2C_CALL_ _F2C_F77ISF2C #endif #ifndef _F2C_CALL_ #define _F2C_CALL_ _F2C_ADD_ #endif /* * ======================================================================== * TYPE DEFINITIONS AND CONVERSION UTILITIES * ======================================================================== */ typedef struct { float re, im; } complex; typedef struct { double re, im; } complex16; #if( _MACH_ == _T3D_ ) #define float double /* Type of character argument in a FORTRAN call */ #define F_CHAR _fcd /* Character conversion utilities */ #define F2C_CHAR(a) ( _fcdtocp( (a) ) ) #define C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) /* Type of FORTRAN functions */ #define F_VOID_FCT void fortran /* Subroutine */ #define F_INTG_FCT int fortran /* INTEGER function */ #define F_DBLE_FCT double fortran /* DOUBLE PRECISION function */ #else /* Type of character argument in a FORTRAN call */ typedef char * F_CHAR; /* Character conversion utilities */ #define F2C_CHAR(a) (a) #define C2F_CHAR(a) (a) /* Type of FORTRAN functions */ #define F_VOID_FCT void /* Subroutine */ #define F_INTG_FCT int /* INTEGER function */ #define F_DBLE_FCT double /* DOUBLE PRECISION function */ #endif /* * ======================================================================== * #DEFINE MACRO CONSTANTS * ======================================================================== */ #define DLEN_ 9 /* Length of a descriptor */ #define DT_ 0 /* Descriptor Type */ #define CTXT_ 1 /* BLACS context */ #define M_ 2 /* Global Number of Rows */ #define N_ 3 /* Global Number of Columns */ #define MB_ 4 /* Row Blocking Size */ #define NB_ 5 /* Column Blocking Size */ #define RSRC_ 6 /* Starting Processor Row */ #define CSRC_ 7 /* Starting Processor Column */ #define LLD_ 8 /* Local Leading Dimension */ /* * Descriptor types */ #define BLOCK_CYCLIC_2D 1 #define BLOCK_CYCLIC_INB_2D 2 #define BROADCAST "B" /* Blacs operation definitions */ #define COMBINE "C" #define ALL "A" /* Scope definitions */ #define COLUMN "C" #define ROW "R" #define TOPDEF " " /* Default BLACS topology, PB-BLAS routines */ #define CTOPDEF ' ' #define TOPGET "!" #define YES "Y" #define NO "N" #define MULLENFAC 2 #define ONE 1.0 #define ZERO 0.0 /* * ======================================================================== * PREPROCESSOR MACRO FUNCTIONS USED FOR OPTIMIZATION & CONVENIENCE * ======================================================================== */ #define ABS(a) (((a) < 0) ? -(a) : (a)) #define MIN(a,b) (((a) < (b)) ? (a) : (b)) #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #define CEIL(a,b) ( ((a)+(b)-1) / (b) ) #define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) ) #define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) ) #define INDXG2L( iglob, nb, iproc, isrcproc, nprocs )\ ( (nb) * ( ( (iglob)-1) / ( (nb) * (nprocs) ) ) +\ ( ( (iglob) - 1 ) % (nb) ) + 1 ) #define INDXL2G( iloc, nb, iproc, isrcproc, nprocs )\ ( (nprocs) * (nb) * ( ( (iloc) - 1 ) / (nb) ) +\ ( ( (iloc) - 1 ) % (nb) ) +\ ( ( (nprocs) + (iproc) - (isrcproc) ) % (nprocs) ) * (nb) + 1 ) #define INDXG2P( iglob, nb, iproc, isrcproc, nprocs ) \ ( ( (isrcproc) + ( (iglob) - 1 ) / (nb) ) % (nprocs) ) #define MYROC0( nblocks, n, nb, nprocs )\ ( ( (nblocks) % (nprocs) ) ? ( ( (nblocks) / (nprocs) ) * (nb) + (nb) )\ : ( ( (nblocks) / (nprocs) )* (nb) + ( (n) % (nb) ) ) ) #if( _F2C_CALL_ == _F2C_ADD_ ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine (which is what the PBLAS are written in). * No redefinition necessary to have following FORTRAN to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm_(...) * * This is the default. */ #endif #if( _F2C_CALL_ == _F2C_UPCASE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine (which is what the PBLAS are written in) * following FORTRAN to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void PDGEMM(...) */ /* TOOLS */ #define ilcm_ ILCM #define infog2l_ INFOG2L #define numroc_ NUMROC #define pstreecomb_ PSTREECOMB #define pdtreecomb_ PDTREECOMB #define pctreecomb_ PCTREECOMB #define pztreecomb_ PZTREECOMB #define scombamax_ SCOMBAMAX #define dcombamax_ DCOMBAMAX #define ccombamax_ CCOMBAMAX #define zcombamax_ ZCOMBAMAX #define scombnrm2_ SCOMBNRM2 #define dcombnrm2_ DCOMBNRM2 /* BLACS */ #define blacs_abort_ BLACS_ABORT #define blacs_gridinfo_ BLACS_GRIDINFO #define igesd2d_ IGESD2D #define igebs2d_ IGEBS2D #define itrsd2d_ ITRSD2D #define itrbs2d_ ITRBS2D #define igerv2d_ IGERV2D #define igebr2d_ IGEBR2D #define itrrv2d_ ITRRV2D #define itrbr2d_ ITRBR2D #define igamx2d_ IGAMX2D #define igamn2d_ IGAMN2D #define igsum2d_ IGSUM2D #define sgesd2d_ SGESD2D #define sgebs2d_ SGEBS2D #define strsd2d_ STRSD2D #define strbs2d_ STRBS2D #define sgerv2d_ SGERV2D #define sgebr2d_ SGEBR2D #define strrv2d_ STRRV2D #define strbr2d_ STRBR2D #define sgamx2d_ SGAMX2D #define sgamn2d_ SGAMN2D #define sgsum2d_ SGSUM2D #define dgesd2d_ DGESD2D #define dgebs2d_ DGEBS2D #define dtrsd2d_ DTRSD2D #define dtrbs2d_ DTRBS2D #define dgerv2d_ DGERV2D #define dgebr2d_ DGEBR2D #define dtrrv2d_ DTRRV2D #define dtrbr2d_ DTRBR2D #define dgamx2d_ DGAMX2D #define dgamn2d_ DGAMN2D #define dgsum2d_ DGSUM2D #define cgesd2d_ CGESD2D #define cgebs2d_ CGEBS2D #define ctrsd2d_ CTRSD2D #define ctrbs2d_ CTRBS2D #define cgerv2d_ CGERV2D #define cgebr2d_ CGEBR2D #define ctrrv2d_ CTRRV2D #define ctrbr2d_ CTRBR2D #define cgamx2d_ CGAMX2D #define cgamn2d_ CGAMN2D #define cgsum2d_ CGSUM2D #define zgesd2d_ ZGESD2D #define zgebs2d_ ZGEBS2D #define ztrsd2d_ ZTRSD2D #define ztrbs2d_ ZTRBS2D #define zgerv2d_ ZGERV2D #define zgebr2d_ ZGEBR2D #define ztrrv2d_ ZTRRV2D #define ztrbr2d_ ZTRBR2D #define zgamx2d_ ZGAMX2D #define zgamn2d_ ZGAMN2D #define zgsum2d_ ZGSUM2D /* Level-1 BLAS */ #define srotg_ SROTG #define srotmg_ SROTMG #define srot_ SROT #define srotm_ SROTM #define sswap_ SSWAP #define sscal_ SSCAL #define scopy_ SCOPY #define saxpy_ SAXPY #define ssdot_ SSDOT #define isamax_ ISAMAX #define drotg_ DROTG #define drotmg_ DROTMG #define drot_ DROT #define drotm_ DROTM #define dswap_ DSWAP #define dscal_ DSCAL #define dcopy_ DCOPY #define daxpy_ DAXPY #define dddot_ DDDOT #define dnrm2_ DNRM2 #define dsnrm2_ DSNRM2 #define dasum_ DASUM #define dsasum_ DSASUM #define idamax_ IDAMAX #define cswap_ CSWAP #define cscal_ CSCAL #define csscal_ CSSCAL #define ccopy_ CCOPY #define caxpy_ CAXPY #define ccdotu_ CCDOTU #define ccdotc_ CCDOTC #define icamax_ ICAMAX #define zswap_ ZSWAP #define zscal_ ZSCAL #define zdscal_ ZDSCAL #define zcopy_ ZCOPY #define zaxpy_ ZAXPY #define zzdotu_ ZZDOTU #define zzdotc_ ZZDOTC #define dscnrm2_ DSCNRM2 #define dznrm2_ DZNRM2 #define dscasum_ DSCASUM #define dzasum_ DZASUM #define izamax_ IZAMAX /* Level-2 BLAS */ #define sgemv_ SGEMV #define ssymv_ SSYMV #define strmv_ STRMV #define strsv_ STRSV #define sger_ SGER #define ssyr_ SSYR #define ssyr2_ SSYR2 #define dgemv_ DGEMV #define dsymv_ DSYMV #define dtrmv_ DTRMV #define dtrsv_ DTRSV #define dger_ DGER #define dsyr_ DSYR #define dsyr2_ DSYR2 #define cgemv_ CGEMV #define chemv_ CHEMV #define ctrmv_ CTRMV #define ctrsv_ CTRSV #define cgeru_ CGERU #define cgerc_ CGERC #define cher_ CHER #define cher2_ CHER2 #define zgemv_ ZGEMV #define zhemv_ ZHEMV #define ztrmv_ ZTRMV #define ztrsv_ ZTRSV #define zgeru_ ZGERU #define zgerc_ ZGERC #define zher_ ZHER #define zher2_ ZHER2 /* Level-3 BLAS */ #define sgemm_ SGEMM #define ssymm_ SSYMM #define ssyrk_ SSYRK #define ssyr2k_ SSYR2K #define strmm_ STRMM #define strsm_ STRSM #define dgemm_ DGEMM #define dsymm_ DSYMM #define dsyrk_ DSYRK #define dsyr2k_ DSYR2K #define dtrmm_ DTRMM #define dtrsm_ DTRSM #define cgemm_ CGEMM #define chemm_ CHEMM #define csymm_ CSYMM #define csyrk_ CSYRK #define cherk_ CHERK #define csyr2k_ CSYR2K #define cher2k_ CHER2K #define ctrmm_ CTRMM #define ctrsm_ CTRSM #define zgemm_ ZGEMM #define zhemm_ ZHEMM #define zsymm_ ZSYMM #define zsyrk_ ZSYRK #define zherk_ ZHERK #define zsyr2k_ ZSYR2K #define zher2k_ ZHER2K #define ztrmm_ ZTRMM #define ztrsm_ ZTRSM /* absolute value auxiliary PBLAS */ #define psatrmv_ PSATRMV #define pdatrmv_ PDATRMV #define pcatrmv_ PCATRMV #define pzatrmv_ PZATRMV #define psagemv_ PSAGEMV #define pdagemv_ PDAGEMV #define pcagemv_ PCAGEMV #define pzagemv_ PZAGEMV #define psasymv_ PSASYMV #define pdasymv_ PDASYMV #define pcahemv_ PCAHEMV #define pzahemv_ PZAHEMV /* Auxiliary PB-BLAS */ #define pbcmatadd_ PBCMATADD #define pbdmatadd_ PBDMATADD #define pbsmatadd_ PBSMATADD #define pbzmatadd_ PBZMATADD /* Level-2 PBBLAS */ #define pbcgemv_ PBCGEMV #define pbcgeru_ PBCGERU #define pbcgerc_ PBCGERC #define pbchemv_ PBCHEMV #define pbcher_ PBCHER #define pbcher2_ PBCHER2 #define pbctrmv_ PBCTRMV #define pbctrnv_ PBCTRNV #define pbctrsv_ PBCTRSV #define pbdgemv_ PBDGEMV #define pbdger_ PBDGER #define pbdsymv_ PBDSYMV #define pbdsyr_ PBDSYR #define pbdsyr2_ PBDSYR2 #define pbdtrmv_ PBDTRMV #define pbdtrnv_ PBDTRNV #define pbdtrsv_ PBDTRSV #define pbsgemv_ PBSGEMV #define pbsger_ PBSGER #define pbssymv_ PBSSYMV #define pbssyr_ PBSSYR #define pbssyr2_ PBSSYR2 #define pbstrmv_ PBSTRMV #define pbstrnv_ PBSTRNV #define pbstrsv_ PBSTRSV #define pbzgemv_ PBZGEMV #define pbzgeru_ PBZGERU #define pbzgerc_ PBZGERC #define pbzhemv_ PBZHEMV #define pbzher_ PBZHER #define pbzher2_ PBZHER2 #define pbztrmv_ PBZTRMV #define pbztrnv_ PBZTRNV #define pbztrsv_ PBZTRSV /* Level-3 PBBLAS */ #define pbcgemm_ PBCGEMM #define pbchemm_ PBCHEMM #define pbcher2k_ PBCHER2K #define pbcherk_ PBCHERK #define pbcsymm_ PBCSYMM #define pbcsyr2k_ PBCSYR2K #define pbcsyrk_ PBCSYRK #define pbctrmm_ PBCTRMM #define pbctrsm_ PBCTRSM #define pbctran_ PBCTRAN #define pbdgemm_ PBDGEMM #define pbdsymm_ PBDSYMM #define pbdsyr2k_ PBDSYR2K #define pbdsyrk_ PBDSYRK #define pbdtrmm_ PBDTRMM #define pbdtrsm_ PBDTRSM #define pbdtran_ PBDTRAN #define pbsgemm_ PBSGEMM #define pbssymm_ PBSSYMM #define pbssyr2k_ PBSSYR2K #define pbssyrk_ PBSSYRK #define pbstrmm_ PBSTRMM #define pbstrsm_ PBSTRSM #define pbstran_ PBSTRAN #define pbzgemm_ PBZGEMM #define pbzhemm_ PBZHEMM #define pbzher2k_ PBZHER2K #define pbzherk_ PBZHERK #define pbzsymm_ PBZSYMM #define pbzsyr2k_ PBZSYR2K #define pbzsyrk_ PBZSYRK #define pbztrmm_ PBZTRMM #define pbztrsm_ PBZTRSM #define pbztran_ PBZTRAN /* Auxilliary PBLAS */ #define pberror_ PBERROR #define pb_freebuf_ PB_FREEBUF #define pb_topget_ PB_TOPGET #define pb_topset_ PB_TOPSET /* Level-1 PBLAS */ #define psrotg_ PSROTG #define psrotmg_ PSROTMG #define psrot_ PSROT #define psrotm_ PSROTM #define psswap_ PSSWAP #define psscal_ PSSCAL #define pscopy_ PSCOPY #define psaxpy_ PSAXPY #define psdot_ PSDOT #define psnrm2_ PSNRM2 #define psasum_ PSASUM #define psamax_ PSAMAX #define pdrotg_ PDROTG #define pdrotmg_ PDROTMG #define pdrot_ PDROT #define pdrotm_ PDROTM #define pdswap_ PDSWAP #define pdscal_ PDSCAL #define pdcopy_ PDCOPY #define pdaxpy_ PDAXPY #define pddot_ PDDOT #define pdnrm2_ PDNRM2 #define pdasum_ PDASUM #define pdamax_ PDAMAX #define pcswap_ PCSWAP #define pcscal_ PCSCAL #define pcsscal_ PCSSCAL #define pccopy_ PCCOPY #define pcaxpy_ PCAXPY #define pcdotu_ PCDOTU #define pcdotc_ PCDOTC #define pscnrm2_ PSCNRM2 #define pscasum_ PSCASUM #define pcamax_ PCAMAX #define pcrot_ PCROT #define pzswap_ PZSWAP #define pzscal_ PZSCAL #define pzdscal_ PZDSCAL #define pzcopy_ PZCOPY #define pzaxpy_ PZAXPY #define pzdotu_ PZDOTU #define pzdotc_ PZDOTC #define pdznrm2_ PDZNRM2 #define pdzasum_ PDZASUM #define pzamax_ PZAMAX #define pzrot_ PZROT /* Level-2 PBLAS */ #define pcgemv_ PCGEMV #define pcgeru_ PCGERU #define pcgerc_ PCGERC #define pchemv_ PCHEMV #define pcher_ PCHER #define pcher2_ PCHER2 #define pctrmv_ PCTRMV #define pctrsv_ PCTRSV #define pdgemv_ PDGEMV #define pdger_ PDGER #define pdsymv_ PDSYMV #define pdsyr_ PDSYR #define pdsyr2_ PDSYR2 #define pdtrmv_ PDTRMV #define pdtrsv_ PDTRSV #define psgemv_ PSGEMV #define psger_ PSGER #define pssymv_ PSSYMV #define pssyr_ PSSYR #define pssyr2_ PSSYR2 #define pstrmv_ PSTRMV #define pstrsv_ PSTRSV #define pzgemv_ PZGEMV #define pzgeru_ PZGERU #define pzgerc_ PZGERC #define pzhemv_ PZHEMV #define pzher_ PZHER #define pzher2_ PZHER2 #define pztrmv_ PZTRMV #define pztrsv_ PZTRSV /* Level-3 PBLAS */ #define pcgemm_ PCGEMM #define pchemm_ PCHEMM #define pcher2k_ PCHER2K #define pcherk_ PCHERK #define pcsymm_ PCSYMM #define pcsyr2k_ PCSYR2K #define pcsyrk_ PCSYRK #define pctrmm_ PCTRMM #define pctrsm_ PCTRSM #define pctranu_ PCTRANU #define pctranc_ PCTRANC #define pdgemm_ PDGEMM #define pdsymm_ PDSYMM #define pdsyr2k_ PDSYR2K #define pdsyrk_ PDSYRK #define pdtrmm_ PDTRMM #define pdtrsm_ PDTRSM #define pdtran_ PDTRAN #define psgemm_ PSGEMM #define pssymm_ PSSYMM #define pssyr2k_ PSSYR2K #define pssyrk_ PSSYRK #define pstrmm_ PSTRMM #define pstrsm_ PSTRSM #define pstran_ PSTRAN #define pzgemm_ PZGEMM #define pzhemm_ PZHEMM #define pzher2k_ PZHER2K #define pzherk_ PZHERK #define pzsymm_ PZSYMM #define pzsyr2k_ PZSYR2K #define pzsyrk_ PZSYRK #define pztrmm_ PZTRMM #define pztrsm_ PZTRSM #define pztranu_ PZTRANU #define pztranc_ PZTRANC #endif #if( _F2C_CALL_ == _F2C_NOCHANGE ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine (which is what the PBLAS are written in) * for following FORTRAN to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm(...) */ /* TOOLS */ #define ilcm_ ilcm #define infog2l_ infog2l #define numroc_ numroc #define pstreecomb_ pstreecomb #define pdtreecomb_ pdtreecomb #define pctreecomb_ pctreecomb #define pztreecomb_ pztreecomb #define scombamax_ scombamax #define dcombamax_ dcombamax #define ccombamax_ ccombamax #define zcombamax_ zcombamax #define scombnrm2_ scombnrm2 #define dcombnrm2_ dcombnrm2 /* BLACS */ #define blacs_abort_ blacs_abort #define blacs_gridinfo_ blacs_gridinfo #define igesd2d_ igesd2d #define igebs2d_ igebs2d #define itrsd2d_ itrsd2d #define itrbs2d_ itrbs2d #define igerv2d_ igerv2d #define igebr2d_ igebr2d #define itrrv2d_ itrrv2d #define itrbr2d_ itrbr2d #define igamx2d_ igamx2d #define igamn2d_ igamn2d #define igsum2d_ igsum2d #define sgesd2d_ sgesd2d #define sgebs2d_ sgebs2d #define strsd2d_ strsd2d #define strbs2d_ strbs2d #define sgerv2d_ sgerv2d #define sgebr2d_ sgebr2d #define strrv2d_ strrv2d #define strbr2d_ strbr2d #define sgamx2d_ sgamx2d #define sgamn2d_ sgamn2d #define sgsum2d_ sgsum2d #define dgesd2d_ dgesd2d #define dgebs2d_ dgebs2d #define dtrsd2d_ dtrsd2d #define dtrbs2d_ dtrbs2d #define dgerv2d_ dgerv2d #define dgebr2d_ dgebr2d #define dtrrv2d_ dtrrv2d #define dtrbr2d_ dtrbr2d #define dgamx2d_ dgamx2d #define dgamn2d_ dgamn2d #define dgsum2d_ dgsum2d #define cgesd2d_ cgesd2d #define cgebs2d_ cgebs2d #define ctrsd2d_ ctrsd2d #define ctrbs2d_ ctrbs2d #define cgerv2d_ cgerv2d #define cgebr2d_ cgebr2d #define ctrrv2d_ ctrrv2d #define ctrbr2d_ ctrbr2d #define cgamx2d_ cgamx2d #define cgamn2d_ cgamn2d #define cgsum2d_ cgsum2d #define zgesd2d_ zgesd2d #define zgebs2d_ zgebs2d #define ztrsd2d_ ztrsd2d #define ztrbs2d_ ztrbs2d #define zgerv2d_ zgerv2d #define zgebr2d_ zgebr2d #define ztrrv2d_ ztrrv2d #define ztrbr2d_ ztrbr2d #define zgamx2d_ zgamx2d #define zgamn2d_ zgamn2d #define zgsum2d_ zgsum2d /* Level-1 BLAS */ #define srotg_ srotg #define srotmg_ srotmg #define srot_ srot #define srotm_ srotm #define sswap_ sswap #define sscal_ sscal #define scopy_ scopy #define saxpy_ saxpy #define ssdot_ ssdot #define isamax_ isamax #define drotg_ drotg #define drotmg_ drotmg #define drot_ drot #define drotm_ drotm #define dswap_ dswap #define dscal_ dscal #define dcopy_ dcopy #define daxpy_ daxpy #define dddot_ dddot #define dnrm2_ dnrm2 #define dsnrm2_ dsnrm2 #define dasum_ dasum #define dsasum_ dsasum #define idamax_ idamax #define cswap_ cswap #define cscal_ cscal #define csscal_ csscal #define ccopy_ ccopy #define caxpy_ caxpy #define ccdotu_ ccdotu #define ccdotc_ ccdotc #define icamax_ icamax #define zswap_ zswap #define zscal_ zscal #define zdscal_ zdscal #define zcopy_ zcopy #define zaxpy_ zaxpy #define zzdotu_ zzdotu #define zzdotc_ zzdotc #define dscnrm2_ dscnrm2 #define dznrm2_ dznrm2 #define dscasum_ dscasum #define dzasum_ dzasum #define izamax_ izamax /* Level-2 BLAS */ #define sgemv_ sgemv #define ssymv_ ssymv #define strmv_ strmv #define strsv_ strsv #define sger_ sger #define ssyr_ ssyr #define ssyr2_ ssyr2 #define dgemv_ dgemv #define dsymv_ dsymv #define dtrmv_ dtrmv #define dtrsv_ dtrsv #define dger_ dger #define dsyr_ dsyr #define dsyr2_ dsyr2 #define cgemv_ cgemv #define chemv_ chemv #define ctrmv_ ctrmv #define ctrsv_ ctrsv #define cgeru_ cgeru #define cgerc_ cgerc #define cher_ cher #define cher2_ cher2 #define zgemv_ zgemv #define zhemv_ zhemv #define ztrmv_ ztrmv #define ztrsv_ ztrsv #define zgeru_ zgeru #define zgerc_ zgerc #define zher_ zher #define zher2_ zher2 /* Level-3 BLAS */ #define sgemm_ sgemm #define ssymm_ ssymm #define ssyrk_ ssyrk #define ssyr2k_ ssyr2k #define strmm_ strmm #define strsm_ strsm #define dgemm_ dgemm #define dsymm_ dsymm #define dsyrk_ dsyrk #define dsyr2k_ dsyr2k #define dtrmm_ dtrmm #define dtrsm_ dtrsm #define cgemm_ cgemm #define chemm_ chemm #define csymm_ csymm #define csyrk_ csyrk #define cherk_ cherk #define csyr2k_ csyr2k #define cher2k_ cher2k #define ctrmm_ ctrmm #define ctrsm_ ctrsm #define zgemm_ zgemm #define zhemm_ zhemm #define zsymm_ zsymm #define zsyrk_ zsyrk #define zherk_ zherk #define zsyr2k_ zsyr2k #define zher2k_ zher2k #define ztrmm_ ztrmm #define ztrsm_ ztrsm /* absolute value auxiliary PBLAS */ #define psatrmv_ psatrmv #define pdatrmv_ pdatrmv #define pcatrmv_ pcatrmv #define pzatrmv_ pzatrmv #define psagemv_ psagemv #define pdagemv_ pdagemv #define pcagemv_ pcagemv #define pzagemv_ pzagemv #define psasymv_ psasymv #define pdasymv_ pdasymv #define pcahemv_ pcahemv #define pzahemv_ pzahemv /* Auxiliary PB-BLAS */ #define pbcmatadd_ pbcmatadd #define pbdmatadd_ pbdmatadd #define pbsmatadd_ pbsmatadd #define pbzmatadd_ pbzmatadd /* Level-2 PBBLAS */ #define pbcgemv_ pbcgemv #define pbcgeru_ pbcgeru #define pbcgerc_ pbcgerc #define pbchemv_ pbchemv #define pbcher_ pbcher #define pbcher2_ pbcher2 #define pbctrmv_ pbctrmv #define pbctrnv_ pbctrnv #define pbctrsv_ pbctrsv #define pbdgemv_ pbdgemv #define pbdger_ pbdger #define pbdsymv_ pbdsymv #define pbdsyr_ pbdsyr #define pbdsyr2_ pbdsyr2 #define pbdtrmv_ pbdtrmv #define pbdtrnv_ pbdtrnv #define pbdtrsv_ pbdtrsv #define pbsgemv_ pbsgemv #define pbsger_ pbsger #define pbssymv_ pbssymv #define pbssyr_ pbssyr #define pbssyr2_ pbssyr2 #define pbstrmv_ pbstrmv #define pbstrnv_ pbstrnv #define pbstrsv_ pbstrsv #define pbzgemv_ pbzgemv #define pbzgeru_ pbzgeru #define pbzgerc_ pbzgerc #define pbzhemv_ pbzhemv #define pbzher_ pbzher #define pbzher2_ pbzher2 #define pbztrmv_ pbztrmv #define pbztrnv_ pbztrnv #define pbztrsv_ pbztrsv /* Level-3 PBBLAS */ #define pbcgemm_ pbcgemm #define pbchemm_ pbchemm #define pbcher2k_ pbcher2k #define pbcherk_ pbcherk #define pbcsymm_ pbcsymm #define pbcsyr2k_ pbcsyr2k #define pbcsyrk_ pbcsyrk #define pbctrmm_ pbctrmm #define pbctrsm_ pbctrsm #define pbctran_ pbctran #define pbdgemm_ pbdgemm #define pbdsymm_ pbdsymm #define pbdsyr2k_ pbdsyr2k #define pbdsyrk_ pbdsyrk #define pbdtrmm_ pbdtrmm #define pbdtrsm_ pbdtrsm #define pbdtran_ pbdtran #define pbsgemm_ pbsgemm #define pbssymm_ pbssymm #define pbssyr2k_ pbssyr2k #define pbssyrk_ pbssyrk #define pbstrmm_ pbstrmm #define pbstrsm_ pbstrsm #define pbstran_ pbstran #define pbzgemm_ pbzgemm #define pbzhemm_ pbzhemm #define pbzher2k_ pbzher2k #define pbzherk_ pbzherk #define pbzsymm_ pbzsymm #define pbzsyr2k_ pbzsyr2k #define pbzsyrk_ pbzsyrk #define pbztrmm_ pbztrmm #define pbztrsm_ pbztrsm #define pbztran_ pbztran /* Auxilliary PBLAS */ #define pberror_ pberror #define pb_freebuf_ pb_freebuf #define pb_topget_ pb_topget #define pb_topset_ pb_topset /* Level-1 PBLAS */ #define psrotg_ psrotg #define psrotmg_ psrotmg #define psrot_ psrot #define psrotm_ psrotm #define psswap_ psswap #define psscal_ psscal #define pscopy_ pscopy #define psaxpy_ psaxpy #define psdot_ psdot #define psnrm2_ psnrm2 #define psasum_ psasum #define psamax_ psamax #define pdrotg_ pdrotg #define pdrotmg_ pdrotmg #define pdrot_ pdrot #define pdrotm_ pdrotm #define pdswap_ pdswap #define pdscal_ pdscal #define pdcopy_ pdcopy #define pdaxpy_ pdaxpy #define pddot_ pddot #define pdnrm2_ pdnrm2 #define pdasum_ pdasum #define pdamax_ pdamax #define pcswap_ pcswap #define pcscal_ pcscal #define pcsscal_ pcsscal #define pccopy_ pccopy #define pcaxpy_ pcaxpy #define pcdotu_ pcdotu #define pcdotc_ pcdotc #define pscnrm2_ pscnrm2 #define pscasum_ pscasum #define pcamax_ pcamax #define pcrot_ pcrot #define crot_ crot #define pzswap_ pzswap #define pzscal_ pzscal #define pzdscal_ pzdscal #define pzcopy_ pzcopy #define pzaxpy_ pzaxpy #define pzdotu_ pzdotu #define pzdotc_ pzdotc #define pdznrm2_ pdznrm2 #define pdzasum_ pdzasum #define pzamax_ pzamax #define pzrot_ pzrot #define zrot_ zrot /* Level-2 PBLAS */ #define pcgemv_ pcgemv #define pcgeru_ pcgeru #define pcgerc_ pcgerc #define pchemv_ pchemv #define pcher_ pcher #define pcher2_ pcher2 #define pctrmv_ pctrmv #define pctrsv_ pctrsv #define pdgemv_ pdgemv #define pdger_ pdger #define pdsymv_ pdsymv #define pdsyr_ pdsyr #define pdsyr2_ pdsyr2 #define pdtrmv_ pdtrmv #define pdtrsv_ pdtrsv #define psgemv_ psgemv #define psger_ psger #define pssymv_ pssymv #define pssyr_ pssyr #define pssyr2_ pssyr2 #define pstrmv_ pstrmv #define pstrsv_ pstrsv #define pzgemv_ pzgemv #define pzgeru_ pzgeru #define pzgerc_ pzgerc #define pzhemv_ pzhemv #define pzher_ pzher #define pzher2_ pzher2 #define pztrmv_ pztrmv #define pztrsv_ pztrsv /* Level-3 PBLAS */ #define pcgemm_ pcgemm #define pchemm_ pchemm #define pcher2k_ pcher2k #define pcherk_ pcherk #define pcsymm_ pcsymm #define pcsyr2k_ pcsyr2k #define pcsyrk_ pcsyrk #define pctrmm_ pctrmm #define pctrsm_ pctrsm #define pctranu_ pctranu #define pctranc_ pctranc #define pdgemm_ pdgemm #define pdsymm_ pdsymm #define pdsyr2k_ pdsyr2k #define pdsyrk_ pdsyrk #define pdtrmm_ pdtrmm #define pdtrsm_ pdtrsm #define pdtran_ pdtran #define psgemm_ psgemm #define pssymm_ pssymm #define pssyr2k_ pssyr2k #define pssyrk_ pssyrk #define pstrmm_ pstrmm #define pstrsm_ pstrsm #define pstran_ pstran #define pzgemm_ pzgemm #define pzhemm_ pzhemm #define pzher2k_ pzher2k #define pzherk_ pzherk #define pzsymm_ pzsymm #define pzsyr2k_ pzsyr2k #define pzsyrk_ pzsyrk #define pztrmm_ pztrmm #define pztrsm_ pztrsm #define pztranu_ pztranu #define pztranc_ pztranc #endif #if( _F2C_CALL_ == _F2C_F77ISF2C ) /* * These defines set up the naming scheme required to have a FORTRAN * routine call a C routine (which is what the PBLAS are written in) * for systems where the fortran "compiler" is actually f2c (a Fortran * to C conversion utility). */ /* * Initialization routines */ #define blacs_pinfo_ blacs_pinfo__ #define blacs_setup_ blacs_setup__ #define blacs_set_ blacs_set__ #define blacs_get_ blacs_get__ #define blacs_gridinit_ blacs_gridinit__ #define blacs_gridmap_ blacs_gridmap__ /* * Destruction routines */ #define blacs_freebuff_ blacs_freebuff__ #define blacs_gridexit_ blacs_gridexit__ #define blacs_abort_ blacs_abort__ #define blacs_exit_ blacs_exit__ /* * Informational & misc. */ #define blacs_gridinfo_ blacs_gridinfo__ #define blacs_pnum_ blacs_pnum__ #define blacs_pcoord_ blacs_pcoord__ #define blacs_barrier_ blacs_barrier__ #endif scalapack-1.8.0/SRC/pdorml2.f0000640000175000017500000003522310363532303015476 0ustar muammarmuammar SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) DOUBLE PRECISION A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORML2 overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PDGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PDGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JCC, LCM, LCMP, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDELSET2, PDLARF, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K -1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(i:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * CALL PDELSET2( AII, A, I, JA+I-IA, DESCA, ONE ) CALL PDLARF( SIDE, MI, NI, A, I, JA+I-IA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) CALL PDELSET( A, I, JA+I-IA, DESCA, AII ) * 10 CONTINUE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORML2 * END scalapack-1.8.0/SRC/pdsytd2.f0000640000175000017500000004223110363532303015505 0ustar muammarmuammar SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDSYTD2 reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 3*N. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION HALF, ONE, ZERO PARAMETER ( HALF = 0.5D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J, $ JJ, JK, JN, LDA, LWMIN, MYCOL, MYROW, NPCOL, $ NPROW DOUBLE PRECISION ALPHA, TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, DAXPY, $ DGEBR2D, DGEBS2D, DLARFG, $ DSYMV, DSYR2, INFOG2L, PXERBLA * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DDOT EXTERNAL LSAME, DDOT * .. * .. Intrinsic Functions .. INTRINSIC DBLE * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) LWMIN = 3 * N * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) * IF( UPPER ) THEN * * Process(IAROW, IACOL) owns block to be reduced * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the upper triangle of sub( A ) * DO 10 J = N-1, 1, -1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA:IA+J-1,JA:JA+J-1) * CALL DLARFG( J, A( IK+JK*LDA ), A( II+JK*LDA ), 1, $ TAUI ) E( JK+1 ) = A( IK+JK*LDA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA:IA+J-1,JA:JA+J-1) * A( IK+JK*LDA ) = ONE * * Compute x := tau * A * v storing x in TAU(1:i) * CALL DSYMV( UPLO, J, TAUI, A( II+(JJ-1)*LDA ), $ LDA, A( II+JK*LDA ), 1, ZERO, $ TAU( JJ ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*DDOT( J, TAU( JJ ), 1, $ A( II+JK*LDA ), 1 ) CALL DAXPY( J, ALPHA, A( II+JK*LDA ), 1, $ TAU( JJ ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL DSYR2( UPLO, J, -ONE, A( II+JK*LDA ), 1, $ TAU( JJ ), 1, A( II+(JJ-1)*LDA ), $ LDA ) A( IK+JK*LDA ) = E( JK+1 ) END IF * * Copy D, E, TAU to broadcast them columnwise. * D( JK+1 ) = A( IK+1+JK*LDA ) WORK( J+1 ) = D( JK+1 ) WORK( N+J+1 ) = E( JK+1 ) TAU( JK+1 ) = TAUI WORK( 2*N+J+1 ) = TAU( JK+1 ) * 10 CONTINUE D( JJ ) = A( II+(JJ-1)*LDA ) WORK( 1 ) = D( JJ ) WORK( N+1 ) = ZERO WORK( 2*N+1 ) = ZERO * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1 ) * ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N, WORK, 1, $ IAROW, IACOL ) DO 20 J = 2, N JN = JJ + J - 1 D( JN ) = WORK( J ) E( JN ) = WORK( N+J ) TAU( JN ) = WORK( 2*N+J ) 20 CONTINUE D( JJ ) = WORK( 1 ) END IF END IF * ELSE * * Process (IAROW, IACOL) owns block to be factorized * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN * * Reduce the lower triangle of sub( A ) * DO 30 J = 1, N - 1 IK = II + J - 1 JK = JJ + J - 1 * * Generate elementary reflector H(i) = I - tau * v * v' * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1) * CALL DLARFG( N-J, A( IK+1+(JK-1)*LDA ), $ A( IK+2+(JK-1)*LDA ), 1, TAUI ) E( JK ) = A( IK+1+(JK-1)*LDA ) * IF( TAUI.NE.ZERO ) THEN * * Apply H(i) from both sides to * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1) * A( IK+1+(JK-1)*LDA ) = ONE * * Compute x := tau * A * v storing y in TAU(i:n-1) * CALL DSYMV( UPLO, N-J, TAUI, A( IK+1+JK*LDA ), $ LDA, A( IK+1+(JK-1)*LDA ), 1, $ ZERO, TAU( JK ), 1 ) * * Compute w := x - 1/2 * tau * (x'*v) * v * ALPHA = -HALF*TAUI*DDOT( N-J, TAU( JK ), 1, $ A( IK+1+(JK-1)*LDA ), 1 ) CALL DAXPY( N-J, ALPHA, A( IK+1+(JK-1)*LDA ), $ 1, TAU( JK ), 1 ) * * Apply the transformation as a rank-2 update: * A := A - v * w' - w * v' * CALL DSYR2( UPLO, N-J, -ONE, $ A( IK+1+(JK-1)*LDA ), 1, $ TAU( JK ), 1, A( IK+1+JK*LDA ), $ LDA ) A( IK+1+(JK-1)*LDA ) = E( JK ) END IF * * Copy D(JK), E(JK), TAU(JK) to broadcast them * columnwise. * D( JK ) = A( IK+(JK-1)*LDA ) WORK( J ) = D( JK ) WORK( N+J ) = E( JK ) TAU( JK ) = TAUI WORK( 2*N+J ) = TAU( JK ) 30 CONTINUE JN = JJ + N - 1 D( JN ) = A( II+N-1+(JN-1)*LDA ) WORK( N ) = D( JN ) TAU( JN ) = ZERO WORK( 2*N ) = ZERO * CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1 ) * ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 3*N-1, WORK, $ 1, IAROW, IACOL ) DO 40 J = 1, N - 1 JN = JJ + J - 1 D( JN ) = WORK( J ) E( JN ) = WORK( N+J ) TAU( JN ) = WORK( 2*N+J ) 40 CONTINUE JN = JJ + N - 1 D( JN ) = WORK( N ) TAU( JN ) = ZERO END IF END IF END IF * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDSYTD2 * END scalapack-1.8.0/SRC/psormtr.f0000640000175000017500000004036310363532303015626 0ustar muammarmuammar SUBROUTINE PSORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMTR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of nq-1 elementary reflectors, as returned by PSSYTRD: * * if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); * * if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(IA:*,JA:*) contains elementary * reflectors from PSSYTRD; * = 'L': Lower triangle of A(IA:*,JA:*) contains elementary * reflectors from PSSYTRD. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * or (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PSSYTRD. * If SIDE = 'L', LLD_A >= max(1,LOCr(IA+M-1)); * if SIDE = 'R', LLD_A >= max(1,LOCr(IA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL array, dimension LTAU, where * if SIDE = 'L' and UPLO = 'U', LTAU = LOCc(M_A), * if SIDE = 'L' and UPLO = 'L', LTAU = LOCc(JA+M-2), * if SIDE = 'R' and UPLO = 'U', LTAU = LOCc(N_A), * if SIDE = 'R' and UPLO = 'L', LTAU = LOCc(JA+N-2). * TAU(i) must contain the scalar factor of the elementary * reflector H(i), as returned by PSSYTRD. TAU is tied to the * distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * If UPLO = 'U', * IAA = IA, JAA = JA+1, ICC = IC, JCC = JC; * else UPLO = 'L', * IAA = IA+1, JAA = JA; * if SIDE = 'L', * ICC = IC+1; JCC = JC; * else * ICC = IC; JCC = JC+1; * end if * end if * * If SIDE = 'L', * MI = M-1; NI = N; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; MI = N-1; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN, UPPER INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSORMQL, $ PSORMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) * IF( UPPER ) THEN IAA = IA JAA = JA+1 ICC = IC JCC = JC ELSE IAA = IA+1 JAA = JA IF( LEFT ) THEN ICC = IC + 1 JCC = JC ELSE ICC = IC JCC = JC + 1 END IF END IF * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = M - 1 NI = N CALL CHK1MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, INFO ) ELSE NQ = N MI = M NI = N - 1 CALL CHK1MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, INFO ) END IF CALL CHK1MAT( MI, 4, NI, 5, ICC, JCC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -3 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'T' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMTR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) $ RETURN * IF( UPPER ) THEN * * Q was determined by a call to PSSYTRD with UPLO = 'U' * CALL PSORMQL( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * ELSE * * Q was determined by a call to PSSYTRD with UPLO = 'L' * CALL PSORMQR( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMTR * END scalapack-1.8.0/SRC/pzdtsv.f0000640000175000017500000004637710363532303015465 0ustar muammarmuammar SUBROUTINE PZDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 B( * ), D( * ), DL( * ), DU( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N complex * tridiagonal diagonally dominant-like distributed * matrix. * * Gaussian elimination without pivoting * is used to factor a reordering * of the matrix into L U. * * See PZDTTRF and PZDTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) COMPLEX*16 pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL+3*NB) * +max(10*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * diagonally dominant-like, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * stably factorable wo/interchanges, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PXERBLA, PZDTTRF, PZDTTRS * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PZDTTRF and PZDTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 6*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PZDTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL+3*NB) * * Factor the matrix * CALL PZDTTRF( N, DL, D, DU, JA, DESCA, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR ), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PZDTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PZDTTRS( 'N', N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZDTSV', -INFO ) RETURN END IF * RETURN * * End of PZDTSV * END scalapack-1.8.0/SRC/pdlange.f0000640000175000017500000002664410363532303015540 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLANGE returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * distributed matrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1). * * PDLANGE returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+M-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PDLANGE as described * above. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PDLANGE * is set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PDLANGE is set to zero. N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the distributed matrix sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IACOL, IAROW, ICTXT, II, ICOFF, IOFFA, $ IROFF, J, JJ, LDA, MP, MYCOL, MYROW, NPCOL, $ NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, DLASSQ, $ INFOG2L, PDTREECOMB * .. * .. External Functions .. LOGICAL LSAME INTEGER IDAMAX, NUMROC EXTERNAL LSAME, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO IF( NQ.GT.0 .AND. MP.GT.0 ) THEN IOFFA = (JJ-1)*LDA DO 20 J = JJ, JJ+NQ-1 DO 10 I = II, MP+II-1 VALUE = MAX( VALUE, ABS( A( IOFFA+I ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE END IF CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, J, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * * Find norm1( sub( A ) ). * IF( NQ.GT.0 ) THEN IOFFA = ( JJ - 1 ) * LDA DO 40 J = JJ, JJ+NQ-1 SUM = ZERO IF( MP.GT.0 ) THEN DO 30 I = II, MP+II-1 SUM = SUM + ABS( A( IOFFA+I ) ) 30 CONTINUE END IF IOFFA = IOFFA + LDA WORK( J-JJ+1 ) = SUM 40 CONTINUE END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, J, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * * Find normI( sub( A ) ). * IF( MP.GT.0 ) THEN IOFFA = II + ( JJ - 1 ) * LDA DO 60 I = II, II+MP-1 SUM = ZERO IF( NQ.GT.0 ) THEN DO 50 J = IOFFA, IOFFA + NQ*LDA - 1, LDA SUM = SUM + ABS( A( J ) ) 50 CONTINUE END IF WORK( I-II+1 ) = SUM IOFFA = IOFFA + 1 60 CONTINUE END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for supnorm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( IDAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, I, $ J, -1, 0, 0 ) END IF * ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE IOFFA = II + ( JJ - 1 ) * LDA IF( NQ.GT.0 ) THEN DO 70 J = IOFFA, IOFFA + NQ*LDA - 1, LDA CALL DLASSQ( MP, A( J ), 1, SCALE, SUM ) 70 CONTINUE END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PDLANGE = VALUE * RETURN * * End of PDLANGE * END scalapack-1.8.0/SRC/pzhegs2.f0000640000175000017500000003756110363532303015510 0ustar muammarmuammar* * SUBROUTINE PZHEGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZHEGS2 reduces a complex Hermitian-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PZPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PZPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, HALF PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IIA, IIB, IOFFA, IOFFB, IROFFA, IROFFB, $ JJA, JJB, K, LDA, LDB, MYCOL, MYROW, NPCOL, $ NPROW DOUBLE PRECISION AKK, BKK COMPLEX*16 CT * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, ZAXPY, ZDSCAL, ZHER2, ZLACGV, ZTRMV, $ ZTRSV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( N+ICOFFA.GT.DESCA( NB_ ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHEGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) LDB = DESCB( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * IF( IBTYPE.EQ.1 ) THEN * IF( UPPER ) THEN * IOFFA = IIA + JJA*LDA IOFFB = IIB + JJB*LDB * * Compute inv(U')*sub( A )*inv(U) * DO 10 K = 1, N * * Update the upper triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = DBLE( A( IOFFA-LDA ) ) BKK = DBLE( B( IOFFB-LDB ) ) AKK = AKK / BKK**2 A( IOFFA-LDA ) = AKK IF( K.LT.N ) THEN CALL ZDSCAL( N-K, ONE / BKK, A( IOFFA ), LDA ) CT = -HALF*AKK CALL ZLACGV( N-K, A( IOFFA ), LDA ) CALL ZLACGV( N-K, B( IOFFB ), LDB ) CALL ZAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL ZHER2( UPLO, N-K, -CONE, A( IOFFA ), LDA, $ B( IOFFB ), LDB, A( IOFFA+1 ), LDA ) CALL ZAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL ZLACGV( N-K, B( IOFFB ), LDB ) CALL ZTRSV( UPLO, 'Conjugate transpose', 'Non-unit', $ N-K, B( IOFFB+1 ), LDB, A( IOFFA ), LDA ) CALL ZLACGV( N-K, A( IOFFA ), LDA ) END IF * * A( IOFFA ) -> A( K, K+1 ) * B( IOFFB ) -> B( K, K+1 ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 10 CONTINUE * ELSE * IOFFA = IIA + 1 + ( JJA-1 )*LDA IOFFB = IIB + 1 + ( JJB-1 )*LDB * * Compute inv(L)*sub( A )*inv(L') * DO 20 K = 1, N * * Update the lower triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = DBLE( A( IOFFA-1 ) ) BKK = DBLE( B( IOFFB-1 ) ) AKK = AKK / BKK**2 A( IOFFA-1 ) = AKK * IF( K.LT.N ) THEN CALL ZDSCAL( N-K, ONE / BKK, A( IOFFA ), 1 ) CT = -HALF*AKK CALL ZAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL ZHER2( UPLO, N-K, -CONE, A( IOFFA ), 1, $ B( IOFFB ), 1, A( IOFFA+LDA ), LDA ) CALL ZAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL ZTRSV( UPLO, 'No transpose', 'Non-unit', N-K, $ B( IOFFB+LDB ), LDB, A( IOFFA ), 1 ) END IF * * A( IOFFA ) -> A( K+1, K ) * B( IOFFB ) -> B( K+1, K ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 20 CONTINUE * END IF * ELSE * IF( UPPER ) THEN * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute U*sub( A )*U' * DO 30 K = 1, N * * Update the upper triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = DBLE( A( IOFFA+K-1 ) ) BKK = DBLE( B( IOFFB+K-1 ) ) CALL ZTRMV( UPLO, 'No transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), 1 ) CT = HALF*AKK CALL ZAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL ZHER2( UPLO, K-1, CONE, A( IOFFA ), 1, B( IOFFB ), $ 1, A( IIA+( JJA-1 )*LDA ), LDA ) CALL ZAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL ZDSCAL( K-1, BKK, A( IOFFA ), 1 ) A( IOFFA+K-1 ) = AKK*BKK**2 * * A( IOFFA ) -> A( 1, K ) * B( IOFFB ) -> B( 1, K ) * IOFFA = IOFFA + LDA IOFFB = IOFFB + LDB * 30 CONTINUE * ELSE * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute L'*sub( A )*L * DO 40 K = 1, N * * Update the lower triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = DBLE( A( IOFFA+( K-1 )*LDA ) ) BKK = DBLE( B( IOFFB+( K-1 )*LDB ) ) CALL ZLACGV( K-1, A( IOFFA ), LDA ) CALL ZTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), $ LDA ) CT = HALF*AKK CALL ZLACGV( K-1, B( IOFFB ), LDB ) CALL ZAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL ZHER2( UPLO, K-1, CONE, A( IOFFA ), LDA, B( IOFFB ), $ LDB, A( IIA+( JJA-1 )*LDA ), LDA ) CALL ZAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL ZLACGV( K-1, B( IOFFB ), LDB ) CALL ZDSCAL( K-1, BKK, A( IOFFA ), LDA ) CALL ZLACGV( K-1, A( IOFFA ), LDA ) A( IOFFA+( K-1 )*LDA ) = AKK*BKK**2 * * A( IOFFA ) -> A( K, 1 ) * B( IOFFB ) -> B( K, 1 ) * IOFFA = IOFFA + 1 IOFFB = IOFFB + 1 * 40 CONTINUE * END IF * END IF * RETURN * * End of PZHEGS2 * END scalapack-1.8.0/SRC/pdsyev.f0000640000175000017500000005262510363532303015436 0ustar muammarmuammar SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, $ Z, IZ, JZ, DESCZ, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) DOUBLE PRECISION A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSYEV computes all eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PDSYEV assumes a homogeneous system and makes * no checks for consistency of the eigenvalues or eigenvectors across * the different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Notes * ===== * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distributed * matrix A. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of A. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of A. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of the * distributed matrix A. * LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic DOUBLE PRECISION array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PDSYEV cannot guarantee * correct error reporting. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension (LWORK) * Version 1.0: on output, WORK(1) returns the workspace * needed to guarantee completion. * If the input parameters are incorrect, WORK(1) may also be * incorrect. * * If JOBZ='N' WORK(1) = minimal=optimal amount of workspace * If JOBZ='V' WORK(1) = minimal workspace required to * generate all the eigenvectors. * * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5*N + SIZESYTRD + 1 * where * SIZESYTRD = The workspace requirement for PDSYTRD * and is MAX( NB * ( NP +1 ), 3 * NB ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * * QRMEM = 2*N-2 * LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 * * Variable definitions: * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( RSRC_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP = NUMROC( NN, NB, 0, 0, NPROW ) * NQ = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) * LDC = MAX( 1, NRC ) * SIZEMQRLEFT = The workspace requirement for PDORMTR * when it's SIDE argument is 'L'. * * With MYPROWC defined when a new context is created as: * CALL BLACS_GET( DESCA( CTXT_ ), 0, CONTEXTC ) * CALL BLACS_GRIDINIT( CONTEXTC, 'R', NPROCS, 1 ) * CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, * MYPCOLC ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in DSTEQR2 after a total of 30*N iterations. * If INFO = N+1, then PDSYEV has detected heterogeneity * by finding that eigenvalues were not identical across * the process grid. In this case, the accuracy of * the results from PDSYEV cannot be guaranteed. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and Z(IZ:IZ+M-1,JZ:JZ+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION FIVE, ONE, TEN, ZERO PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, $ TEN = 10.0D+0, FIVE = 5.0D+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ, ITHVAL PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8, ITHVAL = 10 ) * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, $ MYCOL, MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, $ SIZESYTRD DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCQR( 9 ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC, SL_GRIDRESHAPE DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, NUMROC, PDLAMCH, PDLANSY, $ SL_GRIDRESHAPE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, CHK1MAT, DCOPY, $ DESCINIT, DSCAL, DSTEQR2, PCHK1MAT, PCHK2MAT, $ PDELGET, PDGEMR2D, PDLASCL, PDLASET, PDORMTR, $ PDSYTRD, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD, SQRT, INT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE IF( WANTZ ) THEN IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( INFO .EQ. 0 ) THEN CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A LOWER = LSAME( UPLO, 'L' ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) IF( WANTZ ) THEN NB_Z = DESCZ( NB_ ) MB_Z = DESCZ( MB_ ) RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) SIZEMQRLEFT = MAX( ( NB_A*( NB_A-1 ) ) / 2, ( NP+NQ )* $ NB_A ) + NB_A*NB_A ELSE SIZEMQRLEFT = 0 END IF SIZESYTRD = MAX( NB * ( NP +1 ), 3 * NB ) * * Initialize the context of the single column distributed * matrix required by DSTEQR2. This specific distribution * allows each process to do 1/pth of the work updating matrix * Q during DSTEQR2 and achieve some parallelization to an * otherwise serial subroutine. * LDC = 0 IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB_A, MYPROWC, 0, NPROCS) LDC = MAX( 1, NRC ) CALL DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, $ LDC, INFO ) END IF * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDWORK = INDE2 + N INDWORK2 = INDWORK + N*LDC LLWORK = LWORK - INDWORK + 1 * * Compute the total amount of space needed * QRMEM = 2*N-2 IF( WANTZ ) THEN LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 ELSE LWMIN = 5*N + SIZESYTRD + 1 END IF * END IF IF( INFO.EQ.0 ) THEN IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) ENDIF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 3 IF( LSAME( JOBZ, 'V' ) ) THEN CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, $ IZ, JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 3, IDUM1, $ IDUM2, INFO ) END IF * * Write the required workspace for lwork queries. * WORK( 1 ) = DBLE( LWMIN ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSYEV', -INFO ) IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN ELSE IF( LWORK .EQ. -1 ) THEN IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN END IF * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PDLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PDLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * CALL PDSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * DO 10 I=1,N CALL PDELGET( 'A', ' ', WORK(INDD2+I-1), A, $ I+IA-1, I+JA-1, DESCA ) 10 CONTINUE IF( LSAME( UPLO, 'U') ) THEN DO 20 I=1,N-1 CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA-1, I+JA, DESCA ) 20 CONTINUE ELSE DO 30 I=1,N-1 CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA, I+JA-1, DESCA ) 30 CONTINUE ENDIF * IF( WANTZ ) THEN * CALL PDLASET( 'Full', N, N, ZERO, ONE, WORK( INDWORK ), 1, 1, $ DESCQR ) * * DSTEQR2 is a modified version of LAPACK's DSTEQR. The * modifications allow each process to perform partial updates * to matrix Q. * CALL DSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), $ INFO ) * CALL PDGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, $ DESCZ, CONTEXTC ) * CALL PDORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) * ELSE * CALL DSTEQR2( 'N', N, WORK( INDD2 ), WORK( INDE2 ), $ WORK( INDWORK ), 1, 1, WORK( INDWORK2 ), $ INFO ) ENDIF * * Copy eigenvalues from workspace to output array * CALL DCOPY( N, WORK( INDD2 ), 1, W, 1 ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE .EQ. 1 ) THEN CALL DSCAL( N, ONE / SIGMA, W, 1 ) END IF * * Free up resources * IF( WANTZ ) THEN CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF( N.LE.ITHVAL ) THEN J = N K = 1 ELSE J = N/ITHVAL K = ITHVAL END IF * DO 40 I = 1, J WORK( I+INDTAU ) = W( (I-1)*K+1 ) WORK( I+INDE ) = W( (I-1)*K+1 ) 40 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDTAU ), $ J, 1, 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDE ), $ J, 1, 1, -1, -1, 0 ) * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ) $ .NE. ZERO ) )THEN INFO = N+1 END IF 50 CONTINUE * RETURN * * End of PDSYEV * END scalapack-1.8.0/SRC/pzunmr3.f0000640000175000017500000003546610363532303015546 0ustar muammarmuammar SUBROUTINE PZUNMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMR3 overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( * NUMROC( M+IROFFC,MB_A,0,0,NPROW ),MB_A,0,0,LCMP ) ); * if SIDE = 'R', LWORK >= NqC0 + MAX( 1, MpC0 ); * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER I, I1, I2, I3, IACOL, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IROFFC, JAA, JCC, LCM, $ LCMP, LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPCOL, $ NPROW, NQ, NQC0 * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZLARZ, PZLARZC * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MPC0 + MAX( MAX( 1, NQC0 ), NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ) ) ELSE LWMIN = NQC0 + MAX( 1, MPC0 ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( L.LT.0 .OR. L.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = IA I2 = IA + K - 1 I3 = 1 ELSE I1 = IA + K - 1 I2 = IA I3 = -1 END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * DO 10 I = I1, I2, I3 IF( LEFT ) THEN * * H(i) or H(i)' is applied to C(ic+i-ia:icc+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H(i) or H(i)' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H(i) or H(i)' * IF( NOTRAN ) THEN CALL PZLARZ( SIDE, MI, NI, L, A, I, JAA, DESCA, DESCA( M_ ), $ TAU, C, ICC, JCC, DESCC, WORK ) ELSE CALL PZLARZC( SIDE, MI, NI, L, A, I, JAA, DESCA, $ DESCA( M_ ), TAU, C, ICC, JCC, DESCC, WORK ) END IF * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMR3 * END scalapack-1.8.0/SRC/pssytrd.f0000640000175000017500000004006610363532303015630 0ustar muammarmuammar SUBROUTINE PSSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSSYTRD reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) REAL, array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IINFO, IPW, $ IROFFA, J, JB, JX, K, KK, LWMIN, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PSLATRD, PSSYR2K, PSSYTD2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( (NP+1)*NB, 3*NB ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP * NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, INDXG2P( JA+N-KK, $ NB, MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, NP ) ) * DO 10 K = N-KK+1, NB+1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PSLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, TAU, $ WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PSSYR2K( UPLO, 'No transpose', K-1, JB, -ONE, A, IA, J, $ DESCA, WORK, 1, 1, DESCW, ONE, A, IA, JA, $ DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PSELSET( A, I-1, J, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PSSYTD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PSLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PSSYR2K( UPLO, 'No transpose', N-K-NB+1, NB, -ONE, A, $ I+NB, J, DESCA, WORK, K+NB, 1, DESCW, ONE, A, $ I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PSELSET( A, I+NB, J+NB-1, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PSSYTD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSSYTRD * END scalapack-1.8.0/SRC/pslaiect.c0000640000175000017500000001423710363532303015722 0ustar muammarmuammar /* --------------------------------------------------------------------- * * -- ScaLAPACK routine (version 1.5) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * --------------------------------------------------------------------- */ /* * Include Files */ #include "pxsyevx.h" #include #include #define proto(x) () void pslasnbt_( ieflag ) /* * .. Scalar Arguments .. */ int *ieflag; { /* * * Purpose * ======= * * psalsnbt finds the position of the signbit of a single * precision floating point number. This routine assumes IEEE * arithmetic, and hence, tests only the 32nd bit as a possibility * for the sign bit. * * Note : For this release, we assume that sizeof(int) is 4 bytes. * * Note : If a compile time flag (NO_IEEE) indicates that the * machine does not have IEEE arithmetic, IEFLAG = 0 is returned. * * Arguments * ========= * * IEFLAG (output) INTEGER * This indicates the position of the signbit of any single * precision floating point number. * IEFLAG = 0 if the compile time flag, NO_IEEE, indicates * that the machine does not have IEEE arithmetic, or if * sizeof(int) is different from 4 bytes. * IEFLAG = 1 indicates that the sign bit is the 32nd bit. * * ===================================================================== * * .. Local Scalars .. */ float x; int negone=-1, errornum; unsigned int *ix; /* .. * .. Executable Statements .. */ #ifdef NO_IEEE *ieflag = 0; #else if(sizeof(int) != 4){ *ieflag = 0; return; } x = (float) -1.0; ix = (unsigned int *) &x; if( *ix == 0xbff00000 ) { *ieflag = 1; } else { *ieflag = 0; } #endif } void pslaiect_( sigma, n, d, count ) /* * .. Scalar Arguments .. */ float *sigma, *d; int *n, *count; { /* * * Purpose * ======= * * pslaiect computes the number of negative eigenvalues of (A- SIGMA I). * This implementation of the Sturm Sequence loop exploits IEEE Arithmetic * and has no conditionals in the innermost loop. The signbit is assumed * to be bit 32. * * Note that all arguments are call-by-reference so that this routine * can be directly called from Fortran code. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * SIGMA (input) REAL * The shift. pslaiect finds the number of eigenvalues less * than equal to SIGMA. * * N (input) INTEGER * The order of the tridiagonal matrix T. N >= 1. * * D (input) REAL array, dimension (2*N - 1) * Contains the diagonals and the squares of the off-diagonal * elements of the tridiagonal matrix T. These elements are * assumed to be interleaved in memory for better cache * performance. The diagonal entries of T are in the entries * D(1),D(3),...,D(2*N-1), while the squares of the off-diagonal * entries are D(2),D(4),...,D(2*N-2). To avoid overflow, the * matrix must be scaled so that its largest entry is no greater * than overflow**(1/2) * underflow**(1/4) in absolute value, * and for greatest accuracy, it should not be much smaller * than that. * * COUNT (output) INTEGER * The count of the number of eigenvalues of T less than or * equal to SIGMA. * * ===================================================================== * * .. Local Scalars .. */ float lsigma, tmp, *pd, *pe2; int i; /* .. * .. Executable Statements .. */ lsigma = *sigma; pd = d; pe2 = d+1; tmp = *pd - lsigma; pd += 2; *count = (*((int *)&tmp) >> 31) & 1; for(i = 1;i < *n;i++){ tmp = *pd - *pe2/tmp - lsigma; pd += 2; pe2 += 2; *count += ((*((int *)&tmp)) >> 31) & 1; } } pslachkieee_( isieee, rmax, rmin ) /* * .. Scalar Arguments .. */ float *rmax, *rmin; int *isieee; { /* * * Purpose * ======= * * pslachkieee performs a simple check to make sure that the features * of the IEEE standard that we rely on are implemented. In some * implementations, pslachkieee may not return. * * Note that all arguments are call-by-reference so that this routine * can be directly called from Fortran code. * * This is a ScaLAPACK internal subroutine and arguments are not * checked for unreasonable values. * * Arguments * ========= * * ISIEEE (local output) INTEGER * On exit, ISIEEE = 1 implies that all the features of the * IEEE standard that we rely on are implemented. * On exit, ISIEEE = 0 implies that some the features of the * IEEE standard that we rely on are missing. * * RMAX (local input) REAL * The overflow threshold ( = SLAMCH('O') ). * * RMIN (local input) REAL * The underflow threshold ( = SLAMCH('U') ). * * ===================================================================== * * .. Local Scalars .. */ float x, pinf, pzero, ninf, nzero; int ieflag, *ix, sbit1, sbit2, negone=-1, errornum; /* .. * .. Executable Statements .. */ pslasnbt_( &ieflag ); pinf = *rmax / *rmin; pzero = 1.0 / pinf; pinf = 1.0 / pzero; if( pzero != 0.0 ){ printf("pzero = %g should be zero\n",pzero); *isieee = 0; return ; } if( ieflag == 1 ){ sbit1 = (*((int *)&pzero) >> 31) & 1; sbit2 = (*((int *)&pinf) >> 31) & 1; } if( sbit1 == 1 ){ printf("Sign of positive infinity is incorrect\n"); *isieee = 0; } if( sbit2 == 1 ){ printf("Sign of positive zero is incorrect\n"); *isieee = 0; } ninf = -pinf; nzero = 1.0 / ninf; ninf = 1.0 / nzero; if( nzero != 0.0 ){ printf("nzero = %g should be zero\n",nzero); *isieee = 0; } if( ieflag == 1 ){ sbit1 = (*((int *)&nzero) >> 31) & 1; sbit2 = (*((int *)&ninf) >> 31) & 1; } if( sbit1 == 0 ){ printf("Sign of negative infinity is incorrect\n"); *isieee = 0; } if( sbit2 == 0 ){ printf("Sign of negative zero is incorrect\n"); *isieee = 0; } } scalapack-1.8.0/SRC/pcungrq.f0000640000175000017500000002704610363532303015602 0ustar muammarmuammar SUBROUTINE PCUNGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGRQ generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PCGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. * N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PCGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( MpA0 + NqA0 + MB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IINFO, IN, IPW, $ LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCLARFB, $ PCLARFT, PCLASET, PCUNGR2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * * Set A(ia:in,ja+n-m+in-ia+1:ja-n+1) to zero. * CALL PCLASET( 'All', IN-IA+1, M-IN+IA-1, ZERO, ZERO, A, IA, $ JA+N-M+IN-IA+1, DESCA ) * * Use unblocked code for the first or only block. * CALL PCUNGR2( IN-IA+1, N-M+IN-IA+1, IN-IA-M+K+1, A, IA, JA, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Use blocked code * DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PCLARFT( 'Backward', 'Rowwise', N-M+I+IB-IA, IB, A, I, JA, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H' to A(ia:i-1,ja:ja+n-m+i+ib-ia-1) from the right * CALL PCLARFB( 'Right', 'Conjugate transpose', 'Backward', $ 'Rowwise', I-IA, N-M+I+IB-IA, IB, A, I, JA, $ DESCA, WORK, A, IA, JA, DESCA, WORK( IPW ) ) * * Apply H' to columns ja:ja+n-m+i+ib-ia-1 of current block * CALL PCUNGR2( IB, N-M+I+IB-IA, IB, A, I, JA, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows i:i+ib-1,ja+n-m+i+ib-ia:ja+n-1 of current block to * zero * CALL PCLASET( 'All', IB, M-I-IB+IA, ZERO, ZERO, A, I, $ JA+N-M+I+IB-IA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGRQ * END scalapack-1.8.0/SRC/pdptsv.f0000640000175000017500000004530410363532303015440 0ustar muammarmuammar SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPTSV solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal symmetric positive definite distributed * matrix. * * Cholesky factorization is used to factor a reordering of * the matrix into L L'. * * See PDPTTRF and PDPTTRS for details. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (12*NPCOL + 3*NB) * +max((10+2*min(100,NRHS))*NPCOL+4*NRHS, 8*NPCOL) * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ICTXT, MYCOL, MYROW, NB, NPCOL, NPROW, $ WS_FACTOR * .. * .. External Subroutines .. EXTERNAL PDPTTRF, PDPTTRS, PXERBLA * .. * .. Executable Statements .. * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPTTRF and PDPTTRS. * * Begin main code * INFO = 0 * * Get block size to calculate workspace requirements * IF( DESCA( DTYPE_ ) .EQ. BLOCK_CYCLIC_2D ) THEN NB = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) ELSEIF( DESCA( DTYPE_ ) .EQ. 501 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSEIF( DESCA( DTYPE_ ) .EQ. 502 ) THEN NB = DESCA( 4 ) ICTXT = DESCA( 2 ) ELSE INFO = -( 5*100 + DTYPE_ ) CALL PXERBLA( ICTXT, $ 'PDPTSV', $ -INFO ) RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * * Size needed for AF in factorization * WS_FACTOR = (12*NPCOL + 3*NB) * * Factor the matrix * CALL PDPTTRF( N, D, E, JA, DESCA, WORK, MIN( LWORK, WS_FACTOR ), $ WORK( 1+WS_FACTOR ), LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) ENDIF RETURN END IF * * Solve the system using the factorization * CALL PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ MIN( LWORK, WS_FACTOR ), WORK( 1+WS_FACTOR), $ LWORK-WS_FACTOR, INFO ) * * Check info for error conditions * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) RETURN END IF * RETURN * * End of PDPTSV * END scalapack-1.8.0/SRC/pzlaconsb.f0000640000175000017500000005113510363532303016112 0ustar muammarmuammar SUBROUTINE PZLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, L, LWORK, M COMPLEX*16 H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), BUF( * ) * .. * * Purpose * ======= * * PZLACONSB looks for two consecutive small subdiagonal elements by * seeing the effect of starting a double shift QR iteration * given by H44, H33, & H43H34 and see if this would make a * subdiagonal negligible. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) COMPLEX*16 array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * M (global output) INTEGER * On exit, this yields the starting location of the QR double * shift. This will satisfy: L <= M <= I-2. * * H44 * H33 * H43H34 (global input) COMPLEX*16 * These three values are for the double shift QR iteration. * * BUF (local output) COMPLEX*16 array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 7*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Logic: * ====== * * Two consecutive small subdiagonal elements will stall * convergence of a double shift if their product is small * relatively even if each is not very small. Thus it is * necessary to scan the "tridiagonal portion of the matrix." In * the LAPACK algorithm ZLAHQR, a loop of M goes from I-2 down to * L and examines * H(m,m),H(m+1,m+1),H(m+1,m),H(m,m+1),H(m-1,m-1),H(m,m-1), and * H(m+2,m-1). Since these elements may be on separate * processors, the first major loop (10) goes over the tridiagonal * and has each node store whatever values of the 7 it has that * the node owning H(m,m) does not. This will occur on a border * and can happen in no more than 3 locations per block assuming * square blocks. There are 5 buffers that each node stores these * values: a buffer to send diagonally down and right, a buffer * to send up, a buffer to send left, a buffer to send diagonally * up and left and a buffer to send right. Each of these buffers * is actually stored in one buffer BUF where BUF(ISTR1+1) starts * the first buffer, BUF(ISTR2+1) starts the second, etc.. After * the values are stored, if there are any values that a node * needs, they will be sent and received. Then the next major * loop passes over the data and searches for two consecutive * small subdiagonals. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, IBUF3, IBUF4, $ IBUF5, ICOL1, II, IRCV1, IRCV2, IRCV3, IRCV4, $ IRCV5, IROW1, ISRC, ISTR1, ISTR2, ISTR3, ISTR4, $ ISTR5, JJ, JSRC, LDA, LEFT, MODKM1, MYCOL, $ MYROW, NPCOL, NPROW, NUM, RIGHT, UP DOUBLE PRECISION S, TST1, ULP COMPLEX*16 CDUM, H00, H10, H11, H12, H21, H22, H33S, H44S, $ V1, V2, V3 * .. * .. External Functions .. INTEGER ILCM DOUBLE PRECISION PDLAMCH EXTERNAL ILCM, PDLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, INFOG2L, PXERBLA, $ ZGERV2D, ZGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, MOD * .. * .. Statement Functions .. DOUBLE PRECISION CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 starts at BUF(ISTR1+1) and will contain IBUF1 elements * BUFFER2 starts at BUF(ISTR2+1) and will contain IBUF2 elements * BUFFER3 starts at BUF(ISTR3+1) and will contain IBUF3 elements * BUFFER4 starts at BUF(ISTR4+1) and will contain IBUF4 elements * BUFFER5 starts at BUF(ISTR5+1) and will contain IBUF5 elements * ISTR1 = 0 ISTR2 = ( ( I-L-1 ) / HBL ) IF( ISTR2*HBL.LT.( I-L-1 ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PZLACONSB', 10 ) RETURN END IF ISTR3 = 3*ISTR2 ISTR4 = ISTR3 + ISTR2 ISTR5 = ISTR3 + ISTR3 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) * * Copy our relevant pieces of triadiagonal that we owe into * 5 buffers to send to whomever owns H(M,M) as M moves diagonally * up the tridiagonal * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 IRCV1 = 0 IRCV2 = 0 IRCV3 = 0 IRCV4 = 0 IRCV5 = 0 DO 10 M = I - 2, L, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M-1,M-1) and send it diagonal down * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( M-1, M-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M ,M-1) and send it right * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF5 = IBUF5 + 1 BUF( ISTR5+IBUF5 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+1,M) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( MYROW.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M ,M+1) and send it left * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF3 = IBUF3 + 1 BUF( ISTR3+IBUF3 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M+1,M+1) & H(M+2,M+1) and send it * diagonally up * IF( ( UP.NE.MYROW ) .OR. ( LEFT.NE.MYCOL ) ) THEN CALL INFOG2L( M+1, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF4 = IBUF4 + 2 BUF( ISTR4+IBUF4-1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) BUF( ISTR4+IBUF4 ) = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+2,M+1) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * Add up the receives * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( M.GT.L ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M-1,M-1) from diagonal up * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) .AND. ( M.GT.L ) ) $ THEN * * We must receive H(M ,M-1) from left * IRCV5 = IRCV5 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+1,M ) from down * IRCV2 = IRCV2 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPCOL.GT.1 ) ) THEN * * We must receive H(M ,M+1) from right * IRCV3 = IRCV3 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M+1:M+2,M+1) from diagonal down * IRCV4 = IRCV4 + 2 END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+2,M+1) from down * IRCV2 = IRCV2 + 1 END IF END IF * * Possibly change owners (occurs only when MOD(M-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * * Send data on to the appropriate node if there is any data to send * IF( IBUF1.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, UP, $ MYCOL ) END IF IF( IBUF3.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF3, 1, BUF( ISTR3+1 ), IBUF3, MYROW, $ LEFT ) END IF IF( IBUF4.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF4, 1, BUF( ISTR4+1 ), IBUF4, UP, $ LEFT ) END IF IF( IBUF5.GT.0 ) THEN CALL ZGESD2D( CONTXT, IBUF5, 1, BUF( ISTR5+1 ), IBUF5, MYROW, $ RIGHT ) END IF * * Receive appropriate data if there is any * IF( IRCV1.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, DOWN, $ MYCOL ) END IF IF( IRCV3.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV3, 1, BUF( ISTR3+1 ), IRCV3, MYROW, $ RIGHT ) END IF IF( IRCV4.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV4, 1, BUF( ISTR4+1 ), IRCV4, DOWN, $ RIGHT ) END IF IF( IRCV5.GT.0 ) THEN CALL ZGERV2D( CONTXT, IRCV5, 1, BUF( ISTR5+1 ), IRCV5, MYROW, $ LEFT ) END IF * * Start main loop * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. $ ( MODKM1.NE.HBL-1 ) ) THEN CALL INFOG2L( I-2, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) END IF * * Look for two consecutive small subdiagonal elements. * DO 20 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H00 = BUF( ISTR1+IBUF1 ) ELSE H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF5 = IBUF5 + 1 H10 = BUF( ISTR5+IBUF5 ) ELSE H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF END IF IF( MODKM1.EQ.HBL-1 ) THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) H11 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( NUM.GT.1 ) THEN IBUF4 = IBUF4 + 2 H22 = BUF( ISTR4+IBUF4-1 ) V3 = BUF( ISTR4+IBUF4 ) ELSE H22 = A( ICOL1*LDA+IROW1+1 ) V3 = A( ( ICOL1+1 )*LDA+IROW1+1 ) END IF IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 H21 = BUF( ISTR2+IBUF2 ) ELSE H21 = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF3 = IBUF3 + 1 H12 = BUF( ISTR3+IBUF3 ) ELSE H12 = A( ICOL1*LDA+IROW1 ) END IF IF( M.GT.L ) THEN H00 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF * * Adjust ICOL1 for next iteration where MODKM1=HBL-2 * ICOL1 = ICOL1 + 1 END IF IF( MODKM1.EQ.HBL-2 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 V3 = BUF( ISTR2+IBUF2 ) ELSE V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) END IF H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.LT.HBL-2 ) .AND. ( MODKM1.GT.0 ) ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S IF( M.EQ.L ) $ GO TO 30 TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 ) $ GO TO 30 * * Slide indices diagonally up one for next iteration * IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF IF( M.EQ.L ) THEN * * Stop regardless of which node we are * GO TO 30 END IF * * Possibly change owners if on border * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 20 CONTINUE 30 CONTINUE * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * RETURN * * End of PZLACONSB * END scalapack-1.8.0/SRC/pdlansy.f0000640000175000017500000007030510363532303015571 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDLANSY returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * real symmetric distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PDLANSY returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PDLANSY as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PDLANSY is set to zero. N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the symmetric distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DAXPY, DCOMBSSQ, $ DGAMX2D, DGSUM2D, DGEBR2D, $ DGEBS2D, DLASSQ, PDCOL2ROW, $ PDTREECOMB * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL ICEIL, IDAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is symmetric, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 30 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 70 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 100 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 140 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * symmetric). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 210 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 250 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 280 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 320 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to DGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PDCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL DAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( IDAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL DLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL DLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL DLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL DLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL DLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL DLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL DLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL DLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PDLANSY = VALUE * RETURN * * End of PDLANSY * END scalapack-1.8.0/SRC/pslaset.f0000640000175000017500000002166710363532303015601 0ustar muammarmuammar SUBROUTINE PSLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N REAL ALPHA, BETA * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSLASET initializes an M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1) to BETA on the diagonal and ALPHA on the * offdiagonals. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * set: * = 'U': Upper triangular part is set; the strictly lower * triangular part of sub( A ) is not changed; * = 'L': Lower triangular part is set; the strictly upper * triangular part of sub( A ) is not changed; * Otherwise: All of the matrix sub( A ) is set. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * ALPHA (global input) REAL * The constant to which the offdiagonal elements are to be * set. * * BETA (global input) REAL * The constant to which the diagonal elements are to be set. * * A (local output) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array * contains the local pieces of the distributed matrix sub( A ) * to be set. On exit, the leading M-by-N submatrix sub( A ) * is set as follows: * * if UPLO = 'U', A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=j-1, 1<=j<=N, * if UPLO = 'L', A(IA+i-1,JA+j-1) = ALPHA, j+1<=i<=M, 1<=j<=N, * otherwise, A(IA+i-1,JA+j-1) = ALPHA, 1<=i<=M, 1<=j<=N, * IA+i.NE.JA+j, * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER I, IAA, IBLK, IN, ITMP, J, JAA, JBLK, JN, JTMP * .. * .. External Subroutines .. EXTERNAL PSLASE2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN CALL PSLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) ELSE * IF( LSAME( UPLO, 'U' ) ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) CALL PSLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 10 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) JAA = JA + ITMP CALL PSLASE2( UPLO, IBLK, N-ITMP, ALPHA, BETA, $ A, I, JAA, DESCA ) 10 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) CALL PSLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, JA, $ DESCA ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) IAA = IA + JTMP CALL PSLASE2( UPLO, M-JTMP, JBLK, ALPHA, BETA, A, IAA, $ J, DESCA ) 20 CONTINUE ELSE IF( M.LE.N ) THEN IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), $ IA+M-1 ) CALL PSLASE2( UPLO, IN-IA+1, N, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 30 I = IN+1, IA+M-1, DESCA( MB_ ) ITMP = I-IA IBLK = MIN( DESCA( MB_ ), M-ITMP ) CALL PSLASE2( UPLO, IBLK, I-IA, ALPHA, ALPHA, A, I, $ JA, DESCA ) CALL PSLASE2( UPLO, IBLK, N-I+IA, ALPHA, BETA, A, I, $ JA+I-IA, DESCA ) 30 CONTINUE ELSE JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), $ JA+N-1 ) CALL PSLASE2( UPLO, M, JN-JA+1, ALPHA, BETA, A, IA, $ JA, DESCA ) DO 40 J = JN+1, JA+N-1, DESCA( NB_ ) JTMP = J-JA JBLK = MIN( DESCA( NB_ ), N-JTMP ) CALL PSLASE2( UPLO, J-JA, JBLK, ALPHA, ALPHA, A, IA, $ J, DESCA ) CALL PSLASE2( UPLO, M-J+JA, JBLK, ALPHA, BETA, A, $ IA+J-JA, J, DESCA ) 40 CONTINUE END IF END IF * END IF * RETURN * * End of PSLASET * END scalapack-1.8.0/SRC/pssygs2.f0000640000175000017500000003631510363532303015534 0ustar muammarmuammar* * SUBROUTINE PSSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL A( * ), B( * ) * .. * * Purpose * ======= * * PSSYGS2 reduces a real symmetric-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**T or L**T*sub( A )*L. * * sub( B ) must have been previously factorized as U**T*U or L*L**T by * PSPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**T)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**T); * = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**T*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**T. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) REAL pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PSPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, HALF PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IIA, IIB, IOFFA, IOFFB, IROFFA, IROFFB, $ JJA, JJB, K, LDA, LDB, MYCOL, MYROW, NPCOL, $ NPROW REAL AKK, BKK, CT * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, SAXPY, SSCAL, SSYR2, STRMV, STRSV * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( N+ICOFFA.GT.DESCA( NB_ ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSSYGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) LDB = DESCB( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * IF( IBTYPE.EQ.1 ) THEN * IF( UPPER ) THEN * IOFFA = IIA + JJA*LDA IOFFB = IIB + JJB*LDB * * Compute inv(U')*sub( A )*inv(U) * DO 10 K = 1, N * * Update the upper triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = A( IOFFA-LDA ) BKK = B( IOFFB-LDB ) AKK = AKK / BKK**2 A( IOFFA-LDA ) = AKK IF( K.LT.N ) THEN CALL SSCAL( N-K, ONE / BKK, A( IOFFA ), LDA ) CT = -HALF*AKK CALL SAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL SSYR2( UPLO, N-K, -ONE, A( IOFFA ), LDA, $ B( IOFFB ), LDB, A( IOFFA+1 ), LDA ) CALL SAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL STRSV( UPLO, 'Transpose', 'Non-unit', N-K, $ B( IOFFB+1 ), LDB, A( IOFFA ), LDA ) END IF * * A( IOFFA ) -> A( K, K+1 ) * B( IOFFB ) -> B( K, K+1 ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 10 CONTINUE * ELSE * IOFFA = IIA + 1 + ( JJA-1 )*LDA IOFFB = IIB + 1 + ( JJB-1 )*LDB * * Compute inv(L)*sub( A )*inv(L') * DO 20 K = 1, N * * Update the lower triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = A( IOFFA-1 ) BKK = B( IOFFB-1 ) AKK = AKK / BKK**2 A( IOFFA-1 ) = AKK * IF( K.LT.N ) THEN CALL SSCAL( N-K, ONE / BKK, A( IOFFA ), 1 ) CT = -HALF*AKK CALL SAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL SSYR2( UPLO, N-K, -ONE, A( IOFFA ), 1, $ B( IOFFB ), 1, A( IOFFA+LDA ), LDA ) CALL SAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL STRSV( UPLO, 'No transpose', 'Non-unit', N-K, $ B( IOFFB+LDB ), LDB, A( IOFFA ), 1 ) END IF * * A( IOFFA ) -> A( K+1, K ) * B( IOFFB ) -> B( K+1, K ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 20 CONTINUE * END IF * ELSE * IF( UPPER ) THEN * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute U*sub( A )*U' * DO 30 K = 1, N * * Update the upper triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = A( IOFFA+K-1 ) BKK = B( IOFFB+K-1 ) CALL STRMV( UPLO, 'No transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), 1 ) CT = HALF*AKK CALL SAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL SSYR2( UPLO, K-1, ONE, A( IOFFA ), 1, B( IOFFB ), 1, $ A( IIA+( JJA-1 )*LDA ), LDA ) CALL SAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL SSCAL( K-1, BKK, A( IOFFA ), 1 ) A( IOFFA+K-1 ) = AKK*BKK**2 * * A( IOFFA ) -> A( 1, K ) * B( IOFFB ) -> B( 1, K ) * IOFFA = IOFFA + LDA IOFFB = IOFFB + LDB * 30 CONTINUE * ELSE * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute L'*sub( A )*L * DO 40 K = 1, N * * Update the lower triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = A( IOFFA+( K-1 )*LDA ) BKK = B( IOFFB+( K-1 )*LDB ) CALL STRMV( UPLO, 'Transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), $ LDA ) CT = HALF*AKK CALL SAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL SSYR2( UPLO, K-1, ONE, A( IOFFA ), LDA, B( IOFFB ), $ LDB, A( IIA+( JJA-1 )*LDA ), LDA ) CALL SAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL SSCAL( K-1, BKK, A( IOFFA ), LDA ) A( IOFFA+( K-1 )*LDA ) = AKK*BKK**2 * * A( IOFFA ) -> A( K, 1 ) * B( IOFFB ) -> B( K, 1 ) * IOFFA = IOFFA + 1 IOFFB = IOFFB + 1 * 40 CONTINUE * END IF * END IF * RETURN * * End of PSSYGS2 * END scalapack-1.8.0/SRC/pjlaenv.f0000640000175000017500000002533010363532303015554 0ustar muammarmuammar INTEGER FUNCTION PJLAENV( ICTXT, ISPEC, NAME, OPTS, N1, $ N2, N3, N4 ) * * -- ScaLAPACK test routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS INTEGER ICTXT, ISPEC, N1, N2, N3, N4 * .. * * Purpose * * ======= * * PJLAENV is called from the ScaLAPACK symmetric and Hermitian * tailored eigen-routines to choose * problem-dependent parameters for the local environment. See ISPEC * for a description of the parameters. * * This version provides a set of parameters which should give good, * but not optimal, performance on many of the currently available * computers. Users are encouraged to modify this subroutine to set * the tuning parameters for their particular machine using the option * and problem size information in the arguments. * * This routine will not function correctly if it is converted to all * lower case. Converting it to all upper case is allowed. * * Arguments * ========= * * ISPEC (global input) INTEGER * Specifies the parameter to be returned as the value of * PJLAENV. * = 1: the data layout blocksize; * = 2: the panel blocking factor; * = 3: the algorithmic blocking factor; * = 4: execution path control; * = 5: maximum size for direct call to the LAPACK routine * * NAME (global input) CHARACTER*(*) * The name of the calling subroutine, in either upper case or * lower case. * * OPTS (global input) CHARACTER*(*) * The character options to the subroutine NAME, concatenated * into a single character string. For example, UPLO = 'U', * TRANS = 'T', and DIAG = 'N' for a triangular routine would * be specified as OPTS = 'UTN'. * * N1 (global input) INTEGER * N2 (global input) INTEGER * N3 (global input) INTEGER * N4 (global input) INTEGER * Problem dimensions for the subroutine NAME; these may not all * be required. * * At present, only N1 is used, and it (N1) is used only for * 'TTRD' * * (PJLAENV) (global or local output) INTEGER * >= 0: the value of the parameter specified by ISPEC * < 0: if PJLAENV = -k, the k-th argument had an illegal * value. * * Most parameters set via a call to PJLAENV must be identical * on all processors and hence PJLAENV will return the same * value to all procesors (i.e. global output). However some, * in particular, the panel blocking factor can be different * on each processor and hence PJLAENV can return different * values on different processors (i.e. local output). * * Further Details * =============== * * The following conventions have been used when calling PJLAENV from * the ScaLAPACK routines: * 1) OPTS is a concatenation of all of the character options to * subroutine NAME, in the same order that they appear in the * argument list for NAME, even if they are not used in determining * the value of the parameter specified by ISPEC. * 2) The problem dimensions N1, N2, N3, N4 are specified in the order * that they appear in the argument list for NAME. N1 is used * first, N2 second, and so on, and unused problem dimensions are * passed a value of -1. * 3) The parameter value returned by PJLAENV is checked for validity * in the calling subroutine. For example, PJLAENV is used to * retrieve the optimal blocksize for STRTRI as follows: * * NB = PJLAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) * IF( NB.LE.1 ) NB = MAX( 1, N ) * * PJLAENV is patterned after ILAENV and keeps the same interface in * anticipation of future needs, even though PJLAENV is only sparsely * used at present in ScaLAPACK. Most ScaLAPACK codes use the input * data layout blocking factor as the algorithmic blocking factor - * hence there is no need or opportunity to set the algorithmic or * data decomposition blocking factor. * * pXYYtevx.f and pXYYtgvx.f and pXYYttrd.f are the only codes which * call PJLAENV in this release. pXYYtevx.f and pXYYtgvx.f redistribute * the data to the best data layout for each transformation. pXYYttrd.f * uses a data layout blocking factor of 1 and a * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL CNAME, GLOBAL, SNAME CHARACTER C1 CHARACTER*2 C2, C4 CHARACTER*3 C3 CHARACTER*8 SUBNAM INTEGER I, IC, IDUMM, IZ, MSZ, NB * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR * .. * * * .. External Subroutines .. EXTERNAL IGAMX2D * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * GO TO ( 10, 10, 10, 10, 10 )ISPEC * * Invalid value for ISPEC * PJLAENV = -1 RETURN * 10 CONTINUE * * Convert NAME to upper case if the first character is lower case. * PJLAENV = 1 SUBNAM = NAME IC = ICHAR( SUBNAM( 1: 1 ) ) IZ = ICHAR( 'Z' ) IF( IZ.EQ.100 .OR. IZ.EQ.122 ) THEN * * ASCII character set * IF( IC.GE.97 .AND. IC.LE.122 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 20 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.97 .AND. IC.LE.122 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 20 CONTINUE END IF * ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN * * EBCDIC character set * IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN SUBNAM( 1: 1 ) = CHAR( IC+64 ) DO 30 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: $ I ) = CHAR( IC+64 ) 30 CONTINUE END IF * ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN * * Prime machines: ASCII+128 * IF( IC.GE.225 .AND. IC.LE.250 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 40 I = 2, 6 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.225 .AND. IC.LE.250 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 40 CONTINUE END IF END IF * C1 = SUBNAM( 2: 2 ) SNAME = C1.EQ.'S' .OR. C1.EQ.'D' CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' IF( .NOT.( CNAME .OR. SNAME ) ) $ RETURN C2 = SUBNAM( 3: 4 ) C3 = SUBNAM( 5: 7 ) C4 = C3( 2: 3 ) * * This is to keep ftnchek happy * IF( ( N2+N3+N4 )*0.NE.0 ) THEN C4 = OPTS C3 = C4 END IF * GO TO ( 50, 60, 70, 80, 90 )ISPEC * 50 CONTINUE * * ISPEC = 1: data layout block size * (global - all processes must use the same value) * * In these examples, separate code is provided for setting NB for * real and complex. We assume that NB will take the same value in * single or double precision. * NB = 1 * IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'LLT' ) THEN IF( SNAME ) THEN NB = 64 ELSE NB = 64 END IF ELSE IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 1 ELSE NB = 1 END IF ELSE IF( C3.EQ.'GST' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF ELSE IF( C3.EQ.'BCK' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF ELSE IF( C3.EQ.'TRS' ) THEN IF( SNAME ) THEN NB = 64 ELSE NB = 64 END IF END IF END IF * * PJLAENV = NB GLOBAL = .TRUE. GO TO 100 * 60 CONTINUE * * ISPEC = 2: panel blocking factor (Used only in PxyyTTRD) * (local - different processes may use different values) * NB = 16 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 32 ELSE NB = 32 END IF END IF END IF PJLAENV = NB GLOBAL = .FALSE. GO TO 100 * * 70 CONTINUE * * ISPEC = 3: algorithmic blocking factor (Used only in PxyyTTRD) * (global - all processes must use the same value) * NB = 1 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN NB = 16 ELSE NB = 16 END IF END IF END IF PJLAENV = NB GLOBAL = .TRUE. GO TO 100 * 80 CONTINUE * * ISPEC = 4: Execution path options (Used only in PxyyTTRD) * (global - all processes must use the same value) * PJLAENV = -4 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN * V and H interleaved (default is not interleaved) IF( N1.EQ.1 ) THEN PJLAENV = 1 END IF * * Two ZGEMMs (default is one ZGEMM) IF( N1.EQ.2 ) THEN PJLAENV = 0 END IF * Balanced Update (default is minimum communication update) IF( N1.EQ.3 ) THEN PJLAENV = 0 END IF END IF END IF GLOBAL = .TRUE. GO TO 100 * 90 CONTINUE * * ISPEC = 5: Minimum size to justify call to parallel code * (global - all processes must use the same value) * MSZ = 0 IF( C2.EQ.'SY' .OR. C2.EQ.'HE' ) THEN IF( C3.EQ.'TTR' ) THEN IF( SNAME ) THEN MSZ = 100 ELSE MSZ = 100 END IF END IF END IF PJLAENV = MSZ GLOBAL = .TRUE. GO TO 100 * 100 CONTINUE * IF( GLOBAL ) THEN CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, PJLAENV, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) END IF * * * RETURN * * End of PJLAENV * END scalapack-1.8.0/SRC/pzunmhr.f0000640000175000017500000003647010363532303015627 0ustar muammarmuammar SUBROUTINE PZUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMHR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of IHI-ILO elementary reflectors, as returned by PZGEHRD: * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * ILO and IHI must have the same values as in the previous call * of PZGEHRD. Q is equal to the unit matrix except in the * distributed submatrix Q(ia+ilo:ia+ihi-1,ia+ilo:ja+ihi-1). * If SIDE = 'L', 1 <= ILO <= IHI <= max(1,M); * if SIDE = 'R', 1 <= ILO <= IHI <= max(1,N); * ILO and IHI are relative indexes. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PZGEHRD. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+M-2) * if SIDE = 'L', and LOCc(JA+N-2) if SIDE = 'R'. This array * contains the scalar factors TAU(j) of the elementary * reflectors H(j) as returned by PZGEHRD. TAU is tied to * the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * IAA = IA + ILO; JAA = JA+ILO-1; * If SIDE = 'L', * MI = IHI-ILO; NI = N; ICC = IC + ILO; JCC = JC; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; NI = IHI-ILO; ICC = IC; JCC = JC + ILO; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NH, NI, NPA0, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZUNMQR * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 NH = IHI - ILO IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) IAA = IA + ILO JAA = JA + ILO - 1 * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = NH NI = N ICC = IC + ILO JCC = JC CALL CHK1MAT( M, 3, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N MI = M NI = NH ICC = IC JCC = JC + ILO CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN INFO = -5 ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN INFO = -6 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = ILO IDUM2( 3 ) = 5 IDUM1( 4 ) = IHI IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMHR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) $ RETURN * CALL PZUNMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMHR * END scalapack-1.8.0/SRC/pcgebrd.f0000640000175000017500000004020310363532303015517 0ustar muammarmuammar SUBROUTINE PCGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAUP( * ), TAUQ( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEBRD reduces a complex general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal * form B by an unitary transformation: Q' * sub( A ) * P = B. * * If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * general distributed matrix sub( A ). On exit, if M >= N, * the diagonal and the first superdiagonal of sub( A ) are * overwritten with the upper bidiagonal matrix B; the elements * below the diagonal, with the array TAUQ, represent the * unitary matrix Q as a product of elementary reflectors, and * the elements above the first superdiagonal, with the array * TAUP, represent the orthogonal matrix P as a product of * elementary reflectors. If M < N, the diagonal and the first * subdiagonal are overwritten with the lower bidiagonal * matrix B; the elements below the first subdiagonal, with the * array TAUQ, represent the unitary matrix Q as a product of * elementary reflectors, and the elements above the diagonal, * with the array TAUP, represent the orthogonal matrix P as a * product of elementary reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dimension * LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise. * The distributed diagonal elements of the bidiagonal matrix * B: D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dimension * LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise. * The distributed off-diagonal elements of the bidiagonal * distributed matrix B: * if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; * if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. * E is tied to the distributed matrix A. * * TAUQ (local output) COMPLEX array dimension * LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUQ is * tied to the distributed matrix A. See Further Details. * * TAUP (local output) COMPLEX array, dimension * LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary * reflectors which represent the unitary matrix P. TAUP is * tied to the distributed matrix A. See Further Details. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0 * * where NB = MB_A = NB_A, * IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrices Q and P are represented as products of elementary * reflectors: * * If m >= n, * * Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1); * u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in * A(ia+i-1,ja+i+1:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * If m < n, * * Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) * * Each H(i) and G(i) has the form: * * H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' * * where tauq and taup are complex scalars, and v and u are complex * vectors; * v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in * A(ia+i+1:ia+m-1,ja+i-1); * u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in * A(ia+i-1,ja+i:ja+n-1); * tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples: * * m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): * * ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) * ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) * ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) * ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) * ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) * ( v1 v2 v3 v4 v5 ) * * where d and e denote diagonal and off-diagonal elements of B, vi * denotes an element of the vector defining H(i), and ui an element of * the vector defining G(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expressions should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY, $ IW, J, JB, JS, JW, K, L, LWMIN, MN, MP, MYCOL, $ MYROW, NB, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ), $ IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PCELSET, PCGEBD2, PCGEMM, PCLABRD, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN NB = DESCA( MB_ ) IOFF = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) MP = NUMROC( M+IOFF, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+IOFF, NB, MYCOL, IACOL, NPCOL ) LWMIN = NB*( MP+NQ+1 ) + NQ * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IOFF.NE.MOD( JA-1, DESCA( NB_ ) ) ) THEN INFO = -5 ELSE IF( NB.NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -12 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 12 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEBRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * MN = MIN( M, N ) IF( MN.EQ.0 ) $ RETURN * * Initialize parameters. * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPY = MP * NB + 1 IPW = NQ * NB + IPY * CALL DESCSET( DESCWX, M+IOFF, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, MP ) ) CALL DESCSET( DESCWY, NB, N+IOFF, NB, NB, IAROW, IACOL, ICTXT, $ NB ) * MP = NUMROC( M+IA-1, NB, MYROW, DESCA( RSRC_ ), NPROW ) NQ = NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) K = 1 JB = NB - IOFF IW = IOFF + 1 JW = IOFF + 1 * DO 10 L = 1, MN+IOFF-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce rows and columns i:i+nb-1 to bidiagonal form and return * the matrices X and Y which are needed to update the unreduced * part of the matrix. * CALL PCLABRD( M-K+1, N-K+1, JB, A, I, J, DESCA, D, E, TAUQ, $ TAUP, WORK, IW, JW, DESCWX, WORK( IPY ), IW, $ JW, DESCWY, WORK( IPW ) ) * * Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using * an update of the form A := A - V*Y' - X*U'. * CALL PCGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, A, I+JB, J, DESCA, $ WORK( IPY ), IW, JW+JB, DESCWY, ONE, A, I+JB, $ J+JB, DESCA ) CALL PCGEMM( 'No transpose', 'No transpose', M-K-JB+1, $ N-K-JB+1, JB, -ONE, WORK, IW+JB, JW, DESCWX, A, I, $ J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * * Copy last off-diagonal elements of B back into sub( A ). * IF( M.GE.N ) THEN JS = MIN( INDXG2L( I+JB-1, NB, 0, DESCA( RSRC_ ), NPROW ), $ MP ) IF( JS.GT.0 ) $ CALL PCELSET( A, I+JB-1, J+JB, DESCA, CMPLX( E( JS ) ) ) ELSE JS = MIN( INDXG2L( J+JB-1, NB, 0, DESCA( CSRC_ ), NPCOL ), $ NQ ) IF( JS.GT.0 ) $ CALL PCELSET( A, I+JB, J+JB-1, DESCA, CMPLX( E( JS ) ) ) END IF * K = K + JB JB = NB IW = 1 JW = 1 DESCWX( M_ ) = DESCWX( M_ ) - JB DESCWX( RSRC_ ) = MOD( DESCWX( RSRC_ ) + 1, NPROW ) DESCWX( CSRC_ ) = MOD( DESCWX( CSRC_ ) + 1, NPCOL ) DESCWY( N_ ) = DESCWY( N_ ) - JB DESCWY( RSRC_ ) = MOD( DESCWY( RSRC_ ) + 1, NPROW ) DESCWY( CSRC_ ) = MOD( DESCWY( CSRC_ ) + 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the remainder of the matrix. * CALL PCGEBD2( M-K+1, N-K+1, A, IA+K-1, JA+K-1, DESCA, D, E, TAUQ, $ TAUP, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEBRD * END scalapack-1.8.0/SRC/pzhettrd.f0000640000175000017500000012350310363532303015762 0ustar muammarmuammar SUBROUTINE PZHETTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION D( * ), E( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * * ======= * * PZHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding * process and memory location. * * Let A be a generic term for any 2D block cyclicly distributed * array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- ----------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, * indicating the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to * distribute the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to * distribute the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the * first row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes * of its process row. * The values of LOCp() and LOCq() may be determined via a call to * the ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1) * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension * LOCq(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal workspace * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS * Where: * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PZHETTRD', 'L', 0, 0, * 0, 0 ) * * NUMROC is a ScaLAPACK tool function; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of * elementary reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of * elementary reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and * vi denotes an element of the vector defining H(i). * * Data storage requirements * ========================= * * PZHETTRD is not intended to be called directly. All users are * encourage to call PZHETRD which will then call PZHETTRD if * appropriate. A must be in cyclic format (i.e. MB = NB = 1), * the process grid must be square ( i.e. NPROW = NPCOL ) and * only lower triangular storage is supported. * * Local variables * =============== * * PZHETTRD uses five local arrays: * WORK ( InV ) dimension ( NP, ANB+1): array V * WORK ( InH ) dimension ( NP, ANB+1): array H * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT * * Arrays V and H are replicated across all processor columns. * Arrays V^T and H^T are replicated across all processor rows. * * WORK ( InVT ), or V^T, is stored as a tall skinny * array ( NQ x ANB-1 ) for efficiency. Since only the lower * triangular portion of A is updated, Av is computed as: * tril(A) * v + v^T * tril(A,-1). This is performed as * two local triangular matrix-vector multiplications (both in * MVR2) followed by a transpose and a sum across the columns. * In the local computation, WORK( InVT ) is used to compute * tril(A) * v and WORK( InV ) is used to compute * v^T * tril(A,-1) * * The following variables are global indices into A: * INDEX: The current global row and column number. * MAXINDEX: The global row and column for the first row and * column in the trailing block of A. * LIIB, LIJB: The first row, column in * * The following variables point into the arrays A, V, H, V^T, H^T: * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T. * LII: local index I: The local row number for row INDEX * LIJ: local index J: The local column number for column INDEX * LIIP1: local index I+1: The local row number for row INDEX+1 * LIJP1: local index J+1: The local col number for col INDEX+1 * LTLI: lower triangular local index I: The local row for the * upper left entry in tril( A(INDEX, INDEX) ) * LTLIP1: lower triangular local index I+1: The local row for the * upper left entry in tril( A(INDEX+1, INDEX+1) ) * * Details: The distinction between LII and LTLI (and between * LIIP1 and LTLIP1) is subtle. Within the current processor * column (i.e. MYCOL .eq. CURCOL) they are the same. However, * on some processors, A( LII, LIJ ) points to an element * above the diagonal, on these processors, LTLI = LII+1. * * The following variables give the number of rows and/or columns * in various matrices: * NP: The number of local rows in A( 1:N, 1:N ) * NQ: The number of local columns in A( 1:N, 1:N ) * NPM0: The number of local rows in A( INDEX:N, INDEX:N ) * NQM0: The number of local columns in A( INDEX:N, INDEX:N ) * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N ) * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N ) * LTNM0: The number of local rows & columns in * tril( A( INDEX:N, INDEX:N ) ) * LTNM1: The number of local rows & columns in * tril( A( INDEX+1:N, INDEX+1:N ) ) * NOTE: LTNM0 == LTNM1 on all processors except the diagonal * processors, i.e. those where MYCOL == MYROW. * * Invariants: * NP = NPM0 + LII - 1 * NQ = NQM0 + LIJ - 1 * NP = NPM1 + LIIP1 - 1 * NQ = NQM1 + LIJP1 - 1 * NP = LTLI + LTNM0 - 1 * NP = LTLIP1 + LTNM1 - 1 * * Temporary variables. The following variables are used within * a few lines after they are set and do hold state from one loop * iteration to the next: * * The matrix A: * The matrix A does not hold the same values that it would * in an unblocked code nor the values that it would hold in * in a blocked code. * * The value of A is confusing. It is easiest to state the * difference between trueA and A at the point that MVR2 is called, * so we will start there. * * Let trueA be the value that A would * have at a given point in an unblocked code and A * be the value that A has in this code at the same point. * * At the time of the call to MVR2, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ). * * At the bottom of the inner loop, * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) and * v = V( liip1:N, BINDEX+1 ) and * h = H( liip1:N, BINDEX+1 ) * * At the top of the loop, BINDEX gets incremented, hence: * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and * V = V( MAXINDEX:N, 1:BINDEX-1 ) and * v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) * * * A gets updated at the bottom of the outer loop * After this update, trueA = A + v' * h + h' * v * where v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) and BINDEX = 0 * Indeed, the previous loop invariant as stated above for the * top of the loop still holds, but with BINDEX = 0, H and V * are null matrices. * * After the current column of A is updated, * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N ) * the rest of A is untouched. * * After the current block column of A is updated, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) * * This brings us back to the point at which mvr2 is called. * * * Details of the parallelization: * * We delay spreading v across to all processor columns (which * would naturally happen at the bottom of the loop) in order to * combine the spread of v( : , i-1 ) with the spread of h( : , i ) * * In order to compute h( :, i ), we must update A( :, i ) * which means that the processor column owning A( :, i ) must * have: c, tau, v( i, i ) and h( i, i ). * * The traditional * way of computing v (and the one used in pzlatrd.f and * zlatrd.f) is: * v = tau * v * c = v' * h * alpha = - tau * c / 2 * v = v + alpha * h * However, the traditional way of computing v requires that tau * be broadcast to all processors in the current column (to compute * v = tau * v) and then a sum-to-all is required (to * compute v' * h ). We use the following formula instead: * c = v' * h * v = tau * ( v - c * tau' * h / 2 ) * The above formula allows tau to be spread down in the * same call to DGSUM2D which performs the sum-to-all of c. * * The computation of v, which could be performed in any processor * column (or other procesor subsets), is performed in the * processor column that owns A( :, i+1 ) so that A( :, i+1 ) * can be updated prior to spreading v across. * * We keep the block column of A up-to-date to minimize the * work required in updating the current column of A. Updating * the block column of A is reasonably load balanced whereas * updating the current column of A is not (only the current * processor column is involved). * * In the following overview of the steps performed, M in the * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p)) * or more flops per processor. * * Inner loop: * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) ) *M h = house( A(index:n, index) ) *M Spread v, h across *M vt = v^T; ht = h^T * A( index+1:n, index+1:maxindex ) -= * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) ) *C v = tril(A) * h; vt = ht * tril(A,-1) *MorC v = v - H*V*h - V*H*h *M v = v + vt^T *M c = v' * h * v = tau * ( v - c * tau' * h / 2 ) *C A = A - H*V - V*H * * * * ================================================================= * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D0 ) COMPLEX*16 Z_ONE, Z_NEGONE, Z_ZERO PARAMETER ( Z_ONE = 1.0D0, Z_NEGONE = -1.0D0, $ Z_ZERO = 0.0D0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * * * .. Local Scalars .. * * LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX, $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT, $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA, $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1, $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX, $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP, $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ, $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX, $ PBMIN, PBSIZE, PNB, ROWSPERPROC DOUBLE PRECISION NORM, SAFMAX, SAFMIN COMPLEX*16 ALPHA, BETA, C, CONJTOPH, CONJTOPV, $ ONEOVERBETA, TOPH, TOPNV, TOPTAU, TOPV * .. * .. Local Arrays .. * * * * INTEGER IDUM1( 1 ), IDUM2( 1 ) DOUBLE PRECISION DTMP( 5 ) COMPLEX*16 CC( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DCOMBNRM2, DGEBR2D, $ DGEBS2D, DGSUM2D, PCHK1MAT, PDTREECOMB, $ PXERBLA, ZGEBR2D, ZGEBS2D, ZGEMM, ZGEMV, $ ZGERV2D, ZGESD2D, ZGSUM2D, ZLACPY, ZSCAL, $ ZTRMVT * .. * .. External Functions .. * LOGICAL LSAME INTEGER ICEIL, NUMROC, PJLAENV DOUBLE PRECISION DZNRM2, PDLAMCH EXTERNAL LSAME, ICEIL, NUMROC, PJLAENV, DZNRM2, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG, ICHAR, MAX, MIN, $ MOD, SIGN, SQRT * .. * * * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * * Further details * =============== * * At the top of the loop, v and nh have been computed but not * spread across. Hence, A is out-of-date even after the * rank 2k update. Furthermore, we compute the next v before * nh is spread across. * * I claim that if we used a sum-to-all on NV, by summing CC within * each column, that we could compute NV locally and could avoid * spreading V across. Bruce claims that sum-to-all can be made * to cost no more than sum-to-one on the Paragon. If that is * true, this would be a win. But, * the BLACS sum-to-all is just a sum-to-one followed by a broadcast, * and hence the present scheme is better for now. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * SAFMAX = SQRT( PDLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PDLAMCH( ICTXT, 'S' ) ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE * * Here we set execution options for PZHETTRD * PNB = PJLAENV( ICTXT, 2, 'PZHETTRD', 'L', 0, 0, 0, 0 ) ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) * INTERLEAVE = ( PJLAENV( ICTXT, 4, 'PZHETTRD', 'L', 1, 0, 0, $ 0 ).EQ.1 ) TWOGEMMS = ( PJLAENV( ICTXT, 4, 'PZHETTRD', 'L', 2, 0, 0, $ 0 ).EQ.1 ) BALANCED = ( PJLAENV( ICTXT, 4, 'PZHETTRD', 'L', 3, 0, 0, $ 0 ).EQ.1 ) * CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) * * UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 .AND. DESCA( NB_ ).NE.1 ) $ INFO = 600 + NB_ IF( INFO.EQ.0 ) THEN * * * Here is the arithmetic: * Let maxnpq = max( np, nq, 2 * ANB ) * LDV = 4 * max( np, nq ) + 2 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB ) * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS * * This overestimates memory requirements when ANB > NP/2 * Memory requirements are lower when interleave = .false. * Hence, we could have two sets of memory requirements, * one for interleave and one for * * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORK( 1 ) = DCMPLX( LWMIN ) IF( .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( NPROW.NE.NPCOL ) THEN INFO = -( 600+CTXT_ ) ELSE IF( DESCA( DTYPE_ ).NE.1 ) THEN INFO = -( 600+DTYPE_ ) ELSE IF( DESCA( MB_ ).NE.1 ) THEN INFO = -( 600+MB_ ) ELSE IF( DESCA( NB_ ).NE.1 ) THEN INFO = -( 600+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.0 ) THEN INFO = -( 600+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.0 ) THEN INFO = -( 600+CSRC_ ) ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 * CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHETTRD', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * * Reduce the lower triangle of sub( A ) NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * NXTROW = 0 NXTCOL = 0 * LIIP1 = 1 LIJP1 = 1 NPM1 = NP NQM1 = NQ * LDA = DESCA( LLD_ ) ICTXT = DESCA( CTXT_ ) * * * * Miscellaneous details: * Put tau, D and E in the right places * Check signs * Place all the arrays in WORK, control their placement * in memory. * * * * Loop invariants * A(LIIP1, LIJ) points to the first element of A(I+1,J) * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N ) * A(LII:N,LIJ:N) is one step out of date. * proc( CURROW, CURCOL ) owns A(LII,LIJ) * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ) * INH = 1 * IF( INTERLEAVE ) THEN * * H and V are interleaved to minimize memory movement * LDV has to be twice as large to accomodate interleaving. * In addition, LDV is doubled again to allow v, h and * toptau to be spreaad across and transposed in a * single communication operation with minimum memory * movement. * * We could reduce LDV back to 2*MAX(NPM1,NQM1) * by increasing the memory movement required in * the spread and transpose of v, h and toptau. * However, since the non-interleaved path already * provides a mear minimum memory requirement option, * we did not provide this additional path. * LDV = 4*( MAX( NPM1, NQM1 ) ) + 2 * INH = 1 * INV = INH + LDV / 2 INVT = INH + ( ANB+1 )*LDV * INHT = INVT + LDV / 2 INTMP = INVT + LDV*( ANB+1 ) * ELSE LDV = MAX( NPM1, NQM1 ) * INHT = INH + LDV*( ANB+1 ) INV = INHT + LDV*( ANB+1 ) * * The code works without this +1, but only because of a * coincidence. Without the +1, WORK(INVT) gets trashed, but * WORK(INVT) is only used once and when it is used, it is * multiplied by WORK( INH ) which is zero. Hence, the fact * that WORK(INVT) is trashed has no effect. * INVT = INV + LDV*( ANB+1 ) + 1 INTMP = INVT + LDV*( 2*ANB ) * END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZHETTRD', -INFO ) WORK( 1 ) = DCMPLX( LWMIN ) RETURN END IF * * * The satisfies the loop invariant: trueA = A - V * HT - H * VT, * (where V, H, VT and HT all have BINDEX+1 rows/columns) * the first ANB times through the loop. * * * * Setting either ( InH and InHT ) or InV to Z_ZERO * is adequate except in the face of NaNs. * * DO 10 I = 1, NP WORK( INH+I-1 ) = Z_ZERO WORK( INV+I-1 ) = Z_ZERO 10 CONTINUE DO 20 I = 1, NQ WORK( INHT+I-1 ) = Z_ZERO 20 CONTINUE * * * TOPNV = Z_ZERO * LTLIP1 = LIJP1 LTNM1 = NPM1 IF( MYCOL.GT.MYROW ) THEN LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * DO 210 MININDEX = 1, N - 1, ANB * * MAXINDEX = MIN( MININDEX+ANB-1, N ) LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1 * NQB = NQ - LIJB + 1 NPB = NP - LIIB + 1 INHTB = INHT + LIJB - 1 INVTB = INVT + LIJB - 1 INHB = INH + LIIB - 1 INVB = INV + LIIB - 1 * * * * DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 ) * BINDEX = INDEX - MININDEX * CURROW = NXTROW CURCOL = NXTCOL * NXTROW = MOD( CURROW+1, NPROW ) NXTCOL = MOD( CURCOL+1, NPCOL ) * LII = LIIP1 LIJ = LIJP1 NPM0 = NPM1 * IF( MYROW.EQ.CURROW ) THEN NPM1 = NPM1 - 1 LIIP1 = LIIP1 + 1 END IF IF( MYCOL.EQ.CURCOL ) THEN NQM1 = NQM1 - 1 LIJP1 = LIJP1 + 1 LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * * * * V = NV, VT = NVT, H = NH, HT = NHT * * * Update the current column of A * * IF( MYCOL.EQ.CURCOL ) THEN * INDEXA = LII + ( LIJ-1 )*LDA INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV CONJTOPH = DCONJG( WORK( INHT+LIJ-1+BINDEX*LDV ) ) CONJTOPV = DCONJG( TOPNV ) * IF( INDEX.GT.1 ) THEN DO 30 I = 0, NPM0 - 1 * A( INDEXA+I ) = A( INDEXA+I ) A( INDEXA+I ) = A( INDEXA+I ) - $ WORK( INDEXINV+LDV+I )*CONJTOPH - $ WORK( INDEXINH+LDV+I )*CONJTOPV 30 CONTINUE END IF * * END IF * * IF( MYCOL.EQ.CURCOL ) THEN * * Compute the householder vector * IF( MYROW.EQ.CURROW ) THEN DTMP( 2 ) = DBLE( A( LII+( LIJ-1 )*LDA ) ) ELSE DTMP( 2 ) = ZERO END IF IF( MYROW.EQ.NXTROW ) THEN DTMP( 3 ) = DBLE( A( LIIP1+( LIJ-1 )*LDA ) ) DTMP( 4 ) = DIMAG( A( LIIP1+( LIJ-1 )*LDA ) ) ELSE DTMP( 3 ) = ZERO DTMP( 4 ) = ZERO END IF * NORM = DZNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 ) DTMP( 1 ) = NORM * * IF DTMP(5) = 1.0, NORM is too large and might cause * overflow, hence PDTREECOMB must be called. IF DTMP(5) * is zero on output, DTMP(1) can be trusted. * DTMP( 5 ) = ZERO IF( DTMP( 1 ).GE.SAFMAX .OR. DTMP( 1 ).LT.SAFMIN ) THEN DTMP( 5 ) = ONE DTMP( 1 ) = ZERO END IF * DTMP( 1 ) = DTMP( 1 )*DTMP( 1 ) CALL DGSUM2D( ICTXT, 'C', ' ', 5, 1, DTMP, 5, -1, $ CURCOL ) IF( DTMP( 5 ).EQ.ZERO ) THEN DTMP( 1 ) = SQRT( DTMP( 1 ) ) ELSE DTMP( 1 ) = NORM CALL PDTREECOMB( ICTXT, 'C', 1, DTMP, -1, MYCOL, $ DCOMBNRM2 ) END IF * NORM = DTMP( 1 ) * D( LIJ ) = DTMP( 2 ) IF( MYROW.EQ.CURROW .AND. MYCOL.EQ.CURCOL ) THEN A( LII+( LIJ-1 )*LDA ) = DCMPLX( D( LIJ ), ZERO ) END IF * * ALPHA = DCMPLX( DTMP( 3 ), DTMP( 4 ) ) * NORM = SIGN( NORM, DBLE( ALPHA ) ) * IF( NORM.EQ.ZERO ) THEN TOPTAU = ZERO ELSE BETA = NORM + ALPHA TOPTAU = BETA / NORM ONEOVERBETA = 1.0D0 / BETA * CALL ZSCAL( NPM1, ONEOVERBETA, $ A( LIIP1+( LIJ-1 )*LDA ), 1 ) END IF * IF( MYROW.EQ.NXTROW ) THEN A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE END IF * TAU( LIJ ) = TOPTAU E( LIJ ) = -NORM * END IF * * * Spread v, nh, toptau across * DO 40 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+ $ ( LIJ-1 )*LDA ) 40 CONTINUE * IF( MYCOL.EQ.CURCOL ) THEN WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU CALL ZGEBS2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1 ) ELSE CALL ZGEBR2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1, MYROW, CURCOL ) TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) END IF DO 50 I = 0, NPM1 - 1 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+BINDEX*LDV+NPM1+I ) 50 CONTINUE * IF( INDEX.LT.N ) THEN IF( MYROW.EQ.NXTROW .AND. MYCOL.EQ.CURCOL ) $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ ) END IF * * Transpose v, nh * * IF( MYROW.EQ.MYCOL ) THEN DO 60 I = 0, NPM1 + NPM1 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+ $ BINDEX*LDV+I ) 60 CONTINUE ELSE CALL ZGESD2D( ICTXT, NPM1+NPM1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1, $ MYCOL, MYROW ) CALL ZGERV2D( ICTXT, NQM1+NQM1, 1, $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1, $ MYCOL, MYROW ) END IF * DO 70 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+ $ LIJP1-1+BINDEX*LDV+NQM1+I ) 70 CONTINUE * * * Update the current block column of A * IF( INDEX.GT.1 ) THEN DO 90 J = LIJP1, LIJB - 1 DO 80 I = 0, NPM1 - 1 * A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA ) $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )* $ DCONJG( WORK( INHT+J-1+BINDEX*LDV ) ) - $ WORK( INH+LIIP1-1+BINDEX*LDV+I )* $ DCONJG( WORK( INVT+J-1+BINDEX*LDV ) ) 80 CONTINUE 90 CONTINUE END IF * * * * Compute NV = A * NHT; NVT = A * NH * * These two lines are necessary because these elements * are not always involved in the calls to ZTRMVT * for two reasons: * 1) On diagonal processors, the call to TRMVT * involves only LTNM1-1 elements * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1 * and when the results are combined across all processes, * uninitialized values may be included. WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO * * IF( MYROW.EQ.MYCOL ) THEN IF( LTNM1.GT.1 ) THEN CALL ZTRMVT( 'L', LTNM1-1, $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1, $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ), $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+ $ 1 )*LDV ), 1 ) END IF DO 100 I = 1, LTNM1 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) + $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )* $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV ) 100 CONTINUE ELSE IF( LTNM1.GT.0 ) $ CALL ZTRMVT( 'L', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ), $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+ $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+ $ ( BINDEX+1 )*LDV ), 1, $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ), $ 1 ) * END IF * * * We take advantage of the fact that: * A * sum( B ) = sum ( A * B ) for matrices A,B * * trueA = A + V * HT + H * VT * hence: (trueA)v = Av' + V * HT * v + H * VT * v * VT * v = sum_p_in_NPROW ( VTp * v ) * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v ) * * v = v + V * HT * h + H * VT * h * * * * tmp = HT * nh1 DO 110 I = 1, 2*( BINDEX+1 ) WORK( INTMP-1+I ) = 0 110 CONTINUE * IF( BALANCED ) THEN NPSET = NPROW MYSETNUM = MYROW ROWSPERPROC = ICEIL( NQB, NPSET ) MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 ) * * * tmp = HT * v * CALL ZGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INHTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL ZGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INVTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * * CALL ZGSUM2D( ICTXT, 'C', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) ELSE * tmp = HT * v * CALL ZGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INHTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL ZGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INVTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * END IF * * * IF( BALANCED ) THEN MYSETNUM = MYCOL * ROWSPERPROC = ICEIL( NPB, NPSET ) MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 ) * CALL ZGSUM2D( ICTXT, 'R', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) * * * v = v + V * tmp IF( INDEX.GT.1. ) THEN CALL ZGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INVB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) * * v = v + H * tmp2 CALL ZGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INHB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) END IF * ELSE * v = v + V * tmp CALL ZGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ), $ LDV, WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * * * v = v + H * tmp2 CALL ZGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ), $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * END IF * * * Transpose NV and add it back into NVT * IF( MYROW.EQ.MYCOL ) THEN DO 120 I = 0, NQM1 - 1 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+ $ I ) 120 CONTINUE ELSE CALL ZGESD2D( ICTXT, NQM1, 1, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), $ NQM1, MYCOL, MYROW ) CALL ZGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL, $ MYROW ) * END IF DO 130 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I ) 130 CONTINUE * * Sum-to-one NV rowwise (within a row) * CALL ZGSUM2D( ICTXT, 'R', ' ', NPM1, 1, $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1, $ MYROW, NXTCOL ) * * * Dot product c = NV * NH * Sum-to-all c within next processor column * * IF( MYCOL.EQ.NXTCOL ) THEN CC( 1 ) = Z_ZERO DO 140 I = 0, NPM1 - 1 CC( 1 ) = CC( 1 ) + DCONJG( WORK( INV+LIIP1-1+ $ ( BINDEX+1 )*LDV+I ) )* $ WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) 140 CONTINUE IF( MYROW.EQ.NXTROW ) THEN CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV ) ELSE CC( 2 ) = Z_ZERO CC( 3 ) = Z_ZERO END IF CALL ZGSUM2D( ICTXT, 'C', ' ', 3, 1, CC, 3, -1, NXTCOL ) * TOPV = CC( 2 ) C = CC( 1 ) TOPH = CC( 3 ) * TOPNV = TOPTAU*( TOPV-C*DCONJG( TOPTAU ) / 2*TOPH ) * * * Compute V = Tau * (V - C * Tau' / 2 * H ) * * DO 150 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU* $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C* $ DCONJG( TOPTAU ) / 2*WORK( INH+LIIP1-1+( BINDEX+ $ 1 )*LDV+I ) ) 150 CONTINUE * END IF * * 160 CONTINUE * * * Perform the rank2k update * IF( MAXINDEX.LT.N ) THEN * DO 170 I = 0, NPM1 - 1 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I ) 170 CONTINUE * * * IF( .NOT.TWOGEMMS ) THEN IF( INTERLEAVE ) THEN LDZG = LDV / 2 ELSE CALL ZLACPY( 'A', LTNM1, ANB, WORK( INHT+LIJP1-1 ), $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV ) * CALL ZLACPY( 'A', LTNM1, ANB, WORK( INV+LTLIP1-1 ), $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV ) LDZG = LDV END IF NBZG = ANB*2 ELSE LDZG = LDV NBZG = ANB END IF * * DO 180 PBMIN = 1, LTNM1, PNB * PBSIZE = MIN( PNB, LTNM1-PBMIN+1 ) PBMAX = MIN( LTNM1, PBMIN+PNB-1 ) CALL ZGEMM( 'N', 'C', PBSIZE, PBMAX, NBZG, Z_NEGONE, $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) IF( TWOGEMMS ) THEN CALL ZGEMM( 'N', 'C', PBSIZE, PBMAX, ANB, Z_NEGONE, $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) END IF 180 CONTINUE * * * DO 190 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I ) WORK( INH+LIIP1-1+I ) = WORK( INTMP+I ) 190 CONTINUE DO 200 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I ) 200 CONTINUE * * END IF * * End of the update A code * 210 CONTINUE * IF( MYCOL.EQ.NXTCOL ) THEN IF( MYROW.EQ.NXTROW ) THEN * D( NQ ) = DBLE( A( NP+( NQ-1 )*LDA ) ) A( NP+( NQ-1 )*LDA ) = D( NQ ) * CALL DGEBS2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1 ) ELSE CALL DGEBR2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1, NXTROW, $ NXTCOL ) END IF END IF * * * * WORK( 1 ) = DCMPLX( LWMIN ) RETURN * * End of PZHETTRD * * END scalapack-1.8.0/SRC/pzlanhs.f0000640000175000017500000006252510363532303015603 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLANHS returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * Hessenberg distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PZLANHS returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PZLANHS as described * above. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PZLANHS is set to zero. N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, INXTROW, $ IOFFA, IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DCOMBSSQ, DGEBR2D, $ DGEBS2D, DGAMX2D, DGSUM2D, $ INFOG2L, PDTREECOMB, ZLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL LSAME, ICEIL, IDAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * VALUE = ZERO * * Find max(abs(A(i,j))). * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 20 LL = JJ, JJ+JB-1 DO 10 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 40 LL = JJ, JJ+JB-1 DO 30 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 50 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 70 LL = JJ, JJ + JB -1 DO 60 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 60 CONTINUE IOFFA = IOFFA + LDA 70 CONTINUE ELSE DO 90 LL = JJ, JJ+JB-1 DO 80 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 140 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 110 LL = JJ, JJ + JB -1 DO 100 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE ELSE DO 130 LL = JJ, JJ + JB -1 DO 120 KK = IIA, MIN( II-1, IIA+NP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ VALUE = MAX( VALUE, $ ABS( A( II+(JJ+JB-2)*LDA ) ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 140 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 160 LL = JJ, JJ+JB-1 SUM = ZERO DO 150 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 150 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 160 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 190 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 180 LL = JJ, JJ+JB-1 SUM = ZERO DO 170 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 170 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 180 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 190 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 210 LL = JJ, JJ + JB -1 SUM = ZERO DO 200 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 200 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 SUM = ZERO DO 220 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 220 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 230 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 280 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 250 LL = JJ, JJ + JB -1 SUM = ZERO DO 240 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 240 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 250 CONTINUE ELSE DO 270 LL = JJ, JJ + JB -1 SUM = ZERO DO 260 KK = IIA, MIN( II-1, IIA+NP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 260 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 270 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( JJ+JB-JJA ) = WORK( JJ+JB-JJA ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 280 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( IDAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * DO 290 KK = IIA, IIA+NP-1 WORK( KK ) = ZERO 290 CONTINUE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 310 LL = JJ, JJ+JB-1 DO 300 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 300 CONTINUE IOFFA = IOFFA + LDA 310 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 340 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 330 LL = JJ, JJ+JB-1 DO 320 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 320 CONTINUE IOFFA = IOFFA + LDA 330 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 340 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 360 LL = JJ, JJ + JB -1 DO 350 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA 360 CONTINUE ELSE DO 380 LL = JJ, JJ + JB -1 DO 370 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA 380 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 430 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 400 LL = JJ, JJ + JB -1 DO 390 KK = IIA, MIN( II+LL-JJ+1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 390 CONTINUE IOFFA = IOFFA + LDA 400 CONTINUE ELSE DO 420 LL = JJ, JJ + JB -1 DO 410 KK = IIA, MIN( II-1, IIA+NP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS(A(IOFFA+KK)) 410 CONTINUE IOFFA = IOFFA + LDA 420 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ WORK( II-IIA+1 ) = WORK( II-IIA+1 ) + $ ABS( A( II+(JJ+JB-2)*LDA ) ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 430 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL DGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK, MAX( 1, NP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( NP.GT.0 ) THEN VALUE = WORK( IDAMAX( NP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * SCALE = ZERO SUM = ONE II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Only one process row * IF( NPROW.EQ.1 ) THEN * * Handle first block of columns separately * IF( MYCOL.EQ.IACOL ) THEN DO 440 LL = JJ, JJ+JB-1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 440 CONTINUE JJ = JJ + JB END IF * IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 460 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN DO 450 LL = JJ, JJ+JB-1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 450 CONTINUE JJ = JJ + JB END IF * II = II + JB IACOL = MOD( IACOL+1, NPCOL ) * 460 CONTINUE * ELSE * * Handle first block of columns separately * INXTROW = MOD( IAROW+1, NPROW ) IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 470 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 470 CONTINUE ELSE DO 480 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 480 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL ZLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 510 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 490 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II+LL-JJ+1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 490 CONTINUE ELSE DO 500 LL = JJ, JJ + JB -1 CALL ZLASSQ( MIN( II-1, IIA+NP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 500 CONTINUE IF( MYROW.EQ.INXTROW .AND. II.LE.IIA+NP-1 ) $ CALL ZLASSQ( 1, A( II+(JJ+JB-2)*LDA ), 1, $ SCALE, SUM ) END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = INXTROW IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 510 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PZLANHS = VALUE * RETURN * * End of PZLANHS * END scalapack-1.8.0/SRC/pdsyevx.f0000640000175000017500000011506310377154001015622 0ustar muammarmuammar SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, $ N, NZ DOUBLE PRECISION ABSTOL, ORFAC, VL, VU * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ), $ IFAIL( * ), IWORK( * ) DOUBLE PRECISION A( * ), GAP( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PDSYEVX computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. Eigenvalues/vectors can be selected by * specifying a range of values or a range of indices for the desired * eigenvalues. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * PDSYEVX assumes IEEE 754 standard compliant arithmetic. To port * to a system which does not have IEEE 754 arithmetic, modify * the appropriate SLmake.inc file to include the compiler switch * -DNO_IEEE. This switch only affects the compilation of pdlaiect.c. * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * RANGE (global input) CHARACTER*1 * = 'A': all eigenvalues will be found. * = 'V': all eigenvalues in the interval [VL,VU] will be found. * = 'I': the IL-th through IU-th eigenvalues will be found. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PDSYEVX cannot guarantee * correct error reporting. * * VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * * IL (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * smallest eigenvalue to be returned. IL >= 1. * Not referenced if RANGE = 'A' or 'V'. * * IU (global input) INTEGER * If RANGE='I', the index (from smallest to largest) of the * largest eigenvalue to be returned. min(IL,N) <= IU <= N. * Not referenced if RANGE = 'A' or 'V'. * * ABSTOL (global input) DOUBLE PRECISION * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields * the most orthogonal eigenvectors. * * The absolute error tolerance for the eigenvalues. * An approximate eigenvalue is accepted as converged * when it is determined to lie in an interval [a,b] * of width less than or equal to * * ABSTOL + EPS * max( |a|,|b| ) , * * where EPS is the machine precision. If ABSTOL is less than * or equal to zero, then EPS*norm(T) will be used in its place, * where norm(T) is the 1-norm of the tridiagonal matrix * obtained by reducing A to tridiagonal form. * * Eigenvalues will be computed most accurately when ABSTOL is * set to twice the underflow threshold 2*PDLAMCH('S') not zero. * If this routine returns with ((MOD(INFO,2).NE.0) .OR. * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or * eigenvectors did not converge, try setting ABSTOL to * 2*PDLAMCH('S'). * * See "Computing Small Singular Values of Bidiagonal Matrices * with Guaranteed High Relative Accuracy," by Demmel and * Kahan, LAPACK Working Note #3. * * See "On the correctness of Parallel Bisection in Floating * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 * * M (global output) INTEGER * Total number of eigenvalues found. 0 <= M <= N. * * NZ (global output) INTEGER * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. * If JOBZ .EQ. 'V', NZ = M unless the user supplies * insufficient space and PDSYEVX is not able to detect this * before beginning computation. To get all the eigenvectors * requested, the user must supply both sufficient * space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) * and sufficient workspace to compute them. (See LWORK below.) * PDSYEVX is always able to detect insufficient space without * computation unless RANGE .EQ. 'V'. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * ORFAC (global input) DOUBLE PRECISION * Specifies which eigenvectors should be reorthogonalized. * Eigenvectors that correspond to eigenvalues which are within * tol=ORFAC*norm(A) of each other are to be reorthogonalized. * However, if the workspace is insufficient (see LWORK), * tol may be decreased until all eigenvectors to be * reorthogonalized can be stored in one process. * No reorthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. If an eigenvector * fails to converge, then that column of Z contains the latest * approximation to the eigenvector, and the index of the * eigenvector is returned in IFAIL. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) DOUBLE PRECISION array, * dimension max(3,LWORK) * On return, WORK(1) contains the optimal amount of * workspace required for efficient execution. * if JOBZ='N' WORK(1) = optimal amount of workspace * required to compute eigenvalues efficiently * if JOBZ='V' WORK(1) = optimal amount of workspace * required to compute eigenvalues and eigenvectors * efficiently with no guarantee on orthogonality. * If RANGE='V', it is assumed that all eigenvectors * may be required. * * LWORK (local input) INTEGER * Size of WORK * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + * ICEIL( NEIG, NPROW*NPCOL)*NN * * The computed eigenvectors may not be orthogonal if the * minimal workspace is supplied and ORFAC is too small. * If you want to guarantee orthogonality (at the cost * of potentially poor performance) you should add * the following to LWORK: * (CLUSTERSIZE-1)*N * where CLUSTERSIZE is the number of eigenvalues in the * largest cluster, where a cluster is defined as a set of * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | * W(J+1) <= W(J) + ORFAC*2*norm(A) } * Variable definitions: * NEIG = number of eigenvectors requested * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) * ICEIL( X, Y ) is a ScaLAPACK function returning * ceiling(X/Y) * * When LWORK is too small: * If LWORK is too small to guarantee orthogonality, * PDSYEVX attempts to maintain orthogonality in * the clusters with the smallest * spacing between the eigenvalues. * If LWORK is too small to compute all the eigenvectors * requested, no computation is performed and INFO=-23 * is returned. Note that when RANGE='V', PDSYEVX does * not know how many eigenvectors are requested until * the eigenvalues are computed. Therefore, when RANGE='V' * and as long as LWORK is large enough to allow PDSYEVX to * compute the eigenvalues, PDSYEVX will compute the * eigenvalues and as many eigenvectors as it can. * * Relationship between workspace, orthogonality & performance: * Greater performance can be achieved if adequate workspace * is provided. On the other hand, in some situations, * performance can decrease as the workspace provided * increases above the workspace amount shown below: * * For optimal performance, greater workspace may be * needed, i.e. * LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT ) * Where: * LWORK, as defined previously, depends upon the number * of eigenvectors requested, and * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + * ( NPS + 3 ) * NPS * * ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L', * 0, 0, 0, 0) * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) * * NUMROC is a ScaLAPACK tool functions; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by * calling the subroutine BLACS_GRIDINFO. * * For large N, no extra workspace is needed, however the * biggest boost in performance comes for small N, so it * is wise to provide the extra workspace (typically less * than a Megabyte per process). * * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing * enough space to compute all the eigenvectors * orthogonally will cause serious degradation in * performance. In the limit (i.e. CLUSTERSIZE = N-1) * PDSTEIN will perform no better than DSTEIN on 1 * processor. * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing * all eigenvectors will increase the total execution time * by a factor of 2 or more. * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will * grow as the square of the cluster size, all other factors * remaining equal and assuming enough workspace. Less * workspace means less reorthogonalization but faster * execution. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the size * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace * required. * * LIWORK (local input) INTEGER * size of IWORK * LIWORK >= 6 * NNP * Where: * NNP = MAX( N, NPROW*NPCOL + 1, 4 ) * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) INTEGER array, dimension (N) * If JOBZ = 'V', then on normal exit, the first M elements of * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then * IFAIL contains the * indices of the eigenvectors that failed to converge. * If JOBZ = 'N', then IFAIL is not referenced. * * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) * This array contains indices of eigenvectors corresponding to * a cluster of eigenvalues that could not be reorthogonalized * due to insufficient workspace (see LWORK, ORFAC and INFO). * Eigenvectors corresponding to clusters of eigenvalues indexed * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be * reorthogonalized due to lack of workspace. Hence the * eigenvectors corresponding to these clusters may not be * orthogonal. ICLUSTR() is a zero terminated array. * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if * K is the number of clusters * ICLUSTR is not referenced if JOBZ = 'N' * * GAP (global output) DOUBLE PRECISION array, * dimension (NPROW*NPCOL) * This array contains the gap between eigenvalues whose * eigenvectors could not be reorthogonalized. The output * values in this array correspond to the clusters indicated * by the array ICLUSTR. As a result, the dot product between * eigenvectors correspoding to the I^th cluster may be as high * as ( C * n ) / GAP(I) where C is a small constant. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors * failed to converge. Their indices are stored * in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding * to one or more clusters of eigenvalues could not be * reorthogonalized because of insufficient workspace. * The indices of the clusters are stored in the array * ICLUSTR. * if (MOD(INFO/4,2).NE.0), then space limit prevented * PDSYEVX from computing all of the eigenvectors * between VL and VU. The number of eigenvectors * computed is returned in NZ. * if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to compute * eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) * Send e-mail to scalapack@cs.utk.edu * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Differences between PDSYEVX and DSYEVX * ====================================== * * A, LDA -> A, IA, JA, DESCA * Z, LDZ -> Z, IZ, JZ, DESCZ * WORKSPACE needs are larger for PDSYEVX. * LIWORK parameter added * * ORFAC, ICLUSTER() and GAP() parameters added * meaning of INFO is changed * * Functional differences: * PDSYEVX does not promise orthogonality for eigenvectors associated * with tighly clustered eigenvalues. * PDSYEVX does not reorthogonalize eigenvectors * that are on different processes. The extent of reorthogonalization * is controlled by the input parameter LWORK. * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE, TEN, FIVE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0, $ FIVE = 5.0D+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8 ) * .. * .. Local Scalars .. LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN, $ VALEIG, WANTZ CHARACTER ORDER INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO, $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP, $ INDTAU, INDWORK, IROFFA, IROFFZ, ISCALE, $ ISIZESTEBZ, ISIZESTEIN, IZROW, LALLWORK, $ LIWMIN, LLWORK, LWMIN, LWOPT, MAXEIGS, MB_A, $ MQ0, MYCOL, MYROW, NB, NB_A, NEIG, NN, NNP, $ NP0, NPCOL, NPROCS, NPROW, NPS, NSPLIT, $ NSYTRD_LWOPT, NZZ, OFFSET, RSRC_A, RSRC_Z, $ SIZEORMTR, SIZESTEIN, SIZESYEVX, SQNPC DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, $ SIGMA, SMLNUM, VLL, VUU * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV DOUBLE PRECISION PDLAMCH, PDLANSY EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, $ PDLAMCH, PDLANSY * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DLASRT, DSCAL, IGAMN2D, PCHK1MAT, PCHK2MAT, $ PDELGET, PDLARED1D, PDLASCL, PDORMTR, PDSTEBZ, $ PDSTEIN, PDSYNTRD, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 800+CTXT_ ) ELSE IF( WANTZ ) THEN IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) EPS = PDLAMCH( ICTXT, 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL LOWER = LSAME( UPLO, 'L' ) ALLEIG = LSAME( RANGE, 'A' ) VALEIG = LSAME( RANGE, 'V' ) INDEIG = LSAME( RANGE, 'I' ) * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDWORK = INDE2 + N LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the IWORK array * ISIZESTEIN = 3*N + NPROCS + 1 ISIZESTEBZ = MAX( 4*N, 14, NPROCS ) INDIBL = ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 INDISP = INDIBL + N * * Compute the total amount of space needed * LQUERY = .FALSE. IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) $ LQUERY = .TRUE. * NNP = MAX( N, NPROCS+1, 4 ) LIWMIN = 6*NNP * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A NN = MAX( N, NB, 2 ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) NP0 = NUMROC( N+IROFFA, NB, 0, 0, NPROW ) MQ0 = NUMROC( N+ICOFFA, NB, 0, 0, NPCOL ) IF( WANTZ ) THEN RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) $ THEN LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) ) IF( WANTZ ) THEN MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) ELSE LWOPT = LWMIN END IF NEIG = 0 ELSE IF( ALLEIG .OR. VALEIG ) THEN NEIG = N ELSE IF( INDEIG ) THEN NEIG = IU - IL + 1 END IF MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + $ ICEIL( NEIG, NPROW*NPCOL )*NN LWOPT = LWMIN * END IF * * Compute how much workspace is needed to use the * new TRD code * ANB = PJLAENV( ICTXT, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 ) SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) ) NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS LWOPT = MAX( LWOPT, 5*N+NSYTRD_LWOPT ) * END IF IF( INFO.EQ.0 ) THEN IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN WORK( 1 ) = ABSTOL IF( VALEIG ) THEN WORK( 2 ) = VL WORK( 3 ) = VU ELSE WORK( 2 ) = ZERO WORK( 3 ) = ZERO END IF CALL DGEBS2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3 ) ELSE CALL DGEBR2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3, 0, 0 ) END IF IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN INFO = -2 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -3 ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN INFO = -10 ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) $ THEN INFO = -11 ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) $ THEN INFO = -12 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -23 ELSE IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN INFO = -25 ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* $ ABS( VL ) ) ) THEN INFO = -9 ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* $ ABS( VU ) ) ) THEN INFO = -10 ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) $ THEN INFO = -13 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -19 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -19 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 2100+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 2100+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 2100+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 2100+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 2100+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN INFO = -( 2100+CSRC_ ) ELSE IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN INFO = -( 2100+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( ALLEIG ) THEN IDUM1( 3 ) = ICHAR( 'A' ) ELSE IF( INDEIG ) THEN IDUM1( 3 ) = ICHAR( 'I' ) ELSE IDUM1( 3 ) = ICHAR( 'V' ) END IF IDUM2( 3 ) = 3 IF( LQUERY ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 4 IF( WANTZ ) THEN CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, $ JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 4, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 ICLUSTR( 1 ) = 0 END IF M = 0 WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN RETURN END IF * * Scale matrix to allowable range, if necessary. * ABSTLL = ABSTOL ISCALE = 0 IF( VALEIG ) THEN VLL = VL VUU = VU ELSE VLL = ZERO VUU = ZERO END IF * ANRM = PDLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ANRM = ANRM*SIGMA ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM ANRM = ANRM*SIGMA END IF * IF( ISCALE.EQ.1 ) THEN CALL PDLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) IF( ABSTOL.GT.0 ) $ ABSTLL = ABSTOL*SIGMA IF( VALEIG ) THEN VLL = VL*SIGMA VUU = VU*SIGMA IF( VUU.EQ.VLL ) THEN VUU = VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) END IF END IF END IF * * Call PDSYNTRD to reduce symmetric matrix to tridiagonal form. * LALLWORK = LLWORK * CALL PDSYNTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * * Copy the values of D, E to all processes * * Here PxLARED1D is used to redistribute the tridiagonal matrix. * PxLARED1D, however, doesn't yet work with arbritary matrix * distributions so we have PxELGET as a backup. * OFFSET = 0 IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 ) $ THEN CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), $ WORK( INDWORK ), LLWORK ) * CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDE ), WORK( INDE2 ), $ WORK( INDWORK ), LLWORK ) IF( .NOT.LOWER ) $ OFFSET = 1 ELSE DO 10 I = 1, N CALL PDELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, $ I+JA-1, DESCA ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, $ I+JA, DESCA ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, $ I+JA-1, DESCA ) 30 CONTINUE END IF END IF * * Call PDSTEBZ and, if eigenvectors are desired, PDSTEIN. * IF( WANTZ ) THEN ORDER = 'B' ELSE ORDER = 'E' END IF * CALL PDSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, $ WORK( INDD2 ), WORK( INDE2+OFFSET ), M, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWORK ), $ LLWORK, IWORK( 1 ), ISIZESTEBZ, IINFO ) * * * IF PDSTEBZ fails, the error propogates to INFO, but * we do not propogate the eigenvalue(s) which failed because: * 1) This should never happen if the user specifies * ABSTOL = 2 * PDLAMCH( 'U' ) * 2) PDSTEIN will confirm/deny whether the eigenvalues are * close enough. * IF( IINFO.NE.0 ) THEN INFO = INFO + IERREBZ DO 40 I = 1, M IWORK( INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) 40 CONTINUE END IF IF( WANTZ ) THEN * IF( VALEIG ) THEN * * Compute the maximum number of eigenvalues that we can * compute in the * workspace that we have, and that we can store in Z. * * Loop through the possibilities looking for the largest * NZ that we can feed to PDSTEIN and PDORMTR * * Since all processes must end up with the same value * of NZ, we first compute the minimum of LALLWORK * CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LALLWORK, 1, 1, 1, -1, $ -1, -1 ) * MAXEIGS = DESCZ( N_ ) * DO 50 NZ = MIN( MAXEIGS, M ), 0, -1 MQ0 = NUMROC( NZ, NB, 0, 0, NPCOL ) SIZESTEIN = ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) SIZEORMTR = MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + $ NB*NB * SIZESYEVX = MAX( SIZESTEIN, SIZEORMTR ) IF( SIZESYEVX.LE.LALLWORK ) $ GO TO 60 50 CONTINUE 60 CONTINUE ELSE NZ = M END IF NZ = MAX( NZ, 0 ) IF( NZ.NE.M ) THEN INFO = INFO + IERRSPC * DO 70 I = 1, M IFAIL( I ) = 0 70 CONTINUE * * The following code handles a rare special case * - NZ .NE. M means that we don't have enough room to store * all the vectors. * - NSPLIT .GT. 1 means that the matrix split * In this case, we cannot simply take the first NZ eigenvalues * because PDSTEBZ sorts the eigenvalues by block when * a split occurs. So, we have to make another call to * PDSTEBZ with a new upper limit - VUU. * IF( NSPLIT.GT.1 ) THEN CALL DLASRT( 'I', M, W, IINFO ) NZZ = 0 IF( NZ.GT.0 ) THEN * VUU = W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) IF( VLL.GE.VUU ) THEN NZZ = 0 ELSE CALL PDSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, $ IU, ABSTLL, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), NZZ, NSPLIT, W, $ IWORK( INDIBL ), IWORK( INDISP ), $ WORK( INDWORK ), LLWORK, IWORK( 1 ), $ ISIZESTEBZ, IINFO ) END IF * IF( MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN IF( NZZ.GT.NZ .OR. IINFO.NE.0 ) THEN INFO = INFO + IERREBZ END IF END IF END IF NZ = MIN( NZ, NZZ ) * END IF END IF CALL PDSTEIN( N, WORK( INDD2 ), WORK( INDE2+OFFSET ), NZ, W, $ IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, $ JZ, DESCZ, WORK( INDWORK ), LALLWORK, IWORK( 1 ), $ ISIZESTEIN, IFAIL, ICLUSTR, GAP, IINFO ) * IF( IINFO.GE.NZ+1 ) $ INFO = INFO + IERRCLS IF( MOD( IINFO, NZ+1 ).NE.0 ) $ INFO = INFO + IERREIN * * Z = Q * Z * * IF( NZ.GT.0 ) THEN CALL PDORMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) END IF * END IF * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL DSCAL( M, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN * RETURN * * End of PDSYEVX * END scalapack-1.8.0/SRC/pcmax1.f0000640000175000017500000003330710363532303015311 0ustar muammarmuammar SUBROUTINE PCMAX1( N, AMAX, INDX, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER INDX, INCX, IX, JX, N COMPLEX AMAX * .. * .. Array Arguments .. INTEGER DESCX( * ) COMPLEX X( * ) * .. * * Purpose * ======= * * PCMAX1 computes the global index of the maximum element in absolute * value of a distributed vector sub( X ). The global index is returned * in INDX and the value is returned in AMAX, * * where sub( X ) denotes X(IX:IX+N-1,JX) if INCX = 1, * X(IX,JX:JX+N-1) if INCX = M_X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * When the result of a vector-oriented PBLAS call is a scalar, it will * be made available only within the scope which owns the vector(s) * being operated on. Let X be a generic term for the input vector(s). * Then, the processes which receive the answer will be (note that if * an operation involves more than one vector, the processes which re- * ceive the result will be the union of the following calculation for * each vector): * * If N = 1, M_X = 1 and INCX = 1, then one can't determine if a process * row or process column owns the vector operand, therefore only the * process of coordinate {RSRC_X, CSRC_X} receives the result; * * If INCX = M_X, then sub( X ) is a vector distributed over a process * row. Each process part of this row receives the result; * * If INCX = 1, then sub( X ) is a vector distributed over a process * column. Each process part of this column receives the result; * * Based on PCAMAX from Level 1 PBLAS. The change is to use the * 'genuine' absolute value. * * The serial version was contributed to LAPACK by Nick Higham for use * with CLACON. * * Arguments * ========= * * N (global input) pointer to INTEGER * The number of components of the distributed vector sub( X ). * N >= 0. * * AMAX (global output) pointer to REAL * The absolute value of the largest entry of the distributed * vector sub( X ) only in the scope of sub( X ). * * INDX (global output) pointer to INTEGER * The global index of the element of the distributed vector * sub( X ) whose real part has maximum absolute value. * * X (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ZERO PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. CHARACTER CBTOP, CCTOP, RBTOP, RCTOP INTEGER ICOFF, ICTXT, IDUMM, IIX, IROFF, IXCOL, IXROW, $ JJX, LCINDX, LDX, MAXPOS, MYCOL, MYROW, NP, $ NPCOL, NPROW, NQ * .. * .. Local Arrays .. COMPLEX WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOMBAMAX1, CGAMX2D, $ IGEBR2D, IGEBS2D, INFOG2L, PCTREECOMB, $ PB_TOPGET * .. * .. External Functions .. LOGICAL LSAME INTEGER ICMAX1, INDXL2G, NUMROC EXTERNAL ICMAX1, INDXL2G, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, MOD, NINT, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Quick return if possible. * INDX = 0 AMAX = ZERO IF( N.LE.0 ) $ RETURN * * Retrieve local information for vector X. * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, $ IXROW, IXCOL ) * IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN INDX = JX AMAX = X( IIX+(JJX-1)*LDX ) RETURN END IF * * Find the maximum value and its index * IF( INCX.EQ.DESCX( M_ ) ) THEN * IF( MYROW.EQ.IXROW ) THEN * ICOFF = MOD( JX-1, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) $ NQ = NQ-ICOFF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', RBTOP ) * IF( LSAME( RBTOP, ' ' ) ) THEN * IF( NQ.GT.0 ) THEN LCINDX = JJX-1+ICMAX1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) WORK( 1 ) = X( IIX+(LCINDX-1)*LDX ) WORK( 2 ) = CMPLX( REAL( INDXL2G( LCINDX, $ DESCX( NB_ ), MYCOL, DESCX( CSRC_ ), NPCOL ) ) ) ELSE WORK( 1 ) = ZERO WORK( 2 ) = ZERO END IF * CALL PCTREECOMB( ICTXT, 'Row', 2, WORK, -1, MYCOL, $ CCOMBAMAX1 ) * AMAX = WORK( 1 ) IF( AMAX.EQ.ZERO ) THEN INDX = JX ELSE INDX = NINT( REAL( WORK( 2 ) ) ) END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP ) * IF( NQ.GT.0 ) THEN LCINDX = JJX-1+ICMAX1( NQ, X( IIX+(JJX-1)*LDX ), LDX ) AMAX = X( IIX + (LCINDX-1)*LDX ) ELSE AMAX = ZERO END IF * * Find the maximum value * CALL CGAMX2D( ICTXT, 'Rowwise', RCTOP, 1, 1, AMAX, 1, $ IDUMM, MAXPOS, 1, -1, MYROW ) * IF( AMAX.NE.ZERO ) THEN * * Broadcast corresponding global index * IF( MYCOL.EQ.MAXPOS ) THEN INDX = INDXL2G( LCINDX, DESCX( NB_ ), MYCOL, $ DESCX( CSRC_ ), NPCOL ) CALL IGEBS2D( ICTXT, 'Rowwise', RBTOP, 1, 1, INDX, $ 1 ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', RBTOP, 1, 1, INDX, $ 1, MYROW, MAXPOS ) END IF * ELSE * INDX = JX * END IF * END IF * END IF * ELSE * IF( MYCOL.EQ.IXCOL ) THEN * IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) $ NP = NP-IROFF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) * IF( LSAME( CBTOP, ' ' ) ) THEN * IF( NP.GT.0 ) THEN LCINDX = IIX-1+ICMAX1( NP, X( IIX+(JJX-1)*LDX ), 1 ) WORK( 1 ) = X( LCINDX + (JJX-1)*LDX ) WORK( 2 ) = CMPLX( REAL( INDXL2G( LCINDX, $ DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) ) ) ELSE WORK( 1 ) = ZERO WORK( 2 ) = ZERO END IF * CALL PCTREECOMB( ICTXT, 'Column', 2, WORK, -1, MYCOL, $ CCOMBAMAX1 ) * AMAX = WORK( 1 ) IF( AMAX.EQ.ZERO ) THEN INDX = IX ELSE INDX = NINT( REAL( WORK( 2 ) ) ) END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP ) * IF( NP.GT.0 ) THEN LCINDX = IIX-1+ICMAX1( NP, X( IIX+(JJX-1)*LDX ), 1 ) AMAX = X( LCINDX + (JJX-1)*LDX ) ELSE AMAX = ZERO END IF * * Find the maximum value * CALL CGAMX2D( ICTXT, 'Columnwise', CCTOP, 1, 1, AMAX, 1, $ MAXPOS, IDUMM, 1, -1, MYCOL ) * IF( AMAX.NE.ZERO ) THEN * * Broadcast corresponding global index * IF( MYROW.EQ.MAXPOS ) THEN INDX = INDXL2G( LCINDX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW ) CALL IGEBS2D( ICTXT, 'Columnwise', CBTOP, 1, 1, $ INDX, 1 ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', CBTOP, 1, 1, $ INDX, 1, MAXPOS, MYCOL ) END IF * ELSE * INDX = IX * END IF * END IF * END IF * END IF * RETURN * * End of PCMAX1 * END * SUBROUTINE CCOMBAMAX1 ( V1, V2 ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Array Arguments .. COMPLEX V1( 2 ), V2( 2 ) * .. * * Purpose * ======= * * CCOMBAMAX1 finds the element having maximum real part absolute * value as well as its corresponding globl index. * * Arguments * ========= * * V1 (local input/local output) COMPLEX array of * dimension 2. The first maximum absolute value element and * its global index. V1(1) = AMAX, V1(2) = INDX. * * V2 (local input) COMPLEX array of dimension 2. * The second maximum absolute value element and its global * index. V2(1) = AMAX, V2(2) = INDX. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, REAL * .. * .. Executable Statements .. * IF( ABS( REAL( V1( 1 ) ) ).LT.ABS( REAL( V2( 1 ) ) ) ) THEN V1( 1 ) = V2( 1 ) V1( 2 ) = V2( 2 ) END IF * RETURN * * End of CCOMBAMAX1 * END scalapack-1.8.0/SRC/pchettrd.f0000640000175000017500000012335110363532303015734 0ustar muammarmuammar SUBROUTINE PCHETTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * October 15, 1999 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL D( * ), E( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * * ======= * * PCHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian * tridiagonal form T by an unitary similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding * process and memory location. * * Let A be a generic term for any 2D block cyclicly distributed * array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- ----------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, * indicating the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to * distribute the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to * distribute the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the * first row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes * of its process row. * The values of LOCp() and LOCq() may be determined via a call to * the ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * Hermitian matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)). * On entry, this array contains the local pieces of the * Hermitian distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the unitary matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) REAL array, dim LOCq(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) REAL array, dim LOCq(JA+N-1) * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCq(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal workspace * * LWORK (local input) INTEGER * The dimension of the array WORK. * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS * Where: * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PCHETTRD', 'L', 0, 0, * 0, 0 ) * * NUMROC is a ScaLAPACK tool function; * PJLAENV is a ScaLAPACK envionmental inquiry function * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of * elementary reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of * elementary reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and * vi denotes an element of the vector defining H(i). * * Data storage requirements * ========================= * * PCHETTRD is not intended to be called directly. All users are * encourage to call PCHETRD which will then call PCHETTRD if * appropriate. A must be in cyclic format (i.e. MB = NB = 1), * the process grid must be square ( i.e. NPROW = NPCOL ) and * only lower triangular storage is supported. * * Local variables * =============== * * PCHETTRD uses five local arrays: * WORK ( InV ) dimension ( NP, ANB+1): array V * WORK ( InH ) dimension ( NP, ANB+1): array H * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT * * Arrays V and H are replicated across all processor columns. * Arrays V^T and H^T are replicated across all processor rows. * * WORK ( InVT ), or V^T, is stored as a tall skinny * array ( NQ x ANB-1 ) for efficiency. Since only the lower * triangular portion of A is updated, Av is computed as: * tril(A) * v + v^T * tril(A,-1). This is performed as * two local triangular matrix-vector multiplications (both in * MVR2) followed by a transpose and a sum across the columns. * In the local computation, WORK( InVT ) is used to compute * tril(A) * v and WORK( InV ) is used to compute * v^T * tril(A,-1) * * The following variables are global indices into A: * INDEX: The current global row and column number. * MAXINDEX: The global row and column for the first row and * column in the trailing block of A. * LIIB, LIJB: The first row, column in * * The following variables point into the arrays A, V, H, V^T, H^T: * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T. * LII: local index I: The local row number for row INDEX * LIJ: local index J: The local column number for column INDEX * LIIP1: local index I+1: The local row number for row INDEX+1 * LIJP1: local index J+1: The local col number for col INDEX+1 * LTLI: lower triangular local index I: The local row for the * upper left entry in tril( A(INDEX, INDEX) ) * LTLIP1: lower triangular local index I+1: The local row for the * upper left entry in tril( A(INDEX+1, INDEX+1) ) * * Details: The distinction between LII and LTLI (and between * LIIP1 and LTLIP1) is subtle. Within the current processor * column (i.e. MYCOL .eq. CURCOL) they are the same. However, * on some processors, A( LII, LIJ ) points to an element * above the diagonal, on these processors, LTLI = LII+1. * * The following variables give the number of rows and/or columns * in various matrices: * NP: The number of local rows in A( 1:N, 1:N ) * NQ: The number of local columns in A( 1:N, 1:N ) * NPM0: The number of local rows in A( INDEX:N, INDEX:N ) * NQM0: The number of local columns in A( INDEX:N, INDEX:N ) * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N ) * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N ) * LTNM0: The number of local rows & columns in * tril( A( INDEX:N, INDEX:N ) ) * LTNM1: The number of local rows & columns in * tril( A( INDEX+1:N, INDEX+1:N ) ) * NOTE: LTNM0 == LTNM1 on all processors except the diagonal * processors, i.e. those where MYCOL == MYROW. * * Invariants: * NP = NPM0 + LII - 1 * NQ = NQM0 + LIJ - 1 * NP = NPM1 + LIIP1 - 1 * NQ = NQM1 + LIJP1 - 1 * NP = LTLI + LTNM0 - 1 * NP = LTLIP1 + LTNM1 - 1 * * Temporary variables. The following variables are used within * a few lines after they are set and do hold state from one loop * iteration to the next: * * The matrix A: * The matrix A does not hold the same values that it would * in an unblocked code nor the values that it would hold in * in a blocked code. * * The value of A is confusing. It is easiest to state the * difference between trueA and A at the point that MVR2 is called, * so we will start there. * * Let trueA be the value that A would * have at a given point in an unblocked code and A * be the value that A has in this code at the same point. * * At the time of the call to MVR2, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ). * * At the bottom of the inner loop, * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) and * v = V( liip1:N, BINDEX+1 ) and * h = H( liip1:N, BINDEX+1 ) * * At the top of the loop, BINDEX gets incremented, hence: * trueA = A + V' * H + H' * V + v' * h + h' * v * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and * V = V( MAXINDEX:N, 1:BINDEX-1 ) and * v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) * * * A gets updated at the bottom of the outer loop * After this update, trueA = A + v' * h + h' * v * where v = V( liip1:N, BINDEX ) and * h = H( liip1:N, BINDEX ) and BINDEX = 0 * Indeed, the previous loop invariant as stated above for the * top of the loop still holds, but with BINDEX = 0, H and V * are null matrices. * * After the current column of A is updated, * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N ) * the rest of A is untouched. * * After the current block column of A is updated, * trueA = A + V' * H + H' * V * where H = H( MAXINDEX:N, 1:BINDEX ) and * V = V( MAXINDEX:N, 1:BINDEX ) * * This brings us back to the point at which mvr2 is called. * * * Details of the parallelization: * * We delay spreading v across to all processor columns (which * would naturally happen at the bottom of the loop) in order to * combine the spread of v( : , i-1 ) with the spread of h( : , i ) * * In order to compute h( :, i ), we must update A( :, i ) * which means that the processor column owning A( :, i ) must * have: c, tau, v( i, i ) and h( i, i ). * * The traditional * way of computing v (and the one used in pzlatrd.f and * zlatrd.f) is: * v = tau * v * c = v' * h * alpha = - tau * c / 2 * v = v + alpha * h * However, the traditional way of computing v requires that tau * be broadcast to all processors in the current column (to compute * v = tau * v) and then a sum-to-all is required (to * compute v' * h ). We use the following formula instead: * c = v' * h * v = tau * ( v - c * tau' * h / 2 ) * The above formula allows tau to be spread down in the * same call to SGSUM2D which performs the sum-to-all of c. * * The computation of v, which could be performed in any processor * column (or other procesor subsets), is performed in the * processor column that owns A( :, i+1 ) so that A( :, i+1 ) * can be updated prior to spreading v across. * * We keep the block column of A up-to-date to minimize the * work required in updating the current column of A. Updating * the block column of A is reasonably load balanced whereas * updating the current column of A is not (only the current * processor column is involved). * * In the following overview of the steps performed, M in the * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p)) * or more flops per processor. * * Inner loop: * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) ) *M h = house( A(index:n, index) ) *M Spread v, h across *M vt = v^T; ht = h^T * A( index+1:n, index+1:maxindex ) -= * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) ) *C v = tril(A) * h; vt = ht * tril(A,-1) *MorC v = v - H*V*h - V*H*h *M v = v + vt^T *M c = v' * h * v = tau * ( v - c * tau' * h / 2 ) *C A = A - H*V - V*H * * * * ================================================================= * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E0 ) COMPLEX Z_ONE, Z_NEGONE, Z_ZERO PARAMETER ( Z_ONE = 1.0E0, Z_NEGONE = -1.0E0, $ Z_ZERO = 0.0E0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * * * .. Local Scalars .. * * LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX, $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT, $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA, $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1, $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX, $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP, $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ, $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX, $ PBMIN, PBSIZE, PNB, ROWSPERPROC REAL NORM, SAFMAX, SAFMIN COMPLEX ALPHA, BETA, C, ONEOVERBETA, TOPH, TOPNV, $ TOPTAU, TOPV, TTOPH, TTOPV * .. * .. Local Arrays .. * * * * INTEGER IDUM1( 1 ), IDUM2( 1 ) REAL DTMP( 5 ) COMPLEX CC( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CGEBR2D, CGEBS2D, CGEMM, CGEMV, $ CGERV2D, CGESD2D, CGSUM2D, CHK1MAT, CLACPY, $ CSCAL, CTRMVT, PCHK1MAT, PSTREECOMB, PXERBLA, $ SCOMBNRM2, SGEBR2D, SGEBS2D, SGSUM2D * .. * .. External Functions .. * LOGICAL LSAME INTEGER ICEIL, NUMROC, PJLAENV REAL PSLAMCH, SCNRM2 EXTERNAL LSAME, ICEIL, NUMROC, PJLAENV, PSLAMCH, SCNRM2 * .. * .. Intrinsic Functions .. INTRINSIC AIMAG, CMPLX, CONJG, ICHAR, MAX, MIN, MOD, $ REAL, SIGN, SQRT * .. * * * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * * * Further details * =============== * * At the top of the loop, v and nh have been computed but not * spread across. Hence, A is out-of-date even after the * rank 2k update. Furthermore, we compute the next v before * nh is spread across. * * I claim that if we used a sum-to-all on NV, by summing CC within * each column, that we could compute NV locally and could avoid * spreading V across. Bruce claims that sum-to-all can be made * to cost no more than sum-to-one on the Paragon. If that is * true, this would be a win. But, * the BLACS sum-to-all is just a sum-to-one followed by a broadcast, * and hence the present scheme is better for now. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * SAFMAX = SQRT( PSLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PSLAMCH( ICTXT, 'S' ) ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) ELSE * * Here we set execution options for PCHETTRD * PNB = PJLAENV( ICTXT, 2, 'PCHETTRD', 'L', 0, 0, 0, 0 ) ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) * INTERLEAVE = ( PJLAENV( ICTXT, 4, 'PCHETTRD', 'L', 1, 0, 0, $ 0 ).EQ.1 ) TWOGEMMS = ( PJLAENV( ICTXT, 4, 'PCHETTRD', 'L', 2, 0, 0, $ 0 ).EQ.1 ) BALANCED = ( PJLAENV( ICTXT, 4, 'PCHETTRD', 'L', 3, 0, 0, $ 0 ).EQ.1 ) * CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) * * UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 .AND. DESCA( NB_ ).NE.1 ) $ INFO = 600 + NB_ IF( INFO.EQ.0 ) THEN * * * Here is the arithmetic: * Let maxnpq = max( np, nq, 2 * ANB ) * LDV = 4 * max( np, nq ) + 2 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB ) * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS * * This overestimates memory requirements when ANB > NP/2 * Memory requirements are lower when interleave = .false. * Hence, we could have two sets of memory requirements, * one for interleave and one for * * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB ) LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS * WORK( 1 ) = CMPLX( LWMIN ) IF( .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IA.NE.1 ) THEN INFO = -4 ELSE IF( JA.NE.1 ) THEN INFO = -5 ELSE IF( NPROW.NE.NPCOL ) THEN INFO = -( 600+CTXT_ ) ELSE IF( DESCA( DTYPE_ ).NE.1 ) THEN INFO = -( 600+DTYPE_ ) ELSE IF( DESCA( MB_ ).NE.1 ) THEN INFO = -( 600+MB_ ) ELSE IF( DESCA( NB_ ).NE.1 ) THEN INFO = -( 600+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.0 ) THEN INFO = -( 600+RSRC_ ) ELSE IF( DESCA( CSRC_ ).NE.0 ) THEN INFO = -( 600+CSRC_ ) ELSE IF( LWORK.LT.LWMIN ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 * CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHETTRD', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * * Reduce the lower triangle of sub( A ) NP = NUMROC( N, 1, MYROW, 0, NPROW ) NQ = NUMROC( N, 1, MYCOL, 0, NPCOL ) * NXTROW = 0 NXTCOL = 0 * LIIP1 = 1 LIJP1 = 1 NPM1 = NP NQM1 = NQ * LDA = DESCA( LLD_ ) ICTXT = DESCA( CTXT_ ) * * * * Miscellaneous details: * Put tau, D and E in the right places * Check signs * Place all the arrays in WORK, control their placement * in memory. * * * * Loop invariants * A(LIIP1, LIJ) points to the first element of A(I+1,J) * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N ) * A(LII:N,LIJ:N) is one step out of date. * proc( CURROW, CURCOL ) owns A(LII,LIJ) * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ) * INH = 1 * IF( INTERLEAVE ) THEN * * H and V are interleaved to minimize memory movement * LDV has to be twice as large to accomodate interleaving. * In addition, LDV is doubled again to allow v, h and * toptau to be spreaad across and transposed in a * single communication operation with minimum memory * movement. * * We could reduce LDV back to 2*MAX(NPM1,NQM1) * by increasing the memory movement required in * the spread and transpose of v, h and toptau. * However, since the non-interleaved path already * provides a mear minimum memory requirement option, * we did not provide this additional path. * LDV = 4*( MAX( NPM1, NQM1 ) ) + 2 * INH = 1 * INV = INH + LDV / 2 INVT = INH + ( ANB+1 )*LDV * INHT = INVT + LDV / 2 INTMP = INVT + LDV*( ANB+1 ) * ELSE LDV = MAX( NPM1, NQM1 ) * INHT = INH + LDV*( ANB+1 ) INV = INHT + LDV*( ANB+1 ) * * The code works without this +1, but only because of a * coincidence. Without the +1, WORK(INVT) gets trashed, but * WORK(INVT) is only used once and when it is used, it is * multiplied by WORK( INH ) which is zero. Hence, the fact * that WORK(INVT) is trashed has no effect. * INVT = INV + LDV*( ANB+1 ) + 1 INTMP = INVT + LDV*( 2*ANB ) * END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHETTRD', -INFO ) WORK( 1 ) = CMPLX( LWMIN ) RETURN END IF * * * The satisfies the loop invariant: trueA = A - V * HT - H * VT, * (where V, H, VT and HT all have BINDEX+1 rows/columns) * the first ANB times through the loop. * * * * Setting either ( InH and InHT ) or InV to Z_ZERO * is adequate except in the face of NaNs. * * DO 10 I = 1, NP WORK( INH+I-1 ) = Z_ZERO WORK( INV+I-1 ) = Z_ZERO 10 CONTINUE DO 20 I = 1, NQ WORK( INHT+I-1 ) = Z_ZERO 20 CONTINUE * * * TOPNV = Z_ZERO * LTLIP1 = LIJP1 LTNM1 = NPM1 IF( MYCOL.GT.MYROW ) THEN LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * DO 210 MININDEX = 1, N - 1, ANB * * MAXINDEX = MIN( MININDEX+ANB-1, N ) LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1 * NQB = NQ - LIJB + 1 NPB = NP - LIIB + 1 INHTB = INHT + LIJB - 1 INVTB = INVT + LIJB - 1 INHB = INH + LIIB - 1 INVB = INV + LIIB - 1 * * * * DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 ) * BINDEX = INDEX - MININDEX * CURROW = NXTROW CURCOL = NXTCOL * NXTROW = MOD( CURROW+1, NPROW ) NXTCOL = MOD( CURCOL+1, NPCOL ) * LII = LIIP1 LIJ = LIJP1 NPM0 = NPM1 * IF( MYROW.EQ.CURROW ) THEN NPM1 = NPM1 - 1 LIIP1 = LIIP1 + 1 END IF IF( MYCOL.EQ.CURCOL ) THEN NQM1 = NQM1 - 1 LIJP1 = LIJP1 + 1 LTLIP1 = LTLIP1 + 1 LTNM1 = LTNM1 - 1 END IF * * * * * V = NV, VT = NVT, H = NH, HT = NHT * * * Update the current column of A * * IF( MYCOL.EQ.CURCOL ) THEN * INDEXA = LII + ( LIJ-1 )*LDA INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV TTOPH = CONJG( WORK( INHT+LIJ-1+BINDEX*LDV ) ) TTOPV = CONJG( TOPNV ) * IF( INDEX.GT.1 ) THEN DO 30 I = 0, NPM0 - 1 * A( INDEXA+I ) = A( INDEXA+I ) A( INDEXA+I ) = A( INDEXA+I ) - $ WORK( INDEXINV+LDV+I )*TTOPH - $ WORK( INDEXINH+LDV+I )*TTOPV 30 CONTINUE END IF * * END IF * * IF( MYCOL.EQ.CURCOL ) THEN * * Compute the householder vector * IF( MYROW.EQ.CURROW ) THEN DTMP( 2 ) = REAL( A( LII+( LIJ-1 )*LDA ) ) ELSE DTMP( 2 ) = ZERO END IF IF( MYROW.EQ.NXTROW ) THEN DTMP( 3 ) = REAL( A( LIIP1+( LIJ-1 )*LDA ) ) DTMP( 4 ) = AIMAG( A( LIIP1+( LIJ-1 )*LDA ) ) ELSE DTMP( 3 ) = ZERO DTMP( 4 ) = ZERO END IF * NORM = SCNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 ) DTMP( 1 ) = NORM * * IF DTMP(5) = 1.0, NORM is too large and might cause * overflow, hence PSTREECOMB must be called. IF DTMP(5) * is zero on output, DTMP(1) can be trusted. * DTMP( 5 ) = ZERO IF( DTMP( 1 ).GE.SAFMAX .OR. DTMP( 1 ).LT.SAFMIN ) THEN DTMP( 5 ) = ONE DTMP( 1 ) = ZERO END IF * DTMP( 1 ) = DTMP( 1 )*DTMP( 1 ) CALL SGSUM2D( ICTXT, 'C', ' ', 5, 1, DTMP, 5, -1, $ CURCOL ) IF( DTMP( 5 ).EQ.ZERO ) THEN DTMP( 1 ) = SQRT( DTMP( 1 ) ) ELSE DTMP( 1 ) = NORM CALL PSTREECOMB( ICTXT, 'C', 1, DTMP, -1, MYCOL, $ SCOMBNRM2 ) END IF * NORM = DTMP( 1 ) * D( LIJ ) = DTMP( 2 ) IF( MYROW.EQ.CURROW .AND. MYCOL.EQ.CURCOL ) THEN A( LII+( LIJ-1 )*LDA ) = CMPLX( D( LIJ ), ZERO ) END IF * * ALPHA = CMPLX( DTMP( 3 ), DTMP( 4 ) ) * NORM = SIGN( NORM, REAL( ALPHA ) ) * IF( NORM.EQ.ZERO ) THEN TOPTAU = ZERO ELSE BETA = NORM + ALPHA TOPTAU = BETA / NORM ONEOVERBETA = 1.0E0 / BETA * CALL CSCAL( NPM1, ONEOVERBETA, $ A( LIIP1+( LIJ-1 )*LDA ), 1 ) END IF * IF( MYROW.EQ.NXTROW ) THEN A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE END IF * TAU( LIJ ) = TOPTAU E( LIJ ) = -NORM * END IF * * * Spread v, nh, toptau across * DO 40 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+ $ ( LIJ-1 )*LDA ) 40 CONTINUE * IF( MYCOL.EQ.CURCOL ) THEN WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU CALL CGEBS2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1 ) ELSE CALL CGEBR2D( ICTXT, 'R', ' ', NPM1+NPM1+1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), $ NPM1+NPM1+1, MYROW, CURCOL ) TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) END IF DO 50 I = 0, NPM1 - 1 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+BINDEX*LDV+NPM1+I ) 50 CONTINUE * IF( INDEX.LT.N ) THEN IF( MYROW.EQ.NXTROW .AND. MYCOL.EQ.CURCOL ) $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ ) END IF * * Transpose v, nh * * IF( MYROW.EQ.MYCOL ) THEN DO 60 I = 0, NPM1 + NPM1 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+ $ BINDEX*LDV+I ) 60 CONTINUE ELSE CALL CGESD2D( ICTXT, NPM1+NPM1, 1, $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1, $ MYCOL, MYROW ) CALL CGERV2D( ICTXT, NQM1+NQM1, 1, $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1, $ MYCOL, MYROW ) END IF * DO 70 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+ $ LIJP1-1+BINDEX*LDV+NQM1+I ) 70 CONTINUE * * * Update the current block column of A * IF( INDEX.GT.1 ) THEN DO 90 J = LIJP1, LIJB - 1 DO 80 I = 0, NPM1 - 1 * A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA ) $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )* $ CONJG( WORK( INHT+J-1+BINDEX*LDV ) ) - $ WORK( INH+LIIP1-1+BINDEX*LDV+I )* $ CONJG( WORK( INVT+J-1+BINDEX*LDV ) ) 80 CONTINUE 90 CONTINUE END IF * * * * Compute NV = A * NHT; NVT = A * NH * * These two lines are necessary because these elements * are not always involved in the calls to CTRMVT * for two reasons: * 1) On diagonal processors, the call to TRMVT * involves only LTNM1-1 elements * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1 * and when the results are combined across all processes, * uninitialized values may be included. WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO * * IF( MYROW.EQ.MYCOL ) THEN IF( LTNM1.GT.1 ) THEN CALL CTRMVT( 'L', LTNM1-1, $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1, $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ), $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+ $ 1 )*LDV ), 1 ) END IF DO 100 I = 1, LTNM1 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) + $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )* $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV ) 100 CONTINUE ELSE IF( LTNM1.GT.0 ) $ CALL CTRMVT( 'L', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ), $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )* $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+ $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+ $ ( BINDEX+1 )*LDV ), 1, $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ), $ 1 ) * END IF * * * We take advantage of the fact that: * A * sum( B ) = sum ( A * B ) for matrices A,B * * trueA = A + V * HT + H * VT * hence: (trueA)v = Av' + V * HT * v + H * VT * v * VT * v = sum_p_in_NPROW ( VTp * v ) * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v ) * * v = v + V * HT * h + H * VT * h * * * * tmp = HT * nh1 DO 110 I = 1, 2*( BINDEX+1 ) WORK( INTMP-1+I ) = 0 110 CONTINUE * IF( BALANCED ) THEN NPSET = NPROW MYSETNUM = MYROW ROWSPERPROC = ICEIL( NQB, NPSET ) MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 ) * * * tmp = HT * v * CALL CGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INHTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL CGEMV( 'C', NUMROWS, BINDEX+1, Z_ONE, $ WORK( INVTB+MYFIRSTROW-1 ), LDV, $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ), $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * * CALL CGSUM2D( ICTXT, 'C', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) ELSE * tmp = HT * v * CALL CGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INHTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP ), 1 ) * tmp2 = VT * v CALL CGEMV( 'C', NQB, BINDEX+1, Z_ONE, WORK( INVTB ), $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1, $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 ) * END IF * * * IF( BALANCED ) THEN MYSETNUM = MYCOL * ROWSPERPROC = ICEIL( NPB, NPSET ) MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM ) NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 ) * CALL CGSUM2D( ICTXT, 'R', ' ', 2*( BINDEX+1 ), 1, $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 ) * * * v = v + V * tmp IF( INDEX.GT.1. ) THEN CALL CGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INVB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) * * v = v + H * tmp2 CALL CGEMV( 'N', NUMROWS, BINDEX+1, Z_NEGONE, $ WORK( INHB+MYFIRSTROW-1 ), LDV, $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )* $ LDV ), 1 ) END IF * ELSE * v = v + V * tmp CALL CGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ), $ LDV, WORK( INTMP ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * * * v = v + H * tmp2 CALL CGEMV( 'N', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ), $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE, $ WORK( INVB+( BINDEX+1 )*LDV ), 1 ) * END IF * * * Transpose NV and add it back into NVT * IF( MYROW.EQ.MYCOL ) THEN DO 120 I = 0, NQM1 - 1 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+ $ I ) 120 CONTINUE ELSE CALL CGESD2D( ICTXT, NQM1, 1, $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), $ NQM1, MYCOL, MYROW ) CALL CGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL, $ MYROW ) * END IF DO 130 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1- $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I ) 130 CONTINUE * * Sum-to-one NV rowwise (within a row) * CALL CGSUM2D( ICTXT, 'R', ' ', NPM1, 1, $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1, $ MYROW, NXTCOL ) * * * Dot product c = NV * NH * Sum-to-all c within next processor column * * IF( MYCOL.EQ.NXTCOL ) THEN CC( 1 ) = Z_ZERO DO 140 I = 0, NPM1 - 1 CC( 1 ) = CC( 1 ) + CONJG( WORK( INV+LIIP1-1+( BINDEX+ $ 1 )*LDV+I ) )*WORK( INH+LIIP1-1+ $ ( BINDEX+1 )*LDV+I ) 140 CONTINUE IF( MYROW.EQ.NXTROW ) THEN CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV ) ELSE CC( 2 ) = Z_ZERO CC( 3 ) = Z_ZERO END IF CALL CGSUM2D( ICTXT, 'C', ' ', 3, 1, CC, 3, -1, NXTCOL ) * TOPV = CC( 2 ) C = CC( 1 ) TOPH = CC( 3 ) * TOPNV = TOPTAU*( TOPV-C*CONJG( TOPTAU ) / 2*TOPH ) * * * Compute V = Tau * (V - C * Tau' / 2 * H ) * * DO 150 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU* $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C* $ CONJG( TOPTAU ) / 2*WORK( INH+LIIP1-1+( BINDEX+1 )* $ LDV+I ) ) 150 CONTINUE * END IF * * 160 CONTINUE * * * Perform the rank2k update * IF( MAXINDEX.LT.N ) THEN * DO 170 I = 0, NPM1 - 1 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I ) 170 CONTINUE * * * IF( .NOT.TWOGEMMS ) THEN IF( INTERLEAVE ) THEN LDZG = LDV / 2 ELSE CALL CLACPY( 'A', LTNM1, ANB, WORK( INHT+LIJP1-1 ), $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV ) * CALL CLACPY( 'A', LTNM1, ANB, WORK( INV+LTLIP1-1 ), $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV ) LDZG = LDV END IF NBZG = ANB*2 ELSE LDZG = LDV NBZG = ANB END IF * * DO 180 PBMIN = 1, LTNM1, PNB * PBSIZE = MIN( PNB, LTNM1-PBMIN+1 ) PBMAX = MIN( LTNM1, PBMIN+PNB-1 ) CALL CGEMM( 'N', 'C', PBSIZE, PBMAX, NBZG, Z_NEGONE, $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) IF( TWOGEMMS ) THEN CALL CGEMM( 'N', 'C', PBSIZE, PBMAX, ANB, Z_NEGONE, $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG, $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE, $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA ) END IF 180 CONTINUE * * * DO 190 I = 0, NPM1 - 1 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I ) WORK( INH+LIIP1-1+I ) = WORK( INTMP+I ) 190 CONTINUE DO 200 I = 0, NQM1 - 1 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I ) 200 CONTINUE * * END IF * * End of the update A code * 210 CONTINUE * IF( MYCOL.EQ.NXTCOL ) THEN IF( MYROW.EQ.NXTROW ) THEN * D( NQ ) = REAL( A( NP+( NQ-1 )*LDA ) ) A( NP+( NQ-1 )*LDA ) = D( NQ ) * CALL SGEBS2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1 ) ELSE CALL SGEBR2D( ICTXT, 'C', ' ', 1, 1, D( NQ ), 1, NXTROW, $ NXTCOL ) END IF END IF * * * * WORK( 1 ) = CMPLX( LWMIN ) RETURN * * End of PCHETTRD * * END scalapack-1.8.0/SRC/pzung2l.f0000640000175000017500000002573210363532303015524 0ustar muammarmuammar SUBROUTINE PZUNG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNG2L generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the last N columns of a product of K elementary reflectors of order M * * Q = H(k) . . . H(2) H(1) * * as returned by PZGEQLF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA+N-K <= j <= JA+N-1, * as returned by PZGEQLF in the K columns of its distributed * matrix argument A(IA:*,JA+N-K:JA+N-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQLF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MpA0 + MAX( 1, NqA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICTXT, J, JJ, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0 COMPLEX*16 TAUJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, PZLARF, $ PZLASET, PZSCAL * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MPA0 + MAX( 1, NQA0 ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * Initialise columns ja:ja+n-k-1 to columns of the unit matrix * CALL PZLASET( 'All', M-N, N-K, ZERO, ZERO, A, IA, JA, DESCA ) CALL PZLASET( 'All', N, N-K, ZERO, ONE, A, IA+M-N, JA, DESCA ) * TAUJ = ZERO NQA0 = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) ) DO 10 J = JA+N-K, JA+N-1 * * Apply H(j) to A(ia:ia+m-n+j-ja,ja:j) from the left * CALL PZELSET( A, IA+M-N+J-JA, J, DESCA, ONE ) CALL PZLARF( 'Left', M-N+J-JA+1, J-JA, A, IA, J, DESCA, 1, TAU, $ A, IA, JA, DESCA, WORK ) * JJ = INDXG2L( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL ) IACOL = INDXG2P( J, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ TAUJ = TAU( MIN( JJ, NQA0 ) ) CALL PZSCAL( M-N+J-JA, -TAUJ, A, IA, J, DESCA, 1 ) CALL PZELSET( A, IA+M-N+J-JA, J, DESCA, ONE-TAUJ ) * * Set A(ia+m-n+j-ja+1:ia+m-1,j) to zero * CALL PZLASET( 'All', JA+N-1-J, 1, ZERO, ZERO, A, IA+M-N+J-JA+1, $ J, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNG2L * END scalapack-1.8.0/SRC/pzunmbr.f0000640000175000017500000005437410363532303015624 0ustar muammarmuammar SUBROUTINE PZUNMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * If VECT = 'Q', PZUNMBR overwrites the general complex distributed * M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * If VECT = 'P', PZUNMBR overwrites sub( C ) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': P * sub( C ) sub( C ) * P * TRANS = 'C': P**H * sub( C ) sub( C ) * P**H * * Here Q and P**H are the unitary distributed matrices determined by * PZGEBRD when reducing a complex distributed matrix A(IA:*,JA:*) to * bidiagonal form: A(IA:*,JA:*) = Q * B * P**H. Q and P**H are defined * as products of elementary reflectors H(i) and G(i) respectively. * * Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the * order of the unitary matrix Q or P**H that is applied. * * If VECT = 'Q', A(IA:*,JA:*) is assumed to have been an NQ-by-K * matrix: * if nq >= k, Q = H(1) H(2) . . . H(k); * if nq < k, Q = H(1) H(2) . . . H(nq-1). * * If VECT = 'P', A(IA:*,JA:*) is assumed to have been a K-by-NQ * matrix: * if k < nq, P = G(1) G(2) . . . G(k); * if k >= nq, P = G(1) G(2) . . . G(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * VECT (global input) CHARACTER * = 'Q': apply Q or Q**H; * = 'P': apply P or P**H. * * SIDE (global input) CHARACTER * = 'L': apply Q, Q**H, P or P**H from the Left; * = 'R': apply Q, Q**H, P or P**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q or P; * = 'C': Conjugate transpose, apply Q**H or P**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * If VECT = 'Q', the number of columns in the original * distributed matrix reduced by PZGEBRD. * If VECT = 'P', the number of rows in the original * distributed matrix reduced by PZGEBRD. * K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+MIN(NQ,K)-1)) if * VECT='Q', and (LLD_A,LOCc(JA+NQ-1)) if VECT = 'P'. NQ = M * if SIDE = 'L', and NQ = N otherwise. The vectors which * define the elementary reflectors H(i) and G(i), whose * products determine the matrices Q and P, as returned by * PZGEBRD. * If VECT = 'Q', LLD_A >= max(1,LOCr(IA+NQ-1)); * if VECT = 'P', LLD_A >= max(1,LOCr(IA+MIN(NQ,K)-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16 array, dimension * LOCc(JA+MIN(NQ,K)-1) if VECT = 'Q', LOCr(IA+MIN(NQ,K)-1) if * VECT = 'P', TAU(i) must contain the scalar factor of the * elementary reflector H(i) or G(i), which determines Q or P, * as returned by PDGEBRD in its array argument TAUQ or TAUP. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, if VECT='Q', sub( C ) is overwritten by Q*sub( C ) * or Q'*sub( C ) or sub( C )*Q' or sub( C )*Q; if VECT='P, * sub( C ) is overwritten by P*sub( C ) or P'*sub( C ) or * sub( C )*P or sub( C )*P'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * NQ = M; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA+1; JAA=JA; MI=M-1; NI=N; ICC=IC+1; JCC=JC; * end if * else if SIDE = 'R', * NQ = N; * if( (VECT = 'Q' and NQ >= K) or (VECT <> 'Q' and NQ > K) ), * IAA=IA; JAA=JA; MI=M; NI=N; ICC=IC; JCC=JC; * else * IAA=IA; JAA=JA+1; MI=M; NI=N-1; ICC=IC; JCC=JC+1; * end if * end if * * If VECT = 'Q', * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * else if VECT <> 'Q', * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( MI+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * end if * * where LCMP = LCM / NPROW, LCMQ = LCM / NPCOL, with * LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JAA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( MI+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If VECT = 'Q', * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * else * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN CHARACTER TRANST INTEGER IAA, IACOL, IAROW, ICC, ICCOL, ICOFFA, ICOFFC, $ ICROW, ICTXT, IINFO, IROFFA, IROFFC, JAA, JCC, $ LCM, LCMP, LCMQ, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PXERBLA, $ PZUNMLQ, PZUNMQR * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE APPLYQ = LSAME( VECT, 'Q' ) LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q or P * IF( LEFT ) THEN NQ = M IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA + 1 JAA = JA MI = M - 1 NI = N ICC = IC + 1 JCC = JC END IF * IF( APPLYQ ) THEN CALL CHK1MAT( M, 4, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, M, 4, IA, JA, DESCA, 10, INFO ) END IF ELSE NQ = N IF( ( APPLYQ .AND. NQ.GE.K ) .OR. $ ( .NOT.APPLYQ .AND. NQ.GT.K ) ) THEN IAA = IA JAA = JA MI = M NI = N ICC = IC JCC = JC ELSE IAA = IA JAA = JA + 1 MI = M NI = N - 1 ICC = IC JCC = JC + 1 END IF * IF( APPLYQ ) THEN CALL CHK1MAT( N, 5, K, 6, IA, JA, DESCA, 10, INFO ) ELSE CALL CHK1MAT( K, 6, N, 5, IA, JA, DESCA, 10, INFO ) END IF END IF CALL CHK1MAT( M, 4, N, 5, IC, JC, DESCC, 15, INFO ) * IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) ICOFFA = MOD( JAA-1, DESCA( NB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JAA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( APPLYQ ) THEN IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF ELSE * IF( LEFT ) THEN MQA0 = NUMROC( MI+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ MI+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN INFO = -1 ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -2 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -3 ELSE IF( K.LT.0 ) THEN INFO = -6 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( APPLYQ .AND. LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( APPLYQ .AND. LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( .NOT.APPLYQ .AND. LEFT .AND. $ DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( .NOT.APPLYQ .AND. .NOT.LEFT .AND. $ DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( APPLYQ ) THEN IDUM1( 1 ) = ICHAR( 'Q' ) ELSE IDUM1( 1 ) = ICHAR( 'P' ) END IF IDUM2( 1 ) = 1 IF( LEFT ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'R' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'C' ) END IF IDUM2( 3 ) = 3 IDUM1( 4 ) = K IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( APPLYQ ) THEN IF( LEFT ) THEN CALL PCHK2MAT( M, 4, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( N, 5, K, 6, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF ELSE IF( LEFT ) THEN CALL PCHK2MAT( K, 6, M, 4, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( K, 6, N, 5, IA, JA, DESCA, 10, M, 4, N, $ 5, IC, JC, DESCC, 15, 5, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMBR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( APPLYQ ) THEN * * Apply Q * IF( NQ.GE.K ) THEN * * Q was determined by a call to PZGEBRD with nq >= k * CALL PZUNMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * Q was determined by a call to PZGEBRD with nq < k * CALL PZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A, IA+1, JA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) END IF ELSE * * Apply P * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF IF( NQ.GT.K ) THEN * * P was determined by a call to PZGEBRD with nq > k * CALL PZUNMLQ( SIDE, TRANST, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) ELSE IF( NQ.GT.1 ) THEN * * P was determined by a call to PZGEBRD with nq <= k * CALL PZUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A, IA, JA+1, $ DESCA, TAU, C, ICC, JCC, DESCC, WORK, LWORK, $ IINFO ) END IF END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMBR * END scalapack-1.8.0/SRC/zdbtrf.f0000640000175000017500000002514310363532303015412 0ustar muammarmuammar SUBROUTINE ZDBTRF( M, N, KL, KU, AB, LDAB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from ZGBTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. COMPLEX*16 AB( LDAB, * ) * .. * * Purpose * ======= * * Zdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting or row interchanges. * * This is the blocked version of the algorithm, calling Level 3 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) REAL array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER NBMAX, LDWORK PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) * .. * .. Local Scalars .. INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP, $ JU, KM, KV, NB, NW * .. * .. Local Arrays .. COMPLEX*16 WORK13( LDWORK, NBMAX ), $ WORK31( LDWORK, NBMAX ) * .. * .. External Functions .. INTEGER ILAENV, ISAMAX EXTERNAL ILAENV, ISAMAX * .. * .. External Subroutines .. EXTERNAL ZCOPY, ZDBTF2, ZGEMM, ZGERU, ZSCAL, $ ZSWAP, ZTRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U * KV = KU * * Test the input parameters. * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 ) THEN INFO = -3 ELSE IF( KU.LT.0 ) THEN INFO = -4 ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Determine the block size for this environment * NB = ILAENV( 1, 'ZDBTRF', ' ', M, N, KL, KU ) * * The block size must not exceed the limit set by the size of the * local arrays WORK13 and WORK31. * NB = MIN( NB, NBMAX ) * IF( NB.LE.1 .OR. NB.GT.KL ) THEN * * Use unblocked code * CALL ZDBTF2( M, N, KL, KU, AB, LDAB, INFO ) ELSE * * Use blocked code * * Zero the superdiagonal elements of the work array WORK13 * DO 20 J = 1, NB DO 10 I = 1, J - 1 WORK13( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * * Zero the subdiagonal elements of the work array WORK31 * DO 40 J = 1, NB DO 30 I = J + 1, NB WORK31( I, J ) = ZERO 30 CONTINUE 40 CONTINUE * * JU is the index of the last column affected by the current * stage of the factorization * JU = 1 * DO 180 J = 1, MIN( M, N ), NB JB = MIN( NB, MIN( M, N )-J+1 ) * * The active part of the matrix is partitioned * * A11 A12 A13 * A21 A22 A23 * A31 A32 A33 * * Here A11, A21 and A31 denote the current block of JB columns * which is about to be factorized. The number of rows in the * partitioning are JB, I2, I3 respectively, and the numbers * of columns are JB, J2, J3. The superdiagonal elements of A13 * and the subdiagonal elements of A31 lie outside the band. * I2 = MIN( KL-JB, M-J-JB+1 ) I3 = MIN( JB, M-J-KL+1 ) * * J2 and J3 are computed after JU has been updated. * * Factorize the current block of JB columns * DO 80 JJ = J, J + JB - 1 * * Find pivot and test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-JJ ) JP = 1 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) * * Compute multipliers * CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), $ 1 ) * * Update trailing submatrix within the band and within * the current block. JM is the index of the last column * which needs to be updated. * JM = MIN( JU, J+JB-1 ) IF( JM.GT.JJ ) THEN CALL ZGERU( KM, JM-JJ, -CONE, AB( KV+2, JJ ), 1, $ AB( KV, JJ+1 ), LDAB-1, $ AB( KV+1, JJ+1 ), LDAB-1 ) END IF END IF * * Copy current column of A31 into the work array WORK31 * NW = MIN( JJ-J+1, I3 ) IF( NW.GT.0 ) $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, $ WORK31( 1, JJ-J+1 ), 1 ) 80 CONTINUE IF( J+JB.LE.N ) THEN * * Apply the row interchanges to the other blocks. * J2 = MIN( JU-J+1, KV ) - JB J3 = MAX( 0, JU-J-KV+1 ) * * Update the relevant part of the trailing submatrix * IF( J2.GT.0 ) THEN * * Update A12 * CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J2, CONE, AB( KV+1, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1 ) * IF( I2.GT.0 ) THEN * * Update A22 * CALL ZGEMM( 'No transpose', 'No transpose', I2, J2, $ JB, -CONE, AB( KV+1+JB, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1, CONE, $ AB( KV+1, J+JB ), LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A32 * CALL ZGEMM( 'No transpose', 'No transpose', I3, J2, $ JB, -CONE, WORK31, LDWORK, $ AB( KV+1-JB, J+JB ), LDAB-1, CONE, $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) END IF END IF * IF( J3.GT.0 ) THEN * * Copy the lower triangle of A13 into the work array * WORK13 * DO 130 JJ = 1, J3 DO 120 II = JJ, JB WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 120 CONTINUE 130 CONTINUE * * Update A13 in the work array * CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J3, CONE, AB( KV+1, J ), LDAB-1, $ WORK13, LDWORK ) * IF( I2.GT.0 ) THEN * * Update A23 * CALL ZGEMM( 'No transpose', 'No transpose', I2, J3, $ JB, -CONE, AB( KV+1+JB, J ), LDAB-1, $ WORK13, LDWORK, CONE, AB( 1+JB, J+KV ), $ LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A33 * CALL ZGEMM( 'No transpose', 'No transpose', I3, J3, $ JB, -CONE, WORK31, LDWORK, WORK13, $ LDWORK, CONE, AB( 1+KL, J+KV ), LDAB-1 ) END IF * * Copy the lower triangle of A13 back into place * DO 150 JJ = 1, J3 DO 140 II = JJ, JB AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 140 CONTINUE 150 CONTINUE END IF ELSE END IF * * copy the upper triangle of A31 back into place * DO 170 JJ = J + JB - 1, J, -1 * * Copy the current column of A31 back into place * NW = MIN( I3, JJ-J+1 ) IF( NW.GT.0 ) $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1, $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 170 CONTINUE 180 CONTINUE END IF * RETURN * * End of ZDBTRF * END scalapack-1.8.0/SRC/pspttrsv.f0000640000175000017500000011017010363532303016017 0ustar muammarmuammar SUBROUTINE PSPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) REAL AF( * ), B( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PSPTTRSV solves a tridiagonal triangular system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is a tridiagonal * triangular matrix factor produced by the * Cholesky factorization code PSPTTRF * and is stored in A(1:N,JA:JA+N-1) and AF. * The matrix stored in A(1:N, JA:JA+N-1) is either * upper or lower triangular according to UPLO, * and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T * is dictated by the user by the parameter TRANS. * * Routine PSPTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored; * = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * D (local input/local output) REAL pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) REAL pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSPTTRF and this is stored in AF. If a linear system * is to be solved using PSPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (10+2*min(100,NRHS))*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM3, JA_NEW, LEVEL_DIST, LLDA, LLDB, $ MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL, $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET, $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A, $ TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PXERBLA, RESHAPE, SAXPY, SGEMM, $ SGERV2D, SGESD2D, SMATADD, SPTTRSV, STRTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 7*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 10*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 10*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 10*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 10*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 7*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 10*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 10*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -6 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 7*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PSPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PSPTTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = INT_ONE*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PSPTTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB( 5 ) PARAM_CHECK( 14, 1 ) = DESCB( 4 ) PARAM_CHECK( 13, 1 ) = DESCB( 3 ) PARAM_CHECK( 12, 1 ) = DESCB( 2 ) PARAM_CHECK( 11, 1 ) = DESCB( 1 ) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 15, 2 ) = 1005 PARAM_CHECK( 14, 2 ) = 1004 PARAM_CHECK( 13, 2 ) = 1003 PARAM_CHECK( 12, 2 ) = 1002 PARAM_CHECK( 11, 2 ) = 1001 PARAM_CHECK( 10, 2 ) = 9 PARAM_CHECK( 9, 2 ) = 705 PARAM_CHECK( 8, 2 ) = 704 PARAM_CHECK( 7, 2 ) = 703 PARAM_CHECK( 6, 2 ) = 701 PARAM_CHECK( 5, 2 ) = 6 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 14 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSPTTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 100 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL SPTTRSV( 'N', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -E( PART_OFFSET+ODD_SIZE ), $ B( PART_OFFSET+ODD_SIZE ), LLDB, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL SGEMM( 'T', 'N', 1, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+INT_ONE-1 ), INT_ONE ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL STRTRS( 'L', 'N', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 90 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*1+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*1+2+1 ), INT_ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), INT_ONE ) * * Send contribution to diagonal block's owning processor. * CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * * Receive solution from processor to left * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL SGEMM( 'T', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ODD_SIZE*1+2+1 ), INT_ONE, WORK( 1 ), $ INT_ONE, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL SGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -ONE, $ AF( ( ODD_SIZE )*1+1 ), INT_ONE, WORK( 1 ), $ INT_ONE, ONE, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL STRTRS( 'L', 'T', 'U', INT_ONE, NRHS, AF( ODD_SIZE+2 ), $ INT_ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 90 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, INT_ONE, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL SGEMM( 'N', 'N', ODD_SIZE, NRHS, 1, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+INT_ONE-1 ), INT_ONE, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * CALL SAXPY( NRHS, -( E( PART_OFFSET+ODD_SIZE ) ), $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, $ B( PART_OFFSET+ODD_SIZE ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL SPTTRSV( 'T', ODD_SIZE, NRHS, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB, $ INFO ) * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 90 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 100 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PSPTTRSV * END scalapack-1.8.0/SRC/pstrti2.f0000640000175000017500000002365010363532303015527 0ustar muammarmuammar SUBROUTINE PSTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ) * .. * * Purpose * ======= * * PSTRTI2 computes the inverse of a real upper or lower triangular * block matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). This matrix should be * contained in one and only one process memory space (local operation). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular * = 'U': sub( A ) is unit triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)), * this array contains the local pieces of the triangular matrix * sub( A ). If UPLO = 'U', the leading N-by-N upper triangular * part of the matrix sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of the matrix sub( A ) contains the lower * triangular matrix, and the strictly upper triangular part * of sub( A ) is not referenced. If DIAG = 'U', the diagonal * elements of sub( A ) are also not referenced and are assumed * to be 1. On exit, the (triangular) inverse of the original * matrix, in the same storage format. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER IACOL, IAROW, ICTXT, ICURR, IDIAG, IIA, IOFFA, $ JJA, LDA, MYCOL, MYROW, NA, NPCOL, NPROW REAL AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, SSCAL, STRMV * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Compute local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) * IF( UPPER ) THEN * IOFFA = IIA + ( JJA - 1 ) * LDA ICURR = IOFFA + LDA * IF( NOUNIT ) THEN * * Compute inverse of upper non-unit triangular matrix. * A( IOFFA ) = ONE / A( IOFFA ) IDIAG = ICURR + 1 DO 10 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements 1:j-1 of j-th column. * CALL STRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL SSCAL( NA, AJJ, A( ICURR ), 1 ) IDIAG = IDIAG + LDA + 1 ICURR = ICURR + LDA 10 CONTINUE * ELSE * * Compute inverse of upper unit triangular matrix. * DO 20 NA = 1, N-1 * * Compute elements 1:j-1 of j-th column. * CALL STRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL SSCAL( NA, -ONE, A( ICURR ), 1 ) ICURR = ICURR + LDA 20 CONTINUE * END IF * ELSE * ICURR = IIA + N - 1 + ( JJA + N - 2 ) * LDA IOFFA = ICURR - LDA * IF( NOUNIT ) THEN * * Compute inverse of lower non-unit triangular matrix. * A( ICURR ) = ONE / A( ICURR ) IDIAG = IOFFA - 1 DO 30 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements j+1:n of j-th column. * CALL STRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL SSCAL( NA, AJJ, A( IOFFA ), 1 ) ICURR = IDIAG IDIAG = IDIAG - LDA - 1 IOFFA = IDIAG + 1 30 CONTINUE * ELSE * * Compute inverse of lower unit triangular matrix. * DO 40 NA = 1, N-1 * * Compute elements j+1:n of j-th column. * CALL STRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL SSCAL( NA, -ONE, A( IOFFA ), 1 ) ICURR = ICURR - LDA - 1 IOFFA = ICURR - LDA 40 CONTINUE * END IF * END IF * END IF * * End of PSTRTI2 * END scalapack-1.8.0/SRC/pcunmhr.f0000640000175000017500000003645110363532303015577 0ustar muammarmuammar SUBROUTINE PCUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMHR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of IHI-ILO elementary reflectors, as returned by PCGEHRD: * * Q = H(ilo) H(ilo+1) . . . H(ihi-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * ILO and IHI must have the same values as in the previous call * of PCGEHRD. Q is equal to the unit matrix except in the * distributed submatrix Q(ia+ilo:ia+ihi-1,ia+ilo:ja+ihi-1). * If SIDE = 'L', 1 <= ILO <= IHI <= max(1,M); * if SIDE = 'R', 1 <= ILO <= IHI <= max(1,N); * ILO and IHI are relative indexes. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PCGEHRD. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCc(JA+M-2) * if SIDE = 'L', and LOCc(JA+N-2) if SIDE = 'R'. This array * contains the scalar factors TAU(j) of the elementary * reflectors H(j) as returned by PCGEHRD. TAU is tied to * the distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * IAA = IA + ILO; JAA = JA+ILO-1; * If SIDE = 'L', * MI = IHI-ILO; NI = N; ICC = IC + ILO; JCC = JC; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; NI = IHI-ILO; ICC = IC; JCC = JC + ILO; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NH, NI, NPA0, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCUNMQR, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 NH = IHI - ILO IF( NPROW.EQ.-1 ) THEN INFO = -(1000+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) IAA = IA + ILO JAA = JA + ILO - 1 * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = NH NI = N ICC = IC + ILO JCC = JC CALL CHK1MAT( M, 3, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N MI = M NI = NH ICC = IC JCC = JC + ILO CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN INFO = -5 ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN INFO = -6 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1500+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = ILO IDUM2( 3 ) = 5 IDUM1( 4 ) = IHI IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMHR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) $ RETURN * CALL PCUNMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMHR * END scalapack-1.8.0/SRC/pclahqr.f0000640000175000017500000033321510602576752015567 0ustar muammarmuammar SUBROUTINE PCLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ, $ IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7.3) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * 1.7.3: March 22, 2006 * modification suggested by Mark Fahey and Greg Henry * 1.7.0: July 31, 2001 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) COMPLEX A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PCLAHQR is an auxiliary routine used to find the Schur decomposition * and or eigenvalues of a matrix already in Hessenberg form from * cols ILO to IHI. * If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ, * with Z'Z=I, and H in Schur form. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCp(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCp( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCq( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCp() and LOCq() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * WANTT (global input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (global input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (global input) INTEGER * The order of the Hessenberg matrix A (and Z if WANTZ). * N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that A is already upper quasi-triangular in * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless * ILO = 1). PCLAHQR works primarily with the Hessenberg * submatrix in rows and columns ILO to IHI, but applies * transformations to all of H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * A (global input/output) COMPLEX array, dimension * (DESCA(LLD_),*) * On entry, the upper Hessenberg matrix A. * On exit, if WANTT is .TRUE., A is upper triangular in rows * and columns ILO:IHI. If WANTT is .FALSE., the contents of * A are unspecified on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * W (global replicated output) COMPLEX array, dimension (N) * The computed eigenvalues ILO to IHI are stored in the * corresponding elements of W. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in A. A may be returned with * larger diagonal blocks until the next release. * * ILOZ (global input) INTEGER * IHIZ (global input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (global input/output) COMPLEX array. * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations accumulated by PCHSEQR, and on * exit Z has been updated; transformations are applied only to * the submatrix Z(ILOZ:IHIZ,ILO:IHI). * If WANTZ is .FALSE., Z is not referenced. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local output) COMPLEX array of size LWORK * (Unless LWORK=-1, in which case WORK must be at least size 1) * * LWORK (local input) INTEGER * WORK(LWORK) is a local array and LWORK is assumed big enough * so that LWORK >= 3*N + * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N), * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) + * MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 ) * If LWORK=-1, then WORK(1) gets set to the above number and * the code returns immediately. * * IWORK (global and local input) INTEGER array of size ILWORK * This will hold some of the IBLK integer arrays. * This is held as a place holder for a future release. * Currently unreferenced. * * ILWORK (local input) INTEGER * This will hold the size of the IWORK array. * This is held as a place holder for a future release. * Currently unreferenced. * * INFO (global output) INTEGER * < 0: parameter number -INFO incorrect or inconsistent * = 0: successful exit * > 0: PCLAHQR failed to compute all the eigenvalues ILO to IHI * in a total of 30*(IHI-ILO+1) iterations; if INFO = i, * elements i+1:ihi of W contains those eigenvalues * which have been successfully computed. * * Logic: * This algorithm is very similar to SLAHQR. Unlike SLAHQR, * instead of sending one double shift through the largest * unreduced submatrix, this algorithm sends multiple double shifts * and spaces them apart so that there can be parallelism across * several processor row/columns. Another critical difference is * that this algorithm aggregrates multiple transforms together in * order to apply them in a block fashion. * * Important Local Variables: * IBLK = The maximum number of bulges that can be computed. * Currently fixed. Future releases this won't be fixed. * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) * ROTN = The number of transforms to block together * NBULGE = The number of bulges that will be attempted on the * current submatrix. * IBULGE = The current number of bulges started. * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). * * Subroutines: * From LAPACK, this routine calls: * CLAHQR -> Serial QR used to determine shifts and * eigenvalues * CLARFG -> Determine the Householder transforms * * This ScaLAPACK, this routine calls: * PCLACONSB -> To determine where to start each iteration * CLAMSH -> Sends multiple shifts through a small * submatrix to see how the consecutive * subdiagonals change (if PCLACONSB indicates * we can start a run in the middle) * PCLAWIL -> Given the shift, get the transformation * PCLACP3 -> Parallel array to local replicated array copy * & back. * CLAREF -> Row/column reflector applier. Core routine * here. * PCLASMSUB -> Finds negligible subdiagonal elements. * * Current Notes and/or Restrictions: * 1.) This code requires the distributed block size to be square * and at least six (6); unlike simpler codes like LU, this * algorithm is extremely sensitive to block size. Unwise * choices of too small a block size can lead to bad * performance. * 2.) This code requires A and Z to be distributed identically * and have identical contxts. A future version may allow Z to * have a different contxt to 1D row map it to all nodes (so no * communication on Z is necessary.) * 3.) This code does not currently block the initial transforms * so that none of the rows or columns for any bulge are * completed until all are started. To offset pipeline * start-up it is recommended that at least 2*LCM(NPROW,NPCOL) * bulges are used (if possible) * 4.) The maximum number of bulges currently supported is fixed at * 32. In future versions this will be limited only by the * incoming WORK and IWORK array. * 5.) The matrix A must be in upper Hessenberg form. If elements * below the subdiagonal are nonzero, the resulting transforms * may be nonsimilar. This is also true with the LAPACK * routine CLAHQR. * 6.) For this release, this code has only been tested for * RSRC_=CSRC_=0, but it has been written for the general case. * 7.) Currently, all the eigenvalues are distributed to all the * nodes. Future releases will probably distribute the * eigenvalues by the column partitioning. * 8.) The internals of this routine are subject to change. * 9.) To optimize this for your architecture, try tuning CLAREF. * 10.) This code has only been tested for WANTZ = .TRUE. and may * behave unpredictably for WANTZ set to .FALSE. * * Further Details * =============== * * Contributed by Mark Fahey, June, 2000. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL RONE PARAMETER ( RONE = 1.0E+0 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) REAL CONST PARAMETER ( CONST = 1.50E+0 ) INTEGER IBLK PARAMETER ( IBLK = 32 ) * .. * .. Local Scalars .. LOGICAL SKIP INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE, $ ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II, $ IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART, $ ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP, $ ITERMAX, ITMP1, ITMP2, ITN, ITS, IZBUF, J, $ JAFIRST, JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ, $ LEFT, LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1, $ LOCALI2, LOCALK, LOCALM, M, MODKM1, MYCOL, $ MYROW, NBULGE, NH, NODE, NPCOL, NPROW, NQ, NR, $ NUM, NZ, RIGHT, ROTN, UP, VECSIDX REAL CS, OVFL, S, SMLNUM, ULP, UNFL COMPLEX CDUM, H10, H11, H22, H33, H43H34, H44, SN, SUM, $ T1, T1COPY, T2, T3, V1SAVE, V2, V2SAVE, V3, $ V3SAVE * .. * .. Local Arrays .. INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ), $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ), $ KP2ROW( IBLK ), KROW( IBLK ) COMPLEX S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ), $ VCOPY( 3 ) * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH EXTERNAL ILCM, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMN2D, IGEBR2D, IGEBS2D, $ INFOG1L, INFOG2L, PSLABAD, PXERBLA, PCLACONSB, $ PCLACP3, PCLASMSUB, PCLAWIL, PCROT, CCOPY, $ CGEBR2D, CGEBS2D, CGERV2D, CGESD2D, CGSUM2D, $ CLAHQR2, CLAMSH, CLANV2, CLAREF, CLARFG * .. * .. Intrinsic Functions .. * INTRINSIC ABS, REAL, CONJG, AIMAG, MAX, MIN, MOD * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) IF( N.EQ.0 ) $ RETURN * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) IAFIRST = DESCA( RSRC_ ) JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) LCMRC = ILCM( NPROW, NPCOL ) IF( ( NPROW.LE.3 ) .OR. ( NPCOL.LE.3 ) ) THEN SKIP = .TRUE. ELSE SKIP = .FALSE. END IF * * Determine the number of columns we have so we can check workspace * NQ = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL ) JJ = N / HBL IF( JJ*HBL.LT.N ) $ JJ = JJ + 1 JJ = 7*JJ / LCMRC JJ = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, JJ ) JJ = JJ + MAX( 2*N, ( 8*LCMRC+2 )**2 ) IF( LWORK.EQ.-1 ) THEN WORK( 1 ) = JJ RETURN END IF IF( LWORK.LT.JJ ) THEN INFO = -14 END IF IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) END IF IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1300+NB_ ) END IF IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1300+MB_ ) END IF IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN INFO = -( 700+RSRC_ ) END IF IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN INFO = -( 1300+RSRC_ ) END IF IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN INFO = -4 END IF IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN INFO = -5 END IF IF( HBL.LT.5 ) THEN INFO = -( 700+MB_ ) END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PCLAHQR', -INFO ) RETURN END IF * * Set work array indices * VECSIDX = 0 IDIA = 3*N ISUB = 3*N ISUP = 3*N IRBUF = 3*N ICBUF = 3*N IZBUF = 5*N * * Find a value for ROTN * ROTN = HBL / 3 ROTN = MIN( ROTN, HBL-2 ) ROTN = MAX( ROTN, 1 ) * IF( ILO.EQ.IHI ) THEN CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN W( ILO ) = A( ( ICOL-1 )*LDA+IROW ) ELSE W( ILO ) = ZERO END IF RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, IAFIRST, LILOZ, LIHIZ ) LIHIZ = NUMROC( IHIZ, HBL, MYROW, IAFIRST, NPROW ) * * Set machine-dependent constants for the stopping criterion. * If NORM(H) <= SQRT(OVFL), overflow should not occur. * UNFL = PSLAMCH( CONTXT, 'SAFE MINIMUM' ) OVFL = RONE / UNFL CALL PSLABAD( CONTXT, UNFL, OVFL ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = ITERMAX * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of our schur block size (<=2*IBLK). Each * iteration of the loop works with the active submatrix in rows * and columns L to I. Eigenvalues I+1 to IHI have already * converged. Either L = ILO or the global A(L,L-1) is negligible * so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 570 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 540 ITS = 0, ITN * * Look for a single small subdiagonal element. * CALL PCLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ), $ LWORK-IRBUF ) L = K * IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN A( ( ICOL-1 )*LDA+IROW ) = ZERO END IF WORK( ISUB+L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * IF( WANTT ) THEN * For Schur form, use 2x2 blocks IF( L.GE.I-1 ) THEN GO TO 550 END IF ELSE * If we don't want the Schur form, use bigger blocks. IF( L.GE.I-( 2*IBLK-1 ) ) THEN GO TO 550 END IF END IF * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * * Copy submatrix of size 2*JBLK and prepare to do generalized * Wilkinson shift or an exceptional shift * JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 ) IF( JBLK.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * JBLK = JBLK - MOD( JBLK, LCMRC ) END IF JBLK = MIN( JBLK, 2*LCMRC ) JBLK = MAX( JBLK, 1 ) * CALL PCLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1, $ 0 ) IF( ( ITS.EQ.20 .OR. ITS.EQ.40 ) .AND. ( JBLK.GT.1 ) ) THEN * * Exceptional shift. * DO 20 II = 2*JBLK, 2, -1 S1( II, II ) = CONST*( CABS1( S1( II, II ) )+ $ CABS1( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE S1( 1, 1 ) = CONST*CABS1( S1( 1, 1 ) ) ELSE CALL CLAHQR2( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1, $ 2*IBLK, WORK( IRBUF+1 ), 1, 2*JBLK, Z, LDZ, $ IERR ) * * Prepare to use Wilkinson's double shift * H44 = S1( 2*JBLK, 2*JBLK ) H33 = S1( 2*JBLK-1, 2*JBLK-1 ) H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 ) * END IF * * Look for two consecutive small subdiagonal elements: * PCLACONSB is the routine that does this. * CALL PCLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, $ WORK( IRBUF+1 ), LWORK-IRBUF ) * * Double-shift QR step * * NBULGE is the number of bulges that will be attempted * ISTOP = MIN( M+ROTN-1-MOD( M-( M / HBL )*HBL-1, ROTN ), I-2 ) ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) ) ISTOP = MIN( ISTOP, I2-2 ) ISTOP = MAX( ISTOP, M ) NBULGE = ( I-1-ISTOP ) / HBL * * Do not exceed maximum determined. * NBULGE = MIN( NBULGE, JBLK ) IF( NBULGE.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * NBULGE = NBULGE - MOD( NBULGE, LCMRC ) END IF NBULGE = MAX( NBULGE, 1 ) * * If we are starting in the middle because of consecutive small * subdiagonal elements, we need to see how many bulges we * can send through without breaking the consecutive small * subdiagonal property. * IF( ( NBULGE.GT.1 ) .AND. ( M.GT.L ) ) THEN * * Copy a chunk of elements from global A(M-1:,M-1:) * CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ITMP1, ITMP2 ) II = MIN( 4*NBULGE+2, N-M+2 ) CALL PCLACP3( II, M-1, A, DESCA, WORK( IRBUF+1 ), II, ITMP1, $ ITMP2, 0 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN * * Find a new NBULGE based on the bulges we have. * CALL CLAMSH( S1, 2*IBLK, NBULGE, JBLK, WORK( IRBUF+1 ), $ II, II, ULP ) IF( NUM.GT.1 ) THEN CALL IGEBS2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1 ) END IF ELSE * * Everyone needs to receive the new NBULGE * CALL IGEBR2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1, ITMP1, $ ITMP2 ) END IF END IF * * IBULGE is the number of bulges going so far * IBULGE = 1 * * "A" row defs : main row transforms from LOCALK to LOCALI2 * CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, ITMP1, LOCALK ) LOCALK = NQ CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ICOL1, LOCALI2 ) LOCALI2 = NUMROC( I2, HBL, MYCOL, JAFIRST, NPCOL ) * * "A" col defs : main col transforms from LOCALI1 to LOCALM * CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, LOCALI1, ICOL1 ) CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, LOCALM, ICOL1 ) ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, IAFIRST, NPROW ) * * Which row & column will start the bulges * ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST * CALL INFOG1L( M, HBL, NPROW, MYROW, IAFIRST, II, ITMP2 ) CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, JJ, ITMP2 ) CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, ISTOP, $ KP2ROW( 1 ) ) KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, IAFIRST, NPROW ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ISTOP, $ KP2COL( 1 ) ) KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, JAFIRST, NPCOL ) * * Set all values for bulges. All bulges are stored in * intermediate steps as loops over KI. Their current "task" * over the global M to I-1 values is always K1(KI) to K2(KI). * However, because there are many bulges, K1(KI) & K2(KI) might * go past that range while later bulges (KI+1,KI+2,etc..) are * finishing up. Even if ROTN=1, in order to minimize border * communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both * border messages can be handled at once. * * Rules: * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)= 0. * * BW (global input) INTEGER * Number of subdiagonals in L or U. 0 <= BW <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into * local memory to an array with first dimension * LLD_A >=(bw+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N symmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPBTRF and this is stored in AF. If a linear system * is to be solved using PDPBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2*bw)*bw * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (bw*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*BW * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (BW* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA, $ LLDB, MBW2, MYCOL, MYROW, MY_NUM_COLS, NB, NP, $ NPCOL, NPROW, NP_SAVE, ODD_SIZE, OFST, $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ DGEMM, DGERV2D, DGESD2D, DLACPY, DMATADD, $ DTBTRS, DTRMM, DTRTRS, GLOBCHK, PXERBLA, $ RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * * Pre-calculate bw^2 * MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( UPLO, 'U' ) ) THEN IDUM1 = ICHAR( 'U' ) ELSE IF( LSAME( UPLO, 'L' ) ) THEN IDUM1 = ICHAR( 'L' ) ELSE INFO = -1 END IF * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -2 END IF * IF( LWORK.LT.-1 ) THEN INFO = -14 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -3 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BW.GT.N-1 ) .OR. ( BW.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( BW+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 3 ) CALL PXERBLA( ICTXT, $ 'PDPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*BW ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRSV, D&C alg.: NB too small', $ -INFO ) RETURN END IF * * WORK_SIZE_MIN = BW*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRSV: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BW PARAM_CHECK( 4, 1 ) = N PARAM_CHECK( 3, 1 ) = IDUM3 PARAM_CHECK( 2, 1 ) = IDUM2 PARAM_CHECK( 1, 1 ) = IDUM1 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 14 PARAM_CHECK( 2, 2 ) = 2 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRSV', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 180 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Offset in elements * OFST = PART_OFFSET*LLDA * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - BW END IF * * * * Begin main code * IF( LSAME( UPLO, 'L' ) ) THEN * IF( LSAME( TRANS, 'N' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'N', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB, $ WORK( 1 ), BW ) * CALL DTRMM( 'L', 'U', 'N', 'N', BW, NRHS, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )*LLDA ) ), $ LLDA-1, WORK( 1 ), BW ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'T', 'N', BW, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+BW-BW ), BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 30 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 10 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 20 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 10 20 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTRTRS( 'L', 'N', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 30 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 80 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 40 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 50 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 40 50 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ), $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTRTRS( 'L', 'T', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 60 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 70 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 60 70 CONTINUE * [End of GOTO Loop] * 80 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+BW-BW ), BW ) * CALL DTRMM( 'L', 'U', 'T', 'N', BW, NRHS, -ONE, $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )*LLDA ) ), $ LLDA-1, WORK( 1+BW-BW ), BW ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'T', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * ELSE *************************************************************** * CASE UPLO = 'U' * *************************************************************** IF( LSAME( TRANS, 'T' ) ) THEN * * Frontsolve * * ****************************************** * Local computation phase ****************************************** * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'T', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BW, NRHS, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB, $ WORK( 1 ), BW ) * CALL DTRMM( 'L', 'L', 'T', 'N', BW, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1 ), BW ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution to previous * processor's righthand-side. * CALL DGEMM( 'T', 'N', BW, NRHS, ODD_SIZE, -ONE, AF( 1 ), $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO, $ WORK( 1+BW-BW ), BW ) END IF * * ************************************************ * Formation and solution of reduced system ************************************************ * * * Send modifications to prior processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+1 ) * * Combine contribution to locally stored right hand sides * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * * The last processor does not participate in the solution of the * reduced system, having sent its contribution already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 110 END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 90 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 100 * * Receive and add contribution to righthand sides from left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * * Receive and add contribution to righthand sides from right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 90 100 CONTINUE * [End of GOTO Loop] * * * * ********************************* * Calculate and use this proc's blocks to modify other procs * * Solve with diagonal block * CALL DTRTRS( 'L', 'N', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * * ********* IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * ************ IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * * Use offdiagonal block to calculate modification to diag block * of processor to the left * CALL DGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO, $ WORK( 1 ), BW ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 110 CONTINUE * ELSE * ******************** BACKSOLVE ************************************* * ******************************************************************** * .. Begin reduced system phase of algorithm .. ******************************************************************** * * * * The last processor does not participate in the solution of the * reduced system and just waits to receive its solution. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 160 END IF * * Determine number of steps in tree loop * LEVEL_DIST = 1 120 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 130 * LEVEL_DIST = LEVEL_DIST*2 * GO TO 120 130 CONTINUE * * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) $ THEN * * Receive solution from processor to left * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-LEVEL_DIST ) * * * Use offdiagonal block to calculate modification to RHS stored * on this processor * CALL DGEMM( 'T', 'N', BW, NRHS, BW, -ONE, $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ), $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Receive solution from processor to right * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL+LEVEL_DIST ) * * Calculate contribution from this block to next diagonal block * CALL DGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW, $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * Solve with diagonal block * CALL DTRTRS( 'L', 'T', 'N', BW, NRHS, $ AF( ODD_SIZE*BW+MBW2+1 ), BW, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO ) * IF( INFO.NE.0 ) THEN GO TO 170 END IF * * * ***Modification Loop ******* * 140 CONTINUE IF( LEVEL_DIST.EQ.1 ) $ GO TO 150 * LEVEL_DIST = LEVEL_DIST / 2 * * Send solution to the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+LEVEL_DIST ) * END IF * * Send solution to left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL-LEVEL_DIST ) * END IF * GO TO 140 150 CONTINUE * [End of GOTO Loop] * 160 CONTINUE * [Processor npcol - 1 jumped to here to await next stage] * ******************************* * Reduced system has been solved, communicate solutions to nearest * neighbors in preparation for local computation phase. * * * Send elements of solution to next proc * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGESD2D( ICTXT, BW, NRHS, $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0, $ MYCOL+1 ) * END IF * * Receive modifications to processor's right hand sides * IF( MYCOL.GT.0 ) THEN * CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0, $ MYCOL-1 ) * END IF * * * ********************************************** * Local computation phase ********************************************** * IF( MYCOL.NE.0 ) THEN * Use the "spike" fillin to calculate contribution from previous * processor's solution. * CALL DGEMM( 'N', 'N', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ), $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+1 ), LLDB ) * END IF * * IF( MYCOL.LT.NP-1 ) THEN * Use factorization of odd-even connection block to modify * locally stored portion of right hand side(s) * * * First copy and multiply it into temporary storage, * then use it on RHS * CALL DLACPY( 'N', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ), $ LLDB, WORK( 1+BW-BW ), BW ) * CALL DTRMM( 'L', 'L', 'N', 'N', BW, NRHS, -ONE, $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1, $ WORK( 1+BW-BW ), BW ) * CALL DMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE, $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB ) * END IF * * Use main partition in each processor to solve locally * CALL DTBTRS( UPLO, 'N', 'N', ODD_SIZE, BW, NRHS, $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB, $ INFO ) * END IF * End of "IF( LSAME( TRANS, 'N' ) )"... * * END IF * End of "IF( LSAME( UPLO, 'L' ) )"... 170 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 180 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDPBTRSV * END scalapack-1.8.0/SRC/pclarft.f0000640000175000017500000004505610363532303015557 0ustar muammarmuammar SUBROUTINE PCLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) COMPLEX TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARFT forms the triangular factor T of a complex block reflector H * of order n, which is defined as a product of k elementary reflectors. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the distributed matrix V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the distributed matrix V, and * * H = I - V' * T * V * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER*1 * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER*1 * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise * = 'R': rowwise * * N (global input) INTEGER * The order of the block reflector H. N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) COMPLEX pointer into the local memory * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1)) * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if * STOREV = 'R'. The distributed matrix V contains the * Householder vectors. See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) COMPLEX, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) COMPLEX array, dimension (NB_V,NB_V) * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains * the k-by-k triangular factor of the block reflector asso- * ciated with V. If DIRECT = 'F', T is upper triangular; * if DIRECT = 'B', T is lower triangular. * * WORK (local workspace) COMPLEX array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 ) * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 ) * ( v1 v2 1 ) ( 1 v3 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 ) * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 ) * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) * ( 1 v3 ) * ( 1 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL FORWARD INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJ, JJV, LDV, MICOL, MIROW, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ COMPLEX VII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CGEMV, CGSUM2D, $ CLACGV, CLASET, CTRMV, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 .OR. K.LE.0 ) $ RETURN * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( LSAME( STOREV, 'C' ) .AND. MYCOL.EQ.IVCOL ) THEN * IW = 1 LDV = DESCV( LLD_ ) IROFF = MOD( IV-1, DESCV( MB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NP = NP - IROFF II = IIV + 1 ELSE II = IIV END IF IF( IROFF+1.EQ.DESCV( MB_ ) ) THEN MIROW = MOD( IVROW+1, NPROW ) ELSE MIROW = IVROW END IF ITMP0 = 0 * DO 10 JJ = JJV+1, JJV+K-1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( NP-II+IIV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', NP-II+IIV, ITMP0, $ -TAU( JJ ), V( II+(JJV-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL CLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II + 1 END IF * IF( MOD( IV+ITMP0, DESCV( MB_ ) ).EQ.0 ) $ MIROW = MOD( MIROW+1, NPROW ) * 10 CONTINUE * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( JJV ) * DO 20 JJ = JJV+1, JJV+K-1 * * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( NB_ ) CALL CCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( NB_ ), T( ITMP1 ), 1 ) T(ITMP1+ITMP0) = TAU( JJ ) * 20 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Columnwise' * NP = NUMROC( N+IROFF-1, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ NP = NP - IROFF MIROW = INDXG2P( IV+N-2, DESCV( MB_ ), MYROW, $ DESCV( RSRC_ ), NPROW ) II = IIV + NP - 1 ITMP0 = 0 * DO 30 JJ = JJV+K-2, JJV, -1 * IF( MYROW.EQ.MIROW ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( jv+i-1 ) * * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1) * ITMP0 = ITMP0 + 1 IF( II-IIV+1.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', II-IIV+1, ITMP0, $ -TAU( JJ ), V( IIV+JJ*LDV ), LDV, $ V( IIV+(JJ-1)*LDV ), 1, ZERO, $ WORK( IW ), 1 ) ELSE CALL CLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF * IW = IW + ITMP0 IF( MYROW.EQ.MIROW ) THEN V( II+(JJ-1)*LDV ) = VII II = II - 1 END IF * IF( MOD( IV+N-ITMP0-2, DESCV(MB_) ).EQ.0 ) $ MIROW = MOD( MIROW+NPROW-1, NPROW ) * 30 CONTINUE * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', IW-1, 1, WORK, IW-1, $ IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( NB_ ) * T( ITMP1-1 ) = TAU( JJV+K-1 ) * DO 40 JJ = JJV+K-2, JJV, -1 * * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( NB_ ) - 1 CALL CCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( NB_ ) ), $ DESCV( NB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( JJ ) * 40 CONTINUE * END IF * END IF * ELSE IF( LSAME( STOREV, 'R' ) .AND. MYROW.EQ.IVROW ) THEN * IW = 1 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) * IF( FORWARD ) THEN * * DIRECT = 'Forward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN NQ = NQ - ICOFF JJ = JJV + 1 ELSE JJ = JJV END IF IF( ICOFF+1.EQ.DESCV( NB_ ) ) THEN MICOL = MOD( IVCOL+1, NPCOL ) ELSE MICOL = IVCOL END IF ITMP0 = 0 * DO 50 II = IIV+1, IIV+K-1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(1:i-1,i) = -tau( iv+i-1 ) * * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ-JJ+JJV.GT.0 ) THEN CALL CLACGV( NQ-JJ+JJV, V( II+(JJ-1)*LDV ), LDV ) CALL CGEMV( 'No transpose', ITMP0, NQ-JJ+JJV, $ -TAU(II), V( IIV+(JJ-1)*LDV ), LDV, $ V( II+(JJ-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) CALL CLACGV( NQ-JJ+JJV, V( II+(JJ-1)*LDV ), LDV ) ELSE CALL CLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ + 1 END IF * IF( MOD( JV+ITMP0, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+1, NPCOL ) * 50 CONTINUE * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = 1 * T( ITMP1 ) = TAU( IIV ) * DO 60 II = IIV+1, IIV+K-1 * * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 + DESCV( MB_ ) CALL CCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', $ ITMP0, T, DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1+ITMP0 ) = TAU( II ) * 60 CONTINUE * END IF * ELSE * * DIRECT = 'Backward', STOREV = 'Rowwise' * NQ = NUMROC( N+ICOFF-1, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF MICOL = INDXG2P( JV+N-2, DESCV( NB_ ), MYCOL, $ DESCV( CSRC_ ), NPCOL ) JJ = JJV + NQ - 1 ITMP0 = 0 * DO 70 II = IIV+K-2, IIV, -1 * IF( MYCOL.EQ.MICOL ) THEN VII = V( II+(JJ-1)*LDV ) V( II+(JJ-1)*LDV ) = ONE END IF * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)' * ITMP0 = ITMP0 + 1 IF( JJ-JJV+1.GT.0 ) THEN CALL CLACGV( JJ-JJV+1, V( II+(JJV-1)*LDV ), LDV ) CALL CGEMV( 'No transpose', ITMP0, JJ-JJV+1, $ -TAU( II ), V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, $ WORK( IW ), 1 ) CALL CLACGV( JJ-JJV+1, V( II+(JJV-1)*LDV ), LDV ) ELSE CALL CLASET( 'All', ITMP0, 1, ZERO, ZERO, $ WORK( IW ), ITMP0 ) END IF * IW = IW + ITMP0 IF( MYCOL.EQ.MICOL ) THEN V( II+(JJ-1)*LDV ) = VII JJ = JJ - 1 END IF * IF( MOD( JV+N-ITMP0-2, DESCV( NB_ ) ).EQ.0 ) $ MICOL = MOD( MICOL+NPCOL-1, NPCOL ) * 70 CONTINUE * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 80 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL CCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', $ ITMP0, T( ITMP1+DESCV( MB_ ) ), $ DESCV( MB_ ), T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 80 CONTINUE * END IF * END IF * END IF * RETURN * * End of PCLARFT * END scalapack-1.8.0/SRC/pclahrd.f0000640000175000017500000002441510602576752015551 0ustar muammarmuammar SUBROUTINE PCLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCY( * ) COMPLEX A( * ), T( * ), TAU( * ), WORK( * ), Y( * ) * .. * * Purpose * ======= * * PCLAHRD reduces the first NB columns of a complex general * N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that * elements below the k-th subdiagonal are zero. The reduction is * performed by an unitary similarity transformation Q' * A * Q. The * routine returns the matrices V and T which determine Q as a block * reflector I - V*T*V', and also the matrix Y = A * V * T. * * This is an auxiliary routine called by PCGEHRD. In the following * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1). * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). * N >= 0. * * K (global input) INTEGER * The offset for the reduction. Elements below the k-th * subdiagonal in the first NB columns are reduced to zero. * * NB (global input) INTEGER * The number of columns to be reduced. * * A (local input/local output) COMPLEX pointer into * the local memory to an array of dimension (LLD_A, * LOCc(JA+N-K)). On entry, this array contains the the local * pieces of the N-by-(N-K+1) general distributed matrix * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above * the k-th subdiagonal in the first NB columns are overwritten * with the corresponding elements of the reduced distributed * matrix; the elements below the k-th subdiagonal, with the * array TAU, represent the matrix Q as a product of elementary * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are * unchanged. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX array, dimension LOCc(JA+N-2) * The scalar factors of the elementary reflectors (see Further * Details). TAU is tied to the distributed matrix A. * * T (local output) COMPLEX array, dimension (NB_A,NB_A) * The upper triangular matrix T. * * Y (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_Y,NB_A). On exit, this array * contains the local pieces of the N-by-NB distributed * matrix Y. LLD_Y >= LOCr(IA+N-1). * * IY (global input) INTEGER * The row index in the global array Y indicating the first * row of sub( Y ). * * JY (global input) INTEGER * The column index in the global array Y indicating the * first column of sub( Y ). * * DESCY (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Y. * * WORK (local workspace) COMPLEX array, dimension (NB) * * Further Details * =============== * * The matrix Q is represented as a product of nb elementary reflectors * * Q = H(1) H(2) . . . H(nb). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The elements of the vectors v together form the (n-k+1)-by-nb matrix * V which is needed, with T and Y, to apply the transformation to the * unreduced part of the matrix, using an update of the form: * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V'). * * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the * following example with n = 7, k = 3 and nb = 2: * * ( a h a a a ) * ( a h a a a ) * ( a h a a a ) * ( h h a a a ) * ( v1 h a a a ) * ( v1 v2 a a a ) * ( v1 v2 a a a ) * * where a denotes an element of the original matrix * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper * Hessenberg matrix H, and vi denotes an element of the vector * defining H(i). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL IPROC INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL, $ JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ COMPLEX EI * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ) * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CAXPY, CCOPY, CSCAL, $ CTRMV, DESCSET, INFOG2L, PCELSET, $ PCGEMV, PCLACGV, PCLARFG, PCSCAL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.1 ) $ RETURN * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) * IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ - IOFF * EI = ZERO JW = IOFF + 1 CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW, $ IACOL, ICTXT, 1 ) * DO 10 L = 1, NB I = IA + K + L - 2 J = JA + L - 1 * IF( L.GT.1 ) THEN * * Update A(ia:ia+n-1,j) * * Compute i-th column of A - Y * V' * CALL PCLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) ) CALL PCGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J, $ DESCA, 1 ) CALL PCLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) ) * * Apply I - V * T' * V' to this column (call it b) from the * left, using the last column of T as workspace * * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) * ( V2 ) ( b2 ) * * where V1 is unit lower triangular * * w := V1' * b1 * IF( IPROC ) THEN CALL CCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1, $ WORK( JW ), 1 ) CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) END IF * * w := w + V2'*b2 * CALL PCGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A, $ I+1, JA, DESCA, A, I+1, J, DESCA, 1, ONE, WORK, $ 1, JW, DESCW, DESCW( M_ ) ) * * w := T'*w * IF( IPROC ) $ CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', $ L-1, T, DESCA( NB_ ), WORK( JW ), 1 ) * * b2 := b2 - V2*w * CALL PCGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA, $ DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE, $ A, I+1, J, DESCA, 1 ) * * b1 := b1 - V1*w * IF( IPROC ) THEN CALL CTRMV( 'Lower', 'No transpose', 'Unit', L-1, $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ), $ WORK( JW ), 1 ) CALL CAXPY( L-1, -ONE, WORK( JW ), 1, $ A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 ) END IF CALL PCELSET( A, I, J-1, DESCA, EI ) END IF * * Generate the elementary reflector H(i) to annihilate * A(ia+k+i:ia+n-1,j) * CALL PCLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J, $ DESCA, 1, TAU ) CALL PCELSET( A, I+1, J, DESCA, ONE ) * * Compute Y(iy:y+n-1,jy+l-1) * CALL PCGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1, $ DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1, $ DESCY, 1 ) CALL PCGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A, I+1, $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW, $ DESCW, DESCW( M_ ) ) CALL PCGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY, $ WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY, $ JY+L-1, DESCY, 1 ) JL = MIN( JJ+L-1, JA+NQ-1 ) CALL PCSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 ) * * Compute T(1:i,i) * IF( IPROC ) THEN JT = ( L-1 ) * DESCA( NB_ ) CALL CSCAL( L-1, -TAU( JL ), WORK( JW ), 1 ) CALL CCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 ) CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T, $ DESCA( NB_ ), T( JT+1 ), 1 ) T( JT+L ) = TAU( JL ) END IF 10 CONTINUE * CALL PCELSET( A, K+NB+IA-1, J, DESCA, EI ) * RETURN * * End of PCLAHRD * END scalapack-1.8.0/SRC/dlapst.f0000640000175000017500000001502010363532303015377 0ustar muammarmuammar SUBROUTINE DLAPST( ID, N, D, INDX, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, N * .. * .. Array Arguments .. INTEGER INDX( * ) DOUBLE PRECISION D( * ) * .. * * Purpose * ======= * DLAPST is a modified version of the LAPACK routine DLASRT. * * Define a permutation INDX that sorts the numbers in D * in increasing order (if ID = 'I') or * in decreasing order (if ID = 'D' ). * * Use Quick Sort, reverting to Insertion sort on arrays of * size <= 20. Dimension of STACK limits N to about 2**32. * * Arguments * ========= * * ID (input) CHARACTER*1 * = 'I': sort D in increasing order; * = 'D': sort D in decreasing order. * * N (input) INTEGER * The length of the array D. * * D (input) DOUBLE PRECISION array, dimension (N) * The array to be sorted. * * INDX (ouput) INTEGER array, dimension (N). * The permutation which sorts the array D. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Parameters .. INTEGER SELECT PARAMETER ( SELECT = 20 ) * .. * .. Local Scalars .. INTEGER DIR, ENDD, I, ITMP, J, START, STKPNT DOUBLE PRECISION D1, D2, D3, DMNMX * .. * .. Local Arrays .. INTEGER STACK( 2, 32 ) * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Executable Statements .. * * Test the input paramters. * INFO = 0 DIR = -1 IF( LSAME( ID, 'D' ) ) THEN DIR = 0 ELSE IF( LSAME( ID, 'I' ) ) THEN DIR = 1 END IF IF( DIR.EQ.-1 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DLAPST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.LE.1 ) $ RETURN * DO 10 I = 1, N INDX( I ) = I 10 CONTINUE * STKPNT = 1 STACK( 1, 1 ) = 1 STACK( 2, 1 ) = N 20 CONTINUE START = STACK( 1, STKPNT ) ENDD = STACK( 2, STKPNT ) STKPNT = STKPNT - 1 IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN * * Do Insertion sort on D( START:ENDD ) * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * DO 40 I = START + 1, ENDD DO 30 J = I, START + 1, -1 IF( D( INDX( J ) ).GT.D( INDX( J-1 ) ) ) THEN ITMP = INDX( J ) INDX( J ) = INDX( J-1 ) INDX( J-1 ) = ITMP ELSE GO TO 40 END IF 30 CONTINUE 40 CONTINUE * ELSE * * Sort into increasing order * DO 60 I = START + 1, ENDD DO 50 J = I, START + 1, -1 IF( D( INDX( J ) ).LT.D( INDX( J-1 ) ) ) THEN ITMP = INDX( J ) INDX( J ) = INDX( J-1 ) INDX( J-1 ) = ITMP ELSE GO TO 60 END IF 50 CONTINUE 60 CONTINUE * END IF * ELSE IF( ENDD-START.GT.SELECT ) THEN * * Partition D( START:ENDD ) and stack parts, largest one first * * Choose partition entry as median of 3 * D1 = D( INDX( START ) ) D2 = D( INDX( ENDD ) ) I = ( START+ENDD ) / 2 D3 = D( INDX( I ) ) IF( D1.LT.D2 ) THEN IF( D3.LT.D1 ) THEN DMNMX = D1 ELSE IF( D3.LT.D2 ) THEN DMNMX = D3 ELSE DMNMX = D2 END IF ELSE IF( D3.LT.D2 ) THEN DMNMX = D2 ELSE IF( D3.LT.D1 ) THEN DMNMX = D3 ELSE DMNMX = D1 END IF END IF * IF( DIR.EQ.0 ) THEN * * Sort into decreasing order * I = START - 1 J = ENDD + 1 70 CONTINUE 80 CONTINUE J = J - 1 IF( D( INDX( J ) ).LT.DMNMX ) $ GO TO 80 90 CONTINUE I = I + 1 IF( D( INDX( I ) ).GT.DMNMX ) $ GO TO 90 IF( I.LT.J ) THEN ITMP = INDX( I ) INDX( I ) = INDX( J ) INDX( J ) = ITMP GO TO 70 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF ELSE * * Sort into increasing order * I = START - 1 J = ENDD + 1 100 CONTINUE 110 CONTINUE J = J - 1 IF( D( INDX( J ) ).GT.DMNMX ) $ GO TO 110 120 CONTINUE I = I + 1 IF( D( INDX( I ) ).LT.DMNMX ) $ GO TO 120 IF( I.LT.J ) THEN ITMP = INDX( I ) INDX( I ) = INDX( J ) INDX( J ) = ITMP GO TO 100 END IF IF( J-START.GT.ENDD-J-1 ) THEN STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD ELSE STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = J + 1 STACK( 2, STKPNT ) = ENDD STKPNT = STKPNT + 1 STACK( 1, STKPNT ) = START STACK( 2, STKPNT ) = J END IF END IF END IF IF( STKPNT.GT.0 ) $ GO TO 20 RETURN * * End of DLAPST * END scalapack-1.8.0/SRC/ctrmvt.f0000640000175000017500000001115410363532303015433 0ustar muammarmuammar SUBROUTINE CTRMVT( UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, $ INCZ ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER INCW, INCX, INCY, INCZ, LDT, N * .. * .. Array Arguments .. COMPLEX T( LDT, * ), W( * ), X( * ), Y( * ), Z( * ) * .. * * Purpose * ======= * * CTRMVT performs the matrix-vector operations * * x := conjg( T' ) *y, and w := T *z, * * where x is an n element vector and T is an n by n * upper or lower triangular matrix. * * Arguments * ========== * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix A. * N must be at least zero. * Unchanged on exit. * * T - COMPLEX array of DIMENSION ( LDT, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array T must contain the upper * triangular matrix and the strictly lower triangular part of * T is not referenced. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array T must contain the lower * triangular matrix and the strictly upper triangular part of * T is not referenced. * * LDT - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least * max( 1, n ). * Unchanged on exit. * * X - COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). * On exit, X = T' * y * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * Unchanged on exit. * * Y - COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). * Before entry, the incremented array Y must contain the n * element vector y. Unchanged on exit. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * W - COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCW ) ). * On exit, W = T * z * * INCW - INTEGER. * On entry, INCW specifies the increment for the elements of * W. INCW must not be zero. * Unchanged on exit. * * Z - COMPLEX array of dimension at least * ( 1 + ( n - 1 )*abs( INCZ ) ). * Before entry, the incremented array Z must contain the n * element vector z. Unchanged on exit. * * INCY - INTEGER. * On entrz, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * * Level 2 Blas routine. * * * .. Local Scalars .. INTEGER INFO * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL CCOPY, CTRMV, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( LDT.LT.MAX( 1, N ) ) THEN INFO = 4 ELSE IF( INCW.EQ.0 ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 10 ELSE IF( INCZ.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CTRMVT', INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * * IF( INCX.NE.1 .OR. INCY.NE.1 .OR. INCW.NE.1 .OR. INCZ.NE.1 .OR. $ .TRUE. ) THEN CALL CCOPY( N, Y, INCY, X, INCX ) CALL CTRMV( UPLO, 'C', 'N', N, T, LDT, X, INCX ) CALL CCOPY( N, Z, INCZ, W, INCW ) CALL CTRMV( UPLO, 'N', 'N', N, T, LDT, W, INCW ) RETURN END IF * RETURN * * End of CTRMVT. * END scalapack-1.8.0/SRC/pclatrz.f0000640000175000017500000002271410363532303015577 0ustar muammarmuammar SUBROUTINE PCLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCLATRZ reduces the M-by-N ( M<=N ) complex upper trapezoidal * matrix sub( A ) = [A(IA:IA+M-1,JA:JA+M-1) A(IA:IA+M-1,JA+N-L:JA+N-1)] * to upper triangular form by means of unitary transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N unitary matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. L > 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements N-L+1 to N of the first M rows * of sub( A ), with the array TAU, represent the unitary matrix * Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * LWORK >= Nq0 + MAX( 1, Mp0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), whose conjugate transpose is used to * introduce zeros into the (m - k + 1)th row of sub( A ), is given in * the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I, IAROW, ICTXT, II, J, J1, MP, MYCOL, MYROW, $ NPCOL, NPROW COMPLEX AII * .. * .. Local Arrays .. INTEGER DESCTAU( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL DESCSET, INFOG1L, PCELSET, PCLACGV, $ PCLARFG, PCLARZ * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) * CALL DESCSET( DESCTAU, DESCA( M_ ), 1, DESCA( MB_ ), 1, $ DESCA( RSRC_ ), MYCOL, ICTXT, MAX( 1, MP ) ) * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ II, IAROW ) DO 10 I = II, MP TAU( I ) = ZERO 10 CONTINUE * ELSE * AII = ZERO * J1 = JA + N - L DO 20 I = IA+M-1, IA, -1 J = JA + I - IA * * Generate elementary reflector H(i) to annihilate * [ A(i, j) A(i,j1:ja+n-1) ] * CALL PCLACGV( 1, A, I, J, DESCA, DESCA( M_ ) ) CALL PCLACGV( L, A, I, J1, DESCA, DESCA( M_ ) ) CALL PCLARFG( L+1, AII, I, J, A, I, J1, DESCA, DESCA( M_ ), $ TAU ) * * Apply H(i) to A(ia:i-1,j:ja+n-1) from the right * CALL PCLARZ( 'Right', I-IA, JA+N-J, L, A, I, J1, DESCA, $ DESCA( M_ ), TAU, A, IA, J, DESCA, WORK ) CALL PCELSET( A, I, J, DESCA, CONJG( AII ) ) * 20 CONTINUE * CALL PCLACGV( M, TAU, IA, 1, DESCTAU, 1 ) * END IF * RETURN * * End of PCLATRZ * END scalapack-1.8.0/SRC/psgbtrs.f0000640000175000017500000011313710363532303015604 0ustar muammarmuammar SUBROUTINE PSGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ), IPIV( * ) REAL A( * ), AF( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PSGBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PSGBTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * banded distributed * matrix with bandwidth BWL, BWU. * * Routine PSGBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) REAL pointer into * local memory to an array with first dimension * LLD_A >=(2*bwl+2*bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * IPIV (local output) INTEGER array, dimension >= DESCA( NB ). * Pivot indices for local factorizations. * Users *should not* alter the contents between * factorization and solve. * * B (local input/local output) REAL pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) REAL array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PSGBTRF and this is stored in AF. If a linear system * is to be solved using PSGBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+bwu)*(bwl+bwu)+6*(bwl+bwu)*(bwl+2*bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * REAL temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * NRHS*(NB+2*bwl+4*bwu) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= (BWL+BWU)+1 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * ===================================================================== * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * ===================================================================== * * Implemented for ScaLAPACK by: * Andrew J. Cleary, Livermore National Lab and University of Tenn., * and Markus Hegland, Australian National University. Feb., 1997. * Based on code written by : Peter Arbenz, ETH Zurich, 1996. * Last modified by: Peter Arbenz, Institute of Scientific Computing, * ETH, Zurich. * * ===================================================================== * * .. Parameters .. REAL ONE PARAMETER ( ONE = 1.0E+0 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC, $ FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, J, JA_NEW, L, LBWL, LBWU, LDBB, $ LDW, LLDA, LLDB, LM, LMJ, LN, LPTR, MYCOL, $ MYROW, NB, NEICOL, NP, NPACT, NPCOL, NPROW, $ NPSTR, NP_SAVE, ODD_SIZE, PART_OFFSET, $ RECOVERY_VAL, RETURN_CODE, STORE_M_B, $ STORE_N_A, WORK_SIZE_MIN, WPTR * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, SCOPY, $ DESC_CONVERT, SGEMM, SGEMV, SGER, SGERV2D, $ SGESD2D, SGETRS, SLACPY, SLASWP, SSCAL, SSWAP, $ STRSM, GLOBCHK, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -16 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( ( BWL.GT.N-1 ) .OR. ( BWL.LT.0 ) ) THEN INFO = -3 END IF * IF( ( BWU.GT.N-1 ) .OR. ( BWU.LT.0 ) ) THEN INFO = -4 END IF * IF( LLDA.LT.( 2*BWL+2*BWU+1 ) ) THEN INFO = -( 8*100+6 ) END IF * IF( NB.LE.0 ) THEN INFO = -( 8*100+4 ) END IF * BW = BWU + BWL * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -5 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PSGBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PSGBTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check worksize * WORK_SIZE_MIN = NRHS*( NB+2*BWL+4*BWU ) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -16 CALL PXERBLA( ICTXT, 'PSGBTRS: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB( 5 ) PARAM_CHECK( 16, 1 ) = DESCB( 4 ) PARAM_CHECK( 15, 1 ) = DESCB( 3 ) PARAM_CHECK( 14, 1 ) = DESCB( 2 ) PARAM_CHECK( 13, 1 ) = DESCB( 1 ) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA( 5 ) PARAM_CHECK( 10, 1 ) = DESCA( 4 ) PARAM_CHECK( 9, 1 ) = DESCA( 3 ) PARAM_CHECK( 8, 1 ) = DESCA( 1 ) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 16 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PSGBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 100 END IF * * * * Begin main code * * Move data into workspace - communicate/copy (overlap) * IF( MYCOL.LT.NPCOL-1 ) THEN CALL SGESD2D( ICTXT, BWU, NRHS, B( NB-BWU+1 ), LLDB, 0, $ MYCOL+1 ) END IF * IF( MYCOL.LT.NPCOL-1 ) THEN LM = NB - BWU ELSE LM = NB END IF * IF( MYCOL.GT.0 ) THEN WPTR = BWU + 1 ELSE WPTR = 1 END IF * LDW = NB + BWU + 2*BW + BWU * CALL SLACPY( 'G', LM, NRHS, B( 1 ), LLDB, WORK( WPTR ), LDW ) * * Zero out rest of work * DO 20 J = 1, NRHS DO 10 L = WPTR + LM, LDW WORK( ( J-1 )*LDW+L ) = ZERO 10 CONTINUE 20 CONTINUE * IF( MYCOL.GT.0 ) THEN CALL SGERV2D( ICTXT, BWU, NRHS, WORK( 1 ), LDW, 0, MYCOL-1 ) END IF * ******************************************************************** * PHASE 1: Local computation phase -- Solve L*X = B ******************************************************************** * * Size of main (or odd) partition in each processor * ODD_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL ) * IF( MYCOL.NE.0 ) THEN LBWL = BW LBWU = 0 APTR = 1 ELSE LBWL = BWL LBWU = BWU APTR = 1 + BWU END IF * IF( MYCOL.NE.NPCOL-1 ) THEN LM = NB - LBWU LN = NB - BW ELSE IF( MYCOL.NE.0 ) THEN LM = ODD_SIZE + BWU LN = MAX( ODD_SIZE-BW, 0 ) ELSE LM = N LN = MAX( N-BW, 0 ) END IF * DO 30 J = 1, LN * LMJ = MIN( LBWL, LM-J ) L = IPIV( J ) * IF( L.NE.J ) THEN CALL SSWAP( NRHS, WORK( L ), LDW, WORK( J ), LDW ) END IF * LPTR = BW + 1 + ( J-1 )*LLDA + APTR * CALL SGER( LMJ, NRHS, -ONE, A( LPTR ), 1, WORK( J ), LDW, $ WORK( J+1 ), LDW ) * 30 CONTINUE * ******************************************************************** * PHASE 2: Global computation phase -- Solve L*X = B ******************************************************************** * * Define the initial dimensions of the diagonal blocks * The offdiagonal blocks (for MYCOL > 0) are of size BM by BW * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU BN = BW ELSE BM = MIN( BW, ODD_SIZE ) + BWU BN = MIN( BW, ODD_SIZE ) END IF * * Pointer to first element of block bidiagonal matrix in AF * Leading dimension of block bidiagonal system * BBPTR = ( NB+BWU )*BW + 1 LDBB = 2*BW + BWU * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. CALL SGETRS( 'N', N-LN, NRHS, AF( BBPTR+BW*LDBB ), LDBB, $ IPIV( LN+1 ), WORK( LN+1 ), LDW, INFO ) * END IF * * Loop over levels ... * * The two integers NPACT (nu. of active processors) and NPSTR * (stride between active processors) is used to control the * loop. * NPACT = NPCOL NPSTR = 1 * * Begin loop over levels 40 CONTINUE IF( NPACT.LE.1 ) $ GO TO 50 * * Test if processor is active IF( MOD( MYCOL, NPSTR ).EQ.0 ) THEN * * Send/Receive blocks * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW ELSE BMN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) + $ BWU END IF * CALL SGESD2D( ICTXT, BM, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * IF( NPACT.NE.2 ) THEN * * Receive answers back from partner processor * CALL SGERV2D( ICTXT, BM+BMN-BW, NRHS, WORK( LN+1 ), $ LDW, 0, NEICOL ) * BM = BM + BMN - BW * END IF * END IF * ELSE * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * CALL SLACPY( 'G', BM, NRHS, WORK( LN+1 ), LDW, $ WORK( NB+BWU+BMN+1 ), LDW ) * CALL SGERV2D( ICTXT, BMN, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ NEICOL ) * * and do the permutations and eliminations * IF( NPACT.NE.2 ) THEN * * Solve locally for BW variables * CALL SLASWP( NRHS, WORK( NB+BWU+1 ), LDW, 1, BW, $ IPIV( LN+1 ), 1 ) * CALL STRSM( 'L', 'L', 'N', 'U', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) * * Use soln just calculated to update RHS * CALL SGEMM( 'N', 'N', BM+BMN-BW, NRHS, BW, -ONE, $ AF( BBPTR+BW*LDBB+BW ), LDBB, $ WORK( NB+BWU+1 ), LDW, ONE, $ WORK( NB+BWU+1+BW ), LDW ) * * Give answers back to partner processor * CALL SGESD2D( ICTXT, BM+BMN-BW, NRHS, $ WORK( NB+BWU+1+BW ), LDW, 0, NEICOL ) * ELSE * * Finish up calculations for final level * CALL SLASWP( NRHS, WORK( NB+BWU+1 ), LDW, 1, BM+BMN, $ IPIV( LN+1 ), 1 ) * CALL STRSM( 'L', 'L', 'N', 'U', BM+BMN, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) END IF * END IF * NPACT = ( NPACT+1 ) / 2 NPSTR = NPSTR*2 GO TO 40 * END IF * 50 CONTINUE * * ************************************** * BACKSOLVE ******************************************************************** * PHASE 2: Global computation phase -- Solve U*Y = X ******************************************************************** * IF( NPCOL.EQ.1 ) THEN * * In this case the loop over the levels will not be * performed. * In fact, the backsolve portion was done in the call to * SGETRS in the frontsolve. * END IF * * Compute variable needed to reverse loop structure in * reduced system. * RECOVERY_VAL = NPACT*NPSTR - NPCOL * * Loop over levels * Terminal values of NPACT and NPSTR from frontsolve are used * 60 CONTINUE IF( NPACT.GE.NPCOL ) $ GO TO 80 * NPSTR = NPSTR / 2 * NPACT = NPACT*2 * * Have to adjust npact for non-power-of-2 * NPACT = NPACT - MOD( ( RECOVERY_VAL / NPSTR ), 2 ) * * Find size of submatrix in this proc at this level * IF( MYCOL / NPSTR.LT.NPACT-1 ) THEN BN = BW ELSE BN = MIN( BW, NUMROC( N, NB, NPCOL-1, 0, NPCOL ) ) END IF * * If this processor is even in this level... * IF( MOD( MYCOL, 2*NPSTR ).EQ.0 ) THEN * NEICOL = MYCOL + NPSTR * IF( NEICOL / NPSTR.LE.NPACT-1 ) THEN * IF( NEICOL / NPSTR.LT.NPACT-1 ) THEN BMN = BW BNN = BW ELSE BMN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) + BWU BNN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) END IF * IF( NPACT.GT.2 ) THEN * CALL SGESD2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * CALL SGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * ELSE * CALL SGERV2D( ICTXT, BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * END IF * END IF * ELSE * This processor is odd on this level * NEICOL = MYCOL - NPSTR * IF( NEICOL.EQ.0 ) THEN BMN = BW - BWU ELSE BMN = BW END IF * IF( NEICOL.LT.NPCOL-1 ) THEN BNN = BW ELSE BNN = MIN( BW, NUMROC( N, NB, NEICOL, 0, NPCOL ) ) END IF * IF( NPACT.GT.2 ) THEN * * Move RHS to make room for received solutions * CALL SLACPY( 'G', BW, NRHS, WORK( NB+BWU+1 ), LDW, $ WORK( NB+BWU+BW+1 ), LDW ) * CALL SGERV2D( ICTXT, 2*BW, NRHS, WORK( LN+1 ), LDW, 0, $ NEICOL ) * CALL SGEMM( 'N', 'N', BW, NRHS, BN, -ONE, AF( BBPTR ), LDBB, $ WORK( LN+1 ), LDW, ONE, WORK( NB+BWU+BW+1 ), $ LDW ) * * IF( MYCOL.GT.NPSTR ) THEN * CALL SGEMM( 'N', 'N', BW, NRHS, BW, -ONE, $ AF( BBPTR+2*BW*LDBB ), LDBB, WORK( LN+BW+1 ), $ LDW, ONE, WORK( NB+BWU+BW+1 ), LDW ) * END IF * CALL STRSM( 'L', 'U', 'N', 'N', BW, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+BW+1 ), $ LDW ) * * Send new solution to neighbor * CALL SGESD2D( ICTXT, BW, NRHS, WORK( NB+BWU+BW+1 ), LDW, 0, $ NEICOL ) * * Copy new solution into expected place * CALL SLACPY( 'G', BW, NRHS, WORK( NB+BWU+1+BW ), LDW, $ WORK( LN+BW+1 ), LDW ) * ELSE * * Solve with local diagonal block * CALL STRSM( 'L', 'U', 'N', 'N', BN+BNN, NRHS, ONE, $ AF( BBPTR+BW*LDBB ), LDBB, WORK( NB+BWU+1 ), $ LDW ) * * Send new solution to neighbor * CALL SGESD2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ NEICOL ) * * Shift solutions into expected positions * CALL SLACPY( 'G', BNN+BN-BW, NRHS, WORK( NB+BWU+1+BW ), LDW, $ WORK( LN+1 ), LDW ) * * IF( ( NB+BWU+1 ).NE.( LN+1+BW ) ) THEN * * Copy one row at a time since spaces may overlap * DO 70 J = 1, BW CALL SCOPY( NRHS, WORK( NB+BWU+J ), LDW, $ WORK( LN+BW+J ), LDW ) 70 CONTINUE * END IF * END IF * END IF * GO TO 60 * 80 CONTINUE * End of loop over levels * ******************************************************************** * PHASE 1: (Almost) Local computation phase -- Solve U*Y = X ******************************************************************** * * Reset BM to value it had before reduced system frontsolve... * IF( MYCOL.NE.NPCOL-1 ) THEN BM = BW - LBWU ELSE BM = MIN( BW, ODD_SIZE ) + BWU END IF * * First metastep is to account for the fillin blocks AF * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL SGESD2D( ICTXT, BW, NRHS, WORK( NB-BW+1 ), LDW, 0, $ MYCOL+1 ) * END IF * IF( MYCOL.GT.0 ) THEN * CALL SGERV2D( ICTXT, BW, NRHS, WORK( NB+BWU+1 ), LDW, 0, $ MYCOL-1 ) * * Modify local right hand sides with received rhs's * CALL SGEMM( 'T', 'N', LM-BM, NRHS, BW, -ONE, AF( 1 ), BW, $ WORK( NB+BWU+1 ), LDW, ONE, WORK( 1 ), LDW ) * END IF * DO 90 J = LN, 1, -1 * LMJ = MIN( BW, ODD_SIZE-1 ) * LPTR = BW - 1 + J*LLDA + APTR * * In the following, the TRANS=T option is used to reverse * the order of multiplication, not as a true transpose * CALL SGEMV( 'T', LMJ, NRHS, -ONE, WORK( J+1 ), LDW, A( LPTR ), $ LLDA-1, ONE, WORK( J ), LDW ) * * Divide by diagonal element * CALL SSCAL( NRHS, ONE / A( LPTR-LLDA+1 ), WORK( J ), LDW ) 90 CONTINUE * * * CALL SLACPY( 'G', ODD_SIZE, NRHS, WORK( 1 ), LDW, B( 1 ), LLDB ) * * Free BLACS space used to hold standard-form grid. * ICTXT = ICTXT_SAVE IF( ICTXT.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 100 CONTINUE * * Restore saved input parameters * NP = NP_SAVE * * Output worksize * WORK( 1 ) = WORK_SIZE_MIN * RETURN * * End of PSGBTRS * END scalapack-1.8.0/SRC/pclansy.f0000640000175000017500000007025710363532303015576 0ustar muammarmuammar REAL FUNCTION PCLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL WORK( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCLANSY returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * real symmetric distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PCLANSY returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PCLANSY as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PCLANSY is set to zero. N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the symmetric distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLASSQ, PSCOL2ROW, $ PSTREECOMB, SAXPY, SCOMBSSQ, $ SGAMX2D, SGSUM2D, SGEBR2D, SGEBS2D * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL ICEIL, ISAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is symmetric, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 30 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 70 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 100 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 140 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * symmetric). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 210 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 250 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 280 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 320 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to SGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PSCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL SAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( ISAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL CLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL CLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PCLANSY = VALUE * RETURN * * End of PCLANSY * END scalapack-1.8.0/SRC/pzstein.f0000640000175000017500000006042310602576752015627 0ustar muammarmuammar SUBROUTINE PZSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N DOUBLE PRECISION ORFAC * .. * .. Array Arguments .. INTEGER DESCZ( * ), IBLOCK( * ), ICLUSTR( * ), $ IFAIL( * ), ISPLIT( * ), IWORK( * ) DOUBLE PRECISION D( * ), E( * ), GAP( * ), W( * ), WORK( * ) COMPLEX*16 Z( * ) * .. * * Purpose * ======= * * PZSTEIN computes the eigenvectors of a symmetric tridiagonal matrix * in parallel, using inverse iteration. The eigenvectors found * correspond to user specified eigenvalues. PZSTEIN does not * orthogonalize vectors that are on different processes. The extent * of orthogonalization is controlled by the input parameter LWORK. * Eigenvectors that are to be orthogonalized are computed by the same * process. PZSTEIN decides on the allocation of work among the * processes and then calls DSTEIN2 (modified LAPACK routine) on each * individual process. If insufficient workspace is allocated, the * expected orthogonalization may not be done. * * Note : If the eigenvectors obtained are not orthogonal, increase * LWORK and run the code again. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension r x c. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the r processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the c processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * P = NPROW * NPCOL is the total number of processes * * N (global input) INTEGER * The order of the tridiagonal matrix T. N >= 0. * * D (global input) DOUBLE PRECISION array, dimension (N) * The n diagonal elements of the tridiagonal matrix T. * * E (global input) DOUBLE PRECISION array, dimension (N-1) * The (n-1) off-diagonal elements of the tridiagonal matrix T. * * M (global input) INTEGER * The total number of eigenvectors to be found. 0 <= M <= N. * * W (global input/global output) DOUBLE PRECISION array, dim (M) * On input, the first M elements of W contain all the * eigenvalues for which eigenvectors are to be computed. The * eigenvalues should be grouped by split-off block and ordered * from smallest to largest within the block (The output array * W from PDSTEBZ with ORDER='b' is expected here). This * array should be replicated on all processes. * On output, the first M elements contain the input * eigenvalues in ascending order. * * Note : To obtain orthogonal vectors, it is best if * eigenvalues are computed to highest accuracy ( this can be * done by setting ABSTOL to the underflow threshold = * DLAMCH('U') --- ABSTOL is an input parameter * to PDSTEBZ ) * * IBLOCK (global input) INTEGER array, dimension (N) * The submatrix indices associated with the corresponding * eigenvalues in W -- 1 for eigenvalues belonging to the * first submatrix from the top, 2 for those belonging to * the second submatrix, etc. (The output array IBLOCK * from PDSTEBZ is expected here). * * ISPLIT (global input) INTEGER array, dimension (N) * The splitting points, at which T breaks up into submatrices. * The first submatrix consists of rows/columns 1 to ISPLIT(1), * the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), * etc., and the NSPLIT-th consists of rows/columns * ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N (The output array * ISPLIT from PDSTEBZ is expected here.) * * ORFAC (global input) DOUBLE PRECISION * ORFAC specifies which eigenvectors should be orthogonalized. * Eigenvectors that correspond to eigenvalues which are within * ORFAC*||T|| of each other are to be orthogonalized. * However, if the workspace is insufficient (see LWORK), this * tolerance may be decreased until all eigenvectors to be * orthogonalized can be stored in one process. * No orthogonalization will be done if ORFAC equals zero. * A default value of 10^-3 is used if ORFAC is negative. * ORFAC should be identical on all processes. * * Z (local output) COMPLEX*16 array, * dimension (DESCZ(DLEN_), N/npcol + NB) * Z contains the computed eigenvectors associated with the * specified eigenvalues. Any vector which fails to converge is * set to its current iterate after MAXITS iterations ( See * DSTEIN2 ). * On output, Z is distributed across the P processes in block * cyclic format. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local workspace/global output) DOUBLE PRECISION array, * dimension ( LWORK ) * On output, WORK(1) gives a lower bound on the * workspace ( LWORK ) that guarantees the user desired * orthogonalization (see ORFAC). * Note that this may overestimate the minimum workspace needed. * * LWORK (local input) integer * LWORK controls the extent of orthogonalization which can be * done. The number of eigenvectors for which storage is * allocated on each process is * NVEC = floor(( LWORK- max(5*N,NP00*MQ00) )/N). * Eigenvectors corresponding to eigenvalue clusters of size * NVEC - ceil(M/P) + 1 are guaranteed to be orthogonal ( the * orthogonality is similar to that obtained from ZSTEIN2). * Note : LWORK must be no smaller than: * max(5*N,NP00*MQ00) + ceil(M/P)*N, * and should have the same input value on all processes. * It is the minimum value of LWORK input on different processes * that is significant. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/global output) INTEGER array, * dimension ( 3*N+P+1 ) * On return, IWORK(1) contains the amount of integer workspace * required. * On return, the IWORK(2) through IWORK(P+2) indicate * the eigenvectors computed by each process. Process I computes * eigenvectors indexed IWORK(I+2)+1 thru' IWORK(I+3). * * LIWORK (local input) INTEGER * Size of array IWORK. Must be >= 3*N + P + 1 * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IFAIL (global output) integer array, dimension (M) * On normal exit, all elements of IFAIL are zero. * If one or more eigenvectors fail to converge after MAXITS * iterations (as in ZSTEIN), then INFO > 0 is returned. * If mod(INFO,M+1)>0, then * for I=1 to mod(INFO,M+1), the eigenvector * corresponding to the eigenvalue W(IFAIL(I)) failed to * converge ( W refers to the array of eigenvalues on output ). * * ICLUSTR (global output) integer array, dimension (2*P) * This output array contains indices of eigenvectors * corresponding to a cluster of eigenvalues that could not be * orthogonalized due to insufficient workspace (see LWORK, * ORFAC and INFO). Eigenvectors corresponding to clusters of * eigenvalues indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), I = 1 to * INFO/(M+1), could not be orthogonalized due to lack of * workspace. Hence the eigenvectors corresponding to these * clusters may not be orthogonal. ICLUSTR is a zero terminated * array --- ( ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0 ) * if and only if K is the number of clusters. * * GAP (global output) DOUBLE PRECISION array, dimension (P) * This output array contains the gap between eigenvalues whose * eigenvectors could not be orthogonalized. The INFO/M output * values in this array correspond to the INFO/(M+1) clusters * indicated by the array ICLUSTR. As a result, the dot product * between eigenvectors corresponding to the I^th cluster may be * as high as ( O(n)*macheps ) / GAP(I). * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * < 0 : if INFO = -I, the I-th argument had an illegal value * > 0 : if mod(INFO,M+1) = I, then I eigenvectors failed to * converge in MAXITS iterations. Their indices are * stored in the array IFAIL. * if INFO/(M+1) = I, then eigenvectors corresponding to * I clusters of eigenvalues could not be orthogonalized * due to insufficient workspace. The indices of the * clusters are stored in the array ICLUSTR. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD * .. * .. External Functions .. INTEGER ICEIL, NUMROC EXTERNAL ICEIL, NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D, $ DLASRT2, DSTEIN2, IGAMN2D, IGEBR2D, IGEBS2D, $ PCHK1MAT, PXERBLA, PZLAEVSWP * .. * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, NEGONE, ODM1, FIVE, ODM3, ODM18 PARAMETER ( ZERO = 0.0D+0, NEGONE = -1.0D+0, $ ODM1 = 1.0D-1, FIVE = 5.0D+0, ODM3 = 1.0D-3, $ ODM18 = 1.0D-18 ) * .. * .. Local Scalars .. LOGICAL LQUERY, SORTED INTEGER B1, BN, BNDRY, CLSIZ, COL, I, IFIRST, IINFO, $ ILAST, IM, INDRW, ITMP, J, K, LGCLSIZ, LLWORK, $ LOAD, LOCINFO, MAXVEC, MQ00, MYCOL, MYROW, $ NBLK, NERR, NEXT, NP00, NPCOL, NPROW, NVS, $ OLNBLK, P, ROW, SELF, TILL, TOTERR DOUBLE PRECISION DIFF, MINGAP, ONENRM, ORGFAC, ORTOL, TMPFAC * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 1200+CTXT_ ) ELSE * * Make sure that NPROW>0 and NPCOL>0 before calling NUMROC * CALL CHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, INFO ) IF( INFO.EQ.0 ) THEN * * Now we know that our context is good enough to * perform the rest of the checks * NP00 = NUMROC( N, DESCZ( MB_ ), 0, 0, NPROW ) MQ00 = NUMROC( M, DESCZ( NB_ ), 0, 0, NPCOL ) P = NPROW*NPCOL * * Compute the maximum number of vectors per process * LLWORK = LWORK CALL IGAMN2D( DESCZ( CTXT_ ), 'A', ' ', 1, 1, LLWORK, 1, 1, $ 1, -1, -1, -1 ) INDRW = MAX( 5*N, NP00*MQ00 ) IF( N.NE.0 ) $ MAXVEC = ( LLWORK-INDRW ) / N LOAD = ICEIL( M, P ) IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN TMPFAC = ORFAC CALL DGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1 ) ELSE CALL DGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC, $ 1, 0, 0 ) END IF * LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) IF( M.LT.0 .OR. M.GT.N ) THEN INFO = -4 ELSE IF( MAXVEC.LT.LOAD .AND. .NOT.LQUERY ) THEN INFO = -14 ELSE IF( LIWORK.LT.3*N+P+1 .AND. .NOT.LQUERY ) THEN INFO = -16 ELSE DO 10 I = 2, M IF( IBLOCK( I ).LT.IBLOCK( I-1 ) ) THEN INFO = -6 GO TO 20 END IF IF( IBLOCK( I ).EQ.IBLOCK( I-1 ) .AND. W( I ).LT. $ W( I-1 ) ) THEN INFO = -5 GO TO 20 END IF 10 CONTINUE 20 CONTINUE IF( INFO.EQ.0 ) THEN IF( ABS( TMPFAC-ORFAC ).GT.FIVE*ABS( TMPFAC ) ) $ INFO = -8 END IF END IF * END IF IDUM1( 1 ) = M IDUM2( 1 ) = 4 CALL PCHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, 1, IDUM1, IDUM2, $ INFO ) WORK( 1 ) = DBLE( MAX( 5*N, NP00*MQ00 )+ICEIL( M, P )*N ) IWORK( 1 ) = 3*N + P + 1 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PZSTEIN', -INFO ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN RETURN END IF * DO 30 I = 1, M IFAIL( I ) = 0 30 CONTINUE DO 40 I = 1, P + 1 IWORK( I ) = 0 40 CONTINUE DO 50 I = 1, P GAP( I ) = NEGONE ICLUSTR( 2*I-1 ) = 0 ICLUSTR( 2*I ) = 0 50 CONTINUE * * * Quick return if possible * IF( N.EQ.0 .OR. M.EQ.0 ) $ RETURN * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC ELSE TMPFAC = ODM3 END IF ORGFAC = TMPFAC * * Allocate the work among the processes * ILAST = M / LOAD IF( MOD( M, LOAD ).EQ.0 ) $ ILAST = ILAST - 1 OLNBLK = -1 NVS = 0 NEXT = 1 IM = 0 ONENRM = ZERO DO 100 I = 0, ILAST - 1 NEXT = NEXT + LOAD J = NEXT - 1 IF( J.GT.NVS ) THEN NBLK = IBLOCK( NEXT ) IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN * * Compute orthogonalization criterion * IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 60 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 60 CONTINUE OLNBLK = NBLK END IF TILL = NVS + MAXVEC 70 CONTINUE J = NEXT - 1 IF( TMPFAC.GT.ODM18 ) THEN ORTOL = TMPFAC*ONENRM DO 80 J = NEXT - 1, MIN( TILL, M-1 ) IF( IBLOCK( J+1 ).NE.IBLOCK( J ) .OR. W( J+1 )- $ W( J ).GE.ORTOL ) THEN GO TO 90 END IF 80 CONTINUE IF( J.EQ.M .AND. TILL.GE.M ) $ GO TO 90 TMPFAC = TMPFAC*ODM1 GO TO 70 END IF 90 CONTINUE J = MIN( J, TILL ) END IF IF( SELF.EQ.I ) $ IM = MAX( 0, J-NVS ) * IWORK( I+1 ) = NVS NVS = MAX( J, NVS ) 100 CONTINUE IF( SELF.EQ.ILAST ) $ IM = M - NVS IWORK( ILAST+1 ) = NVS DO 110 I = ILAST + 2, P + 1 IWORK( I ) = M 110 CONTINUE * CLSIZ = 1 LGCLSIZ = 1 ILAST = 0 NBLK = 0 BNDRY = 2 K = 1 DO 140 I = 1, M IF( IBLOCK( I ).NE.NBLK ) THEN NBLK = IBLOCK( I ) IF( NBLK.EQ.1 ) THEN B1 = 1 ELSE B1 = ISPLIT( NBLK-1 ) + 1 END IF BN = ISPLIT( NBLK ) * ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) DO 120 J = B1 + 1, BN - 1 ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+ $ ABS( E( J ) ) ) 120 CONTINUE * END IF IF( I.GT.1 ) THEN DIFF = W( I ) - W( I-1 ) IF( IBLOCK( I ).NE.IBLOCK( I-1 ) .OR. I.EQ.M .OR. DIFF.GT. $ ORGFAC*ONENRM ) THEN IFIRST = ILAST IF( I.EQ.M ) THEN IF( IBLOCK( M ).NE.IBLOCK( M-1 ) .OR. DIFF.GT.ORGFAC* $ ONENRM ) THEN ILAST = M - 1 ELSE ILAST = M END IF ELSE ILAST = I - 1 END IF CLSIZ = ILAST - IFIRST IF( CLSIZ.GT.1 ) THEN IF( LGCLSIZ.LT.CLSIZ ) $ LGCLSIZ = CLSIZ MINGAP = ONENRM 130 CONTINUE IF( BNDRY.GT.P+1 ) $ GO TO 150 IF( IWORK( BNDRY ).GT.IFIRST .AND. IWORK( BNDRY ).LT. $ ILAST ) THEN MINGAP = MIN( W( IWORK( BNDRY )+1 )- $ W( IWORK( BNDRY ) ), MINGAP ) ELSE IF( IWORK( BNDRY ).GE.ILAST ) THEN IF( MINGAP.LT.ONENRM ) THEN ICLUSTR( 2*K-1 ) = IFIRST + 1 ICLUSTR( 2*K ) = ILAST GAP( K ) = MINGAP / ONENRM K = K + 1 END IF GO TO 140 END IF BNDRY = BNDRY + 1 GO TO 130 END IF END IF END IF 140 CONTINUE 150 CONTINUE INFO = ( K-1 )*( M+1 ) * * Call DSTEIN2 to find the eigenvectors * CALL DSTEIN2( N, D, E, IM, W( IWORK( SELF+1 )+1 ), $ IBLOCK( IWORK( SELF+1 )+1 ), ISPLIT, ORGFAC, $ WORK( INDRW+1 ), N, WORK, IWORK( P+2 ), $ IFAIL( IWORK( SELF+1 )+1 ), LOCINFO ) * * Redistribute the eigenvector matrix to conform with the block * cyclic distribution of the input matrix * * DO 160 I = 1, M IWORK( P+1+I ) = I 160 CONTINUE * CALL DLASRT2( 'I', M, W, IWORK( P+2 ), IINFO ) * DO 170 I = 1, M IWORK( M+P+1+IWORK( P+1+I ) ) = I 170 CONTINUE * * DO 180 I = 1, LOCINFO ITMP = IWORK( SELF+1 ) + I IFAIL( ITMP ) = IFAIL( ITMP ) + ITMP - I IFAIL( ITMP ) = IWORK( M+P+1+IFAIL( ITMP ) ) 180 CONTINUE * DO 190 I = 1, K - 1 ICLUSTR( 2*I-1 ) = IWORK( M+P+1+ICLUSTR( 2*I-1 ) ) ICLUSTR( 2*I ) = IWORK( M+P+1+ICLUSTR( 2*I ) ) 190 CONTINUE * * * Still need to apply the above permutation to IFAIL * * TOTERR = 0 DO 210 I = 1, P IF( SELF.EQ.I-1 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, LOCINFO, 1 ) IF( LOCINFO.NE.0 ) THEN CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', LOCINFO, 1, $ IFAIL( IWORK( I )+1 ), LOCINFO ) DO 200 J = 1, LOCINFO IFAIL( TOTERR+J ) = IFAIL( IWORK( I )+J ) 200 CONTINUE TOTERR = TOTERR + LOCINFO END IF ELSE * ROW = ( I-1 ) / NPCOL COL = MOD( I-1, NPCOL ) * CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, NERR, 1, $ ROW, COL ) IF( NERR.NE.0 ) THEN CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', NERR, 1, $ IFAIL( TOTERR+1 ), NERR, ROW, COL ) TOTERR = TOTERR + NERR END IF END IF 210 CONTINUE INFO = INFO + TOTERR * * CALL PZLAEVSWP( N, WORK( INDRW+1 ), N, Z, IZ, JZ, DESCZ, IWORK, $ IWORK( M+P+2 ), WORK, INDRW ) * DO 220 I = 2, P IWORK( I ) = IWORK( M+P+1+IWORK( I ) ) 220 CONTINUE * * * Sort the IWORK array * * 230 CONTINUE SORTED = .TRUE. DO 240 I = 2, P - 1 IF( IWORK( I ).GT.IWORK( I+1 ) ) THEN ITMP = IWORK( I+1 ) IWORK( I+1 ) = IWORK( I ) IWORK( I ) = ITMP SORTED = .FALSE. END IF 240 CONTINUE IF( .NOT.SORTED ) $ GO TO 230 * DO 250 I = P + 1, 1, -1 IWORK( I+1 ) = IWORK( I ) 250 CONTINUE * WORK( 1 ) = ( LGCLSIZ+LOAD-1 )*N + INDRW IWORK( 1 ) = 3*N + P + 1 * * End of PZSTEIN * END scalapack-1.8.0/SRC/pcgels.f0000640000175000017500000005472510363532303015404 0ustar muammarmuammar SUBROUTINE PCGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), WORK( * ) * .. * * Purpose * ======= * * PCGELS solves overdetermined or underdetermined complex linear * systems involving an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1), * or its conjugate-transpose, using a QR or LQ factorization of * sub( A ). It is assumed that sub( A ) has full rank. * * The following options are provided: * * 1. If TRANS = 'N' and m >= n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )*X ||. * * 2. If TRANS = 'N' and m < n: find the minimum norm solution of * an underdetermined system sub( A ) * X = sub( B ). * * 3. If TRANS = 'C' and m >= n: find the minimum norm solution of * an undetermined system sub( A )**H * X = sub( B ). * * 4. If TRANS = 'C' and m < n: find the least squares solution of * an overdetermined system, i.e., solve the least squares problem * minimize || sub( B ) - sub( A )**H * X ||. * * where sub( B ) denotes B( IB:IB+M-1, JB:JB+NRHS-1 ) when TRANS = 'N' * and B( IB:IB+N-1, JB:JB+NRHS-1 ) otherwise. Several right hand side * vectors b and solution vectors x can be handled in a single call; * When TRANS = 'N', the solution vectors are stored as the columns of * the N-by-NRHS right hand side matrix sub( B ) and the M-by-NRHS * right hand side matrix sub( B ) otherwise. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * TRANS (global input) CHARACTER * = 'N': the linear system involves sub( A ); * = 'C': the linear system involves sub( A )**H. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e. the number of columns * of the distributed submatrices sub( B ) and X. NRHS >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). On entry, the M-by-N matrix A. * if M >= N, sub( A ) is overwritten by details of its QR * factorization as returned by PCGEQRF; * if M < N, sub( A ) is overwritten by details of its LQ * factorization as returned by PCGELQF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of local dimension * (LLD_B, LOCc(JB+NRHS-1)). On entry, this array contains the * local pieces of the distributed matrix B of right hand side * vectors, stored columnwise; * sub( B ) is M-by-NRHS if TRANS='N', and N-by-NRHS otherwise. * On exit, sub( B ) is overwritten by the solution vectors, * stored columnwise: if TRANS = 'N' and M >= N, rows 1 to N * of sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements N+1 to M in that * column; if TRANS = 'N' and M < N, rows 1 to N of sub( B ) * contain the minimum norm solution vectors; if TRANS = 'C' * and M >= N, rows 1 to M of sub( B ) contain the minimum norm * solution vectors; if TRANS = 'C' and M < N, rows 1 to M of * sub( B ) contain the least squares solution vectors; the * residual sum of squares for the solution in each column is * given by the sum of squares of elements M+1 to N in that * column. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= LTAU + MAX( LWF, LWS ) where * If M >= N, then * LTAU = NUMROC( JA+MIN(M,N)-1, NB_A, MYCOL, CSRC_A, NPCOL ), * LWF = NB_A * ( MpA0 + NqA0 + NB_A ) * LWS = MAX( (NB_A*(NB_A-1))/2, (NRHSqB0 + MpB0)*NB_A ) + * NB_A * NB_A * Else * LTAU = NUMROC( IA+MIN(M,N)-1, MB_A, MYROW, RSRC_A, NPROW ), * LWF = MB_A * ( MpA0 + NqA0 + MB_A ) * LWS = MAX( (MB_A*(MB_A-1))/2, ( NpB0 + MAX( NqA0 + * NUMROC( NUMROC( N+IROFFB, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NRHSqB0 ) )*MB_A ) + * MB_A * MB_A * End if * * where LCMP = LCM / NPROW with LCM = ILCM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * MpB0 = NUMROC( M+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NpB0 = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NRHSqB0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY, TPSD INTEGER BROW, IACOL, IAROW, IASCL, IBCOL, IBROW, IBSCL, $ ICOFFA, ICOFFB, ICTXT, IPW, IROFFA, IROFFB, $ LCM, LCMP, LTAU, LWF, LWMIN, LWS, MPA0, MPB0, $ MYCOL, MYROW, NPB0, NPCOL, NPROW, NQA0, $ NRHSQB0, SCLLEN REAL ANRM, BIGNUM, BNRM, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) REAL RWORK( 1 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM INTEGER INDXG2P, NUMROC REAL PCLANGE, PSLAMCH EXTERNAL ILCM, INDXG2P, LSAME, NUMROC, PCLANGE, $ PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCGELQF, $ PCGEQRF, PSLABAD, PCLASCL, PCLASET, $ PCTRSM, PCUNMLQ, PCUNMQR, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 800 + CTXT_ ) ELSE CALL CHK1MAT( M, 2, N, 3, IA, JA, DESCA, 8, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( IA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) * IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( IB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) NRHSQB0 = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, $ NPCOL ) IF( M.GE.N ) THEN MPB0 = NUMROC( M+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( JA+MIN(M,N)-1, DESCA( NB_ ), MYCOL, $ DESCA( CSRC_ ), NPCOL ) LWF = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) LWS = MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPB0 + NRHSQB0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ )*DESCA( NB_ ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, $ NPROW ) LTAU = NUMROC( IA+MIN(M,N)-1, DESCA( MB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) LWF = DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ) LWS = MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( NPB0 + MAX( NQA0 + NUMROC( NUMROC( N+IROFFB, $ DESCA( MB_ ), 0, 0, NPROW ), DESCA( MB_ ), 0, 0, $ LCMP ), NRHSQB0 ) )*DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF LWMIN = LTAU + MAX( LWF, LWS ) WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) * TPSD = .TRUE. IF( LSAME( TRANS, 'N' ) ) $ TPSD = .FALSE. * IF( .NOT.( LSAME( TRANS, 'N' ) .OR. $ LSAME( TRANS, 'C' ) ) ) THEN INFO = -1 ELSE IF( M.LT.0 ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( M.GE.N .AND. IROFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. IAROW.NE.IBROW ) THEN INFO = -10 ELSE IF( M.LT.N .AND. ICOFFA.NE.IROFFB ) THEN INFO = -10 ELSE IF( M.GE.N .AND. DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( M.LT.N .AND. DESCA( NB_ ).NE.DESCB( MB_ ) ) THEN INFO = -( 1200 + MB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1200 + CTXT_ ) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF END IF * IF( .NOT.TPSD ) THEN IDUM1( 1 ) = ICHAR( 'N' ) ELSE IDUM1( 1 ) = ICHAR( 'C' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 14 CALL PCHK2MAT( M, 2, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, 4, $ IB, JB, DESCB, 12, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGELS', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PCLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, $ IB, JB, DESCB ) RETURN END IF * * Get machine parameters * SMLNUM = PSLAMCH( ICTXT, 'S' ) SMLNUM = SMLNUM / PSLAMCH( ICTXT, 'P' ) BIGNUM = ONE / SMLNUM CALL PSLABAD( ICTXT, SMLNUM, BIGNUM ) * * Scale A, B if max entry outside range [SMLNUM,BIGNUM] * ANRM = PCLANGE( 'M', M, N, A, IA, JA, DESCA, RWORK ) IASCL = 0 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PCLASCL( 'G', ANRM, SMLNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 1 ELSE IF( ANRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PCLASCL( 'G', ANRM, BIGNUM, M, N, A, IA, JA, DESCA, $ INFO ) IASCL = 2 ELSE IF( ANRM.EQ.ZERO ) THEN * * Matrix all zero. Return zero solution. * CALL PCLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, IB, $ JB, DESCB ) GO TO 10 END IF * BROW = M IF( TPSD ) $ BROW = N * BNRM = PCLANGE( 'M', BROW, NRHS, B, IB, JB, DESCB, RWORK ) * IBSCL = 0 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN * * Scale matrix norm up to SMLNUM * CALL PCLASCL( 'G', BNRM, SMLNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 1 ELSE IF( BNRM.GT.BIGNUM ) THEN * * Scale matrix norm down to BIGNUM * CALL PCLASCL( 'G', BNRM, BIGNUM, BROW, NRHS, B, IB, JB, $ DESCB, INFO ) IBSCL = 2 END IF * IPW = LTAU + 1 * IF( M.GE.N ) THEN * * compute QR factorization of A * CALL PCGEQRF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least N, optimally N*NB * IF( .NOT.TPSD ) THEN * * Least-Squares Problem min || A * X - B || * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q' * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PCUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, $ IA, JA, DESCA, WORK, B, IB, JB, DESCB, $ WORK( IPW ), LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+N-1,JB:JB+NRHS-1) := inv(R) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PCTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, CONE, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * SCLLEN = N * ELSE * * Overdetermined system of equations sub( A )' * X = sub( B ) * * sub( B ) := inv(R') * sub( B ) * CALL PCTRSM( 'Left', 'Upper', 'Conjugate transpose', $ 'Non-unit', N, NRHS, CONE, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) * * B(IB+N:IB+M-1,JB:JB+NRHS-1) = ZERO * CALL PCLASET( 'All', M-N, NRHS, CZERO, CZERO, B, IB+N, JB, $ DESCB ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := Q(1:N,:) * * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PCUNMQR( 'Left', 'No transpose', M, NRHS, N, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = M * END IF * ELSE * * Compute LQ factorization of sub( A ) * CALL PCGELQF( M, N, A, IA, JA, DESCA, WORK, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least M, optimally M*NB. * IF( .NOT.TPSD ) THEN * * underdetermined system of equations sub( A ) * X = sub( B ) * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L) * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PCTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, $ NRHS, CONE, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * B(IB+M:IB+N-1,JB:JB+NRHS-1) = 0 * CALL PCLASET( 'All', N-M, NRHS, CZERO, CZERO, B, IB+M, JB, $ DESCB ) * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q(1:N,:)' * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PCUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, $ IA, JA, DESCA, WORK, B, IB, JB, DESCB, $ WORK( IPW ), LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * SCLLEN = N * ELSE * * overdetermined system min || A' * X - B || * * B(IB:IB+N-1,JB:JB+NRHS-1) := Q * B(IB:IB+N-1,JB:JB+NRHS-1) * CALL PCUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, IA, JA, $ DESCA, WORK, B, IB, JB, DESCB, WORK( IPW ), $ LWORK-LTAU, INFO ) * * workspace at least NRHS, optimally NRHS*NB * * B(IB:IB+M-1,JB:JB+NRHS-1) := inv(L') * * B(IB:IB+M-1,JB:JB+NRHS-1) * CALL PCTRSM( 'Left', 'Lower', 'Conjugate transpose', $ 'Non-unit', M, NRHS, CONE, A, IA, JA, DESCA, $ B, IB, JB, DESCB ) * SCLLEN = M * END IF * END IF * * Undo scaling * IF( IASCL.EQ.1 ) THEN CALL PCLASCL( 'G', ANRM, SMLNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IASCL.EQ.2 ) THEN CALL PCLASCL( 'G', ANRM, BIGNUM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF IF( IBSCL.EQ.1 ) THEN CALL PCLASCL( 'G', SMLNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) ELSE IF( IBSCL.EQ.2 ) THEN CALL PCLASCL( 'G', BIGNUM, BNRM, SCLLEN, NRHS, B, IB, JB, $ DESCB, INFO ) END IF * 10 CONTINUE * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGELS * END scalapack-1.8.0/SRC/pcgetrf.f0000640000175000017500000002644610363532303015560 0ustar muammarmuammar SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), IPIV( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCGETRF computes an LU factorization of a general M-by-N distributed * matrix sub( A ) = (IA:IA+M-1,JA:JA+N-1) using partial pivoting with * row interchanges. * * The factorization has the form sub( A ) = P * L * U, where P is a * permutation matrix, L is lower triangular with unit diagonal ele- * ments (lower trapezoidal if m > n), and U is upper triangular * (upper trapezoidal if m < n). L and U are stored in sub( A ). * * This is the right-looking Parallel Level 3 BLAS version of the * algorithm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the M-by-N * distributed matrix sub( A ) to be factored. On exit, this * array contains the local pieces of the factors L and U from * the factorization sub( A ) = P*L*U; the unit diagonal ele- * ments of L are not stored. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, U(IA+K-1,JA+K-1) is exactly zero. * The factorization has been completed, but the factor U * is exactly singular, and division by zero will occur if * it is used to solve a system of equations. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. CHARACTER COLBTOP, COLCTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT, $ PB_TOPGET, PB_TOPSET, PCGEMM, PCGETF2, $ PCLASWP, PCTRSM, PXERBLA * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUM1, $ IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGETRF', -INFO ) RETURN END IF * * Quick return if possible * IF( DESCA( M_ ).EQ.1 ) THEN IPIV( 1 ) = 1 RETURN ELSE IF( M.EQ.0 .OR. N.EQ.0 ) THEN RETURN END IF * * Split-ring topology for the communication along process rows * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', ' ' ) * * Handle the first block of columns separately * MN = MIN( M, N ) IN = MIN( ICEIL( IA, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PCGETF2( M, JB, A, IA, JA, DESCA, IPIV, INFO ) * IF( JB+1.LE.N ) THEN * * Apply interchanges to columns JN+1:JA+N-1. * CALL PCLASWP( 'Forward', 'Rows', N-JB, A, IA, JN+1, DESCA, $ IA, IN, IPIV ) * * Compute block row of U. * CALL PCTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-JB, ONE, A, IA, JA, DESCA, A, IA, JN+1, DESCA ) * IF( JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PCGEMM( 'No transpose', 'No transpose', M-JB, N-JB, JB, $ -ONE, A, IN+1, JA, DESCA, A, IA, JN+1, DESCA, $ ONE, A, IN+1, JN+1, DESCA ) * END IF END IF * * Loop over the remaining blocks of columns. * DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Factor diagonal and subdiagonal blocks and test for exact * singularity. * CALL PCGETF2( M-J+JA, JB, A, I, J, DESCA, IPIV, IINFO ) * IF( INFO.EQ.0 .AND. IINFO.GT.0 ) $ INFO = IINFO + J - JA * * Apply interchanges to columns JA:J-JA. * CALL PCLASWP( 'Forward', 'Rowwise', J-JA, A, IA, JA, DESCA, $ I, I+JB-1, IPIV ) * IF( J-JA+JB+1.LE.N ) THEN * * Apply interchanges to columns J+JB:JA+N-1. * CALL PCLASWP( 'Forward', 'Rowwise', N-J-JB+JA, A, IA, J+JB, $ DESCA, I, I+JB-1, IPIV ) * * Compute block row of U. * CALL PCTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, $ N-J-JB+JA, ONE, A, I, J, DESCA, A, I, J+JB, $ DESCA ) * IF( J-JA+JB+1.LE.M ) THEN * * Update trailing submatrix. * CALL PCGEMM( 'No transpose', 'No transpose', M-J-JB+JA, $ N-J-JB+JA, JB, -ONE, A, I+JB, J, DESCA, A, $ I, J+JB, DESCA, ONE, A, I+JB, J+JB, DESCA ) * END IF END IF * 10 CONTINUE * IF( INFO.EQ.0 ) $ INFO = MN + 1 CALL IGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, INFO, 1, IDUM1, IDUM2, $ -1, -1, MYCOL ) IF( INFO.EQ.MN+1 ) $ INFO = 0 * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * RETURN * * End of PCGETRF * END scalapack-1.8.0/SRC/pzlarz.f0000640000175000017500000010212010363532303015430 0ustar muammarmuammar SUBROUTINE PZLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARZ applies a complex elementary reflector Q to a complex M-by-N * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the * left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PZTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST COMPLEX*16 TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBZTRNV, $ ZAXPY, ZCOPY, ZGEBR2D, ZGEBS2D, $ ZGEMV, ZGERC, ZGERV2D, ZGESD2D, $ ZGSUM2D, ZLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, V( IOFFV ), $ 1, ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MPV+1 CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * IF( MPC2.GT.0 .AND. NQV.GT.0 ) $ CALL ZGERC( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQV+1 CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL ZGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PZLARZ * END scalapack-1.8.0/SRC/pcpotrf.f0000640000175000017500000003164510363532303015600 0ustar muammarmuammar SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ) * .. * * Purpose * ======= * * PCPOTRF computes the Cholesky factorization of an N-by-N complex * hermitian positive definite distributed matrix sub( A ) denoting * A(IA:IA+N-1, JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PCPOTF2, PCHERK, PCTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( UPPER ) THEN * * Split-ring topology for the communication along process * columns, 1-tree topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * * A is upper triangular, compute Cholesky factorization A = U'*U. * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA(NB_), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PCPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PCTRSM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-Unit', JB, N-JB, CONE, A, IA, JA, DESCA, $ A, IA, JA+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PCHERK( UPLO, 'Conjugate transpose', N-JB, JB, -ONE, A, $ IA, JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PCPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PCTRSM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-Unit', JB, N-J-JB+JA, CONE, A, I, J, $ DESCA, A, I, J+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PCHERK( UPLO, 'Conjugate transpose', N-J-JB+JA, JB, $ -ONE, A, I, J+JB, DESCA, ONE, A, I+JB, $ J+JB, DESCA ) END IF 10 CONTINUE * ELSE * * 1-tree topology for the communication along process columns, * Split-ring topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * A is lower triangular, compute Cholesky factorization A = L*L' * (right-looking) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PCPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PCTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-Unit', N-JB, JB, CONE, A, IA, JA, DESCA, $ A, IA+JB, JA, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PCHERK( UPLO, 'No Transpose', N-JB, JB, -ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * END IF * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PCPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PCTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-Unit', N-J-JB+JA, JB, CONE, A, I, J, $ DESCA, A, I+JB, J, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PCHERK( UPLO, 'No Transpose', N-J-JB+JA, JB, -ONE, $ A, I+JB, J, DESCA, ONE, A, I+JB, J+JB, $ DESCA ) * END IF 20 CONTINUE * END IF * 30 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PCPOTRF * END scalapack-1.8.0/SRC/pzlacon.f0000640000175000017500000003237110363532303015566 0ustar muammarmuammar SUBROUTINE PZLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, EST, $ KASE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N DOUBLE PRECISION EST * .. * .. Array Arguments .. INTEGER DESCV( * ), DESCX( * ) COMPLEX*16 V( * ), X( * ) * .. * * Purpose * ======= * * PZLACON estimates the 1-norm of a square, complex distributed matrix * A. Reverse communication is used for evaluating matrix-vector * products. X and V are aligned with the distributed matrix A, this * information is implicitly contained within IV, IX, DESCV, and DESCX. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The length of the distributed vectors V and X. N >= 0. * * V (local workspace) COMPLEX*16 pointer into the local * memory to an array of dimension LOCr(N+MOD(IV-1,MB_V)). On * the final return, V = A*W, where EST = norm(V)/norm(W) * (W is not returned). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * X (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension * LOCr(N+MOD(IX-1,MB_X)). On an intermediate return, X * should be overwritten by * A * X, if KASE=1, * A' * X, if KASE=2, * where A' is the conjugate transpose of A, and PZLACON must * be re-called with all the other parameters unchanged. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * * EST (global output) DOUBLE PRECISION * An estimate (a lower bound) for norm(A). * * KASE (local input/local output) INTEGER * On the initial call to PZLACON, KASE should be 0. * On an intermediate return, KASE will be 1 or 2, indicating * whether X should be overwritten by A * X or A' * X. * On the final return from PZLACON, KASE will again be 0. * * Further Details * =============== * * The serial version ZLACON has been contributed by Nick Higham, * University of Manchester. It was originally named SONEST, dated * March 16, 1988. * * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of * a real or complex matrix, with applications to condition estimation", * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) INTEGER ITMAX PARAMETER ( ITMAX = 5 ) DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER I, ICTXT, IIVX, IMAXROW, IOFFVX, IROFF, ITER, $ IVXCOL, IVXROW, J, JLAST, JJVX, JUMP, K, $ MYCOL, MYROW, NP, NPCOL, NPROW DOUBLE PRECISION ALTSGN, ESTOLD, SAFMIN, TEMP COMPLEX*16 JLMAX, XMAX * .. * .. Local Arrays .. COMPLEX*16 WORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, DGEBR2D, $ DGEBS2D, PDZSUM1, PZELGET, $ PZMAX1, ZCOPY, ZGEBR2D, ZGEBS2D * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, INDXL2G, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL INDXG2L, INDXG2P, INDXL2G, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX * .. * .. Save statement .. SAVE * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) IF( MYCOL.NE.IVXCOL ) $ RETURN IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) $ NP = NP - IROFF IOFFVX = IIVX + (JJVX-1)*DESCX( LLD_ ) * SAFMIN = PDLAMCH( ICTXT, 'Safe minimum' ) IF( KASE.EQ.0 ) THEN DO 10 I = IOFFVX, IOFFVX+NP-1 X( I ) = DCMPLX( ONE / DBLE( N ) ) 10 CONTINUE KASE = 1 JUMP = 1 RETURN END IF * GO TO ( 20, 40, 70, 90, 120 )JUMP * * ................ ENTRY (JUMP = 1) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X * 20 CONTINUE IF( N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN V( IOFFVX ) = X( IOFFVX ) EST = ABS( V( IOFFVX ) ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF * ... QUIT GO TO 130 END IF CALL PDZSUM1( N, EST, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * DO 30 I = IOFFVX, IOFFVX+NP-1 IF( ABS( X( I ) ).GT.SAFMIN ) THEN X( I ) = X( I ) / DCMPLX( ABS( X( I ) ) ) ELSE X( I ) = CONE END IF 30 CONTINUE KASE = 2 JUMP = 2 RETURN * * ................ ENTRY (JUMP = 2) * FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X * 40 CONTINUE CALL PZMAX1( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = DCMPLX( DBLE( J ) ) CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( DBLE( WORK( 2 ) ) ) END IF END IF ITER = 2 * * MAIN LOOP - ITERATIONS 2, 3,...,ITMAX * 50 CONTINUE DO 60 I = IOFFVX, IOFFVX+NP-1 X( I ) = CZERO 60 CONTINUE IMAXROW = INDXG2P( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) IF( MYROW.EQ.IMAXROW ) THEN I = INDXG2L( J, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), NPROW ) X( I ) = CONE END IF KASE = 1 JUMP = 3 RETURN * * ................ ENTRY (JUMP = 3) * X HAS BEEN OVERWRITTEN BY A*X * 70 CONTINUE CALL ZCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) ESTOLD = EST CALL PDZSUM1( N, EST, V, IV, JV, DESCV, 1 ) IF( DESCV( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, EST, 1, $ IVXROW, MYCOL ) END IF END IF * * TEST FOR CYCLING IF( EST.LE.ESTOLD ) $ GO TO 100 * DO 80 I = IOFFVX, IOFFVX+NP-1 IF( ABS( X( I ) ).GT.SAFMIN ) THEN X( I ) = X( I ) / DCMPLX( ABS( X( I ) ) ) ELSE X( I ) = CONE END IF 80 CONTINUE KASE = 2 JUMP = 4 RETURN * * ................ ENTRY (JUMP = 4) * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X * 90 CONTINUE JLAST = J CALL PZMAX1( N, XMAX, J, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN WORK( 1 ) = XMAX WORK( 2 ) = DCMPLX( DBLE( J ) ) CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2 ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 2, 1, WORK, 2, $ IVXROW, MYCOL ) XMAX = WORK( 1 ) J = NINT( DBLE( WORK( 2 ) ) ) END IF END IF CALL PZELGET( 'Columnwise', ' ', JLMAX, X, JLAST, JX, DESCX ) IF( ( DBLE( JLMAX ).NE.ABS( DBLE( XMAX ) ) ).AND. $ ( ITER.LT.ITMAX ) ) THEN ITER = ITER + 1 GO TO 50 END IF * * ITERATION COMPLETE. FINAL STAGE. * 100 CONTINUE DO 110 I = IOFFVX, IOFFVX+NP-1 K = INDXL2G( I-IOFFVX+IIVX, DESCX( MB_ ), MYROW, $ DESCX( RSRC_ ), NPROW )-IX+1 IF( MOD( K, 2 ).EQ.0 ) THEN ALTSGN = -ONE ELSE ALTSGN = ONE END IF X( I ) = DCMPLX( ALTSGN*( ONE+DBLE( K-1 ) / DBLE( N-1 ) ) ) 110 CONTINUE KASE = 1 JUMP = 5 RETURN * * ................ ENTRY (JUMP = 5) * X HAS BEEN OVERWRITTEN BY A*X * 120 CONTINUE CALL PDZSUM1( N, TEMP, X, IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN IF( MYROW.EQ.IVXROW ) THEN CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TEMP, 1, $ IVXROW, MYCOL ) END IF END IF TEMP = TWO*( TEMP / DBLE( 3*N ) ) IF( TEMP.GT.EST ) THEN CALL ZCOPY( NP, X( IOFFVX ), 1, V( IOFFVX ), 1 ) EST = TEMP END IF * 130 CONTINUE KASE = 0 * RETURN * * End of PZLACON * END scalapack-1.8.0/SRC/dsteqr2.f0000640000175000017500000003300710363532303015501 0ustar muammarmuammar SUBROUTINE DSTEQR2( COMPZ, N, D, E, Z, LDZ, NR, WORK, INFO ) * * -- LAPACK routine (version 2.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, LDZ, N, NR * .. * .. Array Arguments .. DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) * .. * * Purpose * ======= * * DSTEQR2 is a modified version of LAPACK routine DSTEQR. * DSTEQR2 computes all eigenvalues and, optionally, eigenvectors of a * symmetric tridiagonal matrix using the implicit QL or QR method. * DSTEQR2 is modified from DSTEQR to allow each ScaLAPACK process * running DSTEQR2 to perform updates on a distributed matrix Q. * Proper usage of DSTEQR2 can be gleaned from examination of ScaLAPACK's * PDSYEV. * * Arguments * ========= * * COMPZ (input) CHARACTER*1 * = 'N': Compute eigenvalues only. * = 'I': Compute eigenvalues and eigenvectors of the * tridiagonal matrix. Z must be initialized to the * identity matrix by PDLASET or DLASET prior to entering * this subroutine. * * N (input) INTEGER * The order of the matrix. N >= 0. * * D (input/output) DOUBLE PRECISION array, dimension (N) * On entry, the diagonal elements of the tridiagonal matrix. * On exit, if INFO = 0, the eigenvalues in ascending order. * * E (input/output) DOUBLE PRECISION array, dimension (N-1) * On entry, the (n-1) subdiagonal elements of the tridiagonal * matrix. * On exit, E has been destroyed. * * Z (local input/local output) DOUBLE PRECISION array, global * dimension (N, N), local dimension (LDZ, NR). * On entry, if COMPZ = 'V', then Z contains the orthogonal * matrix used in the reduction to tridiagonal form. * On exit, if INFO = 0, then if COMPZ = 'V', Z contains the * orthonormal eigenvectors of the original symmetric matrix, * and if COMPZ = 'I', Z contains the orthonormal eigenvectors * of the symmetric tridiagonal matrix. * If COMPZ = 'N', then Z is not referenced. * * LDZ (input) INTEGER * The leading dimension of the array Z. LDZ >= 1, and if * eigenvectors are desired, then LDZ >= max(1,N). * * NR (input) INTEGER * NR = MAX(1, NUMROC( N, NB, MYPROW, 0, NPROCS ) ). * If COMPZ = 'N', then NR is not referenced. * * WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) * If COMPZ = 'N', then WORK is not referenced. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: the algorithm has failed to find all the eigenvalues in * a total of 30*N iterations; if INFO = i, then i * elements of E have not converged to zero; on exit, D * and E contain the elements of a symmetric tridiagonal * matrix which is orthogonally similar to the original * matrix. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ZERO, ONE, TWO, THREE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, $ THREE = 3.0D0 ) INTEGER MAXIT PARAMETER ( MAXIT = 30 ) * .. * .. Local Scalars .. INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, $ NM1, NMAXIT DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 * .. * .. External Subroutines .. EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASR, $ DLASRT, DSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SIGN, SQRT * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 * IF( LSAME( COMPZ, 'N' ) ) THEN ICOMPZ = 0 ELSE IF( LSAME( COMPZ, 'I' ) ) THEN ICOMPZ = 1 ELSE ICOMPZ = -1 END IF IF( ICOMPZ.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, NR ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DSTEQR2', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( N.EQ.1 ) THEN IF( ICOMPZ.EQ.1 ) $ Z( 1, 1 ) = ONE RETURN END IF * * Determine the unit roundoff and over/underflow thresholds. * EPS = DLAMCH( 'E' ) EPS2 = EPS**2 SAFMIN = DLAMCH( 'S' ) SAFMAX = ONE / SAFMIN SSFMAX = SQRT( SAFMAX ) / THREE SSFMIN = SQRT( SAFMIN ) / EPS2 * * Compute the eigenvalues and eigenvectors of the tridiagonal * matrix. * NMAXIT = N*MAXIT JTOT = 0 * * Determine where the matrix splits and choose QL or QR iteration * for each block, according to whether top or bottom diagonal * element is smaller. * L1 = 1 NM1 = N - 1 * 10 CONTINUE IF( L1.GT.N ) $ GO TO 160 IF( L1.GT.1 ) $ E( L1-1 ) = ZERO IF( L1.LE.NM1 ) THEN DO 20 M = L1, NM1 TST = ABS( E( M ) ) IF( TST.EQ.ZERO ) $ GO TO 30 IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ $ 1 ) ) ) )*EPS ) THEN E( M ) = ZERO GO TO 30 END IF 20 CONTINUE END IF M = N * 30 CONTINUE L = L1 LSV = L LEND = M LENDSV = LEND L1 = M + 1 IF( LEND.EQ.L ) $ GO TO 10 * * Scale submatrix in rows and columns L to LEND * ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) ISCALE = 0 IF( ANORM.EQ.ZERO ) $ GO TO 10 IF( ANORM.GT.SSFMAX ) THEN ISCALE = 1 CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, $ INFO ) CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, $ INFO ) ELSE IF( ANORM.LT.SSFMIN ) THEN ISCALE = 2 CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, $ INFO ) CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, $ INFO ) END IF * * Choose between QL and QR iteration * IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN LEND = LSV L = LENDSV END IF * IF( LEND.GT.L ) THEN * * QL Iteration * * Look for small subdiagonal element. * 40 CONTINUE IF( L.NE.LEND ) THEN LENDM1 = LEND - 1 DO 50 M = L, LENDM1 TST = ABS( E( M ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ $ SAFMIN )GO TO 60 50 CONTINUE END IF * M = LEND * 60 CONTINUE IF( M.LT.LEND ) $ E( M ) = ZERO P = D( L ) IF( M.EQ.L ) $ GO TO 80 * * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L+1 ) THEN IF( ICOMPZ.GT.0 ) THEN CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) WORK( L ) = C WORK( N-1+L ) = S CALL DLASR( 'R', 'V', 'B', NR, 2, WORK( L ), $ WORK( N-1+L ), Z( 1, L ), LDZ ) ELSE CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) END IF D( L ) = RT1 D( L+1 ) = RT2 E( L ) = ZERO L = L + 2 IF( L.LE.LEND ) $ GO TO 40 GO TO 140 END IF * IF( JTOT.EQ.NMAXIT ) $ GO TO 140 JTOT = JTOT + 1 * * Form shift. * G = ( D( L+1 )-P ) / ( TWO*E( L ) ) R = DLAPY2( G, ONE ) G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) * S = ONE C = ONE P = ZERO * * Inner loop * MM1 = M - 1 DO 70 I = MM1, L, -1 F = S*E( I ) B = C*E( I ) CALL DLARTG( G, F, C, S, R ) IF( I.NE.M-1 ) $ E( I+1 ) = R G = D( I+1 ) - P R = ( D( I )-G )*S + TWO*C*B P = S*R D( I+1 ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * IF( ICOMPZ.GT.0 ) THEN WORK( I ) = C WORK( N-1+I ) = -S END IF * 70 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * IF( ICOMPZ.GT.0 ) THEN MM = M - L + 1 CALL DLASR( 'R', 'V', 'B', NR, MM, WORK( L ), WORK( N-1+L ), $ Z( 1, L ), LDZ ) END IF * D( L ) = D( L ) - P E( L ) = G GO TO 40 * * Eigenvalue found. * 80 CONTINUE D( L ) = P * L = L + 1 IF( L.LE.LEND ) $ GO TO 40 GO TO 140 * ELSE * * QR Iteration * * Look for small superdiagonal element. * 90 CONTINUE IF( L.NE.LEND ) THEN LENDP1 = LEND + 1 DO 100 M = L, LENDP1, -1 TST = ABS( E( M-1 ) )**2 IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ $ SAFMIN )GO TO 110 100 CONTINUE END IF * M = LEND * 110 CONTINUE IF( M.GT.LEND ) $ E( M-1 ) = ZERO P = D( L ) IF( M.EQ.L ) $ GO TO 130 * * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 * to compute its eigensystem. * IF( M.EQ.L-1 ) THEN IF( ICOMPZ.GT.0 ) THEN CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) WORK( M ) = C WORK( N-1+M ) = S CALL DLASR( 'R', 'V', 'F', NR, 2, WORK( M ), $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) ELSE CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) END IF D( L-1 ) = RT1 D( L ) = RT2 E( L-1 ) = ZERO L = L - 2 IF( L.GE.LEND ) $ GO TO 90 GO TO 140 END IF * IF( JTOT.EQ.NMAXIT ) $ GO TO 140 JTOT = JTOT + 1 * * Form shift. * G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) R = DLAPY2( G, ONE ) G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) * S = ONE C = ONE P = ZERO * * Inner loop * LM1 = L - 1 DO 120 I = M, LM1 F = S*E( I ) B = C*E( I ) CALL DLARTG( G, F, C, S, R ) IF( I.NE.M ) $ E( I-1 ) = R G = D( I ) - P R = ( D( I+1 )-G )*S + TWO*C*B P = S*R D( I ) = G + P G = C*R - B * * If eigenvectors are desired, then save rotations. * IF( ICOMPZ.GT.0 ) THEN WORK( I ) = C WORK( N-1+I ) = S END IF * 120 CONTINUE * * If eigenvectors are desired, then apply saved rotations. * IF( ICOMPZ.GT.0 ) THEN MM = L - M + 1 CALL DLASR( 'R', 'V', 'F', NR, MM, WORK( M ), WORK( N-1+M ), $ Z( 1, M ), LDZ ) END IF * D( L ) = D( L ) - P E( LM1 ) = G GO TO 90 * * Eigenvalue found. * 130 CONTINUE D( L ) = P * L = L - 1 IF( L.GE.LEND ) $ GO TO 90 GO TO 140 * END IF * * Undo scaling if necessary * 140 CONTINUE IF( ISCALE.EQ.1 ) THEN CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) ELSE IF( ISCALE.EQ.2 ) THEN CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, $ D( LSV ), N, INFO ) CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), $ N, INFO ) END IF * * Check for no convergence to an eigenvalue after a total * of N*MAXIT iterations. * IF( JTOT.LT.NMAXIT ) $ GO TO 10 DO 150 I = 1, N - 1 IF( E( I ).NE.ZERO ) $ INFO = INFO + 1 150 CONTINUE GO TO 190 * * Order eigenvalues and eigenvectors. * 160 CONTINUE IF( ICOMPZ.EQ.0 ) THEN * * Use Quick Sort * CALL DLASRT( 'I', N, D, INFO ) * ELSE * * Use Selection Sort to minimize swaps of eigenvectors * DO 180 II = 2, N I = II - 1 K = I P = D( I ) DO 170 J = II, N IF( D( J ).LT.P ) THEN K = J P = D( J ) END IF 170 CONTINUE IF( K.NE.I ) THEN D( K ) = D( I ) D( I ) = P CALL DSWAP( NR, Z( 1, I ), 1, Z( 1, K ), 1 ) END IF 180 CONTINUE END IF * 190 CONTINUE RETURN * * End of DSTEQR2 * END scalapack-1.8.0/SRC/pzunmtr.f0000640000175000017500000004046310363532303015640 0ustar muammarmuammar SUBROUTINE PZUNMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMTR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of nq-1 elementary reflectors, as returned by PZHETRD: * * if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); * * if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(IA:*,JA:*) contains elementary * reflectors from PZHETRD; * = 'L': Lower triangle of A(IA:*,JA:*) contains elementary * reflectors from PZHETRD. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * or (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PZHETRD. * If SIDE = 'L', LLD_A >= max(1,LOCr(IA+M-1)); * if SIDE = 'R', LLD_A >= max(1,LOCr(IA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16 array, dimension LTAU, where * if SIDE = 'L' and UPLO = 'U', LTAU = LOCc(M_A), * if SIDE = 'L' and UPLO = 'L', LTAU = LOCc(JA+M-2), * if SIDE = 'R' and UPLO = 'U', LTAU = LOCc(N_A), * if SIDE = 'R' and UPLO = 'L', LTAU = LOCc(JA+N-2). * TAU(i) must contain the scalar factor of the elementary * reflector H(i), as returned by PZHETRD. TAU is tied to the * distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * If UPLO = 'U', * IAA = IA, JAA = JA+1, ICC = IC, JCC = JC; * else UPLO = 'L', * IAA = IA+1, JAA = JA; * if SIDE = 'L', * ICC = IC+1; JCC = JC; * else * ICC = IC; JCC = JC+1; * end if * end if * * If SIDE = 'L', * MI = M-1; NI = N; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; MI = N-1; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN, UPPER INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZUNMQL, PZUNMQR * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) * IF( UPPER ) THEN IAA = IA JAA = JA+1 ICC = IC JCC = JC ELSE IAA = IA+1 JAA = JA IF( LEFT ) THEN ICC = IC + 1 JCC = JC ELSE ICC = IC JCC = JC + 1 END IF END IF * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = M - 1 NI = N CALL CHK1MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, INFO ) ELSE NQ = N MI = M NI = N - 1 CALL CHK1MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, INFO ) END IF CALL CHK1MAT( MI, 4, NI, 5, ICC, JCC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -3 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'C' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMTR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) $ RETURN * IF( UPPER ) THEN * * Q was determined by a call to PZHETRD with UPLO = 'U' * CALL PZUNMQL( SIDE, TRANS, MI, NI, NQ-1, A, IA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * ELSE * * Q was determined by a call to PZHETRD with UPLO = 'L' * CALL PZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMTR * END scalapack-1.8.0/SRC/pclaconsb.f0000640000175000017500000005112410363532303016061 0ustar muammarmuammar SUBROUTINE PCLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * July 31, 2001 * * .. Scalar Arguments .. INTEGER I, L, LWORK, M COMPLEX H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), BUF( * ) * .. * * Purpose * ======= * * PCLACONSB looks for two consecutive small subdiagonal elements by * seeing the effect of starting a double shift QR iteration * given by H44, H33, & H43H34 and see if this would make a * subdiagonal negligible. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) COMPLEX array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * M (global output) INTEGER * On exit, this yields the starting location of the QR double * shift. This will satisfy: L <= M <= I-2. * * H44 * H33 * H43H34 (global input) COMPLEX * These three values are for the double shift QR iteration. * * BUF (local output) COMPLEX array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 7*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Logic: * ====== * * Two consecutive small subdiagonal elements will stall * convergence of a double shift if their product is small * relatively even if each is not very small. Thus it is * necessary to scan the "tridiagonal portion of the matrix." In * the LAPACK algorithm ZLAHQR, a loop of M goes from I-2 down to * L and examines * H(m,m),H(m+1,m+1),H(m+1,m),H(m,m+1),H(m-1,m-1),H(m,m-1), and * H(m+2,m-1). Since these elements may be on separate * processors, the first major loop (10) goes over the tridiagonal * and has each node store whatever values of the 7 it has that * the node owning H(m,m) does not. This will occur on a border * and can happen in no more than 3 locations per block assuming * square blocks. There are 5 buffers that each node stores these * values: a buffer to send diagonally down and right, a buffer * to send up, a buffer to send left, a buffer to send diagonally * up and left and a buffer to send right. Each of these buffers * is actually stored in one buffer BUF where BUF(ISTR1+1) starts * the first buffer, BUF(ISTR2+1) starts the second, etc.. After * the values are stored, if there are any values that a node * needs, they will be sent and received. Then the next major * loop passes over the data and searches for two consecutive * small subdiagonals. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * * Further Details * =============== * * Implemented by: M. Fahey, May 28, 1999 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, IBUF3, IBUF4, $ IBUF5, ICOL1, II, IRCV1, IRCV2, IRCV3, IRCV4, $ IRCV5, IROW1, ISRC, ISTR1, ISTR2, ISTR3, ISTR4, $ ISTR5, JJ, JSRC, LDA, LEFT, MODKM1, MYCOL, $ MYROW, NPCOL, NPROW, NUM, RIGHT, UP REAL S, TST1, ULP COMPLEX CDUM, H00, H10, H11, H12, H21, H22, H33S, H44S, $ V1, V2, V3 * .. * .. External Functions .. INTEGER ILCM REAL PSLAMCH EXTERNAL ILCM, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMX2D, INFOG2L, PXERBLA, $ CGERV2D, CGESD2D * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, AIMAG, MOD * .. * .. Statement Functions .. REAL CABS1 * .. * .. Statement Function definitions .. CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 starts at BUF(ISTR1+1) and will contain IBUF1 elements * BUFFER2 starts at BUF(ISTR2+1) and will contain IBUF2 elements * BUFFER3 starts at BUF(ISTR3+1) and will contain IBUF3 elements * BUFFER4 starts at BUF(ISTR4+1) and will contain IBUF4 elements * BUFFER5 starts at BUF(ISTR5+1) and will contain IBUF5 elements * ISTR1 = 0 ISTR2 = ( ( I-L-1 ) / HBL ) IF( ISTR2*HBL.LT.( I-L-1 ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PCLACONSB', 10 ) RETURN END IF ISTR3 = 3*ISTR2 ISTR4 = ISTR3 + ISTR2 ISTR5 = ISTR3 + ISTR3 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) * * Copy our relevant pieces of triadiagonal that we owe into * 5 buffers to send to whomever owns H(M,M) as M moves diagonally * up the tridiagonal * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 IRCV1 = 0 IRCV2 = 0 IRCV3 = 0 IRCV4 = 0 IRCV5 = 0 DO 10 M = I - 2, L, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M-1,M-1) and send it diagonal down * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( M-1, M-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M ,M-1) and send it right * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF5 = IBUF5 + 1 BUF( ISTR5+IBUF5 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+1,M) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( MYROW.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M ,M+1) and send it left * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF3 = IBUF3 + 1 BUF( ISTR3+IBUF3 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M+1,M+1) & H(M+2,M+1) and send it * diagonally up * IF( ( UP.NE.MYROW ) .OR. ( LEFT.NE.MYCOL ) ) THEN CALL INFOG2L( M+1, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF4 = IBUF4 + 2 BUF( ISTR4+IBUF4-1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) BUF( ISTR4+IBUF4 ) = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+2,M+1) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * Add up the receives * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( M.GT.L ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M-1,M-1) from diagonal up * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) .AND. ( M.GT.L ) ) $ THEN * * We must receive H(M ,M-1) from left * IRCV5 = IRCV5 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+1,M ) from down * IRCV2 = IRCV2 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPCOL.GT.1 ) ) THEN * * We must receive H(M ,M+1) from right * IRCV3 = IRCV3 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M+1:M+2,M+1) from diagonal down * IRCV4 = IRCV4 + 2 END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+2,M+1) from down * IRCV2 = IRCV2 + 1 END IF END IF * * Possibly change owners (occurs only when MOD(M-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * * Send data on to the appropriate node if there is any data to send * IF( IBUF1.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, UP, $ MYCOL ) END IF IF( IBUF3.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF3, 1, BUF( ISTR3+1 ), IBUF3, MYROW, $ LEFT ) END IF IF( IBUF4.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF4, 1, BUF( ISTR4+1 ), IBUF4, UP, $ LEFT ) END IF IF( IBUF5.GT.0 ) THEN CALL CGESD2D( CONTXT, IBUF5, 1, BUF( ISTR5+1 ), IBUF5, MYROW, $ RIGHT ) END IF * * Receive appropriate data if there is any * IF( IRCV1.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, DOWN, $ MYCOL ) END IF IF( IRCV3.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV3, 1, BUF( ISTR3+1 ), IRCV3, MYROW, $ RIGHT ) END IF IF( IRCV4.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV4, 1, BUF( ISTR4+1 ), IRCV4, DOWN, $ RIGHT ) END IF IF( IRCV5.GT.0 ) THEN CALL CGERV2D( CONTXT, IRCV5, 1, BUF( ISTR5+1 ), IRCV5, MYROW, $ LEFT ) END IF * * Start main loop * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. $ ( MODKM1.NE.HBL-1 ) ) THEN CALL INFOG2L( I-2, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) END IF * * Look for two consecutive small subdiagonal elements. * DO 20 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H00 = BUF( ISTR1+IBUF1 ) ELSE H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF5 = IBUF5 + 1 H10 = BUF( ISTR5+IBUF5 ) ELSE H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF END IF IF( MODKM1.EQ.HBL-1 ) THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) H11 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( NUM.GT.1 ) THEN IBUF4 = IBUF4 + 2 H22 = BUF( ISTR4+IBUF4-1 ) V3 = BUF( ISTR4+IBUF4 ) ELSE H22 = A( ICOL1*LDA+IROW1+1 ) V3 = A( ( ICOL1+1 )*LDA+IROW1+1 ) END IF IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 H21 = BUF( ISTR2+IBUF2 ) ELSE H21 = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF3 = IBUF3 + 1 H12 = BUF( ISTR3+IBUF3 ) ELSE H12 = A( ICOL1*LDA+IROW1 ) END IF IF( M.GT.L ) THEN H00 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF * * Adjust ICOL1 for next iteration where MODKM1=HBL-2 * ICOL1 = ICOL1 + 1 END IF IF( MODKM1.EQ.HBL-2 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 V3 = BUF( ISTR2+IBUF2 ) ELSE V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) END IF H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.LT.HBL-2 ) .AND. ( MODKM1.GT.0 ) ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S IF( M.EQ.L ) $ GO TO 30 TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+ $ CABS1( H22 ) ) IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 ) $ GO TO 30 * * Slide indices diagonally up one for next iteration * IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF IF( M.EQ.L ) THEN * * Stop regardless of which node we are * GO TO 30 END IF * * Possibly change owners if on border * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 20 CONTINUE 30 CONTINUE * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * RETURN * * End of PCLACONSB * END scalapack-1.8.0/SRC/pcggrqf.f0000640000175000017500000003631410363532303015552 0ustar muammarmuammar SUBROUTINE PCGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PCGGRQF computes a generalized RQ factorization of * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1): * * sub( A ) = R*Q, sub( B ) = Z*T*Q, * * where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, * and R and T assume one of the forms: * * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, * N-M M ( R21 ) N * N * * where R12 or R21 is upper triangular, and * * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, * ( 0 ) P-N P N-P * N * * where T11 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GRQ * factorization of sub( A ) and sub( B ) implicitly gives the RQ * factorization of sub( A )*inv( sub( B ) ): * * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z' * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the conjugate transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( B ). P >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and sub( B ). * N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAUA, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) COMPLEX, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors which represent the unitary matrix Q. TAUA is * tied to the distributed matrix A (see Further Details). * * B (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, the local pieces of the P-by-N distributed matrix * sub( B ) which is to be factored. On exit, the elements on * and above the diagonal of sub( B ) contain the min(P,N) by N * upper trapezoidal matrix T (T is upper triangular if P >= N); * the elements below the diagonal, with the array TAUB, * represent the unitary matrix Z as a product of elementary * reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) COMPLEX, array, dimension * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors * TAUB of the elementary reflectors which represent the unitary * matrix Z. TAUB is tied to the distributed matrix B (see * Further Details). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ), * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) + * MB_A * MB_A, * NB_B * ( PpB0 + NqB0 + NB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a complex scalar, and v is a complex vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on * exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1). * To form Q explicitly, use ScaLAPACK subroutine PCUNGRQ. * To use Q to update another matrix, use ScaLAPACK subroutine PCUNMRQ. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1). * To form Z explicitly, use ScaLAPACK subroutine PCUNGQR. * To use Z to update another matrix, use ScaLAPACK subroutine PCUNMQR. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0, NQB0, PPB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCGEQRF, PCGERQF, $ PCHK2MAT, PCUNMRQ, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, INT, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( M, 1, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( P, 2, N, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) PPB0 = NUMROC( P+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NQB0 = NUMROC( N+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ), $ MAX( MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( PPB0 + NQB0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ), $ DESCB( NB_ ) * ( PPB0 + NQB0 + DESCB( NB_ ) ) ) ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( IACOL.NE.IBCOL .OR. ICOFFA.NE.ICOFFB ) THEN INFO = -11 ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -1204 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( M, 1, N, 3, IA, JA, DESCA, 7, P, 2, N, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGGRQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q * CALL PCGERQF( M, N, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := sub( B )*Q' * CALL PCUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), $ A, MAX( IA, IA+M-N ), JA, DESCA, TAUA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) LWMIN = MAX( LWMIN, INT( WORK( 1 ) ) ) * * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T * CALL PCGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = CMPLX( REAL( MAX( LWMIN, INT( WORK( 1 ) ) ) ) ) * RETURN * * End of PCGGRQF * END scalapack-1.8.0/SRC/pslantr.f0000640000175000017500000011057410363532303015605 0ustar muammarmuammar REAL FUNCTION PSLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), WORK( * ) * .. * * Purpose * ======= * * PSLANTR returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * trapezoidal or triangular distributed matrix sub( A ) denoting * A(IA:IA+M-1, JA:JA+N-1). * * PSLANTR returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with ia <= i <= ia+m-1, * ( and ja <= j <= ja+n-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PSLANTR as described * above. * * UPLO (global input) CHARACTER * Specifies whether the matrix sub( A ) is upper or lower * trapezoidal. * = 'U': Upper trapezoidal * = 'L': Lower trapezoidal * Note that sub( A ) is triangular instead of trapezoidal * if M = N. * * DIAG (global input) CHARACTER * Specifies whether or not the distributed matrix sub( A ) has * unit diagonal. * = 'N': Non-unit diagonal * = 'U': Unit diagonal * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). When M = 0, PSLANTR is * set to zero. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). When N = 0, * PSLANTR is set to zero. N >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ) containing * the local pieces of sub( A ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) REAL array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * Nq0 if NORM = '1', 'O' or 'o', * Mp0 if NORM = 'I' or 'i', * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW, * MYCOL, NPROW and NPCOL can be determined by calling the * subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL UDIAG INTEGER IACOL, IAROW, ICTXT, II, IIA, ICOFF, IOFFA, $ IROFF, J, JB, JJ, JJA, JN, KK, LDA, LL, MP, $ MYCOL, MYROW, NP, NPCOL, NPROW, NQ REAL SCALE, SUM, VALUE * .. * .. Local Arrays .. REAL RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PSTREECOMB, $ SCOMBSSQ, SGEBR2D, SGEBS2D, $ SGAMX2D, SGSUM2D, SLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ISAMAX, NUMROC EXTERNAL LSAME, ICEIL, ISAMAX, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ MP = MP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF LDA = DESCA( LLD_ ) IOFFA = ( JJA - 1 ) * LDA * IF( MIN( M, N ).EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * IF( UDIAG ) THEN VALUE = ONE ELSE VALUE = ZERO END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 20 LL = JJ, JJ + JB -1 DO 10 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE DO 40 LL = JJ, JJ + JB -1 DO 30 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE END IF ELSE DO 60 LL = JJ, JJ + JB -1 DO 50 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 80 LL = JJ, JJ + JB -1 DO 70 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 70 CONTINUE IOFFA = IOFFA + LDA 80 CONTINUE ELSE DO 100 LL = JJ, JJ + JB -1 DO 90 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 90 CONTINUE IOFFA = IOFFA + LDA 100 CONTINUE END IF ELSE DO 120 LL = JJ, JJ + JB -1 DO 110 KK = IIA, MIN( II-1, IIA+MP-1 ) VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 110 CONTINUE IOFFA = IOFFA + LDA 120 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 130 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 150 LL = JJ, JJ + JB -1 DO 140 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE ELSE DO 170 LL = JJ, JJ + JB -1 DO 160 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 160 CONTINUE IOFFA = IOFFA + LDA 170 CONTINUE END IF ELSE DO 190 LL = JJ, JJ + JB -1 DO 180 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 180 CONTINUE IOFFA = IOFFA + LDA 190 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 260 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 210 LL = JJ, JJ + JB -1 DO 200 KK = II+LL-JJ+1, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 200 CONTINUE IOFFA = IOFFA + LDA 210 CONTINUE ELSE DO 230 LL = JJ, JJ + JB -1 DO 220 KK = II+LL-JJ, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 220 CONTINUE IOFFA = IOFFA + LDA 230 CONTINUE END IF ELSE DO 250 LL = JJ, JJ + JB -1 DO 240 KK = II, IIA+MP-1 VALUE = MAX( VALUE, ABS( A( IOFFA+KK ) ) ) 240 CONTINUE IOFFA = IOFFA + LDA 250 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 260 CONTINUE * END IF * * Gather the intermediate results to process (0,0). * CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, KK, LL, -1, $ 0, 0 ) * ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 280 LL = JJ, JJ + JB -1 SUM = ONE DO 270 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 270 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 280 CONTINUE ELSE DO 300 LL = JJ, JJ + JB -1 SUM = ZERO DO 290 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 290 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 300 CONTINUE END IF ELSE DO 320 LL = JJ, JJ + JB -1 SUM = ZERO DO 310 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 310 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 320 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 390 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 340 LL = JJ, JJ + JB -1 SUM = ONE DO 330 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 330 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 340 CONTINUE ELSE DO 360 LL = JJ, JJ + JB -1 SUM = ZERO DO 350 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 350 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 360 CONTINUE END IF ELSE DO 380 LL = JJ, JJ + JB -1 SUM = ZERO DO 370 KK = IIA, MIN( II-1, IIA+MP-1 ) SUM = SUM + ABS( A( IOFFA+KK ) ) 370 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 380 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 410 LL = JJ, JJ + JB -1 SUM = ONE DO 400 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 400 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 410 CONTINUE ELSE DO 430 LL = JJ, JJ + JB -1 SUM = ZERO DO 420 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 420 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 430 CONTINUE END IF ELSE DO 450 LL = JJ, JJ + JB -1 SUM = ZERO DO 440 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 440 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 450 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 520 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 470 LL = JJ, JJ + JB -1 SUM = ONE DO 460 KK = II+LL-JJ+1, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 460 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 470 CONTINUE ELSE DO 490 LL = JJ, JJ + JB -1 SUM = ZERO DO 480 KK = II+LL-JJ, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 480 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 490 CONTINUE END IF ELSE DO 510 LL = JJ, JJ + JB -1 SUM = ZERO DO 500 KK = II, IIA+MP-1 SUM = SUM + ABS( A( IOFFA+KK ) ) 500 CONTINUE IOFFA = IOFFA + LDA WORK( LL-JJA+1 ) = SUM 510 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 520 CONTINUE * END IF * * Find sum of global matrix columns and store on row 0 of * process grid * CALL SGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK, 1, $ 0, MYCOL ) * * Find maximum sum of columns for 1-norm * IF( MYROW.EQ.0 ) THEN IF( NQ.GT.0 ) THEN VALUE = WORK( ISAMAX( NQ, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, KK, LL, $ -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'I' ) ) THEN * IF( LSAME( UPLO, 'U' ) ) THEN IF( UDIAG ) THEN DO 530 KK = IIA, IIA+MP-1 WORK( KK ) = ONE 530 CONTINUE ELSE DO 540 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 540 CONTINUE END IF ELSE IF( UDIAG ) THEN NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF DO 550 KK = IIA, IIA+NP-1 WORK( KK ) = ONE 550 CONTINUE DO 560 KK = IIA+NP, IIA+MP-1 WORK( KK ) = ZERO 560 CONTINUE ELSE DO 570 KK = IIA, IIA+MP-1 WORK( KK ) = ZERO 570 CONTINUE END IF END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 590 LL = JJ, JJ + JB -1 DO 580 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 580 CONTINUE IOFFA = IOFFA + LDA 590 CONTINUE ELSE DO 610 LL = JJ, JJ + JB -1 DO 600 KK = IIA, MIN(II+LL-JJ+1,IIA+MP-1) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 600 CONTINUE IOFFA = IOFFA + LDA 610 CONTINUE END IF ELSE DO 630 LL = JJ, JJ + JB -1 DO 620 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 620 CONTINUE IOFFA = IOFFA + LDA 630 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 700 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 650 LL = JJ, JJ + JB -1 DO 640 KK = IIA, MIN( II+LL-JJ+1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 640 CONTINUE IOFFA = IOFFA + LDA 650 CONTINUE ELSE DO 670 LL = JJ, JJ + JB -1 DO 660 KK = IIA, MIN( II+LL-JJ, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 660 CONTINUE IOFFA = IOFFA + LDA 670 CONTINUE END IF ELSE DO 690 LL = JJ, JJ + JB -1 DO 680 KK = IIA, MIN( II-1, IIA+MP-1 ) WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 680 CONTINUE IOFFA = IOFFA + LDA 690 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 700 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 720 LL = JJ, JJ + JB -1 DO 710 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 710 CONTINUE IOFFA = IOFFA + LDA 720 CONTINUE ELSE DO 740 LL = JJ, JJ + JB -1 DO 730 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 730 CONTINUE IOFFA = IOFFA + LDA 740 CONTINUE END IF ELSE DO 760 LL = JJ, JJ + JB -1 DO 750 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 750 CONTINUE IOFFA = IOFFA + LDA 760 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 830 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 780 LL = JJ, JJ + JB -1 DO 770 KK = II+LL-JJ+1, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 770 CONTINUE IOFFA = IOFFA + LDA 780 CONTINUE ELSE DO 800 LL = JJ, JJ + JB -1 DO 790 KK = II+LL-JJ, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 790 CONTINUE IOFFA = IOFFA + LDA 800 CONTINUE END IF ELSE DO 820 LL = JJ, JJ + JB -1 DO 810 KK = II, IIA+MP-1 WORK( KK-IIA+1 ) = WORK( KK-IIA+1 ) + $ ABS( A( IOFFA+KK ) ) 810 CONTINUE IOFFA = IOFFA + LDA 820 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 830 CONTINUE * END IF * * Find sum of global matrix rows and store on column 0 of * process grid * CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, WORK, MAX( 1, MP ), $ MYROW, 0 ) * * Find maximum sum of rows for Infinity-norm * IF( MYCOL.EQ.0 ) THEN IF( MP.GT.0 ) THEN VALUE = WORK( ISAMAX( MP, WORK, 1 ) ) ELSE VALUE = ZERO END IF CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, VALUE, 1, KK, $ LL, -1, 0, 0 ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * IF( UDIAG ) THEN SCALE = ONE SUM = REAL( MIN( M, N ) ) / REAL( NPROW*NPCOL ) ELSE SCALE = ZERO SUM = ONE END IF * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 840 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 840 CONTINUE ELSE DO 850 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 850 CONTINUE END IF ELSE DO 860 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 860 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 900 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 870 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ+1, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 870 CONTINUE ELSE DO 880 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II+LL-JJ, IIA+MP-1 )- $ IIA+1, A( IIA+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 880 CONTINUE END IF ELSE DO 890 LL = JJ, JJ + JB -1 CALL SLASSQ( MIN( II-1, IIA+MP-1 )-IIA+1, $ A( IIA+IOFFA ), 1, SCALE, SUM ) IOFFA = IOFFA + LDA 890 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 900 CONTINUE * ELSE * * Lower triangular matrix * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 910 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 910 CONTINUE ELSE DO 920 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 920 CONTINUE END IF ELSE DO 930 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 930 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 970 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN IF( UDIAG ) THEN DO 940 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-(II+LL-JJ+1), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 940 CONTINUE ELSE DO 950 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-(II+LL-JJ), $ A( II+LL-JJ+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 950 CONTINUE END IF ELSE DO 960 LL = JJ, JJ + JB -1 CALL SLASSQ( IIA+MP-II, A( II+IOFFA ), 1, SCALE, $ SUM ) IOFFA = IOFFA + LDA 960 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 970 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM CALL PSTREECOMB( ICTXT, 'All', 2, RWORK, 0, 0, SCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to every process in the grid. * IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, 0, 0 ) END IF * PSLANTR = VALUE * RETURN * * End of PSLANTR * END scalapack-1.8.0/SRC/pzpoequ.f0000640000175000017500000003133310363532303015620 0ustar muammarmuammar SUBROUTINE PZPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, N DOUBLE PRECISION AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION SC( * ), SR( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZPOEQU computes row and column scalings intended to * equilibrate a distributed Hermitian positive definite matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) and reduce its condition number * (with respect to the two-norm). SR and SC contain the scale * factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled distri- * buted matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on * the diagonal. This choice of SR and SC puts the condition number * of B within a factor N of the smallest possible condition number * over all possible diagonal scalings. * * The scaling factor are stored along process rows in SR and along * process columns in SC. The duplication of information simplifies * greatly the application of the factors. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory to an * array of local dimension ( LLD_A, LOCc(JA+N-1) ), the * N-by-N Hermitian positive definite distributed matrix * sub( A ) whose scaling factors are to be computed. Only the * diagonal elements of sub( A ) are referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local output) DOUBLE PRECISION array, dimension LOCr(M_A) * If INFO = 0, SR(IA:IA+N-1) contains the row scale factors * for sub( A ). SR is aligned with the distributed matrix A, * and replicated across every process column. SR is tied to the * distributed matrix A. * * SC (local output) DOUBLE PRECISION array, dimension LOCc(N_A) * If INFO = 0, SC(JA:JA+N-1) contains the column scale factors * for A(IA:IA+M-1,JA:JA+N-1). SC is aligned with the distribu- * ted matrix A, and replicated down every process row. SC is * tied to the distributed matrix A. * * SCOND (global output) DOUBLE PRECISION * If INFO = 0, SCOND contains the ratio of the smallest SR(i) * (or SC(j)) to the largest SR(i) (or SC(j)), with * IA <= i <= IA+N-1 and JA <= j <= JA+N-1. If SCOND >= 0.1 * and AMAX is neither too large nor too small, it is not worth * scaling by SR (or SC). * * AMAX (global output) DOUBLE PRECISION * Absolute value of largest matrix element. If AMAX is very * close to overflow or very close to underflow, the matrix * should be scaled. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the K-th diagonal entry of sub( A ) is * nonpositive. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. CHARACTER ALLCTOP, COLCTOP, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW, $ IDUMM, II, IIA, IOFFA, IOFFD, IROFF, J, JB, JJ, $ JJA, JN, LDA, LL, MYCOL, MYROW, NP, NPCOL, $ NPROW, NQ DOUBLE PRECISION AII, SMIN * .. * .. Local Arrays .. INTEGER DESCSC( DLEN_ ), DESCSR( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGAMN2D, $ DGAMX2D, DGSUM2D, IGAMN2D, INFOG2L, $ PCHK1MAT, PB_TOPGET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(500+CTXT_) ELSE CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, INFO ) CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 5, 0, IDUMM, IDUMM, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOEQU', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'All', ALLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * * Compute some local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP - IROFF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) LDA = DESCA( LLD_ ) * * Assign descriptors for SR and SC arrays * CALL DESCSET( DESCSR, N, 1, DESCA( MB_ ), 1, 0, 0, ICTXT, $ MAX( 1, NP ) ) CALL DESCSET( DESCSC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 ) * * Initialize the scaling factors to zero. * DO 10 II = IIA, IIA+NP-1 SR( II ) = ZERO 10 CONTINUE * DO 20 JJ = JJA, JJA+NQ-1 SC( JJ ) = ZERO 20 CONTINUE * * Find the minimum and maximum diagonal elements. * Handle first block separately. * II = IIA JJ = JJA JB = JN-JA+1 SMIN = ONE / PDLAMCH( ICTXT, 'S' ) AMAX = ZERO * IOFFA = II+(JJ-1)*LDA IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN IOFFD = IOFFA DO 30 LL = 0, JB-1 AII = DBLE( A( IOFFD ) ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = LL + 1 IOFFD = IOFFD + LDA + 1 30 CONTINUE END IF * IF( MYROW.EQ.IAROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining blocks of columns * DO 50 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN IOFFD = IOFFA DO 40 LL = 0, JB-1 AII = DBLE( A( IOFFD ) ) SR( II+LL ) = AII SC( JJ+LL ) = AII SMIN = MIN( SMIN, AII ) AMAX = MAX( AMAX, AII ) IF( AII.LE.ZERO .AND. INFO.EQ.0 ) $ INFO = J + LL - JA + 1 IOFFD = IOFFD + LDA + 1 40 CONTINUE END IF * IF( MYROW.EQ.ICURROW ) THEN II = II + JB IOFFA = IOFFA + JB END IF IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + JB IOFFA = IOFFA + JB*LDA END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 50 CONTINUE * * Compute scaling factors * CALL DGSUM2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, SC( JJA ), $ 1, -1, MYCOL ) CALL DGSUM2D( ICTXT, 'Rowwise', ROWCTOP, NP, 1, SR( IIA ), $ MAX( 1, NP ), -1, MYCOL ) * CALL DGAMX2D( ICTXT, 'All', ALLCTOP, 1, 1, AMAX, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) CALL DGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, SMIN, 1, IDUMM, IDUMM, $ -1, -1, MYCOL ) * IF( SMIN.LE.ZERO ) THEN * * Find the first non-positive diagonal element and return. * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) RETURN * ELSE * * Set the scale factors to the reciprocals * of the diagonal elements. * DO 60 II = IIA, IIA+NP-1 SR( II ) = ONE / SQRT( SR( II ) ) 60 CONTINUE * DO 70 JJ = JJA, JJA+NQ-1 SC( JJ ) = ONE / SQRT( SC( JJ ) ) 70 CONTINUE * * Compute SCOND = min(S(I)) / max(S(I)) * SCOND = SQRT( SMIN ) / SQRT( AMAX ) * END IF * RETURN * * End of PZPOEQU * END scalapack-1.8.0/SRC/pdpocon.f0000640000175000017500000003635410363532303015567 0ustar muammarmuammar SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LIWORK, LWORK, N DOUBLE PRECISION ANORM, RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), IWORK( * ) DOUBLE PRECISION A( * ), WORK( * ) * .. * * Purpose * ======= * * PDPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a real symmetric positive definite distributed matrix * using the Cholesky factorization A = U**T*U or A = L*L**T computed by * PDPOTRF. * * An estimate is obtained for norm(inv(A(IA:IA+N-1,JA:JA+N-1))), and * the reciprocal of the condition number is computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the factor stored in * A(IA:IA+N-1,JA:JA+N-1) is upper or lower triangular. * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The order of the distributed matrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * A (local input) DOUBLE PRECISION pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). On entry, * this array contains the local pieces of the factors L or U * from the Cholesky factorization A(IA:IA+N-1,JA:JA+N-1) = U'*U * or L*L', as computed by PDPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ANORM (global input) DOUBLE PRECISION * The 1-norm (or infinity-norm) of the symmetric distributed * matrix A(IA:IA+N-1,JA:JA+N-1). * * RCOND (global output) DOUBLE PRECISION * The reciprocal of the condition number of the distributed * matrix A(IA:IA+N-1,JA:JA+N-1), computed as * RCOND = 1 / ( norm( A(IA:IA+N-1,JA:JA+N-1) ) * * norm( inv(A(IA:IA+N-1,JA:JA+N-1)) ) ). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= 2*LOCr(N+MOD(IA-1,MB_A)) + 2*LOCc(N+MOD(JA-1,NB_A))+ * MAX( 2, MAX(NB_A*CEIL(NPROW-1,NPCOL),LOCc(N+MOD(JA-1,NB_A)) + * NB_A*CEIL(NPCOL-1,NPROW)) ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * IWORK (local workspace/local output) INTEGER array, * dimension (LIWORK) * On exit, IWORK(1) returns the minimal and optimal LIWORK. * * LIWORK (local or global input) INTEGER * The dimension of the array IWORK. * LIWORK is local input and must be at least * LIWORK >= LOCr(N+MOD(IA-1,MB_A)). * * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER CBTOP, COLCTOP, NORMIN, ROWCTOP INTEGER IACOL, IAROW, ICOFF, ICTXT, IIA, IPNL, IPNU, $ IPV, IPW, IPX, IROFF, IV, IX, IXX, JJA, JV, $ JX, KASE, LIWMIN, LWMIN, MYCOL, MYROW, NP, $ NPCOL, NPROW, NPMOD, NQ, NQMOD DOUBLE PRECISION AINVNM, SCALE, SL, SU, SMLNUM DOUBLE PRECISION WMAX * .. * .. Local Arrays .. INTEGER DESCV( DLEN_ ), DESCX( DLEN_ ), IDUM1( 3 ), $ IDUM2( 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, DGEBR2D, $ DGEBS2D, INFOG2L, PCHK1MAT, PDAMAX, $ PDLATRS, PDLACON, PDRSCL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, INDXG2P, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN UPPER = LSAME( UPLO, 'U' ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) NPMOD = NUMROC( N + MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQMOD = NUMROC( N + MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = 2*NPMOD + 2*NQMOD + $ MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQMOD + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) WORK( 1 ) = DBLE( LWMIN ) LIWMIN = NPMOD IWORK( 1 ) = LIWMIN LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) * IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( ANORM.LT.ZERO ) THEN INFO = -7 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN IWORK( 1 ) = LIWMIN INFO = -12 END IF END IF * IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 IF( LIWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 12 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 3, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOCON', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE RETURN ELSE IF( ANORM.EQ.ZERO ) THEN RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE RETURN END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IV = IROFF + 1 IX = IV JV = ICOFF + 1 JX = JV * IPX = 1 IPV = IPX + NP IPNL = IPV + NP IPNU = IPNL + NQ IPW = IPNU + NQ * CALL DESCSET( DESCV, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) CALL DESCSET( DESCX, N+IROFF, 1, DESCA( MB_ ), 1, IAROW, MYCOL, $ ICTXT, MAX( 1, NP ) ) * * Estimate the 1-norm (or I-norm) of inv(A). * AINVNM = ZERO KASE = 0 NORMIN = 'N' * 10 CONTINUE CALL PDLACON( N, WORK( IPV ), IV, JV, DESCV, WORK( IPX ), IX, JX, $ DESCX, IWORK, AINVNM, KASE ) IF( KASE.NE.0 ) THEN IF( UPPER ) THEN * * Multiply by inv(U'). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(U). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL ELSE * * Multiply by inv(L). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SL, WORK( IPNL ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL NORMIN = 'Y' * * Multiply by inv(L'). * DESCX( CSRC_ ) = IACOL CALL PDLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, $ N, A, IA, JA, DESCA, WORK( IPX ), IX, JX, $ DESCX, SU, WORK( IPNU ), WORK( IPW ) ) DESCX( CSRC_ ) = MYCOL END IF * * Multiply by 1/SCALE if doing so will not cause overflow. * SCALE = SL*SU IF( SCALE.NE.ONE ) THEN CALL PDAMAX( N, WMAX, IXX, WORK( IPX ), IX, JX, DESCX, 1 ) IF( DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', CBTOP ) IF( MYROW.EQ.IAROW ) THEN CALL DGEBS2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1 ) ELSE CALL DGEBR2D( ICTXT, 'Column', CBTOP, 1, 1, WMAX, 1, $ IAROW, MYCOL ) END IF END IF IF( SCALE.LT.ABS( WMAX )*SMLNUM .OR. SCALE.EQ.ZERO ) $ GO TO 20 CALL PDRSCL( N, SCALE, WORK( IPX ), IX, JX, DESCX, 1 ) END IF GO TO 10 END IF * * Compute the estimate of the reciprocal condition number. * IF( AINVNM.NE.ZERO ) $ RCOND = ( ONE / AINVNM ) / ANORM * 20 CONTINUE * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * RETURN * * End of PDPOCON * END scalapack-1.8.0/SRC/pzunmrz.f0000640000175000017500000004340610363532303015646 0ustar muammarmuammar SUBROUTINE PZUNMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMRZ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZTZRZF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZTZRZF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZTZRZF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JAA, $ JCC, LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, $ MYROW, NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARZB, PZLARZT, $ PZUNMR3 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 10, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 10, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 15, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -6 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1000+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -14 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1500+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1500+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -17 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IDUM1( 4 ) = L IDUM2( 4 ) = 6 IF( LWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 17 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 10, M, 3, N, 4, $ IC, JC, DESCC, 15, 5, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMRZ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC JAA = JA + M - L ELSE MI = M ICC = IC JAA = JA + N - L CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PZUNMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PZLARZT( 'Backward', 'Rowwise', L, IB, A, I, JAA, DESCA, $ TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, IB, $ L, A, I, JAA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M ELSE NI = N END IF CALL PZUNMR3( SIDE, TRANS, MI, NI, IB, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMRZ * END scalapack-1.8.0/SRC/pzgeql2.f0000640000175000017500000003014310363532303015477 0ustar muammarmuammar SUBROUTINE PZGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZGEQL2 computes a QL factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * L. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M >= N, the * lower triangle of the distributed submatrix * A( IA+M-N:IA+M-1, JA:JA+N-1 ) contains the N-by-N lower * triangular matrix L; if M <= N, the elements on and below * the (N-M)-th superdiagonal contain the M by N lower * trapezoidal matrix L; the remaining elements, with the * array TAU, represent the unitary matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja+k-1) . . . H(ja+1) H(ja), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in * A(ia:ia+m-k+i-2,ja+n-k+i-1), and tau in TAU(ja+n-k+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, II, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX*16 AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZLARFC, PZLARFG, ZGEBR2D, ZGEBS2D, $ ZLARFG, ZSCAL * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IACOL = INDXG2P( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) THEN I = II+(JJ+NQ-2)*DESCA( LLD_ ) AJJ = A( I ) CALL ZLARFG( 1, AJJ, A( I ), 1, TAU( JJ+NQ-1 ) ) IF( N.GT.1 ) THEN ALPHA = ONE - DCONJG( TAU( JJ+NQ-1 ) ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL ZSCAL( NQ-1, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL ZSCAL( NQ, ALPHA, A( II+(JJ-1)*DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( JJ+NQ-1 ), 1, IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA+K-1, JA, -1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(ia:i+m-k-1,j+n-k) * CALL PZLARFG( M-K+I-IA+1, AJJ, M-K+I, N-K+J, A, IA, $ N-K+J, DESCA, 1, TAU ) * * Apply H(j)' to A(ia:i+m-k,ja:j+n-k-1) from the left * CALL PZELSET( A, I+M-K, J+N-K, DESCA, ONE ) CALL PZLARFC( 'Left', M-K+I-IA+1, N-K+J-JA, A, IA, N-K+J, $ DESCA, 1, TAU, A, IA, JA, DESCA, WORK ) CALL PZELSET( A, I+M-K, J+N-K, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZGEQL2 * END scalapack-1.8.0/SRC/pzunmlq.f0000640000175000017500000004207610363532303015631 0ustar muammarmuammar SUBROUTINE PZUNMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMLQ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PZGELQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= max(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZGELQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGELQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICC, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, JCC, $ LCM, LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZUNML2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMLQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC ELSE MI = M ICC = IC CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PZUNML2( SIDE, TRANS, M, N, I1-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL PZLARFT( 'Forward', 'Rowwise', NQ-I+IA, IB, A, I, JA+I-IA, $ DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+i-ia:ic+m-1,jc:jc+n-1) * MI = M - I + IA ICC = IC + I - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+i-ia:jc+n-1) * NI = N - I + IA JCC = JC + I - IA END IF * * Apply H or H' * CALL PZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, $ A, I, JA+I-IA, DESCA, WORK, C, ICC, JCC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PZUNML2( SIDE, TRANS, M, N, I2-IA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMLQ * END scalapack-1.8.0/SRC/zdbtf2.f0000640000175000017500000001237410363532303015314 0ustar muammarmuammar SUBROUTINE ZDBTF2( M, N, KL, KU, AB, LDAB, INFO ) * * Modified by Andrew J. Cleary in November, 96 from: * -- LAPACK auxiliary routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. COMPLEX*16 AB( LDAB, * ) * .. * * Purpose * ======= * * Zdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting with row interchanges. * * This is the unblocked version of the algorithm, calling Level 2 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) COMPLEX*16 array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine; elements marked * + need not be set on entry, but are required by the routine to store * elements of U, because of fill-in resulting from the row * interchanges. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER J, JP, JU, KM, KV * .. * .. External Functions .. INTEGER ISAMAX EXTERNAL ISAMAX * .. * .. External Subroutines .. EXTERNAL ZGERU, ZSCAL, ZSWAP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U, allowing for * fill-in. * KV = KU * * Test the input parameters. * INFO = 0 *ECA IF( M.LT.0 ) THEN *ECA INFO = -1 *ECA ELSE IF( N.LT.0 ) THEN *ECA INFO = -2 *ECA ELSE IF( KL.LT.0 ) THEN *ECA INFO = -3 *ECA ELSE IF( KU.LT.0 ) THEN *ECA INFO = -4 *ECA ELSE IF( LDAB.LT.KL+KV+1 ) THEN *ECA INFO = -6 *ECA END IF *ECA IF( INFO.NE.0 ) THEN *ECA CALL XERBLA( 'ZDBTF2', -INFO ) *ECA RETURN *ECA END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Gaussian elimination without partial pivoting * * JU is the index of the last column affected by the current stage * of the factorization. * JU = 1 * DO 40 J = 1, MIN( M, N ) * * Test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-J ) JP = 1 IF( AB( KV+1, J ).NE.ZERO ) THEN JU = MAX( JU, MIN( J+KU, N ) ) * IF( KM.GT.0 ) THEN * * Compute multipliers. * CALL ZSCAL( KM, ONE / AB( KU+1, J ), AB( KU+2, J ), 1 ) * * Update trailing submatrix within the band. * IF( JU.GT.J ) THEN CALL ZGERU( KM, JU-J, -CONE, AB( KU+2, J ), 1, $ AB( KU, J+1 ), LDAB-1, AB( KU+1, J+1 ), $ LDAB-1 ) END IF END IF ELSE * IF( INFO.EQ.0 ) $ INFO = J END IF 40 CONTINUE RETURN * * End of ZDBTF2 * END scalapack-1.8.0/SRC/sdbtrf.f0000640000175000017500000002470210363532303015403 0ustar muammarmuammar SUBROUTINE SDBTRF( M, N, KL, KU, AB, LDAB, INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * August, 1996. * Modified from SGBTRF: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * August 6, 1991 * * .. Scalar Arguments .. INTEGER INFO, KL, KU, LDAB, M, N * .. * .. Array Arguments .. REAL AB( LDAB, * ) * .. * * Purpose * ======= * * Sdbtrf computes an LU factorization of a real m-by-n band matrix A * without using partial pivoting or row interchanges. * * This is the blocked version of the algorithm, calling Level 3 BLAS. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * KL (input) INTEGER * The number of subdiagonals within the band of A. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals within the band of A. KU >= 0. * * AB (input/output) REAL array, dimension (LDAB,N) * On entry, the matrix A in band storage, in rows KL+1 to * 2*KL+KU+1; rows 1 to KL of the array need not be set. * The j-th column of A is stored in the j-th column of the * array AB as follows: * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) * * On exit, details of the factorization: U is stored as an * upper triangular band matrix with KL+KU superdiagonals in * rows 1 to KL+KU+1, and the multipliers used during the * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. * See below for further details. * * LDAB (input) INTEGER * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * Further Details * =============== * * The band storage scheme is illustrated by the following example, when * M = N = 6, KL = 2, KU = 1: * * On entry: On exit: * * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * * a31 a42 a53 a64 * * m31 m42 m53 m64 * * * * Array elements marked * are not used by the routine. * * ===================================================================== * * .. Parameters .. REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0 ) PARAMETER ( ZERO = 0.0E+0 ) INTEGER NBMAX, LDWORK PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) * .. * .. Local Scalars .. INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP, $ JU, KM, KV, NB, NW * .. * .. Local Arrays .. REAL WORK13( LDWORK, NBMAX ), $ WORK31( LDWORK, NBMAX ) * .. * .. External Functions .. INTEGER ILAENV, ISAMAX EXTERNAL ILAENV, ISAMAX * .. * .. External Subroutines .. EXTERNAL SCOPY, SDBTF2, SGEMM, SGER, SSCAL, $ SSWAP, STRSM, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * KV is the number of superdiagonals in the factor U * KV = KU * * Test the input parameters. * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( KL.LT.0 ) THEN INFO = -3 ELSE IF( KU.LT.0 ) THEN INFO = -4 ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN INFO = -6 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SDBTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Determine the block size for this environment * NB = ILAENV( 1, 'SDBTRF', ' ', M, N, KL, KU ) * * The block size must not exceed the limit set by the size of the * local arrays WORK13 and WORK31. * NB = MIN( NB, NBMAX ) * IF( NB.LE.1 .OR. NB.GT.KL ) THEN * * Use unblocked code * CALL SDBTF2( M, N, KL, KU, AB, LDAB, INFO ) ELSE * * Use blocked code * * Zero the superdiagonal elements of the work array WORK13 * DO 20 J = 1, NB DO 10 I = 1, J - 1 WORK13( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * * Zero the subdiagonal elements of the work array WORK31 * DO 40 J = 1, NB DO 30 I = J + 1, NB WORK31( I, J ) = ZERO 30 CONTINUE 40 CONTINUE * * JU is the index of the last column affected by the current * stage of the factorization * JU = 1 * DO 180 J = 1, MIN( M, N ), NB JB = MIN( NB, MIN( M, N )-J+1 ) * * The active part of the matrix is partitioned * * A11 A12 A13 * A21 A22 A23 * A31 A32 A33 * * Here A11, A21 and A31 denote the current block of JB columns * which is about to be factorized. The number of rows in the * partitioning are JB, I2, I3 respectively, and the numbers * of columns are JB, J2, J3. The superdiagonal elements of A13 * and the subdiagonal elements of A31 lie outside the band. * I2 = MIN( KL-JB, M-J-JB+1 ) I3 = MIN( JB, M-J-KL+1 ) * * J2 and J3 are computed after JU has been updated. * * Factorize the current block of JB columns * DO 80 JJ = J, J + JB - 1 * * Find pivot and test for singularity. KM is the number of * subdiagonal elements in the current column. * KM = MIN( KL, M-JJ ) JP = 1 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) * * Compute multipliers * CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), $ 1 ) * * Update trailing submatrix within the band and within * the current block. JM is the index of the last column * which needs to be updated. * JM = MIN( JU, J+JB-1 ) IF( JM.GT.JJ ) THEN CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, $ AB( KV, JJ+1 ), LDAB-1, $ AB( KV+1, JJ+1 ), LDAB-1 ) END IF END IF * * Copy current column of A31 into the work array WORK31 * NW = MIN( JJ-J+1, I3 ) IF( NW.GT.0 ) $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, $ WORK31( 1, JJ-J+1 ), 1 ) 80 CONTINUE IF( J+JB.LE.N ) THEN * * Apply the row interchanges to the other blocks. * J2 = MIN( JU-J+1, KV ) - JB J3 = MAX( 0, JU-J-KV+1 ) * * Update the relevant part of the trailing submatrix * IF( J2.GT.0 ) THEN * * Update A12 * CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1 ) * IF( I2.GT.0 ) THEN * * Update A22 * CALL SGEMM( 'No transpose', 'No transpose', I2, J2, $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, $ AB( KV+1, J+JB ), LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A32 * CALL SGEMM( 'No transpose', 'No transpose', I3, J2, $ JB, -ONE, WORK31, LDWORK, $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) END IF END IF * IF( J3.GT.0 ) THEN * * Copy the lower triangle of A13 into the work array * WORK13 * DO 130 JJ = 1, J3 DO 120 II = JJ, JB WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 120 CONTINUE 130 CONTINUE * * Update A13 in the work array * CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, $ WORK13, LDWORK ) * IF( I2.GT.0 ) THEN * * Update A23 * CALL SGEMM( 'No transpose', 'No transpose', I2, J3, $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), $ LDAB-1 ) END IF * IF( I3.GT.0 ) THEN * * Update A33 * CALL SGEMM( 'No transpose', 'No transpose', I3, J3, $ JB, -ONE, WORK31, LDWORK, WORK13, $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) END IF * * Copy the lower triangle of A13 back into place * DO 150 JJ = 1, J3 DO 140 II = JJ, JB AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 140 CONTINUE 150 CONTINUE END IF ELSE END IF * * copy the upper triangle of A31 back into place * DO 170 JJ = J + JB - 1, J, -1 * * Copy the current column of A31 back into place * NW = MIN( I3, JJ-J+1 ) IF( NW.GT.0 ) $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1, $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 170 CONTINUE 180 CONTINUE END IF * RETURN * * End of SDBTRF * END scalapack-1.8.0/SRC/pclacp2.f0000640000175000017500000003745510363532303015454 0ustar muammarmuammar SUBROUTINE PCLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCLACP2 copies all or part of a distributed matrix A to another * distributed matrix B. No communication is performed, PCLACP2 * performs a local copy sub( A ) := sub( B ), where sub( A ) denotes * A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1). * PCLACP2 requires that only dimension of the matrix operands is * distributed. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies the part of the distributed matrix sub( A ) to be * copied: * = 'U': Upper triangular part is copied; the strictly * lower triangular part of sub( A ) is not referenced; * = 'L': Lower triangular part is copied; the strictly * upper triangular part of sub( A ) is not referenced; * Otherwise: All of the matrix sub( A ) is copied. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix sub( A ) * to be copied from. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local output) COMPLEX pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array * contains on exit the local pieces of the distributed matrix * sub( B ) set as follows: * * if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=j, 1<=j<=N; * if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * j<=i<=M, 1<=j<=N; * otherwise, B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1), * 1<=i<=M, 1<=j<=N. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW, $ ICOFFA, IIA, IIAA, IIB, IIBB, IIBEGA, IIBEGB, $ IIENDA, IINXTA, IINXTB, ILEFT, IRIGHT, IROFFA, $ ITOP, JJA, JJAA, JJB, JJBB, JJBEGA, JJBEGB, $ JJENDA, JJNXTA, JJNXTB, LDA, LDB, MBA, MP, $ MPAA, MYCOL, MYDIST, MYROW, NBA, NPCOL, NPROW, $ NQ, NQAA, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CLACPY, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) LDA = DESCA( LLD_ ) IROFFA = MOD( IA-1, MBA ) ICOFFA = MOD( JA-1, NBA ) LDB = DESCB( LLD_ ) * IF( N.LE.( NBA-ICOFFA ) ) THEN * * It is assumed that the local columns JJA:JJA+N-1 of the matrix * A are in the same process column (IACOL). * * N * JJA JJA+N-1 * / --------------------- \ * IROFFA| | | | * \ |...................| | ( IAROW ) * IIA |x | | MBA = DESCA( MB_ ) * | x | | * |--x----------------| / * | x | * | x | ITOP * | x | | * | x | /-------\ * |-------x-----------| |-------x-----------| * | x | | x | * | x | | x | * | x | | x | * | x | | x | * |------------x------| |------------x------| * | x | \____________/ * | x | | * | x | IBASE * | x | * |-----------------x-| Local picture * | x| * | | * | | * | | * |-------------------| * | | * . . * . . * . (IACOL) . * IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) IF( MP.LE.0 ) $ RETURN IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) ITOP = MYDIST * MBA - IROFFA * IF( LSAME( UPLO, 'U' ) ) THEN * ITOP = MAX( 0, ITOP ) IIBEGA = IIA IIENDA = IIA + MP - 1 IINXTA = MIN( ICEIL( IIBEGA, MBA ) * MBA, IIENDA ) IIBEGB = IIB IINXTB = IIBEGB + IINXTA - IIBEGA * 10 CONTINUE IF( ( N-ITOP ).GT.0 ) THEN CALL CLACPY( UPLO, IINXTA-IIBEGA+1, N-ITOP, $ A( IIBEGA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBEGB+(JJB+ITOP-1)*LDB ), LDB ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IIBEGA = IINXTA + 1 IINXTA = MIN( IINXTA+MBA, IIENDA ) IIBEGB = IINXTB + 1 IINXTB = IIBEGB + IINXTA - IIBEGA GO TO 10 END IF * ELSE IF( LSAME( UPLO, 'L' ) ) THEN * MPAA = MP IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( MAX( 0, ITOP ), N ) * 20 CONTINUE IF( JJAA.LE.( JJA+N-1 ) ) THEN HEIGHT = IBASE - ITOP CALL CLACPY( 'All', MPAA, ITOP-JJAA+JJA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL CLACPY( UPLO, MPAA, HEIGHT, $ A( IIAA+(JJA+ITOP-1)*LDA ), LDA, $ B( IIBB+(JJB+ITOP-1)*LDB ), LDB ) MPAA = MAX( 0, MPAA - HEIGHT ) IIAA = IIAA + HEIGHT JJAA = JJA + IBASE IIBB = IIBB + HEIGHT JJBB = JJB + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBA - IROFFA IBASE = MIN( ITOP + MBA, N ) ITOP = MIN( ITOP, N ) GO TO 20 END IF * ELSE * CALL CLACPY( 'All', MP, N, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * ELSE IF( M.LE.( MBA-IROFFA ) ) THEN * * It is assumed that the local rows IIA:IIA+M-1 of the matrix A * are in the same process row (IAROW). * * ICOFFA * / \JJA * IIA ------------------ .... -------- * | .x | | | / | | \ * | . x | | | ILEFT| | | | * | . x | | | | | | * | . x | | \ x | | * | . |x | | |x | | IRIGHT * | . | x | | | x | | * (IAROW) | . | x | | | x | | * | . | x| | | x| | * | . | x | | x / * | . | |x | | | * | . | | x | | | * | . | | x | | | * | . | | x| | | * IIA+M-1 ------------------ .... ------- * NB_A * (IACOL) Local picture * IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) IF( NQ.LE.0 ) $ RETURN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBA - ICOFFA * IF( LSAME( UPLO, 'L' ) ) THEN * ILEFT = MAX( 0, ILEFT ) JJBEGA = JJA JJENDA = JJA + NQ - 1 JJNXTA = MIN( ICEIL( JJBEGA, NBA ) * NBA, JJENDA ) JJBEGB = JJB JJNXTB = JJBEGB + JJNXTA - JJBEGA * 30 CONTINUE IF( ( M-ILEFT ).GT.0 ) THEN CALL CLACPY( UPLO, M-ILEFT, JJNXTA-JJBEGA+1, $ A( IIA+ILEFT+(JJBEGA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBEGB-1)*LDB ), LDB ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA JJBEGA = JJNXTA +1 JJNXTA = MIN( JJNXTA+NBA, JJENDA ) JJBEGB = JJNXTB +1 JJNXTB = JJBEGB + JJNXTA - JJBEGA GO TO 30 END IF * ELSE IF( LSAME( UPLO, 'U' ) ) THEN * NQAA = NQ IIAA = IIA JJAA = JJA IIBB = IIB JJBB = JJB IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( MAX( 0, ILEFT ), M ) * 40 CONTINUE IF( IIAA.LE.( IIA+M-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL CLACPY( 'All', ILEFT-IIAA+IIA, NQAA, $ A( IIAA+(JJAA-1)*LDA ), LDA, $ B( IIBB+(JJBB-1)*LDB ), LDB ) CALL CLACPY( UPLO, WIDE, NQAA, $ A( IIA+ILEFT+(JJAA-1)*LDA ), LDA, $ B( IIB+ILEFT+(JJBB-1)*LDB ), LDB ) NQAA = MAX( 0, NQAA - WIDE ) IIAA = IIA + IRIGHT JJAA = JJAA + WIDE IIBB = IIB + IRIGHT JJBB = JJBB + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBA - ICOFFA IRIGHT = MIN( ILEFT + NBA, M ) ILEFT = MIN( ILEFT, M ) GO TO 40 END IF * ELSE * CALL CLACPY( 'All', M, NQ, A( IIA+(JJA-1)*LDA ), $ LDA, B( IIB+(JJB-1)*LDB ), LDB ) * END IF * END IF * END IF * RETURN * * End of PCLACP2 * END scalapack-1.8.0/SRC/pzposv.f0000640000175000017500000002433010363532303015455 0ustar muammarmuammar SUBROUTINE PZPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), B( * ) * .. * * Purpose * ======= * * PZPOSV computes the solution to a complex system of linear equations * * sub( A ) * X = sub( B ), * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is an N-by-N * hermitian distributed positive definite matrix and X and sub( B ) * denoting B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS distributed * matrices. * * The Cholesky decomposition is used to factor sub( A ) as * * sub( A ) = U**H * U, if UPLO = 'U', or * * sub( A ) = L * L**H, if UPLO = 'L', * * where U is an upper triangular matrix and L is a lower triangular * matrix. The factored form of sub( A ) is then used to solve the * system of equations. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if INFO = 0, this array contains the * local pieces of the factor U or L from the Cholesky factori- * zation sub( A ) = U**H*U or L*L**H. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_B,LOC(JB+NRHS-1)). * On entry, the local pieces of the right hand sides distribu- * ted matrix sub( B ). On exit, if INFO = 0, sub( B ) is over- * written with the solution distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed, and the * solution has not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICOFFA, ICTXT, IROFFA, IROFFB, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA, $ PZPOTRF, PZPOTRS * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1000+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZPOSV', -INFO ) RETURN END IF * * Compute the Cholesky factorization sub( A ) = U'*U or L*L'. * CALL PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.EQ.0 ) THEN * * Solve the system sub( A ) * X = sub( B ) overwriting sub( B ) * with X. * CALL PZPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * END IF * RETURN * * End of PZPOSV * END scalapack-1.8.0/SRC/pzungqr.f0000640000175000017500000003077310363532303015632 0ustar muammarmuammar SUBROUTINE PZUNGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNGQR generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal columns, which is defined as * the first N columns of a product of K elementary reflectors of order * M * * Q = H(1) H(2) . . . H(k) * * as returned by PZGEQRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. M >= N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. N >= K >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the j-th column must contain the vector which * defines the elementary reflector H(j), JA <= j <= JA+K-1, as * returned by PZGEQRF in the K columns of its distributed * matrix argument A(IA:*,JA:JA+K-1). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+K-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQRF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NB_A * ( NqA0 + MpA0 + NB_A ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, ICTXT, IINFO, IPW, J, JB, JL, $ JN, LWMIN, MPA0, MYCOL, MYROW, NPCOL, NPROW, $ NQA0 * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZLASET, PZUNG2R * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( NB_ ) * ( MPA0 + NQA0 + DESCA( NB_ ) ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.GT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.N ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF IDUM1( 1 ) = K IDUM2( 1 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 10 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNGQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.LE.0 ) $ RETURN * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) JL = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * CALL PZLASET( 'All', JL-JA, JA+N-JL, ZERO, ZERO, A, IA, JL, $ DESCA ) * * Use unblocked code for the last or only block. * CALL PZUNG2R( M-JL+JA, JA+N-JL, JA+K-JL, A, IA+JL-JA, JL, DESCA, $ TAU, WORK, LWORK, IINFO ) * * Is there at least one block of columns to loop over ? * IF( JL.GT.JN+1 ) THEN * * Use blocked code * DO 10 J = JL-DESCA( NB_ ), JN+1, -DESCA( NB_ ) JB = MIN( DESCA( NB_ ), JA+N-J ) I = IA + J - JA * IF( J+JB.LE.JA+N-1 ) THEN * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PZLARFT( 'Forward', 'Columnwise', M-I+IA, JB, A, I, $ J, DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(i:ia+m-1,j+jb:ja+n-1) from the left * CALL PZLARFB( 'Left', 'No transpose', 'Forward', $ 'Columnwise', M-I+IA, N-J-JB+JA, JB, A, I, $ J, DESCA, WORK, A, I, J+JB, DESCA, $ WORK( IPW ) ) END IF * * Apply H to rows i:ia+m-1 of current block * CALL PZUNG2R( M-I+IA, JB, JB, A, I, J, DESCA, TAU, WORK, $ LWORK, IINFO ) * * Set rows ia:i-1 of current block to zero * CALL PZLASET( 'All', I-IA, JB, ZERO, ZERO, A, IA, J, DESCA ) * 10 CONTINUE * END IF * * Handle first block separately * IF( JL.GT.JA ) THEN * JB = JN - JA + 1 * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PZLARFT( 'Forward', 'Columnwise', M, JB, A, IA, JA, DESCA, $ TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:ia+m-1,ja+jb:ja+n-1) from the left * CALL PZLARFB( 'Left', 'No transpose', 'Forward', 'Columnwise', $ M, N-JB, JB, A, IA, JA, DESCA, WORK, A, IA, $ JA+JB, DESCA, WORK( IPW ) ) * * Apply H to rows ia:ia+m-1 of current block * CALL PZUNG2R( M, JB, JB, A, IA, JA, DESCA, TAU, WORK, LWORK, $ IINFO ) * END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNGQR * END scalapack-1.8.0/SRC/pdpotrs.f0000640000175000017500000002374510363532303015620 0ustar muammarmuammar SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ) * .. * * Purpose * ======= * * PDPOTRS solves a system of linear equations * * sub( A ) * X = sub( B ) * A(IA:IA+N-1,JA:JA+N-1)*X = B(IB:IB+N-1,JB:JB+NRHS-1) * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a N-by-N * symmetric positive definite distributed matrix using the Cholesky * factorization sub( A ) = U**T*U or L*L**T computed by PDPOTRF. * sub( B ) denotes the distributed matrix B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) DOUBLE PRECISION pointer into local memory to * an array of dimension (LLD_A, LOCc(JA+N-1)). On entry, this * array contains the factors L or U from the Cholesky facto- * rization sub( A ) = L*L**T or U**T*U, as computed by PDPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * the local pieces of the right hand sides sub( B ). * On exit, this array contains the local pieces of the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICTXT, IROFFA, IROFFB, ICOFFA, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 11, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( UPPER ) THEN * * Solve sub( A ) * X = sub( B ) where sub( A ) = U'*U. * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A ) *X = sub( B ) where sub( A ) = L*L'. * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PDTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) END IF * RETURN * * End of PDPOTRS * END scalapack-1.8.0/SRC/pdlaqsy.f0000640000175000017500000003207710363532303015600 0ustar muammarmuammar SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N DOUBLE PRECISION AMAX, SCOND * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), SC( * ), SR( * ) * .. * * Purpose * ======= * * PDLAQSY equilibrates a symmetric distributed matrix * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the * vectors SR and SC. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric distributed matrix sub( A ) is to be referenced: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (input/output) DOUBLE PRECISION pointer into the local * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)). * On entry, the local pieces of the distributed symmetric * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper * triangular part of sub( A ) contains the upper triangular * part of the matrix, and the strictly lower triangular part * of sub( A ) is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular part of the matrix, and the strictly upper trian- * gular part of sub( A ) is not referenced. * On exit, if EQUED = 'Y', the equilibrated matrix: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * SR (local input) DOUBLE PRECISION array, dimension LOCr(M_A) * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned * with the distributed matrix A, and replicated across every * process column. SR is tied to the distributed matrix A. * * SC (local input) DOUBLE PRECISION array, dimension LOCc(N_A) * The scale factors for sub( A ). SC is aligned with the dis- * tributed matrix A, and replicated down every process row. * SC is tied to the distributed matrix A. * * SCOND (global input) DOUBLE PRECISION * Ratio of the smallest SR(i) (respectively SC(j)) to the * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1 * and JA <= j <= JA+N-1. * * AMAX (global input) DOUBLE PRECISION * Absolute value of the largest distributed submatrix entry. * * EQUED (output) CHARACTER*1 * Specifies whether or not equilibration was done. * = 'N': No equilibration. * = 'Y': Equilibration was done, i.e., sub( A ) has been re- * placed by: * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)). * * Internal Parameters * =================== * * THRESH is a threshold value used to decide if scaling should be done * based on the ratio of the scaling factors. If SCOND < THRESH, * scaling is done. * * LARGE and SMALL are threshold values used to decide if scaling should * be done based on the absolute size of the largest matrix element. * If AMAX > LARGE or AMAX < SMALL, scaling is done. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, THRESH PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J, $ JB, JJ, JJA, JN, KK, LDA, LL, MYCOL, MYROW, NP, $ NPCOL, NPROW DOUBLE PRECISION CJ, LARGE, SMALL * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC DOUBLE PRECISION PDLAMCH EXTERNAL ICEIL, LSAME, NUMROC, PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' RETURN END IF * * Get grid parameters and compute local indexes * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) * * Initialize LARGE and SMALL. * SMALL = PDLAMCH( ICTXT, 'Safe minimum' ) / $ PDLAMCH( ICTXT, 'Precision' ) LARGE = ONE / SMALL * IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN * * No equilibration * EQUED = 'N' * ELSE * II = IIA JJ = JJA JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 * * Replace A by diag(S) * A * diag(S). * IF( LSAME( UPLO, 'U' ) ) THEN * * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 20 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 10 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 10 CONTINUE IOFFA = IOFFA + LDA 20 CONTINUE ELSE IOFFA = IOFFA + JB*LDA END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 70 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 40 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 30 KK = IIA, II+LL-JJ+1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 30 CONTINUE IOFFA = IOFFA + LDA 40 CONTINUE ELSE DO 60 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 50 KK = IIA, II-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 50 CONTINUE IOFFA = IOFFA + LDA 60 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 70 CONTINUE * ELSE * * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored. * Handle first block separately * IROFF = MOD( IA-1, DESCA( MB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFF * IOFFA = (JJ-1)*LDA IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 90 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 80 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 80 CONTINUE IOFFA = IOFFA + LDA 90 CONTINUE ELSE DO 110 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 100 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 100 CONTINUE IOFFA = IOFFA + LDA 110 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining block of columns * DO 160 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYCOL.EQ.IACOL ) THEN IF( MYROW.EQ.IAROW ) THEN DO 130 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 120 KK = II+LL-JJ, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 120 CONTINUE IOFFA = IOFFA + LDA 130 CONTINUE ELSE DO 150 LL = JJ, JJ + JB -1 CJ = SC( LL ) DO 140 KK = II, IIA+NP-1 A( IOFFA + KK ) = CJ*SR( KK )*A( IOFFA + KK ) 140 CONTINUE IOFFA = IOFFA + LDA 150 CONTINUE END IF JJ = JJ + JB END IF * IF( MYROW.EQ.IAROW ) $ II = II + JB IAROW = MOD( IAROW+1, NPROW ) IACOL = MOD( IACOL+1, NPCOL ) * 160 CONTINUE * END IF * EQUED = 'Y' * END IF * RETURN * * End of PDLAQSY * END scalapack-1.8.0/SRC/pdpttrf.f0000640000175000017500000010401110363532303015572 0ustar muammarmuammar SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION AF( * ), D( * ), E( * ), WORK( * ) * .. * * * Purpose * ======= * * PDPTTRF computes a Cholesky factorization * of an N-by-N real tridiagonal * symmetric positive definite distributed matrix * A(1:N, JA:JA+N-1). * Reordering is used to increase parallelism in the factorization. * This reordering results in factors that are DIFFERENT from those * produced by equivalent sequential codes. These factors cannot * be used directly by users; however, they can be used in * subsequent calls to PDPTTRS to solve linear systems. * * The factorization has the form * * P A(1:N, JA:JA+N-1) P^T = U' D U or * * P A(1:N, JA:JA+N-1) P^T = L D L', * * where U is a tridiagonal upper triangular matrix and L is tridiagonal * lower triangular, and P is a permutation matrix. * * ===================================================================== * * Arguments * ========= * * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * E (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDPTTRF and this is stored in AF. If a linear system * is to be solved using PDPTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * (NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 8*NPCOL * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K<=NPROCS, the submatrix stored on processor * INFO and factored locally was not * positive definite, and * the factorization was not completed. * If INFO = K>NPROCS, the submatrix stored on processor * INFO-NPROCS representing interactions with other * processors was not * positive definite, * and the factorization was not completed. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT, $ ICTXT_NEW, ICTXT_SAVE, IDUM3, INT_TEMP, JA_NEW, $ LAF_MIN, LEVEL_DIST, LLDA, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 7, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ DGERV2D, DGESD2D, DPTTRF, DPTTRSV, DTRRV2D, $ DTRSD2D, GLOBCHK, IGAMX2D, IGEBR2D, IGEBS2D, $ PXERBLA, RESHAPE * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 5*100+2 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LWORK.LT.-1 ) THEN INFO = -9 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -1 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 5*100+6 ) END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 5*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: NB too small', -INFO ) RETURN END IF * * * Check auxiliary storage size * LAF_MIN = ( 12*NPCOL+3*NB ) * IF( LAF.LT.LAF_MIN ) THEN INFO = -7 * put minimum value of laf into AF( 1 ) AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPTTRF: auxiliary storage error ', $ -INFO ) RETURN END IF * * Check worksize * WORK_SIZE_MIN = 8*NPCOL * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -9 CALL PXERBLA( ICTXT, 'PDPTTRF: worksize error ', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 7, 1 ) = DESCA( 5 ) PARAM_CHECK( 6, 1 ) = DESCA( 4 ) PARAM_CHECK( 5, 1 ) = DESCA( 3 ) PARAM_CHECK( 4, 1 ) = DESCA( 1 ) PARAM_CHECK( 3, 1 ) = JA PARAM_CHECK( 2, 1 ) = N PARAM_CHECK( 1, 1 ) = IDUM3 * PARAM_CHECK( 7, 2 ) = 505 PARAM_CHECK( 6, 2 ) = 504 PARAM_CHECK( 5, 2 ) = 503 PARAM_CHECK( 4, 2 ) = 501 PARAM_CHECK( 3, 2 ) = 4 PARAM_CHECK( 2, 2 ) = 1 PARAM_CHECK( 1, 2 ) = 9 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 7, PARAM_CHECK, 7, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 90 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * Zero out space for fillin * DO 10 I = 1, LAF_MIN AF( I ) = ZERO 10 CONTINUE * * Begin main code * * ******************************************************************** * PHASE 1: Local computation phase. ******************************************************************** * * IF( MYCOL.LT.NP-1 ) THEN * Transfer last triangle D_i of local matrix to next processor * which needs it to calculate fillin due to factorization of * its main (odd) block A_i. * Overlap the send with the factorization of A_i. * CALL DTRSD2D( ICTXT, 'U', 'N', 1, 1, $ E( PART_OFFSET+ODD_SIZE+1 ), LLDA-1, 0, MYCOL+1 ) * END IF * * * Factor main partition A_i = L_i {L_i}^T in each processor * Or A_i = {U_i}^T {U_i} if E is the upper superdiagonal * CALL DPTTRF( ODD_SIZE, D( PART_OFFSET+1 ), E( PART_OFFSET+1 ), $ INFO ) * IF( INFO.NE.0 ) THEN INFO = MYCOL + 1 GO TO 20 END IF * * IF( MYCOL.LT.NP-1 ) THEN * Apply factorization to odd-even connection block B_i * * * Perform the triangular system solve {L_i}{{B'}_i}^T = {B_i}^T * by dividing B_i by diagonal element * E( PART_OFFSET+ODD_SIZE ) = E( PART_OFFSET+ODD_SIZE ) / $ D( PART_OFFSET+ODD_SIZE ) * * * * Compute contribution to diagonal block(s) of reduced system. * {C'}_i = {C_i}-{{B'}_i}{{B'}_i}^T * D( PART_OFFSET+ODD_SIZE+1 ) = D( PART_OFFSET+ODD_SIZE+1 ) - $ D( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE ) ) ) * END IF * End of "if ( MYCOL .lt. NP-1 )..." loop * * 20 CONTINUE * If the processor could not locally factor, it jumps here. * IF( MYCOL.NE.0 ) THEN * * Receive previously transmitted matrix section, which forms * the right-hand-side for the triangular solve that calculates * the "spike" fillin. * * CALL DTRRV2D( ICTXT, 'U', 'N', 1, 1, AF( 1 ), ODD_SIZE, 0, $ MYCOL-1 ) * IF( INFO.EQ.0 ) THEN * * Calculate the "spike" fillin, ${L_i} {{G}_i}^T = {D_i}$ . * CALL DPTTRSV( 'N', ODD_SIZE, INT_ONE, D( PART_OFFSET+1 ), $ E( PART_OFFSET+1 ), AF( 1 ), ODD_SIZE, INFO ) * * Divide by D * DO 30 I = 1, ODD_SIZE AF( I ) = AF( I ) / D( PART_OFFSET+I ) 30 CONTINUE * * * Calculate the update block for previous proc, E_i = G_i{G_i}^T * * * Since there is no element-by-element vector multiplication in * the BLAS, this loop must be hardwired in without a BLAS call * INT_TEMP = ODD_SIZE*INT_ONE + 2 + 1 AF( INT_TEMP ) = 0 * DO 40 I = 1, ODD_SIZE AF( INT_TEMP ) = AF( INT_TEMP ) - $ D( PART_OFFSET+I )*( AF( I )* $ ( AF( I ) ) ) 40 CONTINUE * * * Initiate send of E_i to previous processor to overlap * with next computation. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+3 ), $ INT_ONE, 0, MYCOL-1 ) * * IF( MYCOL.LT.NP-1 ) THEN * * Calculate off-diagonal block(s) of reduced system. * Note: for ease of use in solution of reduced system, store * L's off-diagonal block in transpose form. * {F_i}^T = {H_i}{{B'}_i}^T * AF( ODD_SIZE+1 ) = -D( PART_OFFSET+ODD_SIZE )* $ ( E( PART_OFFSET+ODD_SIZE )* $ AF( ODD_SIZE ) ) * * END IF * END IF * End of "if ( MYCOL .ne. 0 )..." * END IF * End of "if (info.eq.0) then" * * * Check to make sure no processors have found errors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * IF( INFO.NE.0 ) THEN GO TO 80 END IF * No errors found, continue * * ******************************************************************** * PHASE 2: Formation and factorization of Reduced System. ******************************************************************** * * Gather up local sections of reduced system * * * The last processor does not participate in the factorization of * the reduced system, having sent its E_i already. IF( MYCOL.EQ.NPCOL-1 ) THEN GO TO 70 END IF * * Initiate send of off-diag block(s) to overlap with next part. * Off-diagonal block needed on neighboring processor to start * algorithm. * IF( ( MOD( MYCOL+1, 2 ).EQ.0 ) .AND. ( MYCOL.GT.0 ) ) THEN * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, MYCOL-1 ) * END IF * * Copy last diagonal block into AF storage for subsequent * operations. * AF( ODD_SIZE+2 ) = DBLE( D( PART_OFFSET+ODD_SIZE+1 ) ) * * Receive cont. to diagonal block that is stored on this proc. * IF( MYCOL.LT.NPCOL-1 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL+1 ) * * Add contribution to diagonal block * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + AF( ODD_SIZE+3 ) * END IF * * * ************************************* * Modification Loop * * The distance for sending and receiving for each level starts * at 1 for the first level. LEVEL_DIST = 1 * * Do until this proc is needed to modify other procs' equations * 50 CONTINUE IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 ).NE.0 ) $ GO TO 60 * * Receive and add contribution to diagonal block from the left * IF( MYCOL-LEVEL_DIST.GE.0 ) THEN CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * * Receive and add contribution to diagonal block from the right * IF( MYCOL+LEVEL_DIST.LT.NPCOL-1 ) THEN CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * AF( ODD_SIZE+2 ) = AF( ODD_SIZE+2 ) + WORK( 1 ) * END IF * LEVEL_DIST = LEVEL_DIST*2 * GO TO 50 60 CONTINUE * [End of GOTO Loop] * * * ********************************* * Calculate and use this proc's blocks to modify other procs'... IF( AF( ODD_SIZE+2 ).EQ.ZERO ) THEN INFO = NPCOL + MYCOL END IF * * **************************************************************** * Receive offdiagonal block from processor to right. * If this is the first group of processors, the receive comes * from a different processor than otherwise. * IF( LEVEL_DIST.EQ.1 ) THEN COMM_PROC = MYCOL + 1 * * Move block into place that it will be expected to be for * calcs. * AF( ODD_SIZE+3 ) = AF( ODD_SIZE+1 ) * ELSE COMM_PROC = MYCOL + LEVEL_DIST / 2 END IF * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+1 ), $ INT_ONE, 0, COMM_PROC ) * IF( INFO.EQ.0 ) THEN * * * Modify upper off_diagonal block with diagonal block * * AF( ODD_SIZE+1 ) = AF( ODD_SIZE+1 ) / AF( ODD_SIZE+2 ) * END IF * End of "if ( info.eq.0 ) then" * * Calculate contribution from this block to next diagonal block * WORK( 1 ) = -ONE*AF( ODD_SIZE+1 )*AF( ODD_SIZE+2 )* $ ( AF( ODD_SIZE+1 ) ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL+LEVEL_DIST ) * END IF * End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..." * * * **************************************************************** * Receive off_diagonal block from left and use to finish with this * processor. * IF( ( MYCOL / LEVEL_DIST.GT.0 ) .AND. $ ( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-1 ) ) THEN * IF( LEVEL_DIST.GT.1 ) THEN * * Receive offdiagonal block(s) from proc level_dist/2 to the * left * CALL DGERV2D( ICTXT, INT_ONE, INT_ONE, AF( ODD_SIZE+2+1 ), $ INT_ONE, 0, MYCOL-LEVEL_DIST / 2 ) * END IF * * IF( INFO.EQ.0 ) THEN * * Use diagonal block(s) to modify this offdiagonal block * AF( ODD_SIZE+3 ) = ( AF( ODD_SIZE+3 ) ) / AF( ODD_SIZE+2 ) * END IF * End of "if( info.eq.0 ) then" * * Use offdiag block(s) to calculate modification to diag block * of processor to the left * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 )* $ ( AF( ODD_SIZE+3 ) ) * * Send contribution to diagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, 0, $ MYCOL-LEVEL_DIST ) * * ******************************************************* * IF( MYCOL / LEVEL_DIST.LE.( NPCOL-1 ) / LEVEL_DIST-2 ) THEN * * Decide which processor offdiagonal block(s) goes to * IF( ( MOD( MYCOL / ( 2*LEVEL_DIST ), 2 ) ).EQ.0 ) THEN COMM_PROC = MYCOL + LEVEL_DIST ELSE COMM_PROC = MYCOL - LEVEL_DIST END IF * * Use offdiagonal blocks to calculate offdiag * block to send to neighboring processor. Depending * on circumstances, may need to transpose the matrix. * WORK( 1 ) = -ONE*AF( ODD_SIZE+3 )*AF( ODD_SIZE+2 )* $ AF( ODD_SIZE+1 ) * * Send contribution to offdiagonal block's owning processor. * CALL DGESD2D( ICTXT, INT_ONE, INT_ONE, WORK( 1 ), INT_ONE, $ 0, COMM_PROC ) * END IF * END IF * End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..." * 70 CONTINUE * * 80 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 90 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * Make INFO consistent across processors * CALL IGAMX2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, INFO, INFO, -1, 0, $ 0 ) * IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'A', ' ', 1, 1, INFO, 1 ) ELSE CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * * RETURN * * End of PDPTTRF * END scalapack-1.8.0/SRC/pslacp3.f0000640000175000017500000002772310363532303015472 0ustar muammarmuammar SUBROUTINE PSLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER I, II, JJ, LDB, M, REV * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), B( LDB, * ) * .. * * Purpose * ======= * * PSLACP3 is an auxiliary routine that copies from a global parallel * array into a local replicated array or vise versa. Notice that * the entire submatrix that is copied gets placed on one node or * more. The receiving node can be specified precisely, or all nodes * can receive, or just one row or column of nodes. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * M is the order of the square submatrix that is copied. * M >= 0. * Unchanged on exit * * I (global input) INTEGER * A(I,I) is the global location that the copying starts from. * Unchanged on exit. * * A (global input/output) REAL array, dimension * (DESCA(LLD_),*) * On entry, the parallel matrix to be copied into or from. * On exit, if REV=1, the copied data. * Unchanged on exit if REV=0. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/output) REAL array of size (LDB,M) * If REV=0, this is the global portion of the array * A(I:I+M-1,I:I+M-1). * If REV=1, this is the unchanged on exit. * * LDB (local input) INTEGER * The leading dimension of B. * * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) * receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data * If II>=0,JJ=-1, then all cols in row II receive the data * If II=-1,JJ=-1, then all nodes receive the data * If REV<>0, then II is the source row index for the node(s) * sending the replicated B. * * JJ (global input) INTEGER * Similar description as II above * * REV (global input) INTEGER * Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in * A) into the global A. * * Implemented by: G. Henry, May 1, 1997 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. INTEGER COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN, $ III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP, $ JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGEBR2D, SGEBS2D, SGERV2D, $ SGESD2D, INFOG1L * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * IF( M.LE.0 ) $ RETURN * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M DO 10 IDJ = 1, M B( IDI, IDJ ) = ZERO 10 CONTINUE 20 CONTINUE END IF * IFIN = I + M - 1 * IF( MOD( I+HBL, HBL ).NE.0 ) THEN ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN ) ELSE ISTOP = I END IF IDJ = I ISTOPJ = ISTOP IF( IDJ.LE.IFIN ) THEN 30 CONTINUE IDI = I ISTOPI = ISTOP IF( IDI.LE.IFIN ) THEN 40 CONTINUE ROW = MOD( ( IDI-1 ) / HBL, NPROW ) COL = MOD( ( IDJ-1 ) / HBL, NPCOL ) CALL INFOG1L( IDI, HBL, NPROW, ROW, 0, IROW1, ITMP ) IROW2 = NUMROC( ISTOPI, HBL, ROW, 0, NPROW ) CALL INFOG1L( IDJ, HBL, NPCOL, COL, 0, ICOL1, ITMP ) ICOL2 = NUMROC( ISTOPJ, HBL, COL, 0, NPCOL ) IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to everyone * IF( REV.EQ.0 ) THEN CALL SGEBS2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.NE.-1 ) ) THEN * * Send the message to Column MYCOL which better be JJ * IF( REV.EQ.0 ) THEN CALL SGEBS2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN * * Send the message to Row MYROW which better be II * IF( REV.EQ.0 ) THEN CALL SGEBS2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+ $ IROW1 ), LDA ) END IF END IF IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND. $ ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN * * Recv/Send the message to (II,JJ) * IF( REV.EQ.0 ) THEN CALL SGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ A( ( ICOL1-1 )*LDA+IROW1 ), LDA, II, $ JJ ) ELSE CALL SGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, II, JJ ) END IF END IF IF( REV.EQ.0 ) THEN DO 60 JJJ = ICOL1, ICOL2 DO 50 III = IROW1, IROW2 B( IDI+III-IROW1+1-I, IDJ+JJJ-ICOL1+1-I ) $ = A( ( JJJ-1 )*LDA+III ) 50 CONTINUE 60 CONTINUE ELSE DO 80 JJJ = ICOL1, ICOL2 DO 70 III = IROW1, IROW2 A( ( JJJ-1 )*LDA+III ) = B( IDI+III-IROW1+1-I, $ IDJ+JJJ-ICOL1+1-I ) 70 CONTINUE 80 CONTINUE END IF ELSE IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL SGEBR2D( CONTXT, 'All', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL SGEBR2D( CONTXT, 'Col', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN IF( REV.EQ.0 ) THEN CALL SGEBR2D( CONTXT, 'Row', ' ', IROW2-IROW1+1, $ ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ), $ LDB, ROW, COL ) END IF END IF IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN IF( REV.EQ.0 ) THEN CALL SGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) ELSE CALL SGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1, $ B( IDI-I+1, IDJ-I+1 ), LDB, ROW, $ COL ) END IF END IF END IF IDI = ISTOPI + 1 ISTOPI = MIN( ISTOPI+HBL, IFIN ) IF( IDI.LE.IFIN ) $ GO TO 40 END IF IDJ = ISTOPJ + 1 ISTOPJ = MIN( ISTOPJ+HBL, IFIN ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF RETURN * * End of PSLACP3 * END scalapack-1.8.0/SRC/pstzrzf.f0000640000175000017500000003077710363532303015652 0ustar muammarmuammar SUBROUTINE PSTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper triangular form by means * of orthogonal transformations. * * The upper trapezoidal matrix sub( A ) is factored as * * sub( A ) = ( R 0 ) * Z, * * where Z is an N-by-N orthogonal matrix and R is an M-by-M upper * triangular matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the leading M-by-M * upper triangular part of sub( A ) contains the upper trian- * gular matrix R, and elements M+1 to N of the first M rows of * sub( A ), with the array TAU, represent the orthogonal matrix * Z as a product of M elementary reflectors. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) REAL, array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors. TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MB_A * ( Mp0 + Nq0 + MB_A ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The factorization is obtained by Householder's method. The kth * transformation matrix, Z( k ), which is used to introduce zeros into * the (m - k + 1)th row of sub( A ), is given in the form * * Z( k ) = ( I 0 ), * ( 0 T( k ) ) * * where * * T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), * ( 0 ) * ( z( k ) ) * * tau is a scalar and z( k ) is an ( n - m ) element vector. * tau and z( k ) are chosen to annihilate the elements of the kth row * of sub( A ). * * The scalar tau is returned in the kth element of TAU and the vector * u( k ) in the kth row of sub( A ), such that the elements of z( k ) * are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned * in the upper triangular part of sub( A ). * * Z is given by * * Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO PARAMETER ( ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IL, IN, IPW, $ IROFFA, J, JM1, L, LWMIN, MP0, MYCOL, MYROW, $ NPCOL, NPROW, NQ0 * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, INFOG1L, PCHK1MAT, $ PSLATRZ, PSLARZB, PSLARZT, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER ICEIL, INDXG2P, NUMROC EXTERNAL ICEIL, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = DESCA( MB_ ) * ( MP0 + NQ0 + DESCA( MB_ ) ) * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -9 END IF END IF IF( LQUERY ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 9 CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSTZRZF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * IF( M.EQ.N ) THEN * CALL INFOG1L( IA, DESCA( MB_ ), NPROW, MYROW, DESCA( RSRC_ ), $ IIA, IAROW ) IF( MYROW.EQ.IAROW ) $ MP0 = MP0 - IROFFA DO 10 I = IIA, IIA+MP0-1 TAU( I ) = ZERO 10 CONTINUE * ELSE * L = N-M JM1 = JA + MIN( M+1, N ) - 1 IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) IL = MAX( ( (IA+M-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * * Use blocked code initially * DO 20 I = IL, IN+1, -DESCA( MB_ ) IB = MIN( IA+M-I, DESCA( MB_ ) ) J = JA + I - IA * * Compute the complete orthogonal factorization of the current * block A(i:i+ib-1,j:ja+n-1) * CALL PSLATRZ( IB, JA+N-J, L, A, I, J, DESCA, TAU, WORK ) * IF( I.GT.IA ) THEN * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PSLARZT( 'Backward', 'Rowwise', L, IB, A, I, JM1, $ DESCA, TAU, WORK, WORK( IPW ) ) * * Apply H to A(ia:i-1,j:ja+n-1) from the right * CALL PSLARZB( 'Right', 'No transpose', 'Backward', $ 'Rowwise', I-IA, JA+N-J, IB, L, A, I, JM1, $ DESCA, WORK, A, IA, J, DESCA, WORK( IPW ) ) END IF * 20 CONTINUE * * Use unblocked code to factor the last or only block * CALL PSLATRZ( IN-IA+1, N, N-M, A, IA, JA, DESCA, TAU, WORK ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSTZRZF * END scalapack-1.8.0/SRC/pchegs2.f0000640000175000017500000003755310363532303015462 0ustar muammarmuammar* * SUBROUTINE PCHEGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCHEGS2 reduces a complex Hermitian-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PCPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PCPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, HALF PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IIA, IIB, IOFFA, IOFFB, IROFFA, IROFFB, $ JJA, JJB, K, LDA, LDB, MYCOL, MYROW, NPCOL, $ NPROW REAL AKK, BKK COMPLEX CT * .. * .. External Subroutines .. EXTERNAL BLACS_EXIT, BLACS_GRIDINFO, CAXPY, CHER2, $ CHK1MAT, CLACGV, CSSCAL, CTRMV, CTRSV, INFOG2L, $ PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC MOD, REAL * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( N+ICOFFA.GT.DESCA( NB_ ) ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHEGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) $ RETURN * * Compute local information * LDA = DESCA( LLD_ ) LDB = DESCB( LLD_ ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB, $ IBROW, IBCOL ) * IF( IBTYPE.EQ.1 ) THEN * IF( UPPER ) THEN * IOFFA = IIA + JJA*LDA IOFFB = IIB + JJB*LDB * * Compute inv(U')*sub( A )*inv(U) * DO 10 K = 1, N * * Update the upper triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = REAL( A( IOFFA-LDA ) ) BKK = REAL( B( IOFFB-LDB ) ) AKK = AKK / BKK**2 A( IOFFA-LDA ) = AKK IF( K.LT.N ) THEN CALL CSSCAL( N-K, ONE / BKK, A( IOFFA ), LDA ) CT = -HALF*AKK CALL CLACGV( N-K, A( IOFFA ), LDA ) CALL CLACGV( N-K, B( IOFFB ), LDB ) CALL CAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL CHER2( UPLO, N-K, -CONE, A( IOFFA ), LDA, $ B( IOFFB ), LDB, A( IOFFA+1 ), LDA ) CALL CAXPY( N-K, CT, B( IOFFB ), LDB, A( IOFFA ), $ LDA ) CALL CLACGV( N-K, B( IOFFB ), LDB ) CALL CTRSV( UPLO, 'Conjugate transpose', 'Non-unit', $ N-K, B( IOFFB+1 ), LDB, A( IOFFA ), LDA ) CALL CLACGV( N-K, A( IOFFA ), LDA ) END IF * * A( IOFFA ) -> A( K, K+1 ) * B( IOFFB ) -> B( K, K+1 ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 10 CONTINUE * ELSE * IOFFA = IIA + 1 + ( JJA-1 )*LDA IOFFB = IIB + 1 + ( JJB-1 )*LDB * * Compute inv(L)*sub( A )*inv(L') * DO 20 K = 1, N * * Update the lower triangle of * A(ia+k-1:ia+n-a,ia+k-1:ia+n-1) * AKK = REAL( A( IOFFA-1 ) ) BKK = REAL( B( IOFFB-1 ) ) AKK = AKK / BKK**2 A( IOFFA-1 ) = AKK * IF( K.LT.N ) THEN CALL CSSCAL( N-K, ONE / BKK, A( IOFFA ), 1 ) CT = -HALF*AKK CALL CAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL CHER2( UPLO, N-K, -CONE, A( IOFFA ), 1, $ B( IOFFB ), 1, A( IOFFA+LDA ), LDA ) CALL CAXPY( N-K, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL CTRSV( UPLO, 'No transpose', 'Non-unit', N-K, $ B( IOFFB+LDB ), LDB, A( IOFFA ), 1 ) END IF * * A( IOFFA ) -> A( K+1, K ) * B( IOFFB ) -> B( K+1, K ) * IOFFA = IOFFA + LDA + 1 IOFFB = IOFFB + LDB + 1 * 20 CONTINUE * END IF * ELSE * IF( UPPER ) THEN * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute U*sub( A )*U' * DO 30 K = 1, N * * Update the upper triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = REAL( A( IOFFA+K-1 ) ) BKK = REAL( B( IOFFB+K-1 ) ) CALL CTRMV( UPLO, 'No transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), 1 ) CT = HALF*AKK CALL CAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL CHER2( UPLO, K-1, CONE, A( IOFFA ), 1, B( IOFFB ), $ 1, A( IIA+( JJA-1 )*LDA ), LDA ) CALL CAXPY( K-1, CT, B( IOFFB ), 1, A( IOFFA ), 1 ) CALL CSSCAL( K-1, BKK, A( IOFFA ), 1 ) A( IOFFA+K-1 ) = AKK*BKK**2 * * A( IOFFA ) -> A( 1, K ) * B( IOFFB ) -> B( 1, K ) * IOFFA = IOFFA + LDA IOFFB = IOFFB + LDB * 30 CONTINUE * ELSE * IOFFA = IIA + ( JJA-1 )*LDA IOFFB = IIB + ( JJB-1 )*LDB * * Compute L'*sub( A )*L * DO 40 K = 1, N * * Update the lower triangle of A(ia:ia+k-1,ja:ja+k-1) * AKK = REAL( A( IOFFA+( K-1 )*LDA ) ) BKK = REAL( B( IOFFB+( K-1 )*LDB ) ) CALL CLACGV( K-1, A( IOFFA ), LDA ) CALL CTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, $ B( IIB+( JJB-1 )*LDB ), LDB, A( IOFFA ), $ LDA ) CT = HALF*AKK CALL CLACGV( K-1, B( IOFFB ), LDB ) CALL CAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL CHER2( UPLO, K-1, CONE, A( IOFFA ), LDA, B( IOFFB ), $ LDB, A( IIA+( JJA-1 )*LDA ), LDA ) CALL CAXPY( K-1, CT, B( IOFFB ), LDB, A( IOFFA ), LDA ) CALL CLACGV( K-1, B( IOFFB ), LDB ) CALL CSSCAL( K-1, BKK, A( IOFFA ), LDA ) CALL CLACGV( K-1, A( IOFFA ), LDA ) A( IOFFA+( K-1 )*LDA ) = AKK*BKK**2 * * A( IOFFA ) -> A( K, 1 ) * B( IOFFB ) -> B( K, 1 ) * IOFFA = IOFFA + 1 IOFFB = IOFFB + 1 * 40 CONTINUE * END IF * END IF * RETURN * * End of PCHEGS2 * END scalapack-1.8.0/SRC/getpbbuf.c0000640000175000017500000000215410363532303015707 0ustar muammarmuammar#include "tools.h" char * getpbbuf( mess, length ) /* * .. Scalar Arguments .. */ int length; /* * .. Array Arguments .. */ char * mess; { /* * Purpose * ======= * * getpbbuf returns a pointer to a working buffer of size length alloca- * ted for the PBLAS routines. * * ====================================================================== * * .. Local Scalars .. */ static char * pblasbuf = NULL; static int pbbuflen = 0, mone = -1; /* .. * .. External Functions .. */ void blacs_abort_(); /* .. * .. Executable Statements .. */ if( length >= 0 ) { if( length > pbbuflen ) { if( pblasbuf ) free( pblasbuf ); pblasbuf = (char *) malloc((unsigned)length); if( !pblasbuf ) { fprintf( stderr, "PBLAS %s ERROR: Memory allocation failed\n", mess ); blacs_abort_( &mone, &mone ); } pbbuflen = length; } } else if( pblasbuf ) { free( pblasbuf ); pblasbuf = NULL; pbbuflen = 0; } return( pblasbuf ); } scalapack-1.8.0/SRC/pzlansy.f0000640000175000017500000007033010363532303015615 0ustar muammarmuammar DOUBLE PRECISION FUNCTION PZLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER NORM, UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION WORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLANSY returns the value of the one norm, or the Frobenius norm, * or the infinity norm, or the element of largest absolute value of a * real symmetric distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * PZLANSY returns the value * * ( max(abs(A(i,j))), NORM = 'M' or 'm' with IA <= i <= IA+N-1, * ( and JA <= j <= JA+N-1, * ( * ( norm1( sub( A ) ), NORM = '1', 'O' or 'o' * ( * ( normI( sub( A ) ), NORM = 'I' or 'i' * ( * ( normF( sub( A ) ), NORM = 'F', 'f', 'E' or 'e' * * where norm1 denotes the one norm of a matrix (maximum column sum), * normI denotes the infinity norm of a matrix (maximum row sum) and * normF denotes the Frobenius norm of a matrix (square root of sum of * squares). Note that max(abs(A(i,j))) is not a matrix norm. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NORM (global input) CHARACTER * Specifies the value to be returned in PZLANSY as described * above. * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is to be referenced. * = 'U': Upper triangular part of sub( A ) is referenced, * = 'L': Lower triangular part of sub( A ) is referenced. * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * number of rows and columns of the distributed submatrix * sub( A ). When N = 0, PZLANSY is set to zero. N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A, LOCc(JA+N-1)) containing the * local pieces of the symmetric distributed matrix sub( A ). * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular matrix which norm is * to be computed, and the strictly lower triangular part of * this matrix is not referenced. If UPLO = 'L', the leading * N-by-N lower triangular part of sub( A ) contains the lower * triangular matrix which norm is to be computed, and the * strictly upper triangular part of sub( A ) is not referenced. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WORK (local workspace) DOUBLE PRECISION array dimension (LWORK) * LWORK >= 0 if NORM = 'M' or 'm' (not referenced), * 2*Nq0+Np0+LDW if NORM = '1', 'O', 'o', 'I' or 'i', * where LDW is given by: * IF( NPROW.NE.NPCOL ) THEN * LDW = MB_A*CEIL(CEIL(Np0/MB_A)/(LCM/NPROW)) * ELSE * LDW = 0 * END IF * 0 if NORM = 'F', 'f', 'E' or 'e' (not referenced), * * where LCM is the least common multiple of NPROW and NPCOL * LCM = ILCM( NPROW, NPCOL ) and CEIL denotes the ceiling * operation (ICEIL). * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Np0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * ICEIL, ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I, IAROW, IACOL, IB, ICOFF, ICTXT, ICURCOL, $ ICURROW, II, IIA, IN, IROFF, ICSR, ICSR0, $ IOFFA, IRSC, IRSC0, IRSR, IRSR0, JJ, JJA, K, $ LDA, LL, MYCOL, MYROW, NP, NPCOL, NPROW, NQ DOUBLE PRECISION SCALE, SUM, VALUE * .. * .. Local Arrays .. DOUBLE PRECISION RWORK( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DAXPY, DCOMBSSQ, $ DGAMX2D, DGSUM2D, DGEBR2D, $ DGEBS2D, PDCOL2ROW, PDTREECOMB, $ ZLASSQ * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, IDAMAX, NUMROC EXTERNAL ICEIL, IDAMAX, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. * .. Executable Statements .. * * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) ICSR = 1 IRSR = ICSR + NQ IRSC = IRSR + NQ IF( MYROW.EQ.IAROW ) THEN IRSC0 = IRSC + IROFF NP = NP - IROFF ELSE IRSC0 = IRSC END IF IF( MYCOL.EQ.IACOL ) THEN ICSR0 = ICSR + ICOFF IRSR0 = IRSR + ICOFF NQ = NQ - ICOFF ELSE ICSR0 = ICSR IRSR0 = IRSR END IF IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+N-1 ) LDA = DESCA( LLD_ ) * * If the matrix is symmetric, we address only a triangular portion * of the matrix. A sum of row (column) i of the complete matrix * can be obtained by adding along row i and column i of the the * triangular matrix, stopping/starting at the diagonal, which is * the point of reflection. The pictures below demonstrate this. * In the following code, the row sums created by --- rows below are * refered to as ROWSUMS, and the column sums shown by | are refered * to as COLSUMS. Infinity-norm = 1-norm = ROWSUMS+COLSUMS. * * UPLO = 'U' UPLO = 'L' * ____i______ ___________ * |\ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \|------| i i|---\ | * | \ | | |\ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * | \ | | | \ | * |__________\| |___|______\| * i * * II, JJ : local indices into array A * ICURROW : process row containing diagonal block * ICURCOL : process column containing diagonal block * IRSC0 : pointer to part of work used to store the ROWSUMS while * they are stored along a process column * IRSR0 : pointer to part of work used to store the ROWSUMS after * they have been transposed to be along a process row * II = IIA JJ = JJA * IF( N.EQ.0 ) THEN * VALUE = ZERO * ELSE IF( LSAME( NORM, 'M' ) ) THEN * * Find max(abs(A(i,j))). * VALUE = ZERO * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 20 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 10 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 10 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 20 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 40 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 30 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 30 CONTINUE END IF IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 40 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over the remaining rows/columns of the matrix. * DO 90 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 60 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.GT.IIA ) THEN DO 50 LL = IIA, II-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 50 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 60 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 80 K = II, II+IB-1 IF( JJ.LE.JJA+NQ-1 ) THEN DO 70 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( K+LL ) ) ) 70 CONTINUE END IF IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 80 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 90 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLMAXS * IF( MYCOL.EQ.IACOL ) THEN DO 110 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 100 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 100 CONTINUE END IF IF( MYROW.EQ.IAROW ) $ II = II + 1 110 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.IAROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.IAROW ) THEN DO 130 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 120 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 120 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 130 CONTINUE ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 180 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLMAXS * IF( MYCOL.EQ.ICURCOL ) THEN DO 150 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA IF( II.LE.IIA+NP-1 ) THEN DO 140 LL = II, IIA+NP-1 VALUE = MAX( VALUE, ABS( A( LL+K ) ) ) 140 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ II = II + 1 150 CONTINUE * * Reset local indices so we can find ROWMAXS * IF( MYROW.EQ.ICURROW ) $ II = II - IB END IF * * Find ROWMAXS * IF( MYROW.EQ.ICURROW ) THEN DO 170 K = 0, IB-1 IF( JJ.GT.JJA ) THEN DO 160 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA VALUE = MAX( VALUE, ABS( A( II+LL ) ) ) 160 CONTINUE END IF II = II + 1 IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 170 CONTINUE ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 180 CONTINUE * END IF * * Gather the result on process (IAROW,IACOL). * CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, I, K, -1, $ IAROW, IACOL ) * ELSE IF( LSAME( NORM, 'I' ) .OR. LSAME( NORM, 'O' ) .OR. $ NORM.EQ.'1' ) THEN * * Find normI( sub( A ) ) ( = norm1( sub( A ) ), since sub( A ) is * symmetric). * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 200 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 190 LL = IIA, II-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 190 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 200 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 220 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 210 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 210 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 220 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over remaining rows/columns of global matrix. * DO 270 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 240 K = 0, IB-1 SUM = ZERO IF( II.GT.IIA ) THEN DO 230 LL = IIA, II-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 230 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 240 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 260 K = II, II+IB-1 SUM = ZERO IF( JJA+NQ.GT.JJ ) THEN DO 250 LL = (JJ-1)*LDA, (JJA+NQ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 250 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 260 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 270 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * * Find COLSUMS * IF( MYCOL.EQ.IACOL ) THEN IOFFA = (JJ-1)*LDA DO 290 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 280 LL = II, IIA+NP-1 SUM = SUM + ABS( A( IOFFA+LL ) ) 280 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.IAROW ) $ II = II + 1 290 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.IAROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.IAROW ) THEN DO 310 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 300 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 300 CONTINUE END IF WORK( K-IIA+IRSC0 ) = SUM IF( MYCOL.EQ.IACOL ) $ JJ = JJ + 1 310 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.IACOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 360 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * * Find COLSUMS * IF( MYCOL.EQ.ICURCOL ) THEN IOFFA = ( JJ - 1 ) * LDA DO 330 K = 0, IB-1 SUM = ZERO IF( IIA+NP.GT.II ) THEN DO 320 LL = II, IIA+NP-1 SUM = SUM + ABS( A( LL+IOFFA ) ) 320 CONTINUE END IF IOFFA = IOFFA + LDA WORK( JJ+K-JJA+ICSR0 ) = SUM IF( MYROW.EQ.ICURROW ) $ II = II + 1 330 CONTINUE * * Reset local indices so we can find ROWSUMS * IF( MYROW.EQ.ICURROW ) $ II = II - IB * END IF * * Find ROWSUMS * IF( MYROW.EQ.ICURROW ) THEN DO 350 K = II, II+IB-1 SUM = ZERO IF( JJ.GT.JJA ) THEN DO 340 LL = (JJA-1)*LDA, (JJ-2)*LDA, LDA SUM = SUM + ABS( A( K+LL ) ) 340 CONTINUE END IF WORK(K-IIA+IRSC0) = SUM IF( MYCOL.EQ.ICURCOL ) $ JJ = JJ + 1 350 CONTINUE II = II + IB ELSE IF( MYCOL.EQ.ICURCOL ) THEN JJ = JJ + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 360 CONTINUE END IF * * After calls to DGSUM2D, process row 0 will have global * COLSUMS and process column 0 will have global ROWSUMS. * Transpose ROWSUMS and add to COLSUMS to get global row/column * sum, the max of which is the infinity or 1 norm. * IF( MYCOL.EQ.IACOL ) $ NQ = NQ + ICOFF CALL DGSUM2D( ICTXT, 'Columnwise', ' ', 1, NQ, WORK( ICSR ), 1, $ IAROW, MYCOL ) IF( MYROW.EQ.IAROW ) $ NP = NP + IROFF CALL DGSUM2D( ICTXT, 'Rowwise', ' ', NP, 1, WORK( IRSC ), $ MAX( 1, NP ), MYROW, IACOL ) * CALL PDCOL2ROW( ICTXT, N, 1, DESCA( MB_ ), WORK( IRSC ), $ MAX( 1, NP ), WORK( IRSR ), MAX( 1, NQ ), $ IAROW, IACOL, IAROW, IACOL, WORK( IRSC+NP ) ) * IF( MYROW.EQ.IAROW ) THEN IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFF CALL DAXPY( NQ, ONE, WORK( IRSR0 ), 1, WORK( ICSR0 ), 1 ) IF( NQ.LT.1 ) THEN VALUE = ZERO ELSE VALUE = WORK( IDAMAX( NQ, WORK( ICSR0 ), 1 ) ) END IF CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, VALUE, 1, I, K, $ -1, IAROW, IACOL ) END IF * ELSE IF( LSAME( NORM, 'F' ) .OR. LSAME( NORM, 'E' ) ) THEN * * Find normF( sub( A ) ). * SCALE = ZERO SUM = ONE * * Add off-diagonal entries, first * IF( LSAME( UPLO, 'U' ) ) THEN * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 370 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 370 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 390 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 380 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL ZLASSQ( II-IIA, A( IIA+K ), 1, SCALE, SUM ) 380 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 390 CONTINUE * ELSE * * Handle first block separately * IB = IN-IA+1 * IF( MYCOL.EQ.IACOL ) THEN DO 400 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.IAROW ) $ II = II + 1 CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 400 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.IAROW ) THEN II = II + IB END IF * ICURROW = MOD( IAROW+1, NPROW ) ICURCOL = MOD( IACOL+1, NPCOL ) * * Loop over rows/columns of global matrix. * DO 420 I = IN+1, IA+N-1, DESCA( MB_ ) IB = MIN( DESCA( MB_ ), IA+N-I ) * IF( MYCOL.EQ.ICURCOL ) THEN DO 410 K = (JJ-1)*LDA, (JJ+IB-2)*LDA, LDA CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) IF( MYROW.EQ.ICURROW ) $ II = II + 1 CALL ZLASSQ( IIA+NP-II, A( II+K ), 1, SCALE, SUM ) 410 CONTINUE * JJ = JJ + IB ELSE IF( MYROW.EQ.ICURROW ) THEN II = II + IB END IF * ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * 420 CONTINUE * END IF * * Perform the global scaled sum * RWORK( 1 ) = SCALE RWORK( 2 ) = SUM * CALL PDTREECOMB( ICTXT, 'All', 2, RWORK, IAROW, IACOL, $ DCOMBSSQ ) VALUE = RWORK( 1 ) * SQRT( RWORK( 2 ) ) * END IF * * Broadcast the result to the other processes * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN CALL DGEBS2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1 ) ELSE CALL DGEBR2D( ICTXT, 'All', ' ', 1, 1, VALUE, 1, IAROW, $ IACOL ) END IF * PZLANSY = VALUE * RETURN * * End of PZLANSY * END scalapack-1.8.0/SRC/pslaevswp.f0000640000175000017500000002564210363532303016147 0ustar muammarmuammar* * SUBROUTINE PSLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 15, 1997 * * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LWORK, N * .. * .. Array Arguments .. INTEGER DESCZ( * ), KEY( * ), NVS( * ) REAL WORK( * ), Z( * ), ZIN( LDZI, * ) * .. * * Purpose * ======= * * PSLAEVSWP moves the eigenvectors (potentially unsorted) from * where they are computed, to a ScaLAPACK standard block cyclic * array, sorted so that the corresponding eigenvalues are sorted. * * Notes * ===== * * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * N (global input) INTEGER * The order of the matrix A. N >= 0. * * ZIN (local input) REAL array, * dimension ( LDZI, NVS(iam) ) * The eigenvectors on input. Each eigenvector resides entirely * in one process. Each process holds a contiguous set of * NVS(iam) eigenvectors. The first eigenvector which the * process holds is: sum for i=[0,iam-1) of NVS(i) * * LDZI (locl input) INTEGER * leading dimension of the ZIN array * * Z (local output) REAL array * global dimension (N, N), local dimension (DESCZ(DLEN_), NQ) * The eigenvectors on output. The eigenvectors are distributed * in a block cyclic manner in both dimensions, with a * block size of NB. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * NVS (global input) INTEGER array, dimension( nprocs+1 ) * nvs(i) = number of processes * number of eigenvectors held by processes [0,i-1) * nvs(1) = number of eigen vectors held by [0,1-1) == 0 * nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) == * total number of eigenvectors * * KEY (global input) INTEGER array, dimension( N ) * Indicates the actual index (after sorting) for each of the * eigenvectors. * * WORK (local workspace) REAL array, dimension (LWORK) * * LWORK (local input) INTEGER dimension of WORK * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J, $ MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB, $ NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL, $ RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P EXTERNAL INDXG2L, INDXG2P * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGERV2D, SGESD2D * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * NB = DESCZ( MB_ ) * NPROCS = NPROW*NPCOL * * If PxSTEIN operates on a sub-matrix of a global matrix, the * key [] that contains the indicies of the eigenvectors is refe- * renced to the dimensions of the sub-matrix and not the global * distrubited matrix. Because of this, PxLAEVSWP will incorrectly * map the eigenvectors to the global eigenvector matrix, Z, unless * the key[] elements are shifted as below. * DO 10 J = DESCZ( N_ ), 1, -1 KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 ) 10 CONTINUE * DO 110 DIST = 0, NPROCS - 1 * SENDTO = MOD( IAM+DIST, NPROCS ) RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS ) * SENDROW = MOD( SENDTO, NPROW ) SENDCOL = SENDTO / NPROW RECVROW = MOD( RECVFROM, NPROW ) RECVCOL = RECVFROM / NPROW * SENDROW = SENDTO / NPCOL SENDCOL = MOD( SENDTO, NPCOL ) RECVROW = RECVFROM / NPCOL RECVCOL = MOD( RECVFROM, NPCOL ) * * Figure out what I have that process "sendto" wants * NBUFSIZE = 0 * * We are looping through the eigenvectors that I presently own. * DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( SENDCOL.EQ.PCOL ) THEN MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )* $ DESCZ( MB_ ) + 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 30 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 ) DO 20 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 WORK( NBUFSIZE ) = ZIN( I+1-IZ, $ J-NVS( 1+IAM )+1-JZ ) 20 CONTINUE 30 CONTINUE END IF 40 CONTINUE * * IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL ) $ CALL SGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, WORK, NBUFSIZE, $ SENDROW, SENDCOL ) * * * Figure out what process "recvfrom" has that I want * NBUFSIZE = 0 DO 70 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 60 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 50 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 50 CONTINUE 60 CONTINUE END IF 70 CONTINUE * * * IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL ) $ CALL SGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, WORK, 1, RECVROW, $ RECVCOL ) * NBUFSIZE = 0 DO 100 J = NVS( 1+RECVFROM ) + JZ, $ NVS( 1+RECVFROM+1 ) + JZ - 1, 1 PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.PCOL ) THEN CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1, $ NPCOL ) CYCLIC_I = 1 MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) + $ 1 MAXII = DESCZ( M_ ) INCII = DESCZ( MB_ )*NPROW DO 90 II = MINII, MAXII, INCII MINI = MAX( II, IZ ) CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1, $ NPROW ) MAXI = MIN( II+NB-1, N+IZ-1 ) DO 80 I = MINI, MAXI, 1 NBUFSIZE = NBUFSIZE + 1 Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) ) $ = WORK( NBUFSIZE ) CYCLIC_I = CYCLIC_I + 1 80 CONTINUE 90 CONTINUE END IF 100 CONTINUE * 110 CONTINUE RETURN * * End of PSLAEVSWP * END scalapack-1.8.0/SRC/psormqr.f0000640000175000017500000004201010363532303015612 0ustar muammarmuammar SUBROUTINE PSORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) REAL A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PSORMQR overwrites the general real M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'T': Q**T * sub( C ) sub( C ) * Q**T * * where Q is a real orthogonal distributed matrix defined as the * product of k elementary reflectors * * Q = H(1) H(2) . . . H(k) * * as returned by PSGEQRF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**T from the Left; * = 'R': apply Q or Q**T from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'T': Transpose, apply Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) REAL pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PSGEQRF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ); * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) REAL, array, dimension LOCc(JA+K-1). * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PSGEQRF. * TAU is tied to the distributed matrix A. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) REAL array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( N+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, IINFO, $ IPW, IROFFA, IROFFC, J, J1, J2, J3, JB, JCC, $ LCM, LCMQ, LWMIN, MI, MPC0, MYCOL, MYROW, NI, $ NPA0, NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PSLARFB, $ PSLARFT, PSORM2R, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = REAL( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'T' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( M, 3, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( N, 4, K, 5, IA, JA, DESCA, 9, M, 3, N, 4, IC, $ JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PSORMQR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN J1 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J2 = JA+K-1 J3 = DESCA( NB_ ) ELSE J1 = MAX( ( (JA+K-2) / DESCA( NB_ ) ) * DESCA( NB_ ) + 1, JA ) J2 = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) $ + 1 J3 = -DESCA( NB_ ) END IF * IF( LEFT ) THEN NI = N JCC = JC IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M ICC = IC END IF * * Use unblocked code for the first block if necessary * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) $ CALL PSORM2R( SIDE, TRANS, M, N, J1-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 DO 10 J = J1, J2, J3 JB = MIN( DESCA( NB_ ), K-J+JA ) * * Form the triangular factor of the block reflector * H = H(j) H(j+1) . . . H(j+jb-1) * CALL PSLARFT( 'Forward', 'Columnwise', NQ-J+JA, JB, A, $ IA+J-JA, J, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic+j-ja:ic+m-1,jc:jc+n-1) * MI = M - J + JA ICC = IC + J - JA ELSE * * H or H' is applied to C(ic:ic+m-1,jc+j-ja:jc+n-1) * NI = N - J + JA JCC = JC + J - JA END IF * * Apply H or H' * CALL PSLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, $ JB, A, IA+J-JA, J, DESCA, WORK, C, ICC, JCC, $ DESCC, WORK( IPW ) ) 10 CONTINUE * * Use unblocked code for the last block if necessary * IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) $ CALL PSORM2R( SIDE, TRANS, M, N, J2-JA, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = REAL( LWMIN ) * RETURN * * End of PSORMQR * END scalapack-1.8.0/SRC/pzlarzb.f0000640000175000017500000005665010363532303015612 0ustar muammarmuammar SUBROUTINE PZLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 14, 2000 * * .. Scalar Arguments .. CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARZB applies a complex block reflector Q or its conjugate * transpose Q**H to a complex M-by-N distributed matrix sub( C ) * denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right. * * Q is a product of k elementary reflectors as returned by PZTZRZF. * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * DIRECT (global input) CHARACTER * Indicates how H is formed from a product of elementary * reflectors * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise (not supported yet) * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L', * (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local * pieces of the distributed vectors V representing the * Householder transformation as returned by PZTZRZF. * LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) COMPLEX*16 array, dimension MB_V by MB_V * The lower triangular matrix T in the representation of the * block reflector. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT CHARACTER COLBTOP, TRANST INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2, $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV, $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL, $ IVROW, J, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV, $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20, $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV, $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, INFOG2L, $ PBZMATADD, PB_TOPGET, PXERBLA, PBZTRAN, $ ZGEBR2D, ZGEBS2D, ZGEMM, $ ZGSUM2D, ZLACGV, ZLACPY, ZLASET, $ ZTRBR2D, ZTRBS2D, ZTRMM * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -3 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -4 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * LEFT = LSAME( SIDE, 'L' ) IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) ICOFFV = MOD( JV-1, NBV ) NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFV = IIV + ( JJV-1 ) * LDV MBC = DESCC( MB_ ) NBC = DESCC( NB_ ) NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1, $ JJC1, ICROW1, ICCOL1 ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NQCALL ) ) * IF( LEFT ) THEN IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, MBC ) MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = ICOFFC1 NQC2 = NQC1 ELSE IROFFC1 = MOD( IC-1, MBC ) MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW ) IF( MYROW.EQ.ICROW1 ) $ MPC1 = MPC1 - IROFFC1 ICOFFC1 = MOD( JC-1, NBC ) NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL ) IF( MYCOL.EQ.ICCOL1 ) $ NQC1 = NQC1 - ICOFFC1 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = IROFFC1 MPC2 = MPC1 ICOFFC2 = MOD( JC+N-L-1, NBC ) NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NQCALL ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC2 = ICOFFV is required by the current transposition * routine PBZTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW2 ) THEN MPC20 = MPC2 + IROFFC2 ELSE MPC20 = MPC2 END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBZTRAN * IPV = 1 IPW = IPV + MPC20 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC20 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL ZLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW+ICOFFV*LW ), LW ) ELSE CALL ZLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW ), LW ) END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K * CALL PBZTRAN( ICTXT, 'Rowwise', 'Conjugate transpose', K, $ M+ICOFFV, DESCV( NB_ ), WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K * IF( MYROW.EQ.ICROW2 ) $ IPV = IPV + IROFFC2 * * WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V' * WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K * LW = MAX( 1, NQC2 ) * IF( MPC2.GT.0 ) THEN CALL ZGEMM( 'Transpose', 'No transpose', NQC2, K, MPC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', NQC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 10 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBZMATADD( ICTXT, 'Transpose', NQC2, IINXT-IIBEG+1, $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE, $ WORK( IPW+ITOP ), LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 10 END IF END IF * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL ZTRBS2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV ) ELSE CALL ZTRBR2D( ICTXT, 'Rowwise', ' ', 'Lower', 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non unit', NQC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', NQC2, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( MPC1.GT.0 ) THEN MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW ) ITOP = MAX( 0, MYDIST * MBC - IROFFC1 ) IIBEG = IIC1 IIEND = IIC1 + MPC1 - 1 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND ) * 20 CONTINUE IF( IIBEG.LE.IINXT ) THEN CALL PBZMATADD( ICTXT, 'Transpose', IINXT-IIBEG+1, NQC2, $ -ONE, WORK( IPW+ITOP ), LW, ONE, $ C( IIBEG+(JJC1-1)*LDC ), LDC ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBC - IROFFC1 IIBEG = IINXT +1 IINXT = MIN( IINXT+MBC, IIEND ) GO TO 20 END IF END IF * * C2 C2 - V' * W' * C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )' * MPC2 x NQC2 MPC2 x K K x NQC2 * DO 30 J = 1, K CALL ZLACGV( MPC2, WORK( IPV+(J-1)*LV ), 1 ) 30 CONTINUE CALL ZGEMM( 'No transpose', 'Transpose', MPC2, NQC2, K, -ONE, $ WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC2 ), LDC ) * ELSE * * Form sub( C ) * Q or sub( C ) * Q' * * Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2 * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2 * WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC2 LV = MAX( 1, K ) LW = MAX( 1, MPC2 ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL ZTRBS2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV ) CALL ZLACPY( 'All', K, NQC2, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC2, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL ZTRBR2D( ICTXT, 'Columnwise', COLBTOP, 'Lower', $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * * WORK( IPV ) is K x NQC2 = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K * IF( NQC2.GT.0 ) THEN CALL ZGEMM( 'No Transpose', 'Transpose', MPC2, K, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO, $ WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', MPC2, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * * WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 40 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBZMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC, ONE, $ WORK( IPW+ILEFT*LW ), LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 40 END IF END IF * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN DO 50 J = 1, K CALL ZLACGV( K-J+1, T( J+(J-1)*MBV ), 1 ) 50 CONTINUE CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non unit', MPC2, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW ) DO 60 J = 1, K CALL ZLACGV( K-J+1, T( J+(J-1)*MBV ), 1 ) 60 CONTINUE ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', MPC2, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C1 = C1 - WORK( IPW ) * IF( NQC1.GT.0 ) THEN MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 ) JJBEG = JJC1 JJEND = JJC1 + NQC1 - 1 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND ) * 70 CONTINUE IF( JJBEG.LE.JJNXT ) THEN CALL PBZMATADD( ICTXT, 'No transpose', MPC2, $ JJNXT-JJBEG+1, -ONE, $ WORK( IPW+ILEFT*LW ), LW, ONE, $ C( IIC1+(JJBEG-1)*LDC ), LDC ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBC - ICOFFC1 JJBEG = JJNXT +1 JJNXT = MIN( JJNXT+NBC, JJEND ) GO TO 70 END IF END IF * * C2 C2 - W * conjg( V ) * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * conjg( WORK( IPV ) ) * MPC2 x NQC2 MPC2 x K K x NQC2 * DO 80 J = 1, NQC2 CALL ZLACGV( K, WORK( IPV+(J-1)*LV ), 1 ) 80 CONTINUE IF( IOFFC2.GT.0 ) $ CALL ZGEMM( 'No transpose', 'No transpose', MPC2, NQC2, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC2 ), LDC ) * END IF * RETURN * * End of PZLARZB * END scalapack-1.8.0/SRC/pzlapv2.f0000640000175000017500000003673310363532303015524 0ustar muammarmuammar SUBROUTINE PZLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAPV2 applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a M-by-N distributed matrix sub( A ) denoting * A(IA:IA+M-1,JA:JA+N-1), resulting in row or column pivoting. The * pivot vector should be aligned with the distributed matrix A. For * pivoting the rows of sub( A ), IPIV should be distributed along a * process column and replicated over all process rows. Similarly, * IPIV should be distributed along a process row and replicated over * all process columns for column pivoting. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIREC (global input) CHARACTER * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P * sub( A ); * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P ) * sub( A ). * * ROWCOL (global input) CHARACTER * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this local array contains the local pieces of the * distributed matrix sub( A ) to which the row or columns * interchanges will be applied. On exit, this array contains * the local pieces of the permuted distributed matrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (input) INTEGER array, dimension >= LOCr(M_A)+MB_A if * ROWCOL = 'R', LOCc(N_A)+NB_A otherwise. It contains * the pivoting information. IPIV(i) is the global row (column), * local row (column) i was swapped with. The last piece of the * array of size MB_A (resp. NB_A) is used as workspace. IPIV is * tied to the distributed matrix A. * * IP (global input) INTEGER * IPIV's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JP (global input) INTEGER * IPIV's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCIP (global and local input) INTEGER array of dimension 8 * The array descriptor for the distributed matrix IPIV. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL FORWRD, ROWPVT INTEGER I, IB, ICTXT, ICURCOL, ICURROW, IIP, IP1, ITMP, $ IPVWRK, J, JB, JJP, JP1, K, MA, MBA, MYCOL, $ MYROW, NBA, NPCOL, NPROW * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBS2D, IGEBR2D, INFOG2L, $ PZSWAP * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN END IF FORWRD = LSAME( DIREC, 'F' ) * * * Get grid and matrix parameters * MA = DESCA( M_ ) MBA = DESCA( MB_ ) NBA = DESCA( NB_ ) ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * IF( FORWRD ) THEN CALL INFOG2L( IP, JP, DESCIP, NPROW, NPCOL, MYROW, MYCOL, $ IIP, JJP, ICURROW, ICURCOL ) * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * Loop over rows of sub( A ) * I = IA IB = MIN( M, ICEIL( IA, MBA ) * MBA - IA + 1 ) 10 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP ), IB ) ITMP = IIP IIP = IIP + IB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( ITMP ), IB, ICURROW, MYCOL ) END IF * * Pivot the block of rows * DO 20 K = I, I+IB-1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PZSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP + 1 20 CONTINUE * * Go on to next row of processes, increment row counter, * and figure number of rows to pivot next * ICURROW = MOD( ICURROW+1, NPROW ) I = I + IB IB = MIN( MBA, M-I+IA ) IF( IB .GT. 0 ) GOTO 10 * * If I am pivoting the columns of sub( A ) * ELSE IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * Loop over columns of sub( A ) * J = JA JB = MIN( N, ICEIL( JA, NBA ) * NBA - JA + 1 ) 30 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP ), JB ) ITMP = JJP JJP = JJP + JB ELSE ITMP = IPVWRK CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( ITMP ), JB, MYROW, ICURCOL ) END IF * * Pivot the block of columns * DO 40 K = J, J+JB-1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PZSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP + 1 40 CONTINUE * * Go on to next column of processes, increment column * counter, and figure number of columns to pivot next * ICURCOL = MOD( ICURCOL+1, NPCOL ) J = J + JB JB = MIN( NBA, N-J+JA ) IF( JB .GT. 0 ) GOTO 30 END IF * * If I want to apply pivots in reverse order, i.e. reversing * pivoting done earlier. Thus this section computes * inv( P ) * sub( A ). * ELSE * * If I'm pivoting the rows of sub( A ) * IF( ROWPVT ) THEN CALL INFOG2L( IP+M-1, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) * IPVWRK = NUMROC( DESCIP( M_ ), DESCIP( MB_ ), MYROW, $ DESCIP( RSRC_ ), NPROW ) + 1 - $ DESCIP( MB_ ) * * If I'm not in the current process row, my IIP points out * past end of pivot vector (since I don't own a piece of the * last row). Adjust IIP so it points at last pivot entry. * IF( MYROW.NE.ICURROW ) IIP = IIP - 1 * * Loop over rows in reverse order, starting at last row * I = IA + M - 1 IB = MOD( I, MBA ) IF( IB .EQ. 0 ) IB = MBA IB = MIN( IB, M ) 50 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process column * IF( MYROW.EQ.ICURROW ) THEN ITMP = IIP IIP = IIP - IB CALL IGEBS2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IIP+1 ), IB ) ELSE CALL IGEBR2D( ICTXT, 'Columnwise', ' ', IB, 1, $ IPIV( IPVWRK ), IB, ICURROW, MYCOL ) ITMP = IPVWRK + IB - 1 END IF * * Pivot the block of rows * DO 60 K = I, I-IB+1, -1 IP1 = IPIV( ITMP ) - IP + IA IF( IP1.NE.K ) $ CALL PZSWAP( N, A, K, JA, DESCA, MA, A, IP1, JA, $ DESCA, MA ) ITMP = ITMP - 1 60 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURROW = MOD( NPROW+ICURROW-1, NPROW ) I = I - IB IB = MIN( MBA, I-IA+1 ) IF( IB .GT. 0 ) GOTO 50 * * Otherwise, I'm pivoting the columns of sub( A ) * ELSE CALL INFOG2L( IP, JP+N-1, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) IPVWRK = NUMROC( DESCIP( N_ ), DESCIP( NB_ ), MYCOL, $ DESCIP( CSRC_ ), NPCOL ) + 1 - $ DESCIP( NB_ ) * * If I'm not in the current process column, my JJP points out * past end of pivot vector (since I don't own a piece of the * last column). Adjust JJP so it points at last pivot entry. * IF( MYCOL.NE.ICURCOL ) JJP = JJP - 1 * * Loop over columns in reverse order starting at last column * J = JA + N - 1 JB = MOD( J, NBA ) IF( JB .EQ. 0 ) JB = NBA JB = MIN( JB, N ) 70 CONTINUE * * Find local pointer into IPIV, and broadcast this block's * pivot information to everyone in process row * IF( MYCOL.EQ.ICURCOL ) THEN ITMP = JJP JJP = JJP - JB CALL IGEBS2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( JJP+1 ), JB ) ELSE CALL IGEBR2D( ICTXT, 'Rowwise', ' ', JB, 1, $ IPIV( IPVWRK ), JB, MYROW, ICURCOL ) ITMP = IPVWRK + JB - 1 END IF * * Pivot a block of columns * DO 80 K = J, J-JB+1, -1 JP1 = IPIV( ITMP ) - JP + JA IF( JP1.NE.K ) $ CALL PZSWAP( M, A, IA, K, DESCA, 1, A, IA, JP1, $ DESCA, 1 ) ITMP = ITMP - 1 80 CONTINUE * * Go to previous row of processes, decrement row counter, * and figure number of rows to be pivoted next * ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL ) J = J - JB JB = MIN( NBA, J-JA+1 ) IF( JB .GT. 0 ) GOTO 70 END IF * END IF * RETURN * * End PZLAPV2 * END scalapack-1.8.0/SRC/dpttrsv.f0000640000175000017500000000670510363532303015630 0ustar muammarmuammar SUBROUTINE DPTTRSV( TRANS, N, NRHS, D, E, B, LDB, $ INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * November, 1996. * Modified from DPTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. DOUBLE PRECISION D( * ) DOUBLE PRECISION B( LDB, * ), E( * ) * .. * * Purpose * ======= * * DPTTRSV solves one of the triangular systems * L**T* X = B, or L * X = B, * where L is the Cholesky factor of a Hermitian positive * definite tridiagonal matrix A such that * A = L*D*L**H (computed by DPTTRF). * * Arguments * ========= * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': L * X = B (No transpose) * = 'T': L**T * X = B (Transpose) * * N (input) INTEGER * The order of the tridiagonal matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * D (input) REAL array, dimension (N) * The n diagonal elements of the diagonal matrix D from the * factorization computed by DPTTRF. * * E (input) COMPLEX array, dimension (N-1) * The (n-1) off-diagonal elements of the unit bidiagonal * factor U or L from the factorization computed by DPTTRF * (see UPLO). * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL NOTRAN INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input arguments. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( NRHS.LT.0 ) THEN INFO = -3 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN IF( NOTRAN ) THEN * DO 60 J = 1, NRHS * * Solve L * x = b. * DO 40 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) 40 CONTINUE 60 CONTINUE * ELSE * DO 65 J = 1, NRHS * * Solve L**H * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - $ B( I+1, J )*( E( I ) ) 50 CONTINUE 65 CONTINUE ENDIF * RETURN * * End of DPTTRS * END scalapack-1.8.0/SRC/pcunmtr.f0000640000175000017500000004044410363532303015610 0ustar muammarmuammar SUBROUTINE PCUNMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNMTR overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix of order nq, with * nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the * product of nq-1 elementary reflectors, as returned by PCHETRD: * * if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); * * if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A(IA:*,JA:*) contains elementary * reflectors from PCHETRD; * = 'L': Lower triangle of A(IA:*,JA:*) contains elementary * reflectors from PCHETRD. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * A (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * or (LLD_A,LOCc(JA+N-1)) if SIDE = 'R'. The vectors which * define the elementary reflectors, as returned by PCHETRD. * If SIDE = 'L', LLD_A >= max(1,LOCr(IA+M-1)); * if SIDE = 'R', LLD_A >= max(1,LOCr(IA+N-1)). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX array, dimension LTAU, where * if SIDE = 'L' and UPLO = 'U', LTAU = LOCc(M_A), * if SIDE = 'L' and UPLO = 'L', LTAU = LOCc(JA+M-2), * if SIDE = 'R' and UPLO = 'U', LTAU = LOCc(N_A), * if SIDE = 'R' and UPLO = 'L', LTAU = LOCc(JA+N-2). * TAU(i) must contain the scalar factor of the elementary * reflector H(i), as returned by PCHETRD. TAU is tied to the * distributed matrix A. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * * If UPLO = 'U', * IAA = IA, JAA = JA+1, ICC = IC, JCC = JC; * else UPLO = 'L', * IAA = IA+1, JAA = JA; * if SIDE = 'L', * ICC = IC+1; JCC = JC; * else * ICC = IC; JCC = JC+1; * end if * end if * * If SIDE = 'L', * MI = M-1; NI = N; * LWORK >= MAX( (NB_A*(NB_A-1))/2, (NqC0 + MpC0)*NB_A ) + * NB_A * NB_A * else if SIDE = 'R', * MI = M; MI = N-1; * LWORK >= MAX( (NB_A*(NB_A-1))/2, ( NqC0 + MAX( NpA0 + * NUMROC( NUMROC( NI+ICOFFC, NB_A, 0, 0, NPCOL ), * NB_A, 0, 0, LCMQ ), MpC0 ) )*NB_A ) + * NB_A * NB_A * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IAA-1, MB_A ), ICOFFA = MOD( JAA-1, NB_A ), * IAROW = INDXG2P( IAA, MB_A, MYROW, RSRC_A, NPROW ), * NpA0 = NUMROC( NI+IROFFA, MB_A, MYROW, IAROW, NPROW ), * * IROFFC = MOD( ICC-1, MB_C ), ICOFFC = MOD( JCC-1, NB_C ), * ICROW = INDXG2P( ICC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JCC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( MI+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( NI+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN, UPPER INTEGER IAA, IAROW, ICC, ICCOL, ICOFFC, ICROW, ICTXT, $ IINFO, IROFFA, IROFFC, JAA, JCC, LCM, LCMQ, $ LWMIN, MI, MPC0, MYCOL, MYROW, NI, NPA0, NPCOL, $ NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCUNMQL, $ PCUNMQR, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, ICHAR, MAX, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) * IF( UPPER ) THEN IAA = IA JAA = JA+1 ICC = IC JCC = JC ELSE IAA = IA+1 JAA = JA IF( LEFT ) THEN ICC = IC + 1 JCC = JC ELSE ICC = IC JCC = JC + 1 END IF END IF * * NQ is the order of Q * IF( LEFT ) THEN NQ = M MI = M - 1 NI = N CALL CHK1MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, INFO ) ELSE NQ = N MI = M NI = N - 1 CALL CHK1MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, INFO ) END IF CALL CHK1MAT( MI, 4, NI, 5, ICC, JCC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IAA-1, DESCA( MB_ ) ) IROFFC = MOD( ICC-1, DESCC( MB_ ) ) ICOFFC = MOD( JCC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IAA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( ICC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JCC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( MI+IROFFC, DESCC( MB_ ), MYROW, ICROW, $ NPROW ) NQC0 = NUMROC( NI+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, $ NPCOL ) * IF( LEFT ) THEN LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( NB_ ) ) + $ DESCA( NB_ ) * DESCA( NB_ ) ELSE NPA0 = NUMROC( NI+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = MAX( ( DESCA( NB_ ) * ( DESCA( NB_ ) - 1 ) ) $ / 2, ( NQC0 + MAX( NPA0 + NUMROC( NUMROC( $ NI+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ), MPC0 ) ) * $ DESCA( NB_ ) ) + DESCA( NB_ ) * DESCA( NB_ ) END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. $ .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -3 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF * IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 IF( NOTRAN ) THEN IDUM1( 3 ) = ICHAR( 'N' ) ELSE IDUM1( 3 ) = ICHAR( 'C' ) END IF IDUM2( 3 ) = 3 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( MI, 4, NQ-1, 4, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) ELSE CALL PCHK2MAT( NI, 5, NQ-1, 5, IAA, JAA, DESCA, 9, MI, 4, $ NI, 5, ICC, JCC, DESCC, 14, 4, IDUM1, IDUM2, $ INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNMTR', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) $ RETURN * IF( UPPER ) THEN * * Q was determined by a call to PCHETRD with UPLO = 'U' * CALL PCUNMQL( SIDE, TRANS, MI, NI, NQ-1, A, IA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * ELSE * * Q was determined by a call to PCHETRD with UPLO = 'L' * CALL PCUNMQR( SIDE, TRANS, MI, NI, NQ-1, A, IAA, JAA, DESCA, $ TAU, C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * END IF * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNMTR * END scalapack-1.8.0/SRC/pzunm2l.f0000640000175000017500000004146510363532303015533 0ustar muammarmuammar SUBROUTINE PZUNM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNM2L overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(k) . . . H(2) H(1) * * as returned by PZGEQLF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+K-1)). On entry, the * j-th column must contain the vector which defines the elemen- * tary reflector H(j), JA <= j <= JA+K-1, as returned by * PZGEQLF in the K columns of its distributed matrix * argument A(IA:*,JA:JA+K-1). A(IA:*,JA:JA+K-1) is modified by * the routine but restored on exit. * If SIDE = 'L', LLD_A >= MAX( 1, LOCr(IA+M-1) ), * if SIDE = 'R', LLD_A >= MAX( 1, LOCr(IA+N-1) ). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(JA+N-1) * This array contains the scalar factors TAU(j) of the * elementary reflectors H(j) as returned by PZGEQLF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * If SIDE = 'L', LWORK >= MpC0 + MAX( 1, NqC0 ); * if SIDE = 'R', LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( * NUMROC( N+ICOFFC,NB_A,0,0,NPCOL ),NB_A,0,0,LCMQ ) ); * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( MB_A.EQ.MB_C .AND. IROFFA.EQ.IROFFC .AND. IAROW.EQ.ICROW ) * If SIDE = 'R', * ( MB_A.EQ.NB_C .AND. IROFFA.EQ.ICOFFC ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, ICCOL, ICOFFC, ICROW, ICTXT, ICC, $ II, IROFFA, IROFFC, J, J1, J2, J3, JCC, JJ, $ LCM, LCMQ, LWMIN, MI, MP, MPC0, MYCOL, MYROW, $ NI, NPCOL, NPROW, NQ, NQC0 COMPLEX*16 AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PB_TOPGET, PB_TOPSET, PXERBLA, PZELSET, $ PZELSET2, PZLARF, PZLARFC, ZGEBR2D, ZGEBS2D, $ ZGERV2D, ZGESD2D, ZSCAL * .. * .. External Functions .. LOGICAL LSAME INTEGER ILCM, INDXG2P, NUMROC EXTERNAL ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( M, 3, K, 5, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( N, 4, K, 5, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN LWMIN = MPC0 + MAX( 1, NQC0 ) ELSE LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL LWMIN = NQC0 + MAX( MAX( 1, MPC0 ), NUMROC( NUMROC( $ N+ICOFFC, DESCA( NB_ ), 0, 0, NPCOL ), $ DESCA( NB_ ), 0, 0, LCMQ ) ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( .NOT.LEFT .AND. DESCA( MB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. IROFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( LEFT .AND. IAROW.NE.ICROW ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. IROFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( LEFT .AND. DESCA( MB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(1400+MB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, ICC, $ JCC, ICROW, ICCOL ) IF( LEFT ) THEN IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JC+N-1, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - DCONJG( TAU( JJ ) ) END IF CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1 ) CALL ZSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, AJJ, 1, $ IAROW, IACOL ) CALL ZSCAL( NQ-JCC+1, AJJ, $ C( ICC+(JCC-1)*DESCC( LLD_ ) ), $ DESCC( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN IF( NOTRAN ) THEN AJJ = ONE - TAU( JJ ) ELSE AJJ = ONE - DCONJG( TAU( JJ ) ) END IF END IF * IF( IACOL.NE.ICCOL ) THEN IF( MYCOL.EQ.IACOL ) $ CALL ZGESD2D( ICTXT, 1, 1, AJJ, 1, MYROW, ICCOL ) IF( MYCOL.EQ.ICCOL ) $ CALL ZGERV2D( ICTXT, 1, 1, AJJ, 1, MYROW, IACOL ) END IF * IF( MYCOL.EQ.ICCOL ) THEN MP = NUMROC( IC+M-1, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) CALL ZSCAL( MP-ICC+1, AJJ, C( ICC+(JCC-1)* $ DESCC( LLD_ ) ), 1 ) END IF * END IF * ELSE * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) THEN J1 = JA J2 = JA+K-1 J3 = 1 ELSE J1 = JA+K-1 J2 = JA J3 = -1 END IF * IF( LEFT ) THEN NI = N IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'D-ring' ) END IF CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) ELSE MI = M END IF * DO 10 J = J1, J2, J3 * IF( LEFT ) THEN * * H(j) or H(j)' is applied to C(ic:ic+m-k+j-ja,jc:jc+n-1) * MI = M - K + J - JA + 1 ELSE * * H(j) or H(j)' is applied to C(ic:ic+m-1,jc:jc+n-k+j-ja) * NI = N - K + J - JA + 1 END IF * * Apply H(j) or H(j)' * CALL PZELSET2( AJJ, A, IA+NQ-K+J-JA, J, DESCA, ONE ) IF( NOTRAN ) THEN CALL PZLARF( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, $ IC, JC, DESCC, WORK ) ELSE CALL PZLARFC( SIDE, MI, NI, A, IA, J, DESCA, 1, TAU, C, $ IC, JC, DESCC, WORK ) END IF CALL PZELSET( A, IA+NQ-K+J-JA, J, DESCA, AJJ ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNM2L * END scalapack-1.8.0/SRC/pztrti2.f0000640000175000017500000002366110363532303015540 0ustar muammarmuammar SUBROUTINE PZTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZTRTI2 computes the inverse of a complex upper or lower triangular * block matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). This matrix should be * contained in one and only one process memory space (local operation). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * = 'U': sub( A ) is upper triangular; * = 'L': sub( A ) is lower triangular. * * DIAG (global input) CHARACTER*1 * = 'N': sub( A ) is non-unit triangular * = 'U': sub( A ) is unit triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)), * this array contains the local pieces of the triangular matrix * sub( A ). If UPLO = 'U', the leading N-by-N upper triangular * part of the matrix sub( A ) contains the upper triangular * matrix, and the strictly lower triangular part of sub( A ) * is not referenced. If UPLO = 'L', the leading N-by-N lower * triangular part of the matrix sub( A ) contains the lower * triangular matrix, and the strictly upper triangular part * of sub( A ) is not referenced. If DIAG = 'U', the diagonal * elements of sub( A ) are also not referenced and are assumed * to be 1. On exit, the (triangular) inverse of the original * matrix, in the same storage format. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL NOUNIT, UPPER INTEGER IACOL, IAROW, ICTXT, ICURR, IDIAG, IIA, IOFFA, $ JJA, LDA, MYCOL, MYROW, NA, NPCOL, NPROW COMPLEX*16 AJJ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, INFOG2L, $ PXERBLA, ZSCAL, ZTRMV * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) UPPER = LSAME( UPLO, 'U' ) NOUNIT = LSAME( DIAG, 'N' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN INFO = -2 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * * Compute local indexes * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) * IF( UPPER ) THEN * IOFFA = IIA + ( JJA - 1 ) * LDA ICURR = IOFFA + LDA * IF( NOUNIT ) THEN * * Compute inverse of upper non-unit triangular matrix. * A( IOFFA ) = ONE / A( IOFFA ) IDIAG = ICURR + 1 DO 10 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements 1:j-1 of j-th column. * CALL ZTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL ZSCAL( NA, AJJ, A( ICURR ), 1 ) IDIAG = IDIAG + LDA + 1 ICURR = ICURR + LDA 10 CONTINUE * ELSE * * Compute inverse of upper unit triangular matrix. * DO 20 NA = 1, N-1 * * Compute elements 1:j-1 of j-th column. * CALL ZTRMV( 'Upper', 'No transpose', DIAG, NA, $ A( IOFFA ), LDA, A( ICURR ), 1 ) CALL ZSCAL( NA, -ONE, A( ICURR ), 1 ) ICURR = ICURR + LDA 20 CONTINUE * END IF * ELSE * ICURR = IIA + N - 1 + ( JJA + N - 2 ) * LDA IOFFA = ICURR - LDA * IF( NOUNIT ) THEN * * Compute inverse of lower non-unit triangular matrix. * A( ICURR ) = ONE / A( ICURR ) IDIAG = IOFFA - 1 DO 30 NA = 1, N-1 A( IDIAG ) = ONE / A( IDIAG ) AJJ = -A( IDIAG ) * * Compute elements j+1:n of j-th column. * CALL ZTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL ZSCAL( NA, AJJ, A( IOFFA ), 1 ) ICURR = IDIAG IDIAG = IDIAG - LDA - 1 IOFFA = IDIAG + 1 30 CONTINUE * ELSE * * Compute inverse of lower unit triangular matrix. * DO 40 NA = 1, N-1 * * Compute elements j+1:n of j-th column. * CALL ZTRMV( 'Lower', 'No transpose', DIAG, NA, $ A( ICURR ), LDA, A( IOFFA ), 1 ) CALL ZSCAL( NA, -ONE, A( IOFFA ), 1 ) ICURR = ICURR - LDA - 1 IOFFA = ICURR - LDA 40 CONTINUE * END IF * END IF * END IF * * End of PZTRTI2 * END scalapack-1.8.0/SRC/pdorgr2.f0000640000175000017500000002564610363532303015506 0ustar muammarmuammar SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDORGR2 generates an M-by-N real distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as the * last M rows of a product of K elementary reflectors of order N * * Q = H(1) H(2) . . . H(k) * * as returned by PDGERQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA+M-K <= i <= IA+M-1, as * returned by PDGERQF in the K rows of its distributed * matrix argument A(IA+M-K:IA+M-1,JA:*). On exit, this array * contains the local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PDGERQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, LWMIN, MP, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 DOUBLE PRECISION TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PDELSET, $ PDLARF, PDLASET, PDSCAL, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia:ia+m-k-1 to rows of the unit matrix * CALL PDLASET( 'All', M-K, N-M, ZERO, ZERO, A, IA, JA, DESCA ) CALL PDLASET( 'All', M-K, M, ZERO, ONE, A, IA, JA+N-M, DESCA ) * END IF * TAUI = ZERO MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+M-K, IA+M-1 * * Apply H(i) to A(ia:i,ja:ja+n-k+i-1) from the right * CALL PDELSET( A, I, JA+N-M+I-IA, DESCA, ONE ) CALL PDLARF( 'Right', I-IA, I-IA+N-M+1, A, I, JA, DESCA, $ DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK ) II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, MP ) ) CALL PDSCAL( I-IA+N-M, -TAUI, A, I, JA, DESCA, DESCA( M_ ) ) CALL PDELSET( A, I, JA+N-M+I-IA, DESCA, ONE-TAUI ) * * Set A(i,ja+n-m+i-ia+1:ja+n-1) to zero * CALL PDLASET( 'All', 1, IA+M-1-I, ZERO, ZERO, A, I, $ JA+N-M+I-IA+1, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDORGR2 * END scalapack-1.8.0/SRC/pdlauu2.f0000640000175000017500000001756110363532303015500 0ustar muammarmuammar SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDLAUU2 computes the product U * U' or L' * L, where the triangular * factor U or L is stored in the upper or lower triangular part of * the matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * If UPLO = 'U' or 'u' then the upper triangle of the result is stored, * overwriting the factor U in sub( A ). * If UPLO = 'L' or 'l' then the lower triangle of the result is stored, * overwriting the factor L in sub( A ). * * This is the unblocked form of the algorithm, calling Level 2 BLAS. * No communication is performed by this routine, the matrix to operate * on should be strictly local to one process. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER*1 * Specifies whether the triangular factor stored in the matrix * sub( A ) is upper or lower triangular: * = 'U': Upper triangular, * = 'L': Lower triangular. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the order of the triangular factor U or L. N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the triangular factor L or U. * On exit, if UPLO = 'U', the upper triangle of the distributed * matrix sub( A ) is overwritten with the upper triangle of the * product U * U'; if UPLO = 'L', the lower triangle of sub( A ) * is overwritten with the lower triangle of the product L' * L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. INTEGER IACOL, IAROW, ICURR, IDIAG, IIA, IOFFA, JJA, $ LDA, MYCOL, MYROW, NA, NPCOL, NPROW DOUBLE PRECISION AII * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, DGEMV, DSCAL * .. * .. External Functions .. LOGICAL LSAME DOUBLE PRECISION DDOT EXTERNAL DDOT, LSAME * .. * .. Executable Statements .. * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN * LDA = DESCA( LLD_ ) IDIAG = IIA + ( JJA - 1 ) * LDA IOFFA = IDIAG * IF( LSAME( UPLO, 'U' ) ) THEN * * Compute the product U * U'. * DO 10 NA = N-1, 1, -1 AII = A( IDIAG ) ICURR = IDIAG + LDA A( IDIAG ) = AII*AII + DDOT( NA, A( ICURR ), LDA, $ A( ICURR ), LDA ) CALL DGEMV( 'No transpose', N-NA-1, NA, ONE, $ A( IOFFA+LDA ), LDA, A( ICURR ), LDA, AII, $ A( IOFFA ), 1 ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + LDA 10 CONTINUE AII = A( IDIAG ) CALL DSCAL( N, AII, A( IOFFA ), 1 ) * ELSE * * Compute the product L' * L. * DO 20 NA = 1, N-1 AII = A( IDIAG ) ICURR = IDIAG + 1 A(IDIAG) = AII*AII + DDOT( N-NA, A( ICURR ), 1, $ A( ICURR ), 1 ) CALL DGEMV( 'Transpose', N-NA, NA-1, ONE, A( IOFFA+1 ), $ LDA, A( ICURR ), 1, AII, A( IOFFA ), LDA ) IDIAG = IDIAG + LDA + 1 IOFFA = IOFFA + 1 20 CONTINUE AII = A( IDIAG ) CALL DSCAL( N, AII, A( IOFFA ), LDA ) * END IF * END IF * RETURN * * End of PDLAUU2 * END scalapack-1.8.0/SRC/pchegst.f0000640000175000017500000004231710363532303015556 0ustar muammarmuammar* * SUBROUTINE PCHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N REAL SCALE * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCHEGST reduces a complex Hermitian-definite generalized eigenproblem * to standard form. * * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ). * * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x, * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H) * * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by * U*sub( A )*U**H or L**H*sub( A )*L. * * sub( B ) must have been previously factorized as U**H*U or L*L**H by * PCPOTRF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * IBTYPE (global input) INTEGER * = 1: compute inv(U**H)*sub( A )*inv(U) or * inv(L)*sub( A )*inv(L**H); * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is * factored as U**H*U; * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is * factored as L*L**H. * * N (global input) INTEGER * The order of the matrices sub( A ) and sub( B ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', * the leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains * the lower triangular part of the matrix, and its strictly * upper triangular part is not referenced. * * On exit, if INFO = 0, the transformed matrix, stored in the * same format as sub( A ). * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, * this array contains the local pieces of the triangular factor * from the Cholesky factorization of sub( B ), as returned by * PCPOTRF. * * IB (global input) INTEGER * B's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JB (global input) INTEGER * B's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * SCALE (global output) REAL * Amount by which the eigenvalues should be scaled to * compensate for the scaling performed in this routine. * At present, SCALE is always returned as 1.0, it is * returned here to allow for future enhancement. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE, HALF PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), $ HALF = ( 0.5E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, K, KB, MYCOL, MYROW, NB, $ NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHEGS2, PCHEMM, $ PCHER2K, PCHK2MAT, PCTRMM, PCTRSM, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Get grid parameters * SCALE = ONE * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE UPPER = LSAME( UPLO, 'U' ) CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN INFO = -1 ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN INFO = -10 ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN INFO = -( 1100+MB_ ) ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 1100+NB_ ) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -( 1100+CTXT_ ) END IF END IF IDUM1( 1 ) = IBTYPE IDUM2( 1 ) = 1 IF( UPPER ) THEN IDUM1( 2 ) = ICHAR( 'U' ) ELSE IDUM1( 2 ) = ICHAR( 'L' ) END IF IDUM2( 2 ) = 2 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB, $ JB, DESCB, 11, 2, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCHEGST', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN * * Compute inv(U')*sub( A )*inv(U) * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 10 CONTINUE * * Update the upper triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PCHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, IB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PCTRSM( 'Left', UPLO, 'Conjugate Transpose', $ 'Non-unit', KB, N-K-KB+1, CONE, B, IB+K-1, $ JB+K-1, DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PCHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, CONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PCHER2K( UPLO, 'Conjugate Transpose', N-K-KB+1, KB, $ -CONE, A, IA+K-1, JA+K+KB-1, DESCA, B, $ IB+K-1, JB+K+KB-1, DESCB, ONE, A, $ IA+K+KB-1, JA+K+KB-1, DESCA ) CALL PCHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K-1, JB+K+KB-1, $ DESCB, CONE, A, IA+K-1, JA+K+KB-1, DESCA ) CALL PCTRSM( 'Right', UPLO, 'No transpose', 'Non-unit', $ KB, N-K-KB+1, CONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K-1, JA+K+KB-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 10 * ELSE * * Compute inv(L)*sub( A )*inv(L') * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 20 CONTINUE * * Update the lower triangle of A(ia+k-1:ia+n-1,ja+k-1:ja+n-1) * CALL PCHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) IF( K+KB.LE.N ) THEN CALL PCTRSM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-unit', N-K-KB+1, KB, CONE, B, IB+K-1, $ JB+K-1, DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PCHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, CONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PCHER2K( UPLO, 'No transpose', N-K-KB+1, KB, -CONE, $ A, IA+K+KB-1, JA+K-1, DESCA, B, IB+K+KB-1, $ JB+K-1, DESCB, ONE, A, IA+K+KB-1, $ JA+K+KB-1, DESCA ) CALL PCHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, A, $ IA+K-1, JA+K-1, DESCA, B, IB+K+KB-1, JB+K-1, $ DESCB, CONE, A, IA+K+KB-1, JA+K-1, DESCA ) CALL PCTRSM( 'Left', UPLO, 'No transpose', 'Non-unit', $ N-K-KB+1, KB, CONE, B, IB+K+KB-1, JB+K+KB-1, $ DESCB, A, IA+K+KB-1, JA+K-1, DESCA ) END IF K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 20 * END IF * ELSE * IF( UPPER ) THEN * * Compute U*sub( A )*U' * K = 1 NB = DESCA( NB_ ) KB = MIN( ICEIL( JA, NB )*NB, JA+N-1 ) - JA + 1 * 30 CONTINUE * * Update the upper triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PCTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', K-1, $ KB, CONE, B, IB, JB, DESCB, A, IA, JA+K-1, $ DESCA ) CALL PCHEMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, CONE, A, $ IA, JA+K-1, DESCA ) CALL PCHER2K( UPLO, 'No transpose', K-1, KB, CONE, A, IA, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, ONE, A, $ IA, JA, DESCA ) CALL PCHEMM( 'Right', UPLO, K-1, KB, HALF, A, IA+K-1, $ JA+K-1, DESCA, B, IB, JB+K-1, DESCB, CONE, A, $ IA, JA+K-1, DESCA ) CALL PCTRMM( 'Right', UPLO, 'Conjugate transpose', $ 'Non-unit', K-1, KB, CONE, B, IB+K-1, JB+K-1, $ DESCB, A, IA, JA+K-1, DESCA ) CALL PCHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 30 * ELSE * * Compute L'*sub( A )*L * K = 1 NB = DESCA( MB_ ) KB = MIN( ICEIL( IA, NB )*NB, IA+N-1 ) - IA + 1 * 40 CONTINUE * * Update the lower triangle of A(ia:ia+k+kb-2,ja:ja+k+kb-2) * CALL PCTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', KB, $ K-1, CONE, B, IB, JB, DESCB, A, IA+K-1, JA, $ DESCA ) CALL PCHEMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, CONE, A, IA+K-1, $ JA, DESCA ) CALL PCHER2K( UPLO, 'Conjugate transpose', K-1, KB, CONE, A, $ IA+K-1, JA, DESCA, B, IB+K-1, JB, DESCB, ONE, $ A, IA, JA, DESCA ) CALL PCHEMM( 'Left', UPLO, KB, K-1, HALF, A, IA+K-1, JA+K-1, $ DESCA, B, IB+K-1, JB, DESCB, CONE, A, IA+K-1, $ JA, DESCA ) CALL PCTRMM( 'Left', UPLO, 'Conjugate transpose', $ 'Non-unit', KB, K-1, CONE, B, IB+K-1, JB+K-1, $ DESCB, A, IA+K-1, JA, DESCA ) CALL PCHEGS2( IBTYPE, UPLO, KB, A, IA+K-1, JA+K-1, DESCA, B, $ IB+K-1, JB+K-1, DESCB, INFO ) * K = K + KB KB = MIN( N-K+1, NB ) * IF( K.LE.N ) $ GO TO 40 * END IF * END IF * RETURN * * End of PCHEGST * END scalapack-1.8.0/SRC/ztrmvt.f0000640000175000017500000001117210363532303015462 0ustar muammarmuammar SUBROUTINE ZTRMVT( UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, $ INCZ ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * March 13, 2000 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER INCW, INCX, INCY, INCZ, LDT, N * .. * .. Array Arguments .. COMPLEX*16 T( LDT, * ), W( * ), X( * ), Y( * ), Z( * ) * .. * * Purpose * ======= * * ZTRMVT performs the matrix-vector operations * * x := conjg( T' ) *y, and w := T *z, * * where x is an n element vector and T is an n by n * upper or lower triangular matrix. * * Arguments * ========= * * UPLO - CHARACTER*1. * On entry, UPLO specifies whether the matrix is an upper or * lower triangular matrix as follows: * * UPLO = 'U' or 'u' A is an upper triangular matrix. * * UPLO = 'L' or 'l' A is a lower triangular matrix. * * Unchanged on exit. * * N - INTEGER. * On entry, N specifies the order of the matrix A. * N must be at least zero. * Unchanged on exit. * * T - COMPLEX*16 array of DIMENSION ( LDT, n ). * Before entry with UPLO = 'U' or 'u', the leading n by n * upper triangular part of the array T must contain the upper * triangular matrix and the strictly lower triangular part of * T is not referenced. * Before entry with UPLO = 'L' or 'l', the leading n by n * lower triangular part of the array T must contain the lower * triangular matrix and the strictly upper triangular part of * T is not referenced. * * LDT - INTEGER. * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. LDA must be at least * max( 1, n ). * Unchanged on exit. * * X - COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ). * On exit, X = T' * y * * INCX - INTEGER. * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * Unchanged on exit. * * Y - COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ). * Before entry, the incremented array Y must contain the n * element vector y. Unchanged on exit. * * INCY - INTEGER. * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * W - COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCW ) ). * On exit, W = T * z * * INCW - INTEGER. * On entry, INCW specifies the increment for the elements of * W. INCW must not be zero. * Unchanged on exit. * * Z - COMPLEX*16 array of dimension at least * ( 1 + ( n - 1 )*abs( INCZ ) ). * Before entry, the incremented array Z must contain the n * element vector z. Unchanged on exit. * * INCY - INTEGER. * On entrz, INCY specifies the increment for the elements of * Y. INCY must not be zero. * Unchanged on exit. * * * Level 2 Blas routine. * * * .. Local Scalars .. INTEGER INFO * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA, ZCOPY, ZTRMV * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = 1 ELSE IF( N.LT.0 ) THEN INFO = 2 ELSE IF( LDT.LT.MAX( 1, N ) ) THEN INFO = 4 ELSE IF( INCW.EQ.0 ) THEN INFO = 6 ELSE IF( INCX.EQ.0 ) THEN INFO = 8 ELSE IF( INCY.EQ.0 ) THEN INFO = 10 ELSE IF( INCZ.EQ.0 ) THEN INFO = 12 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZTRMVT', INFO ) RETURN END IF * * Quick return if possible. * IF( N.EQ.0 ) $ RETURN * * * IF( INCX.NE.1 .OR. INCY.NE.1 .OR. INCW.NE.1 .OR. INCZ.NE.1 .OR. $ .TRUE. ) THEN CALL ZCOPY( N, Y, INCY, X, INCX ) CALL ZTRMV( UPLO, 'C', 'N', N, T, LDT, X, INCX ) CALL ZCOPY( N, Z, INCZ, W, INCW ) CALL ZTRMV( UPLO, 'N', 'N', N, T, LDT, W, INCW ) RETURN END IF * RETURN * * End of ZTRMVT. * END scalapack-1.8.0/SRC/pzunmrq.f0000640000175000017500000004245610363532303015641 0ustar muammarmuammar SUBROUTINE PZUNMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCC( * ) COMPLEX*16 A( * ), C( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PZUNMRQ overwrites the general complex M-by-N distributed matrix * sub( C ) = C(IC:IC+M-1,JC:JC+N-1) with * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * sub( C ) sub( C ) * Q * TRANS = 'C': Q**H * sub( C ) sub( C ) * Q**H * * where Q is a complex unitary distributed matrix defined as the * product of K elementary reflectors * * Q = H(1)' H(2)' . . . H(k)' * * as returned by PZGERQF. Q is of order M if SIDE = 'L' and of order N * if SIDE = 'R'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. If SIDE = 'L', M >= K >= 0, if SIDE = 'R', * N >= K >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension (LLD_A,LOCc(JA+M-1)) if SIDE='L', * and (LLD_A,LOCc(JA+N-1)) if SIDE='R', where * LLD_A >= MAX(1,LOCr(IA+K-1)); On entry, the i-th row must * contain the vector which defines the elementary reflector * H(i), IA <= i <= IA+K-1, as returned by PZGERQF in the * K rows of its distributed matrix argument A(IA:IA+K-1,JA:*). * A(IA:IA+K-1,JA:*) is modified by the routine but restored on * exit. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX*16, array, dimension LOCc(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PZGERQF. * TAU is tied to the distributed matrix A. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the local pieces of the distributed matrix sub(C). * On exit, sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) * or sub( C )*Q' or sub( C )*Q. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * if SIDE = 'L', * LWORK >= MAX( (MB_A*(MB_A-1))/2, ( MpC0 + MAX( MqA0 + * NUMROC( NUMROC( M+IROFFC, MB_A, 0, 0, NPROW ), * MB_A, 0, 0, LCMP ), NqC0 ) )*MB_A ) + * MB_A * MB_A * else if SIDE = 'R', * LWORK >= MAX( (MB_A*(MB_A-1))/2, (MpC0 + NqC0)*MB_A ) + * MB_A * MB_A * end if * * where LCMP = LCM / NPROW with LCM = ICLM( NPROW, NPCOL ), * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MqA0 = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If SIDE = 'L', * ( NB_A.EQ.MB_C .AND. ICOFFA.EQ.IROFFC ) * If SIDE = 'R', * ( NB_A.EQ.NB_C .AND. ICOFFA.EQ.ICOFFC .AND. IACOL.EQ.ICCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER COLBTOP, ROWBTOP, TRANST INTEGER I, I1, I2, I3, IACOL, IB, ICCOL, ICOFFA, $ ICOFFC, ICROW, ICTXT, IINFO, IPW, IROFFC, LCM, $ LCMP, LWMIN, MI, MPC0, MQA0, MYCOL, MYROW, NI, $ NPCOL, NPROW, NQ, NQC0 * .. * .. Local Arrays .. INTEGER IDUM1( 4 ), IDUM2( 4 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PB_TOPGET, $ PB_TOPSET, PXERBLA, PZLARFB, PZLARFT, $ PZUNMR2 * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(900+CTXT_) ELSE LEFT = LSAME( SIDE, 'L' ) NOTRAN = LSAME( TRANS, 'N' ) * * NQ is the order of Q * IF( LEFT ) THEN NQ = M CALL CHK1MAT( K, 5, M, 3, IA, JA, DESCA, 9, INFO ) ELSE NQ = N CALL CHK1MAT( K, 5, N, 4, IA, JA, DESCA, 9, INFO ) END IF CALL CHK1MAT( M, 3, N, 4, IC, JC, DESCC, 14, INFO ) IF( INFO.EQ.0 ) THEN ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ICROW = INDXG2P( IC, DESCC( MB_ ), MYROW, DESCC( RSRC_ ), $ NPROW ) ICCOL = INDXG2P( JC, DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) MPC0 = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC0 = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) * IF( LEFT ) THEN MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMP = LCM / NPROW LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) $ / 2, ( MPC0 + MAX( MQA0 + NUMROC( NUMROC( $ M+IROFFC, DESCA( MB_ ), 0, 0, NPROW ), $ DESCA( MB_ ), 0, 0, LCMP ), NQC0 ) ) * $ DESCA( MB_ ) ) + DESCA( MB_ ) * DESCA( MB_ ) ELSE LWMIN = MAX( ( DESCA( MB_ ) * ( DESCA( MB_ ) - 1 ) ) / 2, $ ( MPC0 + NQC0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ) END IF * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN INFO = -5 ELSE IF( LEFT .AND. DESCA( NB_ ).NE.DESCC( MB_ ) ) THEN INFO = -(900+NB_) ELSE IF( LEFT .AND. ICOFFA.NE.IROFFC ) THEN INFO = -12 ELSE IF( .NOT.LEFT .AND. ICOFFA.NE.ICOFFC ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. IACOL.NE.ICCOL ) THEN INFO = -13 ELSE IF( .NOT.LEFT .AND. DESCA( NB_ ).NE.DESCC( NB_ ) ) THEN INFO = -(1400+NB_) ELSE IF( ICTXT.NE.DESCC( CTXT_ ) ) THEN INFO = -(1400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -16 END IF END IF IF( LEFT ) THEN IDUM1( 1 ) = ICHAR( 'L' ) ELSE IDUM1( 1 ) = ICHAR( 'R' ) END IF IDUM2( 1 ) = 1 IF( NOTRAN ) THEN IDUM1( 2 ) = ICHAR( 'N' ) ELSE IDUM1( 2 ) = ICHAR( 'C' ) END IF IDUM2( 2 ) = 2 IDUM1( 3 ) = K IDUM2( 3 ) = 5 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 16 IF( LEFT ) THEN CALL PCHK2MAT( K, 5, M, 3, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) ELSE CALL PCHK2MAT( K, 5, N, 4, IA, JA, DESCA, 9, M, 3, N, 4, $ IC, JC, DESCC, 14, 4, IDUM1, IDUM2, INFO ) END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZUNMRQ', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I2 = IA + K - 1 I3 = DESCA( MB_ ) ELSE I1 = MAX( ( (IA+K-2) / DESCA( MB_ ) ) * DESCA( MB_ ) + 1, IA ) I2 = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) $ + 1 I3 = -DESCA( MB_ ) END IF * IF( LEFT ) THEN NI = N ELSE MI = M CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) IF( NOTRAN ) THEN CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'I-ring' ) ELSE CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) END IF END IF * IF( NOTRAN ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I1 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PZUNMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 DO 10 I = I1, I2, I3 IB = MIN( DESCA( MB_ ), K-I+IA ) * * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * CALL PZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-IA, IB, $ A, I, JA, DESCA, TAU, WORK, WORK( IPW ) ) IF( LEFT ) THEN * * H or H' is applied to C(ic:ic+m-k+i+ib-ia-1,jc:jc+n-1) * MI = M - K + I + IB - IA ELSE * * H or H' is applied to C(ic:ic+m-1,jc:jc+n-k+i+ib-ia-1) * NI = N - K + I + IB - IA END IF * * Apply H or H' * CALL PZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, $ IB, A, I, JA, DESCA, WORK, C, IC, JC, DESCC, $ WORK( IPW ) ) 10 CONTINUE * IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN IB = I2 - IA IF( LEFT ) THEN MI = M - K + IB ELSE NI = N - K + IB END IF CALL PZUNMR2( SIDE, TRANS, MI, NI, IB, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DCMPLX( DBLE( LWMIN ) ) * RETURN * * End of PZUNMRQ * END scalapack-1.8.0/SRC/pdpotrf.f0000640000175000017500000003142610363532303015576 0ustar muammarmuammar SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ) * .. * * Purpose * ======= * * PDPOTRF computes the Cholesky factorization of an N-by-N real * symmetric positive definite distributed matrix sub( A ) denoting * A(IA:IA+N-1, JA:JA+N-1). * * The factorization has the form * * sub( A ) = U' * U , if UPLO = 'U', or * * sub( A ) = L * L', if UPLO = 'L', * * where U is an upper triangular matrix and L is lower triangular. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * N-by-N symmetric distributed matrix sub( A ) to be factored. * If UPLO = 'U', the leading N-by-N upper triangular part of * sub( A ) contains the upper triangular part of the matrix, * and its strictly lower triangular part is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of * sub( A ) contains the lower triangular part of the distribu- * ted matrix, and its strictly upper triangular part is not * referenced. On exit, if UPLO = 'U', the upper triangular * part of the distributed matrix contains the Cholesky factor * U, if UPLO = 'L', the lower triangular part of the distribu- * ted matrix contains the Cholesky factor L. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = K, the leading minor of order K, * A(IA:IA+K-1,JA:JA+K-1) is not positive definite, and * the factorization could not be completed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PB_TOPGET, $ PB_TOPSET, PDPOTF2, PDSYRK, PDTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL EXTERNAL ICEIL, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IROFF = MOD( IA-1, DESCA( MB_ ) ) ICOFF = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFF.NE.0 ) THEN INFO = -4 ELSE IF( ICOFF.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRF', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * IF( UPPER ) THEN * * Split-ring topology for the communication along process * columns, 1-tree topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'S-ring' ) * * A is upper triangular, compute Cholesky factorization A = U'*U. * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA(NB_), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PDPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PDTRSM( 'Left', UPLO, 'Transpose', 'Non-Unit', $ JB, N-JB, ONE, A, IA, JA, DESCA, A, IA, JA+JB, $ DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PDSYRK( UPLO, 'Transpose', N-JB, JB, -ONE, A, IA, $ JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) END IF * * Loop over remaining block of columns * DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PDPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the row panel of U using the triangular solver * CALL PDTRSM( 'Left', UPLO, 'Transpose', 'Non-Unit', $ JB, N-J-JB+JA, ONE, A, I, J, DESCA, A, $ I, J+JB, DESCA ) * * Update the trailing matrix, A = A - U'*U * CALL PDSYRK( UPLO, 'Transpose', N-J-JB+JA, JB, $ -ONE, A, I, J+JB, DESCA, ONE, A, I+JB, $ J+JB, DESCA ) END IF 10 CONTINUE * ELSE * * 1-tree topology for the communication along process columns, * Split-ring topology along process rows. * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'S-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * * A is lower triangular, compute Cholesky factorization A = L*L' * (right-looking) * * Handle the first block of columns separately * JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) JB = JN - JA + 1 * * Perform unblocked Cholesky factorization on JB block * CALL PDPOTF2( UPLO, JB, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) $ GO TO 30 * IF( JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PDTRSM( 'Right', UPLO, 'Transpose', 'Non-Unit', $ N-JB, JB, ONE, A, IA, JA, DESCA, A, IA+JB, JA, $ DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PDSYRK( UPLO, 'No Transpose', N-JB, JB, -ONE, A, IA+JB, $ JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA ) * END IF * DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA * * Perform unblocked Cholesky factorization on JB block * CALL PDPOTF2( UPLO, JB, A, I, J, DESCA, INFO ) IF( INFO.NE.0 ) THEN INFO = INFO + J - JA GO TO 30 END IF * IF( J-JA+JB+1.LE.N ) THEN * * Form the column panel of L using the triangular solver * CALL PDTRSM( 'Right', UPLO, 'Transpose', 'Non-Unit', $ N-J-JB+JA, JB, ONE, A, I, J, DESCA, A, I+JB, $ J, DESCA ) * * Update the trailing matrix, A = A - L*L' * CALL PDSYRK( UPLO, 'No Transpose', N-J-JB+JA, JB, -ONE, $ A, I+JB, J, DESCA, ONE, A, I+JB, J+JB, $ DESCA ) * END IF 20 CONTINUE * END IF * 30 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * RETURN * * End of PDPOTRF * END scalapack-1.8.0/SRC/pzlapiv.f0000640000175000017500000003364710363532303015614 0ustar muammarmuammar SUBROUTINE PZLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * November 15, 1997 * * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCIP( * ), IPIV( * ), IWORK( * ) COMPLEX*16 A( * ) * .. * * Purpose * ======= * * PZLAPIV applies either P (permutation matrix indicated by IPIV) * or inv( P ) to a general M-by-N distributed matrix * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), resulting in row or column * pivoting. The pivot vector may be distributed across a process row * or a column. The pivot vector should be aligned with the distributed * matrix A. This routine will transpose the pivot vector if necessary. * For example if the row pivots should be applied to the columns of * sub( A ), pass ROWCOL='C' and PIVROC='C'. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Restrictions * ============ * * IPIV must always be a distributed vector (not a matrix). Thus: * IF( ROWPIV .EQ. 'C' ) THEN * JP must be 1 * ELSE * IP must be 1 * END IF * * The following restrictions apply when IPIV must be transposed: * IF( ROWPIV.EQ.'C' .AND. PIVROC.EQ.'C') THEN * DESCIP(MB_) must equal DESCA(NB_) * ELSE IF( ROWPIV.EQ.'R" .AND. PIVROC.EQ.'R') THEN * DESCIP(NB_) must equal DESCA(MB_) * END IF * * Arguments * ========= * * DIREC (global input) CHARACTER*1 * Specifies in which order the permutation is applied: * = 'F' (Forward) Applies pivots Forward from top of matrix. * Computes P*sub( A ). * = 'B' (Backward) Applies pivots Backward from bottom of * matrix. Computes inv( P )*sub( A ). * * ROWCOL (global input) CHARACTER*1 * Specifies if the rows or columns are to be permuted: * = 'R' Rows will be permuted, * = 'C' Columns will be permuted. * * PIVROC (global input) CHARACTER*1 * Specifies whether IPIV is distributed over a process row * or column: * = 'R' IPIV distributed over a process row * = 'C' IPIV distributed over a process column * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of * rows of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * distributed submatrix sub( A ) to which the row or column * interchanges will be applied. On exit, the local pieces * of the permuted distributed submatrix. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * IPIV (local input) INTEGER array, dimension (LIPIV) where LIPIV is * when ROWCOL='R' or 'r': * >= LOCr( IA+M-1 ) + MB_A if PIVROC='C' or 'c', * >= LOCc( M + MOD(JP-1,NB_P) ) if PIVROC='R' or 'r', and, * when ROWCOL='C' or 'c': * >= LOCr( N + MOD(IP-1,MB_P) ) if PIVROC='C' or 'c', * >= LOCc( JA+N-1 ) + NB_A if PIVROC='R' or 'r'. * This array contains the pivoting information. IPIV(i) is the * global row (column), local row (column) i was swapped with. * When ROWCOL='R' or 'r' and PIVROC='C' or 'c', or ROWCOL='C' * or 'c' and PIVROC='R' or 'r', the last piece of this array of * size MB_A (resp. NB_A) is used as workspace. In those cases, * this array is tied to the distributed matrix A. * * IP (global input) INTEGER * The row index in the global array P indicating the first * row of sub( P ). * * JP (global input) INTEGER * The column index in the global array P indicating the * first column of sub( P ). * * DESCIP (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed vector IPIV. * * IWORK (local workspace) INTEGER array, dimension (LDW) * where LDW is equal to the workspace necessary for * transposition, and the storage of the tranposed IPIV: * * Let LCM be the least common multiple of NPROW and NPCOL. * IF( ROWCOL.EQ.'R' .AND. PIVROC.EQ.'R' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + NB_P * ELSE * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + * NB_P * CEIL( CEIL(LOCc(N_P)/NB_P) / (LCM/NPCOL) ) * END IF * ELSE IF( ROWCOL.EQ.'C' .AND. PIVROC.EQ.'C' ) THEN * IF( NPROW.EQ.NPCOL ) THEN * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P * ELSE * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * END IF * ELSE * IWORK is not referenced. * END IF * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. LOGICAL ROWPVT INTEGER I, ICTXT, ICURCOL, ICURROW, IIP, ITMP, IPT, $ JJP, JPT, MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER DESCPT( DLEN_ ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGEBR2D, IGEBS2D, $ INFOG2L, PICOL2ROW, PIROW2COL, PZLAPV2 * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC, INDXG2P EXTERNAL LSAME, NUMROC, INDXG2P * .. * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN IF( M.LE.1 .OR. N.LT.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'C' ) ) THEN CALL PZLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PZLAPV2 * ELSE * * Take IPIV distributed over row 0, and store it in * iwork, distributed over column 0 * IPT = MOD( JP-1, DESCA(MB_) ) DESCPT(M_) = M + IPT + NPROW*DESCA(MB_) DESCPT(N_) = 1 DESCPT(MB_) = DESCA(MB_) DESCPT(NB_) = 1 DESCPT(RSRC_) = INDXG2P( IA, DESCA(MB_), IA, DESCA(RSRC_), $ NPROW ) DESCPT(CSRC_) = MYCOL DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = NUMROC( DESCPT(M_), DESCPT(MB_), MYROW, $ DESCPT(RSRC_), NPROW ) ITMP = NUMROC( DESCIP(N_), DESCIP(NB_), MYCOL, $ DESCIP(CSRC_), NPCOL ) CALL INFOG2L( IP, JP-IPT, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) CALL PIROW2COL( ICTXT, M+IPT, 1, DESCIP(NB_), IPIV(JJP), $ ITMP, IWORK, DESCPT(LLD_), 0, ICURCOL, $ DESCPT(RSRC_), $ MYCOL, IWORK(DESCPT(LLD_)-DESCPT(MB_)+1) ) * * Send column-distributed pivots to all columns * ITMP = DESCPT(LLD_) - DESCPT(MB_) IF( MYCOL.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Row', ' ', ITMP, 1, IWORK, ITMP, $ MYROW, 0 ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * IPT = IPT + 1 DO 10 I = 1, ITMP IWORK(I) = IWORK(I) - JP + IPT 10 CONTINUE CALL PZLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ IPT, 1, DESCPT ) END IF * * Otherwise, we're pivoting the columns of sub( A ) * ELSE IF( M.LT.1 .OR. N.LE.1 ) $ RETURN * * If the pivot vector is already distributed correctly * IF( LSAME( PIVROC, 'R' ) ) THEN CALL PZLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * * Otherwise, we must redistribute IPIV to match PZLAPV2 * ELSE * * Take IPIV distributed over column 0, and store it in * iwork, distributed over row 0 * JPT = MOD( IP-1, DESCA(NB_) ) DESCPT(M_) = 1 DESCPT(N_) = N + JPT + NPCOL*DESCA(NB_) DESCPT(MB_) = 1 DESCPT(NB_) = DESCA(NB_) DESCPT(RSRC_) = MYROW DESCPT(CSRC_) = INDXG2P( JA, DESCA(NB_), JA, DESCA(CSRC_), $ NPCOL ) DESCPT(CTXT_) = ICTXT DESCPT(LLD_) = 1 CALL INFOG2L( IP-JPT, JP, DESCIP, NPROW, NPCOL, MYROW, $ MYCOL, IIP, JJP, ICURROW, ICURCOL ) ITMP = NUMROC( N+JPT, DESCPT(NB_), MYCOL, DESCPT(CSRC_), $ NPCOL ) CALL PICOL2ROW( ICTXT, N+JPT, 1, DESCIP(MB_), IPIV(IIP), $ DESCIP(LLD_), IWORK, MAX(1, ITMP), ICURROW, $ 0, 0, DESCPT(CSRC_), IWORK(ITMP+1) ) * * Send row-distributed pivots to all rows * IF( MYROW.EQ.0 ) THEN CALL IGEBS2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP ) ELSE CALL IGEBR2D( ICTXT, 'Column', ' ', ITMP, 1, IWORK, $ ITMP, 0, MYCOL ) END IF * * Adjust pivots so they are relative to the start of IWORK, * not IPIV * JPT = JPT + 1 DO 20 I = 1, ITMP IWORK(I) = IWORK(I) - IP + JPT 20 CONTINUE CALL PZLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IWORK, $ 1, JPT, DESCPT ) END IF END IF * RETURN * * End of PZLAPIV * END scalapack-1.8.0/SRC/pdsytrd.f0000640000175000017500000004016110363532303015605 0ustar muammarmuammar SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ) DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PDSYTRD reduces a real symmetric matrix sub( A ) to symmetric * tridiagonal form T by an orthogonal similarity transformation: * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * UPLO (global input) CHARACTER * Specifies whether the upper or lower triangular part of the * symmetric matrix sub( A ) is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, this array contains the local pieces of the * symmetric distributed matrix sub( A ). If UPLO = 'U', the * leading N-by-N upper triangular part of sub( A ) contains * the upper triangular part of the matrix, and its strictly * lower triangular part is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of sub( A ) contains the * lower triangular part of the matrix, and its strictly upper * triangular part is not referenced. On exit, if UPLO = 'U', * the diagonal and first superdiagonal of sub( A ) are over- * written by the corresponding elements of the tridiagonal * matrix T, and the elements above the first superdiagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of elementary reflectors; if UPLO = 'L', the diagonal * and first subdiagonal of sub( A ) are overwritten by the * corresponding elements of the tridiagonal matrix T, and the * elements below the first subdiagonal, with the array TAU, * represent the orthogonal matrix Q as a product of elementary * reflectors. See Further Details. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * The diagonal elements of the tridiagonal matrix T: * D(i) = A(i,i). D is tied to the distributed matrix A. * * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1) * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the * distributed matrix A. * * TAU (local output) DOUBLE PRECISION array, dimension * LOCc(JA+N-1). This array contains the scalar factors TAU of * the elementary reflectors. TAU is tied to the distributed * matrix A. * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK( 1 ) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( NB * ( NP +1 ), 3 * NB ) * * where NB = MB_A = NB_A, * NP = NUMROC( N, NB, MYROW, IAROW, NPROW ), * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ). * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * If UPLO = 'U', the matrix Q is represented as a product of elementary * reflectors * * Q = H(n-1) . . . H(2) H(1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1). * * If UPLO = 'L', the matrix Q is represented as a product of elementary * reflectors * * Q = H(1) H(2) . . . H(n-1). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1). * * The contents of sub( A ) on exit are illustrated by the following * examples with n = 5: * * if UPLO = 'U': if UPLO = 'L': * * ( d e v2 v3 v4 ) ( d ) * ( d e v3 v4 ) ( e d ) * ( d e v4 ) ( v1 e d ) * ( d e ) ( v1 v2 e d ) * ( d ) ( v1 v2 v3 e d ) * * where d and e denote diagonal and off-diagonal elements of T, and vi * denotes an element of the vector defining H(i). * * Alignment requirements * ====================== * * The distributed submatrix sub( A ) must verify some alignment proper- * ties, namely the following expression should be true: * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY, UPPER CHARACTER COLCTOP, ROWCTOP INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IINFO, IPW, $ IROFFA, J, JB, JX, K, KK, LWMIN, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NQ * .. * .. Local Arrays .. INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK1MAT, $ PDLATRD, PDSYR2K, PDSYTD2, PB_TOPGET, $ PB_TOPSET, PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL LSAME, INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN NB = DESCA( NB_ ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IAROW = INDXG2P( IA, NB, MYROW, DESCA( RSRC_ ), NPROW ) IACOL = INDXG2P( JA, NB, MYCOL, DESCA( CSRC_ ), NPCOL ) NP = NUMROC( N, NB, MYROW, IAROW, NPROW ) NQ = MAX( 1, NUMROC( N+JA-1, NB, MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) LWMIN = MAX( (NP+1)*NB, 3*NB ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.ICOFFA .OR. ICOFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(600+NB_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 IF( LWORK.EQ.-1 ) THEN IDUM1( 2 ) = -1 ELSE IDUM1( 2 ) = 1 END IF IDUM2( 2 ) = 11 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 2, IDUM1, IDUM2, $ INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTRD', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', '1-tree' ) * IPW = NP * NB + 1 * IF( UPPER ) THEN * * Reduce the upper triangle of sub( A ). * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, INDXG2P( JA+N-KK, $ NB, MYCOL, DESCA( CSRC_ ), NPCOL ), ICTXT, $ MAX( 1, NP ) ) * DO 10 K = N-KK+1, NB+1, -NB JB = MIN( N-K+1, NB ) I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part of * the matrix * CALL PDLATRD( UPLO, K+JB-1, JB, A, IA, JA, DESCA, D, E, TAU, $ WORK, 1, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an * update of the form: * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V' * CALL PDSYR2K( UPLO, 'No transpose', K-1, JB, -ONE, A, IA, J, $ DESCA, WORK, 1, 1, DESCW, ONE, A, IA, JA, $ DESCA ) * * Copy last superdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J, NB, 0, IACOL, NPCOL ), NQ ) CALL PDELSET( A, I-1, J, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL ) * 10 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PDSYTD2( UPLO, MIN( N, NB ), A, IA, JA, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) * ELSE * * Reduce the lower triangle of sub( A ) * KK = MOD( JA+N-1, NB ) IF( KK.EQ.0 ) $ KK = NB CALL DESCSET( DESCW, N, NB, NB, NB, IAROW, IACOL, ICTXT, $ MAX( 1, NP ) ) * DO 20 K = 1, N-NB, NB I = IA + K - 1 J = JA + K - 1 * * Reduce columns I:I+NB-1 to tridiagonal form and form * the matrix W which is needed to update the unreduced part * of the matrix * CALL PDLATRD( UPLO, N-K+1, NB, A, I, J, DESCA, D, E, TAU, $ WORK, K, 1, DESCW, WORK( IPW ) ) * * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1), * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) := * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V' * CALL PDSYR2K( UPLO, 'No transpose', N-K-NB+1, NB, -ONE, A, $ I+NB, J, DESCA, WORK, K+NB, 1, DESCW, ONE, A, $ I+NB, J+NB, DESCA ) * * Copy last subdiagonal element back into sub( A ) * JX = MIN( INDXG2L( J+NB-1, NB, 0, IACOL, NPCOL ), NQ ) CALL PDELSET( A, I+NB, J+NB-1, DESCA, E( JX ) ) * DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + 1, NPCOL ) * 20 CONTINUE * * Use unblocked code to reduce the last or only block * CALL PDSYTD2( UPLO, KK, A, IA+K-1, JA+K-1, DESCA, D, E, $ TAU, WORK, LWORK, IINFO ) END IF * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DBLE( LWMIN ) * RETURN * * End of PDSYTRD * END scalapack-1.8.0/SRC/pzheev.f0000640000175000017500000005560610363532303015427 0ustar muammarmuammar SUBROUTINE PZHEEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 14, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LRWORK, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) DOUBLE PRECISION RWORK( * ), W( * ) COMPLEX*16 A( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PZHEEV computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PZHEEV assumes a homogeneous system and makes * only spot checks of the consistency of the eigenvalues across the * different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Notes * ===== * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distributed * matrix A. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of A. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of A. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of the * distributed matrix A. * LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic COMPLEX*16 array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PZHEEV cannot guarantee * correct error reporting. * * W (global output) DOUBLE PRECISION array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * Z (local output) COMPLEX*16 array, * global dimension (N, N), * local dimension (LLD_Z, LOCc(JZ+N-1)) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) COMPLEX*16 array, * dimension (LWORK) * On output, WORK(1) returns the workspace needed to guarantee * completion. If the input parameters are incorrect, WORK(1) * may also be incorrect. * * If JOBZ='N' WORK(1) = minimal workspace for eigenvalues only. * If JOBZ='V' WORK(1) = minimal workspace required to * generate all the eigenvectors. * * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= MAX( NB*( NP0+1 ), 3 ) +3*N * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required: * LWORK >= (NP0 + NQ0 + NB)*NB + 3*N + N^2 * * Variable definitions: * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NP0 = NUMROC( NN, NB, 0, 0, NPROW ) * NQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * RWORK (local workspace/output) COMPLEX*16 array, * dimension (LRWORK) * On output RWORK(1) returns the * DOUBLE PRECISION workspace needed to * guarantee completion. If the input parameters are incorrect, * RWORK(1) may also be incorrect. * * LRWORK (local input) INTEGER * Size of RWORK array. * If eigenvectors are desired (JOBZ = 'V') then * LRWORK >= 2*N + 2*N-2 * If eigenvectors are not desired (JOBZ = 'N') then * LRWORK >= 2*N * * If LRWORK = -1, the LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the RWORK array. The required workspace is returned * as the first element of RWORK and no error message is issued * by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in ZSTEQR2 after a total of 30*N iterations. * If INFO = N+1, then PZHEEV has detected heterogeneity * by finding that eigenvalues were not identical across * the process grid. In this case, the accuracy of * the results from PZHEEV cannot be guaranteed. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) COMPLEX*16 CZERO, CONE PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), $ CONE = ( 1.0D+0, 0.0D+0 ) ) INTEGER ITHVAL PARAMETER ( ITHVAL = 10 ) * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, $ IINFO, INDD, INDE, INDRD, INDRE, INDRWORK, $ INDTAU, INDWORK, INDWORK2, IROFFA, IROFFZ, $ ISCALE, IZROW, J, K, LDC, LLRWORK, LLWORK, $ LRMIN, LRWMIN, LWMIN, MB_A, MB_Z, MYCOL, $ MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, NP0, $ NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ0, NRC, $ RSIZEZSTEQR2, RSRC_A, RSRC_Z, SIZEPZHETRD, $ SIZEPZUNMTR, SIZEZSTEQR2 DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCQR( 10 ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC, SL_GRIDRESHAPE DOUBLE PRECISION PDLAMCH, PZLANHE EXTERNAL LSAME, INDXG2P, NUMROC, SL_GRIDRESHAPE, $ PDLAMCH, PZLANHE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, CHK1MAT, DCOPY, $ DESCINIT, DGAMN2D, DGAMX2D, DSCAL, PCHK1MAT, $ PCHK2MAT, PXERBLA, PZELGET, PZGEMR2D, PZHETRD, $ PZLASCL, PZLASET, PZUNMTR, ZSTEQR2 * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, ICHAR, INT, MAX, MIN, MOD, $ SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) $ RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * * Initialize pointer to some safe value * INDTAU = 1 INDD = 1 INDE = 1 INDWORK = 1 INDWORK2 = 1 * INDRE = 1 INDRD = 1 INDRWORK = 1 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE IF( WANTZ ) THEN IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( INFO.EQ.0 ) THEN CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A LOWER = LSAME( UPLO, 'L' ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) NP0 = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) IF( WANTZ ) THEN NB_Z = DESCZ( NB_ ) MB_Z = DESCZ( MB_ ) RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) END IF * * COMPLEX*16 work space for PZHETRD * CALL PZHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), $ RWORK( INDE ), WORK( INDTAU ), $ WORK( INDWORK ), -1, IINFO ) SIZEPZHETRD = INT( ABS( WORK( 1 ) ) ) * * COMPLEX*16 work space for PZUNMTR * IF( WANTZ ) THEN CALL PZUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), -1, IINFO ) SIZEPZUNMTR = INT( ABS( WORK( 1 ) ) ) ELSE SIZEPZUNMTR = 0 END IF * * DOUBLE PRECISION work space for ZSTEQR2 * IF( WANTZ ) THEN RSIZEZSTEQR2 = MIN( 1, 2*N-2 ) ELSE RSIZEZSTEQR2 = 0 END IF * * Initialize the context of the single column distributed * matrix required by ZSTEQR2. This specific distribution * allows each process to do 1/pth of the work updating matrix * Q during ZSTEQR2 and achieve some parallelization to an * otherwise serial subroutine. * LDC = 0 IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB_A, MYPROWC, 0, NPROCS ) LDC = MAX( 1, NRC ) CALL DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, LDC, $ INFO ) END IF * * COMPLEX*16 work space for ZSTEQR2 * IF( WANTZ ) THEN SIZEZSTEQR2 = N*LDC ELSE SIZEZSTEQR2 = 0 END IF * * Set up pointers into the WORK array * INDTAU = 1 INDD = INDTAU + N INDE = INDD + N INDWORK = INDE + N INDWORK2 = INDWORK + N*LDC LLWORK = LWORK - INDWORK + 1 * * Set up pointers into the RWORK array * INDRE = 1 INDRD = INDRE + N INDRWORK = INDRD + N LLRWORK = LRWORK - INDRWORK + 1 * * Compute the total amount of space needed * LRWMIN = 2*N + RSIZEZSTEQR2 LWMIN = 3*N + MAX( SIZEPZHETRD, SIZEPZUNMTR, SIZEZSTEQR2 ) * END IF IF( INFO.EQ.0 ) THEN IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN INFO = -16 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 3 IF( WANTZ ) THEN CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, $ JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 3, IDUM1, $ IDUM2, INFO ) END IF WORK( 1 ) = DCMPLX( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PZHEEV', -INFO ) IF( WANTZ ) $ CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN ELSE IF( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) THEN IF( WANTZ ) $ CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN END IF * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PZLANHE( 'M', UPLO, N, A, IA, JA, DESCA, $ RWORK( INDRWORK ) ) * * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PZLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * CALL PZHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDRD ), $ RWORK( INDRE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * DO 10 I = 1, N CALL PZELGET( 'A', ' ', WORK( INDD+I-1 ), A, I+IA-1, I+JA-1, $ DESCA ) RWORK( INDRD+I-1 ) = DBLE( WORK( INDD+I-1 ) ) 10 CONTINUE IF( LSAME( UPLO, 'U' ) ) THEN DO 20 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDE+I-1 ), A, I+IA-1, I+JA, $ DESCA ) RWORK( INDRE+I-1 ) = DBLE( WORK( INDE+I-1 ) ) 20 CONTINUE ELSE DO 30 I = 1, N - 1 CALL PZELGET( 'A', ' ', WORK( INDE+I-1 ), A, I+IA, I+JA-1, $ DESCA ) RWORK( INDRE+I-1 ) = DBLE( WORK( INDE+I-1 ) ) 30 CONTINUE END IF * IF( WANTZ ) THEN * CALL PZLASET( 'Full', N, N, CZERO, CONE, WORK( INDWORK ), 1, 1, $ DESCQR ) * * ZSTEQR2 is a modified version of LAPACK's CSTEQR. The * modifications allow each process to perform partial updates * to matrix Q. * CALL ZSTEQR2( 'I', N, RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDWORK ), LDC, NRC, RWORK( INDRWORK ), $ INFO ) * CALL PZGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, $ DESCZ, CONTEXTC ) * CALL PZUNMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) * ELSE * CALL ZSTEQR2( 'N', N, RWORK( INDRD ), RWORK( INDRE ), $ WORK( INDWORK ), 1, 1, RWORK( INDRWORK ), INFO ) END IF * * Copy eigenvalues from workspace to output array * CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE.EQ.1 ) THEN CALL DSCAL( N, ONE / SIGMA, W, 1 ) END IF * WORK( 1 ) = DBLE( LWMIN ) * * Free up resources * IF( WANTZ ) THEN CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF( N.LE.ITHVAL ) THEN J = N K = 1 ELSE J = N / ITHVAL K = ITHVAL END IF * LRMIN = INT( RWORK( 1 ) ) INDTAU = 0 INDE = INDTAU + J DO 40 I = 1, J RWORK( I+INDTAU ) = W( ( I-1 )*K+1 ) RWORK( I+INDE ) = W( ( I-1 )*K+1 ) 40 CONTINUE * CALL DGAMN2D( DESCA( CTXT_ ), 'All', ' ', J, 1, RWORK( 1+INDTAU ), $ J, 1, 1, -1, -1, 0 ) CALL DGAMX2D( DESCA( CTXT_ ), 'All', ' ', J, 1, RWORK( 1+INDE ), $ J, 1, 1, -1, -1, 0 ) * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( RWORK( I+INDTAU )-RWORK( I+INDE ).NE. $ ZERO ) ) THEN INFO = N + 1 END IF 50 CONTINUE RWORK( 1 ) = LRMIN * RETURN * * End of PZHEEV * END scalapack-1.8.0/SRC/pslared2d.f0000640000175000017500000001506510367447133016013 0ustar muammarmuammar SUBROUTINE PSLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 12, 2005 * * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. * .. Array Arguments .. INTEGER DESC( * ) REAL BYALL( * ), BYROW( * ), WORK( LWORK ) * .. * * Purpose * ======= * * PSLARED2D redistributes a 1D array * * It assumes that the input array, BYROW, is distributed across * columns and that all process rows contain the same copy of * BYROW. The output array, BYALL, will be identical on all processes * and will contain the entire array. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * NP = Number of local rows in BYROW() * * N (global input) INTEGER * The number of elements to be redistributed. N >= 0. * * IA (global input) INTEGER * IA must be equal to 1 * * JA (global input) INTEGER * JA must be equal to 1 * * DESC (global/local input) INTEGER Array of dimension DLEN_ * A 2D array descriptor, which describes BYROW * * BYROW (local input) distributed block cyclic REAL array * global dimension (N), local dimension (NP) * BYROW is distributed across the process columns * All process rows are assumed to contain the same value * * BYALL (global output) REAL global dimension( N ) * local dimension (N) * BYALL is exactly duplicated on all processes * It contains the same values as BYROW, but it is replicated * across all processes rather than being distributed * * BYALL(i) = BYROW( NUMROC(i,DESC( MB_ ),MYCOL,0,NPCOL ) on the procs * whose MYCOL == mod((i-1)/DESC( MB_ ),NPCOL) * * WORK (local workspace) REAL dimension (LWORK) * Used to hold the buffers sent from one process to another * * LWORK (local input) INTEGER size of WORK array * LWORK >= NUMROC(N, DESC( MB_ ), 0, 0, NPROW) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER ALLI, BUFLEN, I, II, MB, MYCOL, MYROW, NPCOL, $ NPROW, PROW * .. * .. External Functions .. INTEGER NUMROC EXTERNAL NUMROC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D * .. * .. Intrinsic Functions .. INTRINSIC MIN * .. * .. Executable Statements .. * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) MB = DESC( MB_ ) * DO 30 PROW = 0, NPROW - 1 BUFLEN = NUMROC( N, MB, PROW, 0, NPROW ) IF( MYROW.EQ.PROW ) THEN CALL SCOPY( BUFLEN, BYROW, 1, WORK, 1 ) CALL SGEBS2D( DESC( CTXT_ ), 'C', ' ', BUFLEN, 1, WORK, $ BUFLEN ) ELSE CALL SGEBR2D( DESC( CTXT_ ), 'C', ' ', BUFLEN, 1, WORK, $ BUFLEN, PROW, MYCOL ) END IF * ALLI = PROW*MB DO 20 II = 1, BUFLEN, MB DO 10 I = 1, MIN( MB, BUFLEN-II+1 ) BYALL( ALLI+I ) = WORK( II-1+I ) 10 CONTINUE ALLI = ALLI + MB*NPROW 20 CONTINUE 30 CONTINUE * RETURN * * End of PSLARED2D * END scalapack-1.8.0/SRC/pslarf.f0000640000175000017500000006767710363532303015430 0ustar muammarmuammar SUBROUTINE PSLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) REAL C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PSLARF applies a real elementary reflector Q (or Q**T) to a real * M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from * either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a real scalar and v is a real vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q, Q = Q**T. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) REAL pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) REAL, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) REAL pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) REAL array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST REAL TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBSTRNV, $ SCOPY, SGEBR2D, SGEBS2D, SGEMV, $ SGER, SGERV2D, SGESD2D, SGSUM2D, $ SLASET * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL SGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL SGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL SGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL SLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL SGER( MP, NQ, -TAULOC, V( IOFFV ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL SCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL SGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL SGER( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL SGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL SGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL SCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL SGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MP+1 CALL SGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN IF( IOFFC.GT.0 ) $ CALL SGEMV( 'Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( IOFFC.GT.0 ) $ CALL SGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL SGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL SLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFV.GT.0 .AND. IOFFC.GT.0 ) $ CALL SGER( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL SCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL SGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL SGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 IF( IOFFV.GT.0 ) $ CALL SCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL SGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQ+1 CALL SGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL SGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * IF( IOFFC.GT.0 ) $ CALL SGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL SGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL SLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL SGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PSLARF * END scalapack-1.8.0/SRC/pslahqr.f0000640000175000017500000026567410602576752015624 0ustar muammarmuammar SUBROUTINE PSLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, $ ILWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.3) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * 1.7.3: March 22, 2006 * modification suggested by Mark Fahey and Greg Henry * 1.7.1: January 30, 2006 * 1.7.0: December 31, 1998 * * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ), IWORK( * ) REAL A( * ), WI( * ), WORK( * ), WR( * ), Z( * ) * .. * * Purpose * ======= * * PSLAHQR is an auxiliary routine used to find the Schur decomposition * and or eigenvalues of a matrix already in Hessenberg form from * cols ILO to IHI. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * WANTT (global input) LOGICAL * = .TRUE. : the full Schur form T is required; * = .FALSE.: only eigenvalues are required. * * WANTZ (global input) LOGICAL * = .TRUE. : the matrix of Schur vectors Z is required; * = .FALSE.: Schur vectors are not required. * * N (global input) INTEGER * The order of the Hessenberg matrix A (and Z if WANTZ). * N >= 0. * * ILO (global input) INTEGER * IHI (global input) INTEGER * It is assumed that A is already upper quasi-triangular in * rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless * ILO = 1). PSLAHQR works primarily with the Hessenberg * submatrix in rows and columns ILO to IHI, but applies * transformations to all of H if WANTT is .TRUE.. * 1 <= ILO <= max(1,IHI); IHI <= N. * * A (global input/output) REAL array, dimension * (DESCA(LLD_),*) * On entry, the upper Hessenberg matrix A. * On exit, if WANTT is .TRUE., A is upper quasi-triangular in * rows and columns ILO:IHI, with any 2-by-2 or larger diagonal * blocks not yet in standard form. If WANTT is .FALSE., the * contents of A are unspecified on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * WR (global replicated output) REAL array, * dimension (N) * WI (global replicated output) REAL array, * dimension (N) * The real and imaginary parts, respectively, of the computed * eigenvalues ILO to IHI are stored in the corresponding * elements of WR and WI. If two eigenvalues are computed as a * complex conjugate pair, they are stored in consecutive * elements of WR and WI, say the i-th and (i+1)th, with * WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the * eigenvalues are stored in the same order as on the diagonal * of the Schur form returned in A. A may be returned with * larger diagonal blocks until the next release. * * ILOZ (global input) INTEGER * IHIZ (global input) INTEGER * Specify the rows of Z to which transformations must be * applied if WANTZ is .TRUE.. * 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. * * Z (global input/output) REAL array. * If WANTZ is .TRUE., on entry Z must contain the current * matrix Z of transformations accumulated by PDHSEQR, and on * exit Z has been updated; transformations are applied only to * the submatrix Z(ILOZ:IHIZ,ILO:IHI). * If WANTZ is .FALSE., Z is not referenced. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * * WORK (local output) REAL array of size LWORK * * LWORK (local input) INTEGER * WORK(LWORK) is a local array and LWORK is assumed big enough * so that LWORK >= 3*N + * MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N), * 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) ) * * IWORK (global and local input) INTEGER array of size ILWORK * * ILWORK (local input) INTEGER * This holds the some of the IBLK integer arrays. This is held * as a place holder for the next release. * * INFO (global output) INTEGER * < 0: parameter number -INFO incorrect or inconsistent * = 0: successful exit * > 0: PSLAHQR failed to compute all the eigenvalues ILO to IHI * in a total of 30*(IHI-ILO+1) iterations; if INFO = i, * elements i+1:ihi of WR and WI contain those eigenvalues * which have been successfully computed. * * Logic: * This algorithm is very similar to _LAHQR. Unlike _LAHQR, * instead of sending one double shift through the largest * unreduced submatrix, this algorithm sends multiple double shifts * and spaces them apart so that there can be parallelism across * several processor row/columns. Another critical difference is * that this algorithm aggregrates multiple transforms together in * order to apply them in a block fashion. * * Important Local Variables: * IBLK = The maximum number of bulges that can be computed. * Currently fixed. Future releases this won't be fixed. * HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) * ROTN = The number of transforms to block together * NBULGE = The number of bulges that will be attempted on the * current submatrix. * IBULGE = The current number of bulges started. * K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). * * Subroutines: * This routine calls: * PSLACONSB -> To determine where to start each iteration * PSLAWIL -> Given the shift, get the transformation * SLASORTE -> Pair up eigenvalues so that reals are paired. * PSLACP3 -> Parallel array to local replicated array copy & * back. * SLAREF -> Row/column reflector applier. Core routine * here. * PSLASMSUB -> Finds negligible subdiagonal elements. * * Current Notes and/or Restrictions: * 1.) This code requires the distributed block size to be square * and at least six (6); unlike simpler codes like LU, this * algorithm is extremely sensitive to block size. Unwise * choices of too small a block size can lead to bad * performance. * 2.) This code requires A and Z to be distributed identically * and have identical contxts. * 3.) This release currently does not have a routine for * resolving the Schur blocks into regular 2x2 form after * this code is completed. Because of this, a significant * performance impact is required while the deflation is done * by sometimes a single column of processors. * 4.) This code does not currently block the initial transforms * so that none of the rows or columns for any bulge are * completed until all are started. To offset pipeline * start-up it is recommended that at least 2*LCM(NPROW,NPCOL) * bulges are used (if possible) * 5.) The maximum number of bulges currently supported is fixed at * 32. In future versions this will be limited only by the * incoming WORK array. * 6.) The matrix A must be in upper Hessenberg form. If elements * below the subdiagonal are nonzero, the resulting transforms * may be nonsimilar. This is also true with the LAPACK * routine. * 7.) For this release, it is assumed RSRC_=CSRC_=0 * 8.) Currently, all the eigenvalues are distributed to all the * nodes. Future releases will probably distribute the * eigenvalues by the column partitioning. * 9.) The internals of this routine are subject to change. * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ZERO, ONE, HALF PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 ) REAL CONST PARAMETER ( CONST = 1.50E+0 ) INTEGER IBLK PARAMETER ( IBLK = 32 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE, $ ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II, $ IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART, $ ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP, $ ITERMAX, ITMP1, ITMP2, ITN, ITS, J, JAFIRST, $ JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ, LEFT, $ LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1, LOCALI2, $ LOCALK, LOCALM, M, MODKM1, MYCOL, MYROW, $ NBULGE, NH, NODE, NPCOL, NPROW, NR, NUM, NZ, $ RIGHT, ROTN, UP, VECSIDX REAL AVE, DISC, H00, H10, H11, H12, H21, H22, H33, $ H43H34, H44, OVFL, S, SMLNUM, SUM, T1, T1COPY, $ T2, T3, ULP, UNFL, V1SAVE, V2, V2SAVE, V3, $ V3SAVE * .. * .. Local Arrays .. INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ), $ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ), $ KP2ROW( IBLK ), KROW( IBLK ), LOCALK2( IBLK ) REAL S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ), $ VCOPY( 3 ) * .. * .. External Functions .. INTEGER ILCM, NUMROC REAL PSLAMCH EXTERNAL ILCM, NUMROC, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, IGAMN2D, INFOG1L, INFOG2L, $ PSLABAD, PSLACONSB, PSLACP3, PSLASMSUB, $ PSLAWIL, PXERBLA, SCOPY, SGEBR2D, SGEBS2D, $ SGERV2D, SGESD2D, SGSUM2D, SLAHQR, SLAREF, $ SLARFG, SLASORTE * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SIGN, SQRT * .. * .. Executable Statements .. * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) * ITERMAX = 0 IF( N.EQ.0 ) $ RETURN * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) IAFIRST = DESCA( RSRC_ ) JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) LCMRC = ILCM( NPROW, NPCOL ) * * Determine the number of columns we have so we can check workspace * LOCALK = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL ) JJ = N / HBL IF( JJ*HBL.LT.N ) $ JJ = JJ + 1 JJ = 7*JJ / LCMRC IF( LWORK.LT.3*N+MAX( 2*MAX( LDA, LDZ )+2*LOCALK, JJ ) ) THEN INFO = -15 END IF IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN INFO = -( 1300+CTXT_ ) END IF IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1300+NB_ ) END IF IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1300+MB_ ) END IF IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN INFO = -( 700+RSRC_ ) END IF IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN INFO = -( 1300+RSRC_ ) END IF IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN INFO = -4 END IF IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN INFO = -5 END IF IF( HBL.LT.5 ) THEN INFO = -( 700+MB_ ) END IF CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PSLAHQR', -INFO ) RETURN END IF * * Set work array indices * VECSIDX = 0 IDIA = 3*N ISUB = 3*N ISUP = 3*N IRBUF = 3*N ICBUF = 3*N * * Find a value for ROTN * ROTN = HBL / 3 ROTN = MAX( ROTN, HBL-2 ) ROTN = MIN( ROTN, 1 ) * IF( ILO.EQ.IHI ) THEN CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, II, JJ ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN WR( ILO ) = A( ( ICOL-1 )*LDA+IROW ) ELSE WR( ILO ) = ZERO END IF WI( ILO ) = ZERO RETURN END IF * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 * CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, 0, LILOZ, LIHIZ ) LIHIZ = NUMROC( IHIZ, HBL, MYROW, 0, NPROW ) * * Set machine-dependent constants for the stopping criterion. * If NORM(H) <= SQRT(OVFL), overflow should not occur. * UNFL = PSLAMCH( CONTXT, 'SAFE MINIMUM' ) OVFL = ONE / UNFL CALL PSLABAD( CONTXT, UNFL, OVFL ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) SMLNUM = UNFL*( NH / ULP ) * * I1 and I2 are the indices of the first row and last column of H * to which transformations must be applied. If eigenvalues only are * being computed, I1 and I2 are set inside the main loop. * IF( WANTT ) THEN I1 = 1 I2 = N END IF * * ITN is the total number of QR iterations allowed. * ITN = ITERMAX * * The main loop begins here. I is the loop index and decreases from * IHI to ILO in steps of our schur block size (<=2*IBLK). Each * iteration of the loop works with the active submatrix in rows * and columns L to I. Eigenvalues I+1 to IHI have already * converged. Either L = ILO or the global A(L,L-1) is negligible * so that the matrix splits. * I = IHI 10 CONTINUE L = ILO IF( I.LT.ILO ) $ GO TO 450 * * Perform QR iterations on rows and columns ILO to I until a * submatrix of order 1 or 2 splits off at the bottom because a * subdiagonal element has become negligible. * DO 420 ITS = 0, ITN * * Look for a single small subdiagonal element. * CALL PSLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ), $ LWORK-IRBUF ) L = K * IF( L.GT.ILO ) THEN * * H(L,L-1) is negligible * CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW, ICOL, ITMP1, ITMP2 ) IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN A( ( ICOL-1 )*LDA+IROW ) = ZERO END IF WORK( ISUB+L-1 ) = ZERO END IF * * Exit from loop if a submatrix of order 1 or 2 has split off. * M = L - 10 * IF ( L .GE. I - (2*IBLK-1) ) * IF ( L .GE. I - MAX(2*IBLK-1,HBL) ) IF( L.GE.I-1 ) $ GO TO 430 * * Now the active submatrix is in rows and columns L to I. If * eigenvalues only are being computed, only the active submatrix * need be transformed. * IF( .NOT.WANTT ) THEN I1 = L I2 = I END IF * * Copy submatrix of size 2*JBLK and prepare to do generalized * Wilkinson shift or an exceptional shift * JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 ) IF( JBLK.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * JBLK = JBLK - MOD( JBLK, LCMRC ) END IF JBLK = MIN( JBLK, 2*LCMRC ) JBLK = MAX( JBLK, 1 ) * CALL PSLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1, $ 0 ) IF( ITS.EQ.20 .OR. ITS.EQ.40 ) THEN * * Exceptional shift. * DO 20 II = 2*JBLK, 2, -1 S1( II, II ) = CONST*( ABS( S1( II, II ) )+ $ ABS( S1( II, II-1 ) ) ) S1( II, II-1 ) = ZERO S1( II-1, II ) = ZERO 20 CONTINUE S1( 1, 1 ) = CONST*ABS( S1( 1, 1 ) ) ELSE CALL SLAHQR( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1, $ 2*IBLK, WORK( IRBUF+1 ), WORK( ICBUF+1 ), 1, $ 2*JBLK, Z, LDZ, IERR ) * * Prepare to use Wilkinson's double shift * H44 = S1( 2*JBLK, 2*JBLK ) H33 = S1( 2*JBLK-1, 2*JBLK-1 ) H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 ) IF( ( JBLK.GT.1 ) .AND. ( ITS.GT.30 ) ) THEN S = S1( 2*JBLK-1, 2*JBLK-2 ) DISC = ( H33-H44 )*HALF DISC = DISC*DISC + H43H34 IF( DISC.GT.ZERO ) THEN * * Real roots: Use Wilkinson's shift twice * DISC = SQRT( DISC ) AVE = HALF*( H33+H44 ) IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN H33 = H33*H44 - H43H34 H44 = H33 / ( SIGN( DISC, AVE )+AVE ) ELSE H44 = SIGN( DISC, AVE ) + AVE END IF H33 = H44 H43H34 = ZERO END IF END IF END IF * * Look for two consecutive small subdiagonal elements: * PSLACONSB is the routine that does this. * CALL PSLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, $ WORK( IRBUF+1 ), LWORK-IRBUF ) * * Skip small submatrices * * IF ( M .GE. I - 5 ) * $ GO TO 80 * * Double-shift QR step * * NBULGE is the number of bulges that will be attempted * ISTOP = MIN( M+ROTN-MOD( M, ROTN ), I-2 ) ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) ) ISTOP = MIN( ISTOP, I2-2 ) ISTOP = MAX( ISTOP, M ) NBULGE = ( I-1-ISTOP ) / HBL * * Do not exceed maximum determined. * NBULGE = MIN( NBULGE, JBLK ) IF( NBULGE.GT.LCMRC ) THEN * * Make sure it's divisible by LCM (we want even workloads!) * NBULGE = NBULGE - MOD( NBULGE, LCMRC ) END IF NBULGE = MAX( NBULGE, 1 ) * IF( ( ITS.NE.20 ) .AND. ( ITS.NE.40 ) .AND. ( NBULGE.GT.1 ) ) $ THEN * * sort the eigenpairs so that they are in twos for double * shifts. only call if several need sorting * CALL SLASORTE( S1( 2*( JBLK-NBULGE )+1, $ 2*( JBLK-NBULGE )+1 ), 2*IBLK, 2*NBULGE, $ WORK( IRBUF+1 ), IERR ) END IF * * IBULGE is the number of bulges going so far * IBULGE = 1 * * "A" row defs : main row transforms from LOCALK to LOCALI2 * CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, ITMP1, LOCALK ) LOCALK = NUMROC( N, HBL, MYCOL, 0, NPCOL ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ICOL1, LOCALI2 ) LOCALI2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL ) * * "A" col defs : main col transforms from LOCALI1 to LOCALM * CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, LOCALI1, ICOL1 ) ICOL1 = NUMROC( N, HBL, MYROW, 0, NPROW ) CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, LOCALM, ICOL1 ) ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, 0, NPROW ) * * Which row & column will start the bulges * ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST * CALL INFOG1L( M, HBL, NPROW, MYROW, 0, II, ITMP2 ) ITMP2 = NUMROC( N, HBL, MYROW, 0, NPROW ) CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, JJ, ITMP2 ) ITMP2 = NUMROC( N, HBL, MYCOL, 0, NPCOL ) CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, ISTOP, KP2ROW( 1 ) ) KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, 0, NPROW ) CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ISTOP, KP2COL( 1 ) ) KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, 0, NPCOL ) * * Set all values for bulges. All bulges are stored in * intermediate steps as loops over KI. Their current "task" * over the global M to I-1 values is always K1(KI) to K2(KI). * However, because there are many bulges, K1(KI) & K2(KI) might * go past that range while later bulges (KI+1,KI+2,etc..) are * finishing up. * * Rules: * If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * N (global input) INTEGER * The number of rows and columns to be operated on i.e the * order of the distributed submatrix sub( A ). N >= 0. * * A (local input) COMPLEX*16 pointer into the local memory * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array * contains the local pieces of the distributed matrix the trace * is to be computed. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * ==================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER ICURCOL, ICURROW, II, IOFFA, J, JB, JJ, JN, $ LDA, LL, MYCOL, MYROW, NPCOL, NPROW COMPLEX*16 TRACE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG2L, ZGSUM2D * .. * .. External Functions .. INTEGER ICEIL EXTERNAL ICEIL * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) * TRACE = ZERO IF( N.EQ.0 ) THEN PZLATRA = TRACE RETURN END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ ICURROW, ICURCOL ) * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) JB = JN-JA+1 LDA = DESCA( LLD_ ) IOFFA = II + ( JJ - 1 ) * LDA * * Handle first diagonal block separately * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 10 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 10 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) * * Loop over the remaining block of columns * DO 30 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( JA+N-J, DESCA( NB_ ) ) * IF( MYROW.EQ.ICURROW .AND. MYCOL.EQ.ICURCOL ) THEN DO 20 LL = IOFFA, IOFFA + (JB-1)*(LDA+1), LDA+1 TRACE = TRACE + A( LL ) 20 CONTINUE END IF IF( MYROW.EQ.ICURROW ) $ IOFFA = IOFFA + JB IF( MYCOL.EQ.ICURCOL ) $ IOFFA = IOFFA + JB*LDA ICURROW = MOD( ICURROW+1, NPROW ) ICURCOL = MOD( ICURCOL+1, NPCOL ) 30 CONTINUE * CALL ZGSUM2D( DESCA( CTXT_ ), 'All', ' ', 1, 1, TRACE, 1, -1, $ MYCOL ) * PZLATRA = TRACE * RETURN * * End of PZLATRA * END scalapack-1.8.0/SRC/pdggrqf.f0000640000175000017500000003633110363532303015552 0ustar muammarmuammar SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * ) * .. * * Purpose * ======= * * PDGGRQF computes a generalized RQ factorization of * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1): * * sub( A ) = R*Q, sub( B ) = Z*T*Q, * * where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal * matrix, and R and T assume one of the forms: * * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, * N-M M ( R21 ) N * N * * where R12 or R21 is upper triangular, and * * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, * ( 0 ) P-N P N-P * N * * where T11 is upper triangular. * * In particular, if sub( B ) is square and nonsingular, the GRQ * factorization of sub( A ) and sub( B ) implicitly gives the RQ * factorization of sub( A )*inv( sub( B ) ): * * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z' * * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ), * and Z' denotes the transpose of matrix Z. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( A ). M >= 0. * * P (global input) INTEGER * The number of rows to be operated on i.e the number of * rows of the distributed submatrix sub( B ). P >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrices sub( A ) and sub( B ). * N >= 0. * * A (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, if M <= N, the * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the * M by M upper triangular matrix R; if M >= N, the elements on * and above the (M-N)-th subdiagonal contain the M by N upper * trapezoidal matrix R; the remaining elements, with the array * TAUA, represent the orthogonal matrix Q as a product of * elementary reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAUA (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1) * This array contains the scalar factors of the elementary * reflectors which represent the orthogonal unitary matrix Q. * TAUA is tied to the distributed matrix A (see Further * Details). * * B (local input/local output) DOUBLE PRECISION pointer into the * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). * On entry, the local pieces of the P-by-N distributed matrix * sub( B ) which is to be factored. On exit, the elements on * and above the diagonal of sub( B ) contain the min(P,N) by N * upper trapezoidal matrix T (T is upper triangular if P >= N); * the elements below the diagonal, with the array TAUB, * represent the orthogonal matrix Z as a product of elementary * reflectors (see Further Details). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * TAUB (local output) DOUBLE PRECISION array, dimension * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors * TAUB of the elementary reflectors which represent the * orthogonal matrix Z. TAUB is tied to the distributed matrix * B (see Further Details). * * WORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ), * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) + * MB_A * MB_A, * NB_B * ( PpB0 + NqB0 + NB_B ) ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ), * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ), * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ), * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ), * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - taua * v * v' * * where taua is a real scalar, and v is a real vector with * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in * A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1). * To form Q explicitly, use ScaLAPACK subroutine PDORGRQ. * To use Q to update another matrix, use ScaLAPACK subroutine PDORMRQ. * * The matrix Z is represented as a product of elementary reflectors * * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n). * * Each H(i) has the form * * H(i) = I - taub * v * v' * * where taub is a real scalar, and v is a real vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1). * To form Z explicitly, use ScaLAPACK subroutine PDORGQR. * To use Z to update another matrix, use ScaLAPACK subroutine PDORMQR. * * Alignment requirements * ====================== * * The distributed submatrices sub( A ) and sub( B ) must verify some * alignment properties, namely the following expression should be true: * * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. Local Scalars .. LOGICAL LQUERY INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB, $ ICTXT, IROFFA, IROFFB, LWMIN, MPA0, MYCOL, $ MYROW, NPCOL, NPROW, NQA0, NQB0, PPB0 * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PDGEQRF, $ PDGERQF, PDORMRQ, PXERBLA * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -707 ELSE CALL CHK1MAT( M, 1, N, 3, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( P, 2, N, 3, IB, JB, DESCB, 12, INFO ) IF( INFO.EQ.0 ) THEN IROFFA = MOD( IA-1, DESCA( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) PPB0 = NUMROC( P+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NQB0 = NUMROC( N+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) LWMIN = MAX( DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ), $ MAX( MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2, $ ( PPB0 + NQB0 ) * DESCA( MB_ ) ) + $ DESCA( MB_ ) * DESCA( MB_ ), $ DESCB( NB_ ) * ( PPB0 + NQB0 + DESCB( NB_ ) ) ) ) * WORK( 1 ) = DBLE( LWMIN ) LQUERY = ( LWORK.EQ.-1 ) IF( IACOL.NE.IBCOL .OR. ICOFFA.NE.ICOFFB ) THEN INFO = -11 ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN INFO = -1204 ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -1207 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -15 END IF END IF IF( LWORK.EQ.-1 ) THEN IDUM1( 1 ) = -1 ELSE IDUM1( 1 ) = 1 END IF IDUM2( 1 ) = 15 CALL PCHK2MAT( M, 1, N, 3, IA, JA, DESCA, 7, P, 2, N, 3, IB, $ JB, DESCB, 12, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGRQF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q * CALL PDGERQF( M, N, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO ) LWMIN = INT( WORK( 1 ) ) * * Update sub( B ) := sub( B )*Q' * CALL PDORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), A, $ MAX( IA, IA+M-N ), JA, DESCA, TAUA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) LWMIN = MAX( LWMIN, INT( WORK( 1 ) ) ) * * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T * CALL PDGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * RETURN * * End of PDGGRQF * END scalapack-1.8.0/SRC/pcposvx.f0000640000175000017500000006560710363532303015632 0ustar muammarmuammar SUBROUTINE PCPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, $ WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LRWORK, $ LWORK, N, NRHS REAL RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), DESCX( * ) REAL BERR( * ), FERR( * ), SC( * ), $ SR( * ), RWORK( * ) COMPLEX A( * ), AF( * ), $ B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PCPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to * compute the solution to a complex system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. In the following comments Y denotes Y(IY:IY+M-1,JY:JY+K-1) * a M-by-K matrix where Y can be A, AF, B and X. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * diag(SR) * A * diag(SC) * inv(diag(SC)) * X = diag(SR) * B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(SR)*A*diag(SC) and B by diag(SR)*B. * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as * A = U**T* U, if UPLO = 'U', or * A = L * L**T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If equilibration was used, the matrix X is premultiplied by * diag(SR) so that it solves the original system before * equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix A is * supplied on entry, and if not, whether the matrix A should be * equilibrated before it is factored. * = 'F': On entry, AF contains the factored form of A. * If EQUED = 'Y', the matrix A has been equilibrated * with scaling factors given by S. A and AF will not * be modified. * = 'N': The matrix A will be copied to AF and factored. * = 'E': The matrix A will be equilibrated if necessary, then * copied to AF and factored. * * UPLO (global input) CHARACTER * = 'U': Upper triangle of A is stored; * = 'L': Lower triangle of A is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrices B and X. NRHS >= 0. * * A (local input/local output) COMPLEX pointer into * the local memory to an array of local dimension * ( LLD_A, LOCc(JA+N-1) ). * On entry, the Hermitian matrix A, except if FACT = 'F' and * EQUED = 'Y', then A must contain the equilibrated matrix * diag(SR)*A*diag(SC). If UPLO = 'U', the leading * N-by-N upper triangular part of A contains the upper * triangular part of the matrix A, and the strictly lower * triangular part of A is not referenced. If UPLO = 'L', the * leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper * triangular part of A is not referenced. A is not modified if * FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. * * On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by * diag(SR)*A*diag(SC). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) COMPLEX pointer * into the local memory to an array of local dimension * ( LLD_AF, LOCc(JA+N-1)). * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(SR)*A*diag(SC). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the original * matrix A. * * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky * factorization A = U**T*U or A = L*L**T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * EQUED (global input/global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'Y': Equilibration was done, i.e., A has been replaced by * diag(SR) * A * diag(SC). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * SR (local input/local output) COMPLEX array, * dimension (LLD_A) * The scale factors for A distributed across process rows; * not accessed if EQUED = 'N'. SR is an input variable if * FACT = 'F'; otherwise, SR is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SR must be * positive. * * SC (local input/local output) COMPLEX array, * dimension (LOC(N_A)) * The scale factors for A distributed across * process columns; not accessed if EQUED = 'N'. SC is an input * variable if FACT = 'F'; otherwise, SC is an output variable. * If FACT = 'F' and EQUED = 'Y', each element of SC must be * positive. * * B (local input/local output) COMPLEX pointer into * the local memory to an array of local dimension * ( LLD_B, LOCc(JB+NRHS-1) ). * On entry, the N-by-NRHS right-hand side matrix B. * On exit, if EQUED = 'N', B is not modified; if TRANS = 'N' * and EQUED = 'R' or 'B', B is overwritten by diag(R)*B; if * TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is overwritten * by diag(C)*B. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) COMPLEX pointer into * the local memory to an array of local dimension * ( LLD_X, LOCc(JX+NRHS-1) ). * If INFO = 0, the N-by-NRHS solution matrix X to the original * system of equations. Note that A and B are modified on exit * if EQUED .ne. 'N', and the solution to the equilibrated * system is inv(diag(SC))*X if TRANS = 'N' and EQUED = 'C' or * 'B', or inv(diag(SR))*X if TRANS = 'T' or 'C' and EQUED = 'R' * or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) REAL * The estimate of the reciprocal condition number of the matrix * A after equilibration (if done). If RCOND is less than the * machine precision (in particular, if RCOND = 0), the matrix * is singular to working precision. This condition is * indicated by a return code of INFO > 0, and the solution and * error bounds are not computed. * * FERR (local output) REAL array, dimension (LOC(N_B)) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution, FERR(j) bounds the magnitude * of the largest entry in (X(j) - XTRUE) divided by * the magnitude of the largest entry in X(j). The quality of * the error bound depends on the quality of the estimate of * norm(inv(A)) computed in the code; if the estimate of * norm(inv(A)) is accurate, the error bound is guaranteed. * * BERR (local output) REAL array, dimension (LOC(N_B)) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A or B that makes X(j) an exact solution). * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PCPOCON( LWORK ), PCPORFS( LWORK ) ) * + LOCr( N_A ). * LWORK = 3*DESCA( LLD_ ) * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) REAL array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK = 2*LOCc(N_A). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: if INFO = i, the leading minor of order i of A * is not positive definite, so the factorization * could not be completed, and the solution and error * bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the matrix * is singular to working precision, and the solution * and error bounds have not been computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) * .. * .. Local Scalars .. LOGICAL EQUIL, LQUERY, NOFACT, RCEQU INTEGER I, IACOL, IAROW, IAFROW, IBROW, IBCOL, ICOFF, $ ICOFFA, ICTXT, IDUMM, IIA, IIB, IIX, INFEQU, $ IROFF, IROFFA, IROFFAF, IROFFB, IROFFX, IXCOL, $ IXROW, J, JJA, JJB, JJX, LDB, LDX, LRWMIN, $ LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, NRHSQ, $ NQ REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM * .. * .. Local Arrays .. INTEGER IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, INFOG2L, $ PCPOCON, PCPOEQU, $ PCPORFS, PCPOTRF, PCPOTRS, $ PCLACPY, PCLAQSY, PXERBLA, $ SGAMN2D, SGAMX2D * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC REAL PCLANHE, PSLAMCH EXTERNAL INDXG2P, LSAME, NUMROC, PCLANHE, PSLAMCH * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA LWMIN = 3*DESCA( LLD_ ) LRWMIN = MAX( 2*NQ, NP ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' RCEQU = .FALSE. ELSE RCEQU = LSAME( EQUED, 'Y' ) SMLNUM = PSLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. $ .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( RCEQU ) THEN * SMIN = BIGNUM SMAX = ZERO DO 10 J = IIA, IIA + NP - 1 SMIN = MIN( SMIN, SR( J ) ) SMAX = MAX( SMAX, SR( J ) ) 10 CONTINUE CALL SGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, SMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL SGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, SMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( SMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) ELSE SCOND = ONE END IF END IF END IF END IF * WORK( 1 ) = REAL( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -28 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -30 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( UPLO ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 13 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 28 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 5, IDUM1, IDUM2, $ INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 28 IF( LRWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 30 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, NRHS, $ 4, IB, JB, DESCB, 19, 4, IDUM1, IDUM2, $ INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOSVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PCPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFEQU ) * IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PCLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) RCEQU = LSAME( EQUED, 'Y' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) LDB = DESCB( LLD_ ) IROFF = MOD( IB-1, DESCB( MB_ ) ) ICOFF = MOD( JB-1, DESCB( NB_ ) ) NP = NUMROC( N+IROFF, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 30 J = JJB, JJB+NRHSQ-1 DO 20 I = IIB, IIB+NP-1 B( I + ( J-1 )*LDB ) = SR( I )*B( I + ( J-1 )*LDB ) 20 CONTINUE 30 CONTINUE END IF * IF( NOFACT .OR. EQUIL ) THEN * * Compute the Cholesky factorization A = U'*U or A = L*L'. * CALL PCLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PCPOTRF( UPLO, N, AF, IAF, JAF, DESCAF, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * ANORM = PCLANHE( '1', UPLO, N, A, IA, JA, DESCA, RWORK ) * * Compute the reciprocal of the condition number of A. * CALL PCPOCON( UPLO, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PSLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PCLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PCPOTRS( UPLO, N, NRHS, AF, IAF, JAF, DESCAF, X, IX, JX, $ DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PCPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) LDX = DESCX( LLD_ ) IROFF = MOD( IX-1, DESCX( MB_ ) ) ICOFF = MOD( JX-1, DESCX( NB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) NP = NP-IROFF IF( MYCOL.EQ.IBCOL ) NRHSQ = NRHSQ-ICOFF * IF( RCEQU ) THEN DO 50 J = JJX, JJX+NRHSQ-1 DO 40 I = IIX, IIX+NP-1 X( I + ( J-1 )*LDX ) = SR( I )*X( I + ( J-1 )*LDX ) 40 CONTINUE 50 CONTINUE DO 60 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / SCOND 60 CONTINUE END IF * WORK( 1 ) = REAL( LWMIN ) RWORK( 1 ) = REAL( LRWMIN ) RETURN * * End of PCPOSVX * END scalapack-1.8.0/SRC/pdlarfg.f0000640000175000017500000002330210363532303015531 0ustar muammarmuammar SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N DOUBLE PRECISION ALPHA * .. * .. Array Arguments .. INTEGER DESCX( * ) DOUBLE PRECISION TAU( * ), X( * ) * .. * * Purpose * ======= * * PDLARFG generates a real elementary reflector H of order n, such * that * * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I. * ( x ) ( 0 ) * * where alpha is a scalar, and sub( X ) is an (N-1)-element real * distributed vector X(IX:IX+N-2,JX) if INCX = 1 and X(IX,JX:JX+N-2) if * INCX = DESCX(M_). H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v' ) , * ( v ) * * where tau is a real scalar and v is a real (N-1)-element * vector. * * If the elements of sub( X ) are all zero, then tau = 0 and H is * taken to be the unit matrix. * * Otherwise 1 <= tau <= 2. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Arguments * ========= * * N (global input) INTEGER * The global order of the elementary reflector. N >= 0. * * ALPHA (local output) DOUBLE PRECISION * On exit, alpha is computed in the process scope having the * vector sub( X ). * * IAX (global input) INTEGER * The global row index in X of X(IAX,JAX). * * JAX (global input) INTEGER * The global column index in X of X(IAX,JAX). * * X (local input/local output) DOUBLE PRECISION, pointer into the * local memory to an array of dimension (LLD_X,*). This array * contains the local pieces of the distributed vector sub( X ). * Before entry, the incremented array sub( X ) must contain * the vector x. On exit, it is overwritten with the vector v. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * INCX (global input) INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * INCX must not be zero. * * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JX) * if INCX = 1, and LOCr(IX) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix X. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX, $ KNT, MYCOL, MYROW, NPCOL, NPROW DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, PDSCAL, $ INFOG2L, PDNRM2 * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY2 EXTERNAL DLAMCH, DLAPY2 * .. * .. Intrinsic Functions .. INTRINSIC ABS, SIGN * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( INCX.EQ.DESCX( M_ ) ) THEN * * sub( X ) is distributed across a process row. * CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYROW.NE.IXROW ) $ RETURN * * Broadcast X(IAX,JAX) across the process row. * IF( MYCOL.EQ.IXCOL ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1, $ MYROW, IXCOL ) END IF * INDXTAU = IIAX * ELSE * * sub( X ) is distributed across a process column. * CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * IF( MYCOL.NE.IXCOL ) $ RETURN * * Broadcast X(IAX,JAX) across the process column. * IF( MYROW.EQ.IXROW ) THEN J = IIAX+(JJAX-1)*DESCX( LLD_ ) CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, X( J ), 1 ) ALPHA = X( J ) ELSE CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, ALPHA, 1, $ IXROW, MYCOL ) END IF * INDXTAU = JJAX * END IF * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO RETURN END IF * CALL PDNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) * IF( XNORM.EQ.ZERO ) THEN * * H = I * TAU( INDXTAU ) = ZERO * ELSE * * General case * BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) SAFMIN = DLAMCH( 'S' ) RSAFMN = ONE / SAFMIN IF( ABS( BETA ).LT.SAFMIN ) THEN * * XNORM, BETA may be inaccurate; scale X and recompute them * KNT = 0 10 CONTINUE KNT = KNT + 1 CALL PDSCAL( N-1, RSAFMN, X, IX, JX, DESCX, INCX ) BETA = BETA*RSAFMN ALPHA = ALPHA*RSAFMN IF( ABS( BETA ).LT.SAFMIN ) $ GO TO 10 * * New BETA is at most 1, at least SAFMIN * CALL PDNRM2( N-1, XNORM, X, IX, JX, DESCX, INCX ) BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) TAU( INDXTAU ) = ( BETA-ALPHA ) / BETA CALL PDSCAL( N-1, ONE/(ALPHA-BETA), X, IX, JX, DESCX, INCX ) * * If ALPHA is subnormal, it may lose relative accuracy * ALPHA = BETA DO 20 J = 1, KNT ALPHA = ALPHA*SAFMIN 20 CONTINUE ELSE TAU( INDXTAU ) = ( BETA-ALPHA ) / BETA CALL PDSCAL( N-1, ONE/(ALPHA-BETA), X, IX, JX, DESCX, INCX ) ALPHA = BETA END IF END IF * RETURN * * End of PDLARFG * END scalapack-1.8.0/SRC/pcpotrs.f0000640000175000017500000002401710363532303015610 0ustar muammarmuammar SUBROUTINE PCPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX A( * ), B( * ) * .. * * Purpose * ======= * * PCPOTRS solves a system of linear equations * * sub( A ) * X = sub( B ) * A(IA:IA+N-1,JA:JA+N-1)*X = B(IB:IB+N-1,JB:JB+NRHS-1) * * where sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1) and is a N-by-N * hermitian positive definite distributed matrix using the Cholesky * factorization sub( A ) = U**H*U or L*L**H computed by PCPOTRF. * sub( B ) denotes the distributed matrix B(IB:IB+N-1,JB:JB+NRHS-1). * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * This routine requires square block decomposition ( MB_A = NB_A ). * * Arguments * ========= * * UPLO (global input) CHARACTER * = 'U': Upper triangle of sub( A ) is stored; * = 'L': Lower triangle of sub( A ) is stored. * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix sub( A ). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix sub( B ). NRHS >= 0. * * A (local input) COMPLEX pointer into local memory to * an array of dimension (LLD_A, LOCc(JA+N-1)). On entry, this * array contains the factors L or U from the Cholesky facto- * rization sub( A ) = L*L**H or U**H*U, as computed by PCPOTRF. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * B (local input/local output) COMPLEX pointer into the * local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1)). On entry, this array contains the * the local pieces of the right hand sides sub( B ). * On exit, this array contains the local pieces of the solution * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL UPPER INTEGER IAROW, IBROW, ICTXT, IROFFA, IROFFB, ICOFFA, $ MYCOL, MYROW, NPCOL, NPROW * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PCTRSM, $ PXERBLA * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P EXTERNAL INDXG2P, LSAME * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters. * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 7, INFO ) CALL CHK1MAT( N, 2, NRHS, 3, IB, JB, DESCB, 11, INFO ) UPPER = LSAME( UPLO, 'U' ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IF ( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( ICOFFA.NE.0 ) THEN INFO = -6 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(700+NB_) ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN INFO = -9 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(1100+NB_) END IF END IF IF( UPPER ) THEN IDUM1( 1 ) = ICHAR( 'U' ) ELSE IDUM1( 1 ) = ICHAR( 'L' ) END IF IDUM2( 1 ) = 1 CALL PCHK2MAT( N, 2, N, 2, IA, JA, DESCA, 7, N, 2, NRHS, $ 3, IB, JB, DESCB, 11, 1, IDUM1, IDUM2, INFO ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCPOTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) $ RETURN * IF( UPPER ) THEN * * Solve sub( A ) * X = sub( B ) where sub( A ) = U'*U. * * Solve U'*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Upper', 'Conjugate transpose', $ 'Non-unit', N, NRHS, ONE, A, IA, JA, DESCA, B, IB, $ JB, DESCB ) * * Solve U*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) ELSE * * Solve sub( A ) *X = sub( B ) where sub( A ) = L*L'. * * Solve L*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, $ NRHS, ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * Solve L'*X = sub( B ), overwriting sub( B ) with X. * CALL PCTRSM( 'Left', 'Lower', 'Conjugate transpose', $ 'Non-unit', N, NRHS, ONE, A, IA, JA, DESCA, B, IB, $ JB, DESCB ) END IF * RETURN * * End of PCPOTRS * END scalapack-1.8.0/SRC/pzgesvx.f0000640000175000017500000010500410363532303015620 0ustar muammarmuammar SUBROUTINE PZGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, IPIV, EQUED, R, C, B, IB, $ JB, DESCB, X, IX, JX, DESCX, RCOND, FERR, $ BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * December 31, 1998 * * .. Scalar Arguments .. CHARACTER EQUED, FACT, TRANS INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LRWORK, $ LWORK, N, NRHS DOUBLE PRECISION RCOND * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCAF( * ), DESCB( * ), $ DESCX( * ), IPIV( * ) DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), $ RWORK( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ), X( * ) * .. * * Purpose * ======= * * PZGESVX uses the LU factorization to compute the solution to a * complex system of linear equations * * A(IA:IA+N-1,JA:JA+N-1) * X = B(IB:IB+N-1,JB:JB+NRHS-1), * * where A(IA:IA+N-1,JA:JA+N-1) is an N-by-N matrix and X and * B(IB:IB+N-1,JB:JB+NRHS-1) are N-by-NRHS matrices. * * Error bounds on the solution and a condition estimate are also * provided. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Description * =========== * * In the following description, A denotes A(IA:IA+N-1,JA:JA+N-1), * B denotes B(IB:IB+N-1,JB:JB+NRHS-1) and X denotes * X(IX:IX+N-1,JX:JX+NRHS-1). * * The following steps are performed: * * 1. If FACT = 'E', real scaling factors are computed to equilibrate * the system: * TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') * or diag(C)*B (if TRANS = 'T' or 'C'). * * 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if FACT = 'E') as * A = P * L * U, * where P is a permutation matrix, L is a unit lower triangular * matrix, and U is upper triangular. * * 3. The factored form of A is used to estimate the condition number * of the matrix A. If the reciprocal of the condition number is * less than machine precision, steps 4-6 are skipped. * * 4. The system of equations is solved for X using the factored form * of A. * * 5. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 6. If FACT = 'E' and equilibration was used, the matrix X is * premultiplied by diag(C) (if TRANS = 'N') or diag(R) (if * TRANS = 'T' or 'C') so that it solves the original system * before equilibration. * * Arguments * ========= * * FACT (global input) CHARACTER * Specifies whether or not the factored form of the matrix * A(IA:IA+N-1,JA:JA+N-1) is supplied on entry, and if not, * whether the matrix A(IA:IA+N-1,JA:JA+N-1) should be * equilibrated before it is factored. * = 'F': On entry, AF(IAF:IAF+N-1,JAF:JAF+N-1) and IPIV con- * tain the factored form of A(IA:IA+N-1,JA:JA+N-1). * If EQUED is not 'N', the matrix * A(IA:IA+N-1,JA:JA+N-1) has been equilibrated with * scaling factors given by R and C. * A(IA:IA+N-1,JA:JA+N-1), AF(IAF:IAF+N-1,JAF:JAF+N-1), * and IPIV are not modified. * = 'N': The matrix A(IA:IA+N-1,JA:JA+N-1) will be copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * = 'E': The matrix A(IA:IA+N-1,JA:JA+N-1) will be equili- * brated if necessary, then copied to * AF(IAF:IAF+N-1,JAF:JAF+N-1) and factored. * * TRANS (global input) CHARACTER * Specifies the form of the system of equations: * = 'N': A(IA:IA+N-1,JA:JA+N-1) * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (No transpose) * = 'T': A(IA:IA+N-1,JA:JA+N-1)**T * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Transpose) * = 'C': A(IA:IA+N-1,JA:JA+N-1)**H * X(IX:IX+N-1,JX:JX+NRHS-1) * = B(IB:IB+N-1,JB:JB+NRHS-1) (Conjugate transpose) * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(IA:IA+N-1,JA:JA+N-1). * N >= 0. * * NRHS (global input) INTEGER * The number of right-hand sides, i.e., the number of columns * of the distributed submatrices B(IB:IB+N-1,JB:JB+NRHS-1) and * X(IX:IX+N-1,JX:JX+NRHS-1). NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * the local memory to an array of local dimension * (LLD_A,LOCc(JA+N-1)). On entry, the N-by-N matrix * A(IA:IA+N-1,JA:JA+N-1). If FACT = 'F' and EQUED is not 'N', * then A(IA:IA+N-1,JA:JA+N-1) must have been equilibrated by * the scaling factors in R and/or C. A(IA:IA+N-1,JA:JA+N-1) is * not modified if FACT = 'F' or 'N', or if FACT = 'E' and * EQUED = 'N' on exit. * * On exit, if EQUED .ne. 'N', A(IA:IA+N-1,JA:JA+N-1) is scaled * as follows: * EQUED = 'R': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * EQUED = 'C': A(IA:IA+N-1,JA:JA+N-1) := * A(IA:IA+N-1,JA:JA+N-1) * diag(C) * EQUED = 'B': A(IA:IA+N-1,JA:JA+N-1) := * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * AF (local input or local output) COMPLEX*16 pointer * into the local memory to an array of local dimension * (LLD_AF,LOCc(JA+N-1)). If FACT = 'F', then * AF(IAF:IAF+N-1,JAF:JAF+N-1) is an input argument and on * entry contains the factors L and U from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by PZGETRF. * If EQUED .ne. 'N', then AF is the factored form of the * equilibrated matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'N', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original * matrix A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then AF(IAF:IAF+N-1,JAF:JAF+N-1) is an output * argument and on exit returns the factors L and U from the * factorization A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equili- * brated matrix A(IA:IA+N-1,JA:JA+N-1) (see the description of * A(IA:IA+N-1,JA:JA+N-1) for the form of the equilibrated * matrix). * * IAF (global input) INTEGER * The row index in the global array AF indicating the first * row of sub( AF ). * * JAF (global input) INTEGER * The column index in the global array AF indicating the * first column of sub( AF ). * * DESCAF (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix AF. * * IPIV (local input or local output) INTEGER array, dimension * LOCr(M_A)+MB_A. If FACT = 'F', then IPIV is an input argu- * ment and on entry contains the pivot indices from the fac- * torization A(IA:IA+N-1,JA:JA+N-1) = P*L*U as computed by * PZGETRF; IPIV(i) -> The global row local row i was * swapped with. This array must be aligned with * A( IA:IA+N-1, * ). * * If FACT = 'N', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the original matrix * A(IA:IA+N-1,JA:JA+N-1). * * If FACT = 'E', then IPIV is an output argument and on exit * contains the pivot indices from the factorization * A(IA:IA+N-1,JA:JA+N-1) = P*L*U of the equilibrated matrix * A(IA:IA+N-1,JA:JA+N-1). * * EQUED (global input or global output) CHARACTER * Specifies the form of equilibration that was done. * = 'N': No equilibration (always true if FACT = 'N'). * = 'R': Row equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) has * been premultiplied by diag(R). * = 'C': Column equilibration, i.e., A(IA:IA+N-1,JA:JA+N-1) * has been postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., * A(IA:IA+N-1,JA:JA+N-1) has been replaced by * diag(R) * A(IA:IA+N-1,JA:JA+N-1) * diag(C). * EQUED is an input variable if FACT = 'F'; otherwise, it is an * output variable. * * R (local input or local output) DOUBLE PRECISION array, * dimension LOCr(M_A). * The row scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'R' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the left by diag(R); if EQUED='N' or 'C', R is not acces- * sed. R is an input variable if FACT = 'F'; otherwise, R is * an output variable. * If FACT = 'F' and EQUED = 'R' or 'B', each element of R must * be positive. * R is replicated in every process column, and is aligned * with the distributed matrix A. * * C (local input or local output) DOUBLE PRECISION array, * dimension LOCc(N_A). * The column scale factors for A(IA:IA+N-1,JA:JA+N-1). * If EQUED = 'C' or 'B', A(IA:IA+N-1,JA:JA+N-1) is multiplied * on the right by diag(C); if EQUED = 'N' or 'R', C is not * accessed. C is an input variable if FACT = 'F'; otherwise, * C is an output variable. If FACT = 'F' and EQUED = 'C' or * 'B', each element of C must be positive. * C is replicated in every process row, and is aligned with * the distributed matrix A. * * B (local input/local output) COMPLEX*16 pointer * into the local memory to an array of local dimension * (LLD_B,LOCc(JB+NRHS-1) ). On entry, the N-by-NRHS right-hand * side matrix B(IB:IB+N-1,JB:JB+NRHS-1). On exit, if * EQUED = 'N', B(IB:IB+N-1,JB:JB+NRHS-1) is not modified; if * TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by * diag(R)*B(IB:IB+N-1,JB:JB+NRHS-1); if TRANS = 'T' or 'C' * and EQUED = 'C' or 'B', B(IB:IB+N-1,JB:JB+NRHS-1) is over- * written by diag(C)*B(IB:IB+N-1,JB:JB+NRHS-1). * * IB (global input) INTEGER * The row index in the global array B indicating the first * row of sub( B ). * * JB (global input) INTEGER * The column index in the global array B indicating the * first column of sub( B ). * * DESCB (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix B. * * X (local input/local output) COMPLEX*16 pointer * into the local memory to an array of local dimension * (LLD_X, LOCc(JX+NRHS-1)). If INFO = 0, the N-by-NRHS * solution matrix X(IX:IX+N-1,JX:JX+NRHS-1) to the original * system of equations. Note that A(IA:IA+N-1,JA:JA+N-1) and * B(IB:IB+N-1,JB:JB+NRHS-1) are modified on exit if * EQUED .ne. 'N', and the solution to the equilibrated system * is inv(diag(C))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'N' * and EQUED = 'C' or 'B', or * inv(diag(R))*X(IX:IX+N-1,JX:JX+NRHS-1) if TRANS = 'T' or 'C' * and EQUED = 'R' or 'B'. * * IX (global input) INTEGER * The row index in the global array X indicating the first * row of sub( X ). * * JX (global input) INTEGER * The column index in the global array X indicating the * first column of sub( X ). * * DESCX (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix X. * * RCOND (global output) DOUBLE PRECISION * The estimate of the reciprocal condition number of the matrix * A(IA:IA+N-1,JA:JA+N-1) after equilibration (if done). If * RCOND is less than the machine precision (in particular, if * RCOND = 0), the matrix is singular to working precision. * This condition is indicated by a return code of INFO > 0. * * FERR (local output) DOUBLE PRECISION array, dimension LOCc(N_B) * The estimated forward error bounds for each solution vector * X(j) (the j-th column of the solution matrix * X(IX:IX+N-1,JX:JX+NRHS-1). If XTRUE is the true solution, * FERR(j) bounds the magnitude of the largest entry in * (X(j) - XTRUE) divided by the magnitude of the largest entry * in X(j). The estimate is as reliable as the estimate for * RCOND, and is almost always a slight overestimate of the * true error. FERR is replicated in every process row, and is * aligned with the matrices B and X. * * BERR (local output) DOUBLE PRECISION array, dimension LOCc(N_B). * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any entry of A(IA:IA+N-1,JA:JA+N-1) or * B(IB:IB+N-1,JB:JB+NRHS-1) that makes X(j) an exact solution). * BERR is replicated in every process row, and is aligned * with the matrices B and X. * * WORK (local workspace/local output) COMPLEX*16 array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK = MAX( PZGECON( LWORK ), PZGERFS( LWORK ) ) * + LOCr( N_A ). * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * RWORK (local workspace/local output) DOUBLE PRECISION array, * dimension (LRWORK) * On exit, RWORK(1) returns the minimal and optimal LRWORK. * * LRWORK (local or global input) INTEGER * The dimension of the array RWORK. * LRWORK is local input and must be at least * LRWORK = 2*LOCc(N_A). * * If LRWORK = -1, then LRWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (global output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, and i is * <= N: U(IA+I-1,IA+I-1) is exactly zero. The * factorization has been completed, but the * factor U is exactly singular, so the solution * and error bounds could not be computed. * = N+1: RCOND is less than machine precision. The * factorization has been completed, but the * matrix is singular to working precision, and * the solution and error bounds have not been * computed. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. * .. Local Scalars .. LOGICAL COLEQU, EQUIL, LQUERY, NOFACT, NOTRAN, ROWEQU CHARACTER NORM INTEGER CONWRK, I, IACOL, IAROW, IAFROW, IBROW, IBCOL, $ ICOFFA, ICOFFB, ICOFFX, ICTXT, IDUMM, $ IIA, IIB, IIX, $ INFEQU, IROFFA, IROFFAF, IROFFB, $ IROFFX, IXCOL, IXROW, J, JJA, JJB, JJX, $ LCM, LCMQ, $ LRWMIN, LWMIN, MYCOL, MYROW, NP, NPCOL, NPROW, $ NQ, NQB, NRHSQ, RFSWRK DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, $ ROWCND, SMLNUM * .. * .. Local Arrays .. INTEGER CDESC( DLEN_ ), IDUM1( 5 ), IDUM2( 5 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT, $ DGEBR2D, DGEBS2D, DGAMN2D, $ DGAMX2D, INFOG2L, PDCOPY, PXERBLA, $ PZGECON, PZGEEQU, PZGERFS, $ PZGETRF, PZGETRS, PZLACPY, PZLAQGE * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, ILCM, INDXG2P, NUMROC DOUBLE PRECISION PDLAMCH, PZLANGE EXTERNAL ICEIL, ILCM, INDXG2P, LSAME, NUMROC, PZLANGE, $ PDLAMCH * .. * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(800+CTXT_) ELSE CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 8, INFO ) IF( LSAME( FACT, 'F' ) ) $ CALL CHK1MAT( N, 3, N, 3, IAF, JAF, DESCAF, 12, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IB, JB, DESCB, 20, INFO ) CALL CHK1MAT( N, 3, NRHS, 4, IX, JX, DESCX, 24, INFO ) NOFACT = LSAME( FACT, 'N' ) EQUIL = LSAME( FACT, 'E' ) NOTRAN = LSAME( TRANS, 'N' ) IF( NOFACT .OR. EQUIL ) THEN EQUED = 'N' ROWEQU = .FALSE. COLEQU = .FALSE. ELSE ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) SMLNUM = PDLAMCH( ICTXT, 'Safe minimum' ) BIGNUM = ONE / SMLNUM END IF IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IAFROW = INDXG2P( IAF, DESCAF( MB_ ), MYROW, $ DESCAF( RSRC_ ), NPROW ) IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), $ NPROW ) IXROW = INDXG2P( IX, DESCX( MB_ ), MYROW, DESCX( RSRC_ ), $ NPROW ) IROFFA = MOD( IA-1, DESCA( MB_ ) ) IROFFAF = MOD( IAF-1, DESCAF( MB_ ) ) ICOFFA = MOD( JA-1, DESCA( NB_ ) ) IROFFB = MOD( IB-1, DESCB( MB_ ) ) ICOFFB = MOD( JB-1, DESCB( NB_ ) ) IROFFX = MOD( IX-1, DESCX( MB_ ) ) ICOFFX = MOD( JX-1, DESCX( NB_ ) ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IIA, JJA, IAROW, IACOL ) NP = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, $ NPROW ) IF( MYROW.EQ.IAROW ) $ NP = NP-IROFFA NQ = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, $ NPCOL ) IF( MYCOL.EQ.IACOL ) $ NQ = NQ-ICOFFA NQB = ICEIL( N+IROFFA, DESCA( NB_ )*NPCOL ) LCM = ILCM( NPROW, NPCOL ) LCMQ = LCM / NPCOL CONWRK = 2*NP + 2*NQ + MAX( 2, MAX( DESCA( NB_ )* $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ), NQ + $ DESCA( NB_ )* $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) ) ) RFSWRK = 3*NP IF( LSAME( TRANS, 'N' ) ) THEN RFSWRK = RFSWRK + NP + NQ + $ ICEIL( NQB, LCMQ )*DESCA( NB_ ) ELSE IF( LSAME( TRANS, 'T' ).OR.LSAME( TRANS, 'C' ) ) THEN RFSWRK = RFSWRK + NP + NQ END IF LWMIN = MAX( CONWRK, RFSWRK ) LRWMIN = MAX( 2*NQ, NP ) RWORK( 1 ) = DBLE( LRWMIN ) IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. $ .NOT.LSAME( FACT, 'F' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. $ .NOT. LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( IROFFA.NE.0 ) THEN INFO = -6 ELSE IF( ICOFFA.NE.0 .OR. IROFFA.NE.ICOFFA ) THEN INFO = -7 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(800+NB_) ELSE IF( IAFROW.NE.IAROW ) THEN INFO = -10 ELSE IF( IROFFAF.NE.0 ) THEN INFO = -10 ELSE IF( ICTXT.NE.DESCAF( CTXT_ ) ) THEN INFO = -(1200+CTXT_) ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN INFO = -13 ELSE IF( ROWEQU ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 10 J = IIA, IIA + NP - 1 RCMIN = MIN( RCMIN, R( J ) ) RCMAX = MAX( RCMAX, R( J ) ) 10 CONTINUE CALL DGAMN2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Columnwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -14 ELSE IF( N.GT.0 ) THEN ROWCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE ROWCND = ONE END IF END IF IF( COLEQU .AND. INFO.EQ.0 ) THEN RCMIN = BIGNUM RCMAX = ZERO DO 20 J = JJA, JJA+NQ-1 RCMIN = MIN( RCMIN, C( J ) ) RCMAX = MAX( RCMAX, C( J ) ) 20 CONTINUE CALL DGAMN2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMIN, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) CALL DGAMX2D( ICTXT, 'Rowwise', ' ', 1, 1, RCMAX, $ 1, IDUMM, IDUMM, -1, -1, MYCOL ) IF( RCMIN.LE.ZERO ) THEN INFO = -15 ELSE IF( N.GT.0 ) THEN COLCND = MAX( RCMIN, SMLNUM ) / $ MIN( RCMAX, BIGNUM ) ELSE COLCND = ONE END IF END IF END IF END IF * WORK( 1 ) = DBLE( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) IF( INFO.EQ.0 ) THEN IF( IBROW.NE.IAROW ) THEN INFO = -18 ELSE IF( IXROW.NE.IBROW ) THEN INFO = -22 ELSE IF( DESCB( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2000+NB_) ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN INFO = -(2000+CTXT_) ELSE IF( DESCX( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -(2400+NB_) ELSE IF( ICTXT.NE.DESCX( CTXT_ ) ) THEN INFO = -(2400+CTXT_) ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -29 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN INFO = -31 END IF IDUM1( 1 ) = ICHAR( FACT ) IDUM2( 1 ) = 1 IDUM1( 2 ) = ICHAR( TRANS ) IDUM2( 2 ) = 2 IF( LSAME( FACT, 'F' ) ) THEN IDUM1( 3 ) = ICHAR( EQUED ) IDUM2( 3 ) = 14 IF( LWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 29 IF( LRWORK.EQ.-1 ) THEN IDUM1( 5 ) = -1 ELSE IDUM1( 5 ) = 1 END IF IDUM2( 5 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 5, IDUM1, $ IDUM2, INFO ) ELSE IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 29 IF( LRWORK.EQ.-1 ) THEN IDUM1( 4 ) = -1 ELSE IDUM1( 4 ) = 1 END IF IDUM2( 4 ) = 31 CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 8, N, 3, $ NRHS, 4, IB, JB, DESCB, 20, 4, IDUM1, $ IDUM2, INFO ) END IF END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZGESVX', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * IF( EQUIL ) THEN * * Compute row and column scalings to equilibrate the matrix A. * CALL PZGEEQU( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFEQU ) IF( INFEQU.EQ.0 ) THEN * * Equilibrate the matrix. * CALL PZLAQGE( N, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) END IF END IF * * Scale the right-hand side. * CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, $ JJB, IBROW, IBCOL ) NP = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFB IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFB * IF( NOTRAN ) THEN IF( ROWEQU ) THEN DO 40 J = JJB, JJB+NRHSQ-1 DO 30 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = R( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 30 CONTINUE 40 CONTINUE END IF ELSE IF( COLEQU ) THEN * * Transpose the Column scale factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PDCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), RWORK, IB, JB, $ DESCB, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, RWORK( IIB ), $ DESCB( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, RWORK( IIB ), $ DESCB( LLD_ ), MYROW, IBCOL ) END IF DO 60 J = JJB, JJB+NRHSQ-1 DO 50 I = IIB, IIB+NP-1 B( I+( J-1 )*DESCB( LLD_ ) ) = RWORK( I )* $ B( I+( J-1 )*DESCB( LLD_ ) ) 50 CONTINUE 60 CONTINUE END IF * IF( NOFACT.OR.EQUIL ) THEN * * Compute the LU factorization of A. * CALL PZLACPY( 'Full', N, N, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF ) CALL PZGETRF( N, N, AF, IAF, JAF, DESCAF, IPIV, INFO ) * * Return if INFO is non-zero. * IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO RETURN END IF END IF * * Compute the norm of the matrix A. * IF( NOTRAN ) THEN NORM = '1' ELSE NORM = 'I' END IF ANORM = PZLANGE( NORM, N, N, A, IA, JA, DESCA, RWORK ) * * Compute the reciprocal of the condition number of A. * CALL PZGECON( NORM, N, AF, IAF, JAF, DESCAF, ANORM, RCOND, WORK, $ LWORK, RWORK, LRWORK, INFO ) * * Return if the matrix is singular to working precision. * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N RETURN END IF * * Compute the solution matrix X. * CALL PZLACPY( 'Full', N, NRHS, B, IB, JB, DESCB, X, IX, JX, $ DESCX ) CALL PZGETRS( TRANS, N, NRHS, AF, IAF, JAF, DESCAF, IPIV, X, IX, $ JX, DESCX, INFO ) * * Use iterative refinement to improve the computed solution and * compute error bounds and backward error estimates for it. * CALL PZGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO ) * * Transform the solution matrix X to a solution of the original * system. * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, $ JJX, IXROW, IXCOL ) NP = NUMROC( N+IROFFX, DESCX( MB_ ), MYROW, IXROW, NPROW ) NRHSQ = NUMROC( NRHS+ICOFFX, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYROW.EQ.IBROW ) $ NP = NP-IROFFX IF( MYCOL.EQ.IBCOL ) $ NRHSQ = NRHSQ-ICOFFX * IF( NOTRAN ) THEN IF( COLEQU ) THEN * * Transpose the column scaling factors * CALL DESCSET( CDESC, 1, N+ICOFFA, 1, DESCA( NB_ ), MYROW, $ IACOL, ICTXT, 1 ) CALL PDCOPY( N, C, 1, JA, CDESC, CDESC( LLD_ ), RWORK, IX, $ JX, DESCX, 1 ) IF( MYCOL.EQ.IBCOL ) THEN CALL DGEBS2D( ICTXT, 'Rowwise', ' ', NP, 1, $ RWORK( IIX ), DESCX( LLD_ ) ) ELSE CALL DGEBR2D( ICTXT, 'Rowwise', ' ', NP, 1, $ RWORK( IIX ), DESCX( LLD_ ), MYROW, $ IBCOL ) END IF * DO 80 J = JJX, JJX+NRHSQ-1 DO 70 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = RWORK( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 70 CONTINUE 80 CONTINUE DO 90 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / COLCND 90 CONTINUE END IF ELSE IF( ROWEQU ) THEN DO 110 J = JJX, JJX+NRHSQ-1 DO 100 I = IIX, IIX+NP-1 X( I+( J-1 )*DESCX( LLD_ ) ) = R( I )* $ X( I+( J-1 )*DESCX( LLD_ ) ) 100 CONTINUE 110 CONTINUE DO 120 J = JJX, JJX+NRHSQ-1 FERR( J ) = FERR( J ) / ROWCND 120 CONTINUE END IF * WORK( 1 ) = DBLE( LWMIN ) RWORK( 1 ) = DBLE( LRWMIN ) * RETURN * * End of PZGESVX * END scalapack-1.8.0/SRC/pssyev.f0000640000175000017500000005256210363532303015455 0ustar muammarmuammar SUBROUTINE PSSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, $ Z, IZ, JZ, DESCZ, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LWORK, N * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCZ( * ) REAL A( * ), W( * ), WORK( * ), Z( * ) * .. * * Purpose * ======= * * PSSYEV computes all eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A by calling the recommended sequence * of ScaLAPACK routines. * * In its present form, PSSYEV assumes a homogeneous system and makes * no checks for consistency of the eigenvalues or eigenvectors across * the different processes. Because of this, it is possible that a * heterogeneous system may return incorrect results without any error * messages. * * Notes * ===== * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESCA: * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_) The descriptor type. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the distributed * matrix A. * N_A (global) DESCA( N_ ) The number of columns in the distri- * buted matrix A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of A. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of A. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the matrix A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of A is distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array storing the local blocks of the * distributed matrix A. * LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * * Arguments * ========= * * NP = the number of rows local to a given process. * NQ = the number of columns local to a given process. * * JOBZ (global input) CHARACTER*1 * Specifies whether or not to compute the eigenvectors: * = 'N': Compute eigenvalues only. * = 'V': Compute eigenvalues and eigenvectors. * * UPLO (global input) CHARACTER*1 * Specifies whether the upper or lower triangular part of the * symmetric matrix A is stored: * = 'U': Upper triangular * = 'L': Lower triangular * * N (global input) INTEGER * The number of rows and columns of the matrix A. N >= 0. * * A (local input/workspace) block cyclic DOUBLE PRECISION array, * global dimension (N, N), local dimension ( LLD_A, * LOCc(JA+N-1) ) * * On entry, the symmetric matrix A. If UPLO = 'U', only the * upper triangular part of A is used to define the elements of * the symmetric matrix. If UPLO = 'L', only the lower * triangular part of A is used to define the elements of the * symmetric matrix. * * On exit, the lower triangle (if UPLO='L') or the upper * triangle (if UPLO='U') of A, including the diagonal, is * destroyed. * * IA (global input) INTEGER * A's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JA (global input) INTEGER * A's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * If DESCA( CTXT_ ) is incorrect, PSSYEV cannot guarantee * correct error reporting. * * W (global output) REAL array, dimension (N) * On normal exit, the first M entries contain the selected * eigenvalues in ascending order. * * Z (local output) REAL array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. * If JOBZ = 'N', then Z is not referenced. * * IZ (global input) INTEGER * Z's global row index, which points to the beginning of the * submatrix which is to be operated on. * * JZ (global input) INTEGER * Z's global column index, which points to the beginning of * the submatrix which is to be operated on. * * DESCZ (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix Z. * DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) * * WORK (local workspace/output) REAL array, * dimension (LWORK) * Version 1.0: on output, WORK(1) returns the workspace * needed to guarantee completion. * If the input parameters are incorrect, WORK(1) may also be * incorrect. * * If JOBZ='N' WORK(1) = minimal=optimal amount of workspace * If JOBZ='V' WORK(1) = minimal workspace required to * generate all the eigenvectors. * * * LWORK (local input) INTEGER * See below for definitions of variables used to define LWORK. * If no eigenvectors are requested (JOBZ = 'N') then * LWORK >= 5*N + SIZESYTRD + 1 * where * SIZESYTRD = The workspace requirement for PSSYTRD * and is MAX( NB * ( NP +1 ), 3 * NB ) * If eigenvectors are requested (JOBZ = 'V' ) then * the amount of workspace required to guarantee that all * eigenvectors are computed is: * * QRMEM = 2*N-2 * LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 * * Variable definitions: * NB = DESCA( MB_ ) = DESCA( NB_ ) = * DESCZ( MB_ ) = DESCZ( NB_ ) * NN = MAX( N, NB, 2 ) * DESCA( RSRC_ ) = DESCA( RSRC_ ) = DESCZ( RSRC_ ) = * DESCZ( CSRC_ ) = 0 * NP = NUMROC( NN, NB, 0, 0, NPROW ) * NQ = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) * NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) * LDC = MAX( 1, NRC ) * SIZEMQRLEFT = The workspace requirement for PSORMTR * when it's SIDE argument is 'L'. * * With MYPROWC defined when a new context is created as: * CALL BLACS_GET( DESCA( CTXT_ ), 0, CONTEXTC ) * CALL BLACS_GRIDINIT( CONTEXTC, 'R', NPROCS, 1 ) * CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, * MYPCOLC ) * * If LWORK = -1, the LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * size for the WORK array. The required workspace is returned * as the first element of WORK and no error message is issued * by PXERBLA. * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * > 0: If INFO = 1 through N, the i(th) eigenvalue did not * converge in SSTEQR2 after a total of 30*N iterations. * If INFO = N+1, then PSSYEV has detected heterogeneity * by finding that eigenvalues were not identical across * the process grid. In this case, the accuracy of * the results from PSSYEV cannot be guaranteed. * * Alignment requirements * ====================== * * The distributed submatrices A(IA:*, JA:*) and Z(IZ:IZ+M-1,JZ:JZ+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND. * IAROW.EQ.IZROW ) * where * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ). * * ===================================================================== * * Version 1.4 limitations: * DESCA(MB_) = DESCA(NB_) * DESCA(M_) = DESCZ(M_) * DESCA(N_) = DESCZ(N_) * DESCA(MB_) = DESCZ(MB_) * DESCA(NB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_, $ MB_, NB_, RSRC_, CSRC_, LLD_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) REAL FIVE, ONE, TEN, ZERO PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, $ TEN = 10.0E+0, FIVE = 5.0E+0 ) INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ, ITHVAL PARAMETER ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4, $ IERREBZ = 8, ITHVAL = 10 ) * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, $ MYCOL, MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, $ SIZESYTRD REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. INTEGER DESCQR( 9 ), IDUM1( 3 ), IDUM2( 3 ) * .. * .. External Functions .. LOGICAL LSAME INTEGER INDXG2P, NUMROC, SL_GRIDRESHAPE REAL PSLAMCH, PSLANSY EXTERNAL LSAME, NUMROC, PSLAMCH, PSLANSY, $ SL_GRIDRESHAPE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, CHK1MAT, $ DESCINIT, PCHK1MAT, PCHK2MAT, PSELGET, $ PSGEMR2D, PSLASCL, PSLASET, PSORMTR, PSSYTRD, $ PXERBLA, SCOPY, SSCAL, SSTEQR2 * .. * .. Intrinsic Functions .. INTRINSIC ABS, ICHAR, INT, MAX, MIN, MOD, REAL, SQRT * .. * .. Executable Statements .. * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* $ RSRC_.LT.0 )RETURN * * Quick return * IF( N.EQ.0 ) RETURN * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN INFO = -( 700+CTXT_ ) ELSE IF( WANTZ ) THEN IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) END IF END IF IF( INFO .EQ. 0 ) THEN CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) IF( WANTZ ) $ CALL CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) * IF( INFO.EQ.0 ) THEN * * Get machine constants. * SAFMIN = PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' ) SMLNUM = SAFMIN / EPS BIGNUM = ONE / SMLNUM RMIN = SQRT( SMLNUM ) RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) * NPROCS = NPROW*NPCOL NB_A = DESCA( NB_ ) MB_A = DESCA( MB_ ) NB = NB_A LOWER = LSAME( UPLO, 'L' ) * RSRC_A = DESCA( RSRC_ ) CSRC_A = DESCA( CSRC_ ) IROFFA = MOD( IA-1, MB_A ) ICOFFA = MOD( JA-1, NB_A ) IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) IACOL = INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) NP = NUMROC( N+IROFFA, NB, MYROW, IAROW, NPROW ) NQ = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ) IF( WANTZ ) THEN NB_Z = DESCZ( NB_ ) MB_Z = DESCZ( MB_ ) RSRC_Z = DESCZ( RSRC_ ) IROFFZ = MOD( IZ-1, MB_A ) IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) SIZEMQRLEFT = MAX( ( NB_A*( NB_A-1 ) ) / 2, ( NP+NQ )* $ NB_A ) + NB_A*NB_A ELSE SIZEMQRLEFT = 0 END IF SIZESYTRD = MAX( NB * ( NP +1 ), 3 * NB ) * * Initialize the context of the single column distributed * matrix required by SSTEQR2. This specific distribution * allows each process to do 1/pth of the work updating matrix * Q during SSTEQR2 and achieve some parallelization to an * otherwise serial subroutine. * LDC = 0 IF( WANTZ ) THEN CONTEXTC = SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, $ NPROCS, 1 ) CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, $ MYPCOLC ) NRC = NUMROC( N, NB_A, MYPROWC, 0, NPROCS) LDC = MAX( 1, NRC ) CALL DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, $ LDC, INFO ) END IF * * Set up pointers into the WORK array * INDTAU = 1 INDE = INDTAU + N INDD = INDE + N INDD2 = INDD + N INDE2 = INDD2 + N INDWORK = INDE2 + N INDWORK2 = INDWORK + N*LDC LLWORK = LWORK - INDWORK + 1 * * Compute the total amount of space needed * QRMEM = 2*N-2 IF( WANTZ ) THEN LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 ELSE LWMIN = 5*N + SIZESYTRD + 1 END IF * END IF IF( INFO.EQ.0 ) THEN IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN INFO = -2 ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN INFO = -14 ELSE IF( IROFFA.NE.0 ) THEN INFO = -5 ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN INFO = -( 700+NB_ ) END IF IF( WANTZ ) THEN IF( IROFFA.NE.IROFFZ ) THEN INFO = -10 ELSE IF( IAROW.NE.IZROW ) THEN INFO = -10 ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN INFO = -( 1200+M_ ) ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN INFO = -( 1200+N_ ) ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN INFO = -( 1200+MB_ ) ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN INFO = -( 1200+NB_ ) ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN INFO = -( 1200+RSRC_ ) ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN INFO = -( 1200+CTXT_ ) ENDIF END IF END IF IF( WANTZ ) THEN IDUM1( 1 ) = ICHAR( 'V' ) ELSE IDUM1( 1 ) = ICHAR( 'N' ) END IF IDUM2( 1 ) = 1 IF( LOWER ) THEN IDUM1( 2 ) = ICHAR( 'L' ) ELSE IDUM1( 2 ) = ICHAR( 'U' ) END IF IDUM2( 2 ) = 2 IF( LWORK.EQ.-1 ) THEN IDUM1( 3 ) = -1 ELSE IDUM1( 3 ) = 1 END IF IDUM2( 3 ) = 3 IF( LSAME( JOBZ, 'V' ) ) THEN CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, $ IZ, JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) ELSE CALL PCHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, 3, IDUM1, $ IDUM2, INFO ) END IF * * Write the required workspace for lwork queries. * WORK( 1 ) = REAL( LWMIN ) END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PSSYEV', -INFO ) IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN ELSE IF( LWORK .EQ. -1 ) THEN IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) RETURN END IF * * Scale matrix to allowable range, if necessary. * ISCALE = 0 * ANRM = PSLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) * IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN ISCALE = 1 SIGMA = RMIN / ANRM ELSE IF( ANRM.GT.RMAX ) THEN ISCALE = 1 SIGMA = RMAX / ANRM END IF * IF( ISCALE.EQ.1 ) THEN CALL PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO ) END IF * * Reduce symmetric matrix to tridiagonal form. * CALL PSSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), $ WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), $ LLWORK, IINFO ) * * Copy the values of D, E to all processes. * DO 10 I=1,N CALL PSELGET( 'A', ' ', WORK(INDD2+I-1), A, $ I+IA-1, I+JA-1, DESCA ) 10 CONTINUE IF( LSAME( UPLO, 'U') ) THEN DO 20 I=1,N-1 CALL PSELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA-1, I+JA, DESCA ) 20 CONTINUE ELSE DO 30 I=1,N-1 CALL PSELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA, I+JA-1, DESCA ) 30 CONTINUE ENDIF * IF( WANTZ ) THEN * CALL PSLASET( 'Full', N, N, ZERO, ONE, WORK( INDWORK ), 1, 1, $ DESCQR ) * * SSTEQR2 is a modified version of LAPACK's SSTEQR. The * modifications allow each process to perform partial updates * to matrix Q. * CALL SSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), $ INFO ) * CALL PSGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, $ DESCZ, CONTEXTC ) * CALL PSORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, $ WORK( INDTAU ), Z, IZ, JZ, DESCZ, $ WORK( INDWORK ), LLWORK, IINFO ) * ELSE * CALL SSTEQR2( 'N', N, WORK( INDD2 ), WORK( INDE2 ), $ WORK( INDWORK ), 1, 1, WORK( INDWORK2 ), $ INFO ) ENDIF * * Copy eigenvalues from workspace to output array * CALL SCOPY( N, WORK( INDD2 ), 1, W, 1 ) * * If matrix was scaled, then rescale eigenvalues appropriately. * IF( ISCALE .EQ. 1 ) THEN CALL SSCAL( N, ONE / SIGMA, W, 1 ) END IF * * Free up resources * IF( WANTZ ) THEN CALL BLACS_GRIDEXIT( CONTEXTC ) END IF * * Compare every ith eigenvalue, or all if there are only a few, * across the process grid to check for heterogeneity. * IF( N.LE.ITHVAL ) THEN J = N K = 1 ELSE J = N/ITHVAL K = ITHVAL END IF * DO 40 I = 1, J WORK( I+INDTAU ) = W( (I-1)*K+1 ) WORK( I+INDE ) = W( (I-1)*K+1 ) 40 CONTINUE * CALL SGAMN2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDTAU ), $ J, 1, 1, -1, -1, 0 ) CALL SGAMX2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDE ), $ J, 1, 1, -1, -1, 0 ) * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ) $ .NE. ZERO ) )THEN INFO = N+1 END IF 50 CONTINUE * RETURN * * End of PSSYEV * END scalapack-1.8.0/SRC/pclarfc.f0000640000175000017500000007005010363532303015526 0ustar muammarmuammar SUBROUTINE PCLARFC( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARFC applies a complex elementary reflector Q**H to a * complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), * from either the left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also have the first row of sub( C ). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also have the first column of sub( C ) and * MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the * last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q**H * sub( C ), * = 'R': form sub( C ) * Q**H. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * V (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q**H * sub( C ) if SIDE = 'L', or * sub( C ) * Q**H if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC, $ IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC, $ LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW, $ NQ, RDEST COMPLEX TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CCOPY, CGEBR2D, CGEBS2D, $ CGEMV, CGERC, CGERV2D, CGESD2D, $ CGSUM2D, CLASET, INFOG2L, PB_TOPGET, $ PBCTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) JJC = MIN( JJC, NCC ) JJV = MIN( JJV, NCV ) IOFFC = IIC+(JJC-1)*LDC IOFFV = IIV+(JJV-1)*LDV * IROFF = MOD( IC-1, DESCC( MB_ ) ) ICOFF = MOD( JC-1, DESCC( NB_ ) ) MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYROW.EQ.ICROW ) $ MP = MP - IROFF IF( MYCOL.EQ.ICCOL ) $ NQ = NQ - ICOFF * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) ) * IF( LSAME( SIDE, 'L' ) ) THEN * IF( CRBLCK ) THEN RDEST = ICROW ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V * IPW = MP+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL ) THEN * TAULOC = CONJG( TAU( JJV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, V( IOFFV ), 1, $ ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK, MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK, MAX( 1, NQ ), RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL CCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL ) * ELSE IF( MYCOL.EQ.ICCOL ) THEN * IPW = MP+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = CONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, $ ONE, C( IOFFC ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V * IPW = MP+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), 1, $ C( IOFFC ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MP+1 CALL CCOPY( MP, V( IOFFV ), 1, WORK, 1 ) WORK(IPW) = TAU( JJV ) CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * IPW = MP+1 CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = CONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MP.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQ, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQ ) ) END IF CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1, $ WORK( IPW ), MAX( 1, NQ ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ), 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW ) THEN * TAULOC = CONJG( TAU( IIV ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, V( IOFFV ), LDV, $ ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK, MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK, MAX( 1, MP ), RDEST, ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW * IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL CCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW ) THEN * IPW = NQ+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = CONJG( WORK( IPW ) ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V * IPW = NQ+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQ+1 CALL CCOPY( NQ, V( IOFFV ), LDV, WORK, 1 ) WORK(IPW) = TAU( IIV ) CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = CONJG( TAU( IIV ) ) * ELSE * IPW = NQ+1 CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = CONJG( WORK( IPW ) ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No Transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V * IPW = NQ+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = CONJG( TAU( JJV ) ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) TAULOC = CONJG( TAULOC ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQ.GT.0 ) THEN CALL CGEMV( 'No transpose', MP, NQ, ONE, $ C( IOFFC ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ), $ MAX( 1, MP ) ) END IF CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1, $ WORK( IPW ), MAX( 1, MP ), RDEST, $ ICCOL ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1, $ C( IOFFC ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PCLARFC * END scalapack-1.8.0/SRC/pzdbtrs.f0000640000175000017500000006461710363532303015620 0ustar muammarmuammar SUBROUTINE PZDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * August 7, 2001 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ) * .. * * * Purpose * ======= * * PZDBTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PZDBTRF. * A(1:N, JA:JA+N-1) is an N-by-N complex * banded diagonally dominant-like distributed * matrix with bandwidth BWL, BWU. * * Routine PZDBTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) ); * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * BWL (global input) INTEGER * Number of subdiagonals. 0 <= BWL <= N-1 * * BWU (global input) INTEGER * Number of superdiagonals. 0 <= BWU <= N-1 * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * A (local input/local output) COMPLEX*16 pointer into * local memory to an array with first dimension * LLD_A >=(bwl+bwu+1) (stored in DESCA). * On entry, this array contains the local pieces of the * N-by-N unsymmetric banded distributed Cholesky factor L or * L^T A(1:N, JA:JA+N-1). * This local portion is stored in the packed banded format * used in LAPACK. Please see the Notes below and the * ScaLAPACK manual for more detail on the format of * distributed matrices. * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9 . * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) COMPLEX*16 pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) COMPLEX*16 array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PZDBTRF and this is stored in AF. If a linear system * is to be solved using PZDBTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * COMPLEX*16 temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * (max(bwl,bwu)*NRHS) * * INFO (global output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2*MAX(BWL,BWU) * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the banded matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small (max(bwl,bwu)* (P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: banded codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK NO OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * LLD_A (local) DESCA( 6 ) The leading dimension of the local array * storing the local blocks of the distri- * buted array A. Minimum value of LLD_A * depends on TYPE_A. * TYPE_A = 501: LLD_A >= * size of undistributed dimension, 1. * TYPE_A = 502: LLD_A >=NB_A, 1. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * This version released: August, 2001. * * ===================================================================== * * .. * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0 ) PARAMETER ( ZERO = 0.0D+0 ) COMPLEX*16 CONE, CZERO PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ NB, NP, NPCOL, NPROW, NP_SAVE, PART_OFFSET, $ RETURN_CODE, STORE_M_B, STORE_N_A, $ WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 17, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DESC_CONVERT, GLOBCHK, PXERBLA, $ PZDBTRSV, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 8*100 + 2 ) ENDIF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE .NE. 0) THEN INFO = -( 11*100 + 2 ) ENDIF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN INFO = -( 11*100 + 2 ) ENDIF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN INFO = -( 11*100 + 4 ) ENDIF * * Source processor must be the same * IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN INFO = -( 11*100 + 5 ) ENDIF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW * NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF ( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'C' ) ELSE INFO = -1 END IF * IF( LWORK .LT. -1) THEN INFO = -15 ELSE IF ( LWORK .EQ. -1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 ENDIF * IF( N .LT. 0 ) THEN INFO = -2 ENDIF * IF( N+JA-1 .GT. STORE_N_A ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF(( BWL .GT. N-1 ) .OR. $ ( BWL .LT. 0 ) ) THEN INFO = -3 ENDIF * IF(( BWU .GT. N-1 ) .OR. $ ( BWU .LT. 0 ) ) THEN INFO = -4 ENDIF * IF( LLDA .LT. (BWL+BWU+1) ) THEN INFO = -( 8*100 + 6 ) ENDIF * IF( NB .LE. 0 ) THEN INFO = -( 8*100 + 4 ) ENDIF * IF( N+IB-1 .GT. STORE_M_B ) THEN INFO = -( 11*100 + 3 ) ENDIF * IF( LLDB .LT. NB ) THEN INFO = -( 11*100 + 6 ) ENDIF * IF( NRHS .LT. 0 ) THEN INFO = -5 ENDIF * * Current alignment restriction * IF( JA .NE. IB) THEN INFO = -7 ENDIF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW .NE. 1 ) THEN INFO = -( 8*100+2 ) ENDIF * IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, $ 'PZDBTRS, D&C alg.: only 1 block per proc', $ -INFO ) RETURN ENDIF * IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*MAX(BWL,BWU) )) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, $ 'PZDBTRS, D&C alg.: NB too small', $ -INFO ) RETURN ENDIF * * WORK_SIZE_MIN = $ (MAX(BWL,BWU)*NRHS) * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK .LT. WORK_SIZE_MIN ) THEN IF( LWORK .NE. -1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, $ 'PZDBTRS: worksize error', $ -INFO ) ENDIF RETURN ENDIF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 17, 1 ) = DESCB(5) PARAM_CHECK( 16, 1 ) = DESCB(4) PARAM_CHECK( 15, 1 ) = DESCB(3) PARAM_CHECK( 14, 1 ) = DESCB(2) PARAM_CHECK( 13, 1 ) = DESCB(1) PARAM_CHECK( 12, 1 ) = IB PARAM_CHECK( 11, 1 ) = DESCA(5) PARAM_CHECK( 10, 1 ) = DESCA(4) PARAM_CHECK( 9, 1 ) = DESCA(3) PARAM_CHECK( 8, 1 ) = DESCA(1) PARAM_CHECK( 7, 1 ) = JA PARAM_CHECK( 6, 1 ) = NRHS PARAM_CHECK( 5, 1 ) = BWU PARAM_CHECK( 4, 1 ) = BWL PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 17, 2 ) = 1105 PARAM_CHECK( 16, 2 ) = 1104 PARAM_CHECK( 15, 2 ) = 1103 PARAM_CHECK( 14, 2 ) = 1102 PARAM_CHECK( 13, 2 ) = 1101 PARAM_CHECK( 12, 2 ) = 10 PARAM_CHECK( 11, 2 ) = 805 PARAM_CHECK( 10, 2 ) = 804 PARAM_CHECK( 9, 2 ) = 803 PARAM_CHECK( 8, 2 ) = 801 PARAM_CHECK( 7, 2 ) = 7 PARAM_CHECK( 6, 2 ) = 5 PARAM_CHECK( 5, 2 ) = 4 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO * DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 17, PARAM_CHECK, 17, $ PARAM_CHECK( 1, 3 ), INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PZDBTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) ) * IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN PART_OFFSET = PART_OFFSET + NB ENDIF * IF ( MYCOL .LT. CSRC ) THEN PART_OFFSET = PART_OFFSET - NB ENDIF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 )/NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, $ FIRST_PROC, INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW .LT. 0 ) THEN GOTO 1234 ENDIF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PZDBTRSV( 'L', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PZDBTRSV( 'U', 'C', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF * * Call backsolve routine * IF( LSAME( TRANS, 'C' ) ) THEN * CALL PZDBTRSV( 'L', 'C', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ELSE * CALL PZDBTRSV( 'U', 'N', N, BWL, BWU, NRHS, A( PART_OFFSET+1 ), $ JA_NEW, DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, $ WORK, LWORK, INFO ) * ENDIF 1000 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) ENDIF * 1234 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PZDBTRS * END scalapack-1.8.0/SRC/pcgeqr2.f0000640000175000017500000002754610363532303015473 0ustar muammarmuammar SUBROUTINE PCGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCGEQR2 computes a QR factorization of a complex distributed M-by-N * matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = Q * R. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on, i.e. the number of rows * of the distributed submatrix sub( A ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on, i.e. the number of * columns of the distributed submatrix sub( A ). N >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). * On entry, the local pieces of the M-by-N distributed matrix * sub( A ) which is to be factored. On exit, the elements on * and above the diagonal of sub( A ) contain the min(M,N) by N * upper trapezoidal matrix R (R is upper triangular if M >= N); * the elements below the diagonal, with the array TAU, * represent the unitary matrix Q as a product of elementary * reflectors (see Further Details). * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local output) COMPLEX, array, dimension * LOCc(JA+MIN(M,N)-1). This array contains the scalar factors * TAU of the elementary reflectors. TAU is tied to the * distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= Mp0 + MAX( 1, Nq0 ), where * * IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * Mp0 = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ), * Nq0 = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ), * * and NUMROC, INDXG2P are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(m,n). * * Each H(i) has the form * * H(j) = I - tau * v * v' * * where tau is a complex scalar, and v is a complex vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in * A(ia+i:ia+m-1,ja+i-1), and tau in TAU(ja+i-1). * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER I, II, IACOL, IAROW, ICTXT, J, JJ, K, LWMIN, $ MP, MYCOL, MYROW, NPCOL, NPROW, NQ COMPLEX AJJ, ALPHA * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CGEBR2D, $ CGEBS2D, CHK1MAT, CLARFG, CSCAL, $ INFOG2L, PCELSET, PCLARFC, $ PCLARFG, PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2P, NUMROC EXTERNAL INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(600+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = MP + MAX( 1, NQ ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) $ INFO = -9 END IF END IF * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', 'I-ring' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', ' ' ) * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, $ JJ, IAROW, IACOL ) IF( MYROW.EQ.IAROW ) THEN NQ = NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) I = II+(JJ-1)*DESCA( LLD_ ) IF( MYCOL.EQ.IACOL ) THEN AJJ = A( I ) CALL CLARFG( 1, AJJ, A( I ), 1, TAU( JJ ) ) IF( N.GT.1 ) THEN ALPHA = ONE - CONJG( TAU( JJ ) ) CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, 1 ) CALL CSCAL( NQ-JJ, ALPHA, A( I+DESCA( LLD_ ) ), $ DESCA( LLD_ ) ) END IF CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), $ 1 ) A( I ) = AJJ ELSE IF( N.GT.1 ) THEN CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, ALPHA, $ 1, IAROW, IACOL ) CALL CSCAL( NQ-JJ+1, ALPHA, A( I ), DESCA( LLD_ ) ) END IF END IF ELSE IF( MYCOL.EQ.IACOL ) THEN CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAU( JJ ), 1, $ IAROW, IACOL ) END IF * ELSE * K = MIN( M, N ) DO 10 J = JA, JA+K-1 I = IA + J - JA * * Generate elementary reflector H(j) to annihilate * A(i+1:ia+m-1,j) * CALL PCLARFG( M-J+JA, AJJ, I, J, A, MIN( I+1, IA+M-1 ), J, $ DESCA, 1, TAU ) IF( J.LT.JA+N-1 ) THEN * * Apply H(j)' to A(i:ia+m-1,j+1:ja+n-1) from the left * CALL PCELSET( A, I, J, DESCA, ONE ) * CALL PCLARFC( 'Left', M-J+JA, N-J+JA-1, A, I, J, DESCA, $ 1, TAU, A, I, J+1, DESCA, WORK ) END IF CALL PCELSET( A, I, J, DESCA, AJJ ) * 10 CONTINUE * END IF * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCGEQR2 * END scalapack-1.8.0/SRC/pclarz.f0000640000175000017500000010210210363532303015401 0ustar muammarmuammar SUBROUTINE PCLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX C( * ), TAU( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PCLARZ applies a complex elementary reflector Q to a complex M-by-N * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the * left or the right. Q is represented in the form * * Q = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then Q is taken to be the unit matrix. * * Q is a product of k elementary reflectors as returned by PCTZRZF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be viewed as a subclass of matrices, a * distributed vector is considered to be a distributed matrix. * * Restrictions * ============ * * If SIDE = 'Left' and INCV = 1, then the row process having the first * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover, * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only * the last equality must be satisfied. * * If SIDE = 'Right' and INCV = M_V then the column process having the * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only * the last equality must be satisfied. * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': form Q * sub( C ), * = 'R': form sub( C ) * Q. * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * L (global input) INTEGER * The columns of the distributed submatrix sub( A ) containing * the meaningful part of the Householder reflectors. * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. * * V (local input) COMPLEX pointer into the local memory * to an array of dimension (LLD_V,*) containing the local * pieces of the distributed vectors V representing the * Householder transformation Q, * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V, * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1, * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V, * * The vector v in the representation of Q. V is not used if * TAU = 0. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * INCV (global input) INTEGER * The global increment for the elements of V. Only two values * of INCV are supported in this version, namely 1 and M_V. * INCV must not be zero. * * TAU (local input) COMPLEX, array, dimension LOCc(JV) if * INCV = 1, and LOCr(IV) otherwise. This array contains the * Householder scalars related to the Householder vectors. * TAU is tied to the distributed matrix V. * * C (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ), * containing the local pieces of sub( C ). On exit, sub( C ) * is overwritten by the Q * sub( C ) if SIDE = 'L', or * sub( C ) * Q if SIDE = 'R'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX array, dimension (LWORK) * If INCV = 1, * if SIDE = 'L', * if IVCOL = ICCOL, * LWORK >= NqC0 * else * LWORK >= MpC0 + MAX( 1, NqC0 ) * end if * else if SIDE = 'R', * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC( * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) ) * end if * else if INCV = M_V, * if SIDE = 'L', * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC( * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) ) * else if SIDE = 'R', * if IVROW = ICROW, * LWORK >= MpC0 * else * LWORK >= NqC0 + MAX( 1, MpC0 ) * end if * end if * end if * * where LCM is the least common multiple of NPROW and NPCOL and * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW, * LCMQ = LCM / NPCOL, * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * MB_V = NB_V, * * If INCV = 1, * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if INCV = M_V, * If SIDE = 'Left', * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL CCBLCK, CRBLCK, LEFT CHARACTER COLBTOP, ROWBTOP INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV, $ ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1, $ IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV, $ IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2, $ MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW, $ NQC2, NQV, RDEST COMPLEX TAULOC * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, CAXPY, CCOPY, CGEBR2D, $ CGEBS2D, CGEMV, CGERC, CGERV2D, $ CGESD2D, CGSUM2D, CLASET, INFOG2L, $ PB_TOPGET, PBCTRNV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) $ RETURN * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) IROFFV = MOD( IV-1, DESCV( NB_ ) ) MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) $ MPV = MPV - IROFFV ICOFFV = MOD( JV-1, DESCV( NB_ ) ) NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQV = NQV - ICOFFV LDV = DESCV( LLD_ ) NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ), $ NPCOL ) LDV = DESCV( LLD_ ) IIV = MIN( IIV, LDV ) JJV = MIN( JJV, NCV ) IOFFV = IIV+(JJV-1)*LDV NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ), $ NPCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC1, JJC1, ICROW1, ICCOL1 ) IROFFC1 = MOD( IC-1, DESCC( MB_ ) ) ICOFFC1 = MOD( JC-1, DESCC( NB_ ) ) LDC = DESCC( LLD_ ) IIC1 = MIN( IIC1, LDC ) JJC1 = MIN( JJC1, MAX( 1, NCC ) ) IOFFC1 = IIC1 + ( JJC1-1 ) * LDC * IF( LEFT ) THEN CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) ) ICOFFC2 = MOD( JC-1, DESCC( NB_ ) ) NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL ) IF( MYCOL.EQ.ICCOL2 ) $ NQC2 = NQC2 - ICOFFC2 ELSE CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL, $ IIC2, JJC2, ICROW2, ICCOL2 ) IROFFC2 = MOD( IC-1, DESCC( MB_ ) ) MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW ) IF( MYROW.EQ.ICROW2 ) $ MPC2 = MPC2 - IROFFC2 ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) ) END IF IIC2 = MIN( IIC2, LDC ) JJC2 = MIN( JJC2, NCC ) IOFFC2 = IIC2 + ( JJC2-1 ) * LDC * * Is sub( C ) only distributed over a process row ? * CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) ) * * Is sub( C ) only distributed over a process column ? * CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) ) * IF( LEFT ) THEN * IF( CRBLCK ) THEN RDEST = ICROW2 ELSE RDEST = -1 END IF * IF( CCBLCK ) THEN * * sub( C ) is distributed over a process column * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose row vector V (ICOFFV = IROFFC2) * IPW = MPV+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAULOC, 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * V is a column vector * IF( IVCOL.EQ.ICCOL2 ) THEN * * Perform the local computation within a process column * IF( MYCOL.EQ.ICCOL2 ) THEN * TAULOC = TAU( JJV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, V( IOFFV ), $ 1, ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK, MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK, MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK, MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK, $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL CGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * ELSE * * Send V and TAU to the process column ICCOL2 * IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ ICCOL2 ) * ELSE IF( MYCOL.EQ.ICCOL2 ) THEN * IPW = MPV+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW, $ IVCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, $ ONE, C( IOFFC2 ), LDC, WORK, 1, $ ZERO, WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), $ RDEST, MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), $ LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, $ WORK( IPW ), 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Transpose and broadcast row vector V (ICOFFV=IROFFC2) * IPW = MPV+1 CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M, $ DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV, $ ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, -1, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.IVROW ) THEN * CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1, $ TAU( IIV ), 1 ) TAULOC = TAU( IIV ) * ELSE * CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC, $ 1, IVROW, MYCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Broadcast column vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN * IPW = MPV+1 CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 ) WORK( IPW ) = TAU( JJV ) CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( JJV ) * ELSE * IPW = MPV+1 CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK, $ IPW, MYROW, IVCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C )' * v * IF( MPV.GT.0 ) THEN CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', NQC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, NQC2 ) ) END IF IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC, $ WORK( IPW ), MAX( 1, NQC2 ) ) * CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1, $ WORK( IPW ), MAX( 1, NQC2 ), RDEST, $ MYCOL ) * * sub( C ) := sub( C ) - v * w' * IF( MYROW.EQ.ICROW1 ) $ CALL CAXPY( NQC2, -TAULOC, WORK( IPW ), $ MAX( 1, NQC2 ), C( IOFFC1 ), LDC ) CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ), $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * IF( CCBLCK ) THEN RDEST = MYROW ELSE RDEST = -1 END IF * IF( CRBLCK ) THEN * * sub( C ) is distributed over a process row * IF( DESCV( M_ ).EQ.INCV ) THEN * * V is a row vector * IF( IVROW.EQ.ICROW2 ) THEN * * Perform the local computation within a process row * IF( MYROW.EQ.ICROW2 ) THEN * TAULOC = TAU( IIV ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, V( IOFFV ), $ LDV, ZERO, WORK, 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK, MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK, 1 ) * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK, MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) * IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK, 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * IF( MPC2.GT.0 .AND. NQV.GT.0 ) $ CALL CGERC( MPC2, NQV, -TAULOC, WORK, 1, $ V( IOFFV ), LDV, C( IOFFC2 ), $ LDC ) END IF * END IF * ELSE * * Send V and TAU to the process row ICROW2 * IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2, $ MYCOL ) * ELSE IF( MYROW.EQ.ICROW2 ) THEN * IPW = NQV+1 CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW, $ MYCOL ) TAULOC = WORK( IPW ) * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), $ RDEST, ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * Transpose column vector V (IROFFV = ICOFFC2) * IPW = NQV+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYROW.EQ.ICROW2 ) THEN * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, $ TAU( JJV ), 1 ) TAULOC = TAU( JJV ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, $ 1, MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, $ WORK, 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * ELSE * * sub( C ) is a proper distributed matrix * IF( DESCV( M_ ).EQ.INCV ) THEN * * Broadcast row vector V * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', $ COLBTOP ) IF( MYROW.EQ.IVROW ) THEN * IPW = NQV+1 CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 ) WORK( IPW ) = TAU( IIV ) CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW ) TAULOC = TAU( IIV ) * ELSE * IPW = NQV+1 CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1, $ WORK, IPW, IVROW, MYCOL ) TAULOC = WORK( IPW ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No Transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) * CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * ELSE * * Transpose and broadcast column vector V (ICOFFC2=IROFFV) * IPW = NQV+1 CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N, $ DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO, $ WORK, 1, IVROW, IVCOL, -1, ICCOL2, $ WORK( IPW ) ) * * Perform the local computation within a process column * IF( MYCOL.EQ.IVCOL ) THEN * CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ), $ 1 ) TAULOC = TAU( JJV ) * ELSE * CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1, $ MYROW, IVCOL ) * END IF * IF( TAULOC.NE.ZERO ) THEN * * w := sub( C ) * v * IF( NQV.GT.0 ) THEN CALL CGEMV( 'No transpose', MPC2, NQV, ONE, $ C( IOFFC2 ), LDC, WORK, 1, ZERO, $ WORK( IPW ), 1 ) ELSE CALL CLASET( 'All', MPC2, 1, ZERO, ZERO, $ WORK( IPW ), MAX( 1, MPC2 ) ) END IF IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1, $ WORK( IPW ), 1 ) CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1, $ WORK( IPW ), MAX( 1, MPC2 ), RDEST, $ ICCOL2 ) IF( MYCOL.EQ.ICCOL1 ) $ CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1, $ C( IOFFC1 ), 1 ) * * sub( C ) := sub( C ) - w * v' * CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK, $ 1, C( IOFFC2 ), LDC ) END IF * END IF * END IF * END IF * RETURN * * End of PCLARZ * END scalapack-1.8.0/SRC/pcungl2.f0000640000175000017500000002641410363532303015473 0ustar muammarmuammar SUBROUTINE PCUNGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 25, 2001 * * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. * .. Array Arguments .. INTEGER DESCA( * ) COMPLEX A( * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * PCUNGL2 generates an M-by-N complex distributed matrix Q denoting * A(IA:IA+M-1,JA:JA+N-1) with orthonormal rows, which is defined as * the first M rows of a product of K elementary reflectors of order N * * Q = H(k)' . . . H(2)' H(1)' * * as returned by PCGELQF. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix Q. M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix Q. N >= M >= 0. * * K (global input) INTEGER * The number of elementary reflectors whose product defines the * matrix Q. M >= K >= 0. * * A (local input/local output) COMPLEX pointer into the * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)). * On entry, the i-th row must contain the vector which defines * the elementary reflector H(i), IA <= i <= IA+K-1, as * returned by PCGELQF in the K rows of its distributed matrix * argument A(IA:IA+K-1,JA:*). On exit, this array contains the * local pieces of the M-by-N distributed matrix Q. * * IA (global input) INTEGER * The row index in the global array A indicating the first * row of sub( A ). * * JA (global input) INTEGER * The column index in the global array A indicating the * first column of sub( A ). * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * TAU (local input) COMPLEX, array, dimension LOCr(IA+K-1). * This array contains the scalar factors TAU(i) of the * elementary reflectors H(i) as returned by PCGELQF. * TAU is tied to the distributed matrix A. * * WORK (local workspace/local output) COMPLEX array, * dimension (LWORK) * On exit, WORK(1) returns the minimal and optimal LWORK. * * LWORK (local or global input) INTEGER * The dimension of the array WORK. * LWORK is local input and must be at least * LWORK >= NqA0 + MAX( 1, MpA0 ), where * * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ), * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ), * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ), * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ), * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ), * * INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * If LWORK = -1, then LWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these * values is returned in the first entry of the corresponding * work array, and no error message is issued by PXERBLA. * * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX ONE, ZERO PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP INTEGER IACOL, IAROW, I, ICTXT, II, J, KP, LWMIN, MPA0, $ MYCOL, MYROW, NPCOL, NPROW, NQA0 COMPLEX TAUI * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, PCELSET, $ PCLACGV, PCLARFC, PCLASET, PCSCAL, $ PB_TOPGET, PB_TOPSET, PXERBLA * .. * .. External Functions .. INTEGER INDXG2L, INDXG2P, NUMROC EXTERNAL INDXG2L, INDXG2P, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Test the input parameters * INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -(700+CTXT_) ELSE CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 7, INFO ) IF( INFO.EQ.0 ) THEN IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) MPA0 = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ), $ MYROW, IAROW, NPROW ) NQA0 = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ), $ MYCOL, IACOL, NPCOL ) LWMIN = NQA0 + MAX( 1, MPA0 ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.M ) THEN INFO = -2 ELSE IF( K.LT.0 .OR. K.GT.M ) THEN INFO = -3 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -10 END IF END IF END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PCUNGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( M.LE.0 ) $ RETURN * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' ) * IF( K.LT.M ) THEN * * Initialise rows ia+k:ia+m-1 to rows of the unit matrix * CALL PCLASET( 'All', M-K, K, ZERO, ZERO, A, IA+K, JA, DESCA ) CALL PCLASET( 'All', M-K, N-K, ZERO, ONE, A, IA+K, JA+K, $ DESCA ) * END IF * TAUI = ZERO KP = NUMROC( IA+K-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) * DO 10 I = IA+K-1, IA, -1 * * Apply H(i)' to A(i:ia+m-1,ja+i-ia:ja+n-1) from the right * J = JA + I - IA II = INDXG2L( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ) IAROW = INDXG2P( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) IF( MYROW.EQ.IAROW ) $ TAUI = TAU( MIN( II, KP ) ) IF( J.LT.JA+N-1 ) THEN CALL PCLACGV( N-J+JA-1, A, I, J+1, DESCA, DESCA( M_ ) ) IF( I.LT.IA+M-1 ) THEN CALL PCELSET( A, I, J, DESCA, ONE ) CALL PCLARFC( 'Right', M-I+IA-1, N-J+JA, A, I, J, DESCA, $ DESCA( M_ ), TAU, A, I+1, J, DESCA, WORK ) END IF CALL PCSCAL( N-J+JA-1, -TAUI, A, I, J+1, DESCA, $ DESCA( M_ ) ) CALL PCLACGV( N-J+JA-1, A, I, J+1, DESCA, DESCA( M_ ) ) END IF CALL PCELSET( A, I, J, DESCA, ONE-CONJG( TAUI ) ) * * Set A(i,ja:j-1) to zero * CALL PCLASET( 'All', 1, J-JA, ZERO, ZERO, A, I, JA, DESCA ) * 10 CONTINUE * CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = CMPLX( REAL( LWMIN ) ) * RETURN * * End of PCUNGL2 * END scalapack-1.8.0/SRC/pzlarzt.f0000640000175000017500000002610510363532303015624 0ustar muammarmuammar SUBROUTINE PZLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N * .. * .. Array Arguments .. INTEGER DESCV( * ) COMPLEX*16 TAU( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARZT forms the triangular factor T of a complex block reflector * H of order > n, which is defined as a product of k elementary * reflectors as returned by PZTZRZF. * * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; * * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. * * If STOREV = 'C', the vector which defines the elementary reflector * H(i) is stored in the i-th column of the array V, and * * H = I - V * T * V' * * If STOREV = 'R', the vector which defines the elementary reflector * H(i) is stored in the i-th row of the array V, and * * H = I - V' * T * V * * Currently, only STOREV = 'R' and DIRECT = 'B' are supported. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * DIRECT (global input) CHARACTER * Specifies the order in which the elementary reflectors are * multiplied to form the block reflector: * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) * = 'B': H = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Specifies how the vectors which define the elementary * reflectors are stored (see also Further Details): * = 'C': columnwise (not supported yet) * = 'R': rowwise * * N (global input) INTEGER * The number of meaningful entries of the block reflector H. * N >= 0. * * K (global input) INTEGER * The order of the triangular factor T (= the number of * elementary reflectors). 1 <= K <= MB_V (= NB_V). * * V (input/output) COMPLEX*16 pointer into the local memory * to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)). * The distributed matrix V contains the Householder vectors. * See further details. * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * TAU (local input) COMPLEX*16, array, dimension LOCr(IV+K-1) * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array * contains the Householder scalars related to the Householder * vectors. TAU is tied to the distributed matrix V. * * T (local output) COMPLEX*16 array, dimension (MB_V,MB_V) * It contains the k-by-k triangular factor of the block * reflector associated with V. T is lower triangular. * * WORK (local workspace) COMPLEX*16 array, * dimension (K*(K-1)/2) * * Further Details * =============== * * The shape of the matrix V and the storage of the vectors which define * the H(i) is best illustrated by the following example with n = 5 and * k = 3. The elements equal to 1 are not stored; the corresponding * array elements are modified but restored on exit. The rest of the * array is not used. * * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': * * ______V_____ * ( v1 v2 v3 ) / \ * ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) * V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) * ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) * ( v1 v2 v3 ) * . . . * . . . * 1 . . * 1 . * 1 * * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': * * ______V_____ * 1 / \ * . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) * . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) * . . . ( . . 1 . . v3 v3 v3 v3 v3 ) * . . . * ( v1 v2 v3 ) * ( v1 v2 v3 ) * V = ( v1 v2 v3 ) * ( v1 v2 v3 ) * ( v1 v2 v3 ) * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW, $ ITMP0, ITMP1, IW, JJV, LDV, MYCOL, MYROW, $ NPCOL, NPROW, NQ * .. * .. External Subroutines .. EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, INFOG2L, PXERBLA, $ ZCOPY, ZGEMV, ZGSUM2D, ZLACGV, $ ZLASET, ZTRMV * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC MOD * .. * .. Executable Statements .. * * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Check for currently supported options * INFO = 0 IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN INFO = -1 ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN INFO = -2 END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PZLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) RETURN END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, $ IIV, JJV, IVROW, IVCOL ) * IF( MYROW.EQ.IVROW ) THEN IW = 1 ITMP0 = 0 LDV = DESCV( LLD_ ) ICOFF = MOD( JV-1, DESCV( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) $ NQ = NQ - ICOFF * DO 10 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = -tau( iv+i-1 ) * * V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)' * ITMP0 = ITMP0 + 1 IF( NQ.GT.0 ) THEN CALL ZLACGV( NQ, V( II+(JJV-1)*LDV ), LDV ) CALL ZGEMV( 'No transpose', ITMP0, NQ, -TAU( II ), $ V( II+1+(JJV-1)*LDV ), LDV, $ V( II+(JJV-1)*LDV ), LDV, ZERO, WORK( IW ), $ 1 ) CALL ZLACGV( NQ, V( II+(JJV-1)*LDV ), LDV ) ELSE CALL ZLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ), $ ITMP0 ) END IF IW = IW + ITMP0 * 10 CONTINUE * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1, $ MYROW, IVCOL ) * IF( MYCOL.EQ.IVCOL ) THEN * IW = 1 ITMP0 = 0 ITMP1 = K + 1 + (K-1) * DESCV( MB_ ) * T( ITMP1-1 ) = TAU( IIV+K-1 ) * DO 20 II = IIV+K-2, IIV, -1 * * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) * ITMP0 = ITMP0 + 1 ITMP1 = ITMP1 - DESCV( MB_ ) - 1 CALL ZCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 ) IW = IW + ITMP0 * CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', ITMP0, $ T( ITMP1+DESCV( MB_ ) ), DESCV( MB_ ), $ T( ITMP1 ), 1 ) T( ITMP1-1 ) = TAU( II ) * 20 CONTINUE * END IF * END IF * RETURN * * End of PZLARZT * END scalapack-1.8.0/SRC/pddttrs.f0000640000175000017500000006662110363532303015611 0ustar muammarmuammar SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 3, 2000 * * .. Scalar Arguments .. CHARACTER TRANS INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. * .. Array Arguments .. INTEGER DESCA( * ), DESCB( * ) DOUBLE PRECISION AF( * ), B( * ), D( * ), DL( * ), DU( * ), $ WORK( * ) * .. * * * Purpose * ======= * * PDDTTRS solves a system of linear equations * * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS) * or * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS) * * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors * stored in A(1:N,JA:JA+N-1) and AF by PDDTTRF. * A(1:N, JA:JA+N-1) is an N-by-N real * tridiagonal diagonally dominant-like distributed * matrix. * * Routine PDDTTRF MUST be called first. * * ===================================================================== * * Arguments * ========= * * * TRANS (global input) CHARACTER * = 'N': Solve with A(1:N, JA:JA+N-1); * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T; * * N (global input) INTEGER * The number of rows and columns to be operated on, i.e. the * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0. * * NRHS (global input) INTEGER * The number of right hand sides, i.e., the number of columns * of the distributed submatrix B(IB:IB+N-1, 1:NRHS). * NRHS >= 0. * * DL (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the lower diagonal of the * matrix. Globally, DL(1) is not referenced, and DL must be * aligned with D. * Must be of size >= DESCA( NB_ ). * On exit, this array contains information containing the * factors of the matrix. * * D (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the main diagonal of the * matrix. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * DU (local input/local output) DOUBLE PRECISION pointer to local * part of global vector storing the upper diagonal of the * matrix. Globally, DU(n) is not referenced, and DU must be * aligned with D. * On exit, this array contains information containing the * factors of the matrix. * Must be of size >= DESCA( NB_ ). * * JA (global input) INTEGER * The index in the global array A that points to the start of * the matrix to be operated on (which may be either all of A * or a submatrix of A). * * DESCA (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_A=501 or 502), DLEN >= 7; * if 2D type (DTYPE_A=1), DLEN >= 9. * The array descriptor for the distributed matrix A. * Contains information of mapping of A to memory. Please * see NOTES below for full description and options. * * B (local input/local output) DOUBLE PRECISION pointer into * local memory to an array of local lead dimension lld_b>=NB. * On entry, this array contains the * the local pieces of the right hand sides * B(IB:IB+N-1, 1:NRHS). * On exit, this contains the local piece of the solutions * distributed matrix X. * * IB (global input) INTEGER * The row index in the global array B that points to the first * row of the matrix to be operated on (which may be either * all of B or a submatrix of B). * * DESCB (global and local input) INTEGER array of dimension DLEN. * if 1D type (DTYPE_B=502), DLEN >=7; * if 2D type (DTYPE_B=1), DLEN >= 9. * The array descriptor for the distributed matrix B. * Contains information of mapping of B to memory. Please * see NOTES below for full description and options. * * AF (local output) DOUBLE PRECISION array, dimension LAF. * Auxiliary Fillin Space. * Fillin is created during the factorization routine * PDDTTRF and this is stored in AF. If a linear system * is to be solved using PDDTTRS after the factorization * routine, AF *must not be altered* after the factorization. * * LAF (local input) INTEGER * Size of user-input Auxiliary Fillin space AF. Must be >= * 2*(NB+2) * If LAF is not large enough, an error code will be returned * and the minimum acceptable size will be returned in AF( 1 ) * * WORK (local workspace/local output) * DOUBLE PRECISION temporary workspace. This space may * be overwritten in between calls to routines. WORK must be * the size given in LWORK. * On exit, WORK( 1 ) contains the minimal LWORK. * * LWORK (local input or global input) INTEGER * Size of user-input workspace WORK. * If LWORK is too small, the minimal acceptable size will be * returned in WORK(1) and an error code is returned. LWORK>= * 10*NPCOL+4*NRHS * * INFO (local output) INTEGER * = 0: successful exit * < 0: If the i-th argument is an array and the j-entry had * an illegal value, then INFO = -(i*100+j), if the i-th * argument is a scalar and had an illegal value, then * INFO = -i. * * ===================================================================== * * * Restrictions * ============ * * The following are restrictions on the input parameters. Some of these * are temporary and will be removed in future releases, while others * may reflect fundamental technical limitations. * * Non-cyclic restriction: VERY IMPORTANT! * P*NB>= mod(JA-1,NB)+N. * The mapping for matrices must be blocked, reflecting the nature * of the divide and conquer algorithm as a task-parallel algorithm. * This formula in words is: no processor may have more than one * chunk of the matrix. * * Blocksize cannot be too small: * If the matrix spans more than one processor, the following * restriction on NB, the size of each block on each processor, * must hold: * NB >= 2 * The bulk of parallel computation is done on the matrix of size * O(NB) on each processor. If this is too small, divide and conquer * is a poor choice of algorithm. * * Submatrix reference: * JA = IB * Alignment restriction that prevents unnecessary communication. * * * ===================================================================== * * * Notes * ===== * * If the factorization routine and the solve routine are to be called * separately (to solve various sets of righthand sides using the same * coefficient matrix), the auxiliary space AF *must not be altered* * between calls to the factorization routine and the solve routine. * * The best algorithm for solving banded and tridiagonal linear systems * depends on a variety of parameters, especially the bandwidth. * Currently, only algorithms designed for the case N/P >> bw are * implemented. These go by many names, including Divide and Conquer, * Partitioning, domain decomposition-type, etc. * For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C * algorithms are the appropriate choice. * * Algorithm description: Divide and Conquer * * The Divide and Conqer algorithm assumes the matrix is narrowly * banded compared with the number of equations. In this situation, * it is best to distribute the input matrix A one-dimensionally, * with columns atomic and rows divided amongst the processes. * The basic algorithm divides the tridiagonal matrix up into * P pieces with one stored on each processor, * and then proceeds in 2 phases for the factorization or 3 for the * solution of a linear system. * 1) Local Phase: * The individual pieces are factored independently and in * parallel. These factors are applied to the matrix creating * fillin, which is stored in a non-inspectable way in auxiliary * space AF. Mathematically, this is equivalent to reordering * the matrix A as P A P^T and then factoring the principal * leading submatrix of size equal to the sum of the sizes of * the matrices factored on each processor. The factors of * these submatrices overwrite the corresponding parts of A * in memory. * 2) Reduced System Phase: * A small ((P-1)) system is formed representing * interaction of the larger blocks, and is stored (as are its * factors) in the space AF. A parallel Block Cyclic Reduction * algorithm is used. For a linear system, a parallel front solve * followed by an analagous backsolve, both using the structure * of the factored matrix, are performed. * 3) Backsubsitution Phase: * For a linear system, a local backsubstitution is performed on * each processor in parallel. * * * Descriptors * =========== * * Descriptors now have *types* and differ from ScaLAPACK 1.0. * * Note: tridiagonal codes can use either the old two dimensional * or new one-dimensional descriptors, though the processor grid in * both cases *must be one-dimensional*. We describe both types below. * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * * One-dimensional descriptors: * * One-dimensional descriptors are a new addition to ScaLAPACK since * version 1.0. They simplify and shorten the descriptor for 1D * arrays. * * Since ScaLAPACK supports two-dimensional arrays as the fundamental * object, we allow 1D arrays to be distributed either over the * first dimension of the array (as if the grid were P-by-1) or the * 2nd dimension (as if the grid were 1-by-P). This choice is * indicated by the descriptor type (501 or 502) * as described below. * However, for tridiagonal matrices, since the objects being * distributed are the individual vectors storing the diagonals, we * have adopted the convention that both the P-by-1 descriptor and * the 1-by-P descriptor are allowed and are equivalent for * tridiagonal matrices. Thus, for tridiagonal matrices, * DTYPE_A = 501 or 502 can be used interchangeably * without any other change. * We require that the distributed vectors storing the diagonals of a * tridiagonal matrix be aligned with each other. Because of this, a * single descriptor, DESCA, serves to describe the distribution of * of all diagonals simultaneously. * * IMPORTANT NOTE: the actual BLACS grid represented by the * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P * irrespective of which one-dimensional descriptor type * (501 or 502) is input. * This routine will interpret the grid properly either way. * ScaLAPACK routines *do not support intercontext operations* so that * the grid passed to a single ScaLAPACK routine *must be the same* * for all array descriptors passed to that routine. * * NOTE: In all cases where 1D descriptors are used, 2D descriptors * may also be used, since a one-dimensional array is a special case * of a two-dimensional array with one dimension of size unity. * The two-dimensional array used in this case *must* be of the * proper orientation: * If the appropriate one-dimensional descriptor is DTYPEA=501 * (1 by P type), then the two dimensional descriptor must * have a CTXT value that refers to a 1 by P BLACS grid; * If the appropriate one-dimensional descriptor is DTYPEA=502 * (P by 1 type), then the two dimensional descriptor must * have a CTXT value that refers to a P by 1 BLACS grid. * * * Summary of allowed descriptors, types, and BLACS grids: * DTYPE 501 502 1 1 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1 * ----------------------------------------------------- * A OK OK OK NO * B NO OK NO OK * * Note that a consequence of this chart is that it is not possible * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead * to opposite requirements for the orientation of the BLACS grid, * and as noted before, the *same* BLACS context must be used in * all descriptors in a single ScaLAPACK subroutine call. * * Let A be a generic term for any 1D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- ---------- ------------------------------------------ * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids, * TYPE_A = 501: 1-by-P grid. * TYPE_A = 502: P-by-1 grid. * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * N_A (global) DESCA( 3 ) The size of the array dimension being * distributed. * NB_A (global) DESCA( 4 ) The blocking factor used to distribute * the distributed dimension of the array. * SRC_A (global) DESCA( 5 ) The process row or column over which the * first row or column of the array * is distributed. * Ignored DESCA( 6 ) Ignored for tridiagonal matrices. * Reserved DESCA( 7 ) Reserved for future use. * * * * ===================================================================== * * Code Developer: Andrew J. Cleary, University of Tennessee. * Current address: Lawrence Livermore National Labs. * * ===================================================================== * * .. Parameters .. INTEGER INT_ONE PARAMETER ( INT_ONE = 1 ) INTEGER DESCMULT, BIGNUM PARAMETER ( DESCMULT = 100, BIGNUM = DESCMULT*DESCMULT ) INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE, $ IDUM2, IDUM3, JA_NEW, LLDA, LLDB, MYCOL, MYROW, $ MY_NUM_COLS, NB, NP, NPCOL, NPROW, NP_SAVE, $ ODD_SIZE, PART_OFFSET, PART_SIZE, RETURN_CODE, $ STORE_M_B, STORE_N_A, TEMP, WORK_SIZE_MIN * .. * .. Local Arrays .. INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ), $ PARAM_CHECK( 15, 3 ) * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDEXIT, BLACS_GRIDINFO, DESC_CONVERT, $ GLOBCHK, PDDTTRSV, PXERBLA, RESHAPE * .. * .. External Functions .. LOGICAL LSAME INTEGER NUMROC EXTERNAL LSAME, NUMROC * .. * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. * .. Executable Statements .. * * Test the input parameters * INFO = 0 * * Convert descriptor into standard form for easy access to * parameters, check that grid is of right shape. * DESCA_1XP( 1 ) = 501 DESCB_PX1( 1 ) = 502 * TEMP = DESCA( DTYPE_ ) IF( TEMP.EQ.502 ) THEN * Temporarily set the descriptor type to 1xP type DESCA( DTYPE_ ) = 501 END IF * CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE ) * DESCA( DTYPE_ ) = TEMP * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 8*100+2 ) END IF * CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE ) * IF( RETURN_CODE.NE.0 ) THEN INFO = -( 11*100+2 ) END IF * * Consistency checks for DESCA and DESCB. * * Context must be the same IF( DESCA_1XP( 2 ).NE.DESCB_PX1( 2 ) ) THEN INFO = -( 11*100+2 ) END IF * * These are alignment restrictions that may or may not be removed * in future releases. -Andy Cleary, April 14, 1996. * * Block sizes must be the same IF( DESCA_1XP( 4 ).NE.DESCB_PX1( 4 ) ) THEN INFO = -( 11*100+4 ) END IF * * Source processor must be the same * IF( DESCA_1XP( 5 ).NE.DESCB_PX1( 5 ) ) THEN INFO = -( 11*100+5 ) END IF * * Get values out of descriptor for use in code. * ICTXT = DESCA_1XP( 2 ) CSRC = DESCA_1XP( 5 ) NB = DESCA_1XP( 4 ) LLDA = DESCA_1XP( 6 ) STORE_N_A = DESCA_1XP( 3 ) LLDB = DESCB_PX1( 6 ) STORE_M_B = DESCB_PX1( 3 ) * * Get grid parameters * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) NP = NPROW*NPCOL * * * IF( LSAME( TRANS, 'N' ) ) THEN IDUM2 = ICHAR( 'N' ) ELSE IF( LSAME( TRANS, 'T' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE IF( LSAME( TRANS, 'C' ) ) THEN IDUM2 = ICHAR( 'T' ) ELSE INFO = -1 END IF * IF( LWORK.LT.-1 ) THEN INFO = -15 ELSE IF( LWORK.EQ.-1 ) THEN IDUM3 = -1 ELSE IDUM3 = 1 END IF * IF( N.LT.0 ) THEN INFO = -2 END IF * IF( N+JA-1.GT.STORE_N_A ) THEN INFO = -( 8*100+6 ) END IF * IF( N+IB-1.GT.STORE_M_B ) THEN INFO = -( 11*100+3 ) END IF * IF( LLDB.LT.NB ) THEN INFO = -( 11*100+6 ) END IF * IF( NRHS.LT.0 ) THEN INFO = -3 END IF * * Current alignment restriction * IF( JA.NE.IB ) THEN INFO = -7 END IF * * Argument checking that is specific to Divide & Conquer routine * IF( NPROW.NE.1 ) THEN INFO = -( 8*100+2 ) END IF * IF( N.GT.NP*NB-MOD( JA-1, NB ) ) THEN INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: NB too small', -INFO ) RETURN END IF * * WORK_SIZE_MIN = 10*NPCOL + 4*NRHS * WORK( 1 ) = WORK_SIZE_MIN * IF( LWORK.LT.WORK_SIZE_MIN ) THEN IF( LWORK.NE.-1 ) THEN INFO = -15 CALL PXERBLA( ICTXT, 'PDDTTRS: worksize error', -INFO ) END IF RETURN END IF * * Pack params and positions into arrays for global consistency check * PARAM_CHECK( 15, 1 ) = DESCB( 5 ) PARAM_CHECK( 14, 1 ) = DESCB( 4 ) PARAM_CHECK( 13, 1 ) = DESCB( 3 ) PARAM_CHECK( 12, 1 ) = DESCB( 2 ) PARAM_CHECK( 11, 1 ) = DESCB( 1 ) PARAM_CHECK( 10, 1 ) = IB PARAM_CHECK( 9, 1 ) = DESCA( 5 ) PARAM_CHECK( 8, 1 ) = DESCA( 4 ) PARAM_CHECK( 7, 1 ) = DESCA( 3 ) PARAM_CHECK( 6, 1 ) = DESCA( 1 ) PARAM_CHECK( 5, 1 ) = JA PARAM_CHECK( 4, 1 ) = NRHS PARAM_CHECK( 3, 1 ) = N PARAM_CHECK( 2, 1 ) = IDUM3 PARAM_CHECK( 1, 1 ) = IDUM2 * PARAM_CHECK( 15, 2 ) = 1105 PARAM_CHECK( 14, 2 ) = 1104 PARAM_CHECK( 13, 2 ) = 1103 PARAM_CHECK( 12, 2 ) = 1102 PARAM_CHECK( 11, 2 ) = 1101 PARAM_CHECK( 10, 2 ) = 10 PARAM_CHECK( 9, 2 ) = 805 PARAM_CHECK( 8, 2 ) = 804 PARAM_CHECK( 7, 2 ) = 803 PARAM_CHECK( 6, 2 ) = 801 PARAM_CHECK( 5, 2 ) = 7 PARAM_CHECK( 4, 2 ) = 3 PARAM_CHECK( 3, 2 ) = 2 PARAM_CHECK( 2, 2 ) = 15 PARAM_CHECK( 1, 2 ) = 1 * * Want to find errors with MIN( ), so if no error, set it to a big * number. If there already is an error, multiply by the the * descriptor multiplier. * IF( INFO.GE.0 ) THEN INFO = BIGNUM ELSE IF( INFO.LT.-DESCMULT ) THEN INFO = -INFO ELSE INFO = -INFO*DESCMULT END IF * * Check consistency across processors * CALL GLOBCHK( ICTXT, 15, PARAM_CHECK, 15, PARAM_CHECK( 1, 3 ), $ INFO ) * * Prepare output: set info = 0 if no error, and divide by DESCMULT * if error is not in a descriptor entry. * IF( INFO.EQ.BIGNUM ) THEN INFO = 0 ELSE IF( MOD( INFO, DESCMULT ).EQ.0 ) THEN INFO = -INFO / DESCMULT ELSE INFO = -INFO END IF * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( NRHS.EQ.0 ) $ RETURN * * * Adjust addressing into matrix space to properly get into * the beginning part of the relevant data * PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) ) * IF( ( MYCOL-CSRC ).LT.( JA-PART_OFFSET-1 ) / NB ) THEN PART_OFFSET = PART_OFFSET + NB END IF * IF( MYCOL.LT.CSRC ) THEN PART_OFFSET = PART_OFFSET - NB END IF * * Form a new BLACS grid (the "standard form" grid) with only procs * holding part of the matrix, of size 1xNP where NP is adjusted, * starting at csrc=0, with JA modified to reflect dropped procs. * * First processor to hold part of the matrix: * FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL ) * * Calculate new JA one while dropping off unused processors. * JA_NEW = MOD( JA-1, NB ) + 1 * * Save and compute new value of NP * NP_SAVE = NP NP = ( JA_NEW+N-2 ) / NB + 1 * * Call utility routine that forms "standard-form" grid * CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC, $ INT_ONE, NP ) * * Use new context from standard grid as context. * ICTXT_SAVE = ICTXT ICTXT = ICTXT_NEW DESCA_1XP( 2 ) = ICTXT_NEW DESCB_PX1( 2 ) = ICTXT_NEW * * Get information about new grid. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * Drop out processors that do not have part of the matrix. * IF( MYROW.LT.0 ) THEN GO TO 20 END IF * * ******************************** * Values reused throughout routine * * User-input value of partition size * PART_SIZE = NB * * Number of columns in each processor * MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) * * Offset in columns to beginning of main partition in each proc * IF( MYCOL.EQ.0 ) THEN PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE ) MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE ) END IF * * Size of main (or odd) partition in each processor * ODD_SIZE = MY_NUM_COLS IF( MYCOL.LT.NP-1 ) THEN ODD_SIZE = ODD_SIZE - INT_ONE END IF * * * * Begin main code * INFO = 0 * * Call frontsolve routine * IF( LSAME( TRANS, 'N' ) ) THEN * CALL PDDTTRSV( 'L', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PDDTTRSV( 'U', 'T', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * END IF * * Call backsolve routine * IF( ( LSAME( TRANS, 'C' ) ) .OR. ( LSAME( TRANS, 'T' ) ) ) THEN * CALL PDDTTRSV( 'L', 'T', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * ELSE * CALL PDDTTRSV( 'U', 'N', N, NRHS, DL( PART_OFFSET+1 ), $ D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ), JA_NEW, $ DESCA_1XP, B, IB, DESCB_PX1, AF, LAF, WORK, $ LWORK, INFO ) * END IF 10 CONTINUE * * * Free BLACS space used to hold standard-form grid. * IF( ICTXT_SAVE.NE.ICTXT_NEW ) THEN CALL BLACS_GRIDEXIT( ICTXT_NEW ) END IF * 20 CONTINUE * * Restore saved input parameters * ICTXT = ICTXT_SAVE NP = NP_SAVE * * Output minimum worksize * WORK( 1 ) = WORK_SIZE_MIN * * RETURN * * End of PDDTTRS * END scalapack-1.8.0/SRC/pslaconsb.f0000640000175000017500000005042410363532303016103 0ustar muammarmuammar SUBROUTINE PSLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER I, L, LWORK, M REAL H33, H43H34, H44 * .. * .. Array Arguments .. INTEGER DESCA( * ) REAL A( * ), BUF( * ) * .. * * Purpose * ======= * * PSLACONSB looks for two consecutive small subdiagonal elements by * seeing the effect of starting a double shift QR iteration * given by H44, H33, & H43H34 and see if this would make a * subdiagonal negligible. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * A (global input) REAL array, dimension * (DESCA(LLD_),*) * On entry, the Hessenberg matrix whose tridiagonal part is * being scanned. * Unchanged on exit. * * DESCA (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix A. * * I (global input) INTEGER * The global location of the bottom of the unreduced * submatrix of A. * Unchanged on exit. * * L (global input) INTEGER * The global location of the top of the unreduced submatrix * of A. * Unchanged on exit. * * M (global output) INTEGER * On exit, this yields the starting location of the QR double * shift. This will satisfy: L <= M <= I-2. * * H44 * H33 * H43H34 (global input) REAL * These three values are for the double shift QR iteration. * * BUF (local output) REAL array of size LWORK. * * LWORK (global input) INTEGER * On exit, LWORK is the size of the work buffer. * This must be at least 7*Ceil( Ceil( (I-L)/HBL ) / * LCM(NPROW,NPCOL) ) * Here LCM is least common multiple, and NPROWxNPCOL is the * logical grid size. * * Logic: * ====== * * Two consecutive small subdiagonal elements will stall * convergence of a double shift if their product is small * relatively even if each is not very small. Thus it is * necessary to scan the "tridiagonal portion of the matrix." In * the LAPACK algorithm DLAHQR, a loop of M goes from I-2 down to * L and examines * H(m,m),H(m+1,m+1),H(m+1,m),H(m,m+1),H(m-1,m-1),H(m,m-1), and * H(m+2,m-1). Since these elements may be on separate * processors, the first major loop (10) goes over the tridiagonal * and has each node store whatever values of the 7 it has that * the node owning H(m,m) does not. This will occur on a border * and can happen in no more than 3 locations per block assuming * square blocks. There are 5 buffers that each node stores these * values: a buffer to send diagonally down and right, a buffer * to send up, a buffer to send left, a buffer to send diagonally * up and left and a buffer to send right. Each of these buffers * is actually stored in one buffer BUF where BUF(ISTR1+1) starts * the first buffer, BUF(ISTR2+1) starts the second, etc.. After * the values are stored, if there are any values that a node * needs, they will be sent and received. Then the next major * loop passes over the data and searches for two consecutive * small subdiagonals. * * Notes: * * This routine does a global maximum and must be called by all * processes. * * * Implemented by: G. Henry, November 17, 1996 * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * .. * .. Local Scalars .. INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, IBUF3, IBUF4, $ IBUF5, ICOL1, II, IRCV1, IRCV2, IRCV3, IRCV4, $ IRCV5, IROW1, ISRC, ISTR1, ISTR2, ISTR3, ISTR4, $ ISTR5, JJ, JSRC, LDA, LEFT, MODKM1, MYCOL, $ MYROW, NPCOL, NPROW, NUM, RIGHT, UP REAL H00, H10, H11, H12, H21, H22, H33S, H44S, S, $ TST1, ULP, V1, V2, V3 * .. * .. External Functions .. INTEGER ILCM REAL PSLAMCH EXTERNAL ILCM, PSLAMCH * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, SGERV2D, SGESD2D, IGAMX2D, $ INFOG2L, PXERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. * .. Executable Statements .. * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PSLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) DOWN = MOD( MYROW+1, NPROW ) NUM = NPROW*NPCOL * * BUFFER1 starts at BUF(ISTR1+1) and will contain IBUF1 elements * BUFFER2 starts at BUF(ISTR2+1) and will contain IBUF2 elements * BUFFER3 starts at BUF(ISTR3+1) and will contain IBUF3 elements * BUFFER4 starts at BUF(ISTR4+1) and will contain IBUF4 elements * BUFFER5 starts at BUF(ISTR5+1) and will contain IBUF5 elements * ISTR1 = 0 ISTR2 = ( ( I-L-1 ) / HBL ) IF( ISTR2*HBL.LT.( I-L-1 ) ) $ ISTR2 = ISTR2 + 1 II = ISTR2 / ILCM( NPROW, NPCOL ) IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN ISTR2 = II + 1 ELSE ISTR2 = II END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PSLACONSB', 10 ) RETURN END IF ISTR3 = 3*ISTR2 ISTR4 = ISTR3 + ISTR2 ISTR5 = ISTR3 + ISTR3 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) * * Copy our relevant pieces of triadiagonal that we owe into * 5 buffers to send to whomever owns H(M,M) as M moves diagonally * up the tridiagonal * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 IRCV1 = 0 IRCV2 = 0 IRCV3 = 0 IRCV4 = 0 IRCV5 = 0 DO 10 M = I - 2, L, -1 IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M-1,M-1) and send it diagonal down * IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN CALL INFOG2L( M-1, M-1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF1 = IBUF1 + 1 BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND. $ ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN * * We must pack H(M ,M-1) and send it right * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF5 = IBUF5 + 1 BUF( ISTR5+IBUF5 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+1,M) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( MYROW.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M ,M+1) and send it left * IF( NPCOL.GT.1 ) THEN CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) IBUF3 = IBUF3 + 1 BUF( ISTR3+IBUF3 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND. $ ( LEFT.EQ.JJ ) ) THEN * * We must pack H(M+1,M+1) & H(M+2,M+1) and send it * diagonally up * IF( ( UP.NE.MYROW ) .OR. ( LEFT.NE.MYCOL ) ) THEN CALL INFOG2L( M+1, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF4 = IBUF4 + 2 BUF( ISTR4+IBUF4-1 ) = A( ( ICOL1-1 )*LDA+IROW1 ) BUF( ISTR4+IBUF4 ) = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( UP.EQ.II ) .AND. $ ( MYCOL.EQ.JJ ) ) THEN * * We must pack H(M+2,M+1) and send it up * IF( NPROW.GT.1 ) THEN CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, $ MYCOL, IROW1, ICOL1, ISRC, JSRC ) IBUF2 = IBUF2 + 1 BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 ) END IF END IF * * Add up the receives * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( ( MODKM1.EQ.0 ) .AND. ( M.GT.L ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M-1,M-1) from diagonal up * IRCV1 = IRCV1 + 1 END IF IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) .AND. ( M.GT.L ) ) $ THEN * * We must receive H(M ,M-1) from left * IRCV5 = IRCV5 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+1,M ) from down * IRCV2 = IRCV2 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPCOL.GT.1 ) ) THEN * * We must receive H(M ,M+1) from right * IRCV3 = IRCV3 + 1 END IF IF( ( MODKM1.EQ.HBL-1 ) .AND. $ ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN * * We must receive H(M+1:M+2,M+1) from diagonal down * IRCV4 = IRCV4 + 2 END IF IF( ( MODKM1.EQ.HBL-2 ) .AND. ( NPROW.GT.1 ) ) THEN * * We must receive H(M+2,M+1) from down * IRCV2 = IRCV2 + 1 END IF END IF * * Possibly change owners (occurs only when MOD(M-1,HBL) = 0) * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 10 CONTINUE * * * Send data on to the appropriate node if there is any data to send * IF( IBUF1.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN, $ RIGHT ) END IF IF( IBUF2.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, UP, $ MYCOL ) END IF IF( IBUF3.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF3, 1, BUF( ISTR3+1 ), IBUF3, MYROW, $ LEFT ) END IF IF( IBUF4.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF4, 1, BUF( ISTR4+1 ), IBUF4, UP, $ LEFT ) END IF IF( IBUF5.GT.0 ) THEN CALL SGESD2D( CONTXT, IBUF5, 1, BUF( ISTR5+1 ), IBUF5, MYROW, $ RIGHT ) END IF * * Receive appropriate data if there is any * IF( IRCV1.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP, $ LEFT ) END IF IF( IRCV2.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, DOWN, $ MYCOL ) END IF IF( IRCV3.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV3, 1, BUF( ISTR3+1 ), IRCV3, MYROW, $ RIGHT ) END IF IF( IRCV4.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV4, 1, BUF( ISTR4+1 ), IRCV4, DOWN, $ RIGHT ) END IF IF( IRCV5.GT.0 ) THEN CALL SGERV2D( CONTXT, IRCV5, 1, BUF( ISTR5+1 ), IRCV5, MYROW, $ LEFT ) END IF * * Start main loop * IBUF1 = 0 IBUF2 = 0 IBUF3 = 0 IBUF4 = 0 IBUF5 = 0 CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, $ ICOL1, II, JJ ) MODKM1 = MOD( I-3+HBL, HBL ) IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. $ ( MODKM1.NE.HBL-1 ) ) THEN CALL INFOG2L( I-2, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) END IF * * Look for two consecutive small subdiagonal elements. * DO 20 M = I - 2, L, -1 * * Determine the effect of starting the double-shift QR * iteration at row M, and see if this would make H(M,M-1) * negligible. * IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN IF( MODKM1.EQ.0 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN IF( NUM.GT.1 ) THEN IBUF1 = IBUF1 + 1 H00 = BUF( ISTR1+IBUF1 ) ELSE H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF5 = IBUF5 + 1 H10 = BUF( ISTR5+IBUF5 ) ELSE H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF END IF IF( MODKM1.EQ.HBL-1 ) THEN CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IROW1, ICOL1, ISRC, JSRC ) H11 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( NUM.GT.1 ) THEN IBUF4 = IBUF4 + 2 H22 = BUF( ISTR4+IBUF4-1 ) V3 = BUF( ISTR4+IBUF4 ) ELSE H22 = A( ICOL1*LDA+IROW1+1 ) V3 = A( ( ICOL1+1 )*LDA+IROW1+1 ) END IF IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 H21 = BUF( ISTR2+IBUF2 ) ELSE H21 = A( ( ICOL1-1 )*LDA+IROW1+1 ) END IF IF( NPCOL.GT.1 ) THEN IBUF3 = IBUF3 + 1 H12 = BUF( ISTR3+IBUF3 ) ELSE H12 = A( ICOL1*LDA+IROW1 ) END IF IF( M.GT.L ) THEN H00 = A( ( ICOL1-2 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-2 )*LDA+IROW1 ) END IF * * Adjust ICOL1 for next iteration where MODKM1=HBL-2 * ICOL1 = ICOL1 + 1 END IF IF( MODKM1.EQ.HBL-2 ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) IF( NPROW.GT.1 ) THEN IBUF2 = IBUF2 + 1 V3 = BUF( ISTR2+IBUF2 ) ELSE V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) END IF H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF IF( ( MODKM1.LT.HBL-2 ) .AND. ( MODKM1.GT.0 ) ) THEN H22 = A( ( ICOL1-1 )*LDA+IROW1+1 ) H11 = A( ( ICOL1-2 )*LDA+IROW1 ) V3 = A( ( ICOL1-1 )*LDA+IROW1+2 ) H21 = A( ( ICOL1-2 )*LDA+IROW1+1 ) H12 = A( ( ICOL1-1 )*LDA+IROW1 ) IF( M.GT.L ) THEN H00 = A( ( ICOL1-3 )*LDA+IROW1-1 ) H10 = A( ( ICOL1-3 )*LDA+IROW1 ) END IF END IF H44S = H44 - H11 H33S = H33 - H11 V1 = ( H33S*H44S-H43H34 ) / H21 + H12 V2 = H22 - H11 - H33S - H44S S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) V1 = V1 / S V2 = V2 / S V3 = V3 / S IF( M.EQ.L ) $ GO TO 30 TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 ) $ GO TO 30 * * Slide indices diagonally up one for next iteration * IROW1 = IROW1 - 1 ICOL1 = ICOL1 - 1 END IF IF( M.EQ.L ) THEN * * Stop regardless of which node we are * GO TO 30 END IF * * Possibly change owners if on border * IF( MODKM1.EQ.0 ) THEN II = II - 1 JJ = JJ - 1 IF( II.LT.0 ) $ II = NPROW - 1 IF( JJ.LT.0 ) $ JJ = NPCOL - 1 END IF MODKM1 = MODKM1 - 1 IF( MODKM1.LT.0 ) $ MODKM1 = HBL - 1 20 CONTINUE 30 CONTINUE * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * RETURN * * End of PSLACONSB * END scalapack-1.8.0/SRC/cpttrsv.f0000640000175000017500000001120410363532303015615 0ustar muammarmuammar SUBROUTINE CPTTRSV( UPLO, TRANS, N, NRHS, D, E, B, LDB, $ INFO ) * * Written by Andrew J. Cleary, University of Tennessee. * November, 1996. * Modified from CPTTRS: * -- LAPACK routine (preliminary version) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * * .. Scalar Arguments .. CHARACTER UPLO, TRANS INTEGER INFO, LDB, N, NRHS * .. * .. Array Arguments .. REAL D( * ) COMPLEX B( LDB, * ), E( * ) * .. * * Purpose * ======= * * CPTTRSV solves one of the triangular systems * L * X = B, or L**H * X = B, * U * X = B, or U**H * X = B, * where L or U is the Cholesky factor of a Hermitian positive * definite tridiagonal matrix A such that * A = U**H*D*U or A = L*D*L**H (computed by CPTTRF). * * Arguments * ========= * * UPLO (input) CHARACTER*1 * Specifies whether the superdiagonal or the subdiagonal * of the tridiagonal matrix A is stored and the form of the * factorization: * = 'U': E is the superdiagonal of U, and A = U'*D*U; * = 'L': E is the subdiagonal of L, and A = L*D*L'. * (The two forms are equivalent if A is real.) * * TRANS (input) CHARACTER * Specifies the form of the system of equations: * = 'N': L * X = B (No transpose) * = 'N': L * X = B (No transpose) * = 'C': U**H * X = B (Conjugate transpose) * = 'C': L**H * X = B (Conjugate transpose) * * N (input) INTEGER * The order of the tridiagonal matrix A. N >= 0. * * NRHS (input) INTEGER * The number of right hand sides, i.e., the number of columns * of the matrix B. NRHS >= 0. * * D (input) REAL array, dimension (N) * The n diagonal elements of the diagonal matrix D from the * factorization computed by CPTTRF. * * E (input) COMPLEX array, dimension (N-1) * The (n-1) off-diagonal elements of the unit bidiagonal * factor U or L from the factorization computed by CPTTRF * (see UPLO). * * B (input/output) COMPLEX array, dimension (LDB,NRHS) * On entry, the right hand side matrix B. * On exit, the solution matrix X. * * LDB (input) INTEGER * The leading dimension of the array B. LDB >= max(1,N). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * ===================================================================== * * .. Local Scalars .. LOGICAL NOTRAN, UPPER INTEGER I, J * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX * .. * .. Executable Statements .. * * Test the input arguments. * INFO = 0 NOTRAN = LSAME( TRANS, 'N' ) UPPER = LSAME( UPLO, 'U' ) IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN INFO = -1 ELSE IF( .NOT.NOTRAN .AND. .NOT. $ LSAME( TRANS, 'C' ) ) THEN INFO = -2 ELSE IF( N.LT.0 ) THEN INFO = -3 ELSE IF( NRHS.LT.0 ) THEN INFO = -4 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -8 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CPTTRS', -INFO ) RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) $ RETURN * IF( UPPER ) THEN * IF( .NOT.NOTRAN ) THEN * DO 30 J = 1, NRHS * * Solve U**T (or H) * x = b. * DO 10 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) 10 CONTINUE 30 CONTINUE * ELSE * DO 35 J = 1, NRHS * * Solve U * x = b. * DO 20 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) 20 CONTINUE 35 CONTINUE ENDIF * ELSE * IF( NOTRAN ) THEN * DO 60 J = 1, NRHS * * Solve L * x = b. * DO 40 I = 2, N B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) 40 CONTINUE 60 CONTINUE * ELSE * DO 65 J = 1, NRHS * * Solve L**H * x = b. * DO 50 I = N - 1, 1, -1 B( I, J ) = B( I, J ) - $ B( I+1, J )*CONJG( E( I ) ) 50 CONTINUE 65 CONTINUE ENDIF * END IF * RETURN * * End of CPTTRS * END scalapack-1.8.0/SRC/pzlarfb.f0000640000175000017500000010421710363532303015557 0ustar muammarmuammar SUBROUTINE PZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N * .. * .. Array Arguments .. INTEGER DESCC( * ), DESCV( * ) COMPLEX*16 C( * ), T( * ), V( * ), WORK( * ) * .. * * Purpose * ======= * * PZLARFB applies a complex block reflector Q or its conjugate * transpose Q**H to a complex M-by-N distributed matrix sub( C ) * denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, * DTYPE_A = 1. * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * N_A (global) DESCA( N_ ) The number of columns in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute * the columns of the array. * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first * row of the array A is distributed. * CSRC_A (global) DESCA( CSRC_ ) The process column over which the * first column of the array A is * distributed. * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Arguments * ========= * * SIDE (global input) CHARACTER * = 'L': apply Q or Q**H from the Left; * = 'R': apply Q or Q**H from the Right. * * TRANS (global input) CHARACTER * = 'N': No transpose, apply Q; * = 'C': Conjugate transpose, apply Q**H. * * DIRECT (global input) CHARACTER * Indicates how Q is formed from a product of elementary * reflectors * = 'F': Q = H(1) H(2) . . . H(k) (Forward) * = 'B': Q = H(k) . . . H(2) H(1) (Backward) * * STOREV (global input) CHARACTER * Indicates how the vectors which define the elementary * reflectors are stored: * = 'C': Columnwise * = 'R': Rowwise * * M (global input) INTEGER * The number of rows to be operated on i.e the number of rows * of the distributed submatrix sub( C ). M >= 0. * * N (global input) INTEGER * The number of columns to be operated on i.e the number of * columns of the distributed submatrix sub( C ). N >= 0. * * K (global input) INTEGER * The order of the matrix T (= the number of elementary * reflectors whose product defines the block reflector). * * V (local input) COMPLEX*16 pointer into the local memory * to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if * STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and * SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and * SIDE = 'R'. It contains the local pieces of the distributed * vectors V representing the Householder transformation. * See further details. * If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1)); * if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1)); * if STOREV = 'R', LLD_V >= LOCr(IV+K-1). * * IV (global input) INTEGER * The row index in the global array V indicating the first * row of sub( V ). * * JV (global input) INTEGER * The column index in the global array V indicating the * first column of sub( V ). * * DESCV (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix V. * * T (local input) COMPLEX*16 array, dimension MB_V by MB_V * if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian- * gular matrix T in the representation of the block reflector. * * C (local input/local output) COMPLEX*16 pointer into the * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)). * On entry, the M-by-N distributed matrix sub( C ). On exit, * sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or * sub( C )*Q or sub( C )*Q'. * * IC (global input) INTEGER * The row index in the global array C indicating the first * row of sub( C ). * * JC (global input) INTEGER * The column index in the global array C indicating the * first column of sub( C ). * * DESCC (global and local input) INTEGER array of dimension DLEN_. * The array descriptor for the distributed matrix C. * * WORK (local workspace) COMPLEX*16 array, dimension (LWORK) * If STOREV = 'C', * if SIDE = 'L', * LWORK >= ( NqC0 + MpC0 ) * K * else if SIDE = 'R', * LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC, * NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ), * MpC0 ) ) * K * end if * else if STOREV = 'R', * if SIDE = 'L', * LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC, * MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ), * NqC0 ) ) * K * else if SIDE = 'R', * LWORK >= ( MpC0 + NqC0 ) * K * end if * end if * * where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ), * * IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ), * IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ), * IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ), * MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ), * NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ), * * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ), * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ), * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ), * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ), * NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ), * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ), * * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions; * MYROW, MYCOL, NPROW and NPCOL can be determined by calling * the subroutine BLACS_GRIDINFO. * * Alignment requirements * ====================== * * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1) * must verify some alignment properties, namely the following * expressions should be true: * * If STOREV = 'Columnwise' * If SIDE = 'Left', * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW ) * If SIDE = 'Right', * ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC ) * else if STOREV = 'Rowwise' * If SIDE = 'Left', * ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC ) * If SIDE = 'Right', * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL ) * end if * * ===================================================================== * * .. Parameters .. INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_, $ LLD_, MB_, M_, NB_, N_, RSRC_ PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), $ ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL FORWARD CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW, $ ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV, $ ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV, $ IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV, $ ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND, $ JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC, $ MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV, $ NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, INFOG1L, INFOG2L, PB_TOPGET, $ PBZTRAN, ZGEBR2D, ZGEBS2D, ZGEMM, $ ZGSUM2D, ZLACPY, ZLASET, ZTRBR2D, $ ZTRBS2D, ZTRMM * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. * .. External Functions .. LOGICAL LSAME INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. * .. Executable Statements .. * * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) $ RETURN * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'C' ELSE TRANST = 'N' END IF FORWARD = LSAME( DIRECT, 'F' ) IF( FORWARD ) THEN UPLO = 'U' ELSE UPLO = 'L' END IF * CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV, $ IVROW, IVCOL ) CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, $ ICROW, ICCOL ) LDC = DESCC( LLD_ ) LDV = DESCV( LLD_ ) IIC = MIN( IIC, LDC ) IIV = MIN( IIV, LDV ) IROFFC = MOD( IC-1, DESCC( MB_ ) ) ICOFFC = MOD( JC-1, DESCC( NB_ ) ) MBV = DESCV( MB_ ) NBV = DESCV( NB_ ) IROFFV = MOD( IV-1, MBV ) ICOFFV = MOD( JV-1, NBV ) MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW ) NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL ) IF( MYCOL.EQ.ICCOL ) $ NQC = NQC - ICOFFC IF( MYROW.EQ.ICROW ) $ MPC = MPC - IROFFC JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) ) JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL, $ DESCV( CSRC_ ), NPCOL ) ) ) IOFFC = IIC + ( JJC-1 ) * LDC IOFFV = IIV + ( JJV-1 ) * LDV * IF( LSAME( STOREV, 'C' ) ) THEN * * V is stored columnwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV * WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV ) * IPV = 1 IPW = IPV + MPC * K LV = MAX( 1, MPC ) LW = MAX( 1, NQC ) * * Broadcast V to the other process columns. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) IF( MYCOL.EQ.IVCOL ) THEN CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ V( IOFFV ), LDV ) IF( MYROW.EQ.IVROW ) $ CALL ZTRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV ) CALL ZLACPY( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K, $ WORK( IPV ), LV, MYROW, IVCOL ) IF( MYROW.EQ.IVROW ) $ CALL ZTRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO, $ 'Non unit', K, K, T, NBV, MYROW, IVCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPV) = ( V1 ) where V1 is unit lower triangular, * ( V2 ) zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + MPC - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 10 CONTINUE IF( K-ITOP .GT.0 ) THEN CALL ZLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 10 END IF * ELSE * * WORK(IPV) = ( V1 ) where V2 is unit upper triangular, * ( V2 ) zeroes lower triangular part of V2 * JJ = JJV IOFF = MOD( IV+M-K-1, MBV ) CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ), $ II, ILASTROW ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 20 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL ZLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL ZLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPV+II-IIV+ITOP*LV ), LV ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 20 END IF * END IF * * WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K * IF( MPC.GT.0 ) THEN CALL ZGEMM( 'Conjugate transpose', 'No transpose', NQC, $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), LV, $ ZERO, WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * IF( MYROW.EQ.IVROW ) THEN * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * CALL ZTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, NBV, WORK( IPW ), LW ) CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC, $ K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form sub( C )*Q or sub( C )*Q' * * ICOFFC = IROFFV is required by the current transposition * routine PBZTRAN * NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW ) IF( MYROW.EQ.IVROW ) THEN NPV = NPV0 - IROFFV ELSE NPV = NPV0 END IF IF( MYCOL.EQ.ICCOL ) THEN NQC0 = NQC + ICOFFC ELSE NQC0 = NQC END IF * * Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]' * WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]' * WORK( IPT ) is the workspace for PBZTRAN * IPV = 1 IPW = IPV + K * NQC0 IPT = IPW + NPV0 * K LV = MAX( 1, K ) LW = MAX( 1, NPV0 ) * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN CALL ZLASET( 'All', IROFFV, K, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + IROFFV CALL ZLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL ZLACPY( 'All', NPV, K, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( . V1' V2' )' where V1 is unit lower * triangular, zeroes upper triangular part of V1 * MYDIST = MOD( MYROW-IVROW+NPROW, NPROW ) ITOP = MAX( 0, MYDIST*MBV - IROFFV ) IIBEG = IIV IIEND = IIBEG + NPV - 1 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND ) * 30 CONTINUE IF( ( K-ITOP ).GT.0 ) THEN CALL ZLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO, $ ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ), $ LW ) MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IROFFV IIBEG = IINXT + 1 IINXT = MIN( IINXT+MBV, IIEND ) GO TO 30 END IF * ELSE * * WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper * triangular, zeroes lower triangular part of V2. * JJ = JJV CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW, $ DESCV( RSRC_ ), II, ILASTROW ) IOFF = MOD( IV+N-K-1, MBV ) KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW ) IF( MYROW.EQ.ILASTROW ) $ KP = KP - IOFF MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW ) ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP+MBV, K ) ITOP = MIN( MAX( 0, ITOP ), K ) * 40 CONTINUE IF( JJ.LE.( JJV+K-1 ) ) THEN HEIGHT = IBASE - ITOP CALL ZLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL ZLASET( 'Lower', KP, HEIGHT, ZERO, ONE, $ WORK( IPW1+II-IIV+ITOP*LW ), LW ) KP = MAX( 0, KP - HEIGHT ) II = II + HEIGHT JJ = JJV + IBASE MYDIST = MYDIST + NPROW ITOP = MYDIST * MBV - IOFF IBASE = MIN( ITOP + MBV, K ) ITOP = MIN( ITOP, K ) GO TO 40 END IF END IF END IF * CALL PBZTRAN( ICTXT, 'Columnwise', 'Conjugate transpose', $ N+IROFFV, K, MBV, WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, -1, ICCOL, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC * IF( MYCOL.EQ.ICCOL ) $ IPV = IPV + ICOFFC * LV * * WORK( IPW ) becomes MPC x K = C( IOFFC ) * V * WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K * LW = MAX( 1, MPC ) * IF( NQC.GT.0 ) THEN CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, $ K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN IF( MYROW.EQ.IVROW ) THEN * * Broadcast the block reflector to the other rows. * CALL ZTRBS2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV ) ELSE CALL ZTRBR2D( ICTXT, 'Columnwise', ' ', UPLO, $ 'Non unit', K, K, T, NBV, IVROW, MYCOL ) END IF CALL ZTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, NBV, WORK( IPW ), LW ) * CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V' * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL ZGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) END IF * ELSE * * V is stored rowwise * IF( LSAME( SIDE, 'L' ) ) THEN * * Form Q*sub( C ) or Q'*sub( C ) * * IROFFC = ICOFFV is required by the current transposition * routine PBZTRAN * MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL ) IF( MYCOL.EQ.IVCOL ) THEN MQV = MQV0 - ICOFFV ELSE MQV = MQV0 END IF IF( MYROW.EQ.ICROW ) THEN MPC0 = MPC + IROFFC ELSE MPC0 = MPC END IF * * Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]' * WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ] * WORK( IPT ) is the workspace for PBZTRAN * IPV = 1 IPW = IPV + MPC0 * K IPT = IPW + K * MQV0 LV = MAX( 1, MPC0 ) LW = MAX( 1, K ) * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN CALL ZLASET( 'All', K, ICOFFV, ZERO, ZERO, $ WORK( IPW ), LW ) IPW1 = IPW + ICOFFV * LW CALL ZLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) ELSE IPW1 = IPW CALL ZLACPY( 'All', K, MQV, V( IOFFV ), LDV, $ WORK( IPW1 ), LW ) END IF * IF( FORWARD ) THEN * * WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + MQV - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 50 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL ZLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ), $ LW ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 50 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL, $ DESCV( CSRC_ ), JJ, ILASTCOL ) IOFF = MOD( JV+M-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 60 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL ZLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW ) CALL ZLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 60 END IF END IF END IF * * WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K * CALL PBZTRAN( ICTXT, 'Rowwise', 'Conjugate transpose', K, $ M+ICOFFV, NBV, WORK( IPW ), LW, ZERO, $ WORK( IPV ), LV, IVROW, IVCOL, ICROW, -1, $ WORK( IPT ) ) * * WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K * IF( MYROW.EQ.ICROW ) $ IPV = IPV + IROFFC * * WORK( IPW ) becomes NQC x K = C( IOFFC )' * V' * WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K * LW = MAX( 1, NQC ) * IF( MPC.GT.0 ) THEN CALL ZGEMM( 'Conjugate transpose', 'No transpose', NQC, $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ), $ LW, IVROW, MYCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYROW.EQ.IVROW ) THEN IF( MYCOL.EQ.IVCOL ) THEN * * Broadcast the block reflector to the other columns. * CALL ZTRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV ) ELSE CALL ZTRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit', $ K, K, T, MBV, MYROW, IVCOL ) END IF CALL ZTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K, $ ONE, T, MBV, WORK( IPW ), LW ) * CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K, $ WORK( IPW ), LW, IVROW, MYCOL ) END IF * * C C - V' * W' * C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )' * MPC x NQC MPC x K K x NQC * CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC, $ K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE, $ C( IOFFC ), LDC ) * ELSE * * Form Q*sub( C ) or Q'*sub( C ) * * Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC * WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC * WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )' * IPV = 1 IPW = IPV + K * NQC LV = MAX( 1, K ) LW = MAX( 1, MPC ) * * Broadcast V to the other process rows. * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) IF( MYROW.EQ.IVROW ) THEN CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ V( IOFFV ), LDV ) IF( MYCOL.EQ.IVCOL ) $ CALL ZTRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV ) CALL ZLACPY( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ), $ LV ) ELSE CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC, $ WORK( IPV ), LV, IVROW, MYCOL ) IF( MYCOL.EQ.IVCOL ) $ CALL ZTRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO, $ 'Non unit', K, K, T, MBV, IVROW, MYCOL ) END IF * IF( FORWARD ) THEN * * WORK(IPW) = ( V1 V2 ) where V1 is unit upper * triangular, zeroes lower triangular part of V1 * MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL ) ILEFT = MAX( 0, MYDIST * NBV - ICOFFV ) JJBEG = JJV JJEND = JJV + NQC - 1 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND ) * 70 CONTINUE IF( ( K-ILEFT ).GT.0 ) THEN CALL ZLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO, $ ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ), $ LV ) MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - ICOFFV JJBEG = JJNXT + 1 JJNXT = MIN( JJNXT+NBV, JJEND ) GO TO 70 END IF * ELSE * * WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower * triangular, zeroes upper triangular part of V2. * II = IIV CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ), $ JJ, ILASTCOL ) IOFF = MOD( JV+N-K-1, NBV ) KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL ) IF( MYCOL.EQ.ILASTCOL ) $ KQ = KQ - IOFF MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL ) ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT+NBV, K ) ILEFT = MIN( MAX( 0, ILEFT ), K ) * 80 CONTINUE IF( II.LE.( IIV+K-1 ) ) THEN WIDE = IRIGHT - ILEFT CALL ZLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO, $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV ) CALL ZLASET( 'Upper', WIDE, KQ, ZERO, ONE, $ WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV ) KQ = MAX( 0, KQ - WIDE ) II = IIV + IRIGHT JJ = JJ + WIDE MYDIST = MYDIST + NPCOL ILEFT = MYDIST * NBV - IOFF IRIGHT = MIN( ILEFT + NBV, K ) ILEFT = MIN( ILEFT, K ) GO TO 80 END IF * END IF * * WORK( IPV ) is K x NQC = V = V( IOFFV ) * WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K * IF( NQC.GT.0 ) THEN CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, $ K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ), $ LV, ZERO, WORK( IPW ), LW ) ELSE CALL ZLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW ) END IF * CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) * * WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T * IF( MYCOL.EQ.IVCOL ) THEN CALL ZTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K, $ ONE, T, MBV, WORK( IPW ), LW ) CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW ) ELSE CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ), $ LW, MYROW, IVCOL ) END IF * * C C - W * V * C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV ) * MPC x NQC MPC x K K x NQC * CALL ZGEMM( 'No transpose', 'No transpose', MPC, NQC, K, $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE, $ C( IOFFC ), LDC ) * END IF * END IF * RETURN * * End of PZLARFB * END